rslearn 0.0.25__py3-none-any.whl → 0.0.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rslearn/config/dataset.py +30 -23
  2. rslearn/data_sources/__init__.py +2 -0
  3. rslearn/data_sources/aws_landsat.py +44 -161
  4. rslearn/data_sources/aws_open_data.py +2 -4
  5. rslearn/data_sources/aws_sentinel1.py +1 -3
  6. rslearn/data_sources/aws_sentinel2_element84.py +54 -165
  7. rslearn/data_sources/climate_data_store.py +1 -3
  8. rslearn/data_sources/copernicus.py +1 -2
  9. rslearn/data_sources/data_source.py +1 -1
  10. rslearn/data_sources/direct_materialize_data_source.py +336 -0
  11. rslearn/data_sources/earthdaily.py +52 -155
  12. rslearn/data_sources/earthdatahub.py +425 -0
  13. rslearn/data_sources/eurocrops.py +1 -2
  14. rslearn/data_sources/gcp_public_data.py +1 -2
  15. rslearn/data_sources/google_earth_engine.py +1 -2
  16. rslearn/data_sources/hf_srtm.py +595 -0
  17. rslearn/data_sources/local_files.py +3 -3
  18. rslearn/data_sources/openstreetmap.py +1 -1
  19. rslearn/data_sources/planet.py +1 -2
  20. rslearn/data_sources/planet_basemap.py +1 -2
  21. rslearn/data_sources/planetary_computer.py +183 -186
  22. rslearn/data_sources/soilgrids.py +3 -3
  23. rslearn/data_sources/stac.py +1 -2
  24. rslearn/data_sources/usda_cdl.py +1 -3
  25. rslearn/data_sources/usgs_landsat.py +7 -254
  26. rslearn/data_sources/utils.py +204 -64
  27. rslearn/data_sources/worldcereal.py +1 -1
  28. rslearn/data_sources/worldcover.py +1 -1
  29. rslearn/data_sources/worldpop.py +1 -1
  30. rslearn/data_sources/xyz_tiles.py +5 -9
  31. rslearn/dataset/materialize.py +5 -1
  32. rslearn/models/clay/clay.py +3 -3
  33. rslearn/models/concatenate_features.py +6 -1
  34. rslearn/models/detr/detr.py +4 -1
  35. rslearn/models/dinov3.py +0 -1
  36. rslearn/models/olmoearth_pretrain/model.py +3 -1
  37. rslearn/models/pooling_decoder.py +1 -1
  38. rslearn/models/prithvi.py +0 -1
  39. rslearn/models/simple_time_series.py +97 -35
  40. rslearn/train/{all_patches_dataset.py → all_crops_dataset.py} +120 -117
  41. rslearn/train/data_module.py +32 -27
  42. rslearn/train/dataset.py +260 -117
  43. rslearn/train/dataset_index.py +156 -0
  44. rslearn/train/lightning_module.py +1 -1
  45. rslearn/train/model_context.py +19 -3
  46. rslearn/train/prediction_writer.py +69 -41
  47. rslearn/train/tasks/classification.py +1 -1
  48. rslearn/train/tasks/detection.py +5 -5
  49. rslearn/train/tasks/per_pixel_regression.py +13 -13
  50. rslearn/train/tasks/regression.py +1 -1
  51. rslearn/train/tasks/segmentation.py +26 -13
  52. rslearn/train/transforms/concatenate.py +17 -27
  53. rslearn/train/transforms/crop.py +8 -19
  54. rslearn/train/transforms/flip.py +4 -10
  55. rslearn/train/transforms/mask.py +9 -15
  56. rslearn/train/transforms/normalize.py +31 -82
  57. rslearn/train/transforms/pad.py +7 -13
  58. rslearn/train/transforms/resize.py +5 -22
  59. rslearn/train/transforms/select_bands.py +16 -36
  60. rslearn/train/transforms/sentinel1.py +4 -16
  61. rslearn/utils/__init__.py +2 -0
  62. rslearn/utils/geometry.py +21 -0
  63. rslearn/utils/m2m_api.py +251 -0
  64. rslearn/utils/retry_session.py +43 -0
  65. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/METADATA +6 -3
  66. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/RECORD +71 -66
  67. rslearn/data_sources/earthdata_srtm.py +0 -282
  68. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/WHEEL +0 -0
  69. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/entry_points.txt +0 -0
  70. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/licenses/LICENSE +0 -0
  71. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/licenses/NOTICE +0 -0
  72. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/top_level.txt +0 -0
@@ -3,27 +3,26 @@
3
3
  import os
4
4
  import tempfile
5
5
  import xml.etree.ElementTree as ET
6
+ from collections.abc import Callable
6
7
  from datetime import datetime, timedelta
7
8
  from typing import Any
8
9
 
9
- import affine
10
10
  import numpy.typing as npt
11
11
  import planetary_computer
12
12
  import rasterio
13
13
  import requests
14
- from rasterio.enums import Resampling
15
14
  from typing_extensions import override
16
15
  from upath import UPath
17
16
 
18
- from rslearn.config import LayerConfig
19
17
  from rslearn.data_sources import DataSourceContext
18
+ from rslearn.data_sources.direct_materialize_data_source import (
19
+ DirectMaterializeDataSource,
20
+ )
20
21
  from rslearn.data_sources.stac import SourceItem, StacDataSource
21
- from rslearn.dataset import Window
22
- from rslearn.dataset.materialize import RasterMaterializer
23
22
  from rslearn.log_utils import get_logger
24
- from rslearn.tile_stores import TileStore, TileStoreWithLayer
23
+ from rslearn.tile_stores import TileStoreWithLayer
25
24
  from rslearn.utils.fsspec import join_upath
26
- from rslearn.utils.geometry import PixelBounds, Projection, STGeometry
25
+ from rslearn.utils.geometry import STGeometry
27
26
  from rslearn.utils.raster_format import get_raster_projection_and_bounds
28
27
  from rslearn.utils.stac import StacClient, StacItem
29
28
 
@@ -124,7 +123,7 @@ class PlanetaryComputerStacClient(StacClient):
124
123
  return all_items
125
124
 
126
125
 
127
- class PlanetaryComputer(StacDataSource, TileStore):
126
+ class PlanetaryComputer(DirectMaterializeDataSource[SourceItem], StacDataSource):
128
127
  """Modality-agnostic data source for data on Microsoft Planetary Computer.
129
128
 
130
129
  If there is a subclass available for a modality, it is recommended to use the
@@ -170,6 +169,9 @@ class PlanetaryComputer(StacDataSource, TileStore):
170
169
  needed each time.
171
170
  context: the data source context.
172
171
  """
172
+ # Initialize the DirectMaterializeDataSource with asset_bands
173
+ DirectMaterializeDataSource.__init__(self, asset_bands=asset_bands)
174
+
173
175
  # Determine the cache_dir to use.
174
176
  cache_upath: UPath | None = None
175
177
  if cache_dir is not None:
@@ -185,7 +187,8 @@ class PlanetaryComputer(StacDataSource, TileStore):
185
187
  if skip_items_missing_assets:
186
188
  required_assets = list(asset_bands.keys())
187
189
 
188
- super().__init__(
190
+ StacDataSource.__init__(
191
+ self,
189
192
  endpoint=self.STAC_ENDPOINT,
190
193
  collection_name=collection_name,
191
194
  query=query,
@@ -198,10 +201,52 @@ class PlanetaryComputer(StacDataSource, TileStore):
198
201
  # Replace the client with PlanetaryComputerStacClient to handle PC's pagination limits.
199
202
  self.client = PlanetaryComputerStacClient(self.STAC_ENDPOINT)
200
203
 
201
- self.asset_bands = asset_bands
202
204
  self.timeout = timeout
203
205
  self.skip_items_missing_assets = skip_items_missing_assets
204
206
 
207
+ # --- DirectMaterializeDataSource implementation ---
208
+
209
+ def get_asset_url(self, item_name: str, asset_key: str) -> str:
210
+ """Get the signed URL to read the asset for the given item and asset key.
211
+
212
+ Args:
213
+ item_name: the name of the item.
214
+ asset_key: the key identifying which asset to get.
215
+
216
+ Returns:
217
+ the signed URL to read the asset from.
218
+ """
219
+ item = self.get_item_by_name(item_name)
220
+ return planetary_computer.sign(item.asset_urls[asset_key])
221
+
222
+ def get_raster_bands(self, layer_name: str, item_name: str) -> list[list[str]]:
223
+ """Get the sets of bands that have been stored for the specified item.
224
+
225
+ Args:
226
+ layer_name: the layer name or alias.
227
+ item_name: the item.
228
+
229
+ Returns:
230
+ a list of lists of bands that are in the tile store (with one raster
231
+ stored corresponding to each inner list). If no rasters are ready for
232
+ this item, returns empty list.
233
+ """
234
+ if self.skip_items_missing_assets:
235
+ # In this case we can assume that the item has all of the assets.
236
+ return list(self.asset_bands.values())
237
+
238
+ # Otherwise we have to lookup the STAC item to see which assets it has.
239
+ # Here we use get_item_by_name since it handles caching.
240
+ item = self.get_item_by_name(item_name)
241
+ all_bands = []
242
+ for asset_key, band_names in self.asset_bands.items():
243
+ if asset_key not in item.asset_urls:
244
+ continue
245
+ all_bands.append(band_names)
246
+ return all_bands
247
+
248
+ # --- DataSource implementation ---
249
+
205
250
  def ingest(
206
251
  self,
207
252
  tile_store: TileStoreWithLayer,
@@ -255,152 +300,6 @@ class PlanetaryComputer(StacDataSource, TileStore):
255
300
  asset_key,
256
301
  )
257
302
 
258
- def is_raster_ready(
259
- self, layer_name: str, item_name: str, bands: list[str]
260
- ) -> bool:
261
- """Checks if this raster has been written to the store.
262
-
263
- Args:
264
- layer_name: the layer name or alias.
265
- item_name: the item.
266
- bands: the list of bands identifying which specific raster to read.
267
-
268
- Returns:
269
- whether there is a raster in the store matching the source, item, and
270
- bands.
271
- """
272
- # Always ready since we wrap accesses to Planetary Computer.
273
- return True
274
-
275
- def get_raster_bands(self, layer_name: str, item_name: str) -> list[list[str]]:
276
- """Get the sets of bands that have been stored for the specified item.
277
-
278
- Args:
279
- layer_name: the layer name or alias.
280
- item_name: the item.
281
-
282
- Returns:
283
- a list of lists of bands that are in the tile store (with one raster
284
- stored corresponding to each inner list). If no rasters are ready for
285
- this item, returns empty list.
286
- """
287
- if self.skip_items_missing_assets:
288
- # In this case we can assume that the item has all of the assets.
289
- return list(self.asset_bands.values())
290
-
291
- # Otherwise we have to lookup the STAC item to see which assets it has.
292
- # Here we use get_item_by_name since it handles caching.
293
- item = self.get_item_by_name(item_name)
294
- all_bands = []
295
- for asset_key, band_names in self.asset_bands.items():
296
- if asset_key not in item.asset_urls:
297
- continue
298
- all_bands.append(band_names)
299
- return all_bands
300
-
301
- def _get_asset_by_band(self, bands: list[str]) -> str:
302
- """Get the name of the asset based on the band names."""
303
- for asset_key, asset_bands in self.asset_bands.items():
304
- if bands == asset_bands:
305
- return asset_key
306
-
307
- raise ValueError(f"no raster with bands {bands}")
308
-
309
- def get_raster_bounds(
310
- self, layer_name: str, item_name: str, bands: list[str], projection: Projection
311
- ) -> PixelBounds:
312
- """Get the bounds of the raster in the specified projection.
313
-
314
- Args:
315
- layer_name: the layer name or alias.
316
- item_name: the item to check.
317
- bands: the list of bands identifying which specific raster to read. These
318
- bands must match the bands of a stored raster.
319
- projection: the projection to get the raster's bounds in.
320
-
321
- Returns:
322
- the bounds of the raster in the projection.
323
- """
324
- item = self.get_item_by_name(item_name)
325
- geom = item.geometry.to_projection(projection)
326
- return (
327
- int(geom.shp.bounds[0]),
328
- int(geom.shp.bounds[1]),
329
- int(geom.shp.bounds[2]),
330
- int(geom.shp.bounds[3]),
331
- )
332
-
333
- def read_raster(
334
- self,
335
- layer_name: str,
336
- item_name: str,
337
- bands: list[str],
338
- projection: Projection,
339
- bounds: PixelBounds,
340
- resampling: Resampling = Resampling.bilinear,
341
- ) -> npt.NDArray[Any]:
342
- """Read raster data from the store.
343
-
344
- Args:
345
- layer_name: the layer name or alias.
346
- item_name: the item to read.
347
- bands: the list of bands identifying which specific raster to read. These
348
- bands must match the bands of a stored raster.
349
- projection: the projection to read in.
350
- bounds: the bounds to read.
351
- resampling: the resampling method to use in case reprojection is needed.
352
-
353
- Returns:
354
- the raster data
355
- """
356
- asset_key = self._get_asset_by_band(bands)
357
- item = self.get_item_by_name(item_name)
358
- asset_url = planetary_computer.sign(item.asset_urls[asset_key])
359
-
360
- # Construct the transform to use for the warped dataset.
361
- wanted_transform = affine.Affine(
362
- projection.x_resolution,
363
- 0,
364
- bounds[0] * projection.x_resolution,
365
- 0,
366
- projection.y_resolution,
367
- bounds[1] * projection.y_resolution,
368
- )
369
-
370
- with rasterio.open(asset_url) as src:
371
- with rasterio.vrt.WarpedVRT(
372
- src,
373
- crs=projection.crs,
374
- transform=wanted_transform,
375
- width=bounds[2] - bounds[0],
376
- height=bounds[3] - bounds[1],
377
- resampling=resampling,
378
- ) as vrt:
379
- return vrt.read()
380
-
381
- def materialize(
382
- self,
383
- window: Window,
384
- item_groups: list[list[SourceItem]],
385
- layer_name: str,
386
- layer_cfg: LayerConfig,
387
- ) -> None:
388
- """Materialize data for the window.
389
-
390
- Args:
391
- window: the window to materialize
392
- item_groups: the items from get_items
393
- layer_name: the name of this layer
394
- layer_cfg: the config of this layer
395
- """
396
- RasterMaterializer().materialize(
397
- TileStoreWithLayer(self, layer_name),
398
- window,
399
- layer_name,
400
- layer_cfg,
401
- item_groups,
402
- )
403
-
404
303
 
405
304
  class Sentinel2(PlanetaryComputer):
406
305
  """A data source for Sentinel-2 L2A data on Microsoft Planetary Computer.
@@ -548,46 +447,144 @@ class Sentinel2(PlanetaryComputer):
548
447
  asset_key,
549
448
  )
550
449
 
551
- def read_raster(
552
- self,
553
- layer_name: str,
554
- item_name: str,
555
- bands: list[str],
556
- projection: Projection,
557
- bounds: PixelBounds,
558
- resampling: Resampling = Resampling.bilinear,
559
- ) -> npt.NDArray[Any]:
560
- """Read raster data from the store.
450
+ def get_read_callback(
451
+ self, item_name: str, asset_key: str
452
+ ) -> Callable[[npt.NDArray[Any]], npt.NDArray[Any]] | None:
453
+ """Return a callback to harmonize Sentinel-2 data if needed.
561
454
 
562
455
  Args:
563
- layer_name: the layer name or alias.
564
- item_name: the item to read.
565
- bands: the list of bands identifying which specific raster to read. These
566
- bands must match the bands of a stored raster.
567
- projection: the projection to read in.
568
- bounds: the bounds to read.
569
- resampling: the resampling method to use in case reprojection is needed.
456
+ item_name: the name of the item being read.
457
+ asset_key: the key identifying which asset is being read.
570
458
 
571
459
  Returns:
572
- the raster data
460
+ A callback function for harmonization, or None if not needed.
573
461
  """
574
- # We override read_raster because we may need to harmonize the data.
575
- raw_data = super().read_raster(
576
- layer_name, item_name, bands, projection, bounds, resampling=resampling
577
- )
578
-
579
462
  # TCI (visual) image does not need harmonization.
580
- if not self.harmonize or bands == self.BANDS["visual"]:
581
- return raw_data
463
+ if not self.harmonize or asset_key == "visual":
464
+ return None
582
465
 
583
466
  item = self.get_item_by_name(item_name)
584
- harmonize_callback = get_harmonize_callback(self._get_product_xml(item))
467
+ return get_harmonize_callback(self._get_product_xml(item))
468
+
469
+
470
+ class LandsatC2L2(PlanetaryComputer):
471
+ """A data source for Landsat Collection 2 Level-2 data on Planetary Computer.
472
+
473
+ This data source targets Landsat 8/9 items in the `landsat-c2-l2` collection.
474
+ Band names exposed by this data source are Landsat-style band identifiers
475
+ (e.g. "B4", "B5", "B10") for maximum compatibility with
476
+ `rslearn.data_sources.aws_landsat.LandsatOliTirs`.
585
477
 
586
- if harmonize_callback is None:
587
- return raw_data
478
+ For convenience, configuration also accepts STAC `common_name` values (e.g. "red",
479
+ "nir08") and STAC `eo:bands[].name` aliases (e.g. "OLI_B4", "TIRS_B10"), which are
480
+ normalized to the Landsat-style band identifiers above.
588
481
 
589
- array = harmonize_callback(raw_data)
590
- return array
482
+ Note: this is Level-2 data, not Level-1. If you need Level-1-specific bands
483
+ (e.g. panchromatic/cirrus or thermal band 11), use
484
+ `rslearn.data_sources.aws_landsat.LandsatOliTirs`.
485
+ """
486
+
487
+ COLLECTION_NAME = "landsat-c2-l2"
488
+
489
+ # Map STAC asset keys (common_name) to the Landsat band identifiers we expose.
490
+ # Planetary Computer assets for `landsat-c2-l2` are keyed by common_name.
491
+ ASSET_COMMON_NAME_TO_BAND = {
492
+ "coastal": "B1",
493
+ "blue": "B2",
494
+ "green": "B3",
495
+ "red": "B4",
496
+ "nir08": "B5",
497
+ "swir16": "B6",
498
+ "swir22": "B7",
499
+ "lwir11": "B10",
500
+ }
501
+
502
+ BAND_TO_ASSET_COMMON_NAME = {v: k for k, v in ASSET_COMMON_NAME_TO_BAND.items()}
503
+
504
+ # STAC eo:bands name -> Landsat-style band identifiers.
505
+ STAC_BAND_NAME_ALIASES = {
506
+ "OLI_B1": "B1",
507
+ "OLI_B2": "B2",
508
+ "OLI_B3": "B3",
509
+ "OLI_B4": "B4",
510
+ "OLI_B5": "B5",
511
+ "OLI_B6": "B6",
512
+ "OLI_B7": "B7",
513
+ "TIRS_B10": "B10",
514
+ }
515
+
516
+ DEFAULT_PLATFORM_QUERY = {"platform": {"in": ["landsat-8", "landsat-9"]}}
517
+
518
+ @classmethod
519
+ def _normalize_band_name(cls, band: str) -> str:
520
+ if band in cls.BAND_TO_ASSET_COMMON_NAME:
521
+ return band
522
+ if band in cls.ASSET_COMMON_NAME_TO_BAND:
523
+ return cls.ASSET_COMMON_NAME_TO_BAND[band]
524
+ if band in cls.STAC_BAND_NAME_ALIASES:
525
+ return cls.STAC_BAND_NAME_ALIASES[band]
526
+ if band in {"B8", "B9", "B11"}:
527
+ raise ValueError(
528
+ f"LandsatC2L2 does not provide {band} in the Planetary Computer "
529
+ "landsat-c2-l2 collection. Use rslearn.data_sources.aws_landsat.LandsatOliTirs "
530
+ "for Level-1 bands like panchromatic (B8), cirrus (B9), or thermal band 11 (B11)."
531
+ )
532
+ raise ValueError(
533
+ f"unknown Landsat band '{band}'. Use one of {sorted(cls.BAND_TO_ASSET_COMMON_NAME.keys())} "
534
+ f"(Landsat band names), {sorted(cls.ASSET_COMMON_NAME_TO_BAND.keys())} (STAC common names), "
535
+ f"or {sorted(cls.STAC_BAND_NAME_ALIASES.keys())} (STAC band names)."
536
+ )
537
+
538
+ def __init__(
539
+ self,
540
+ band_names: list[str] | None = None,
541
+ query: dict[str, Any] | None = None,
542
+ context: DataSourceContext = DataSourceContext(),
543
+ **kwargs: Any,
544
+ ):
545
+ """Initialize a new LandsatC2L2 instance.
546
+
547
+ Args:
548
+ band_names: optional list of band names to expose. Values can be either
549
+ STAC common names (preferred) or STAC `eo:bands[].name` aliases.
550
+ If not provided, defaults to the reflectance bands listed in BANDS.
551
+ query: optional STAC query filter to use. If not set, this defaults to a
552
+ platform filter for Landsat 8/9. If set, the provided query is used
553
+ as-is (no implicit platform filtering is added).
554
+ context: the data source context.
555
+ kwargs: additional arguments to pass to PlanetaryComputer.
556
+ """
557
+ # Prefer determining bands from the configured layer config (if present).
558
+ if context.layer_config is not None:
559
+ requested_bands = {
560
+ band
561
+ for band_set in context.layer_config.band_sets
562
+ for band in band_set.bands
563
+ }
564
+ band_names = [self._normalize_band_name(band) for band in requested_bands]
565
+ elif band_names is not None:
566
+ band_names = [self._normalize_band_name(band) for band in band_names]
567
+ else:
568
+ band_names = list(self.BAND_TO_ASSET_COMMON_NAME.keys())
569
+
570
+ # Landsat C2 L2 assets are keyed by common name; each asset is a single band.
571
+ # We expose Landsat-style band identifiers (B1, B2, ...).
572
+ asset_bands = {
573
+ self.BAND_TO_ASSET_COMMON_NAME[band]: [band] for band in band_names
574
+ }
575
+
576
+ if query is None:
577
+ query = self.DEFAULT_PLATFORM_QUERY
578
+
579
+ super().__init__(
580
+ collection_name=self.COLLECTION_NAME,
581
+ asset_bands=asset_bands,
582
+ query=query,
583
+ # Skip per-item asset checks; required assets are derived from asset_bands.
584
+ skip_items_missing_assets=True,
585
+ context=context,
586
+ **kwargs,
587
+ )
591
588
 
592
589
 
593
590
  class Sentinel1(PlanetaryComputer):
@@ -22,7 +22,7 @@ from rslearn.config import LayerConfig, QueryConfig
22
22
  from rslearn.dataset import Window
23
23
  from rslearn.dataset.materialize import RasterMaterializer
24
24
  from rslearn.tile_stores import TileStore, TileStoreWithLayer
25
- from rslearn.utils import PixelBounds, Projection, STGeometry
25
+ from rslearn.utils import PixelBounds, Projection, STGeometry, get_global_raster_bounds
26
26
  from rslearn.utils.geometry import get_global_geometry
27
27
  from rslearn.utils.raster_format import get_transform_from_projection_and_bounds
28
28
 
@@ -139,7 +139,7 @@ class SoilGrids(DataSource, TileStore):
139
139
  groups.append(cur_groups)
140
140
  return groups
141
141
 
142
- def deserialize_item(self, serialized_item: Any) -> Item:
142
+ def deserialize_item(self, serialized_item: dict) -> Item:
143
143
  """Deserialize an item from JSON-decoded data."""
144
144
  return Item.deserialize(serialized_item)
145
145
 
@@ -170,7 +170,7 @@ class SoilGrids(DataSource, TileStore):
170
170
  """Return (approximate) bounds for this raster in the requested projection."""
171
171
  # We don't know bounds without an extra metadata request; treat as "very large"
172
172
  # so materialization always attempts reads for windows.
173
- return (-(10**9), -(10**9), 10**9, 10**9)
173
+ return get_global_raster_bounds(projection)
174
174
 
175
175
  def _download_geotiff(
176
176
  self,
@@ -270,7 +270,6 @@ class StacDataSource(ItemLookupDataSource[SourceItem]):
270
270
 
271
271
  return groups
272
272
 
273
- def deserialize_item(self, serialized_item: Any) -> SourceItem:
273
+ def deserialize_item(self, serialized_item: dict) -> SourceItem:
274
274
  """Deserializes an item from JSON-decoded data."""
275
- assert isinstance(serialized_item, dict)
276
275
  return SourceItem.deserialize(serialized_item)
@@ -4,7 +4,6 @@ import os
4
4
  import tempfile
5
5
  import zipfile
6
6
  from datetime import UTC, datetime, timedelta
7
- from typing import Any
8
7
 
9
8
  import requests
10
9
  import requests.auth
@@ -137,9 +136,8 @@ class CDL(DataSource):
137
136
 
138
137
  return groups
139
138
 
140
- def deserialize_item(self, serialized_item: Any) -> Item:
139
+ def deserialize_item(self, serialized_item: dict) -> Item:
141
140
  """Deserializes an item from JSON-decoded data."""
142
- assert isinstance(serialized_item, dict)
143
141
  return Item.deserialize(serialized_item)
144
142
 
145
143
  def ingest(