rslearn 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rslearn/config/dataset.py CHANGED
@@ -8,7 +8,6 @@ from typing import Any
8
8
  import numpy as np
9
9
  import numpy.typing as npt
10
10
  import pytimeparse
11
- import torch
12
11
  from rasterio.enums import Resampling
13
12
 
14
13
  from rslearn.utils import PixelBounds, Projection
@@ -49,15 +48,6 @@ class DType(Enum):
49
48
  return np.float32
50
49
  raise ValueError(f"unable to handle numpy dtype {self}")
51
50
 
52
- def get_torch_dtype(self) -> torch.dtype:
53
- """Returns pytorch dtype object corresponding to this DType."""
54
- if self == DType.INT32:
55
- return torch.int32
56
- elif self == DType.FLOAT32:
57
- return torch.float32
58
- else:
59
- raise ValueError(f"unable to handle torch dtype {self}")
60
-
61
51
 
62
52
  RESAMPLING_METHODS = {
63
53
  "nearest": Resampling.nearest,
@@ -0,0 +1,67 @@
1
+ """LightningCLI for rslearn."""
2
+
3
+ import sys
4
+
5
+ from lightning.pytorch.cli import LightningArgumentParser, LightningCLI
6
+
7
+ from rslearn.arg_parser import RslearnArgumentParser
8
+ from rslearn.train.data_module import RslearnDataModule
9
+ from rslearn.train.lightning_module import RslearnLightningModule
10
+
11
+
12
class RslearnLightningCLI(LightningCLI):
    """LightningCLI subclass that applies rslearn-specific configuration tweaks.

    It links the data module's ``task`` argument to the model's ``task`` argument
    and adjusts the parsed configuration before the Lightning classes are created.
    """

    def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None:
        """Link data.init_args.task to model.init_args.task.

        Args:
            parser: the argument parser
        """
        # The link is applied at instantiation time, so the model receives the
        # task from the data module.
        source_key = "data.init_args.task"
        target_key = "model.init_args.task"
        parser.link_arguments(source_key, target_key, apply_on="instantiate")

    def before_instantiate_classes(self) -> None:
        """Called before Lightning class initialization.

        Adjusts the parsed configuration of the active subcommand:

        - copies the data module path into a configured RslearnWriter callback,
        - disables Lightning's distributed sampler replacement,
        - for ``predict``, disables return_predictions.
        """
        subcommand = self.config.subcommand
        cfg = self.config[subcommand]

        # If there is an RslearnWriter callback, set its path. When several
        # callbacks match, only the last one is updated.
        writer_class_path = "rslearn.train.prediction_writer.RslearnWriter"
        writer_cfg = None
        if "callbacks" in cfg.trainer:
            matching = [
                callback_cfg
                for callback_cfg in cfg.trainer.callbacks
                if callback_cfg.class_path == writer_class_path
            ]
            writer_cfg = matching[-1] if matching else None
        if writer_cfg:
            writer_cfg.init_args.path = cfg.data.init_args.path

        # Disable the sampler replacement, since the rslearn data module will set the
        # sampler as needed.
        cfg.trainer.use_distributed_sampler = False

        # For predict, make sure that return_predictions is False.
        # Otherwise all the predictions would be stored in memory which can lead to
        # high memory consumption.
        if subcommand == "predict":
            cfg.return_predictions = False
55
+
56
+
57
def model_handler() -> None:
    """Run the rslearn Lightning CLI for any ``rslearn model X`` command."""
    # The first two argv entries are skipped (presumably the program name and the
    # "model" command category -- confirm against the caller in main.py).
    cli_args = sys.argv[2:]
    cli_options = {
        "model_class": RslearnLightningModule,
        "datamodule_class": RslearnDataModule,
        "args": cli_args,
        "subclass_mode_model": True,
        "subclass_mode_data": True,
        "save_config_kwargs": {"overwrite": True},
        "parser_class": RslearnArgumentParser,
    }
    RslearnLightningCLI(**cli_options)
rslearn/main.py CHANGED
@@ -10,11 +10,9 @@ from datetime import UTC, datetime, timedelta
10
10
  from typing import Any, TypeVar
11
11
 
12
12
  import tqdm
13
- from lightning.pytorch.cli import LightningArgumentParser, LightningCLI
14
13
  from rasterio.crs import CRS
15
14
  from upath import UPath
16
15
 
17
- from rslearn.arg_parser import RslearnArgumentParser
18
16
  from rslearn.config import LayerConfig
19
17
  from rslearn.const import WGS84_EPSG
20
18
  from rslearn.data_sources import Item, data_source_from_config
@@ -38,8 +36,6 @@ from rslearn.dataset.manage import (
38
36
  )
39
37
  from rslearn.log_utils import get_logger
40
38
  from rslearn.tile_stores import get_tile_store_with_layer
41
- from rslearn.train.data_module import RslearnDataModule
42
- from rslearn.train.lightning_module import RslearnLightningModule
43
39
  from rslearn.utils import Projection, STGeometry
44
40
 
45
41
  logger = get_logger(__name__)
@@ -831,85 +827,35 @@ def dataset_build_index() -> None:
831
827
  index.save_index(ds_path)
832
828
 
833
829
 
834
- class RslearnLightningCLI(LightningCLI):
835
- """LightningCLI that links data.tasks to model.tasks and supports environment variables."""
836
-
837
- def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None:
838
- """Link data.tasks to model.tasks.
839
-
840
- Args:
841
- parser: the argument parser
842
- """
843
- # Link data.tasks to model.tasks
844
- parser.link_arguments(
845
- "data.init_args.task", "model.init_args.task", apply_on="instantiate"
846
- )
847
-
848
- def before_instantiate_classes(self) -> None:
849
- """Called before Lightning class initialization.
850
-
851
- Sets the dataset path for any configured RslearnPredictionWriter callbacks.
852
- """
853
- subcommand = self.config.subcommand
854
- c = self.config[subcommand]
855
-
856
- # If there is a RslearnPredictionWriter, set its path.
857
- prediction_writer_callback = None
858
- if "callbacks" in c.trainer:
859
- for existing_callback in c.trainer.callbacks:
860
- if (
861
- existing_callback.class_path
862
- == "rslearn.train.prediction_writer.RslearnWriter"
863
- ):
864
- prediction_writer_callback = existing_callback
865
- if prediction_writer_callback:
866
- prediction_writer_callback.init_args.path = c.data.init_args.path
867
-
868
- # Disable the sampler replacement, since the rslearn data module will set the
869
- # sampler as needed.
870
- c.trainer.use_distributed_sampler = False
871
-
872
- # For predict, make sure that return_predictions is False.
873
- # Otherwise all the predictions would be stored in memory which can lead to
874
- # high memory consumption.
875
- if subcommand == "predict":
876
- c.return_predictions = False
877
-
878
-
879
- def model_handler() -> None:
880
- """Handler for any rslearn model X commands."""
881
- RslearnLightningCLI(
882
- model_class=RslearnLightningModule,
883
- datamodule_class=RslearnDataModule,
884
- args=sys.argv[2:],
885
- subclass_mode_model=True,
886
- subclass_mode_data=True,
887
- save_config_kwargs={"overwrite": True},
888
- parser_class=RslearnArgumentParser,
889
- )
890
-
891
-
892
830
@register_handler("model", "fit")
def model_fit() -> None:
    """Handler for rslearn model fit."""
    # Imported inside the handler rather than at module level, so the
    # lightning_cli module (and whatever it imports) is only loaded when this
    # command actually runs.
    from .lightning_cli import model_handler

    model_handler()
896
836
 
897
837
 
898
838
@register_handler("model", "validate")
def model_validate() -> None:
    """Handler for rslearn model validate."""
    # Imported inside the handler rather than at module level, so the
    # lightning_cli module (and whatever it imports) is only loaded when this
    # command actually runs.
    from .lightning_cli import model_handler

    model_handler()
902
844
 
903
845
 
904
846
@register_handler("model", "test")
def model_test() -> None:
    """Handler for rslearn model test."""
    # Imported inside the handler rather than at module level, so the
    # lightning_cli module (and whatever it imports) is only loaded when this
    # command actually runs.
    from .lightning_cli import model_handler

    model_handler()
908
852
 
909
853
 
910
854
@register_handler("model", "predict")
def model_predict() -> None:
    """Handler for rslearn model predict."""
    # Imported inside the handler rather than at module level, so the
    # lightning_cli module (and whatever it imports) is only loaded when this
    # command actually runs.
    from .lightning_cli import model_handler

    model_handler()
914
860
 
915
861
 
@@ -0,0 +1,458 @@
1
+ """Wrapper around ModelDataset to load all patches (crops) in a window."""
2
+
3
+ import itertools
4
+ from collections.abc import Iterable, Iterator
5
+ from typing import Any
6
+
7
+ import shapely
8
+ import torch
9
+
10
+ from rslearn.dataset import Window
11
+ from rslearn.train.dataset import ModelDataset
12
+ from rslearn.utils.geometry import PixelBounds, STGeometry
13
+
14
+
15
def get_window_patch_options(
    patch_size: tuple[int, int],
    overlap_size: tuple[int, int],
    bounds: PixelBounds,
) -> list[PixelBounds]:
    """Get the bounds of each input patch within the window bounds.

    This is used when running inference on all patches (crops) of a large window, to
    compute the position of each patch.

    Args:
        patch_size: the (width, height) of the patches to extract.
        overlap_size: the (width, height) of the overlap between adjacent patches.
            Must be smaller than patch_size along each axis.
        bounds: the window bounds (x_min, y_min, x_max, y_max) to divide up into
            smaller patches.

    Returns:
        a list of patch bounds, ordered by column and then by row. Patches never
            start before the window origin. If the window is smaller than the patch
            size along an axis, the single patch on that axis starts at the window
            origin and extends beyond the right/bottom edge of the bounds.

    Raises:
        ValueError: if overlap_size is not smaller than patch_size along some axis.
    """
    cols = _get_axis_patch_starts(
        bounds[0], bounds[2], patch_size[0], overlap_size[0]
    )
    rows = _get_axis_patch_starts(
        bounds[1], bounds[3], patch_size[1], overlap_size[1]
    )
    return [
        (col, row, col + patch_size[0], row + patch_size[1])
        for col in cols
        for row in rows
    ]


def _get_axis_patch_starts(start: int, end: int, size: int, overlap: int) -> list[int]:
    """Compute the patch start offsets along one axis of the window."""
    stride = size - overlap
    if stride <= 0:
        raise ValueError(
            f"overlap size {overlap} must be smaller than patch size {size}"
        )
    # We stride the patches by patch_size - overlap_size until the last patch.
    # The last patch is placed flush with the end of the window so that it does
    # not exceed the window bounds; instead, it may overlap the previous patch.
    # If the window is smaller than the patch, clamp to the window start so the
    # patch never begins before the window (which would otherwise produce
    # negative crop offsets in the callers).
    last_start = max(start, end - size)
    return list(range(start, last_start, stride)) + [last_start]
57
+
58
+
59
def pad_slice_protect(
    raw_inputs: dict[str, Any],
    passthrough_inputs: dict[str, Any],
    patch_size: tuple[int, int],
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Zero-pad every tensor entry of both dicts so edge crops slice safely.

    Each tensor value is replaced, inside the dict it came from, by a copy padded
    at the end of its last dimension by patch_size[0] and at the end of its
    second-to-last dimension by patch_size[1]. Non-tensor values are untouched.

    Args:
        raw_inputs: the raw inputs to pad.
        passthrough_inputs: the passthrough inputs to pad.
        patch_size: the size of the patches to extract.

    Returns:
        a tuple of (raw_inputs, passthrough_inputs), i.e. the same dict objects
            that were passed in.
    """
    # torch.nn.functional.pad takes (left, right) pairs starting from the last
    # dimension, so this only grows the right/bottom side.
    padding = (0, patch_size[0], 0, patch_size[1])
    for inputs in (raw_inputs, passthrough_inputs):
        tensor_names = [
            name for name, value in inputs.items() if isinstance(value, torch.Tensor)
        ]
        for name in tensor_names:
            inputs[name] = torch.nn.functional.pad(inputs[name], pad=padding)
    return raw_inputs, passthrough_inputs
82
+
83
+
84
class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
    """This wraps a ModelDataset to iterate over all patches in that dataset.

    This should be used when SplitConfig.load_all_patches is enabled. The ModelDataset
    is configured with no patch size (load entire windows), and the dataset is wrapped
    in an IterableAllPatchesDataset, which crops each loaded window into patches.

    Similar to DistributedSampler, we add extra samples at each rank to ensure
    consistent number of batches across all ranks.
    """

    def __init__(
        self,
        dataset: ModelDataset,
        patch_size: tuple[int, int],
        overlap_ratio: float = 0.0,
        rank: int = 0,
        world_size: int = 1,
    ):
        """Create a new IterableAllPatchesDataset.

        Args:
            dataset: the ModelDataset to wrap.
            patch_size: the size of the patches to extract.
            overlap_ratio: the overlap between adjacent patches, as a fraction of
                the patch size (0 means no overlap). Note that the
                right/bottom-most patches may still overlap since we ensure that
                all patches are contained in the window bounds.
            rank: the global rank of this train worker process.
            world_size: the total number of train worker processes.
        """
        super().__init__()
        self.dataset = dataset
        self.patch_size = patch_size
        # Overlap in pixels along each axis.
        self.overlap_size = (
            round(self.patch_size[0] * overlap_ratio),
            round(self.patch_size[1] * overlap_ratio),
        )
        self.rank = rank
        self.world_size = world_size
        self.windows = self.dataset.get_dataset_examples()

    def set_name(self, name: str) -> None:
        """Sets dataset name.

        Args:
            name: dataset name
        """
        self.dataset.set_name(name)

    def get_window_num_patches(self, bounds: PixelBounds) -> int:
        """Get the number of patches for these bounds.

        This corresponds to the length of the list returned by
        get_window_patch_options, computed without building the list.

        Args:
            bounds: the window bounds.

        Returns:
            the number of patches that the window is divided into.
        """
        # Along each axis there is one patch per stride step, plus the final patch
        # that is placed flush with the end of the window.
        num_cols = (
            len(
                range(
                    bounds[0],
                    bounds[2] - self.patch_size[0],
                    self.patch_size[0] - self.overlap_size[0],
                )
            )
            + 1
        )
        num_rows = (
            len(
                range(
                    bounds[1],
                    bounds[3] - self.patch_size[1],
                    self.patch_size[1] - self.overlap_size[1],
                )
            )
            + 1
        )
        return num_cols * num_rows

    def _get_worker_iteration_data(self) -> tuple[Iterable[int], int]:
        """Get the windows we should iterate over.

        This is split both by training worker (self.rank) and data loader worker (via
        get_worker_info).

        We also compute the total number of samples that each data loader worker should
        yield. This is important for DDP to ensure that all ranks see the same number
        of batches.

        Returns:
            a tuple (window_ids, num_samples_per_worker).

        Raises:
            ValueError: if this worker is assigned no windows although the dataset
                is not empty.
        """
        # Figure out the total number of data loader workers and our worker ID.
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            worker_id = 0
            num_workers = 1
        else:
            worker_id = worker_info.id
            num_workers = worker_info.num_workers
        global_worker_id = self.rank * num_workers + worker_id
        global_num_workers = self.world_size * num_workers

        # Split up the windows evenly among the workers.
        # We compute this for all workers since we will need to see the maximum number
        # of samples under this assignment across workers.
        window_indexes = range(len(self.windows))
        windows_by_worker = [
            window_indexes[cur_rank :: self.world_size][cur_worker_id::num_workers]
            for cur_rank in range(self.world_size)
            for cur_worker_id in range(num_workers)
        ]

        # Now compute the maximum number of samples across workers.
        max_num_patches = 0
        for worker_windows in windows_by_worker:
            worker_num_patches = 0
            for window_id in worker_windows:
                worker_num_patches += self.get_window_num_patches(
                    self.windows[window_id].bounds
                )
            max_num_patches = max(max_num_patches, worker_num_patches)

        # Each worker needs at least one window, otherwise it won't be able to pad.
        # Unless there are zero windows total, which is fine.
        # Previously we would address this by borrowing the windows from another
        # worker, but this causes issues with RslearnWriter: if we yield the same
        # window from parallel workers, it may end up writing an empty output for that
        # window in the end.
        # So now we raise an error instead, and require the number of workers to be
        # less than the number of windows.
        if len(windows_by_worker[global_worker_id]) == 0 and max_num_patches > 0:
            raise ValueError(
                f"the number of workers {global_num_workers} must be <= the number of windows {len(self.windows)}"
            )

        return (windows_by_worker[global_worker_id], max_num_patches)

    @staticmethod
    def _crop_input_dict(
        d: dict[str, Any],
        start_offset: tuple[int, int],
        end_offset: tuple[int, int],
        cur_geom: STGeometry,
    ) -> dict[str, Any]:
        """Crop a dictionary of inputs to the given patch.

        Args:
            d: the inputs; each value must be a tensor or a list of features.
            start_offset: (col, row) start of the patch relative to the window.
            end_offset: (col, row) end of the patch relative to the window.
            cur_geom: the patch geometry, used to filter feature lists.

        Returns:
            a new dict with cropped tensor copies and filtered feature lists.

        Raises:
            ValueError: if a value is neither a tensor nor a list.
        """
        cropped = {}
        for input_name, value in d.items():
            if isinstance(value, torch.Tensor):
                # Crop the CHW tensor. Clone so the crop does not share storage
                # with the full window tensor.
                cropped[input_name] = value[
                    :,
                    start_offset[1] : end_offset[1],
                    start_offset[0] : end_offset[0],
                ].clone()
            elif isinstance(value, list):
                cropped[input_name] = [
                    feat for feat in value if cur_geom.intersects(feat.geometry)
                ]
            else:
                raise ValueError("got input that is neither tensor nor feature list")
        return cropped

    def __iter__(
        self,
    ) -> Iterator[tuple[dict[str, Any], dict[str, Any], dict[str, Any]]]:
        """Iterate over all patches in each element of the underlying ModelDataset.

        Yields:
            (input_dict, target_dict, metadata) tuples, one per patch. After all of
                this worker's patches have been yielded once, iteration wraps around
                to the first window again until the per-worker sample count has
                been reached.
        """
        window_ids, num_samples_needed = self._get_worker_iteration_data()
        num_samples_returned = 0

        # Cycle over the window IDs until we have returned enough samples. Every
        # pass yields at least one sample (a non-empty window list is guaranteed by
        # _get_worker_iteration_data whenever samples are needed), so this ends.
        while num_samples_returned < num_samples_needed:
            for window_id in window_ids:
                # Stop before loading another window if padding is complete, so
                # that we do not load and process data that would be discarded.
                if num_samples_returned >= num_samples_needed:
                    return

                raw_inputs, passthrough_inputs, metadata = self.dataset.get_raw_inputs(
                    window_id
                )
                bounds = metadata["bounds"]

                # For simplicity, pad tensors by patch size to ensure that any patch bounds
                # extending outside the window bounds will not have issues when we slice
                # the tensors later.
                pad_slice_protect(raw_inputs, passthrough_inputs, self.patch_size)

                # Now iterate over the patches and extract/yield the crops.
                # Note that, in case user is leveraging RslearnWriter, it is important that
                # the patch_idx be increasing (as we iterate) within one window.
                patches = get_window_patch_options(
                    self.patch_size, self.overlap_size, bounds
                )
                for patch_idx, patch_bounds in enumerate(patches):
                    if num_samples_returned >= num_samples_needed:
                        return

                    cur_geom = STGeometry(
                        metadata["projection"], shapely.box(*patch_bounds), None
                    )
                    # Patch position relative to the top-left corner of the window.
                    start_offset = (
                        patch_bounds[0] - bounds[0],
                        patch_bounds[1] - bounds[1],
                    )
                    end_offset = (
                        patch_bounds[2] - bounds[0],
                        patch_bounds[3] - bounds[1],
                    )

                    cur_raw_inputs = self._crop_input_dict(
                        raw_inputs, start_offset, end_offset, cur_geom
                    )
                    cur_passthrough_inputs = self._crop_input_dict(
                        passthrough_inputs, start_offset, end_offset, cur_geom
                    )

                    # Adjust the metadata as well.
                    cur_metadata = metadata.copy()
                    cur_metadata["bounds"] = patch_bounds
                    cur_metadata["patch_idx"] = patch_idx
                    cur_metadata["num_patches"] = len(patches)

                    # Now we can compute input and target dicts via the task.
                    input_dict, target_dict = self.dataset.task.process_inputs(
                        cur_raw_inputs,
                        metadata=cur_metadata,
                        load_targets=not self.dataset.split_config.get_skip_targets(),
                    )
                    input_dict.update(cur_passthrough_inputs)
                    input_dict, target_dict = self.dataset.transforms(
                        input_dict, target_dict
                    )
                    input_dict["dataset_source"] = self.dataset.name

                    yield input_dict, target_dict, cur_metadata
                    num_samples_returned += 1

    def get_dataset_examples(self) -> list[Window]:
        """Returns a list of windows in this dataset."""
        return self.dataset.get_dataset_examples()
314
+
315
+
316
class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
    """Map-style dataset over all patches of every window in a ModelDataset.

    This should be used when SplitConfig.load_all_patches is enabled.

    It is a simpler alternative to IterableAllPatchesDataset: each window is loaded
    once and then kept in an in-memory cache, which is useful for small datasets
    that fit in memory.
    """

    def __init__(
        self,
        dataset: ModelDataset,
        patch_size: tuple[int, int],
        overlap_ratio: float = 0.0,
    ):
        """Create a new InMemoryAllPatchesDataset.

        Args:
            dataset: the ModelDataset to wrap.
            patch_size: the size of the patches to extract.
            overlap_ratio: the overlap between adjacent patches, as a fraction of
                the patch size. The right/bottom-most patches may overlap their
                neighbors regardless, since patches are kept inside the window.
        """
        super().__init__()
        self.dataset = dataset
        self.patch_size = patch_size
        overlap_cols = round(self.patch_size[0] * overlap_ratio)
        overlap_rows = round(self.patch_size[1] * overlap_ratio)
        self.overlap_size = (overlap_cols, overlap_rows)
        self.windows = self.dataset.get_dataset_examples()
        # Maps window index -> (raw_inputs, passthrough_inputs, metadata).
        self.window_cache: dict[
            int, tuple[dict[str, Any], dict[str, Any], dict[str, Any]]
        ] = {}

        # Precompute the patch boundaries of every window as a flat list of
        # (window_id, patch_bounds, (patch_idx, num_patches_in_window)).
        self.patches = []
        for window_id, window in enumerate(self.windows):
            window_patches = get_window_patch_options(
                self.patch_size, self.overlap_size, window.bounds
            )
            num_window_patches = len(window_patches)
            self.patches.extend(
                (window_id, bounds, (patch_idx, num_window_patches))
                for patch_idx, bounds in enumerate(window_patches)
            )

    def get_raw_inputs(
        self, index: int
    ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
        """Get the raw inputs of one window, using the cache when possible.

        On a cache miss the window is loaded from the wrapped dataset and its
        tensors are padded by the patch size to protect slicing near the
        right/bottom edges, before being stored in the cache.

        Args:
            index: the index of the window in the wrapped dataset (this is the
                cache key; __getitem__ passes the window ID of the patch).

        Returns:
            a tuple of (raw_inputs, passthrough_inputs, metadata).
        """
        try:
            return self.window_cache[index]
        except KeyError:
            pass

        loaded = self.dataset.get_raw_inputs(index)
        raw_inputs, passthrough_inputs, metadata = loaded
        pad_slice_protect(raw_inputs, passthrough_inputs, self.patch_size)

        entry = (raw_inputs, passthrough_inputs, metadata)
        self.window_cache[index] = entry
        return entry

    @staticmethod
    def _crop_input_dict(
        d: dict[str, Any],
        start_offset: tuple[int, int],
        end_offset: tuple[int, int],
        cur_geom: STGeometry,
    ) -> dict[str, Any]:
        """Crop a dictionary of inputs to the given bounds.

        Tensors are sliced on their last two dimensions and copied; feature lists
        are filtered to the features intersecting cur_geom. Any other value type
        raises ValueError.
        """
        row_slice = slice(start_offset[1], end_offset[1])
        col_slice = slice(start_offset[0], end_offset[0])

        result = {}
        for key, item in d.items():
            if isinstance(item, torch.Tensor):
                result[key] = item[:, row_slice, col_slice].clone()
                continue
            if isinstance(item, list):
                result[key] = [
                    feature
                    for feature in item
                    if cur_geom.intersects(feature.geometry)
                ]
                continue
            raise ValueError("got input that is neither tensor nor feature list")
        return result

    def __len__(self) -> int:
        """Return the total number of patches in the dataset."""
        return len(self.patches)

    def __getitem__(
        self, index: int
    ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
        """Return (input_dict, target_dict, metadata) for a single flattened patch."""
        window_id, patch_bounds, patch_position = self.patches[index]
        patch_idx, num_patches = patch_position
        raw_inputs, passthrough_inputs, metadata = self.get_raw_inputs(window_id)
        window_bounds = metadata["bounds"]

        patch_geom = STGeometry(
            metadata["projection"], shapely.box(*patch_bounds), None
        )
        # Patch corners relative to the top-left corner of the window.
        start_offset = (
            patch_bounds[0] - window_bounds[0],
            patch_bounds[1] - window_bounds[1],
        )
        end_offset = (
            patch_bounds[2] - window_bounds[0],
            patch_bounds[3] - window_bounds[1],
        )

        patch_raw_inputs = self._crop_input_dict(
            raw_inputs, start_offset, end_offset, patch_geom
        )
        patch_passthrough_inputs = self._crop_input_dict(
            passthrough_inputs, start_offset, end_offset, patch_geom
        )

        # The metadata describes the patch rather than the whole window.
        patch_metadata = metadata.copy()
        patch_metadata["bounds"] = patch_bounds
        patch_metadata["patch_idx"] = patch_idx
        patch_metadata["num_patches"] = num_patches

        # Compute the input and target dicts via the task, then apply transforms.
        load_targets = not self.dataset.split_config.get_skip_targets()
        input_dict, target_dict = self.dataset.task.process_inputs(
            patch_raw_inputs,
            metadata=patch_metadata,
            load_targets=load_targets,
        )
        input_dict.update(patch_passthrough_inputs)
        input_dict, target_dict = self.dataset.transforms(input_dict, target_dict)
        input_dict["dataset_source"] = self.dataset.name

        return input_dict, target_dict, patch_metadata

    def get_dataset_examples(self) -> list[Window]:
        """Returns a list of windows in this dataset."""
        return self.dataset.get_dataset_examples()

    def set_name(self, name: str) -> None:
        """Sets dataset name.

        Args:
            name: dataset name
        """
        self.dataset.set_name(name)
@@ -15,10 +15,12 @@ from rslearn.dataset import Dataset
15
15
  from rslearn.log_utils import get_logger
16
16
  from rslearn.train.tasks import Task
17
17
 
18
- from .dataset import (
19
- DataInput,
18
+ from .all_patches_dataset import (
20
19
  InMemoryAllPatchesDataset,
21
20
  IterableAllPatchesDataset,
21
+ )
22
+ from .dataset import (
23
+ DataInput,
22
24
  ModelDataset,
23
25
  MultiDataset,
24
26
  RetryDataset,
rslearn/train/dataset.py CHANGED
@@ -1,7 +1,6 @@
1
1
  """Default Dataset for rslearn."""
2
2
 
3
3
  import hashlib
4
- import itertools
5
4
  import json
6
5
  import multiprocessing
7
6
  import os
@@ -9,10 +8,8 @@ import random
9
8
  import tempfile
10
9
  import time
11
10
  import uuid
12
- from collections.abc import Iterable, Iterator
13
11
  from typing import Any
14
12
 
15
- import shapely
16
13
  import torch
17
14
  import tqdm
18
15
  from rasterio.warp import Resampling
@@ -29,7 +26,7 @@ from rslearn.dataset.window import Window, get_layer_and_group_from_dir_name
29
26
  from rslearn.log_utils import get_logger
30
27
  from rslearn.train.tasks import Task
31
28
  from rslearn.utils.feature import Feature
32
- from rslearn.utils.geometry import PixelBounds, STGeometry
29
+ from rslearn.utils.geometry import PixelBounds
33
30
  from rslearn.utils.mp import star_imap_unordered
34
31
  from rslearn.utils.raster_format import load_raster_format
35
32
  from rslearn.utils.vector_format import load_vector_format
@@ -39,70 +36,14 @@ from .transforms import Sequential
39
36
  logger = get_logger(__name__)
40
37
 
41
38
 
42
- def get_window_patch_options(
43
- patch_size: tuple[int, int],
44
- overlap_size: tuple[int, int],
45
- bounds: PixelBounds,
46
- ) -> list[PixelBounds]:
47
- """Get the bounds of each patch within the overall bounds.
48
-
49
- Args:
50
- patch_size: the size of the patches to extract.
51
- overlap_size: the size of the overlap between patches.
52
- bounds: the window bounds to divide up into smaller patches.
53
-
54
- Returns:
55
- a list of patch bounds within the overall bounds. The rightmost and
56
- bottommost patches may extend beyond the provided bounds.
57
- """
58
- # We stride the patches by patch_size - overlap_size until the last patch.
59
- # We handle the last patch with a special case to ensure it does not exceed the
60
- # window bounds. Instead, it may overlap the previous patch.
61
- cols = list(
62
- range(
63
- bounds[0],
64
- bounds[2] - patch_size[0],
65
- patch_size[0] - overlap_size[0],
66
- )
67
- ) + [bounds[2] - patch_size[0]]
68
- rows = list(
69
- range(
70
- bounds[1],
71
- bounds[3] - patch_size[1],
72
- patch_size[1] - overlap_size[1],
73
- )
74
- ) + [bounds[3] - patch_size[1]]
75
-
76
- patch_bounds: list[PixelBounds] = []
77
- for col in cols:
78
- for row in rows:
79
- patch_bounds.append((col, row, col + patch_size[0], row + patch_size[1]))
80
- return patch_bounds
81
-
82
-
83
- def pad_slice_protect(
84
- raw_inputs: dict[str, Any],
85
- passthrough_inputs: dict[str, Any],
86
- patch_size: tuple[int, int],
87
- ) -> tuple[dict[str, Any], dict[str, Any]]:
88
- """Pad tensors in-place by patch size to protect slicing near right/bottom edges.
89
-
90
- Args:
91
- raw_inputs: the raw inputs to pad.
92
- passthrough_inputs: the passthrough inputs to pad.
93
- patch_size: the size of the patches to extract.
94
-
95
- Returns:
96
- a tuple of (raw_inputs, passthrough_inputs).
97
- """
98
- for d in [raw_inputs, passthrough_inputs]:
99
- for input_name, value in list(d.items()):
100
- if not isinstance(value, torch.Tensor):
101
- continue
102
- d[input_name] = torch.nn.functional.pad(
103
- value, pad=(0, patch_size[0], 0, patch_size[1])
104
- )
105
- return raw_inputs, passthrough_inputs
39
def get_torch_dtype(dtype: DType) -> torch.dtype:
    """Map an rslearn DType to the corresponding torch dtype.

    Args:
        dtype: the rslearn DType to convert.

    Returns:
        torch.int32 for DType.INT32, or torch.float32 for DType.FLOAT32.

    Raises:
        ValueError: if dtype is any other value.
    """
    if dtype == DType.INT32:
        return torch.int32
    if dtype == DType.FLOAT32:
        return torch.float32
    raise ValueError(f"unable to handle {dtype} as a torch dtype")
106
47
 
107
48
 
108
49
  class SamplerFactory:
@@ -296,7 +237,7 @@ def read_raster_layer_for_data_input(
296
237
 
297
238
  image = torch.zeros(
298
239
  (len(needed_bands), bounds[3] - bounds[1], bounds[2] - bounds[0]),
299
- dtype=data_input.dtype.get_torch_dtype(),
240
+ dtype=get_torch_dtype(data_input.dtype),
300
241
  )
301
242
 
302
243
  for band_set, src_indexes, dst_indexes in needed_sets_and_indexes:
@@ -893,383 +834,6 @@ class ModelDataset(torch.utils.data.Dataset):
893
834
  self.name = name
894
835
 
895
836
 
896
- class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
897
- """This wraps a ModelDataset to iterate over all patches in that dataset.
898
-
899
- This should be used when SplitConfig.load_all_patches is enabled. The ModelDataset
900
- is configured with no patch size (load entire windows), and the dataset is wrapped
901
- in an AllPatchesDataset.
902
-
903
- Similar to DistributedSampler, we add extra samples at each rank to ensure
904
- consistent number of batches across all ranks.
905
- """
906
-
907
- def __init__(
908
- self,
909
- dataset: ModelDataset,
910
- patch_size: tuple[int, int],
911
- overlap_ratio: float = 0.0,
912
- rank: int = 0,
913
- world_size: int = 1,
914
- ):
915
- """Create a new IterableAllPatchesDataset.
916
-
917
- Args:
918
- dataset: the ModelDataset to wrap.
919
- patch_size: the size of the patches to extract.
920
- overlap_ratio: whether to include overlap between the patches. Note that
921
- the right/bottom-most patches may still overlap since we ensure that
922
- all patches are contained in the window bounds.
923
- rank: the global rank of this train worker process.
924
- world_size: the total number of train worker processes.
925
- """
926
- super().__init__()
927
- self.dataset = dataset
928
- self.patch_size = patch_size
929
- self.overlap_size = (
930
- round(self.patch_size[0] * overlap_ratio),
931
- round(self.patch_size[1] * overlap_ratio),
932
- )
933
- self.rank = rank
934
- self.world_size = world_size
935
- self.windows = self.dataset.get_dataset_examples()
936
-
937
- def set_name(self, name: str) -> None:
938
- """Sets dataset name.
939
-
940
- Args:
941
- name: dataset name
942
- """
943
- self.dataset.set_name(name)
944
-
945
- def get_window_num_patches(self, bounds: PixelBounds) -> int:
946
- """Get the number of patches for these bounds.
947
-
948
- This corresponds to the length of the list returned by get_patch_options.
949
- """
950
- num_cols = (
951
- len(
952
- range(
953
- bounds[0],
954
- bounds[2] - self.patch_size[0],
955
- self.patch_size[0] - self.overlap_size[0],
956
- )
957
- )
958
- + 1
959
- )
960
- num_rows = (
961
- len(
962
- range(
963
- bounds[1],
964
- bounds[3] - self.patch_size[1],
965
- self.patch_size[1] - self.overlap_size[1],
966
- )
967
- )
968
- + 1
969
- )
970
- return num_cols * num_rows
971
-
972
- def _get_worker_iteration_data(self) -> tuple[Iterable[int], int]:
973
- """Get the windows we should iterate over.
974
-
975
- This is split both by training worker (self.rank) and data loader worker (via
976
- get_worker_info).
977
-
978
- We also compute the total number of samples that each data loader worker should
979
- yield. This is important for DDP to ensure that all ranks see the same number
980
- of batches.
981
-
982
- Returns:
983
- a tuple (window_ids, num_samples_per_worker).
984
- """
985
- # Figure out the total number of data loader workers and our worker ID.
986
- worker_info = torch.utils.data.get_worker_info()
987
- if worker_info is None:
988
- worker_id = 0
989
- num_workers = 1
990
- else:
991
- worker_id = worker_info.id
992
- num_workers = worker_info.num_workers
993
- global_worker_id = self.rank * num_workers + worker_id
994
- global_num_workers = self.world_size * num_workers
995
-
996
- # Split up the windows evenly among the workers.
997
- # We compute this for all workers since we will need to see the maximum number
998
- # of samples under this assignment across workers.
999
- window_indexes = range(len(self.windows))
1000
- windows_by_worker = [
1001
- window_indexes[cur_rank :: self.world_size][cur_worker_id::num_workers]
1002
- for cur_rank in range(self.world_size)
1003
- for cur_worker_id in range(num_workers)
1004
- ]
1005
-
1006
- # Now compute the maximum number of samples across workers.
1007
- max_num_patches = 0
1008
- for worker_windows in windows_by_worker:
1009
- worker_num_patches = 0
1010
- for window_id in worker_windows:
1011
- worker_num_patches += self.get_window_num_patches(
1012
- self.windows[window_id].bounds
1013
- )
1014
- max_num_patches = max(max_num_patches, worker_num_patches)
1015
-
1016
- # Each worker needs at least one window, otherwise it won't be able to pad.
1017
- # Unless there are zero windows total, which is fine.
1018
- # Previously we would address this by borrowing the windows from another
1019
- # worker, but this causes issues with RslearnWriter: if we yield the same
1020
- # window from parallel workers, it may end up writing an empty output for that
1021
- # window in the end.
1022
- # So now we raise an error instead, and require the number of workers to be
1023
- # less than the number of windows.
1024
- if len(windows_by_worker[global_worker_id]) == 0 and max_num_patches > 0:
1025
- raise ValueError(
1026
- f"the number of workers {global_num_workers} must be <= the number of windows {len(self.windows)}"
1027
- )
1028
-
1029
- return (windows_by_worker[global_worker_id], max_num_patches)
1030
-
1031
- def __iter__(
1032
- self,
1033
- ) -> Iterator[tuple[dict[str, Any], dict[str, Any], dict[str, Any]]]:
1034
- """Iterate over all patches in each element of the underlying ModelDataset."""
1035
- # Iterate over the window IDs until we have returned enough samples.
1036
- window_ids, num_samples_needed = self._get_worker_iteration_data()
1037
- num_samples_returned = 0
1038
-
1039
- for iteration_idx in itertools.count():
1040
- for window_id in window_ids:
1041
- raw_inputs, passthrough_inputs, metadata = self.dataset.get_raw_inputs(
1042
- window_id
1043
- )
1044
- bounds = metadata["bounds"]
1045
-
1046
- # For simplicity, pad tensors by patch size to ensure that any patch bounds
1047
- # extending outside the window bounds will not have issues when we slice
1048
- # the tensors later.
1049
- pad_slice_protect(raw_inputs, passthrough_inputs, self.patch_size)
1050
-
1051
- # Now iterate over the patches and extract/yield the crops.
1052
- # Note that, in case user is leveraging RslearnWriter, it is important that
1053
- # the patch_idx be increasing (as we iterate) within one window.
1054
- patches = get_window_patch_options(
1055
- self.patch_size, self.overlap_size, bounds
1056
- )
1057
- for patch_idx, patch_bounds in enumerate(patches):
1058
- cur_geom = STGeometry(
1059
- metadata["projection"], shapely.box(*patch_bounds), None
1060
- )
1061
- start_offset = (
1062
- patch_bounds[0] - bounds[0],
1063
- patch_bounds[1] - bounds[1],
1064
- )
1065
- end_offset = (
1066
- patch_bounds[2] - bounds[0],
1067
- patch_bounds[3] - bounds[1],
1068
- )
1069
-
1070
- # Define a helper function to handle each input dict.
1071
- def crop_input_dict(d: dict[str, Any]) -> dict[str, Any]:
1072
- cropped = {}
1073
- for input_name, value in d.items():
1074
- if isinstance(value, torch.Tensor):
1075
- # Crop the CHW tensor.
1076
- cropped[input_name] = value[
1077
- :,
1078
- start_offset[1] : end_offset[1],
1079
- start_offset[0] : end_offset[0],
1080
- ].clone()
1081
- elif isinstance(value, list):
1082
- cropped[input_name] = [
1083
- feat
1084
- for feat in value
1085
- if cur_geom.intersects(feat.geometry)
1086
- ]
1087
- else:
1088
- raise ValueError(
1089
- "got input that is neither tensor nor feature list"
1090
- )
1091
- return cropped
1092
-
1093
- cur_raw_inputs = crop_input_dict(raw_inputs)
1094
- cur_passthrough_inputs = crop_input_dict(passthrough_inputs)
1095
-
1096
- # Adjust the metadata as well.
1097
- cur_metadata = metadata.copy()
1098
- cur_metadata["bounds"] = patch_bounds
1099
- cur_metadata["patch_idx"] = patch_idx
1100
- cur_metadata["num_patches"] = len(patches)
1101
-
1102
- # Now we can compute input and target dicts via the task.
1103
- input_dict, target_dict = self.dataset.task.process_inputs(
1104
- cur_raw_inputs,
1105
- metadata=cur_metadata,
1106
- load_targets=not self.dataset.split_config.get_skip_targets(),
1107
- )
1108
- input_dict.update(cur_passthrough_inputs)
1109
- input_dict, target_dict = self.dataset.transforms(
1110
- input_dict, target_dict
1111
- )
1112
- input_dict["dataset_source"] = self.dataset.name
1113
-
1114
- if num_samples_returned < num_samples_needed:
1115
- yield input_dict, target_dict, cur_metadata
1116
- num_samples_returned += 1
1117
- else:
1118
- assert iteration_idx > 0
1119
-
1120
- if num_samples_returned >= num_samples_needed:
1121
- break
1122
-
1123
- def get_dataset_examples(self) -> list[Window]:
1124
- """Returns a list of windows in this dataset."""
1125
- return self.dataset.get_dataset_examples()
1126
-
1127
-
1128
- class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
1129
- """This wraps a ModelDataset to iterate over all patches in that dataset.
1130
-
1131
- This should be used when SplitConfig.load_all_patches is enabled.
1132
-
1133
- This is a simpler version of IterableAllPatchesDataset that caches all windows in memory.
1134
- This is useful for small datasets that fit in memory.
1135
- """
1136
-
1137
- def __init__(
1138
- self,
1139
- dataset: ModelDataset,
1140
- patch_size: tuple[int, int],
1141
- overlap_ratio: float = 0.0,
1142
- ):
1143
- """Create a new InMemoryAllPatchesDataset.
1144
-
1145
- Args:
1146
- dataset: the ModelDataset to wrap.
1147
- patch_size: the size of the patches to extract.
1148
- overlap_ratio: whether to include overlap between the patches. Note that
1149
- the right/bottom-most patches may still overlap since we ensure that
1150
- all patches are contained in the window bounds.
1151
- """
1152
- super().__init__()
1153
- self.dataset = dataset
1154
- self.patch_size = patch_size
1155
- self.overlap_size = (
1156
- round(self.patch_size[0] * overlap_ratio),
1157
- round(self.patch_size[1] * overlap_ratio),
1158
- )
1159
- self.windows = self.dataset.get_dataset_examples()
1160
- self.window_cache: dict[
1161
- int, tuple[dict[str, Any], dict[str, Any], dict[str, Any]]
1162
- ] = {}
1163
-
1164
- # Precompute the batch boundaries for each window
1165
- self.patches = []
1166
- for window_id, window in enumerate(self.windows):
1167
- patch_bounds = get_window_patch_options(
1168
- self.patch_size, self.overlap_size, window.bounds
1169
- )
1170
- for i, patch_bound in enumerate(patch_bounds):
1171
- self.patches.append((window_id, patch_bound, (i, len(patch_bounds))))
1172
-
1173
- def get_raw_inputs(
1174
- self, index: int
1175
- ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
1176
- """Get the raw inputs for a single patch. Retrieve from cache if possible.
1177
-
1178
- Also crops/pads the tensors by patch size to protect slicing near right/bottom edges.
1179
-
1180
- Args:
1181
- index: the index of the patch.
1182
-
1183
- Returns:
1184
- a tuple of (raw_inputs, passthrough_inputs, metadata).
1185
- """
1186
- if index in self.window_cache:
1187
- return self.window_cache[index]
1188
-
1189
- raw_inputs, passthrough_inputs, metadata = self.dataset.get_raw_inputs(index)
1190
- pad_slice_protect(raw_inputs, passthrough_inputs, self.patch_size)
1191
-
1192
- self.window_cache[index] = (raw_inputs, passthrough_inputs, metadata)
1193
- return self.window_cache[index]
1194
-
1195
- @staticmethod
1196
- def _crop_input_dict(
1197
- d: dict[str, Any],
1198
- start_offset: tuple[int, int],
1199
- end_offset: tuple[int, int],
1200
- cur_geom: STGeometry,
1201
- ) -> dict[str, Any]:
1202
- """Crop a dictionary of inputs to the given bounds."""
1203
- cropped = {}
1204
- for input_name, value in d.items():
1205
- if isinstance(value, torch.Tensor):
1206
- cropped[input_name] = value[
1207
- :,
1208
- start_offset[1] : end_offset[1],
1209
- start_offset[0] : end_offset[0],
1210
- ].clone()
1211
- elif isinstance(value, list):
1212
- cropped[input_name] = [
1213
- feat for feat in value if cur_geom.intersects(feat.geometry)
1214
- ]
1215
- else:
1216
- raise ValueError("got input that is neither tensor nor feature list")
1217
- return cropped
1218
-
1219
- def __len__(self) -> int:
1220
- """Return the total number of patches in the dataset."""
1221
- return len(self.patches)
1222
-
1223
- def __getitem__(
1224
- self, index: int
1225
- ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
1226
- """Return (input_dict, target_dict, metadata) for a single flattened patch."""
1227
- (window_id, patch_bounds, (patch_idx, num_patches)) = self.patches[index]
1228
- raw_inputs, passthrough_inputs, metadata = self.get_raw_inputs(window_id)
1229
- bounds = metadata["bounds"]
1230
-
1231
- cur_geom = STGeometry(metadata["projection"], shapely.box(*patch_bounds), None)
1232
- start_offset = (patch_bounds[0] - bounds[0], patch_bounds[1] - bounds[1])
1233
- end_offset = (patch_bounds[2] - bounds[0], patch_bounds[3] - bounds[1])
1234
-
1235
- cur_raw_inputs = self._crop_input_dict(
1236
- raw_inputs, start_offset, end_offset, cur_geom
1237
- )
1238
- cur_passthrough_inputs = self._crop_input_dict(
1239
- passthrough_inputs, start_offset, end_offset, cur_geom
1240
- )
1241
-
1242
- # Adjust the metadata as well.
1243
- cur_metadata = metadata.copy()
1244
- cur_metadata["bounds"] = patch_bounds
1245
- cur_metadata["patch_idx"] = patch_idx
1246
- cur_metadata["num_patches"] = num_patches
1247
-
1248
- # Now we can compute input and target dicts via the task.
1249
- input_dict, target_dict = self.dataset.task.process_inputs(
1250
- cur_raw_inputs,
1251
- metadata=cur_metadata,
1252
- load_targets=not self.dataset.split_config.get_skip_targets(),
1253
- )
1254
- input_dict.update(cur_passthrough_inputs)
1255
- input_dict, target_dict = self.dataset.transforms(input_dict, target_dict)
1256
- input_dict["dataset_source"] = self.dataset.name
1257
-
1258
- return input_dict, target_dict, cur_metadata
1259
-
1260
- def get_dataset_examples(self) -> list[Window]:
1261
- """Returns a list of windows in this dataset."""
1262
- return self.dataset.get_dataset_examples()
1263
-
1264
- def set_name(self, name: str) -> None:
1265
- """Sets dataset name.
1266
-
1267
- Args:
1268
- name: dataset name
1269
- """
1270
- self.dataset.set_name(name)
1271
-
1272
-
1273
837
  class RetryDataset(torch.utils.data.Dataset):
1274
838
  """A dataset wrapper that retries getitem upon encountering error."""
1275
839
 
rslearn/utils/array.py CHANGED
@@ -1,14 +1,16 @@
1
1
  """Array util functions."""
2
2
 
3
- from typing import Any
3
+ from typing import TYPE_CHECKING, Any
4
4
 
5
5
  import numpy.typing as npt
6
- import torch
6
+
7
+ if TYPE_CHECKING:
8
+ import torch
7
9
 
8
10
 
9
11
  def copy_spatial_array(
10
- src: torch.Tensor | npt.NDArray[Any],
11
- dst: torch.Tensor | npt.NDArray[Any],
12
+ src: "torch.Tensor | npt.NDArray[Any]",
13
+ dst: "torch.Tensor | npt.NDArray[Any]",
12
14
  src_offset: tuple[int, int],
13
15
  dst_offset: tuple[int, int],
14
16
  ) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rslearn
3
- Version: 0.0.13
3
+ Version: 0.0.14
4
4
  Summary: A library for developing remote sensing datasets and models
5
5
  Author: OlmoEarth Team
6
6
  License: Apache License
@@ -1,12 +1,13 @@
1
1
  rslearn/__init__.py,sha256=fFmAen3vxZyosEfPbG0W46IttujYGVxzrGkJ0YutmmY,73
2
2
  rslearn/arg_parser.py,sha256=GNlJncO6Ck_dCNrcg7z_SSG61I-2gKn3Ix2tAxIk9CI,1428
3
3
  rslearn/const.py,sha256=FUCfsvFAs-QarEDJ0grdy0C1HjUjLpNFYGo5I2Vpc5Y,449
4
+ rslearn/lightning_cli.py,sha256=io1Agb2fr-fUu9yOODNJhP8-vJp_v9UbJJA2hkLubKA,2435
4
5
  rslearn/log_utils.py,sha256=unD9gShiuO7cx5Nnq8qqVQ4qrbOOwFVgcHxN5bXuiAo,941
5
- rslearn/main.py,sha256=fLYmm2ZsUTCaJBKZvxu3pc4fB2thaf-p2Qv0AifDlXM,31292
6
+ rslearn/main.py,sha256=JMNMhAHqpb9bDUoKzj6kN659Ft_-gZv_rKUieJcJNwI,29087
6
7
  rslearn/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
8
  rslearn/template_params.py,sha256=Vop0Ha-S44ctCa9lvSZRjrMETznJZlR5y_gJrVIwrPg,791
8
9
  rslearn/config/__init__.py,sha256=Bhf2VVncdMYRC8Wfb4GsJJ13OAJYNCO_ODLSNTmBOHM,638
9
- rslearn/config/dataset.py,sha256=VpXUGKCr45kzE-W27rgF4tPQuyICfwQkJTxb2z9aXQM,21685
10
+ rslearn/config/dataset.py,sha256=lIuFgJG0Hz7nxacFIpbwOyNJqjlkOlaMfWt91Chjb_M,21338
10
11
  rslearn/data_sources/__init__.py,sha256=8_7Pi3agKsatNoxXw74-U5G-QAP-rbdfcH8EkZfJbH4,1449
11
12
  rslearn/data_sources/aws_landsat.py,sha256=GA9H04KagBDm-N37jFdh_aHCX2ZneVdnqT1SNOyAwTs,20829
12
13
  rslearn/data_sources/aws_open_data.py,sha256=nU_D5cqc-wibxq4uyUNb0z-XD0Puf1gZ8v5FMiMAN5w,30258
@@ -107,8 +108,9 @@ rslearn/tile_stores/__init__.py,sha256=o_tWVKu6UwFzZbO9jn_3cmIDqc_Q3qDd6tA9If0T_
107
108
  rslearn/tile_stores/default.py,sha256=PYaDNvBxhJTDKJGw0EjDTSE1OKajR7_iJpMbOjj-mE8,15054
108
109
  rslearn/tile_stores/tile_store.py,sha256=9AeYduDYPp_Ia2NMlq6osptpz_AFGIOQcLJrqZ_m-z0,10469
109
110
  rslearn/train/__init__.py,sha256=fnJyY4aHs5zQqbDKSfXsJZXY_M9fbTsf7dRYaPwZr2M,30
110
- rslearn/train/data_module.py,sha256=K-nQgnOZn-KGq_G2pVOQFtWRrlWih0212i_bkXZ2bEE,23515
111
- rslearn/train/dataset.py,sha256=YiskNlYYcKqZxyw0Xzop1RGLbjMc-oK_rmhrSMVbTQg,51857
111
+ rslearn/train/all_patches_dataset.py,sha256=xFJ96HU3CodrUBzXTsgrmEShosKH79T2SxI0xDVSH3Q,18217
112
+ rslearn/train/data_module.py,sha256=pgut8rEWHIieZ7RR8dUvhtlNqk0egEdznYF3tCvqdHg,23552
113
+ rslearn/train/dataset.py,sha256=8F3bpus25g_NG0-CwMCuznwKxOvBDClNBCOEvDbMyN8,34312
112
114
  rslearn/train/lightning_module.py,sha256=ZLBiId3secUlVs2yzkN-mwVv4rMdh5TkdZYl4vv_Cw0,14466
113
115
  rslearn/train/optimizer.py,sha256=EKSqkmERalDA0bF32Gey7n6z69KLyaUWKlRsGJfKBmE,927
114
116
  rslearn/train/prediction_writer.py,sha256=mDvREwEB5k5_tNuBnYIvAGnxS3sYFWQYvV07V3UEe2k,14106
@@ -138,7 +140,7 @@ rslearn/train/transforms/select_bands.py,sha256=uDfD9G8Z4VTt88QZsjj1FB20QEmzSefh
138
140
  rslearn/train/transforms/sentinel1.py,sha256=FrLaYZs2AjqWQCun8DTFtgo1l0xLxqaFKtDNIehtpDg,1913
139
141
  rslearn/train/transforms/transform.py,sha256=n1Qzqix2dVvej-Q7iPzHeOQbqH79IBlvqPoymxhNVpE,4446
140
142
  rslearn/utils/__init__.py,sha256=GNvdTUmXakiEMnLdje7k1fe5aC7SFVqP757kbpN6Fzw,558
141
- rslearn/utils/array.py,sha256=JwZi7o0uj-dftREzJmqrRVR2joIwBikm3Er9KeHVIZU,2402
143
+ rslearn/utils/array.py,sha256=RC7ygtPnQwU6Lb9kwORvNxatJcaJ76JPsykQvndAfes,2444
142
144
  rslearn/utils/feature.py,sha256=lsg0WThZDJzo1mrbaL04dXYI5G3x-n5FG9aEjj7uUaI,1649
143
145
  rslearn/utils/fsspec.py,sha256=9QwN46heBhjUnth3qFeRNE3W6Wlr6dM3twYVswPnS9o,5300
144
146
  rslearn/utils/geometry.py,sha256=oZllq1aBFcDewTTDYAMnTeP1xR0EdB5Xz3ILmfASo-8,18455
@@ -152,10 +154,10 @@ rslearn/utils/spatial_index.py,sha256=eomJAUgzmjir8j9HZnSgQoJHwN9H0wGTjmJkMkLLfs
152
154
  rslearn/utils/sqlite_index.py,sha256=YGOJi66544e6JNtfSft6YIlHklFdSJO2duxQ4TJ2iu4,2920
153
155
  rslearn/utils/time.py,sha256=2ilSLG94_sxLP3y5RSV5L5CG8CoND_dbdzYEHVtN-I8,387
154
156
  rslearn/utils/vector_format.py,sha256=EIChYCL6GLOILS2TO2JBkca1TuaWsSubWv6iRS3P2ds,16139
155
- rslearn-0.0.13.dist-info/licenses/LICENSE,sha256=_99ZWPoLdlUbqZoSC5DF4ihiNwl5rTEmBaq2fACecdg,11352
156
- rslearn-0.0.13.dist-info/licenses/NOTICE,sha256=wLPr6rwV_jCg-xEknNGwhnkfRfuoOE9MZ-lru2yZyLI,5070
157
- rslearn-0.0.13.dist-info/METADATA,sha256=44oDmbvkIrjJ0unVNaYeO5OypD6RavmG7l5HUz9Re48,36319
158
- rslearn-0.0.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
- rslearn-0.0.13.dist-info/entry_points.txt,sha256=doTBQ57NT7nq-dgYGgTTw6mafcGWb_4PWYtYR4rGm50,46
160
- rslearn-0.0.13.dist-info/top_level.txt,sha256=XDKo90WBH8P9RQumHxo0giLJsoufT4r9odv-WE6Ahk4,8
161
- rslearn-0.0.13.dist-info/RECORD,,
157
+ rslearn-0.0.14.dist-info/licenses/LICENSE,sha256=_99ZWPoLdlUbqZoSC5DF4ihiNwl5rTEmBaq2fACecdg,11352
158
+ rslearn-0.0.14.dist-info/licenses/NOTICE,sha256=wLPr6rwV_jCg-xEknNGwhnkfRfuoOE9MZ-lru2yZyLI,5070
159
+ rslearn-0.0.14.dist-info/METADATA,sha256=Jbm6ySbM4gkT_5o-RWbRr8APS8TYXq3Q-bWyeda-Uc8,36319
160
+ rslearn-0.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
161
+ rslearn-0.0.14.dist-info/entry_points.txt,sha256=doTBQ57NT7nq-dgYGgTTw6mafcGWb_4PWYtYR4rGm50,46
162
+ rslearn-0.0.14.dist-info/top_level.txt,sha256=XDKo90WBH8P9RQumHxo0giLJsoufT4r9odv-WE6Ahk4,8
163
+ rslearn-0.0.14.dist-info/RECORD,,