rslearn 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/dataset/handler_summaries.py +130 -0
- rslearn/dataset/manage.py +157 -22
- rslearn/main.py +60 -8
- rslearn/models/anysat.py +207 -0
- rslearn/models/clay/clay.py +219 -0
- rslearn/models/clay/configs/metadata.yaml +295 -0
- rslearn/models/copernicusfm.py +37 -25
- rslearn/models/dinov3.py +165 -0
- rslearn/models/galileo/__init__.py +5 -0
- rslearn/models/galileo/galileo.py +517 -0
- rslearn/models/galileo/single_file_galileo.py +1672 -0
- rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
- rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
- rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
- rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
- rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
- rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
- rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
- rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
- rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
- rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
- rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
- rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
- rslearn/models/presto/presto.py +10 -7
- rslearn/models/prithvi.py +1122 -0
- rslearn/models/resize_features.py +45 -0
- rslearn/models/simple_time_series.py +65 -10
- rslearn/models/unet.py +17 -11
- rslearn/models/upsample.py +2 -2
- rslearn/tile_stores/default.py +31 -6
- rslearn/train/transforms/normalize.py +34 -5
- rslearn/train/transforms/select_bands.py +67 -0
- rslearn/train/transforms/sentinel1.py +60 -0
- rslearn/utils/geometry.py +61 -1
- rslearn/utils/raster_format.py +7 -1
- rslearn/utils/vector_format.py +13 -10
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/METADATA +144 -15
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/RECORD +42 -18
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/WHEEL +0 -0
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/top_level.txt +0 -0
rslearn/dataset/handler_summaries.py
ADDED

@@ -0,0 +1,130 @@
+"""This module contains dataclasses for summarizing the results of dataset operations.
+
+They can be used by callers to emit telemetry / logs, or discarded.
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass
+class LayerPrepareSummary:
+    """Results for preparing a single layer."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    windows_prepared: int
+    windows_skipped: int
+    get_items_attempts: int
+
+
+@dataclass
+class PrepareDatasetWindowsSummary:
+    """Results from prepare_dataset_windows operation for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+
+    # Per-layer summaries
+    layer_summaries: list[LayerPrepareSummary]
+
+
+@dataclass
+class IngestCounts:
+    """Known ingestion counts."""
+
+    items_ingested: int
+    geometries_ingested: int
+
+
+@dataclass
+class UnknownIngestCounts:
+    """Indicates ingestion counts are unknown due to partial failure."""
+
+    items_attempted: int
+    geometries_attempted: int
+
+
+@dataclass
+class LayerIngestSummary:
+    """Results for ingesting a single layer."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts - either known or unknown
+    ingest_counts: IngestCounts | UnknownIngestCounts
+    ingest_attempts: int
+
+
+@dataclass
+class IngestDatasetJobsSummary:
+    """Results from ingesting a set of jobs; for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    num_jobs: int
+
+    # Per-layer summaries
+    layer_summaries: list[LayerIngestSummary]
+
+
+@dataclass
+class MaterializeWindowLayerSummary:
+    """Results for materializing a single window layer."""
+
+    skipped: bool
+    materialize_attempts: int
+
+
+@dataclass
+class MaterializeWindowLayersSummary:
+    """Results for materialize a given layer for all windows in a materialize call."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+    num_windows_materialized: int
+    materialize_attempts: int
+
+
+@dataclass
+class MaterializeDatasetWindowsSummary:
+    """Results from materialize_dataset_windows operation for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+
+    # Per-layer summaries
+    layer_summaries: list[MaterializeWindowLayersSummary]
+
+
+@dataclass
+class ErrorOutcome:
+    """TBD what goes in here, if anything."""
+
+    # Timing
+    duration_seconds: float
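
These summary objects are plain dataclasses with no behavior, so callers can log them, serialize them, or discard them. As a minimal illustration (not part of the package), a caller that receives a PrepareDatasetWindowsSummary from prepare_dataset_windows (see the manage.py changes below) might flatten it into log lines like this; the log_prepare_summary helper is hypothetical:

from rslearn.dataset.handler_summaries import PrepareDatasetWindowsSummary


def log_prepare_summary(summary: PrepareDatasetWindowsSummary) -> None:
    # Hypothetical helper: one overall line plus one line per layer.
    print(
        f"prepare took {summary.duration_seconds:.1f}s for "
        f"{summary.total_windows_requested} windows"
    )
    for layer in summary.layer_summaries:
        print(
            f"  layer={layer.layer_name} source={layer.data_source_name} "
            f"prepared={layer.windows_prepared} skipped={layer.windows_skipped} "
            f"get_items_attempts={layer.get_items_attempts}"
        )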
rslearn/dataset/manage.py
CHANGED

@@ -13,6 +13,13 @@ from rslearn.config import (
    RasterLayerConfig,
)
from rslearn.data_sources import DataSource, Item
+from rslearn.dataset.handler_summaries import (
+    LayerPrepareSummary,
+    MaterializeDatasetWindowsSummary,
+    MaterializeWindowLayersSummary,
+    MaterializeWindowLayerSummary,
+    PrepareDatasetWindowsSummary,
+)
from rslearn.log_utils import get_logger
from rslearn.tile_stores import TileStore, get_tile_store_with_layer

@@ -23,7 +30,24 @@ from .window import Window, WindowLayerData
logger = get_logger(__name__)


-def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
+class AttemptsCounter:
+    """A simple counter for tracking attempts (including initial attempt and retries)."""
+
+    def __init__(self) -> None:
+        """Initialize counter with value 0."""
+        self.value = 0
+
+    def increment(self) -> None:
+        """Increment the counter by 1."""
+        self.value += 1
+
+
+def retry(
+    fn: Callable,
+    retry_max_attempts: int,
+    retry_backoff: timedelta,
+    attempts_counter: AttemptsCounter | None = None,
+) -> Any:
    """Retry the function multiple times in case of error.

    The function is retried until either the attempts are exhausted, or the function

@@ -37,8 +61,11 @@ def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
            retries. The actual time is (retry_backoff * attempts) * r, where r is a
            random number between 1 and 2, and attempts is the number of attempts tried
            so far.
+        attempts_counter: an optional counter to increment for each attempt
    """
    for attempt_idx in range(retry_max_attempts):
+        if attempts_counter:
+            attempts_counter.increment()
        try:
            return fn()
        except Exception as e:

@@ -47,6 +74,8 @@ def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
            time.sleep(sleep_base_seconds * (1 + random.random()))

    # Last attempt. This time we don't catch the exception.
+    if attempts_counter:
+        attempts_counter.increment()
    return fn()

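The counter is passed into retry by the prepare, ingest, and materialize paths below so that the summaries can report how many calls (first try plus retries) each step needed. A self-contained sketch of that pattern, consistent with the hunks above; the flaky_fetch function is invented for illustration:

from datetime import timedelta

from rslearn.dataset.manage import AttemptsCounter, retry

state = {"calls": 0}


def flaky_fetch() -> str:
    # Invented function: fails once, then succeeds.
    state["calls"] += 1
    if state["calls"] < 2:
        raise RuntimeError("transient error")
    return "ok"


counter = AttemptsCounter()
result = retry(
    fn=flaky_fetch,
    retry_max_attempts=3,
    retry_backoff=timedelta(seconds=1),  # waits roughly 1-2 seconds after the failed attempt
    attempts_counter=counter,
)
# result == "ok" and counter.value == 2: one failed attempt plus one success.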

@@ -56,7 +85,7 @@ def prepare_dataset_windows(
    force: bool = False,
    retry_max_attempts: int = 0,
    retry_backoff: timedelta = timedelta(minutes=1),
-) ->
+) -> PrepareDatasetWindowsSummary:
    """Prepare windows in a dataset.

    Preparing a window involves looking up items corresponding to the window in each of

@@ -70,10 +99,28 @@ def prepare_dataset_windows(
        retry_max_attempts: set greater than zero to retry for this many attempts in
            case of error.
        retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the prepare operation, fit for telemetry purposes
    """
+    start_time = time.monotonic()
+    layer_summaries: list[LayerPrepareSummary] = []
+
    # Iterate over retrieved layers, and prepare each one.
    for layer_name, layer_cfg in dataset.layers.items():
+        layer_start_time = time.monotonic()
+
        if not layer_cfg.data_source:
+            layer_summaries.append(
+                LayerPrepareSummary(
+                    layer_name=layer_name,
+                    data_source_name="N/A",
+                    duration_seconds=time.monotonic() - layer_start_time,
+                    windows_prepared=0,
+                    windows_skipped=len(windows),
+                    get_items_attempts=0,
+                )
+            )
            continue
        data_source_cfg = layer_cfg.data_source

@@ -85,7 +132,18 @@ def prepare_dataset_windows(
                continue
            needed_windows.append(window)
        logger.info(f"Preparing {len(needed_windows)} windows for layer {layer_name}")
+
        if len(needed_windows) == 0:
+            layer_summaries.append(
+                LayerPrepareSummary(
+                    layer_name=layer_name,
+                    data_source_name=data_source_cfg.name,
+                    duration_seconds=time.monotonic() - layer_start_time,
+                    windows_prepared=0,
+                    windows_skipped=len(windows),
+                    get_items_attempts=0,
+                )
+            )
            continue

        # Create data source after checking for at least one window so it can be fast

@@ -115,10 +173,12 @@ def prepare_dataset_windows(

            geometries.append(geometry)

+        attempts_counter = AttemptsCounter()
        results = retry(
            fn=lambda: data_source.get_items(geometries, data_source_cfg.query_config),
            retry_max_attempts=retry_max_attempts,
            retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
        )

        for window, result in zip(needed_windows, results):

@@ -131,6 +191,25 @@ def prepare_dataset_windows(
            )
            window.save_layer_datas(layer_datas)

+        layer_summaries.append(
+            LayerPrepareSummary(
+                layer_name=layer_name,
+                data_source_name=data_source_cfg.name,
+                duration_seconds=time.monotonic() - layer_start_time,
+                windows_prepared=len(needed_windows),  # we assume all have succeeded
+                windows_skipped=len(windows) - len(needed_windows),
+                get_items_attempts=attempts_counter.value,
+            )
+        )
+
+    summary = PrepareDatasetWindowsSummary(
+        duration_seconds=time.monotonic() - start_time,
+        total_windows_requested=len(windows),
+        layer_summaries=layer_summaries,
+    )
+
+    return summary
+

def ingest_dataset_windows(
    dataset: Dataset,

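For callers outside the CLI handlers, the new return value is purely additive: code that ignored the old return keeps working, and code that wants telemetry can capture the summary. A hedged sketch of such a caller; the Dataset constructor call mirrors main.py in this diff, while load_windows is an assumed way to enumerate the dataset's windows and may be named differently in your version:

from upath import UPath

from rslearn.dataset import Dataset
from rslearn.dataset.manage import prepare_dataset_windows

dataset = Dataset(UPath("/data/my_dataset"))
windows = dataset.load_windows()  # assumed accessor for the dataset's windows
summary = prepare_dataset_windows(dataset, windows)
for layer in summary.layer_summaries:
    print(
        f"{layer.layer_name}: prepared={layer.windows_prepared} "
        f"skipped={layer.windows_skipped} attempts={layer.get_items_attempts}"
    )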
@@ -251,7 +330,7 @@ def materialize_window(
    layer_cfg: LayerConfig,
    retry_max_attempts: int = 0,
    retry_backoff: timedelta = timedelta(minutes=1),
-) ->
+) -> MaterializeWindowLayerSummary:
    """Materialize a window.

    Args:

@@ -264,10 +343,16 @@ def materialize_window(
        retry_max_attempts: set greater than zero to retry for this many attempts in
            case of error.
        retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the materialize operation, fit for telemetry purposes
    """
    # Check if layer is materialized already.
    if window.is_layer_completed(layer_name):
-        return
+        return MaterializeWindowLayerSummary(
+            skipped=True,
+            materialize_attempts=0,
+        )

    layer_datas = window.load_layer_datas()
    if layer_name not in layer_datas:

@@ -276,7 +361,11 @@ def materialize_window(
            layer_name,
            window.name,
        )
-        return
+        return MaterializeWindowLayerSummary(
+            skipped=True,
+            materialize_attempts=0,
+        )
+
    layer_data = layer_datas[layer_name]
    item_groups = []
    for serialized_group in layer_data.serialized_item_groups:

@@ -288,6 +377,8 @@ def materialize_window(

    if layer_cfg.data_source is None:
        raise ValueError("data_source is required")
+
+    attempts_counter = AttemptsCounter()
    if layer_cfg.data_source.ingest:
        if not is_window_ingested(dataset, window, check_layer_name=layer_name):
            logger.info(

@@ -295,9 +386,12 @@ def materialize_window(
                layer_name,
                window.name,
            )
-            return
+            return MaterializeWindowLayerSummary(
+                skipped=True,
+                materialize_attempts=0,
+            )

-
+        logger.info(
            f"Materializing {len(item_groups)} item groups in layer {layer_name} from tile store"
        )

@@ -316,11 +410,12 @@ def materialize_window(
            ),
            retry_max_attempts=retry_max_attempts,
            retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
        )

    else:
        # This window is meant to be materialized directly from the data source.
-
+        logger.info(
            f"Materializing {len(item_groups)} item groups in layer {layer_name} via data source"
        )
        retry(

@@ -329,15 +424,21 @@ def materialize_window(
            ),
            retry_max_attempts=retry_max_attempts,
            retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
        )

+    return MaterializeWindowLayerSummary(
+        skipped=False,
+        materialize_attempts=attempts_counter.value,
+    )
+

def materialize_dataset_windows(
    dataset: Dataset,
    windows: list[Window],
    retry_max_attempts: int = 0,
    retry_backoff: timedelta = timedelta(minutes=1),
-) ->
+) -> MaterializeDatasetWindowsSummary:
    """Materialize items for retrieved layers in a dataset.

    The portions of items corresponding to dataset windows are extracted from the tile

@@ -349,24 +450,58 @@ def materialize_dataset_windows(
        retry_max_attempts: set greater than zero to retry for this many attempts in
            case of error.
        retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the materialize operation, fit for telemetry purposes
    """
+    start_time = time.monotonic()
+
+    layer_summaries: list[MaterializeWindowLayersSummary] = []
+
    tile_store = dataset.get_tile_store()
    for layer_name, layer_cfg in dataset.layers.items():
+        layer_start_time = time.monotonic()
+
+        total_materialize_attempts = 0
+        total_skipped = 0
+        data_source_name = "N/A"
+
        if not layer_cfg.data_source:
-
+            total_skipped = len(windows)
+        else:
+            data_source_name = layer_cfg.data_source.name
+            data_source = rslearn.data_sources.data_source_from_config(
+                layer_cfg, dataset.path
+            )

-
-
-
+            for window in windows:
+                window_summary = materialize_window(
+                    window=window,
+                    dataset=dataset,
+                    data_source=data_source,
+                    tile_store=tile_store,
+                    layer_name=layer_name,
+                    layer_cfg=layer_cfg,
+                    retry_max_attempts=retry_max_attempts,
+                    retry_backoff=retry_backoff,
+                )
+                total_materialize_attempts += window_summary.materialize_attempts
+                if window_summary.skipped:
+                    total_skipped += 1

-
-
-                window=window,
-                dataset=dataset,
-                data_source=data_source,
-                tile_store=tile_store,
+        layer_summaries.append(
+            MaterializeWindowLayersSummary(
                layer_name=layer_name,
-
-
-
+                data_source_name=data_source_name,
+                duration_seconds=time.monotonic() - layer_start_time,
+                total_windows_requested=len(windows),
+                num_windows_materialized=len(windows) - total_skipped,
+                materialize_attempts=total_materialize_attempts,
            )
+        )
+
+    return MaterializeDatasetWindowsSummary(
+        duration_seconds=time.monotonic() - start_time,
+        total_windows_requested=len(windows),
+        layer_summaries=layer_summaries,
+    )
rslearn/main.py
CHANGED

@@ -4,6 +4,7 @@ import argparse
import multiprocessing
import random
import sys
+import time
from collections.abc import Callable
from datetime import UTC, datetime, timedelta
from typing import Any, TypeVar

@@ -19,8 +20,18 @@ from rslearn.const import WGS84_EPSG
from rslearn.data_sources import Item, data_source_from_config
from rslearn.dataset import Dataset, Window, WindowLayerData
from rslearn.dataset.add_windows import add_windows_from_box, add_windows_from_file
+from rslearn.dataset.handler_summaries import (
+    ErrorOutcome,
+    IngestCounts,
+    IngestDatasetJobsSummary,
+    LayerIngestSummary,
+    MaterializeDatasetWindowsSummary,
+    PrepareDatasetWindowsSummary,
+    UnknownIngestCounts,
+)
from rslearn.dataset.index import DatasetIndex
from rslearn.dataset.manage import (
+    AttemptsCounter,
    materialize_dataset_windows,
    prepare_dataset_windows,
    retry,

@@ -287,7 +298,7 @@ def add_apply_on_windows_args(parser: argparse.ArgumentParser) -> None:


def apply_on_windows(
-    f: Callable[[list[Window]],
+    f: Callable[[list[Window]], Any],
    dataset: Dataset,
    group: str | list[str] | None = None,
    names: list[str] | None = None,

@@ -367,7 +378,7 @@ def apply_on_windows(
        p.close()


-def apply_on_windows_args(f: Callable[...,
+def apply_on_windows_args(f: Callable[..., Any], args: argparse.Namespace) -> None:
    """Call apply_on_windows with arguments passed via command-line interface."""
    dataset = Dataset(UPath(args.root), args.disabled_layers)
    apply_on_windows(

@@ -413,12 +424,12 @@ class PrepareHandler:
        """
        self.dataset = dataset

-    def __call__(self, windows: list[Window]) ->
+    def __call__(self, windows: list[Window]) -> PrepareDatasetWindowsSummary:
        """Prepares the windows from apply_on_windows."""
        logger.info(f"Running prepare on {len(windows)} windows")
        if self.dataset is None:
            raise ValueError("dataset not set")
-        prepare_dataset_windows(
+        return prepare_dataset_windows(
            self.dataset,
            windows,
            self.force,

@@ -502,14 +513,20 @@ class IngestHandler:

    def __call__(
        self, jobs: list[tuple[str, LayerConfig, Item, list[STGeometry]]]
-    ) ->
+    ) -> IngestDatasetJobsSummary:
        """Ingest the specified items.

        The items are computed from list of windows via IngestHandler.get_jobs.

        Args:
-            jobs: list of (layer_name, item, geometries) tuples to ingest.
+            jobs: list of (layer_name, layer_cfg, item, geometries) tuples to ingest.
+
+        Returns:
+            summary of the ingest jobs operation fit for telemetry purposes.
        """
+        start_time = time.monotonic()
+        layer_summaries: list[LayerIngestSummary] = []
+
        logger.info(f"Running ingest for {len(jobs)} jobs")
        import gc


@@ -533,6 +550,8 @@ class IngestHandler:
            layer_cfg = self.dataset.layers[layer_name]
            data_source = data_source_from_config(layer_cfg, self.dataset.path)

+            attempts_counter = AttemptsCounter()
+            ingest_counts: IngestCounts | UnknownIngestCounts
            try:
                retry(
                    lambda: data_source.ingest(

@@ -544,18 +563,47 @@ class IngestHandler:
                    ),
                    retry_max_attempts=self.retry_max_attempts,
                    retry_backoff=self.retry_backoff,
+                    attempts_counter=attempts_counter,
+                )
+                ingest_counts = IngestCounts(
+                    items_ingested=len(items_and_geometries),
+                    geometries_ingested=sum(
+                        len(geometries) for _, geometries in items_and_geometries
+                    ),
                )
            except Exception as e:
                if not self.ignore_errors:
                    raise

+                ingest_counts = UnknownIngestCounts(
+                    items_attempted=len(items_and_geometries),
+                    geometries_attempted=sum(
+                        len(geometries) for _, geometries in items_and_geometries
+                    ),
+                )
                logger.error(
                    "warning: got error while ingesting "
                    + f"{len(items_and_geometries)} items: {e}"
                )

+            layer_summaries.append(
+                LayerIngestSummary(
+                    layer_name=layer_name,
+                    data_source_name=getattr(layer_cfg.data_source, "name", "N/A"),
+                    duration_seconds=time.monotonic() - start_time,
+                    ingest_counts=ingest_counts,
+                    ingest_attempts=attempts_counter.value,
+                )
+            )
+
            gc.collect()

+        return IngestDatasetJobsSummary(
+            duration_seconds=time.monotonic() - start_time,
+            num_jobs=len(jobs),
+            layer_summaries=layer_summaries,
+        )
+
    def _load_layer_data_for_windows(
        self, windows: list[Window], workers: int
    ) -> list[tuple[Window, dict[str, WindowLayerData]]]:

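Since LayerIngestSummary.ingest_counts is a union, downstream code that reads these summaries must branch on the concrete type to tell a clean ingest from one where ignore_errors swallowed a failure. A minimal sketch of that branching (the describe_ingest helper is hypothetical):

from rslearn.dataset.handler_summaries import (
    IngestCounts,
    LayerIngestSummary,
    UnknownIngestCounts,
)


def describe_ingest(layer: LayerIngestSummary) -> str:
    # Branch on the concrete counts type; UnknownIngestCounts signals a partial failure.
    counts = layer.ingest_counts
    if isinstance(counts, IngestCounts):
        return (
            f"{layer.layer_name}: ingested {counts.items_ingested} items "
            f"({counts.geometries_ingested} geometries) in {layer.ingest_attempts} attempt(s)"
        )
    assert isinstance(counts, UnknownIngestCounts)
    return (
        f"{layer.layer_name}: attempted {counts.items_attempted} items "
        f"({counts.geometries_attempted} geometries); final counts unknown"
    )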
@@ -686,13 +734,16 @@ class MaterializeHandler:
        """
        self.dataset = dataset

-    def __call__(
+    def __call__(
+        self, windows: list[Window]
+    ) -> MaterializeDatasetWindowsSummary | ErrorOutcome:
        """Materializes the windows from apply_on_windows."""
        logger.info(f"Running Materialize with {len(windows)} windows")
+        start_time = time.monotonic()
        if self.dataset is None:
            raise ValueError("dataset not set")
        try:
-            materialize_dataset_windows(
+            return materialize_dataset_windows(
                self.dataset,
                windows,
                retry_max_attempts=self.retry_max_attempts,

@@ -703,6 +754,7 @@ class MaterializeHandler:
                logger.error(f"Error materializing windows: {e}")
                raise
            logger.warning(f"Ignoring error while materializing windows: {e}")
+            return ErrorOutcome(duration_seconds=time.monotonic() - start_time)


@register_handler("dataset", "materialize")