rslearn 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (42)
  1. rslearn/dataset/handler_summaries.py +130 -0
  2. rslearn/dataset/manage.py +157 -22
  3. rslearn/main.py +60 -8
  4. rslearn/models/anysat.py +207 -0
  5. rslearn/models/clay/clay.py +219 -0
  6. rslearn/models/clay/configs/metadata.yaml +295 -0
  7. rslearn/models/copernicusfm.py +37 -25
  8. rslearn/models/dinov3.py +165 -0
  9. rslearn/models/galileo/__init__.py +5 -0
  10. rslearn/models/galileo/galileo.py +517 -0
  11. rslearn/models/galileo/single_file_galileo.py +1672 -0
  12. rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
  13. rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
  14. rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
  15. rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
  16. rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
  17. rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
  18. rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
  19. rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
  20. rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
  21. rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
  22. rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
  23. rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
  24. rslearn/models/presto/presto.py +10 -7
  25. rslearn/models/prithvi.py +1122 -0
  26. rslearn/models/resize_features.py +45 -0
  27. rslearn/models/simple_time_series.py +65 -10
  28. rslearn/models/unet.py +17 -11
  29. rslearn/models/upsample.py +2 -2
  30. rslearn/tile_stores/default.py +31 -6
  31. rslearn/train/transforms/normalize.py +34 -5
  32. rslearn/train/transforms/select_bands.py +67 -0
  33. rslearn/train/transforms/sentinel1.py +60 -0
  34. rslearn/utils/geometry.py +61 -1
  35. rslearn/utils/raster_format.py +7 -1
  36. rslearn/utils/vector_format.py +13 -10
  37. {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/METADATA +144 -15
  38. {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/RECORD +42 -18
  39. {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/WHEEL +0 -0
  40. {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/entry_points.txt +0 -0
  41. {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/licenses/LICENSE +0 -0
  42. {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/top_level.txt +0 -0
rslearn/dataset/handler_summaries.py ADDED
@@ -0,0 +1,130 @@
+"""This module contains dataclasses for summarizing the results of dataset operations.
+
+They can be used by callers to emit telemetry / logs, or discarded.
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass
+class LayerPrepareSummary:
+    """Results for preparing a single layer."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    windows_prepared: int
+    windows_skipped: int
+    get_items_attempts: int
+
+
+@dataclass
+class PrepareDatasetWindowsSummary:
+    """Results from prepare_dataset_windows operation for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+
+    # Per-layer summaries
+    layer_summaries: list[LayerPrepareSummary]
+
+
+@dataclass
+class IngestCounts:
+    """Known ingestion counts."""
+
+    items_ingested: int
+    geometries_ingested: int
+
+
+@dataclass
+class UnknownIngestCounts:
+    """Indicates ingestion counts are unknown due to partial failure."""
+
+    items_attempted: int
+    geometries_attempted: int
+
+
+@dataclass
+class LayerIngestSummary:
+    """Results for ingesting a single layer."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts - either known or unknown
+    ingest_counts: IngestCounts | UnknownIngestCounts
+    ingest_attempts: int
+
+
+@dataclass
+class IngestDatasetJobsSummary:
+    """Results from ingesting a set of jobs; for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    num_jobs: int
+
+    # Per-layer summaries
+    layer_summaries: list[LayerIngestSummary]
+
+
+@dataclass
+class MaterializeWindowLayerSummary:
+    """Results for materializing a single window layer."""
+
+    skipped: bool
+    materialize_attempts: int
+
+
+@dataclass
+class MaterializeWindowLayersSummary:
+    """Results for materializing a given layer for all windows in a materialize call."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+    num_windows_materialized: int
+    materialize_attempts: int
+
+
+@dataclass
+class MaterializeDatasetWindowsSummary:
+    """Results from materialize_dataset_windows operation for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+
+    # Per-layer summaries
+    layer_summaries: list[MaterializeWindowLayersSummary]
+
+
+@dataclass
+class ErrorOutcome:
+    """TBD what goes in here, if anything."""
+
+    # Timing
+    duration_seconds: float
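Note: these summary objects are plain dataclasses, so a caller can flatten them into whatever logging or metrics format it uses. A rough sketch of that (the emit_metric helper and the metric names below are hypothetical, not part of rslearn):

# Sketch: flatten a PrepareDatasetWindowsSummary into per-layer metrics.
# emit_metric() is a hypothetical stand-in for a real telemetry client.
from rslearn.dataset.handler_summaries import PrepareDatasetWindowsSummary


def emit_metric(name: str, value: float, tags: dict[str, str]) -> None:
    print(name, value, tags)  # replace with your telemetry client


def report_prepare_summary(summary: PrepareDatasetWindowsSummary) -> None:
    emit_metric("prepare.duration_seconds", summary.duration_seconds, {})
    for layer in summary.layer_summaries:
        tags = {"layer": layer.layer_name, "data_source": layer.data_source_name}
        emit_metric("prepare.windows_prepared", layer.windows_prepared, tags)
        emit_metric("prepare.windows_skipped", layer.windows_skipped, tags)
        emit_metric("prepare.get_items_attempts", layer.get_items_attempts, tags)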
rslearn/dataset/manage.py CHANGED
@@ -13,6 +13,13 @@ from rslearn.config import (
     RasterLayerConfig,
 )
 from rslearn.data_sources import DataSource, Item
+from rslearn.dataset.handler_summaries import (
+    LayerPrepareSummary,
+    MaterializeDatasetWindowsSummary,
+    MaterializeWindowLayersSummary,
+    MaterializeWindowLayerSummary,
+    PrepareDatasetWindowsSummary,
+)
 from rslearn.log_utils import get_logger
 from rslearn.tile_stores import TileStore, get_tile_store_with_layer
 
@@ -23,7 +30,24 @@ from .window import Window, WindowLayerData
 logger = get_logger(__name__)
 
 
-def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
+class AttemptsCounter:
+    """A simple counter for tracking attempts (including initial attempt and retries)."""
+
+    def __init__(self) -> None:
+        """Initialize counter with value 0."""
+        self.value = 0
+
+    def increment(self) -> None:
+        """Increment the counter by 1."""
+        self.value += 1
+
+
+def retry(
+    fn: Callable,
+    retry_max_attempts: int,
+    retry_backoff: timedelta,
+    attempts_counter: AttemptsCounter | None = None,
+) -> Any:
     """Retry the function multiple times in case of error.
 
     The function is retried until either the attempts are exhausted, or the function
@@ -37,8 +61,11 @@ def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
             retries. The actual time is (retry_backoff * attempts) * r, where r is a
             random number between 1 and 2, and attempts is the number of attempts tried
             so far.
+        attempts_counter: an optional counter to increment for each attempt
     """
     for attempt_idx in range(retry_max_attempts):
+        if attempts_counter:
+            attempts_counter.increment()
         try:
             return fn()
         except Exception as e:
@@ -47,6 +74,8 @@ def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
             time.sleep(sleep_base_seconds * (1 + random.random()))
 
     # Last attempt. This time we don't catch the exception.
+    if attempts_counter:
+        attempts_counter.increment()
     return fn()
 
 
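Aside: the counter is mutated in place, so the caller can read the total number of attempts even when retry() ultimately raises. A small usage sketch (flaky_fetch is made up for illustration; retry and AttemptsCounter are the helpers added above):

# Sketch: counting attempts across retries of a flaky callable.
from datetime import timedelta

from rslearn.dataset.manage import AttemptsCounter, retry


def flaky_fetch() -> str:
    # Made-up callable that always fails, just to exercise the retry path.
    raise RuntimeError("transient failure")


counter = AttemptsCounter()
try:
    retry(
        fn=flaky_fetch,
        retry_max_attempts=2,
        retry_backoff=timedelta(seconds=0),
        attempts_counter=counter,
    )
except RuntimeError:
    pass
print(f"attempts made: {counter.value}")  # 3: two caught attempts plus the final one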
@@ -56,7 +85,7 @@ def prepare_dataset_windows(
     force: bool = False,
     retry_max_attempts: int = 0,
     retry_backoff: timedelta = timedelta(minutes=1),
-) -> None:
+) -> PrepareDatasetWindowsSummary:
     """Prepare windows in a dataset.
 
     Preparing a window involves looking up items corresponding to the window in each of
@@ -70,10 +99,28 @@
         retry_max_attempts: set greater than zero to retry for this many attempts in
             case of error.
         retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the prepare operation, fit for telemetry purposes
     """
+    start_time = time.monotonic()
+    layer_summaries: list[LayerPrepareSummary] = []
+
     # Iterate over retrieved layers, and prepare each one.
     for layer_name, layer_cfg in dataset.layers.items():
+        layer_start_time = time.monotonic()
+
         if not layer_cfg.data_source:
+            layer_summaries.append(
+                LayerPrepareSummary(
+                    layer_name=layer_name,
+                    data_source_name="N/A",
+                    duration_seconds=time.monotonic() - layer_start_time,
+                    windows_prepared=0,
+                    windows_skipped=len(windows),
+                    get_items_attempts=0,
+                )
+            )
             continue
         data_source_cfg = layer_cfg.data_source
 
@@ -85,7 +132,18 @@
                 continue
             needed_windows.append(window)
         logger.info(f"Preparing {len(needed_windows)} windows for layer {layer_name}")
+
         if len(needed_windows) == 0:
+            layer_summaries.append(
+                LayerPrepareSummary(
+                    layer_name=layer_name,
+                    data_source_name=data_source_cfg.name,
+                    duration_seconds=time.monotonic() - layer_start_time,
+                    windows_prepared=0,
+                    windows_skipped=len(windows),
+                    get_items_attempts=0,
+                )
+            )
             continue
 
         # Create data source after checking for at least one window so it can be fast
@@ -115,10 +173,12 @@
 
             geometries.append(geometry)
 
+        attempts_counter = AttemptsCounter()
         results = retry(
             fn=lambda: data_source.get_items(geometries, data_source_cfg.query_config),
             retry_max_attempts=retry_max_attempts,
             retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
         )
 
         for window, result in zip(needed_windows, results):
@@ -131,6 +191,25 @@
             )
             window.save_layer_datas(layer_datas)
 
+        layer_summaries.append(
+            LayerPrepareSummary(
+                layer_name=layer_name,
+                data_source_name=data_source_cfg.name,
+                duration_seconds=time.monotonic() - layer_start_time,
+                windows_prepared=len(needed_windows),  # we assume all have succeeded
+                windows_skipped=len(windows) - len(needed_windows),
+                get_items_attempts=attempts_counter.value,
+            )
+        )
+
+    summary = PrepareDatasetWindowsSummary(
+        duration_seconds=time.monotonic() - start_time,
+        total_windows_requested=len(windows),
+        layer_summaries=layer_summaries,
+    )
+
+    return summary
+
 
 def ingest_dataset_windows(
     dataset: Dataset,
@@ -251,7 +330,7 @@ def materialize_window(
     layer_cfg: LayerConfig,
     retry_max_attempts: int = 0,
     retry_backoff: timedelta = timedelta(minutes=1),
-) -> None:
+) -> MaterializeWindowLayerSummary:
     """Materialize a window.
 
     Args:
@@ -264,10 +343,16 @@
         retry_max_attempts: set greater than zero to retry for this many attempts in
             case of error.
         retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the materialize operation, fit for telemetry purposes
     """
     # Check if layer is materialized already.
     if window.is_layer_completed(layer_name):
-        return
+        return MaterializeWindowLayerSummary(
+            skipped=True,
+            materialize_attempts=0,
+        )
 
     layer_datas = window.load_layer_datas()
     if layer_name not in layer_datas:
@@ -276,7 +361,11 @@
             layer_name,
             window.name,
         )
-        return
+        return MaterializeWindowLayerSummary(
+            skipped=True,
+            materialize_attempts=0,
+        )
+
     layer_data = layer_datas[layer_name]
     item_groups = []
     for serialized_group in layer_data.serialized_item_groups:
@@ -288,6 +377,8 @@
 
     if layer_cfg.data_source is None:
         raise ValueError("data_source is required")
+
+    attempts_counter = AttemptsCounter()
     if layer_cfg.data_source.ingest:
         if not is_window_ingested(dataset, window, check_layer_name=layer_name):
             logger.info(
@@ -295,9 +386,12 @@
                 layer_name,
                 window.name,
             )
-            return
+            return MaterializeWindowLayerSummary(
+                skipped=True,
+                materialize_attempts=0,
+            )
 
-        print(
+        logger.info(
             f"Materializing {len(item_groups)} item groups in layer {layer_name} from tile store"
         )
 
@@ -316,11 +410,12 @@
             ),
             retry_max_attempts=retry_max_attempts,
             retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
         )
 
     else:
         # This window is meant to be materialized directly from the data source.
-        print(
+        logger.info(
             f"Materializing {len(item_groups)} item groups in layer {layer_name} via data source"
         )
         retry(
@@ -329,15 +424,21 @@
             ),
             retry_max_attempts=retry_max_attempts,
             retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
        )
 
+    return MaterializeWindowLayerSummary(
+        skipped=False,
+        materialize_attempts=attempts_counter.value,
+    )
+
 
 def materialize_dataset_windows(
     dataset: Dataset,
     windows: list[Window],
     retry_max_attempts: int = 0,
     retry_backoff: timedelta = timedelta(minutes=1),
-) -> None:
+) -> MaterializeDatasetWindowsSummary:
     """Materialize items for retrieved layers in a dataset.
 
     The portions of items corresponding to dataset windows are extracted from the tile
@@ -349,24 +450,58 @@
         retry_max_attempts: set greater than zero to retry for this many attempts in
             case of error.
         retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the materialize operation, fit for telemetry purposes
     """
+    start_time = time.monotonic()
+
+    layer_summaries: list[MaterializeWindowLayersSummary] = []
+
     tile_store = dataset.get_tile_store()
     for layer_name, layer_cfg in dataset.layers.items():
+        layer_start_time = time.monotonic()
+
+        total_materialize_attempts = 0
+        total_skipped = 0
+        data_source_name = "N/A"
+
         if not layer_cfg.data_source:
-            continue
+            total_skipped = len(windows)
+        else:
+            data_source_name = layer_cfg.data_source.name
+            data_source = rslearn.data_sources.data_source_from_config(
+                layer_cfg, dataset.path
+            )
 
-        data_source = rslearn.data_sources.data_source_from_config(
-            layer_cfg, dataset.path
-        )
+            for window in windows:
+                window_summary = materialize_window(
+                    window=window,
+                    dataset=dataset,
+                    data_source=data_source,
+                    tile_store=tile_store,
+                    layer_name=layer_name,
+                    layer_cfg=layer_cfg,
+                    retry_max_attempts=retry_max_attempts,
+                    retry_backoff=retry_backoff,
+                )
+                total_materialize_attempts += window_summary.materialize_attempts
+                if window_summary.skipped:
+                    total_skipped += 1
 
-        for window in windows:
-            materialize_window(
-                window=window,
-                dataset=dataset,
-                data_source=data_source,
-                tile_store=tile_store,
+        layer_summaries.append(
+            MaterializeWindowLayersSummary(
                 layer_name=layer_name,
-                layer_cfg=layer_cfg,
-                retry_max_attempts=retry_max_attempts,
-                retry_backoff=retry_backoff,
+                data_source_name=data_source_name,
+                duration_seconds=time.monotonic() - layer_start_time,
+                total_windows_requested=len(windows),
+                num_windows_materialized=len(windows) - total_skipped,
+                materialize_attempts=total_materialize_attempts,
             )
+        )
+
+    return MaterializeDatasetWindowsSummary(
+        duration_seconds=time.monotonic() - start_time,
+        total_windows_requested=len(windows),
+        layer_summaries=layer_summaries,
    )
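Taken together, the manage-level entry points now return summaries rather than None. A rough sketch of consuming them end to end (window construction is elided, and it assumes any layers with ingest enabled have already been ingested):

# Sketch: run prepare and materialize and print the returned summaries.
from rslearn.dataset import Dataset, Window
from rslearn.dataset.manage import materialize_dataset_windows, prepare_dataset_windows


def prepare_and_materialize(dataset: Dataset, windows: list[Window]) -> None:
    prepare_summary = prepare_dataset_windows(dataset, windows)
    print(
        f"prepare took {prepare_summary.duration_seconds:.1f}s "
        f"for {prepare_summary.total_windows_requested} windows"
    )
    # Assumes ingestion has already happened for layers that require it.
    materialize_summary = materialize_dataset_windows(dataset, windows)
    for layer in materialize_summary.layer_summaries:
        print(
            f"layer {layer.layer_name}: "
            f"{layer.num_windows_materialized}/{layer.total_windows_requested} "
            f"windows materialized in {layer.materialize_attempts} attempts"
        )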
rslearn/main.py CHANGED
@@ -4,6 +4,7 @@ import argparse
 import multiprocessing
 import random
 import sys
+import time
 from collections.abc import Callable
 from datetime import UTC, datetime, timedelta
 from typing import Any, TypeVar
@@ -19,8 +20,18 @@ from rslearn.const import WGS84_EPSG
 from rslearn.data_sources import Item, data_source_from_config
 from rslearn.dataset import Dataset, Window, WindowLayerData
 from rslearn.dataset.add_windows import add_windows_from_box, add_windows_from_file
+from rslearn.dataset.handler_summaries import (
+    ErrorOutcome,
+    IngestCounts,
+    IngestDatasetJobsSummary,
+    LayerIngestSummary,
+    MaterializeDatasetWindowsSummary,
+    PrepareDatasetWindowsSummary,
+    UnknownIngestCounts,
+)
 from rslearn.dataset.index import DatasetIndex
 from rslearn.dataset.manage import (
+    AttemptsCounter,
     materialize_dataset_windows,
     prepare_dataset_windows,
     retry,
@@ -287,7 +298,7 @@ def add_apply_on_windows_args(parser: argparse.ArgumentParser) -> None:
 
 
 def apply_on_windows(
-    f: Callable[[list[Window]], None],
+    f: Callable[[list[Window]], Any],
     dataset: Dataset,
     group: str | list[str] | None = None,
     names: list[str] | None = None,
@@ -367,7 +378,7 @@
     p.close()
 
 
-def apply_on_windows_args(f: Callable[..., None], args: argparse.Namespace) -> None:
+def apply_on_windows_args(f: Callable[..., Any], args: argparse.Namespace) -> None:
     """Call apply_on_windows with arguments passed via command-line interface."""
     dataset = Dataset(UPath(args.root), args.disabled_layers)
     apply_on_windows(
@@ -413,12 +424,12 @@ class PrepareHandler:
         """
         self.dataset = dataset
 
-    def __call__(self, windows: list[Window]) -> None:
+    def __call__(self, windows: list[Window]) -> PrepareDatasetWindowsSummary:
         """Prepares the windows from apply_on_windows."""
         logger.info(f"Running prepare on {len(windows)} windows")
         if self.dataset is None:
             raise ValueError("dataset not set")
-        prepare_dataset_windows(
+        return prepare_dataset_windows(
            self.dataset,
            windows,
            self.force,
@@ -502,14 +513,20 @@ class IngestHandler:
 
     def __call__(
         self, jobs: list[tuple[str, LayerConfig, Item, list[STGeometry]]]
-    ) -> None:
+    ) -> IngestDatasetJobsSummary:
         """Ingest the specified items.
 
         The items are computed from list of windows via IngestHandler.get_jobs.
 
         Args:
-            jobs: list of (layer_name, item, geometries) tuples to ingest.
+            jobs: list of (layer_name, layer_cfg, item, geometries) tuples to ingest.
+
+        Returns:
+            summary of the ingest jobs operation fit for telemetry purposes.
         """
+        start_time = time.monotonic()
+        layer_summaries: list[LayerIngestSummary] = []
+
         logger.info(f"Running ingest for {len(jobs)} jobs")
         import gc
 
@@ -533,6 +550,8 @@
             layer_cfg = self.dataset.layers[layer_name]
             data_source = data_source_from_config(layer_cfg, self.dataset.path)
 
+            attempts_counter = AttemptsCounter()
+            ingest_counts: IngestCounts | UnknownIngestCounts
             try:
                 retry(
                     lambda: data_source.ingest(
@@ -544,18 +563,47 @@
                     ),
                     retry_max_attempts=self.retry_max_attempts,
                     retry_backoff=self.retry_backoff,
+                    attempts_counter=attempts_counter,
+                )
+                ingest_counts = IngestCounts(
+                    items_ingested=len(items_and_geometries),
+                    geometries_ingested=sum(
+                        len(geometries) for _, geometries in items_and_geometries
+                    ),
                 )
             except Exception as e:
                 if not self.ignore_errors:
                     raise
 
+                ingest_counts = UnknownIngestCounts(
+                    items_attempted=len(items_and_geometries),
+                    geometries_attempted=sum(
+                        len(geometries) for _, geometries in items_and_geometries
+                    ),
+                )
                 logger.error(
                     "warning: got error while ingesting "
                     + f"{len(items_and_geometries)} items: {e}"
                 )
 
+            layer_summaries.append(
+                LayerIngestSummary(
+                    layer_name=layer_name,
+                    data_source_name=getattr(layer_cfg.data_source, "name", "N/A"),
+                    duration_seconds=time.monotonic() - start_time,
+                    ingest_counts=ingest_counts,
+                    ingest_attempts=attempts_counter.value,
+                )
+            )
+
             gc.collect()
 
+        return IngestDatasetJobsSummary(
+            duration_seconds=time.monotonic() - start_time,
+            num_jobs=len(jobs),
+            layer_summaries=layer_summaries,
+        )
+
     def _load_layer_data_for_windows(
         self, windows: list[Window], workers: int
     ) -> list[tuple[Window, dict[str, WindowLayerData]]]:
@@ -686,13 +734,16 @@ class MaterializeHandler:
         """
         self.dataset = dataset
 
-    def __call__(self, windows: list[Window]) -> None:
+    def __call__(
+        self, windows: list[Window]
+    ) -> MaterializeDatasetWindowsSummary | ErrorOutcome:
         """Materializes the windows from apply_on_windows."""
         logger.info(f"Running Materialize with {len(windows)} windows")
+        start_time = time.monotonic()
         if self.dataset is None:
             raise ValueError("dataset not set")
         try:
-            materialize_dataset_windows(
+            return materialize_dataset_windows(
                 self.dataset,
                 windows,
                 retry_max_attempts=self.retry_max_attempts,
@@ -703,6 +754,7 @@
                 logger.error(f"Error materializing windows: {e}")
                 raise
             logger.warning(f"Ignoring error while materializing windows: {e}")
+            return ErrorOutcome(duration_seconds=time.monotonic() - start_time)
 
 
 @register_handler("dataset", "materialize")
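Because MaterializeHandler.__call__ can now return either a summary or an ErrorOutcome (when errors are being ignored), downstream code has to branch on the type. A minimal sketch of that check (the log_materialize_result function is illustrative, not part of rslearn):

# Sketch: branching on the MaterializeDatasetWindowsSummary | ErrorOutcome union.
from rslearn.dataset.handler_summaries import (
    ErrorOutcome,
    MaterializeDatasetWindowsSummary,
)


def log_materialize_result(
    result: MaterializeDatasetWindowsSummary | ErrorOutcome,
) -> None:
    if isinstance(result, ErrorOutcome):
        # Errors were ignored; only the elapsed time is known.
        print(f"materialize failed after {result.duration_seconds:.1f}s")
        return
    print(
        f"materialized {result.total_windows_requested} windows across "
        f"{len(result.layer_summaries)} layers in {result.duration_seconds:.1f}s"
    )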