rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +31 -0
- rslearn/config/__init__.py +6 -12
- rslearn/config/dataset.py +520 -401
- rslearn/const.py +9 -15
- rslearn/data_sources/__init__.py +8 -23
- rslearn/data_sources/aws_landsat.py +242 -98
- rslearn/data_sources/aws_open_data.py +111 -151
- rslearn/data_sources/aws_sentinel1.py +131 -0
- rslearn/data_sources/climate_data_store.py +471 -0
- rslearn/data_sources/copernicus.py +884 -12
- rslearn/data_sources/data_source.py +43 -12
- rslearn/data_sources/earthdaily.py +484 -0
- rslearn/data_sources/earthdata_srtm.py +282 -0
- rslearn/data_sources/eurocrops.py +242 -0
- rslearn/data_sources/gcp_public_data.py +578 -222
- rslearn/data_sources/google_earth_engine.py +461 -135
- rslearn/data_sources/local_files.py +219 -150
- rslearn/data_sources/openstreetmap.py +51 -89
- rslearn/data_sources/planet.py +24 -60
- rslearn/data_sources/planet_basemap.py +275 -0
- rslearn/data_sources/planetary_computer.py +798 -0
- rslearn/data_sources/usda_cdl.py +195 -0
- rslearn/data_sources/usgs_landsat.py +115 -83
- rslearn/data_sources/utils.py +249 -61
- rslearn/data_sources/vector_source.py +1 -0
- rslearn/data_sources/worldcereal.py +449 -0
- rslearn/data_sources/worldcover.py +144 -0
- rslearn/data_sources/worldpop.py +153 -0
- rslearn/data_sources/xyz_tiles.py +150 -107
- rslearn/dataset/__init__.py +8 -2
- rslearn/dataset/add_windows.py +2 -2
- rslearn/dataset/dataset.py +40 -51
- rslearn/dataset/handler_summaries.py +131 -0
- rslearn/dataset/manage.py +313 -74
- rslearn/dataset/materialize.py +431 -107
- rslearn/dataset/remap.py +29 -4
- rslearn/dataset/storage/__init__.py +1 -0
- rslearn/dataset/storage/file.py +202 -0
- rslearn/dataset/storage/storage.py +140 -0
- rslearn/dataset/window.py +181 -44
- rslearn/lightning_cli.py +454 -0
- rslearn/log_utils.py +24 -0
- rslearn/main.py +384 -181
- rslearn/models/anysat.py +215 -0
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clay/clay.py +231 -0
- rslearn/models/clay/configs/metadata.yaml +295 -0
- rslearn/models/clip.py +68 -0
- rslearn/models/component.py +111 -0
- rslearn/models/concatenate_features.py +103 -0
- rslearn/models/conv.py +63 -0
- rslearn/models/croma.py +306 -0
- rslearn/models/detr/__init__.py +5 -0
- rslearn/models/detr/box_ops.py +103 -0
- rslearn/models/detr/detr.py +504 -0
- rslearn/models/detr/matcher.py +107 -0
- rslearn/models/detr/position_encoding.py +114 -0
- rslearn/models/detr/transformer.py +429 -0
- rslearn/models/detr/util.py +24 -0
- rslearn/models/dinov3.py +177 -0
- rslearn/models/faster_rcnn.py +30 -28
- rslearn/models/feature_center_crop.py +53 -0
- rslearn/models/fpn.py +19 -8
- rslearn/models/galileo/__init__.py +5 -0
- rslearn/models/galileo/galileo.py +595 -0
- rslearn/models/galileo/single_file_galileo.py +1678 -0
- rslearn/models/module_wrapper.py +65 -0
- rslearn/models/molmo.py +69 -0
- rslearn/models/multitask.py +384 -28
- rslearn/models/olmoearth_pretrain/__init__.py +1 -0
- rslearn/models/olmoearth_pretrain/model.py +421 -0
- rslearn/models/olmoearth_pretrain/norm.py +86 -0
- rslearn/models/panopticon.py +170 -0
- rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
- rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
- rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
- rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
- rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
- rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
- rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
- rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
- rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
- rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
- rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
- rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
- rslearn/models/pick_features.py +17 -10
- rslearn/models/pooling_decoder.py +60 -7
- rslearn/models/presto/__init__.py +5 -0
- rslearn/models/presto/presto.py +297 -0
- rslearn/models/presto/single_file_presto.py +926 -0
- rslearn/models/prithvi.py +1147 -0
- rslearn/models/resize_features.py +59 -0
- rslearn/models/sam2_enc.py +13 -9
- rslearn/models/satlaspretrain.py +38 -18
- rslearn/models/simple_time_series.py +188 -77
- rslearn/models/singletask.py +24 -13
- rslearn/models/ssl4eo_s12.py +40 -30
- rslearn/models/swin.py +44 -32
- rslearn/models/task_embedding.py +250 -0
- rslearn/models/terramind.py +256 -0
- rslearn/models/trunk.py +139 -0
- rslearn/models/unet.py +68 -22
- rslearn/models/upsample.py +48 -0
- rslearn/models/use_croma.py +508 -0
- rslearn/template_params.py +26 -0
- rslearn/tile_stores/__init__.py +41 -18
- rslearn/tile_stores/default.py +409 -0
- rslearn/tile_stores/tile_store.py +236 -132
- rslearn/train/all_patches_dataset.py +530 -0
- rslearn/train/callbacks/adapters.py +53 -0
- rslearn/train/callbacks/freeze_unfreeze.py +348 -17
- rslearn/train/callbacks/gradients.py +129 -0
- rslearn/train/callbacks/peft.py +116 -0
- rslearn/train/data_module.py +444 -20
- rslearn/train/dataset.py +588 -235
- rslearn/train/lightning_module.py +192 -62
- rslearn/train/model_context.py +88 -0
- rslearn/train/optimizer.py +31 -0
- rslearn/train/prediction_writer.py +319 -84
- rslearn/train/scheduler.py +92 -0
- rslearn/train/tasks/classification.py +55 -28
- rslearn/train/tasks/detection.py +132 -76
- rslearn/train/tasks/embedding.py +120 -0
- rslearn/train/tasks/multi_task.py +28 -14
- rslearn/train/tasks/per_pixel_regression.py +291 -0
- rslearn/train/tasks/regression.py +161 -44
- rslearn/train/tasks/segmentation.py +428 -53
- rslearn/train/tasks/task.py +6 -5
- rslearn/train/transforms/__init__.py +1 -1
- rslearn/train/transforms/concatenate.py +54 -10
- rslearn/train/transforms/crop.py +29 -11
- rslearn/train/transforms/flip.py +18 -6
- rslearn/train/transforms/mask.py +78 -0
- rslearn/train/transforms/normalize.py +101 -17
- rslearn/train/transforms/pad.py +19 -7
- rslearn/train/transforms/resize.py +83 -0
- rslearn/train/transforms/select_bands.py +76 -0
- rslearn/train/transforms/sentinel1.py +75 -0
- rslearn/train/transforms/transform.py +89 -70
- rslearn/utils/__init__.py +2 -6
- rslearn/utils/array.py +8 -6
- rslearn/utils/feature.py +2 -2
- rslearn/utils/fsspec.py +90 -1
- rslearn/utils/geometry.py +347 -7
- rslearn/utils/get_utm_ups_crs.py +2 -3
- rslearn/utils/grid_index.py +5 -5
- rslearn/utils/jsonargparse.py +178 -0
- rslearn/utils/mp.py +4 -3
- rslearn/utils/raster_format.py +268 -116
- rslearn/utils/rtree_index.py +64 -17
- rslearn/utils/sqlite_index.py +7 -1
- rslearn/utils/vector_format.py +252 -97
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
- rslearn-0.0.21.dist-info/RECORD +167 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
- rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
- rslearn/data_sources/raster_source.py +0 -309
- rslearn/models/registry.py +0 -5
- rslearn/tile_stores/file.py +0 -242
- rslearn/utils/mgrs.py +0 -24
- rslearn/utils/utils.py +0 -22
- rslearn-0.0.1.dist-info/RECORD +0 -88
- /rslearn/{data_sources/geotiff.py → py.typed} +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""This module contains dataclasses for summarizing the results of dataset operations.
|
|
2
|
+
|
|
3
|
+
They can be used by callers to emit telemetry / logs, or discarded.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class LayerPrepareSummary:
|
|
11
|
+
"""Results for preparing a single layer."""
|
|
12
|
+
|
|
13
|
+
# Identity
|
|
14
|
+
layer_name: str
|
|
15
|
+
data_source_name: str
|
|
16
|
+
|
|
17
|
+
# Timing
|
|
18
|
+
duration_seconds: float
|
|
19
|
+
|
|
20
|
+
# Counts
|
|
21
|
+
windows_prepared: int
|
|
22
|
+
windows_skipped: int
|
|
23
|
+
windows_rejected: int
|
|
24
|
+
get_items_attempts: int
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class PrepareDatasetWindowsSummary:
|
|
29
|
+
"""Results from prepare_dataset_windows operation for telemetry purposes."""
|
|
30
|
+
|
|
31
|
+
# Timing
|
|
32
|
+
duration_seconds: float
|
|
33
|
+
|
|
34
|
+
# Counts
|
|
35
|
+
total_windows_requested: int
|
|
36
|
+
|
|
37
|
+
# Per-layer summaries
|
|
38
|
+
layer_summaries: list[LayerPrepareSummary]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
|
|
42
|
+
class IngestCounts:
|
|
43
|
+
"""Known ingestion counts."""
|
|
44
|
+
|
|
45
|
+
items_ingested: int
|
|
46
|
+
geometries_ingested: int
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
|
|
50
|
+
class UnknownIngestCounts:
|
|
51
|
+
"""Indicates ingestion counts are unknown due to partial failure."""
|
|
52
|
+
|
|
53
|
+
items_attempted: int
|
|
54
|
+
geometries_attempted: int
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
|
|
58
|
+
class LayerIngestSummary:
|
|
59
|
+
"""Results for ingesting a single layer."""
|
|
60
|
+
|
|
61
|
+
# Identity
|
|
62
|
+
layer_name: str
|
|
63
|
+
data_source_name: str
|
|
64
|
+
|
|
65
|
+
# Timing
|
|
66
|
+
duration_seconds: float
|
|
67
|
+
|
|
68
|
+
# Counts - either known or unknown
|
|
69
|
+
ingest_counts: IngestCounts | UnknownIngestCounts
|
|
70
|
+
ingest_attempts: int
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
|
|
74
|
+
class IngestDatasetJobsSummary:
|
|
75
|
+
"""Results from ingesting a set of jobs; for telemetry purposes."""
|
|
76
|
+
|
|
77
|
+
# Timing
|
|
78
|
+
duration_seconds: float
|
|
79
|
+
|
|
80
|
+
# Counts
|
|
81
|
+
num_jobs: int
|
|
82
|
+
|
|
83
|
+
# Per-layer summaries
|
|
84
|
+
layer_summaries: list[LayerIngestSummary]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@dataclass
|
|
88
|
+
class MaterializeWindowLayerSummary:
|
|
89
|
+
"""Results for materializing a single window layer."""
|
|
90
|
+
|
|
91
|
+
skipped: bool
|
|
92
|
+
materialize_attempts: int
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
|
|
96
|
+
class MaterializeWindowLayersSummary:
|
|
97
|
+
"""Results for materialize a given layer for all windows in a materialize call."""
|
|
98
|
+
|
|
99
|
+
# Identity
|
|
100
|
+
layer_name: str
|
|
101
|
+
data_source_name: str
|
|
102
|
+
|
|
103
|
+
# Timing
|
|
104
|
+
duration_seconds: float
|
|
105
|
+
|
|
106
|
+
# Counts
|
|
107
|
+
total_windows_requested: int
|
|
108
|
+
num_windows_materialized: int
|
|
109
|
+
materialize_attempts: int
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass
|
|
113
|
+
class MaterializeDatasetWindowsSummary:
|
|
114
|
+
"""Results from materialize_dataset_windows operation for telemetry purposes."""
|
|
115
|
+
|
|
116
|
+
# Timing
|
|
117
|
+
duration_seconds: float
|
|
118
|
+
|
|
119
|
+
# Counts
|
|
120
|
+
total_windows_requested: int
|
|
121
|
+
|
|
122
|
+
# Per-layer summaries
|
|
123
|
+
layer_summaries: list[MaterializeWindowLayersSummary]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
|
|
127
|
+
class ErrorOutcome:
|
|
128
|
+
"""TBD what goes in here, if anything."""
|
|
129
|
+
|
|
130
|
+
# Timing
|
|
131
|
+
duration_seconds: float
|
rslearn/dataset/manage.py
CHANGED
|
@@ -1,19 +1,89 @@
|
|
|
1
1
|
"""Functions to manage datasets."""
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
|
|
3
|
+
import random
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from datetime import timedelta
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from rslearn.config import (
|
|
10
|
+
LayerConfig,
|
|
11
|
+
LayerType,
|
|
12
|
+
)
|
|
5
13
|
from rslearn.data_sources import DataSource, Item
|
|
6
|
-
from rslearn.
|
|
7
|
-
|
|
14
|
+
from rslearn.dataset.handler_summaries import (
|
|
15
|
+
LayerPrepareSummary,
|
|
16
|
+
MaterializeDatasetWindowsSummary,
|
|
17
|
+
MaterializeWindowLayersSummary,
|
|
18
|
+
MaterializeWindowLayerSummary,
|
|
19
|
+
PrepareDatasetWindowsSummary,
|
|
20
|
+
)
|
|
21
|
+
from rslearn.log_utils import get_logger
|
|
22
|
+
from rslearn.tile_stores import TileStore, get_tile_store_with_layer
|
|
8
23
|
|
|
9
24
|
from .dataset import Dataset
|
|
10
|
-
from .materialize import
|
|
25
|
+
from .materialize import Materializer, RasterMaterializer, VectorMaterializer
|
|
11
26
|
from .window import Window, WindowLayerData
|
|
12
27
|
|
|
28
|
+
logger = get_logger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AttemptsCounter:
|
|
32
|
+
"""A simple counter for tracking attempts (including initial attempt and retries)."""
|
|
33
|
+
|
|
34
|
+
def __init__(self) -> None:
|
|
35
|
+
"""Initialize counter with value 0."""
|
|
36
|
+
self.value = 0
|
|
37
|
+
|
|
38
|
+
def increment(self) -> None:
|
|
39
|
+
"""Increment the counter by 1."""
|
|
40
|
+
self.value += 1
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def retry(
|
|
44
|
+
fn: Callable,
|
|
45
|
+
retry_max_attempts: int,
|
|
46
|
+
retry_backoff: timedelta,
|
|
47
|
+
attempts_counter: AttemptsCounter | None = None,
|
|
48
|
+
) -> Any:
|
|
49
|
+
"""Retry the function multiple times in case of error.
|
|
50
|
+
|
|
51
|
+
The function is retried until either the attempts are exhausted, or the function
|
|
52
|
+
runs successfully without raising an Exception.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
fn: the function to call.
|
|
56
|
+
retry_max_attempts: retry this many times (plus the original attempt) before
|
|
57
|
+
giving up (and raising Exception).
|
|
58
|
+
retry_backoff: the base backoff time used to compute how long to wait between
|
|
59
|
+
retries. The actual time is (retry_backoff * attempts) * r, where r is a
|
|
60
|
+
random number between 1 and 2, and attempts is the number of attempts tried
|
|
61
|
+
so far.
|
|
62
|
+
attempts_counter: an optional counter to increment for each attempt
|
|
63
|
+
"""
|
|
64
|
+
for attempt_idx in range(retry_max_attempts):
|
|
65
|
+
if attempts_counter:
|
|
66
|
+
attempts_counter.increment()
|
|
67
|
+
try:
|
|
68
|
+
return fn()
|
|
69
|
+
except Exception as e:
|
|
70
|
+
logger.warning(f"Retrying after catching error in retry loop: {e}")
|
|
71
|
+
sleep_base_seconds = retry_backoff.total_seconds() * (attempt_idx + 1)
|
|
72
|
+
time.sleep(sleep_base_seconds * (1 + random.random()))
|
|
73
|
+
|
|
74
|
+
# Last attempt. This time we don't catch the exception.
|
|
75
|
+
if attempts_counter:
|
|
76
|
+
attempts_counter.increment()
|
|
77
|
+
return fn()
|
|
78
|
+
|
|
13
79
|
|
|
14
80
|
def prepare_dataset_windows(
|
|
15
|
-
dataset: Dataset,
|
|
16
|
-
|
|
81
|
+
dataset: Dataset,
|
|
82
|
+
windows: list[Window],
|
|
83
|
+
force: bool = False,
|
|
84
|
+
retry_max_attempts: int = 0,
|
|
85
|
+
retry_backoff: timedelta = timedelta(minutes=1),
|
|
86
|
+
) -> PrepareDatasetWindowsSummary:
|
|
17
87
|
"""Prepare windows in a dataset.
|
|
18
88
|
|
|
19
89
|
Preparing a window involves looking up items corresponding to the window in each of
|
|
@@ -24,28 +94,73 @@ def prepare_dataset_windows(
|
|
|
24
94
|
windows: the windows to prepare
|
|
25
95
|
force: whether to prepare windows even if they were previously prepared
|
|
26
96
|
(default false)
|
|
97
|
+
retry_max_attempts: set greater than zero to retry for this many attempts in
|
|
98
|
+
case of error.
|
|
99
|
+
retry_backoff: how long to wait before retrying (see retry).
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
a summary of the prepare operation, fit for telemetry purposes
|
|
27
103
|
"""
|
|
104
|
+
start_time = time.monotonic()
|
|
105
|
+
layer_summaries: list[LayerPrepareSummary] = []
|
|
106
|
+
|
|
28
107
|
# Iterate over retrieved layers, and prepare each one.
|
|
29
108
|
for layer_name, layer_cfg in dataset.layers.items():
|
|
109
|
+
layer_start_time = time.monotonic()
|
|
110
|
+
|
|
30
111
|
if not layer_cfg.data_source:
|
|
112
|
+
layer_summaries.append(
|
|
113
|
+
LayerPrepareSummary(
|
|
114
|
+
layer_name=layer_name,
|
|
115
|
+
data_source_name="N/A",
|
|
116
|
+
duration_seconds=time.monotonic() - layer_start_time,
|
|
117
|
+
windows_prepared=0,
|
|
118
|
+
windows_skipped=len(windows),
|
|
119
|
+
windows_rejected=0,
|
|
120
|
+
get_items_attempts=0,
|
|
121
|
+
)
|
|
122
|
+
)
|
|
31
123
|
continue
|
|
124
|
+
data_source_cfg = layer_cfg.data_source
|
|
125
|
+
min_matches = data_source_cfg.query_config.min_matches
|
|
32
126
|
|
|
33
127
|
# Get windows that need to be prepared for this layer.
|
|
128
|
+
# Also track which windows are skipped vs previously rejected.
|
|
34
129
|
needed_windows = []
|
|
130
|
+
windows_skipped = 0
|
|
131
|
+
windows_rejected = 0
|
|
35
132
|
for window in windows:
|
|
36
133
|
layer_datas = window.load_layer_datas()
|
|
37
134
|
if layer_name in layer_datas and not force:
|
|
135
|
+
# Window already has layer data - check if it was previously rejected
|
|
136
|
+
layer_data = layer_datas[layer_name]
|
|
137
|
+
if len(layer_data.serialized_item_groups) == 0 and min_matches > 0:
|
|
138
|
+
# Previously rejected due to min_matches
|
|
139
|
+
windows_rejected += 1
|
|
140
|
+
else:
|
|
141
|
+
# Successfully prepared previously
|
|
142
|
+
windows_skipped += 1
|
|
38
143
|
continue
|
|
39
144
|
needed_windows.append(window)
|
|
40
|
-
|
|
145
|
+
logger.info(f"Preparing {len(needed_windows)} windows for layer {layer_name}")
|
|
146
|
+
|
|
41
147
|
if len(needed_windows) == 0:
|
|
148
|
+
layer_summaries.append(
|
|
149
|
+
LayerPrepareSummary(
|
|
150
|
+
layer_name=layer_name,
|
|
151
|
+
data_source_name=data_source_cfg.class_path,
|
|
152
|
+
duration_seconds=time.monotonic() - layer_start_time,
|
|
153
|
+
windows_prepared=0,
|
|
154
|
+
windows_skipped=windows_skipped,
|
|
155
|
+
windows_rejected=windows_rejected,
|
|
156
|
+
get_items_attempts=0,
|
|
157
|
+
)
|
|
158
|
+
)
|
|
42
159
|
continue
|
|
43
160
|
|
|
44
161
|
# Create data source after checking for at least one window so it can be fast
|
|
45
162
|
# if there are no windows to prepare.
|
|
46
|
-
data_source =
|
|
47
|
-
layer_cfg, dataset.path
|
|
48
|
-
)
|
|
163
|
+
data_source = layer_cfg.instantiate_data_source(dataset.path)
|
|
49
164
|
|
|
50
165
|
# Get STGeometry for each window.
|
|
51
166
|
geometries = []
|
|
@@ -53,13 +168,13 @@ def prepare_dataset_windows(
|
|
|
53
168
|
geometry = window.get_geometry()
|
|
54
169
|
|
|
55
170
|
# Apply temporal modifiers.
|
|
56
|
-
time_offset =
|
|
171
|
+
time_offset = data_source_cfg.time_offset
|
|
57
172
|
if geometry.time_range and time_offset:
|
|
58
173
|
geometry.time_range = (
|
|
59
174
|
geometry.time_range[0] + time_offset,
|
|
60
175
|
geometry.time_range[1] + time_offset,
|
|
61
176
|
)
|
|
62
|
-
duration =
|
|
177
|
+
duration = data_source_cfg.duration
|
|
63
178
|
if geometry.time_range and duration:
|
|
64
179
|
geometry.time_range = (
|
|
65
180
|
geometry.time_range[0],
|
|
@@ -68,7 +183,15 @@ def prepare_dataset_windows(
|
|
|
68
183
|
|
|
69
184
|
geometries.append(geometry)
|
|
70
185
|
|
|
71
|
-
|
|
186
|
+
attempts_counter = AttemptsCounter()
|
|
187
|
+
results = retry(
|
|
188
|
+
fn=lambda: data_source.get_items(geometries, data_source_cfg.query_config),
|
|
189
|
+
retry_max_attempts=retry_max_attempts,
|
|
190
|
+
retry_backoff=retry_backoff,
|
|
191
|
+
attempts_counter=attempts_counter,
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
windows_prepared = 0
|
|
72
195
|
for window, result in zip(needed_windows, results):
|
|
73
196
|
layer_datas = window.load_layer_datas()
|
|
74
197
|
layer_datas[layer_name] = WindowLayerData(
|
|
@@ -79,8 +202,39 @@ def prepare_dataset_windows(
|
|
|
79
202
|
)
|
|
80
203
|
window.save_layer_datas(layer_datas)
|
|
81
204
|
|
|
205
|
+
# If result is empty and min_matches > 0, window was rejected due to min_matches
|
|
206
|
+
if len(result) == 0 and min_matches > 0:
|
|
207
|
+
windows_rejected += 1
|
|
208
|
+
else:
|
|
209
|
+
windows_prepared += 1
|
|
82
210
|
|
|
83
|
-
|
|
211
|
+
layer_summaries.append(
|
|
212
|
+
LayerPrepareSummary(
|
|
213
|
+
layer_name=layer_name,
|
|
214
|
+
data_source_name=data_source_cfg.class_path,
|
|
215
|
+
duration_seconds=time.monotonic() - layer_start_time,
|
|
216
|
+
windows_prepared=windows_prepared,
|
|
217
|
+
windows_skipped=windows_skipped,
|
|
218
|
+
windows_rejected=windows_rejected,
|
|
219
|
+
get_items_attempts=attempts_counter.value,
|
|
220
|
+
)
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
summary = PrepareDatasetWindowsSummary(
|
|
224
|
+
duration_seconds=time.monotonic() - start_time,
|
|
225
|
+
total_windows_requested=len(windows),
|
|
226
|
+
layer_summaries=layer_summaries,
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
return summary
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def ingest_dataset_windows(
|
|
233
|
+
dataset: Dataset,
|
|
234
|
+
windows: list[Window],
|
|
235
|
+
retry_max_attempts: int = 0,
|
|
236
|
+
retry_backoff: timedelta = timedelta(minutes=1),
|
|
237
|
+
) -> None:
|
|
84
238
|
"""Ingest items for retrieved layers in a dataset.
|
|
85
239
|
|
|
86
240
|
The items associated with the specified windows are downloaded and divided into
|
|
@@ -89,6 +243,9 @@ def ingest_dataset_windows(dataset: Dataset, windows: list[Window]) -> None:
|
|
|
89
243
|
Args:
|
|
90
244
|
dataset: the dataset
|
|
91
245
|
windows: the windows to ingest
|
|
246
|
+
retry_max_attempts: set greater than zero to retry for this many attempts in
|
|
247
|
+
case of error.
|
|
248
|
+
retry_backoff: how long to wait before retrying (see retry).
|
|
92
249
|
"""
|
|
93
250
|
tile_store = dataset.get_tile_store()
|
|
94
251
|
for layer_name, layer_cfg in dataset.layers.items():
|
|
@@ -97,11 +254,9 @@ def ingest_dataset_windows(dataset: Dataset, windows: list[Window]) -> None:
|
|
|
97
254
|
if not layer_cfg.data_source.ingest:
|
|
98
255
|
continue
|
|
99
256
|
|
|
100
|
-
data_source =
|
|
101
|
-
layer_cfg, dataset.path
|
|
102
|
-
)
|
|
257
|
+
data_source = layer_cfg.instantiate_data_source(dataset.path)
|
|
103
258
|
|
|
104
|
-
geometries_by_item = {}
|
|
259
|
+
geometries_by_item: dict = {}
|
|
105
260
|
for window in windows:
|
|
106
261
|
layer_datas = window.load_layer_datas()
|
|
107
262
|
if layer_name not in layer_datas:
|
|
@@ -116,12 +271,20 @@ def ingest_dataset_windows(dataset: Dataset, windows: list[Window]) -> None:
|
|
|
116
271
|
geometries_by_item[item].append(geometry)
|
|
117
272
|
|
|
118
273
|
print(f"Ingesting {len(geometries_by_item)} items in layer {layer_name}")
|
|
119
|
-
cur_tile_store = get_tile_store_for_layer(tile_store, layer_name, layer_cfg)
|
|
120
274
|
geometries_and_items = list(geometries_by_item.items())
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
275
|
+
|
|
276
|
+
# Use retry loop for the actual data source ingest call.
|
|
277
|
+
def ingest() -> None:
|
|
278
|
+
data_source.ingest(
|
|
279
|
+
tile_store=get_tile_store_with_layer(tile_store, layer_name, layer_cfg),
|
|
280
|
+
items=[item for item, _ in geometries_and_items],
|
|
281
|
+
geometries=[geometries for _, geometries in geometries_and_items],
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
retry(
|
|
285
|
+
fn=ingest,
|
|
286
|
+
retry_max_attempts=retry_max_attempts,
|
|
287
|
+
retry_backoff=retry_backoff,
|
|
125
288
|
)
|
|
126
289
|
|
|
127
290
|
|
|
@@ -145,47 +308,31 @@ def is_window_ingested(
|
|
|
145
308
|
continue
|
|
146
309
|
if layer_name not in layer_datas:
|
|
147
310
|
return False
|
|
311
|
+
|
|
312
|
+
layer_tile_store = get_tile_store_with_layer(tile_store, layer_name, layer_cfg)
|
|
313
|
+
|
|
148
314
|
layer_data = layer_datas[layer_name]
|
|
149
315
|
for group in layer_data.serialized_item_groups:
|
|
150
316
|
for serialized_item in group:
|
|
151
317
|
item = Item.deserialize(serialized_item)
|
|
152
318
|
|
|
153
|
-
if layer_cfg.
|
|
319
|
+
if layer_cfg.type == LayerType.RASTER:
|
|
154
320
|
for band_set in layer_cfg.band_sets:
|
|
155
|
-
projection, _ = band_set.get_final_projection_and_bounds(
|
|
156
|
-
window.projection, window.bounds
|
|
157
|
-
)
|
|
158
|
-
cur_tile_store = get_tile_store_for_layer(
|
|
159
|
-
tile_store, layer_name, layer_cfg
|
|
160
|
-
)
|
|
161
|
-
layer_prefix = (item.name,)
|
|
162
321
|
# Make sure that layers exist containing each configured band.
|
|
163
322
|
# And that those layers are marked completed.
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
for suffix in suffixes:
|
|
168
|
-
cur_bands = suffix.split("_")
|
|
323
|
+
available_bands = layer_tile_store.get_raster_bands(item.name)
|
|
324
|
+
wanted_bands = {band for band in band_set.bands}
|
|
325
|
+
for cur_bands in available_bands:
|
|
169
326
|
is_needed = False
|
|
170
327
|
for band in cur_bands:
|
|
171
|
-
if band in
|
|
328
|
+
if band in wanted_bands:
|
|
172
329
|
is_needed = True
|
|
173
|
-
|
|
330
|
+
wanted_bands.remove(band)
|
|
174
331
|
if not is_needed:
|
|
175
332
|
continue
|
|
176
|
-
|
|
177
|
-
if len(needed_bands) > 0:
|
|
333
|
+
if len(wanted_bands) > 0:
|
|
178
334
|
return False
|
|
179
335
|
|
|
180
|
-
for suffix in needed_suffixes:
|
|
181
|
-
layer_id = (item.name, suffix, str(projection))
|
|
182
|
-
ts_layer = get_tile_store_for_layer(
|
|
183
|
-
tile_store, layer_name, layer_cfg
|
|
184
|
-
).get_layer(layer_id)
|
|
185
|
-
if not ts_layer:
|
|
186
|
-
return False
|
|
187
|
-
if not ts_layer.get_metadata().properties.get("completed"):
|
|
188
|
-
return False
|
|
189
336
|
return True
|
|
190
337
|
|
|
191
338
|
|
|
@@ -196,7 +343,9 @@ def materialize_window(
|
|
|
196
343
|
tile_store: TileStore,
|
|
197
344
|
layer_name: str,
|
|
198
345
|
layer_cfg: LayerConfig,
|
|
199
|
-
|
|
346
|
+
retry_max_attempts: int = 0,
|
|
347
|
+
retry_backoff: timedelta = timedelta(minutes=1),
|
|
348
|
+
) -> MaterializeWindowLayerSummary:
|
|
200
349
|
"""Materialize a window.
|
|
201
350
|
|
|
202
351
|
Args:
|
|
@@ -206,11 +355,19 @@ def materialize_window(
|
|
|
206
355
|
tile_store: tile store of the dataset to materialize from
|
|
207
356
|
layer_name: the layer name
|
|
208
357
|
layer_cfg: the layer config
|
|
358
|
+
retry_max_attempts: set greater than zero to retry for this many attempts in
|
|
359
|
+
case of error.
|
|
360
|
+
retry_backoff: how long to wait before retrying (see retry).
|
|
361
|
+
|
|
362
|
+
Returns:
|
|
363
|
+
a summary of the materialize operation, fit for telemetry purposes
|
|
209
364
|
"""
|
|
210
365
|
# Check if layer is materialized already.
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
366
|
+
if window.is_layer_completed(layer_name):
|
|
367
|
+
return MaterializeWindowLayerSummary(
|
|
368
|
+
skipped=True,
|
|
369
|
+
materialize_attempts=0,
|
|
370
|
+
)
|
|
214
371
|
|
|
215
372
|
layer_datas = window.load_layer_datas()
|
|
216
373
|
if layer_name not in layer_datas:
|
|
@@ -219,7 +376,11 @@ def materialize_window(
|
|
|
219
376
|
layer_name,
|
|
220
377
|
window.name,
|
|
221
378
|
)
|
|
222
|
-
return
|
|
379
|
+
return MaterializeWindowLayerSummary(
|
|
380
|
+
skipped=True,
|
|
381
|
+
materialize_attempts=0,
|
|
382
|
+
)
|
|
383
|
+
|
|
223
384
|
layer_data = layer_datas[layer_name]
|
|
224
385
|
item_groups = []
|
|
225
386
|
for serialized_group in layer_data.serialized_item_groups:
|
|
@@ -229,6 +390,10 @@ def materialize_window(
|
|
|
229
390
|
item_group.append(item)
|
|
230
391
|
item_groups.append(item_group)
|
|
231
392
|
|
|
393
|
+
if layer_cfg.data_source is None:
|
|
394
|
+
raise ValueError("data_source is required")
|
|
395
|
+
|
|
396
|
+
attempts_counter = AttemptsCounter()
|
|
232
397
|
if layer_cfg.data_source.ingest:
|
|
233
398
|
if not is_window_ingested(dataset, window, check_layer_name=layer_name):
|
|
234
399
|
logger.info(
|
|
@@ -236,30 +401,62 @@ def materialize_window(
|
|
|
236
401
|
layer_name,
|
|
237
402
|
window.name,
|
|
238
403
|
)
|
|
239
|
-
return
|
|
404
|
+
return MaterializeWindowLayerSummary(
|
|
405
|
+
skipped=True,
|
|
406
|
+
materialize_attempts=0,
|
|
407
|
+
)
|
|
240
408
|
|
|
241
|
-
|
|
409
|
+
logger.info(
|
|
242
410
|
f"Materializing {len(item_groups)} item groups in layer {layer_name} from tile store"
|
|
243
411
|
)
|
|
244
412
|
|
|
245
|
-
|
|
246
|
-
|
|
413
|
+
materializer: Materializer
|
|
414
|
+
if layer_cfg.type == LayerType.RASTER:
|
|
415
|
+
materializer = RasterMaterializer()
|
|
416
|
+
elif layer_cfg.type == LayerType.VECTOR:
|
|
417
|
+
materializer = VectorMaterializer()
|
|
247
418
|
else:
|
|
248
|
-
|
|
249
|
-
|
|
419
|
+
raise ValueError(f"unknown layer type {layer_cfg.type}")
|
|
420
|
+
|
|
421
|
+
retry(
|
|
422
|
+
fn=lambda: materializer.materialize(
|
|
423
|
+
get_tile_store_with_layer(tile_store, layer_name, layer_cfg),
|
|
424
|
+
window,
|
|
425
|
+
layer_name,
|
|
426
|
+
layer_cfg,
|
|
427
|
+
item_groups,
|
|
428
|
+
),
|
|
429
|
+
retry_max_attempts=retry_max_attempts,
|
|
430
|
+
retry_backoff=retry_backoff,
|
|
431
|
+
attempts_counter=attempts_counter,
|
|
432
|
+
)
|
|
250
433
|
|
|
251
434
|
else:
|
|
252
435
|
# This window is meant to be materialized directly from the data source.
|
|
253
|
-
|
|
436
|
+
logger.info(
|
|
254
437
|
f"Materializing {len(item_groups)} item groups in layer {layer_name} via data source"
|
|
255
438
|
)
|
|
256
|
-
|
|
257
|
-
data_source.materialize(
|
|
258
|
-
|
|
259
|
-
|
|
439
|
+
retry(
|
|
440
|
+
fn=lambda: data_source.materialize(
|
|
441
|
+
window, item_groups, layer_name, layer_cfg
|
|
442
|
+
),
|
|
443
|
+
retry_max_attempts=retry_max_attempts,
|
|
444
|
+
retry_backoff=retry_backoff,
|
|
445
|
+
attempts_counter=attempts_counter,
|
|
446
|
+
)
|
|
260
447
|
|
|
448
|
+
return MaterializeWindowLayerSummary(
|
|
449
|
+
skipped=False,
|
|
450
|
+
materialize_attempts=attempts_counter.value,
|
|
451
|
+
)
|
|
261
452
|
|
|
262
|
-
|
|
453
|
+
|
|
454
|
+
def materialize_dataset_windows(
|
|
455
|
+
dataset: Dataset,
|
|
456
|
+
windows: list[Window],
|
|
457
|
+
retry_max_attempts: int = 0,
|
|
458
|
+
retry_backoff: timedelta = timedelta(minutes=1),
|
|
459
|
+
) -> MaterializeDatasetWindowsSummary:
|
|
263
460
|
"""Materialize items for retrieved layers in a dataset.
|
|
264
461
|
|
|
265
462
|
The portions of items corresponding to dataset windows are extracted from the tile
|
|
@@ -268,17 +465,59 @@ def materialize_dataset_windows(dataset: Dataset, windows: list[Window]) -> None
|
|
|
268
465
|
Args:
|
|
269
466
|
dataset: the dataset
|
|
270
467
|
windows: the windows to materialize
|
|
468
|
+
retry_max_attempts: set greater than zero to retry for this many attempts in
|
|
469
|
+
case of error.
|
|
470
|
+
retry_backoff: how long to wait before retrying (see retry).
|
|
471
|
+
|
|
472
|
+
Returns:
|
|
473
|
+
a summary of the materialize operation, fit for telemetry purposes
|
|
271
474
|
"""
|
|
475
|
+
start_time = time.monotonic()
|
|
476
|
+
|
|
477
|
+
layer_summaries: list[MaterializeWindowLayersSummary] = []
|
|
478
|
+
|
|
272
479
|
tile_store = dataset.get_tile_store()
|
|
273
480
|
for layer_name, layer_cfg in dataset.layers.items():
|
|
481
|
+
layer_start_time = time.monotonic()
|
|
482
|
+
|
|
483
|
+
total_materialize_attempts = 0
|
|
484
|
+
total_skipped = 0
|
|
485
|
+
data_source_name = "N/A"
|
|
486
|
+
|
|
274
487
|
if not layer_cfg.data_source:
|
|
275
|
-
|
|
488
|
+
total_skipped = len(windows)
|
|
489
|
+
else:
|
|
490
|
+
data_source_name = layer_cfg.data_source.class_path
|
|
491
|
+
data_source = layer_cfg.instantiate_data_source(dataset.path)
|
|
492
|
+
|
|
493
|
+
for window in windows:
|
|
494
|
+
window_summary = materialize_window(
|
|
495
|
+
window=window,
|
|
496
|
+
dataset=dataset,
|
|
497
|
+
data_source=data_source,
|
|
498
|
+
tile_store=tile_store,
|
|
499
|
+
layer_name=layer_name,
|
|
500
|
+
layer_cfg=layer_cfg,
|
|
501
|
+
retry_max_attempts=retry_max_attempts,
|
|
502
|
+
retry_backoff=retry_backoff,
|
|
503
|
+
)
|
|
504
|
+
total_materialize_attempts += window_summary.materialize_attempts
|
|
505
|
+
if window_summary.skipped:
|
|
506
|
+
total_skipped += 1
|
|
276
507
|
|
|
277
|
-
|
|
278
|
-
|
|
508
|
+
layer_summaries.append(
|
|
509
|
+
MaterializeWindowLayersSummary(
|
|
510
|
+
layer_name=layer_name,
|
|
511
|
+
data_source_name=data_source_name,
|
|
512
|
+
duration_seconds=time.monotonic() - layer_start_time,
|
|
513
|
+
total_windows_requested=len(windows),
|
|
514
|
+
num_windows_materialized=len(windows) - total_skipped,
|
|
515
|
+
materialize_attempts=total_materialize_attempts,
|
|
516
|
+
)
|
|
279
517
|
)
|
|
280
518
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
519
|
+
return MaterializeDatasetWindowsSummary(
|
|
520
|
+
duration_seconds=time.monotonic() - start_time,
|
|
521
|
+
total_windows_requested=len(windows),
|
|
522
|
+
layer_summaries=layer_summaries,
|
|
523
|
+
)
|