sfeos-helpers 6.9.0__py3-none-any.whl → 6.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sfeos_helpers-6.9.0.dist-info → sfeos_helpers-6.10.1.dist-info}/METADATA +2 -2
- {sfeos_helpers-6.9.0.dist-info → sfeos_helpers-6.10.1.dist-info}/RECORD +19 -18
- stac_fastapi/sfeos_helpers/aggregation/client.py +1 -3
- stac_fastapi/sfeos_helpers/database/__init__.py +32 -2
- stac_fastapi/sfeos_helpers/database/catalogs.py +190 -0
- stac_fastapi/sfeos_helpers/database/datetime.py +54 -1
- stac_fastapi/sfeos_helpers/database/index.py +88 -40
- stac_fastapi/sfeos_helpers/database/query.py +1 -1
- stac_fastapi/sfeos_helpers/database/utils.py +97 -2
- stac_fastapi/sfeos_helpers/mappings.py +2 -2
- stac_fastapi/sfeos_helpers/search_engine/base.py +30 -0
- stac_fastapi/sfeos_helpers/search_engine/index_operations.py +80 -25
- stac_fastapi/sfeos_helpers/search_engine/inserters.py +175 -95
- stac_fastapi/sfeos_helpers/search_engine/managers.py +340 -56
- stac_fastapi/sfeos_helpers/search_engine/selection/base.py +7 -3
- stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +82 -25
- stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py +71 -15
- stac_fastapi/sfeos_helpers/version.py +1 -1
- {sfeos_helpers-6.9.0.dist-info → sfeos_helpers-6.10.1.dist-info}/WHEEL +0 -0
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
"""Async index insertion strategies."""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
|
-
from
|
|
4
|
-
from typing import Any, Dict, List
|
|
4
|
+
from typing import Any, Dict, List, Optional
|
|
5
5
|
|
|
6
6
|
from fastapi import HTTPException, status
|
|
7
7
|
|
|
8
|
+
from stac_fastapi.core.utilities import get_bool_env
|
|
8
9
|
from stac_fastapi.sfeos_helpers.database import (
|
|
9
10
|
extract_date,
|
|
10
11
|
extract_first_date_from_index,
|
|
@@ -14,7 +15,7 @@ from stac_fastapi.sfeos_helpers.database import (
|
|
|
14
15
|
|
|
15
16
|
from .base import BaseIndexInserter
|
|
16
17
|
from .index_operations import IndexOperations
|
|
17
|
-
from .managers import DatetimeIndexManager
|
|
18
|
+
from .managers import DatetimeIndexManager, ProductDatetimes
|
|
18
19
|
from .selection import DatetimeBasedIndexSelector
|
|
19
20
|
|
|
20
21
|
logger = logging.getLogger(__name__)
|
|
@@ -33,6 +34,25 @@ class DatetimeIndexInserter(BaseIndexInserter):
|
|
|
33
34
|
self.client = client
|
|
34
35
|
self.index_operations = index_operations
|
|
35
36
|
self.datetime_manager = DatetimeIndexManager(client, index_operations)
|
|
37
|
+
self.index_selector = DatetimeBasedIndexSelector(client)
|
|
38
|
+
|
|
39
|
+
@property
|
|
40
|
+
def use_datetime(self) -> bool:
|
|
41
|
+
"""Get USE_DATETIME setting dynamically.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
bool: Current value of USE_DATETIME environment variable.
|
|
45
|
+
"""
|
|
46
|
+
return get_bool_env("USE_DATETIME", default=True)
|
|
47
|
+
|
|
48
|
+
@property
|
|
49
|
+
def primary_datetime_name(self) -> str:
|
|
50
|
+
"""Get primary datetime field name based on current USE_DATETIME setting.
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
str: "datetime" if USE_DATETIME is True, else "start_datetime".
|
|
54
|
+
"""
|
|
55
|
+
return "datetime" if self.use_datetime else "start_datetime"
|
|
36
56
|
|
|
37
57
|
@staticmethod
|
|
38
58
|
def should_create_collection_index() -> bool:
|
|
@@ -55,6 +75,48 @@ class DatetimeIndexInserter(BaseIndexInserter):
|
|
|
55
75
|
"""
|
|
56
76
|
return await self.index_operations.create_simple_index(client, collection_id)
|
|
57
77
|
|
|
78
|
+
async def refresh_cache(self) -> None:
|
|
79
|
+
"""Refresh the index selector cache.
|
|
80
|
+
|
|
81
|
+
This method refreshes the cached index information used for
|
|
82
|
+
datetime-based index selection.
|
|
83
|
+
"""
|
|
84
|
+
await self.index_selector.refresh_cache()
|
|
85
|
+
|
|
86
|
+
def validate_datetime_field_update(self, field_path: str) -> None:
|
|
87
|
+
"""Validate if a datetime field can be updated.
|
|
88
|
+
|
|
89
|
+
For datetime-based indexing, the primary datetime field cannot be modified
|
|
90
|
+
because it determines the index where the item is stored.
|
|
91
|
+
|
|
92
|
+
When USE_DATETIME=True, 'properties.datetime' is protected.
|
|
93
|
+
When USE_DATETIME=False, 'properties.start_datetime' and 'properties.end_datetime' are protected.
|
|
94
|
+
|
|
95
|
+
Args:
|
|
96
|
+
field_path (str): The slash-separated path of the field being updated (e.g. "properties/datetime").
|
|
97
|
+
"""
|
|
98
|
+
# TODO: In the future, updating these fields will be able to move an item between indices by changing the time-based aliases
|
|
99
|
+
if self.use_datetime:
|
|
100
|
+
if field_path == "properties/datetime":
|
|
101
|
+
raise HTTPException(
|
|
102
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
103
|
+
detail=(
|
|
104
|
+
"Updating 'properties.datetime' is not yet supported for datetime-based indexing. "
|
|
105
|
+
"This feature will be available in a future release, enabling automatic "
|
|
106
|
+
"index and time-based alias updates when datetime values change."
|
|
107
|
+
),
|
|
108
|
+
)
|
|
109
|
+
else:
|
|
110
|
+
if field_path in ("properties/start_datetime", "properties/end_datetime"):
|
|
111
|
+
raise HTTPException(
|
|
112
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
113
|
+
detail=(
|
|
114
|
+
f"Updating '{field_path}' is not yet supported for datetime-based indexing. "
|
|
115
|
+
"This feature will be available in a future release, enabling automatic "
|
|
116
|
+
"index and time-based alias updates when datetime values change."
|
|
117
|
+
),
|
|
118
|
+
)
|
|
119
|
+
|
|
58
120
|
async def get_target_index(
|
|
59
121
|
self, collection_id: str, product: Dict[str, Any]
|
|
60
122
|
) -> str:
|
|
@@ -67,9 +129,8 @@ class DatetimeIndexInserter(BaseIndexInserter):
|
|
|
67
129
|
Returns:
|
|
68
130
|
str: Target index name for the product.
|
|
69
131
|
"""
|
|
70
|
-
index_selector = DatetimeBasedIndexSelector(self.client)
|
|
71
132
|
return await self._get_target_index_internal(
|
|
72
|
-
|
|
133
|
+
collection_id, product, check_size=True
|
|
73
134
|
)
|
|
74
135
|
|
|
75
136
|
async def prepare_bulk_actions(
|
|
@@ -89,18 +150,12 @@ class DatetimeIndexInserter(BaseIndexInserter):
|
|
|
89
150
|
logger.error(msg)
|
|
90
151
|
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=msg)
|
|
91
152
|
|
|
92
|
-
items.sort(key=lambda item: item["properties"][
|
|
93
|
-
index_selector = DatetimeBasedIndexSelector(self.client)
|
|
94
|
-
|
|
95
|
-
await self._ensure_indexes_exist(index_selector, collection_id, items)
|
|
96
|
-
await self._check_and_handle_oversized_index(
|
|
97
|
-
index_selector, collection_id, items
|
|
98
|
-
)
|
|
153
|
+
items.sort(key=lambda item: item["properties"][self.primary_datetime_name])
|
|
99
154
|
|
|
100
155
|
actions = []
|
|
101
156
|
for item in items:
|
|
102
157
|
target_index = await self._get_target_index_internal(
|
|
103
|
-
|
|
158
|
+
collection_id, item, check_size=True
|
|
104
159
|
)
|
|
105
160
|
actions.append(
|
|
106
161
|
{
|
|
@@ -114,15 +169,13 @@ class DatetimeIndexInserter(BaseIndexInserter):
|
|
|
114
169
|
|
|
115
170
|
async def _get_target_index_internal(
|
|
116
171
|
self,
|
|
117
|
-
index_selector,
|
|
118
172
|
collection_id: str,
|
|
119
173
|
product: Dict[str, Any],
|
|
120
174
|
check_size: bool = True,
|
|
121
|
-
) -> str:
|
|
175
|
+
) -> Optional[str]:
|
|
122
176
|
"""Get target index with size checking internally.
|
|
123
177
|
|
|
124
178
|
Args:
|
|
125
|
-
index_selector: Index selector instance.
|
|
126
179
|
collection_id (str): Collection identifier.
|
|
127
180
|
product (Dict[str, Any]): Product data.
|
|
128
181
|
check_size (bool): Whether to check index size limits.
|
|
@@ -130,112 +183,139 @@ class DatetimeIndexInserter(BaseIndexInserter):
|
|
|
130
183
|
Returns:
|
|
131
184
|
str: Target index name.
|
|
132
185
|
"""
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
target_index = await index_selector.select_indexes(
|
|
136
|
-
[collection_id], datetime_range
|
|
186
|
+
product_datetimes = self.datetime_manager.validate_product_datetimes(
|
|
187
|
+
product, self.use_datetime
|
|
137
188
|
)
|
|
138
|
-
|
|
189
|
+
primary_datetime_value = (
|
|
190
|
+
product_datetimes.datetime
|
|
191
|
+
if self.use_datetime
|
|
192
|
+
else product_datetimes.start_datetime
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
all_indexes = await self.index_selector.get_collection_indexes(collection_id)
|
|
139
196
|
|
|
140
197
|
if not all_indexes:
|
|
141
198
|
target_index = await self.datetime_manager.handle_new_collection(
|
|
142
|
-
collection_id,
|
|
199
|
+
collection_id, self.primary_datetime_name, product_datetimes
|
|
143
200
|
)
|
|
144
|
-
await
|
|
201
|
+
await self.refresh_cache()
|
|
145
202
|
return target_index
|
|
146
203
|
|
|
147
|
-
all_indexes
|
|
148
|
-
|
|
149
|
-
|
|
204
|
+
all_indexes = sorted(
|
|
205
|
+
all_indexes, key=lambda x: x[0][self.primary_datetime_name]
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
target_index = await self.index_selector.select_indexes(
|
|
209
|
+
[collection_id], primary_datetime_value, for_insertion=True
|
|
210
|
+
)
|
|
150
211
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
212
|
+
start_date = extract_date(primary_datetime_value)
|
|
213
|
+
earliest_index_date = extract_first_date_from_index(
|
|
214
|
+
all_indexes[0][0][self.primary_datetime_name]
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
if start_date < earliest_index_date:
|
|
218
|
+
target_index = await self.datetime_manager.handle_early_date(
|
|
219
|
+
collection_id,
|
|
220
|
+
self.primary_datetime_name,
|
|
221
|
+
product_datetimes,
|
|
222
|
+
all_indexes[0][0],
|
|
223
|
+
True,
|
|
154
224
|
)
|
|
155
|
-
await
|
|
225
|
+
await self.refresh_cache()
|
|
226
|
+
return target_index
|
|
156
227
|
|
|
157
|
-
|
|
228
|
+
if not target_index:
|
|
229
|
+
target_index = all_indexes[-1][0][self.primary_datetime_name]
|
|
158
230
|
|
|
159
|
-
|
|
231
|
+
aliases_dict, is_first_index = self._find_aliases_for_index(
|
|
232
|
+
all_indexes, target_index
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
if target_index != all_indexes[-1][0][self.primary_datetime_name]:
|
|
236
|
+
await self.datetime_manager.handle_early_date(
|
|
237
|
+
collection_id,
|
|
238
|
+
self.primary_datetime_name,
|
|
239
|
+
product_datetimes,
|
|
240
|
+
aliases_dict,
|
|
241
|
+
is_first_index,
|
|
242
|
+
)
|
|
243
|
+
await self.refresh_cache()
|
|
160
244
|
return target_index
|
|
161
245
|
|
|
162
246
|
if check_size and await self.datetime_manager.size_manager.is_index_oversized(
|
|
163
247
|
target_index
|
|
164
248
|
):
|
|
165
|
-
|
|
166
|
-
|
|
249
|
+
latest_item = await self.index_operations.find_latest_item_in_index(
|
|
250
|
+
self.client, target_index
|
|
251
|
+
)
|
|
252
|
+
latest_index_datetimes = ProductDatetimes(
|
|
253
|
+
start_datetime=str(
|
|
254
|
+
extract_date(latest_item["_source"]["properties"]["start_datetime"])
|
|
255
|
+
),
|
|
256
|
+
datetime=str(
|
|
257
|
+
extract_date(latest_item["_source"]["properties"]["datetime"])
|
|
258
|
+
),
|
|
259
|
+
end_datetime=str(
|
|
260
|
+
extract_first_date_from_index(aliases_dict["end_datetime"])
|
|
261
|
+
)
|
|
262
|
+
if aliases_dict.get("end_datetime")
|
|
263
|
+
else None,
|
|
167
264
|
)
|
|
168
|
-
await index_selector.refresh_cache()
|
|
169
|
-
|
|
170
|
-
return target_index
|
|
171
|
-
|
|
172
|
-
async def _ensure_indexes_exist(
|
|
173
|
-
self, index_selector, collection_id: str, items: List[Dict[str, Any]]
|
|
174
|
-
):
|
|
175
|
-
"""Ensure necessary indexes exist for the items.
|
|
176
|
-
|
|
177
|
-
Args:
|
|
178
|
-
index_selector: Index selector instance.
|
|
179
|
-
collection_id (str): Collection identifier.
|
|
180
|
-
items (List[Dict[str, Any]]): List of items to process.
|
|
181
|
-
"""
|
|
182
|
-
all_indexes = await index_selector.get_collection_indexes(collection_id)
|
|
183
265
|
|
|
184
|
-
|
|
185
|
-
first_item = items[0]
|
|
186
|
-
await self.index_operations.create_datetime_index(
|
|
187
|
-
self.client,
|
|
266
|
+
await self.datetime_manager.handle_oversized_index(
|
|
188
267
|
collection_id,
|
|
189
|
-
|
|
268
|
+
self.primary_datetime_name,
|
|
269
|
+
product_datetimes,
|
|
270
|
+
latest_index_datetimes,
|
|
271
|
+
aliases_dict,
|
|
272
|
+
)
|
|
273
|
+
await self.refresh_cache()
|
|
274
|
+
all_indexes = await self.index_selector.get_collection_indexes(
|
|
275
|
+
collection_id
|
|
276
|
+
)
|
|
277
|
+
all_indexes = sorted(
|
|
278
|
+
all_indexes, key=lambda x: x[0][self.primary_datetime_name]
|
|
279
|
+
)
|
|
280
|
+
return (
|
|
281
|
+
await self.index_selector.select_indexes(
|
|
282
|
+
[collection_id], primary_datetime_value, for_insertion=True
|
|
283
|
+
)
|
|
284
|
+
or all_indexes[-1][0][self.primary_datetime_name]
|
|
190
285
|
)
|
|
191
|
-
await index_selector.refresh_cache()
|
|
192
286
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
287
|
+
await self.datetime_manager.handle_early_date(
|
|
288
|
+
collection_id,
|
|
289
|
+
self.primary_datetime_name,
|
|
290
|
+
product_datetimes,
|
|
291
|
+
aliases_dict,
|
|
292
|
+
is_first_index,
|
|
293
|
+
)
|
|
294
|
+
await self.refresh_cache()
|
|
295
|
+
all_indexes = await self.index_selector.get_collection_indexes(collection_id)
|
|
296
|
+
all_indexes = sorted(
|
|
297
|
+
all_indexes, key=lambda x: x[0][self.primary_datetime_name]
|
|
298
|
+
)
|
|
299
|
+
return all_indexes[-1][0][self.primary_datetime_name]
|
|
197
300
|
|
|
198
|
-
|
|
199
|
-
|
|
301
|
+
@staticmethod
|
|
302
|
+
def _find_aliases_for_index(
|
|
303
|
+
all_indexes: List, target_index: str
|
|
304
|
+
) -> tuple[Optional[Dict[str, Any]], bool]:
|
|
305
|
+
"""Find aliases for a given index.
|
|
200
306
|
|
|
201
307
|
Args:
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
items (List[Dict[str, Any]]): List of items to process.
|
|
308
|
+
all_indexes: List of index alias dictionaries.
|
|
309
|
+
target_index: Target index name to find.
|
|
205
310
|
|
|
206
311
|
Returns:
|
|
207
|
-
None
|
|
312
|
+
Tuple of (aliases_dict, is_first_index); (None, False) when no entry in all_indexes contains target_index.
|
|
208
313
|
"""
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
all_indexes = await index_selector.get_collection_indexes(collection_id)
|
|
215
|
-
all_indexes.sort()
|
|
216
|
-
latest_index = all_indexes[-1]
|
|
217
|
-
|
|
218
|
-
if first_item_index != latest_index:
|
|
219
|
-
return None
|
|
220
|
-
|
|
221
|
-
if not await self.datetime_manager.size_manager.is_index_oversized(
|
|
222
|
-
first_item_index
|
|
223
|
-
):
|
|
224
|
-
return None
|
|
225
|
-
|
|
226
|
-
latest_item = await self.index_operations.find_latest_item_in_index(
|
|
227
|
-
self.client, latest_index
|
|
228
|
-
)
|
|
229
|
-
product_datetime = latest_item["_source"]["properties"]["datetime"]
|
|
230
|
-
end_date = extract_date(product_datetime)
|
|
231
|
-
await self.index_operations.update_index_alias(
|
|
232
|
-
self.client, str(end_date), latest_index
|
|
233
|
-
)
|
|
234
|
-
next_day_start = end_date + timedelta(days=1)
|
|
235
|
-
await self.index_operations.create_datetime_index(
|
|
236
|
-
self.client, collection_id, str(next_day_start)
|
|
237
|
-
)
|
|
238
|
-
await index_selector.refresh_cache()
|
|
314
|
+
for idx, item in enumerate(all_indexes):
|
|
315
|
+
aliases_dict = item[0]
|
|
316
|
+
if target_index in aliases_dict.values():
|
|
317
|
+
return aliases_dict, idx == 0
|
|
318
|
+
return None, False
|
|
239
319
|
|
|
240
320
|
|
|
241
321
|
class SimpleIndexInserter(BaseIndexInserter):
|