sfeos-helpers 6.8.1__py3-none-any.whl → 6.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,11 @@
 """Async index insertion strategies."""
+
 import logging
-from datetime import timedelta
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
 from fastapi import HTTPException, status
 
+from stac_fastapi.core.utilities import get_bool_env
 from stac_fastapi.sfeos_helpers.database import (
     extract_date,
     extract_first_date_from_index,
@@ -14,7 +15,7 @@ from stac_fastapi.sfeos_helpers.database import (
 
 from .base import BaseIndexInserter
 from .index_operations import IndexOperations
-from .managers import DatetimeIndexManager
+from .managers import DatetimeIndexManager, ProductDatetimes
 from .selection import DatetimeBasedIndexSelector
 
 logger = logging.getLogger(__name__)
@@ -33,6 +34,25 @@ class DatetimeIndexInserter(BaseIndexInserter):
         self.client = client
         self.index_operations = index_operations
         self.datetime_manager = DatetimeIndexManager(client, index_operations)
+        self.index_selector = DatetimeBasedIndexSelector(client)
+
+    @property
+    def use_datetime(self) -> bool:
+        """Get USE_DATETIME setting dynamically.
+
+        Returns:
+            bool: Current value of USE_DATETIME environment variable.
+        """
+        return get_bool_env("USE_DATETIME", default=True)
+
+    @property
+    def primary_datetime_name(self) -> str:
+        """Get primary datetime field name based on current USE_DATETIME setting.
+
+        Returns:
+            str: "datetime" if USE_DATETIME is True, else "start_datetime".
+        """
+        return "datetime" if self.use_datetime else "start_datetime"
 
     @staticmethod
     def should_create_collection_index() -> bool:
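The two properties added above re-read the environment on every access instead of caching the flag at construction time, so flipping USE_DATETIME changes which STAC field drives index selection. A minimal standalone sketch of that behavior, assuming a stand-in for stac_fastapi.core.utilities.get_bool_env (the real helper's exact parsing rules may differ):

import os

def get_bool_env_stub(name: str, default: bool = True) -> bool:
    # Stand-in for stac_fastapi.core.utilities.get_bool_env (assumed parsing).
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")

def primary_datetime_name() -> str:
    # Mirrors DatetimeIndexInserter.primary_datetime_name from the hunk above.
    return "datetime" if get_bool_env_stub("USE_DATETIME", default=True) else "start_datetime"

os.environ["USE_DATETIME"] = "false"
print(primary_datetime_name())  # start_datetime
os.environ["USE_DATETIME"] = "true"
print(primary_datetime_name())  # datetime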
@@ -55,6 +75,48 @@ class DatetimeIndexInserter(BaseIndexInserter):
         """
         return await self.index_operations.create_simple_index(client, collection_id)
 
+    async def refresh_cache(self) -> None:
+        """Refresh the index selector cache.
+
+        This method refreshes the cached index information used for
+        datetime-based index selection.
+        """
+        await self.index_selector.refresh_cache()
+
+    def validate_datetime_field_update(self, field_path: str) -> None:
+        """Validate if a datetime field can be updated.
+
+        For datetime-based indexing, the primary datetime field cannot be modified
+        because it determines the index where the item is stored.
+
+        When USE_DATETIME=True, 'properties.datetime' is protected.
+        When USE_DATETIME=False, 'properties.start_datetime' and 'properties.end_datetime' are protected.
+
+        Args:
+            field_path (str): The path of the field being updated.
+        """
+        # TODO: In the future, updating these fields will be able to move an item between indices by changing the time-based aliases
+        if self.use_datetime:
+            if field_path == "properties/datetime":
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail=(
+                        "Updating 'properties.datetime' is not yet supported for datetime-based indexing. "
+                        "This feature will be available in a future release, enabling automatic "
+                        "index and time-based alias updates when datetime values change."
+                    ),
+                )
+        else:
+            if field_path in ("properties/start_datetime", "properties/end_datetime"):
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail=(
+                        f"Updating '{field_path}' is not yet supported for datetime-based indexing. "
+                        "This feature will be available in a future release, enabling automatic "
+                        "index and time-based alias updates when datetime values change."
+                    ),
+                )
+
     async def get_target_index(
         self, collection_id: str, product: Dict[str, Any]
     ) -> str:
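As a rough illustration of the new guard, a self-contained sketch that mirrors validate_datetime_field_update with the USE_DATETIME flag passed in explicitly (the function name and the shortened detail message here are illustrative, not part of the package):

from fastapi import HTTPException, status

def validate_datetime_field_update(field_path: str, use_datetime: bool) -> None:
    # Mirrors the guard added in the hunk above: the field(s) that determine
    # the target index cannot be updated yet.
    protected = (
        ("properties/datetime",)
        if use_datetime
        else ("properties/start_datetime", "properties/end_datetime")
    )
    if field_path in protected:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Updating '{field_path}' is not yet supported for datetime-based indexing.",
        )

try:
    validate_datetime_field_update("properties/datetime", use_datetime=True)
except HTTPException as exc:
    print(exc.status_code)  # 400
validate_datetime_field_update("properties/eo:cloud_cover", use_datetime=True)  # no error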
@@ -67,9 +129,8 @@ class DatetimeIndexInserter(BaseIndexInserter):
         Returns:
             str: Target index name for the product.
         """
-        index_selector = DatetimeBasedIndexSelector(self.client)
         return await self._get_target_index_internal(
-            index_selector, collection_id, product, check_size=True
+            collection_id, product, check_size=True
        )
 
     async def prepare_bulk_actions(
@@ -89,18 +150,12 @@ class DatetimeIndexInserter(BaseIndexInserter):
             logger.error(msg)
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=msg)
 
-        items.sort(key=lambda item: item["properties"]["datetime"])
-        index_selector = DatetimeBasedIndexSelector(self.client)
-
-        await self._ensure_indexes_exist(index_selector, collection_id, items)
-        await self._check_and_handle_oversized_index(
-            index_selector, collection_id, items
-        )
+        items.sort(key=lambda item: item["properties"][self.primary_datetime_name])
 
         actions = []
         for item in items:
             target_index = await self._get_target_index_internal(
-                index_selector, collection_id, item, check_size=False
+                collection_id, item, check_size=True
             )
             actions.append(
                 {
@@ -114,15 +169,13 @@ class DatetimeIndexInserter(BaseIndexInserter):
 
     async def _get_target_index_internal(
         self,
-        index_selector,
         collection_id: str,
         product: Dict[str, Any],
         check_size: bool = True,
-    ) -> str:
+    ) -> Optional[str]:
         """Get target index with size checking internally.
 
         Args:
-            index_selector: Index selector instance.
             collection_id (str): Collection identifier.
             product (Dict[str, Any]): Product data.
             check_size (bool): Whether to check index size limits.
@@ -130,112 +183,137 @@ class DatetimeIndexInserter(BaseIndexInserter):
         Returns:
             str: Target index name.
         """
-        product_datetime = self.datetime_manager.validate_product_datetime(product)
-        datetime_range = {"gte": product_datetime, "lte": product_datetime}
-        target_index = await index_selector.select_indexes(
-            [collection_id], datetime_range
+        product_datetimes = self.datetime_manager.validate_product_datetimes(
+            product, self.use_datetime
         )
-        all_indexes = await index_selector.get_collection_indexes(collection_id)
+        primary_datetime_value = (
+            product_datetimes.datetime
+            if self.use_datetime
+            else product_datetimes.start_datetime
+        )
+
+        all_indexes = await self.index_selector.get_collection_indexes(collection_id)
 
         if not all_indexes:
             target_index = await self.datetime_manager.handle_new_collection(
-                collection_id, product_datetime
+                collection_id, self.primary_datetime_name, product_datetimes
             )
-            await index_selector.refresh_cache()
+            await self.refresh_cache()
             return target_index
 
-        all_indexes.sort()
-        start_date = extract_date(product_datetime)
-        end_date = extract_first_date_from_index(all_indexes[0])
+        all_indexes = sorted(
+            all_indexes, key=lambda x: x[0][self.primary_datetime_name]
+        )
+
+        target_index = await self.index_selector.select_indexes(
+            [collection_id], primary_datetime_value, for_insertion=True
+        )
 
-        if start_date < end_date:
-            alias = await self.datetime_manager.handle_early_date(
-                collection_id, start_date, end_date
+        start_date = extract_date(primary_datetime_value)
+        earliest_index_date = extract_first_date_from_index(
+            all_indexes[0][0][self.primary_datetime_name]
+        )
+
+        if start_date < earliest_index_date:
+            target_index = await self.datetime_manager.handle_early_date(
+                collection_id,
+                self.primary_datetime_name,
+                product_datetimes,
+                all_indexes[0][0],
+                True,
             )
-            await index_selector.refresh_cache()
+            await self.refresh_cache()
+            return target_index
 
-            return alias
+        if not target_index:
+            target_index = all_indexes[-1][0][self.primary_datetime_name]
 
-        if target_index != all_indexes[-1]:
+        aliases_dict, is_first_index = self._find_aliases_for_index(
+            all_indexes, target_index
+        )
+
+        if target_index != all_indexes[-1][0][self.primary_datetime_name]:
+            await self.datetime_manager.handle_early_date(
+                collection_id,
+                self.primary_datetime_name,
+                product_datetimes,
+                aliases_dict,
+                is_first_index,
+            )
+            await self.refresh_cache()
             return target_index
 
         if check_size and await self.datetime_manager.size_manager.is_index_oversized(
             target_index
         ):
-            target_index = await self.datetime_manager.handle_oversized_index(
-                collection_id, target_index, product_datetime
+            latest_item = await self.index_operations.find_latest_item_in_index(
+                self.client, target_index
+            )
+            latest_index_datetimes = ProductDatetimes(
+                start_datetime=str(
+                    extract_date(latest_item["_source"]["properties"]["start_datetime"])
+                ),
+                datetime=str(
+                    extract_date(latest_item["_source"]["properties"]["datetime"])
+                ),
+                end_datetime=str(
+                    extract_date(latest_item["_source"]["properties"]["end_datetime"])
+                ),
             )
-            await index_selector.refresh_cache()
-
-        return target_index
-
-    async def _ensure_indexes_exist(
-        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
-    ):
-        """Ensure necessary indexes exist for the items.
-
-        Args:
-            index_selector: Index selector instance.
-            collection_id (str): Collection identifier.
-            items (List[Dict[str, Any]]): List of items to process.
-        """
-        all_indexes = await index_selector.get_collection_indexes(collection_id)
 
-        if not all_indexes:
-            first_item = items[0]
-            await self.index_operations.create_datetime_index(
-                self.client,
+            await self.datetime_manager.handle_oversized_index(
                 collection_id,
-                extract_date(first_item["properties"]["datetime"]),
+                self.primary_datetime_name,
+                product_datetimes,
+                latest_index_datetimes,
+                aliases_dict,
+            )
+            await self.refresh_cache()
+            all_indexes = await self.index_selector.get_collection_indexes(
+                collection_id
+            )
+            all_indexes = sorted(
+                all_indexes, key=lambda x: x[0][self.primary_datetime_name]
+            )
+            return (
+                await self.index_selector.select_indexes(
+                    [collection_id], primary_datetime_value, for_insertion=True
+                )
+                or all_indexes[-1][0][self.primary_datetime_name]
             )
-            await index_selector.refresh_cache()
 
-    async def _check_and_handle_oversized_index(
-        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
-    ) -> None:
-        """Check if index is oversized and create new index if needed.
+        await self.datetime_manager.handle_early_date(
+            collection_id,
+            self.primary_datetime_name,
+            product_datetimes,
+            aliases_dict,
+            is_first_index,
+        )
+        await self.refresh_cache()
+        all_indexes = await self.index_selector.get_collection_indexes(collection_id)
+        all_indexes = sorted(
+            all_indexes, key=lambda x: x[0][self.primary_datetime_name]
+        )
+        return all_indexes[-1][0][self.primary_datetime_name]
 
-        Checks if the index where the first item would be inserted is oversized.
-        If so, creates a new index starting from the next day.
+    @staticmethod
+    def _find_aliases_for_index(
+        all_indexes: List, target_index: str
+    ) -> tuple[Optional[Dict[str, Any]], bool]:
+        """Find aliases for a given index.
 
         Args:
-            index_selector: Index selector instance.
-            collection_id (str): Collection identifier.
-            items (List[Dict[str, Any]]): List of items to process.
+            all_indexes: List of index alias dictionaries.
+            target_index: Target index name to find.
 
         Returns:
-            None
+            Tuple of (aliases_dict or None, is_first_element).
         """
-        first_item = items[0]
-        first_item_index = await self._get_target_index_internal(
-            index_selector, collection_id, first_item, check_size=False
-        )
-
-        all_indexes = await index_selector.get_collection_indexes(collection_id)
-        all_indexes.sort()
-        latest_index = all_indexes[-1]
-
-        if first_item_index != latest_index:
-            return None
-
-        if not await self.datetime_manager.size_manager.is_index_oversized(
-            first_item_index
-        ):
-            return None
-
-        latest_item = await self.index_operations.find_latest_item_in_index(
-            self.client, latest_index
-        )
-        product_datetime = latest_item["_source"]["properties"]["datetime"]
-        end_date = extract_date(product_datetime)
-        await self.index_operations.update_index_alias(
-            self.client, str(end_date), latest_index
-        )
-        next_day_start = end_date + timedelta(days=1)
-        await self.index_operations.create_datetime_index(
-            self.client, collection_id, str(next_day_start)
-        )
-        await index_selector.refresh_cache()
+        for idx, item in enumerate(all_indexes):
+            aliases_dict = item[0]
+            if target_index in aliases_dict.values():
+                return aliases_dict, idx == 0
+        return None, False
 
 
 class SimpleIndexInserter(BaseIndexInserter):
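For reference, a hedged sketch of the _find_aliases_for_index helper introduced at the end of this hunk, exercised against a hypothetical all_indexes structure (each entry's first element is an alias dictionary keyed by datetime field name; the alias names below are made up):

from typing import Any, Dict, List, Optional

def find_aliases_for_index(
    all_indexes: List, target_index: str
) -> tuple[Optional[Dict[str, Any]], bool]:
    # Same logic as DatetimeIndexInserter._find_aliases_for_index above.
    for idx, item in enumerate(all_indexes):
        aliases_dict = item[0]
        if target_index in aliases_dict.values():
            return aliases_dict, idx == 0
    return None, False

all_indexes = [
    [{"datetime": "items_demo_2024-01-01", "start_datetime": "items_demo_2024-01-01"}],
    [{"datetime": "items_demo_2024-06-01", "start_datetime": "items_demo_2024-06-01"}],
]
aliases, is_first = find_aliases_for_index(all_indexes, "items_demo_2024-06-01")
print(is_first)  # False: matched the second (non-first) entry
print(find_aliases_for_index(all_indexes, "missing"))  # (None, False)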