sfeos-helpers 6.9.0__py3-none-any.whl → 6.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
1
  """Search engine adapters for different implementations."""
2
2
 
3
3
  import uuid
4
- from typing import Any, Dict
4
+ from typing import Any, Dict, List, Literal
5
5
 
6
+ from stac_fastapi.core.utilities import get_bool_env
6
7
  from stac_fastapi.sfeos_helpers.database import (
7
8
  index_alias_by_collection_id,
8
9
  index_by_collection_id,
@@ -18,6 +19,16 @@ from stac_fastapi.sfeos_helpers.mappings import (
18
19
  class IndexOperations:
19
20
  """Base class for search engine adapters with common implementations."""
20
21
 
22
+ @property
23
+ def use_datetime(self) -> bool:
24
+ """Get USE_DATETIME setting dynamically."""
25
+ return get_bool_env("USE_DATETIME", default=True)
26
+
27
+ @property
28
+ def primary_datetime_name(self) -> str:
29
+ """Get primary datetime field name based on current USE_DATETIME setting."""
30
+ return "datetime" if self.use_datetime else "start_datetime"
31
+
21
32
  async def create_simple_index(self, client: Any, collection_id: str) -> str:
22
33
  """Create a simple index for the given collection.
23
34
 
@@ -45,26 +56,51 @@ class IndexOperations:
45
56
  return index_name
46
57
 
47
58
  async def create_datetime_index(
48
- self, client: Any, collection_id: str, start_date: str
59
+ self,
60
+ client: Any,
61
+ collection_id: str,
62
+ start_datetime: str | None,
63
+ datetime: str | None,
64
+ end_datetime: str | None,
49
65
  ) -> str:
50
66
  """Create a datetime-based index for the given collection.
51
67
 
52
68
  Args:
53
69
  client: Search engine client instance.
54
70
  collection_id (str): Collection identifier.
55
- start_date (str): Start date for the alias.
71
+ start_datetime (str | None): Start datetime for the index alias.
72
+ datetime (str | None): Datetime for the datetime alias.
73
+ end_datetime (str | None): End datetime for the index alias.
56
74
 
57
75
  Returns:
58
- str: Created index alias name.
76
+ str: Created datetime alias name.
59
77
  """
60
78
  index_name = self.create_index_name(collection_id)
61
- alias_name = self.create_alias_name(collection_id, start_date)
62
79
  collection_alias = index_alias_by_collection_id(collection_id)
80
+
81
+ aliases: Dict[str, Any] = {
82
+ collection_alias: {},
83
+ }
84
+
85
+ if start_datetime:
86
+ alias_start_date = self.create_alias_name(
87
+ collection_id, "start_datetime", start_datetime
88
+ )
89
+ alias_end_date = self.create_alias_name(
90
+ collection_id, "end_datetime", end_datetime
91
+ )
92
+ aliases[alias_start_date] = {}
93
+ aliases[alias_end_date] = {}
94
+ created_alias = alias_start_date
95
+ else:
96
+ created_alias = self.create_alias_name(collection_id, "datetime", datetime)
97
+ aliases[created_alias] = {}
98
+
63
99
  await client.indices.create(
64
100
  index=index_name,
65
- body=self._create_index_body({collection_alias: {}, alias_name: {}}),
101
+ body=self._create_index_body(aliases),
66
102
  )
67
- return alias_name
103
+ return created_alias
68
104
 
69
105
  @staticmethod
70
106
  async def update_index_alias(client: Any, end_date: str, old_alias: str) -> str:
@@ -90,23 +126,33 @@ class IndexOperations:
90
126
  return new_alias
91
127
 
92
128
  @staticmethod
93
- async def change_alias_name(client: Any, old_alias: str, new_alias: str) -> None:
94
- """Change alias name from old to new.
129
+ async def change_alias_name(
130
+ client: Any,
131
+ old_start_datetime_alias: str,
132
+ aliases_to_change: List[str],
133
+ aliases_to_create: List[str],
134
+ ) -> None:
135
+ """Change alias names by removing old aliases and adding new ones.
95
136
 
96
137
  Args:
97
138
  client: Search engine client instance.
98
- old_alias (str): Current alias name.
99
- new_alias (str): New alias name.
139
+ old_start_datetime_alias (str): Current start_datetime alias name to identify the index.
140
+ aliases_to_change (List[str]): List of old alias names to remove.
141
+ aliases_to_create (List[str]): List of new alias names to add.
100
142
 
101
143
  Returns:
102
144
  None
103
145
  """
104
- aliases_info = await client.indices.get_alias(name=old_alias)
105
- actions = []
146
+ aliases_info = await client.indices.get_alias(name=old_start_datetime_alias)
147
+ index_name = list(aliases_info.keys())[0]
106
148
 
107
- for index_name in aliases_info.keys():
149
+ actions = []
150
+ for old_alias in aliases_to_change:
108
151
  actions.append({"remove": {"index": index_name, "alias": old_alias}})
152
+
153
+ for new_alias in aliases_to_create:
109
154
  actions.append({"add": {"index": index_name, "alias": new_alias}})
155
+
110
156
  await client.indices.update_aliases(body={"actions": actions})
111
157
 
112
158
  @staticmethod
@@ -123,18 +169,23 @@ class IndexOperations:
123
169
  return f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{uuid.uuid4()}"
124
170
 
125
171
  @staticmethod
126
- def create_alias_name(collection_id: str, start_date: str) -> str:
127
- """Create index name from collection ID and uuid4.
172
+ def create_alias_name(
173
+ collection_id: str,
174
+ name: Literal["start_datetime", "datetime", "end_datetime"],
175
+ start_date: str,
176
+ ) -> str:
177
+ """Create alias name from collection ID and date.
128
178
 
129
179
  Args:
130
180
  collection_id (str): Collection identifier.
131
- start_date (str): Start date for the alias.
181
+ name (Literal["start_datetime", "datetime", "end_datetime"]): Type of alias to create.
182
+ start_date (str): Date value for the alias.
132
183
 
133
184
  Returns:
134
- str: Alias name with initial date.
185
+ str: Formatted alias name with prefix, type, collection ID, and date.
135
186
  """
136
187
  cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE)
137
- return f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{start_date}"
188
+ return f"{ITEMS_INDEX_PREFIX}{name}_{cleaned.lower()}_{start_date}"
138
189
 
139
190
  @staticmethod
140
191
  def _create_index_body(aliases: Dict[str, Dict]) -> Dict[str, Any]:
@@ -152,21 +203,25 @@ class IndexOperations:
152
203
  "settings": ES_ITEMS_SETTINGS,
153
204
  }
154
205
 
155
- @staticmethod
156
- async def find_latest_item_in_index(client: Any, index_name: str) -> dict[str, Any]:
157
- """Find the latest item date in the specified index.
206
+ async def find_latest_item_in_index(
207
+ self, client: Any, index_name: str
208
+ ) -> dict[str, Any]:
209
+ """Find the latest item in the specified index.
158
210
 
159
211
  Args:
160
212
  client: Search engine client instance.
161
213
  index_name (str): Name of the index to query.
162
214
 
163
215
  Returns:
164
- datetime: Date of the latest item in the index.
216
+ dict[str, Any]: Latest item document from the index with metadata.
165
217
  """
166
218
  query = {
167
219
  "size": 1,
168
- "sort": [{"properties.datetime": {"order": "desc"}}],
169
- "_source": ["properties.datetime"],
220
+ "sort": [{f"properties.{self.primary_datetime_name}": {"order": "desc"}}],
221
+ "_source": [
222
+ "properties.start_datetime",
223
+ "properties.datetime",
224
+ ],
170
225
  }
171
226
 
172
227
  response = await client.search(index=index_name, body=query)
@@ -1,10 +1,11 @@
1
1
  """Async index insertion strategies."""
2
+
2
3
  import logging
3
- from datetime import timedelta
4
- from typing import Any, Dict, List
4
+ from typing import Any, Dict, List, Optional
5
5
 
6
6
  from fastapi import HTTPException, status
7
7
 
8
+ from stac_fastapi.core.utilities import get_bool_env
8
9
  from stac_fastapi.sfeos_helpers.database import (
9
10
  extract_date,
10
11
  extract_first_date_from_index,
@@ -14,7 +15,7 @@ from stac_fastapi.sfeos_helpers.database import (
14
15
 
15
16
  from .base import BaseIndexInserter
16
17
  from .index_operations import IndexOperations
17
- from .managers import DatetimeIndexManager
18
+ from .managers import DatetimeIndexManager, ProductDatetimes
18
19
  from .selection import DatetimeBasedIndexSelector
19
20
 
20
21
  logger = logging.getLogger(__name__)
@@ -33,6 +34,25 @@ class DatetimeIndexInserter(BaseIndexInserter):
33
34
  self.client = client
34
35
  self.index_operations = index_operations
35
36
  self.datetime_manager = DatetimeIndexManager(client, index_operations)
37
+ self.index_selector = DatetimeBasedIndexSelector(client)
38
+
39
+ @property
40
+ def use_datetime(self) -> bool:
41
+ """Get USE_DATETIME setting dynamically.
42
+
43
+ Returns:
44
+ bool: Current value of USE_DATETIME environment variable.
45
+ """
46
+ return get_bool_env("USE_DATETIME", default=True)
47
+
48
+ @property
49
+ def primary_datetime_name(self) -> str:
50
+ """Get primary datetime field name based on current USE_DATETIME setting.
51
+
52
+ Returns:
53
+ str: "datetime" if USE_DATETIME is True, else "start_datetime".
54
+ """
55
+ return "datetime" if self.use_datetime else "start_datetime"
36
56
 
37
57
  @staticmethod
38
58
  def should_create_collection_index() -> bool:
@@ -55,6 +75,48 @@ class DatetimeIndexInserter(BaseIndexInserter):
55
75
  """
56
76
  return await self.index_operations.create_simple_index(client, collection_id)
57
77
 
78
+ async def refresh_cache(self) -> None:
79
+ """Refresh the index selector cache.
80
+
81
+ This method refreshes the cached index information used for
82
+ datetime-based index selection.
83
+ """
84
+ await self.index_selector.refresh_cache()
85
+
86
+ def validate_datetime_field_update(self, field_path: str) -> None:
87
+ """Validate if a datetime field can be updated.
88
+
89
+ For datetime-based indexing, the primary datetime field cannot be modified
90
+ because it determines the index where the item is stored.
91
+
92
+ When USE_DATETIME=True, 'properties.datetime' is protected.
93
+ When USE_DATETIME=False, 'properties.start_datetime' and 'properties.end_datetime' are protected.
94
+
95
+ Args:
96
+ field_path (str): The path of the field being updated.
97
+ """
98
+ # TODO: In the future, updating these fields will be able to move an item between indices by changing the time-based aliases
99
+ if self.use_datetime:
100
+ if field_path == "properties/datetime":
101
+ raise HTTPException(
102
+ status_code=status.HTTP_400_BAD_REQUEST,
103
+ detail=(
104
+ "Updating 'properties.datetime' is not yet supported for datetime-based indexing. "
105
+ "This feature will be available in a future release, enabling automatic "
106
+ "index and time-based alias updates when datetime values change."
107
+ ),
108
+ )
109
+ else:
110
+ if field_path in ("properties/start_datetime", "properties/end_datetime"):
111
+ raise HTTPException(
112
+ status_code=status.HTTP_400_BAD_REQUEST,
113
+ detail=(
114
+ f"Updating '{field_path}' is not yet supported for datetime-based indexing. "
115
+ "This feature will be available in a future release, enabling automatic "
116
+ "index and time-based alias updates when datetime values change."
117
+ ),
118
+ )
119
+
58
120
  async def get_target_index(
59
121
  self, collection_id: str, product: Dict[str, Any]
60
122
  ) -> str:
@@ -67,9 +129,8 @@ class DatetimeIndexInserter(BaseIndexInserter):
67
129
  Returns:
68
130
  str: Target index name for the product.
69
131
  """
70
- index_selector = DatetimeBasedIndexSelector(self.client)
71
132
  return await self._get_target_index_internal(
72
- index_selector, collection_id, product, check_size=True
133
+ collection_id, product, check_size=True
73
134
  )
74
135
 
75
136
  async def prepare_bulk_actions(
@@ -89,18 +150,12 @@ class DatetimeIndexInserter(BaseIndexInserter):
89
150
  logger.error(msg)
90
151
  raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=msg)
91
152
 
92
- items.sort(key=lambda item: item["properties"]["datetime"])
93
- index_selector = DatetimeBasedIndexSelector(self.client)
94
-
95
- await self._ensure_indexes_exist(index_selector, collection_id, items)
96
- await self._check_and_handle_oversized_index(
97
- index_selector, collection_id, items
98
- )
153
+ items.sort(key=lambda item: item["properties"][self.primary_datetime_name])
99
154
 
100
155
  actions = []
101
156
  for item in items:
102
157
  target_index = await self._get_target_index_internal(
103
- index_selector, collection_id, item, check_size=False
158
+ collection_id, item, check_size=True
104
159
  )
105
160
  actions.append(
106
161
  {
@@ -114,15 +169,13 @@ class DatetimeIndexInserter(BaseIndexInserter):
114
169
 
115
170
  async def _get_target_index_internal(
116
171
  self,
117
- index_selector,
118
172
  collection_id: str,
119
173
  product: Dict[str, Any],
120
174
  check_size: bool = True,
121
- ) -> str:
175
+ ) -> Optional[str]:
122
176
  """Get target index with size checking internally.
123
177
 
124
178
  Args:
125
- index_selector: Index selector instance.
126
179
  collection_id (str): Collection identifier.
127
180
  product (Dict[str, Any]): Product data.
128
181
  check_size (bool): Whether to check index size limits.
@@ -130,112 +183,137 @@ class DatetimeIndexInserter(BaseIndexInserter):
130
183
  Returns:
131
184
  str: Target index name.
132
185
  """
133
- product_datetime = self.datetime_manager.validate_product_datetime(product)
134
- datetime_range = {"gte": product_datetime, "lte": product_datetime}
135
- target_index = await index_selector.select_indexes(
136
- [collection_id], datetime_range
186
+ product_datetimes = self.datetime_manager.validate_product_datetimes(
187
+ product, self.use_datetime
137
188
  )
138
- all_indexes = await index_selector.get_collection_indexes(collection_id)
189
+ primary_datetime_value = (
190
+ product_datetimes.datetime
191
+ if self.use_datetime
192
+ else product_datetimes.start_datetime
193
+ )
194
+
195
+ all_indexes = await self.index_selector.get_collection_indexes(collection_id)
139
196
 
140
197
  if not all_indexes:
141
198
  target_index = await self.datetime_manager.handle_new_collection(
142
- collection_id, product_datetime
199
+ collection_id, self.primary_datetime_name, product_datetimes
143
200
  )
144
- await index_selector.refresh_cache()
201
+ await self.refresh_cache()
145
202
  return target_index
146
203
 
147
- all_indexes.sort()
148
- start_date = extract_date(product_datetime)
149
- end_date = extract_first_date_from_index(all_indexes[0])
204
+ all_indexes = sorted(
205
+ all_indexes, key=lambda x: x[0][self.primary_datetime_name]
206
+ )
207
+
208
+ target_index = await self.index_selector.select_indexes(
209
+ [collection_id], primary_datetime_value, for_insertion=True
210
+ )
150
211
 
151
- if start_date < end_date:
152
- alias = await self.datetime_manager.handle_early_date(
153
- collection_id, start_date, end_date
212
+ start_date = extract_date(primary_datetime_value)
213
+ earliest_index_date = extract_first_date_from_index(
214
+ all_indexes[0][0][self.primary_datetime_name]
215
+ )
216
+
217
+ if start_date < earliest_index_date:
218
+ target_index = await self.datetime_manager.handle_early_date(
219
+ collection_id,
220
+ self.primary_datetime_name,
221
+ product_datetimes,
222
+ all_indexes[0][0],
223
+ True,
154
224
  )
155
- await index_selector.refresh_cache()
225
+ await self.refresh_cache()
226
+ return target_index
156
227
 
157
- return alias
228
+ if not target_index:
229
+ target_index = all_indexes[-1][0][self.primary_datetime_name]
158
230
 
159
- if target_index != all_indexes[-1]:
231
+ aliases_dict, is_first_index = self._find_aliases_for_index(
232
+ all_indexes, target_index
233
+ )
234
+
235
+ if target_index != all_indexes[-1][0][self.primary_datetime_name]:
236
+ await self.datetime_manager.handle_early_date(
237
+ collection_id,
238
+ self.primary_datetime_name,
239
+ product_datetimes,
240
+ aliases_dict,
241
+ is_first_index,
242
+ )
243
+ await self.refresh_cache()
160
244
  return target_index
161
245
 
162
246
  if check_size and await self.datetime_manager.size_manager.is_index_oversized(
163
247
  target_index
164
248
  ):
165
- target_index = await self.datetime_manager.handle_oversized_index(
166
- collection_id, target_index, product_datetime
249
+ latest_item = await self.index_operations.find_latest_item_in_index(
250
+ self.client, target_index
251
+ )
252
+ latest_index_datetimes = ProductDatetimes(
253
+ start_datetime=str(
254
+ extract_date(latest_item["_source"]["properties"]["start_datetime"])
255
+ ),
256
+ datetime=str(
257
+ extract_date(latest_item["_source"]["properties"]["datetime"])
258
+ ),
259
+ end_datetime=str(
260
+ extract_date(latest_item["_source"]["properties"]["end_datetime"])
261
+ ),
167
262
  )
168
- await index_selector.refresh_cache()
169
-
170
- return target_index
171
-
172
- async def _ensure_indexes_exist(
173
- self, index_selector, collection_id: str, items: List[Dict[str, Any]]
174
- ):
175
- """Ensure necessary indexes exist for the items.
176
-
177
- Args:
178
- index_selector: Index selector instance.
179
- collection_id (str): Collection identifier.
180
- items (List[Dict[str, Any]]): List of items to process.
181
- """
182
- all_indexes = await index_selector.get_collection_indexes(collection_id)
183
263
 
184
- if not all_indexes:
185
- first_item = items[0]
186
- await self.index_operations.create_datetime_index(
187
- self.client,
264
+ await self.datetime_manager.handle_oversized_index(
188
265
  collection_id,
189
- extract_date(first_item["properties"]["datetime"]),
266
+ self.primary_datetime_name,
267
+ product_datetimes,
268
+ latest_index_datetimes,
269
+ aliases_dict,
270
+ )
271
+ await self.refresh_cache()
272
+ all_indexes = await self.index_selector.get_collection_indexes(
273
+ collection_id
274
+ )
275
+ all_indexes = sorted(
276
+ all_indexes, key=lambda x: x[0][self.primary_datetime_name]
277
+ )
278
+ return (
279
+ await self.index_selector.select_indexes(
280
+ [collection_id], primary_datetime_value, for_insertion=True
281
+ )
282
+ or all_indexes[-1][0][self.primary_datetime_name]
190
283
  )
191
- await index_selector.refresh_cache()
192
284
 
193
- async def _check_and_handle_oversized_index(
194
- self, index_selector, collection_id: str, items: List[Dict[str, Any]]
195
- ) -> None:
196
- """Check if index is oversized and create new index if needed.
285
+ await self.datetime_manager.handle_early_date(
286
+ collection_id,
287
+ self.primary_datetime_name,
288
+ product_datetimes,
289
+ aliases_dict,
290
+ is_first_index,
291
+ )
292
+ await self.refresh_cache()
293
+ all_indexes = await self.index_selector.get_collection_indexes(collection_id)
294
+ all_indexes = sorted(
295
+ all_indexes, key=lambda x: x[0][self.primary_datetime_name]
296
+ )
297
+ return all_indexes[-1][0][self.primary_datetime_name]
197
298
 
198
- Checks if the index where the first item would be inserted is oversized.
199
- If so, creates a new index starting from the next day.
299
+ @staticmethod
300
+ def _find_aliases_for_index(
301
+ all_indexes: List, target_index: str
302
+ ) -> tuple[Optional[Dict[str, Any]], bool]:
303
+ """Find aliases for a given index.
200
304
 
201
305
  Args:
202
- index_selector: Index selector instance.
203
- collection_id (str): Collection identifier.
204
- items (List[Dict[str, Any]]): List of items to process.
306
+ all_indexes: List of index alias dictionaries.
307
+ target_index: Target index name to find.
205
308
 
206
309
  Returns:
207
- None
310
+ Tuple of (aliases_dict or None, is_first_element).
208
311
  """
209
- first_item = items[0]
210
- first_item_index = await self._get_target_index_internal(
211
- index_selector, collection_id, first_item, check_size=False
212
- )
213
-
214
- all_indexes = await index_selector.get_collection_indexes(collection_id)
215
- all_indexes.sort()
216
- latest_index = all_indexes[-1]
217
-
218
- if first_item_index != latest_index:
219
- return None
220
-
221
- if not await self.datetime_manager.size_manager.is_index_oversized(
222
- first_item_index
223
- ):
224
- return None
225
-
226
- latest_item = await self.index_operations.find_latest_item_in_index(
227
- self.client, latest_index
228
- )
229
- product_datetime = latest_item["_source"]["properties"]["datetime"]
230
- end_date = extract_date(product_datetime)
231
- await self.index_operations.update_index_alias(
232
- self.client, str(end_date), latest_index
233
- )
234
- next_day_start = end_date + timedelta(days=1)
235
- await self.index_operations.create_datetime_index(
236
- self.client, collection_id, str(next_day_start)
237
- )
238
- await index_selector.refresh_cache()
312
+ for idx, item in enumerate(all_indexes):
313
+ aliases_dict = item[0]
314
+ if target_index in aliases_dict.values():
315
+ return aliases_dict, idx == 0
316
+ return None, False
239
317
 
240
318
 
241
319
  class SimpleIndexInserter(BaseIndexInserter):