sfeos-helpers 6.0.0__py3-none-any.whl → 6.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
1
+ """Search engine adapters for different implementations."""
2
+
3
+ import uuid
4
+ from typing import Any, Dict
5
+
6
+ from stac_fastapi.sfeos_helpers.database import (
7
+ index_alias_by_collection_id,
8
+ index_by_collection_id,
9
+ )
10
+ from stac_fastapi.sfeos_helpers.mappings import (
11
+ _ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE,
12
+ ES_ITEMS_MAPPINGS,
13
+ ES_ITEMS_SETTINGS,
14
+ ITEMS_INDEX_PREFIX,
15
+ )
16
+
17
+
18
class IndexOperations:
    """Base class for search engine adapters with common implementations.

    Provides index/alias lifecycle helpers (create, rename, inspect) shared
    by the async search engine adapters.
    """

    async def create_simple_index(self, client: Any, collection_id: str) -> str:
        """Create a simple index for the given collection.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.

        Returns:
            str: Created index name.
        """
        index_name = f"{index_by_collection_id(collection_id)}-000001"
        alias_name = index_alias_by_collection_id(collection_id)

        # ignore 400 so the call is a no-op if the index already exists.
        await client.indices.create(
            index=index_name,
            body=self._create_index_body({alias_name: {}}),
            params={"ignore": [400]},
        )
        return index_name

    async def create_datetime_index(
        self, client: Any, collection_id: str, start_date: str
    ) -> str:
        """Create a datetime-based index for the given collection.

        The new index is attached both to the collection-wide alias and to a
        datetime alias starting at ``start_date``.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.
            start_date (str): Start date for the alias.

        Returns:
            str: Created index alias name.
        """
        index_name = self.create_index_name(collection_id)
        alias_name = self.create_alias_name(collection_id, start_date)
        collection_alias = index_alias_by_collection_id(collection_id)
        await client.indices.create(
            index=index_name,
            body=self._create_index_body({collection_alias: {}, alias_name: {}}),
        )
        return alias_name

    @staticmethod
    async def update_index_alias(client: Any, end_date: str, old_alias: str) -> str:
        """Update index alias with new end date.

        Moves every index currently behind ``old_alias`` to the alias
        ``{old_alias}-{end_date}``, closing the alias's date range in a
        single atomic ``update_aliases`` request.

        Args:
            client: Search engine client instance.
            end_date (str): End date for the alias.
            old_alias (str): Current alias name.

        Returns:
            str: New alias name.
        """
        new_alias = f"{old_alias}-{end_date}"
        aliases_info = await client.indices.get_alias(name=old_alias)
        actions = []

        # Build remove/add pairs for each backing index; a dict iterates
        # its keys directly, no need for .keys().
        for index_name in aliases_info:
            actions.append({"remove": {"index": index_name, "alias": old_alias}})
            actions.append({"add": {"index": index_name, "alias": new_alias}})

        await client.indices.update_aliases(body={"actions": actions})
        return new_alias

    @staticmethod
    async def change_alias_name(client: Any, old_alias: str, new_alias: str) -> None:
        """Change alias name from old to new.

        Args:
            client: Search engine client instance.
            old_alias (str): Current alias name.
            new_alias (str): New alias name.

        Returns:
            None
        """
        aliases_info = await client.indices.get_alias(name=old_alias)
        actions = []

        for index_name in aliases_info:
            actions.append({"remove": {"index": index_name, "alias": old_alias}})
            actions.append({"add": {"index": index_name, "alias": new_alias}})
        await client.indices.update_aliases(body={"actions": actions})

    @staticmethod
    def create_index_name(collection_id: str) -> str:
        """Create index name from collection ID and uuid4.

        Args:
            collection_id (str): Collection identifier.

        Returns:
            str: Formatted index name.
        """
        cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE)
        return f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{uuid.uuid4()}"

    @staticmethod
    def create_alias_name(collection_id: str, start_date: str) -> str:
        """Create alias name from collection ID and start date.

        Args:
            collection_id (str): Collection identifier.
            start_date (str): Start date for the alias.

        Returns:
            str: Alias name with initial date.
        """
        cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE)
        return f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{start_date}"

    @staticmethod
    def _create_index_body(aliases: Dict[str, Dict]) -> Dict[str, Any]:
        """Create index body with common settings.

        Args:
            aliases (Dict[str, Dict]): Aliases configuration.

        Returns:
            Dict[str, Any]: Index body configuration.
        """
        return {
            "aliases": aliases,
            "mappings": ES_ITEMS_MAPPINGS,
            "settings": ES_ITEMS_SETTINGS,
        }

    @staticmethod
    async def find_latest_item_in_index(client: Any, index_name: str) -> Dict[str, Any]:
        """Find the most recent item in the specified index.

        Args:
            client: Search engine client instance.
            index_name (str): Name of the index to query.

        Returns:
            Dict[str, Any]: Full search hit of the item with the newest
                ``properties.datetime`` (only that field is fetched).

        Raises:
            IndexError: If the index contains no items.
        """
        query = {
            "size": 1,
            "sort": [{"properties.datetime": {"order": "desc"}}],
            "_source": ["properties.datetime"],
        }

        response = await client.search(index=index_name, body=query)
        return response["hits"]["hits"][0]
@@ -0,0 +1,309 @@
1
+ """Async index insertion strategies."""
2
+ import logging
3
+ from datetime import timedelta
4
+ from typing import Any, Dict, List
5
+
6
+ from fastapi import HTTPException, status
7
+
8
+ from stac_fastapi.sfeos_helpers.database import (
9
+ extract_date,
10
+ extract_first_date_from_index,
11
+ index_alias_by_collection_id,
12
+ mk_item_id,
13
+ )
14
+
15
+ from .base import BaseIndexInserter
16
+ from .index_operations import IndexOperations
17
+ from .managers import DatetimeIndexManager
18
+ from .selection import DatetimeBasedIndexSelector
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class DatetimeIndexInserter(BaseIndexInserter):
    """Async datetime-based index insertion strategy.

    Routes each item to a datetime-partitioned index, delegating index
    creation and rollover decisions to ``DatetimeIndexManager``.
    """

    def __init__(self, client: Any, index_operations: IndexOperations):
        """Initialize the async datetime index inserter.

        Args:
            client: Async search engine client instance.
            index_operations (IndexOperations): Search engine adapter instance.
        """
        self.client = client
        self.index_operations = index_operations
        # Handles index creation, early-date alias extension and size checks.
        self.datetime_manager = DatetimeIndexManager(client, index_operations)

    @staticmethod
    def should_create_collection_index() -> bool:
        """Whether this strategy requires collection index creation.

        Returns:
            bool: False, as datetime strategy doesn't create collection indexes.
        """
        return False

    async def create_simple_index(self, client: Any, collection_id: str) -> str:
        """Create a simple index asynchronously.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.

        Returns:
            str: Created index name.
        """
        return await self.index_operations.create_simple_index(client, collection_id)

    async def get_target_index(
        self, collection_id: str, product: Dict[str, Any]
    ) -> str:
        """Get target index for a single product.

        Args:
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data containing datetime information.

        Returns:
            str: Target index name for the product.
        """
        index_selector = DatetimeBasedIndexSelector(self.client)
        return await self._get_target_index_internal(
            index_selector, collection_id, product, check_size=True
        )

    async def prepare_bulk_actions(
        self, collection_id: str, items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Prepare bulk actions for multiple items.

        Args:
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            List[Dict[str, Any]]: List of bulk actions ready for execution.

        Raises:
            HTTPException: 400 if ``items`` is empty.
        """
        if not items:
            msg = "The product list cannot be empty."
            logger.error(msg)
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=msg)

        # Sort chronologically (in place) so index creation and rollover
        # decisions below are made oldest-first.
        items.sort(key=lambda item: item["properties"]["datetime"])
        index_selector = DatetimeBasedIndexSelector(self.client)

        await self._ensure_indexes_exist(index_selector, collection_id, items)
        await self._check_and_handle_oversized_index(
            index_selector, collection_id, items
        )

        actions = []
        for item in items:
            # check_size=False: the oversized check was already performed once
            # for the whole batch above.
            target_index = await self._get_target_index_internal(
                index_selector, collection_id, item, check_size=False
            )
            actions.append(
                {
                    "_index": target_index,
                    "_id": mk_item_id(item["id"], item["collection"]),
                    "_source": item,
                }
            )

        return actions

    async def _get_target_index_internal(
        self,
        index_selector,
        collection_id: str,
        product: Dict[str, Any],
        check_size: bool = True,
    ) -> str:
        """Get target index with size checking internally.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data.
            check_size (bool): Whether to check index size limits.

        Returns:
            str: Target index name.
        """
        product_datetime = self.datetime_manager.validate_product_datetime(product)
        # Point-in-time range: select the index whose span covers exactly
        # this product's datetime.
        datetime_range = {"gte": product_datetime, "lte": product_datetime}
        target_index = await index_selector.select_indexes(
            [collection_id], datetime_range
        )
        all_indexes = await index_selector.get_collection_indexes(collection_id)

        # No indexes yet: bootstrap the collection's first datetime index.
        if not all_indexes:
            target_index = await self.datetime_manager.handle_new_collection(
                collection_id, product_datetime
            )
            await index_selector.refresh_cache()
            return target_index

        all_indexes.sort()
        start_date = extract_date(product_datetime)
        end_date = extract_first_date_from_index(all_indexes[0])

        # Product predates the earliest index: shift that index's alias back.
        if start_date < end_date:
            alias = await self.datetime_manager.handle_early_date(
                collection_id, start_date, end_date
            )
            await index_selector.refresh_cache()

            return alias

        # Only the newest index is subject to rollover; older ones are
        # returned as-is.
        if target_index != all_indexes[-1]:
            return target_index

        if check_size and await self.datetime_manager.size_manager.is_index_oversized(
            target_index
        ):
            target_index = await self.datetime_manager.handle_oversized_index(
                collection_id, target_index, product_datetime
            )
            await index_selector.refresh_cache()

        return target_index

    async def _ensure_indexes_exist(
        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
    ) -> None:
        """Ensure necessary indexes exist for the items.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.
        """
        all_indexes = await index_selector.get_collection_indexes(collection_id)

        if not all_indexes:
            # Caller sorted items chronologically, so items[0] carries the
            # earliest date — use it as the first index's start date.
            first_item = items[0]
            await self.index_operations.create_datetime_index(
                self.client,
                collection_id,
                extract_date(first_item["properties"]["datetime"]),
            )
            await index_selector.refresh_cache()

    async def _check_and_handle_oversized_index(
        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
    ) -> None:
        """Check if index is oversized and create new index if needed.

        Checks if the index where the first item would be inserted is oversized.
        If so, creates a new index starting from the next day.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            None
        """
        first_item = items[0]
        first_item_index = await self._get_target_index_internal(
            index_selector, collection_id, first_item, check_size=False
        )

        all_indexes = await index_selector.get_collection_indexes(collection_id)
        all_indexes.sort()
        latest_index = all_indexes[-1]

        # Rollover only applies when the batch targets the newest index.
        if first_item_index != latest_index:
            return None

        if not await self.datetime_manager.size_manager.is_index_oversized(
            first_item_index
        ):
            return None

        # Close the current alias at the date of the newest stored item and
        # open a fresh index starting the following day.
        latest_item = await self.index_operations.find_latest_item_in_index(
            self.client, latest_index
        )
        product_datetime = latest_item["_source"]["properties"]["datetime"]
        end_date = extract_date(product_datetime)
        await self.index_operations.update_index_alias(
            self.client, str(end_date), latest_index
        )
        next_day_start = end_date + timedelta(days=1)
        await self.index_operations.create_datetime_index(
            self.client, collection_id, str(next_day_start)
        )
        await index_selector.refresh_cache()
239
+
240
+
241
class SimpleIndexInserter(BaseIndexInserter):
    """Simple async index insertion strategy.

    Every item is written through the collection-wide alias, so no
    per-datetime index management is required.
    """

    def __init__(self, index_operations: IndexOperations, client: Any):
        """Initialize the async simple index inserter.

        Args:
            index_operations (IndexOperations): Search engine adapter instance.
            client: Async search engine client instance.
        """
        self.search_adapter = index_operations
        self.client = client

    @staticmethod
    def should_create_collection_index() -> bool:
        """Whether this strategy requires collection index creation.

        Returns:
            bool: True, as simple strategy creates collection indexes.
        """
        return True

    async def create_simple_index(self, client: Any, collection_id: str) -> str:
        """Create a simple index asynchronously.

        Delegates directly to the underlying search adapter.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.

        Returns:
            str: Created index name.
        """
        return await self.search_adapter.create_simple_index(client, collection_id)

    async def get_target_index(
        self, collection_id: str, product: Dict[str, Any]
    ) -> str:
        """Resolve the write target for a product.

        The simple strategy always resolves to the collection alias; the
        product payload is ignored.

        Args:
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data (not used in simple strategy).

        Returns:
            str: Collection alias name.
        """
        return index_alias_by_collection_id(collection_id)

    async def prepare_bulk_actions(
        self, collection_id: str, items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Build bulk actions that all point at the collection alias.

        Args:
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            List[Dict[str, Any]]: List of bulk actions with collection alias as target.
        """
        alias = index_alias_by_collection_id(collection_id)
        actions = []
        for item in items:
            actions.append(
                {
                    "_index": alias,
                    "_id": mk_item_id(item["id"], item["collection"]),
                    "_source": item,
                }
            )
        return actions
@@ -0,0 +1,198 @@
1
+ """Index management utilities."""
2
+
3
+ import logging
4
+ import os
5
+ from datetime import datetime, timedelta
6
+ from typing import Any, Dict
7
+
8
+ from fastapi import HTTPException, status
9
+
10
+ from stac_fastapi.sfeos_helpers.database import (
11
+ extract_date,
12
+ extract_first_date_from_index,
13
+ )
14
+
15
+ from .index_operations import IndexOperations
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class IndexSizeManager:
    """Manages index size limits and operations."""

    def __init__(self, client: Any):
        """Initialize the index size manager.

        Args:
            client: Search engine client instance.
        """
        self.client = client
        # Size cap (GB) above which an index is considered oversized.
        self.max_size_gb = self._get_max_size_from_env()

    async def get_index_size_in_gb(self, index_name: str) -> float:
        """Get index size in gigabytes asynchronously.

        Only primary-shard storage is counted (replicas excluded).

        Args:
            index_name (str): Name of the index to check.

        Returns:
            float: Size of the index in gigabytes (decimal GB, 1e9 bytes).
        """
        data = await self.client.indices.stats(index=index_name)
        return data["_all"]["primaries"]["store"]["size_in_bytes"] / 1e9

    async def is_index_oversized(self, index_name: str) -> bool:
        """Check if index exceeds size limit asynchronously.

        Args:
            index_name (str): Name of the index to check.

        Returns:
            bool: True if index exceeds size limit, False otherwise.
        """
        size_gb = await self.get_index_size_in_gb(index_name)
        is_oversized = size_gb > self.max_size_gb
        # Log the whole-GB size once the index has reached at least 1 GB.
        gb_milestone = int(size_gb)
        if gb_milestone > 0:
            logger.info(f"Index '{index_name}' size: {gb_milestone}GB")

        if is_oversized:
            logger.warning(
                f"Index '{index_name}' is oversized: {size_gb:.2f} GB "
                f"(limit: {self.max_size_gb} GB)"
            )

        return is_oversized

    @staticmethod
    def _get_max_size_from_env() -> float:
        """Get max size from the DATETIME_INDEX_MAX_SIZE_GB env variable.

        This never raises: non-numeric, non-positive, or NaN values are
        logged and the 25.0 GB default is used instead.

        Returns:
            float: Maximum index size in GB.
        """
        env_value = os.getenv("DATETIME_INDEX_MAX_SIZE_GB", "25")

        try:
            max_size = float(env_value)
        except (ValueError, TypeError):
            max_size = None

        # NaN fails the > 0 comparison, so "nan" also falls back to the
        # default instead of leaking a NaN limit into every comparison.
        if max_size is not None and max_size > 0:
            return max_size

        logger.warning(
            f"Invalid value for DATETIME_INDEX_MAX_SIZE_GB environment variable: "
            f"'{env_value}'. Must be a positive number. Using default value 25.0 GB."
        )
        return 25.0
94
+
95
+
96
class DatetimeIndexManager:
    """Manages datetime-based index operations."""

    def __init__(self, client: Any, index_operations: IndexOperations):
        """Initialize the datetime index manager.

        Args:
            client: Search engine client instance.
            index_operations (IndexOperations): Search engine adapter instance.
        """
        self.client = client
        self.index_operations = index_operations
        self.size_manager = IndexSizeManager(client)

    @staticmethod
    def validate_product_datetime(product: Dict[str, Any]) -> str:
        """Validate and extract datetime from product.

        Args:
            product (Dict[str, Any]): Product data containing datetime information.

        Returns:
            str: Validated product datetime.

        Raises:
            HTTPException: If product datetime is missing or invalid.
        """
        value = product["properties"]["datetime"]
        if value:
            return value
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Product datetime is required for indexing",
        )

    async def handle_new_collection(
        self, collection_id: str, product_datetime: str
    ) -> str:
        """Create the first datetime index for a brand-new collection.

        Args:
            collection_id (str): Collection identifier.
            product_datetime (str): Product datetime for index naming.

        Returns:
            str: Created index name.
        """
        created = await self.index_operations.create_datetime_index(
            self.client, collection_id, extract_date(product_datetime)
        )
        logger.info(
            f"Successfully created index '{created}' for collection '{collection_id}'"
        )
        return created

    async def handle_early_date(
        self, collection_id: str, start_date: datetime, end_date: datetime
    ) -> str:
        """Handle product with date earlier than existing indexes asynchronously.

        Renames the alias that currently starts at ``end_date`` so it starts
        at ``start_date``, letting the earliest index cover the product.

        Args:
            collection_id (str): Collection identifier.
            start_date (datetime): Start date for the new index.
            end_date (datetime): End date for alias update.

        Returns:
            str: Updated alias name.
        """
        renamed = self.index_operations.create_alias_name(
            collection_id, str(start_date)
        )
        await self.index_operations.change_alias_name(
            self.client,
            self.index_operations.create_alias_name(collection_id, str(end_date)),
            renamed,
        )
        return renamed

    async def handle_oversized_index(
        self, collection_id: str, target_index: str, product_datetime: str
    ) -> str:
        """Handle index that exceeds size limit asynchronously.

        Closes the current index's alias at the product's date and opens a
        fresh index starting the following day. When the product's date
        equals the index's start date the index cannot be split, so it is
        returned unchanged.

        Args:
            collection_id (str): Collection identifier.
            target_index (str): Current target index name.
            product_datetime (str): Product datetime for new index.

        Returns:
            str: New or updated index name.
        """
        split_date = extract_date(product_datetime)

        if split_date != extract_first_date_from_index(target_index):
            await self.index_operations.update_index_alias(
                self.client, str(split_date), target_index
            )
            target_index = await self.index_operations.create_datetime_index(
                self.client, collection_id, str(split_date + timedelta(days=1))
            )

        return target_index
@@ -0,0 +1,15 @@
1
+ """Index selection strategies package."""
2
+
3
+ from .base import BaseIndexSelector
4
+ from .cache_manager import IndexAliasLoader, IndexCacheManager
5
+ from .factory import IndexSelectorFactory
6
+ from .selectors import DatetimeBasedIndexSelector, UnfilteredIndexSelector
7
+
8
# Public API of the index selection strategies package; kept in sync with
# the imports above so wildcard imports expose exactly these names.
__all__ = [
    "IndexCacheManager",
    "IndexAliasLoader",
    "DatetimeBasedIndexSelector",
    "UnfilteredIndexSelector",
    "IndexSelectorFactory",
    "BaseIndexSelector",
]