sfeos-helpers 6.1.0__py3-none-any.whl → 6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sfeos_helpers-6.1.0.dist-info → sfeos_helpers-6.2.0.dist-info}/METADATA +78 -3
- sfeos_helpers-6.2.0.dist-info/RECORD +32 -0
- stac_fastapi/sfeos_helpers/aggregation/client.py +5 -2
- stac_fastapi/sfeos_helpers/database/__init__.py +5 -1
- stac_fastapi/sfeos_helpers/database/datetime.py +64 -3
- stac_fastapi/sfeos_helpers/database/index.py +59 -2
- stac_fastapi/sfeos_helpers/search_engine/__init__.py +27 -0
- stac_fastapi/sfeos_helpers/search_engine/base.py +51 -0
- stac_fastapi/sfeos_helpers/search_engine/factory.py +36 -0
- stac_fastapi/sfeos_helpers/search_engine/index_operations.py +167 -0
- stac_fastapi/sfeos_helpers/search_engine/inserters.py +309 -0
- stac_fastapi/sfeos_helpers/search_engine/managers.py +198 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py +15 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/base.py +30 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +127 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/factory.py +37 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py +129 -0
- stac_fastapi/sfeos_helpers/version.py +1 -1
- sfeos_helpers-6.1.0.dist-info/RECORD +0 -21
- {sfeos_helpers-6.1.0.dist-info → sfeos_helpers-6.2.0.dist-info}/WHEEL +0 -0
- {sfeos_helpers-6.1.0.dist-info → sfeos_helpers-6.2.0.dist-info}/top_level.txt +0 -0
stac_fastapi/sfeos_helpers/search_engine/inserters.py (new file)
@@ -0,0 +1,309 @@
"""Async index insertion strategies."""

import logging
from datetime import timedelta
from typing import Any, Dict, List

from fastapi import HTTPException, status

from stac_fastapi.sfeos_helpers.database import (
    extract_date,
    extract_first_date_from_index,
    index_alias_by_collection_id,
    mk_item_id,
)

from .base import BaseIndexInserter
from .index_operations import IndexOperations
from .managers import DatetimeIndexManager
from .selection import DatetimeBasedIndexSelector

logger = logging.getLogger(__name__)


class DatetimeIndexInserter(BaseIndexInserter):
    """Async datetime-based index insertion strategy."""

    def __init__(self, client: Any, index_operations: IndexOperations):
        """Initialize the async datetime index inserter.

        Args:
            client: Async search engine client instance.
            index_operations (IndexOperations): Search engine adapter instance.
        """
        self.client = client
        self.index_operations = index_operations
        self.datetime_manager = DatetimeIndexManager(client, index_operations)

    @staticmethod
    def should_create_collection_index() -> bool:
        """Whether this strategy requires collection index creation.

        Returns:
            bool: False, as the datetime strategy doesn't create collection indexes.
        """
        return False

    async def create_simple_index(self, client: Any, collection_id: str) -> str:
        """Create a simple index asynchronously.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.

        Returns:
            str: Created index name.
        """
        return await self.index_operations.create_simple_index(client, collection_id)

    async def get_target_index(
        self, collection_id: str, product: Dict[str, Any]
    ) -> str:
        """Get target index for a single product.

        Args:
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data containing datetime information.

        Returns:
            str: Target index name for the product.
        """
        index_selector = DatetimeBasedIndexSelector(self.client)
        return await self._get_target_index_internal(
            index_selector, collection_id, product, check_size=True
        )

    async def prepare_bulk_actions(
        self, collection_id: str, items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Prepare bulk actions for multiple items.

        Args:
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            List[Dict[str, Any]]: List of bulk actions ready for execution.
        """
        if not items:
            msg = "The product list cannot be empty."
            logger.error(msg)
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=msg)

        items.sort(key=lambda item: item["properties"]["datetime"])
        index_selector = DatetimeBasedIndexSelector(self.client)

        await self._ensure_indexes_exist(index_selector, collection_id, items)
        await self._check_and_handle_oversized_index(
            index_selector, collection_id, items
        )

        actions = []
        for item in items:
            target_index = await self._get_target_index_internal(
                index_selector, collection_id, item, check_size=False
            )
            actions.append(
                {
                    "_index": target_index,
                    "_id": mk_item_id(item["id"], item["collection"]),
                    "_source": item,
                }
            )

        return actions

    async def _get_target_index_internal(
        self,
        index_selector,
        collection_id: str,
        product: Dict[str, Any],
        check_size: bool = True,
    ) -> str:
        """Get target index with size checking internally.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data.
            check_size (bool): Whether to check index size limits.

        Returns:
            str: Target index name.
        """
        product_datetime = self.datetime_manager.validate_product_datetime(product)
        datetime_range = {"gte": product_datetime, "lte": product_datetime}
        target_index = await index_selector.select_indexes(
            [collection_id], datetime_range
        )
        all_indexes = await index_selector.get_collection_indexes(collection_id)

        if not all_indexes:
            target_index = await self.datetime_manager.handle_new_collection(
                collection_id, product_datetime
            )
            await index_selector.refresh_cache()
            return target_index

        all_indexes.sort()
        start_date = extract_date(product_datetime)
        end_date = extract_first_date_from_index(all_indexes[0])

        if start_date < end_date:
            alias = await self.datetime_manager.handle_early_date(
                collection_id, start_date, end_date
            )
            await index_selector.refresh_cache()
            return alias

        if target_index != all_indexes[-1]:
            return target_index

        if check_size and await self.datetime_manager.size_manager.is_index_oversized(
            target_index
        ):
            target_index = await self.datetime_manager.handle_oversized_index(
                collection_id, target_index, product_datetime
            )
            await index_selector.refresh_cache()

        return target_index

    async def _ensure_indexes_exist(
        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
    ):
        """Ensure necessary indexes exist for the items.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.
        """
        all_indexes = await index_selector.get_collection_indexes(collection_id)

        if not all_indexes:
            first_item = items[0]
            await self.index_operations.create_datetime_index(
                self.client,
                collection_id,
                extract_date(first_item["properties"]["datetime"]),
            )
            await index_selector.refresh_cache()

    async def _check_and_handle_oversized_index(
        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
    ) -> None:
        """Check if index is oversized and create new index if needed.

        Checks if the index where the first item would be inserted is oversized.
        If so, creates a new index starting from the next day.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            None
        """
        first_item = items[0]
        first_item_index = await self._get_target_index_internal(
            index_selector, collection_id, first_item, check_size=False
        )

        all_indexes = await index_selector.get_collection_indexes(collection_id)
        all_indexes.sort()
        latest_index = all_indexes[-1]

        if first_item_index != latest_index:
            return None

        if not await self.datetime_manager.size_manager.is_index_oversized(
            first_item_index
        ):
            return None

        latest_item = await self.index_operations.find_latest_item_in_index(
            self.client, latest_index
        )
        product_datetime = latest_item["_source"]["properties"]["datetime"]
        end_date = extract_date(product_datetime)
        await self.index_operations.update_index_alias(
            self.client, str(end_date), latest_index
        )
        next_day_start = end_date + timedelta(days=1)
        await self.index_operations.create_datetime_index(
            self.client, collection_id, str(next_day_start)
        )
        await index_selector.refresh_cache()


class SimpleIndexInserter(BaseIndexInserter):
    """Simple async index insertion strategy."""

    def __init__(self, index_operations: IndexOperations, client: Any):
        """Initialize the async simple index inserter.

        Args:
            index_operations (IndexOperations): Search engine adapter instance.
            client: Async search engine client instance.
        """
        self.search_adapter = index_operations
        self.client = client

    @staticmethod
    def should_create_collection_index() -> bool:
        """Whether this strategy requires collection index creation.

        Returns:
            bool: True, as the simple strategy creates collection indexes.
        """
        return True

    async def create_simple_index(self, client: Any, collection_id: str) -> str:
        """Create a simple index asynchronously.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.

        Returns:
            str: Created index name.
        """
        return await self.search_adapter.create_simple_index(client, collection_id)

    async def get_target_index(
        self, collection_id: str, product: Dict[str, Any]
    ) -> str:
        """Get target index (always the collection alias).

        Args:
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data (not used in simple strategy).

        Returns:
            str: Collection alias name.
        """
        return index_alias_by_collection_id(collection_id)

    async def prepare_bulk_actions(
        self, collection_id: str, items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Prepare bulk actions for simple indexing.

        Args:
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            List[Dict[str, Any]]: List of bulk actions with collection alias as target.
        """
        target_index = index_alias_by_collection_id(collection_id)
        return [
            {
                "_index": target_index,
                "_id": mk_item_id(item["id"], item["collection"]),
                "_source": item,
            }
            for item in items
        ]
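For orientation, here is a minimal usage sketch of the simple strategy; it is not part of the diff. It assumes IndexOperations() (defined in index_operations.py, also new in this release) needs no constructor arguments, and the collection id and item payload are made up for illustration. SimpleIndexInserter.prepare_bulk_actions never touches the client, so None stands in for it here.

# Hypothetical usage sketch, not part of the diff.
import asyncio

from stac_fastapi.sfeos_helpers.search_engine.index_operations import IndexOperations
from stac_fastapi.sfeos_helpers.search_engine.inserters import SimpleIndexInserter


async def main() -> None:
    # Assumption: IndexOperations() takes no constructor arguments.
    inserter = SimpleIndexInserter(IndexOperations(), client=None)
    items = [
        {
            "id": "item-001",  # illustrative item id
            "collection": "sentinel-2",  # illustrative collection id
            "properties": {"datetime": "2024-06-01T00:00:00Z"},
        }
    ]
    actions = await inserter.prepare_bulk_actions("sentinel-2", items)
    # Each action targets the collection alias and wraps the raw item:
    # {"_index": "<collection alias>", "_id": mk_item_id(...), "_source": item}
    print(actions)


asyncio.run(main())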
stac_fastapi/sfeos_helpers/search_engine/managers.py (new file)
@@ -0,0 +1,198 @@
"""Index management utilities."""

import logging
import os
from datetime import datetime, timedelta
from typing import Any, Dict

from fastapi import HTTPException, status

from stac_fastapi.sfeos_helpers.database import (
    extract_date,
    extract_first_date_from_index,
)

from .index_operations import IndexOperations

logger = logging.getLogger(__name__)


class IndexSizeManager:
    """Manages index size limits and operations."""

    def __init__(self, client: Any):
        """Initialize the index size manager.

        Args:
            client: Search engine client instance.
        """
        self.client = client
        self.max_size_gb = self._get_max_size_from_env()

    async def get_index_size_in_gb(self, index_name: str) -> float:
        """Get index size in gigabytes asynchronously.

        Args:
            index_name (str): Name of the index to check.

        Returns:
            float: Size of the index in gigabytes.
        """
        data = await self.client.indices.stats(index=index_name)
        return data["_all"]["primaries"]["store"]["size_in_bytes"] / 1e9

    async def is_index_oversized(self, index_name: str) -> bool:
        """Check if index exceeds size limit asynchronously.

        Args:
            index_name (str): Name of the index to check.

        Returns:
            bool: True if index exceeds size limit, False otherwise.
        """
        size_gb = await self.get_index_size_in_gb(index_name)
        is_oversized = size_gb > self.max_size_gb
        gb_milestone = int(size_gb)
        if gb_milestone > 0:
            logger.info(f"Index '{index_name}' size: {gb_milestone}GB")

        if is_oversized:
            logger.warning(
                f"Index '{index_name}' is oversized: {size_gb:.2f} GB "
                f"(limit: {self.max_size_gb} GB)"
            )

        return is_oversized

    @staticmethod
    def _get_max_size_from_env() -> float:
        """Get max size from environment variable with error handling.

        Returns:
            float: Maximum index size in GB. Falls back to 25.0 and logs a
                warning if the environment variable is missing or invalid.
        """
        env_value = os.getenv("DATETIME_INDEX_MAX_SIZE_GB", "25")

        try:
            max_size = float(env_value)
            if max_size <= 0:
                raise ValueError(
                    f"DATETIME_INDEX_MAX_SIZE_GB must be positive, got: {max_size}"
                )
            return max_size
        except (ValueError, TypeError):
            error_msg = (
                f"Invalid value for DATETIME_INDEX_MAX_SIZE_GB environment variable: "
                f"'{env_value}'. Must be a positive number. Using default value 25.0 GB."
            )
            logger.warning(error_msg)

            return 25.0


class DatetimeIndexManager:
    """Manages datetime-based index operations."""

    def __init__(self, client: Any, index_operations: IndexOperations):
        """Initialize the datetime index manager.

        Args:
            client: Search engine client instance.
            index_operations (IndexOperations): Search engine adapter instance.
        """
        self.client = client
        self.index_operations = index_operations
        self.size_manager = IndexSizeManager(client)

    @staticmethod
    def validate_product_datetime(product: Dict[str, Any]) -> str:
        """Validate and extract datetime from product.

        Args:
            product (Dict[str, Any]): Product data containing datetime information.

        Returns:
            str: Validated product datetime.

        Raises:
            HTTPException: If product datetime is missing or invalid.
        """
        product_datetime = product["properties"]["datetime"]
        if not product_datetime:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Product datetime is required for indexing",
            )
        return product_datetime

    async def handle_new_collection(
        self, collection_id: str, product_datetime: str
    ) -> str:
        """Handle index creation for new collection asynchronously.

        Args:
            collection_id (str): Collection identifier.
            product_datetime (str): Product datetime for index naming.

        Returns:
            str: Created index name.
        """
        target_index = await self.index_operations.create_datetime_index(
            self.client, collection_id, extract_date(product_datetime)
        )
        logger.info(
            f"Successfully created index '{target_index}' for collection '{collection_id}'"
        )
        return target_index

    async def handle_early_date(
        self, collection_id: str, start_date: datetime, end_date: datetime
    ) -> str:
        """Handle product with date earlier than existing indexes asynchronously.

        Args:
            collection_id (str): Collection identifier.
            start_date (datetime): Start date for the new index.
            end_date (datetime): End date for alias update.

        Returns:
            str: Updated alias name.
        """
        old_alias = self.index_operations.create_alias_name(
            collection_id, str(end_date)
        )
        new_alias = self.index_operations.create_alias_name(
            collection_id, str(start_date)
        )
        await self.index_operations.change_alias_name(self.client, old_alias, new_alias)
        return new_alias

    async def handle_oversized_index(
        self, collection_id: str, target_index: str, product_datetime: str
    ) -> str:
        """Handle index that exceeds size limit asynchronously.

        Args:
            collection_id (str): Collection identifier.
            target_index (str): Current target index name.
            product_datetime (str): Product datetime for new index.

        Returns:
            str: New or updated index name.
        """
        end_date = extract_date(product_datetime)
        latest_index_start = extract_first_date_from_index(target_index)

        if end_date != latest_index_start:
            await self.index_operations.update_index_alias(
                self.client, str(end_date), target_index
            )
            target_index = await self.index_operations.create_datetime_index(
                self.client, collection_id, str(end_date + timedelta(days=1))
            )

        return target_index
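The size cap comes entirely from the DATETIME_INDEX_MAX_SIZE_GB environment variable, with a 25 GB default. A small sketch of the fallback behavior, grounded in the code above; passing client=None is safe here because the constructor only reads the environment:

import os

from stac_fastapi.sfeos_helpers.search_engine.managers import IndexSizeManager

os.environ["DATETIME_INDEX_MAX_SIZE_GB"] = "50"
assert IndexSizeManager(client=None).max_size_gb == 50.0  # explicit cap honored

os.environ["DATETIME_INDEX_MAX_SIZE_GB"] = "not-a-number"
assert IndexSizeManager(client=None).max_size_gb == 25.0  # invalid -> warning, default

os.environ["DATETIME_INDEX_MAX_SIZE_GB"] = "-5"
assert IndexSizeManager(client=None).max_size_gb == 25.0  # non-positive -> warning, default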
stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py (new file)
@@ -0,0 +1,15 @@
"""Index selection strategies package."""

from .base import BaseIndexSelector
from .cache_manager import IndexAliasLoader, IndexCacheManager
from .factory import IndexSelectorFactory
from .selectors import DatetimeBasedIndexSelector, UnfilteredIndexSelector

__all__ = [
    "IndexCacheManager",
    "IndexAliasLoader",
    "DatetimeBasedIndexSelector",
    "UnfilteredIndexSelector",
    "IndexSelectorFactory",
    "BaseIndexSelector",
]
stac_fastapi/sfeos_helpers/search_engine/selection/base.py (new file)
@@ -0,0 +1,30 @@
"""Base classes for index selection strategies."""

from abc import ABC, abstractmethod
from typing import Dict, List, Optional


class BaseIndexSelector(ABC):
    """Base class for async index selectors."""

    @abstractmethod
    async def select_indexes(
        self,
        collection_ids: Optional[List[str]],
        datetime_search: Dict[str, Optional[str]],
    ) -> str:
        """Select appropriate indexes asynchronously.

        Args:
            collection_ids (Optional[List[str]]): List of collection IDs to filter by.
            datetime_search (Dict[str, Optional[str]]): Datetime search criteria.

        Returns:
            str: Comma-separated string of selected index names.
        """
        pass

    @abstractmethod
    async def refresh_cache(self):
        """Refresh cache (no-op for unfiltered selector)."""
        pass
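Concrete implementations (DatetimeBasedIndexSelector, UnfilteredIndexSelector) live in selectors.py. As a shape reference only, a minimal hypothetical subclass satisfying this ABC might look like the following; it is not part of the package:

from typing import Dict, List, Optional

from stac_fastapi.sfeos_helpers.mappings import ITEMS_INDEX_PREFIX
from stac_fastapi.sfeos_helpers.search_engine.selection.base import BaseIndexSelector


class MatchAllIndexSelector(BaseIndexSelector):  # hypothetical example class
    """Fallback selector that always targets every items index."""

    async def select_indexes(
        self,
        collection_ids: Optional[List[str]],
        datetime_search: Dict[str, Optional[str]],
    ) -> str:
        # Ignore both filters and return a wildcard over all items indexes.
        return f"{ITEMS_INDEX_PREFIX}*"

    async def refresh_cache(self):
        # Nothing is cached, so there is nothing to refresh.
        pass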
stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py (new file)
@@ -0,0 +1,127 @@
"""Cache management for index selection strategies."""

import threading
import time
from collections import defaultdict
from typing import Any, Dict, List, Optional

from stac_fastapi.sfeos_helpers.database import index_alias_by_collection_id
from stac_fastapi.sfeos_helpers.mappings import ITEMS_INDEX_PREFIX


class IndexCacheManager:
    """Manages caching of index aliases with expiration."""

    def __init__(self, cache_ttl_seconds: int = 3600):
        """Initialize the cache manager.

        Args:
            cache_ttl_seconds (int): Time-to-live for cache entries in seconds.
        """
        self._cache: Optional[Dict[str, List[str]]] = None
        self._timestamp: float = 0
        self._ttl = cache_ttl_seconds
        self._lock = threading.Lock()

    @property
    def is_expired(self) -> bool:
        """Check if the cache has expired.

        Returns:
            bool: True if cache is expired, False otherwise.
        """
        return time.time() - self._timestamp > self._ttl

    def get_cache(self) -> Optional[Dict[str, List[str]]]:
        """Get the current cache if not expired.

        Returns:
            Optional[Dict[str, List[str]]]: Cache data if valid, None if expired.
        """
        with self._lock:
            if self.is_expired:
                return None
            return {k: v.copy() for k, v in self._cache.items()}

    def set_cache(self, data: Dict[str, List[str]]) -> None:
        """Set cache data and update timestamp.

        Args:
            data (Dict[str, List[str]]): Cache data to store.
        """
        self._cache = data
        self._timestamp = time.time()

    def clear_cache(self) -> None:
        """Clear the cache and reset timestamp."""
        self._cache = None
        self._timestamp = 0


class IndexAliasLoader:
    """Asynchronous loader for index aliases."""

    def __init__(self, client: Any, cache_manager: IndexCacheManager):
        """Initialize the async alias loader.

        Args:
            client: Async search engine client instance.
            cache_manager (IndexCacheManager): Cache manager instance.
        """
        self.client = client
        self.cache_manager = cache_manager

    async def load_aliases(self) -> Dict[str, List[str]]:
        """Load index aliases from search engine.

        Returns:
            Dict[str, List[str]]: Mapping of base aliases to item aliases.
        """
        response = await self.client.indices.get_alias(index=f"{ITEMS_INDEX_PREFIX}*")
        result = defaultdict(list)
        for index_info in response.values():
            aliases = index_info.get("aliases", {})
            items_aliases = sorted(
                [
                    alias
                    for alias in aliases.keys()
                    if alias.startswith(ITEMS_INDEX_PREFIX)
                ]
            )

            if items_aliases:
                result[items_aliases[0]].extend(items_aliases[1:])

        self.cache_manager.set_cache(result)
        return result

    async def get_aliases(self) -> Dict[str, List[str]]:
        """Get aliases from cache or load if expired.

        Returns:
            Dict[str, List[str]]: Alias mapping data.
        """
        cached = self.cache_manager.get_cache()
        if cached is not None:
            return cached
        return await self.load_aliases()

    async def refresh_aliases(self) -> Dict[str, List[str]]:
        """Force refresh aliases from search engine.

        Returns:
            Dict[str, List[str]]: Fresh alias mapping data.
        """
        return await self.load_aliases()

    async def get_collection_indexes(self, collection_id: str) -> List[str]:
        """Get all index aliases for a specific collection.

        Args:
            collection_id (str): Collection identifier.

        Returns:
            List[str]: List of index aliases for the collection.
        """
        aliases = await self.get_aliases()
        return aliases.get(index_alias_by_collection_id(collection_id), [])
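Expiry here is purely time-based, with no background eviction: an expired cache simply reads as None and the caller reloads. A short sketch of the TTL round-trip; the alias names are illustrative, not the package's actual naming scheme:

import time

from stac_fastapi.sfeos_helpers.search_engine.selection import IndexCacheManager

cache = IndexCacheManager(cache_ttl_seconds=1)
cache.set_cache({"items_my-collection": ["items_my-collection_2024-06-01"]})

assert cache.get_cache() is not None  # fresh: a copy is served from memory
time.sleep(1.1)
assert cache.get_cache() is None  # expired: caller reloads, e.g. via IndexAliasLoader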
|