sfeos-helpers 6.0.0__py3-none-any.whl → 6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sfeos_helpers-6.0.0.dist-info → sfeos_helpers-6.2.0.dist-info}/METADATA +92 -15
- sfeos_helpers-6.2.0.dist-info/RECORD +32 -0
- stac_fastapi/sfeos_helpers/aggregation/client.py +5 -2
- stac_fastapi/sfeos_helpers/database/__init__.py +5 -1
- stac_fastapi/sfeos_helpers/database/datetime.py +64 -3
- stac_fastapi/sfeos_helpers/database/index.py +59 -2
- stac_fastapi/sfeos_helpers/database/query.py +32 -0
- stac_fastapi/sfeos_helpers/search_engine/__init__.py +27 -0
- stac_fastapi/sfeos_helpers/search_engine/base.py +51 -0
- stac_fastapi/sfeos_helpers/search_engine/factory.py +36 -0
- stac_fastapi/sfeos_helpers/search_engine/index_operations.py +167 -0
- stac_fastapi/sfeos_helpers/search_engine/inserters.py +309 -0
- stac_fastapi/sfeos_helpers/search_engine/managers.py +198 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py +15 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/base.py +30 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +127 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/factory.py +37 -0
- stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py +129 -0
- stac_fastapi/sfeos_helpers/version.py +1 -1
- sfeos_helpers-6.0.0.dist-info/RECORD +0 -21
- {sfeos_helpers-6.0.0.dist-info → sfeos_helpers-6.2.0.dist-info}/WHEEL +0 -0
- {sfeos_helpers-6.0.0.dist-info → sfeos_helpers-6.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Search engine adapters for different implementations."""
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
from stac_fastapi.sfeos_helpers.database import (
|
|
7
|
+
index_alias_by_collection_id,
|
|
8
|
+
index_by_collection_id,
|
|
9
|
+
)
|
|
10
|
+
from stac_fastapi.sfeos_helpers.mappings import (
|
|
11
|
+
_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE,
|
|
12
|
+
ES_ITEMS_MAPPINGS,
|
|
13
|
+
ES_ITEMS_SETTINGS,
|
|
14
|
+
ITEMS_INDEX_PREFIX,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class IndexOperations:
    """Base class for search engine adapters with common implementations.

    Stateless collection of index/alias helpers: every method receives the
    search engine client explicitly, so one instance can serve any client.
    """

    async def create_simple_index(self, client: Any, collection_id: str) -> str:
        """Create a simple index for the given collection.

        The index is created with the collection-wide alias attached.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.

        Returns:
            str: Created index name.
        """
        index_name = f"{index_by_collection_id(collection_id)}-000001"
        alias_name = index_alias_by_collection_id(collection_id)

        await client.indices.create(
            index=index_name,
            body=self._create_index_body({alias_name: {}}),
            # Ignore HTTP 400 (e.g. "resource_already_exists") so that
            # re-creating an existing index is a no-op.
            params={"ignore": [400]},
        )
        return index_name

    async def create_datetime_index(
        self, client: Any, collection_id: str, start_date: str
    ) -> str:
        """Create a datetime-based index for the given collection.

        The new index gets two aliases: the collection-wide alias and a
        datetime alias encoding ``start_date``.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.
            start_date (str): Start date for the alias.

        Returns:
            str: Created index alias name.
        """
        index_name = self.create_index_name(collection_id)
        alias_name = self.create_alias_name(collection_id, start_date)
        collection_alias = index_alias_by_collection_id(collection_id)
        await client.indices.create(
            index=index_name,
            body=self._create_index_body({collection_alias: {}, alias_name: {}}),
        )
        return alias_name

    @staticmethod
    async def update_index_alias(client: Any, end_date: str, old_alias: str) -> str:
        """Close an open-ended alias by appending its end date.

        Args:
            client: Search engine client instance.
            end_date (str): End date for the alias.
            old_alias (str): Current alias name.

        Returns:
            str: New alias name (``"{old_alias}-{end_date}"``).
        """
        new_alias = f"{old_alias}-{end_date}"
        # Delegate the remove/add swap so the alias-move logic lives in
        # exactly one place (change_alias_name).
        await IndexOperations.change_alias_name(client, old_alias, new_alias)
        return new_alias

    @staticmethod
    async def change_alias_name(client: Any, old_alias: str, new_alias: str) -> None:
        """Move every index currently under ``old_alias`` to ``new_alias``.

        Args:
            client: Search engine client instance.
            old_alias (str): Current alias name.
            new_alias (str): New alias name.

        Returns:
            None
        """
        aliases_info = await client.indices.get_alias(name=old_alias)
        actions = []

        for index_name in aliases_info.keys():
            actions.append({"remove": {"index": index_name, "alias": old_alias}})
            actions.append({"add": {"index": index_name, "alias": new_alias}})

        # A single update_aliases call applies all actions atomically.
        await client.indices.update_aliases(body={"actions": actions})

    @staticmethod
    def create_index_name(collection_id: str) -> str:
        """Create a unique index name from collection ID and uuid4.

        Args:
            collection_id (str): Collection identifier.

        Returns:
            str: Formatted index name.
        """
        cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE)
        return f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{uuid.uuid4()}"

    @staticmethod
    def create_alias_name(collection_id: str, start_date: str) -> str:
        """Create an alias name from collection ID and start date.

        Args:
            collection_id (str): Collection identifier.
            start_date (str): Start date for the alias.

        Returns:
            str: Alias name with initial date.
        """
        cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE)
        return f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{start_date}"

    @staticmethod
    def _create_index_body(aliases: Dict[str, Dict]) -> Dict[str, Any]:
        """Create index body with common settings.

        Args:
            aliases (Dict[str, Dict]): Aliases configuration.

        Returns:
            Dict[str, Any]: Index body configuration.
        """
        return {
            "aliases": aliases,
            "mappings": ES_ITEMS_MAPPINGS,
            "settings": ES_ITEMS_SETTINGS,
        }

    @staticmethod
    async def find_latest_item_in_index(client: Any, index_name: str) -> Dict[str, Any]:
        """Find the most recent item in the specified index.

        Args:
            client: Search engine client instance.
            index_name (str): Name of the index to query.

        Returns:
            Dict[str, Any]: Raw search hit of the item with the latest
            ``properties.datetime`` (the item itself is under ``"_source"``).

        Raises:
            IndexError: If the index contains no items.
        """
        query = {
            "size": 1,
            "sort": [{"properties.datetime": {"order": "desc"}}],
            "_source": ["properties.datetime"],
        }

        response = await client.search(index=index_name, body=query)
        return response["hits"]["hits"][0]
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""Async index insertion strategies."""
|
|
2
|
+
import logging
|
|
3
|
+
from datetime import timedelta
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
from fastapi import HTTPException, status
|
|
7
|
+
|
|
8
|
+
from stac_fastapi.sfeos_helpers.database import (
|
|
9
|
+
extract_date,
|
|
10
|
+
extract_first_date_from_index,
|
|
11
|
+
index_alias_by_collection_id,
|
|
12
|
+
mk_item_id,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from .base import BaseIndexInserter
|
|
16
|
+
from .index_operations import IndexOperations
|
|
17
|
+
from .managers import DatetimeIndexManager
|
|
18
|
+
from .selection import DatetimeBasedIndexSelector
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DatetimeIndexInserter(BaseIndexInserter):
    """Async datetime-based index insertion strategy.

    Routes each item to a date-partitioned index of its collection,
    creating, re-aliasing, and rolling over indexes as needed.
    """

    def __init__(self, client: Any, index_operations: IndexOperations):
        """Initialize the async datetime index inserter.

        Args:
            client: Async search engine client instance.
            index_operations (IndexOperations): Search engine adapter instance.
        """
        self.client = client
        self.index_operations = index_operations
        self.datetime_manager = DatetimeIndexManager(client, index_operations)

    @staticmethod
    def should_create_collection_index() -> bool:
        """Whether this strategy requires collection index creation.

        Returns:
            bool: False, as datetime strategy doesn't create collection indexes.
        """
        return False

    async def create_simple_index(self, client: Any, collection_id: str) -> str:
        """Create a simple index asynchronously.

        Args:
            client: Search engine client instance.
            collection_id (str): Collection identifier.

        Returns:
            str: Created index name.
        """
        return await self.index_operations.create_simple_index(client, collection_id)

    async def get_target_index(
        self, collection_id: str, product: Dict[str, Any]
    ) -> str:
        """Get target index for a single product.

        Args:
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data containing datetime information.

        Returns:
            str: Target index name for the product.
        """
        index_selector = DatetimeBasedIndexSelector(self.client)
        return await self._get_target_index_internal(
            index_selector, collection_id, product, check_size=True
        )

    async def prepare_bulk_actions(
        self, collection_id: str, items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Prepare bulk actions for multiple items.

        Note:
            Sorts ``items`` in place by ``properties.datetime``; the
            caller's list order is modified.

        Args:
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            List[Dict[str, Any]]: List of bulk actions ready for execution.

        Raises:
            HTTPException: 400 if ``items`` is empty.
        """
        if not items:
            msg = "The product list cannot be empty."
            logger.error(msg)
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=msg)

        items.sort(key=lambda item: item["properties"]["datetime"])
        index_selector = DatetimeBasedIndexSelector(self.client)

        await self._ensure_indexes_exist(index_selector, collection_id, items)
        await self._check_and_handle_oversized_index(
            index_selector, collection_id, items
        )

        actions = []
        for item in items:
            target_index = await self._get_target_index_internal(
                index_selector, collection_id, item, check_size=False
            )
            actions.append(
                {
                    "_index": target_index,
                    "_id": mk_item_id(item["id"], item["collection"]),
                    "_source": item,
                }
            )

        return actions

    async def _get_target_index_internal(
        self,
        index_selector,
        collection_id: str,
        product: Dict[str, Any],
        check_size: bool = True,
    ) -> str:
        """Get target index with size checking internally.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            product (Dict[str, Any]): Product data.
            check_size (bool): Whether to check index size limits.

        Returns:
            str: Target index name.
        """
        product_datetime = self.datetime_manager.validate_product_datetime(product)
        datetime_range = {"gte": product_datetime, "lte": product_datetime}
        target_index = await index_selector.select_indexes(
            [collection_id], datetime_range
        )
        all_indexes = await index_selector.get_collection_indexes(collection_id)

        # No indexes yet: create the first datetime index for this collection.
        if not all_indexes:
            target_index = await self.datetime_manager.handle_new_collection(
                collection_id, product_datetime
            )
            await index_selector.refresh_cache()
            return target_index

        all_indexes.sort()
        start_date = extract_date(product_datetime)
        end_date = extract_first_date_from_index(all_indexes[0])

        # Product predates every existing index: shift the earliest alias back.
        if start_date < end_date:
            alias = await self.datetime_manager.handle_early_date(
                collection_id, start_date, end_date
            )
            await index_selector.refresh_cache()

            return alias

        # Only the newest index is subject to rollover; older ones are
        # returned as-is.
        if target_index != all_indexes[-1]:
            return target_index

        if check_size and await self.datetime_manager.size_manager.is_index_oversized(
            target_index
        ):
            target_index = await self.datetime_manager.handle_oversized_index(
                collection_id, target_index, product_datetime
            )
            await index_selector.refresh_cache()

        return target_index

    async def _ensure_indexes_exist(
        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
    ):
        """Ensure necessary indexes exist for the items.

        Creates the first datetime index (named after the earliest item's
        date) when the collection has none yet.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process (assumed
                already sorted by datetime, so ``items[0]`` is the earliest).
        """
        all_indexes = await index_selector.get_collection_indexes(collection_id)

        if not all_indexes:
            first_item = items[0]
            await self.index_operations.create_datetime_index(
                self.client,
                collection_id,
                extract_date(first_item["properties"]["datetime"]),
            )
            await index_selector.refresh_cache()

    async def _check_and_handle_oversized_index(
        self, index_selector, collection_id: str, items: List[Dict[str, Any]]
    ) -> None:
        """Check if index is oversized and create new index if needed.

        Checks if the index where the first item would be inserted is oversized.
        If so, closes its alias at the date of its latest stored item and
        creates a new index starting from the next day.

        Args:
            index_selector: Index selector instance.
            collection_id (str): Collection identifier.
            items (List[Dict[str, Any]]): List of items to process.

        Returns:
            None
        """
        first_item = items[0]
        first_item_index = await self._get_target_index_internal(
            index_selector, collection_id, first_item, check_size=False
        )

        all_indexes = await index_selector.get_collection_indexes(collection_id)
        all_indexes.sort()
        latest_index = all_indexes[-1]

        # Only the latest index can be rolled over.
        if first_item_index != latest_index:
            return None

        if not await self.datetime_manager.size_manager.is_index_oversized(
            first_item_index
        ):
            return None

        latest_item = await self.index_operations.find_latest_item_in_index(
            self.client, latest_index
        )
        product_datetime = latest_item["_source"]["properties"]["datetime"]
        end_date = extract_date(product_datetime)
        await self.index_operations.update_index_alias(
            self.client, str(end_date), latest_index
        )
        next_day_start = end_date + timedelta(days=1)
        await self.index_operations.create_datetime_index(
            self.client, collection_id, str(next_day_start)
        )
        await index_selector.refresh_cache()
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class SimpleIndexInserter(BaseIndexInserter):
|
|
242
|
+
"""Simple async index insertion strategy."""
|
|
243
|
+
|
|
244
|
+
def __init__(self, index_operations: IndexOperations, client: Any):
|
|
245
|
+
"""Initialize the async simple index inserter.
|
|
246
|
+
|
|
247
|
+
Args:
|
|
248
|
+
index_operations (IndexOperations): Search engine adapter instance.
|
|
249
|
+
client: Async search engine client instance.
|
|
250
|
+
"""
|
|
251
|
+
self.search_adapter = index_operations
|
|
252
|
+
self.client = client
|
|
253
|
+
|
|
254
|
+
@staticmethod
|
|
255
|
+
def should_create_collection_index() -> bool:
|
|
256
|
+
"""Whether this strategy requires collection index creation.
|
|
257
|
+
|
|
258
|
+
Returns:
|
|
259
|
+
bool: True, as simple strategy creates collection indexes.
|
|
260
|
+
"""
|
|
261
|
+
return True
|
|
262
|
+
|
|
263
|
+
async def create_simple_index(self, client: Any, collection_id: str) -> str:
|
|
264
|
+
"""Create a simple index asynchronously.
|
|
265
|
+
|
|
266
|
+
Args:
|
|
267
|
+
client: Search engine client instance.
|
|
268
|
+
collection_id (str): Collection identifier.
|
|
269
|
+
|
|
270
|
+
Returns:
|
|
271
|
+
str: Created index name.
|
|
272
|
+
"""
|
|
273
|
+
return await self.search_adapter.create_simple_index(client, collection_id)
|
|
274
|
+
|
|
275
|
+
async def get_target_index(
|
|
276
|
+
self, collection_id: str, product: Dict[str, Any]
|
|
277
|
+
) -> str:
|
|
278
|
+
"""Get target index (always the collection alias).
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
collection_id (str): Collection identifier.
|
|
282
|
+
product (Dict[str, Any]): Product data (not used in simple strategy).
|
|
283
|
+
|
|
284
|
+
Returns:
|
|
285
|
+
str: Collection alias name.
|
|
286
|
+
"""
|
|
287
|
+
return index_alias_by_collection_id(collection_id)
|
|
288
|
+
|
|
289
|
+
async def prepare_bulk_actions(
|
|
290
|
+
self, collection_id: str, items: List[Dict[str, Any]]
|
|
291
|
+
) -> List[Dict[str, Any]]:
|
|
292
|
+
"""Prepare bulk actions for simple indexing.
|
|
293
|
+
|
|
294
|
+
Args:
|
|
295
|
+
collection_id (str): Collection identifier.
|
|
296
|
+
items (List[Dict[str, Any]]): List of items to process.
|
|
297
|
+
|
|
298
|
+
Returns:
|
|
299
|
+
List[Dict[str, Any]]: List of bulk actions with collection alias as target.
|
|
300
|
+
"""
|
|
301
|
+
target_index = index_alias_by_collection_id(collection_id)
|
|
302
|
+
return [
|
|
303
|
+
{
|
|
304
|
+
"_index": target_index,
|
|
305
|
+
"_id": mk_item_id(item["id"], item["collection"]),
|
|
306
|
+
"_source": item,
|
|
307
|
+
}
|
|
308
|
+
for item in items
|
|
309
|
+
]
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Index management utilities."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
from fastapi import HTTPException, status
|
|
9
|
+
|
|
10
|
+
from stac_fastapi.sfeos_helpers.database import (
|
|
11
|
+
extract_date,
|
|
12
|
+
extract_first_date_from_index,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from .index_operations import IndexOperations
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class IndexSizeManager:
    """Manages index size limits and operations."""

    def __init__(self, client: Any):
        """Initialize the index size manager.

        Args:
            client: Search engine client instance.
        """
        self.client = client
        # Size limit in GB, read once from DATETIME_INDEX_MAX_SIZE_GB.
        self.max_size_gb = self._get_max_size_from_env()

    async def get_index_size_in_gb(self, index_name: str) -> float:
        """Get index size in gigabytes asynchronously.

        Measures only primary shards' store size (replicas excluded).

        Args:
            index_name (str): Name of the index to check.

        Returns:
            float: Size of the index in decimal gigabytes (1e9 bytes).
        """
        data = await self.client.indices.stats(index=index_name)
        return data["_all"]["primaries"]["store"]["size_in_bytes"] / 1e9

    async def is_index_oversized(self, index_name: str) -> bool:
        """Check if index exceeds size limit asynchronously.

        Args:
            index_name (str): Name of the index to check.

        Returns:
            bool: True if index exceeds size limit, False otherwise.
        """
        size_gb = await self.get_index_size_in_gb(index_name)
        is_oversized = size_gb > self.max_size_gb
        gb_milestone = int(size_gb)
        # Log whole-GB size once the index passes the 1 GB mark.
        if gb_milestone > 0:
            logger.info(f"Index '{index_name}' size: {gb_milestone}GB")

        if is_oversized:
            logger.warning(
                f"Index '{index_name}' is oversized: {size_gb:.2f} GB "
                f"(limit: {self.max_size_gb} GB)"
            )

        return is_oversized

    @staticmethod
    def _get_max_size_from_env() -> float:
        """Get max size from environment variable with error handling.

        Reads ``DATETIME_INDEX_MAX_SIZE_GB`` (default ``"25"``). A
        non-numeric or non-positive value is logged and replaced by the
        25.0 GB default; this function never raises.

        Returns:
            float: Maximum index size in GB.
        """
        env_value = os.getenv("DATETIME_INDEX_MAX_SIZE_GB", "25")

        try:
            max_size = float(env_value)
        except ValueError:
            # Non-numeric value; fall through to the default below.
            max_size = None

        if max_size is not None and max_size > 0:
            return max_size

        logger.warning(
            f"Invalid value for DATETIME_INDEX_MAX_SIZE_GB environment variable: "
            f"'{env_value}'. Must be a positive number. Using default value 25.0 GB."
        )
        return 25.0
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class DatetimeIndexManager:
|
|
97
|
+
"""Manages datetime-based index operations."""
|
|
98
|
+
|
|
99
|
+
def __init__(self, client: Any, index_operations: IndexOperations):
|
|
100
|
+
"""Initialize the datetime index manager.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
client: Search engine client instance.
|
|
104
|
+
index_operations (IndexOperations): Search engine adapter instance.
|
|
105
|
+
"""
|
|
106
|
+
self.client = client
|
|
107
|
+
self.index_operations = index_operations
|
|
108
|
+
self.size_manager = IndexSizeManager(client)
|
|
109
|
+
|
|
110
|
+
@staticmethod
|
|
111
|
+
def validate_product_datetime(product: Dict[str, Any]) -> str:
|
|
112
|
+
"""Validate and extract datetime from product.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
product (Dict[str, Any]): Product data containing datetime information.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
str: Validated product datetime.
|
|
119
|
+
|
|
120
|
+
Raises:
|
|
121
|
+
HTTPException: If product datetime is missing or invalid.
|
|
122
|
+
"""
|
|
123
|
+
product_datetime = product["properties"]["datetime"]
|
|
124
|
+
if not product_datetime:
|
|
125
|
+
raise HTTPException(
|
|
126
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
127
|
+
detail="Product datetime is required for indexing",
|
|
128
|
+
)
|
|
129
|
+
return product_datetime
|
|
130
|
+
|
|
131
|
+
async def handle_new_collection(
|
|
132
|
+
self, collection_id: str, product_datetime: str
|
|
133
|
+
) -> str:
|
|
134
|
+
"""Handle index creation for new collection asynchronously.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
collection_id (str): Collection identifier.
|
|
138
|
+
product_datetime (str): Product datetime for index naming.
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
str: Created index name.
|
|
143
|
+
"""
|
|
144
|
+
target_index = await self.index_operations.create_datetime_index(
|
|
145
|
+
self.client, collection_id, extract_date(product_datetime)
|
|
146
|
+
)
|
|
147
|
+
logger.info(
|
|
148
|
+
f"Successfully created index '{target_index}' for collection '{collection_id}'"
|
|
149
|
+
)
|
|
150
|
+
return target_index
|
|
151
|
+
|
|
152
|
+
async def handle_early_date(
|
|
153
|
+
self, collection_id: str, start_date: datetime, end_date: datetime
|
|
154
|
+
) -> str:
|
|
155
|
+
"""Handle product with date earlier than existing indexes asynchronously.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
collection_id (str): Collection identifier.
|
|
159
|
+
start_date (datetime): Start date for the new index.
|
|
160
|
+
end_date (datetime): End date for alias update.
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
str: Updated alias name.
|
|
164
|
+
"""
|
|
165
|
+
old_alias = self.index_operations.create_alias_name(
|
|
166
|
+
collection_id, str(end_date)
|
|
167
|
+
)
|
|
168
|
+
new_alias = self.index_operations.create_alias_name(
|
|
169
|
+
collection_id, str(start_date)
|
|
170
|
+
)
|
|
171
|
+
await self.index_operations.change_alias_name(self.client, old_alias, new_alias)
|
|
172
|
+
return new_alias
|
|
173
|
+
|
|
174
|
+
async def handle_oversized_index(
|
|
175
|
+
self, collection_id: str, target_index: str, product_datetime: str
|
|
176
|
+
) -> str:
|
|
177
|
+
"""Handle index that exceeds size limit asynchronously.
|
|
178
|
+
|
|
179
|
+
Args:
|
|
180
|
+
collection_id (str): Collection identifier.
|
|
181
|
+
target_index (str): Current target index name.
|
|
182
|
+
product_datetime (str): Product datetime for new index.
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
str: New or updated index name.
|
|
186
|
+
"""
|
|
187
|
+
end_date = extract_date(product_datetime)
|
|
188
|
+
latest_index_start = extract_first_date_from_index(target_index)
|
|
189
|
+
|
|
190
|
+
if end_date != latest_index_start:
|
|
191
|
+
await self.index_operations.update_index_alias(
|
|
192
|
+
self.client, str(end_date), target_index
|
|
193
|
+
)
|
|
194
|
+
target_index = await self.index_operations.create_datetime_index(
|
|
195
|
+
self.client, collection_id, str(end_date + timedelta(days=1))
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
return target_index
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Index selection strategies package."""
|
|
2
|
+
|
|
3
|
+
from .base import BaseIndexSelector
|
|
4
|
+
from .cache_manager import IndexAliasLoader, IndexCacheManager
|
|
5
|
+
from .factory import IndexSelectorFactory
|
|
6
|
+
from .selectors import DatetimeBasedIndexSelector, UnfilteredIndexSelector
|
|
7
|
+
|
|
8
|
+
# Names re-exported as the public API of the selection subpackage.
__all__ = [
    "IndexCacheManager",
    "IndexAliasLoader",
    "DatetimeBasedIndexSelector",
    "UnfilteredIndexSelector",
    "IndexSelectorFactory",
    "BaseIndexSelector",
]
|