sfeos-helpers 6.9.0__py3-none-any.whl → 6.10.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

sfeos_helpers-6.9.0.dist-info/METADATA → sfeos_helpers-6.10.1.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sfeos_helpers
- Version: 6.9.0
+ Version: 6.10.1
  Summary: Helper library for the Elasticsearch and Opensearch stac-fastapi backends.
  Project-URL: Homepage, https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
  License: MIT
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
  Classifier: Programming Language :: Python :: 3.14
  Requires-Python: >=3.11
- Requires-Dist: stac-fastapi-core==6.9.0
+ Requires-Dist: stac-fastapi-core==6.10.1
  Description-Content-Type: text/markdown

  # sfeos-helpers

sfeos_helpers-6.9.0.dist-info/RECORD → sfeos_helpers-6.10.1.dist-info/RECORD
@@ -1,17 +1,18 @@
- stac_fastapi/sfeos_helpers/mappings.py,sha256=NTa2xMYLtF43qWEmCHuAVgC2mihgv2UKdwg_iv9I5KE,12952
- stac_fastapi/sfeos_helpers/version.py,sha256=Zec8murh7xqMR7l6fB74XJcq2QdyNqRgOdhLMwBYPAI,45
+ stac_fastapi/sfeos_helpers/mappings.py,sha256=U0zrBZ-foLtPy25PwSJSJW5EFBMin03aTrw53natt3g,12964
+ stac_fastapi/sfeos_helpers/version.py,sha256=AD8oiYJE5bGoF51nIlCATcNL2aBRMbKASqetnSyzqGA,46
  stac_fastapi/sfeos_helpers/aggregation/README.md,sha256=SDlvCOpKyaJrlJvx84T2RzCnGALe_PK51zNeo3RP9ac,2122
  stac_fastapi/sfeos_helpers/aggregation/__init__.py,sha256=Mym17lFh90by1GnoQgMyIKAqRNJnvCgVSXDYzjBiPQk,1210
- stac_fastapi/sfeos_helpers/aggregation/client.py,sha256=5aMZPMBbkS17QGLnuDCp9MyAeMj-Z3AOkzZ_JCRNohk,17921
+ stac_fastapi/sfeos_helpers/aggregation/client.py,sha256=eaH1kTBRuBbt1Ikqk0rR8hcY3LEFr9_06UgMGP44ZcQ,17861
  stac_fastapi/sfeos_helpers/aggregation/format.py,sha256=qUW1jjh2EEjy-V7riliFR77grpi-AgsTmP76z60K5Lo,2011
  stac_fastapi/sfeos_helpers/database/README.md,sha256=TVYFDD4PqDD57ZsWBv4i4LawaL_DAEIOjM6OQuqwLAU,4049
- stac_fastapi/sfeos_helpers/database/__init__.py,sha256=Kvnz8hpXq_sSz8K5OW3PoPsvh9864Vv1zWhI5hxgd4o,2891
- stac_fastapi/sfeos_helpers/database/datetime.py,sha256=LMh8dFjifpjfB_IKvOqQ7bQWMy4SXAYvWaLHMsad4tg,6806
+ stac_fastapi/sfeos_helpers/database/__init__.py,sha256=DATeB7mvDzu26O09QJG_DOmFcKRClMZ9x47224-2W68,3655
+ stac_fastapi/sfeos_helpers/database/catalogs.py,sha256=Sux7VAeCLWTk6845pt2YHFEJaecPxZ5vP2RT0G-M45k,6425
+ stac_fastapi/sfeos_helpers/database/datetime.py,sha256=yVD-FQCmjjJGM0CxAKqzOWfdotHvVup1r8Pvd68WjTU,8578
  stac_fastapi/sfeos_helpers/database/document.py,sha256=LtjX15gvaOuZC_k2t_oQhys_c-zRTLN5rwX0hNJkHnM,1725
- stac_fastapi/sfeos_helpers/database/index.py,sha256=fddGwVJ3D5zuVj2DyTCujGk_hCXCV4ATCZRxAblo29s,6750
+ stac_fastapi/sfeos_helpers/database/index.py,sha256=Eqa94G_lYBGYmscNTC0NPuH9n9EkqGyxV1zidf_xB8I,8587
  stac_fastapi/sfeos_helpers/database/mapping.py,sha256=4vGUuBLGoBOkQ984pYOciiz9UUWb4sbZyt-iViIsmdM,3809
- stac_fastapi/sfeos_helpers/database/query.py,sha256=bbSYe0cLC7oFbhkHR5WTKCF7Ca9iZI3fdanD90KYN98,9476
- stac_fastapi/sfeos_helpers/database/utils.py,sha256=CLtZgoUT37oklc9MsExXsxDviv4bzK-ZP7oxAOXS32Y,11780
+ stac_fastapi/sfeos_helpers/database/query.py,sha256=Qe78ZmFTyWMKBmz8PpJ3CUiWuoTojbTo9seSxNteC9E,9477
+ stac_fastapi/sfeos_helpers/database/utils.py,sha256=p3kmY9fC08l_OKYGfKQWqQpjX4K27R_2iBv8xM8N58o,14751
  stac_fastapi/sfeos_helpers/filter/README.md,sha256=Rb5qHmDkI-7-o3I82Lb_zfmrviqUj958wef021xI6pQ,1955
  stac_fastapi/sfeos_helpers/filter/__init__.py,sha256=n3zL_MhEGOoxMz1KeijyK_UKiZ0MKPl90zHtYI5RAy8,1557
  stac_fastapi/sfeos_helpers/filter/client.py,sha256=_LX3mlW9MYhoMGxvyi7Eg-LJICElQXYGbzzhSPXYRtw,6517
@@ -19,16 +20,16 @@ stac_fastapi/sfeos_helpers/filter/cql2.py,sha256=Cg9kRYD9CVkVSyRqOyB5oVXmlyteSn2
  stac_fastapi/sfeos_helpers/filter/transform.py,sha256=wu6t7jbhgK9JIROQ5W82sAyCn6lHnBwuoQzb6o63luI,5725
  stac_fastapi/sfeos_helpers/models/patch.py,sha256=krgopR6UvRV7NthfO61OwjDw9q2OHNLlk6WWaUyBrwk,3113
  stac_fastapi/sfeos_helpers/search_engine/__init__.py,sha256=Bi0cAtul3FuLjFceTPtEcaWNBfmUX5vKaqDvbSUAm0o,754
- stac_fastapi/sfeos_helpers/search_engine/base.py,sha256=9KOLW3NjW9PzWQzqLuhIjQU7FOHdDnB3ZNwDq469JZU,1400
+ stac_fastapi/sfeos_helpers/search_engine/base.py,sha256=ww1yCDVuXCK7nIYwmmyxywIvDeB-l5ITVoxkSkdOoT0,2337
  stac_fastapi/sfeos_helpers/search_engine/factory.py,sha256=nPty3L8esypSVIzl5IKfmqQ1hVUIjMQ183Ksistr1bM,1066
- stac_fastapi/sfeos_helpers/search_engine/index_operations.py,sha256=E4ipJVhAW_asah9QN-Coc2T0ir9rsPK6scDDt4jcdUA,5820
- stac_fastapi/sfeos_helpers/search_engine/inserters.py,sha256=o-I_4OowMJetMwRFPdq8Oix_DAkMNGBw4fYyoa5W6s0,10562
- stac_fastapi/sfeos_helpers/search_engine/managers.py,sha256=nldomKmw8iQfOxeGZbBRGG_rWk-vB5Hy_cOjJ2e0ArE,6454
+ stac_fastapi/sfeos_helpers/search_engine/index_operations.py,sha256=AYc8lJ7TAK9tADw4kHDKvzF3nF8uUaCPPXRhWQzNHFo,7809
+ stac_fastapi/sfeos_helpers/search_engine/inserters.py,sha256=spyGwbKLT7u9qOMdOCyPS5zoLh8p4KTEPOOy04Uvrho,13780
+ stac_fastapi/sfeos_helpers/search_engine/managers.py,sha256=fODO2aAiRhiuEEJic6JgIIR-RnI-EjUCWse0YkEdvM4,18000
  stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py,sha256=qKd4KzZkERwF_yhIeFcjAUnq5vQarr3CuXxE3SWmt6c,441
- stac_fastapi/sfeos_helpers/search_engine/selection/base.py,sha256=106c4FK50cgMmTpPJkWdgbExPkU2yIH4Wq684Ww-fYE,859
- stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py,sha256=jG5XYWocCfhMgopA0bknGdw6R6zZ1cjanlX2554TFTA,4039
+ stac_fastapi/sfeos_helpers/search_engine/selection/base.py,sha256=YANRYcosHsZQ1jnkjBUFoNF5V_Xh2efg9BGh411MGxI,1067
+ stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py,sha256=dLIZRWSQ-t4YWXHDw4pyr1My3G26ewIg53zkKUvWGbo,6086
  stac_fastapi/sfeos_helpers/search_engine/selection/factory.py,sha256=vbgNVCUW2lviePqzpgsPLxp6IEqcX3GHiahqN2oVObA,1305
- stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py,sha256=q83nfCfNfLUqtkHpORwNHNRU9Pa-heeaDIPO0RlHb-8,4779
- sfeos_helpers-6.9.0.dist-info/METADATA,sha256=yuWwHyz0TyeDHyyetqj7eqhQ-FoflATvVQqIPsXzalI,3114
- sfeos_helpers-6.9.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- sfeos_helpers-6.9.0.dist-info/RECORD,,
+ stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py,sha256=L_NKw1BNNBgu6o6HNupUFinYDFUex2GItr2jABx_0UE,7116
+ sfeos_helpers-6.10.1.dist-info/METADATA,sha256=z6T2mi71Kb-r3QHwekALGZSDVf7of943gssfB9---Rs,3116
+ sfeos_helpers-6.10.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ sfeos_helpers-6.10.1.dist-info/RECORD,,

stac_fastapi/sfeos_helpers/aggregation/client.py
@@ -316,8 +316,6 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
              search, datetime_search = self.database.apply_datetime_filter(
                  search=search, datetime=aggregate_request.datetime
              )
-         else:
-             datetime_search = {"gte": None, "lte": None}

          if aggregate_request.bbox:
              bbox = aggregate_request.bbox
@@ -416,7 +414,7 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
                      geometry_geohash_grid_precision,
                      geometry_geotile_grid_precision,
                      datetime_frequency_interval,
-                     datetime_search,
+                     aggregate_request.datetime,
                  )
              except Exception as error:
                  if not isinstance(error, IndexError):

stac_fastapi/sfeos_helpers/database/__init__.py
@@ -30,7 +30,19 @@ Function Naming Conventions:
  """

  # Re-export all functions for backward compatibility
- from .datetime import extract_date, extract_first_date_from_index, return_date
+ from .catalogs import (
+     search_children_with_pagination_shared,
+     search_collections_by_parent_id_shared,
+     search_sub_catalogs_with_pagination_shared,
+     update_catalog_in_index_shared,
+ )
+ from .datetime import (
+     extract_date,
+     extract_first_date_from_index,
+     extract_last_date_from_index,
+     is_index_closed,
+     return_date,
+ )
  from .document import mk_actions, mk_item_id
  from .index import (
      create_index_templates_shared,
@@ -48,9 +60,21 @@ from .query import (
      apply_intersects_filter_shared,
      populate_sort_shared,
  )
- from .utils import add_bbox_shape_to_collection, get_bool_env, validate_refresh
+ from .utils import (
+     ItemAlreadyExistsError,
+     add_bbox_shape_to_collection,
+     check_item_exists_in_alias,
+     check_item_exists_in_alias_sync,
+     get_bool_env,
+     validate_refresh,
+ )

  __all__ = [
+     # Catalog operations
+     "search_collections_by_parent_id_shared",
+     "search_sub_catalogs_with_pagination_shared",
+     "update_catalog_in_index_shared",
+     "search_children_with_pagination_shared",
      # Index operations
      "create_index_templates_shared",
      "delete_item_index_shared",
@@ -73,8 +97,14 @@ __all__ = [
      "validate_refresh",
      "get_bool_env",
      "add_bbox_shape_to_collection",
+     "check_item_exists_in_alias",
+     "check_item_exists_in_alias_sync",
+     # Errors
+     "ItemAlreadyExistsError",
      # Datetime utilities
      "return_date",
      "extract_date",
      "extract_first_date_from_index",
+     "extract_last_date_from_index",
+     "is_index_closed",
  ]
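
The re-export block keeps the package's flat import surface intact: everything new in `catalogs.py`, `datetime.py`, and `utils.py` is reachable from the package root, so downstream code does not need submodule paths. A minimal sketch (names taken from the `__all__` list above):

```python
# The flat namespace means existing imports keep working and the new helpers
# sit at the same level as the old ones.
from stac_fastapi.sfeos_helpers.database import (
    ItemAlreadyExistsError,
    is_index_closed,
    search_children_with_pagination_shared,
)
```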

stac_fastapi/sfeos_helpers/database/catalogs.py
@@ -0,0 +1,190 @@
+ """Catalog-related database operations for Elasticsearch/OpenSearch.
+
+ This module provides helper functions for catalog operations that require
+ direct Elasticsearch/OpenSearch client access. These functions are used by
+ the CatalogsExtension to maintain database-agnostic code in the core module.
+ """
+
+ import logging
+ from typing import Any, Dict, List, Optional
+
+ from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX
+
+ logger = logging.getLogger(__name__)
+
+
+ async def search_collections_by_parent_id_shared(
+     es_client: Any, catalog_id: str, size: int = 10000
+ ) -> List[Dict[str, Any]]:
+     """Search for collections that have a specific catalog as a parent.
+
+     Args:
+         es_client: Elasticsearch/OpenSearch client instance.
+         catalog_id: The catalog ID to search for in parent_ids.
+         size: Maximum number of results to return (default: 10000).
+
+     Returns:
+         List of collection documents from the search results.
+     """
+     query_body = {"query": {"term": {"parent_ids": catalog_id}}, "size": size}
+     try:
+         search_result = await es_client.search(index=COLLECTIONS_INDEX, body=query_body)
+         return [hit["_source"] for hit in search_result["hits"]["hits"]]
+     except Exception as e:
+         logger.error(f"Error searching for collections with parent {catalog_id}: {e}")
+         return []
+
+
+ async def search_sub_catalogs_with_pagination_shared(
+     es_client: Any,
+     catalog_id: str,
+     limit: int = 10,
+     token: Optional[str] = None,
+ ) -> tuple[List[Dict[str, Any]], int, Optional[str]]:
+     """Search for sub-catalogs with pagination support.
+
+     Args:
+         es_client: Elasticsearch/OpenSearch client instance.
+         catalog_id: The parent catalog ID.
+         limit: Maximum number of results to return (default: 10).
+         token: Pagination token for cursor-based pagination.
+
+     Returns:
+         Tuple of (catalogs, total_count, next_token).
+     """
+     sort_fields: List[Dict[str, Any]] = [{"id": {"order": "asc"}}]
+     query_body: Dict[str, Any] = {
+         "query": {
+             "bool": {
+                 "must": [
+                     {"term": {"parent_ids": catalog_id}},
+                     {"term": {"type": "Catalog"}},
+                 ]
+             }
+         },
+         "sort": sort_fields,
+         "size": limit,
+     }
+
+     # Handle pagination cursor (token)
+     # Token format: "value1|value2|..." matching the sort fields
+     if token:
+         try:
+             search_after = token.split("|")
+             if len(search_after) == len(sort_fields):
+                 query_body["search_after"] = search_after
+         except Exception:
+             logger.debug(f"Invalid pagination token: {token}")
+
+     # Execute the search
+     try:
+         search_result = await es_client.search(index=COLLECTIONS_INDEX, body=query_body)
+     except Exception as e:
+         logger.error(f"Error searching for catalogs with parent {catalog_id}: {e}")
+         search_result = {"hits": {"hits": []}}
+
+     # Process results
+     hits = search_result.get("hits", {}).get("hits", [])
+     total_hits = search_result.get("hits", {}).get("total", {}).get("value", 0)
+
+     catalogs = [hit["_source"] for hit in hits]
+
+     # Generate next token if more results exist
+     next_token = None
+     if len(hits) == limit and len(catalogs) > 0:
+         last_hit_sort = hits[-1].get("sort")
+         if last_hit_sort:
+             next_token = "|".join(str(x) for x in last_hit_sort)
+
+     return catalogs, total_hits, next_token
+
+
+ async def update_catalog_in_index_shared(
+     es_client: Any, catalog_id: str, catalog_data: Dict[str, Any]
+ ) -> None:
+     """Update a catalog document in the index.
+
+     Args:
+         es_client: Elasticsearch/OpenSearch client instance.
+         catalog_id: The catalog ID.
+         catalog_data: The catalog document to update.
+     """
+     try:
+         await es_client.index(
+             index=COLLECTIONS_INDEX,
+             id=catalog_id,
+             body=catalog_data,
+             refresh=True,
+         )
+     except Exception as e:
+         logger.error(f"Error updating catalog {catalog_id} in index: {e}")
+         raise
+
+
+ async def search_children_with_pagination_shared(
+     es_client: Any,
+     catalog_id: str,
+     limit: int = 10,
+     token: Optional[str] = None,
+     resource_type: Optional[str] = None,
+ ) -> tuple[List[Dict[str, Any]], int, Optional[str]]:
+     """Search for children (catalogs and collections) with pagination.
+
+     Args:
+         es_client: Elasticsearch/OpenSearch client instance.
+         catalog_id: The parent catalog ID.
+         limit: Maximum number of results to return (default: 10).
+         token: Pagination token for cursor-based pagination.
+         resource_type: Optional filter by type (Catalog or Collection).
+
+     Returns:
+         Tuple of (children, total_count, next_token).
+     """
+     # Base filter: Parent match
+     filter_queries = [{"term": {"parent_ids": catalog_id}}]
+
+     # Optional filter: Type
+     if resource_type:
+         filter_queries.append({"term": {"type": resource_type}})
+
+     body = {
+         "query": {"bool": {"filter": filter_queries}},
+         "sort": [{"id": {"order": "asc"}}],
+         "size": limit,
+     }
+
+     # Handle search_after token
+     search_after: Optional[List[str]] = None
+     if token:
+         try:
+             search_after_parts = token.split("|")
+             # If the number of sort fields doesn't match token parts, ignore the token
+             if len(search_after_parts) == len(body["sort"]):  # type: ignore
+                 search_after = search_after_parts
+         except Exception:
+             search_after = None
+
+     if search_after is not None:
+         body["search_after"] = search_after
+
+     # Execute search
+     try:
+         search_result = await es_client.search(index=COLLECTIONS_INDEX, body=body)
+     except Exception as e:
+         logger.error(f"Error searching for children of catalog {catalog_id}: {e}")
+         search_result = {"hits": {"hits": []}}
+
+     # Process results
+     hits = search_result.get("hits", {}).get("hits", [])
+     total = search_result.get("hits", {}).get("total", {}).get("value", 0)
+
+     children = [hit["_source"] for hit in hits]
+
+     # Generate next token if more results exist
+     next_token = None
+     if len(hits) == limit:
+         next_token_values = hits[-1].get("sort")
+         if next_token_values:
+             next_token = "|".join(str(val) for val in next_token_values)
+
+     return children, total, next_token
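
The two pagination helpers encode the Elasticsearch `search_after` cursor as a pipe-joined token and only emit a `next_token` when a full page came back, so a caller can loop until the token runs out. A hedged sketch of that loop; the `AsyncElasticsearch` construction, endpoint, and catalog id are illustrative assumptions, not part of this diff:

```python
import asyncio

# Assumed client; the async OpenSearch client would be used the same way.
from elasticsearch import AsyncElasticsearch

from stac_fastapi.sfeos_helpers.database import search_sub_catalogs_with_pagination_shared


async def list_all_sub_catalogs(catalog_id: str) -> list:
    # Assumed local endpoint, for illustration only.
    es = AsyncElasticsearch("http://localhost:9200")
    results, token = [], None
    try:
        while True:
            page, _total, token = await search_sub_catalogs_with_pagination_shared(
                es, catalog_id, limit=100, token=token
            )
            results.extend(page)
            # The helper returns next_token=None once a short (final) page is seen.
            if token is None:
                break
    finally:
        await es.close()
    return results


if __name__ == "__main__":
    # "my-root-catalog" is a made-up id.
    print(len(asyncio.run(list_all_sub_catalogs("my-root-catalog"))))
```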

stac_fastapi/sfeos_helpers/database/datetime.py
@@ -145,7 +145,7 @@ def extract_date(date_str: str) -> date:
          date_str: ISO format date string

      Returns:
-         A date object extracted from the input string.
+         A date object extracted from the input string or None.
      """
      date_str = date_str.replace("Z", "+00:00")
      return datetime_type.fromisoformat(date_str).date()
@@ -186,3 +186,56 @@ def extract_first_date_from_index(index_name: str) -> date:
      raise ValueError(
          f"Invalid date format in index name '{index_name}': '{date_string}'"
      ) from e
+
+
+ def is_index_closed(alias_name: str) -> bool:
+     """Check if an index alias is closed (has two dates indicating a date range).
+
+     A closed index has an alias like 'items_start_datetime_collection_2025-11-06-2025-11-08'
+     indicating a fixed date range that should not be modified.
+
+     Args:
+         alias_name: The alias name to check.
+
+     Returns:
+         True if the alias contains two dates (closed), False if it has one date (open).
+     """
+     date_pattern = r"\d{4}-\d{2}-\d{2}"
+     matches = re.findall(date_pattern, alias_name)
+     return len(matches) >= 2
+
+
+ def extract_last_date_from_index(index_name: str) -> date:
+     """Extract the last date from an index name containing date patterns.
+
+     Searches for date patterns (YYYY-MM-DD) within the index name string
+     and returns the last found date as a date object.
+
+     Args:
+         index_name: Index name containing date patterns.
+
+     Returns:
+         A date object extracted from the last date pattern found in the index name.
+     """
+     date_pattern = r"\d{4}-\d{2}-\d{2}"
+     matches = re.findall(date_pattern, index_name)
+
+     if not matches:
+         logger.error(f"No date pattern found in index name: '{index_name}'")
+         raise ValueError(
+             f"No date pattern (YYYY-MM-DD) found in index name: '{index_name}'"
+         )
+
+     date_string = matches[-1]
+
+     try:
+         extracted_date = datetime_type.strptime(date_string, "%Y-%m-%d").date()
+         return extracted_date
+     except ValueError as e:
+         logger.error(
+             f"Invalid date format found in index name '{index_name}': "
+             f"'{date_string}' - {str(e)}"
+         )
+         raise ValueError(
+             f"Invalid date format in index name '{index_name}': '{date_string}'"
+         ) from e
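
Both new helpers key off `YYYY-MM-DD` patterns in the alias name: two matches mean a closed, fixed-range index, one match means an open one, and `extract_last_date_from_index` picks the final match. A small illustration using the alias format from the `is_index_closed` docstring (the expected values follow from the regex logic shown above, not from running the package):

```python
from stac_fastapi.sfeos_helpers.database import (
    extract_last_date_from_index,
    is_index_closed,
)

closed_alias = "items_start_datetime_collection_2025-11-06-2025-11-08"
open_alias = "items_start_datetime_collection_2025-11-06"

assert is_index_closed(closed_alias)      # two dates -> fixed, closed range
assert not is_index_closed(open_alias)    # one date -> still accepting writes
assert str(extract_last_date_from_index(closed_alias)) == "2025-11-08"
assert str(extract_last_date_from_index(open_alias)) == "2025-11-06"
```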

stac_fastapi/sfeos_helpers/database/index.py
@@ -4,11 +4,9 @@ This module provides functions for creating and managing indices in Elasticsearc
  """

  import re
- from datetime import datetime
+ from datetime import date, datetime
  from functools import lru_cache
- from typing import Any, List, Optional
-
- from dateutil.parser import parse  # type: ignore[import]
+ from typing import Any, Dict, List, Optional, Tuple

  from stac_fastapi.sfeos_helpers.mappings import (
      _ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE,
@@ -71,54 +69,104 @@ def indices(collection_ids: Optional[List[str]]) -> str:


  def filter_indexes_by_datetime(
-     indexes: List[str], gte: Optional[str], lte: Optional[str]
+     collection_indexes: List[Tuple[Dict[str, str], ...]],
+     datetime_search: Dict[str, Dict[str, Optional[str]]],
+     use_datetime: bool,
  ) -> List[str]:
-     """Filter indexes based on datetime range extracted from index names.
+     """
+     Filter Elasticsearch index aliases based on datetime search criteria.
+
+     Filters a list of collection indexes by matching their datetime, start_datetime, and end_datetime
+     aliases against the provided search criteria. Each criterion can have optional 'gte' (greater than
+     or equal) and 'lte' (less than or equal) bounds.

      Args:
-         indexes: List of index names containing dates
-         gte: Greater than or equal date filter (ISO format, optional 'Z' suffix)
-         lte: Less than or equal date filter (ISO format, optional 'Z' suffix)
+         collection_indexes (List[Tuple[Dict[str, str], ...]]): A list of tuples containing dictionaries
+             with 'datetime', 'start_datetime', and 'end_datetime' aliases.
+         datetime_search (Dict[str, Dict[str, Optional[str]]]): A dictionary with keys 'datetime',
+             'start_datetime', and 'end_datetime', each containing 'gte' and 'lte' criteria as ISO format
+             datetime strings or None.
+         use_datetime (bool): Flag determining which datetime field to filter on:
+             - True: Filters using 'datetime' alias.
+             - False: Filters using 'start_datetime' and 'end_datetime' aliases.

      Returns:
-         List of filtered index names
+         List[str]: A list of start_datetime aliases that match all provided search criteria.
      """

-     def parse_datetime(dt_str: str) -> datetime:
-         """Parse datetime string, handling both with and without 'Z' suffix."""
-         return parse(dt_str).replace(tzinfo=None)
-
-     def extract_date_range_from_index(index_name: str) -> tuple:
-         """Extract start and end dates from index name."""
-         date_pattern = r"(\d{4}-\d{2}-\d{2})"
-         dates = re.findall(date_pattern, index_name)
-
-         if len(dates) == 1:
-             start_date = datetime.strptime(dates[0], "%Y-%m-%d")
-             max_date = datetime.max.replace(microsecond=0)
-             return start_date, max_date
-         else:
-             start_date = datetime.strptime(dates[0], "%Y-%m-%d")
-             end_date = datetime.strptime(dates[1], "%Y-%m-%d")
-             return start_date, end_date
-
-     def is_index_in_range(
-         start_date: datetime, end_date: datetime, gte_dt: datetime, lte_dt: datetime
+     def extract_date_from_alias(alias: str) -> Optional[tuple[datetime, datetime]]:
+         date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")
+         try:
+             dates = date_pattern.findall(alias)
+
+             if not dates:
+                 return None
+
+             if len(dates) >= 2:
+                 return datetime.strptime(dates[-2], "%Y-%m-%d"), datetime.strptime(
+                     dates[-1], "%Y-%m-%d"
+                 )
+             else:
+                 date = datetime.strptime(dates[-1], "%Y-%m-%d")
+                 return date, date
+         except (ValueError, IndexError):
+             return None
+
+     def parse_search_date(date_str: Optional[str]) -> Optional[date]:
+         if not date_str:
+             return None
+         date_str = date_str.rstrip("Z")
+         return datetime.fromisoformat(date_str).date()
+
+     def check_criteria(
+         value_begin: datetime, value_end: datetime, criteria: Dict
      ) -> bool:
-         """Check if index date range overlaps with filter range."""
-         return not (
-             end_date.date() < gte_dt.date() or start_date.date() > lte_dt.date()
-         )
+         gte = parse_search_date(criteria.get("gte"))
+         lte = parse_search_date(criteria.get("lte"))

-     gte_dt = parse_datetime(gte) if gte else datetime.min.replace(microsecond=0)
-     lte_dt = parse_datetime(lte) if lte else datetime.max.replace(microsecond=0)
+         if gte and value_end.date() < gte:
+             return False
+         if lte and value_begin.date() > lte:
+             return False
+
+         return True

      filtered_indexes = []

-     for index in indexes:
-         start_date, end_date = extract_date_range_from_index(index)
-         if is_index_in_range(start_date, end_date, gte_dt, lte_dt):
-             filtered_indexes.append(index)
+     for index_tuple in collection_indexes:
+         if not index_tuple:
+             continue
+
+         index_dict = index_tuple[0]
+         start_datetime_alias = index_dict.get("start_datetime")
+         end_datetime_alias = index_dict.get("end_datetime")
+         datetime_alias = index_dict.get("datetime")
+
+         if start_datetime_alias:
+             start_date = extract_date_from_alias(start_datetime_alias)
+             if not check_criteria(
+                 start_date[0], start_date[1], datetime_search.get("start_datetime", {})
+             ):
+                 continue
+         if end_datetime_alias:
+             end_date = extract_date_from_alias(end_datetime_alias)
+             if not check_criteria(
+                 end_date[0], end_date[1], datetime_search.get("end_datetime", {})
+             ):
+                 continue
+         if datetime_alias:
+             datetime_date = extract_date_from_alias(datetime_alias)
+             if not check_criteria(
+                 datetime_date[0], datetime_date[1], datetime_search.get("datetime", {})
+             ):
+                 continue
+
+         primary_datetime_alias = (
+             datetime_alias if use_datetime else start_datetime_alias
+         )
+
+         if primary_datetime_alias is not None:
+             filtered_indexes.append(primary_datetime_alias)

      return filtered_indexes

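The rewritten `filter_indexes_by_datetime` no longer parses flat index names against a single `gte`/`lte` pair; it takes tuples of alias dictionaries plus per-field criteria and returns either the `datetime` or the `start_datetime` aliases, depending on `use_datetime`. A hedged sketch of the new call shape, with invented alias names that follow the `YYYY-MM-DD` convention the parser expects:

```python
from stac_fastapi.sfeos_helpers.database.index import filter_indexes_by_datetime

# Invented alias names, one tuple of alias dicts per collection index.
collection_indexes = [
    (
        {
            "start_datetime": "items_start_datetime_c1_2025-01-01-2025-03-31",
            "end_datetime": "items_end_datetime_c1_2025-01-01-2025-03-31",
        },
    ),
    ({"datetime": "items_datetime_c2_2025-06-01"},),
]
datetime_search = {
    "datetime": {"gte": None, "lte": None},
    "start_datetime": {"gte": "2025-02-01T00:00:00Z", "lte": None},
    "end_datetime": {"gte": None, "lte": None},
}

# With use_datetime=False, only matching start_datetime aliases are returned:
# ['items_start_datetime_c1_2025-01-01-2025-03-31']
print(filter_indexes_by_datetime(collection_indexes, datetime_search, False))
```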

stac_fastapi/sfeos_helpers/database/query.py
@@ -130,7 +130,7 @@ def apply_collections_datetime_filter_shared(


  def apply_collections_bbox_filter_shared(
-     bbox: Union[str, List[float], None]
+     bbox: Union[str, List[float], None],
  ) -> Optional[Dict[str, Dict]]:
      """Create a geo_shape filter for collections bbox search.