sfeos-helpers 6.9.0__tar.gz → 6.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/.gitignore +11 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/PKG-INFO +2 -2
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/pyproject.toml +1 -1
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/aggregation/client.py +1 -3
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/__init__.py +32 -2
- sfeos_helpers-6.10.1/stac_fastapi/sfeos_helpers/database/catalogs.py +190 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/datetime.py +54 -1
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/index.py +88 -40
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/query.py +1 -1
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/utils.py +97 -2
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/mappings.py +2 -2
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/base.py +30 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/index_operations.py +80 -25
- sfeos_helpers-6.10.1/stac_fastapi/sfeos_helpers/search_engine/inserters.py +389 -0
- sfeos_helpers-6.10.1/stac_fastapi/sfeos_helpers/search_engine/managers.py +482 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/selection/base.py +7 -3
- sfeos_helpers-6.10.1/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +184 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py +71 -15
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/version.py +1 -1
- sfeos_helpers-6.9.0/stac_fastapi/sfeos_helpers/search_engine/inserters.py +0 -309
- sfeos_helpers-6.9.0/stac_fastapi/sfeos_helpers/search_engine/managers.py +0 -198
- sfeos_helpers-6.9.0/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +0 -127
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/README.md +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/aggregation/README.md +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/aggregation/__init__.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/aggregation/format.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/README.md +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/document.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/mapping.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/filter/README.md +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/filter/__init__.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/filter/client.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/filter/cql2.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/filter/transform.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/models/patch.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/__init__.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/factory.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py +0 -0
- {sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/search_engine/selection/factory.py +0 -0
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/.gitignore
RENAMED
@@ -141,3 +141,14 @@ venv
 /docs/src/api/*
 
 .DS_Store
+
+# Helm
+*.tgz
+charts/*/charts/
+charts/*/requirements.lock
+charts/*/Chart.lock
+helm-chart/stac-fastapi/charts/
+helm-chart/stac-fastapi/Chart.lock
+helm-chart/stac-fastapi/*.tgz
+helm-chart/test-results/
+helm-chart/tmp/
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sfeos_helpers
-Version: 6.9.0
+Version: 6.10.1
 Summary: Helper library for the Elasticsearch and Opensearch stac-fastapi backends.
 Project-URL: Homepage, https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
 License: MIT
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Requires-Python: >=3.11
-Requires-Dist: stac-fastapi-core==6.9.0
+Requires-Dist: stac-fastapi-core==6.10.1
 Description-Content-Type: text/markdown
 
 # sfeos-helpers
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/aggregation/client.py
RENAMED
@@ -316,8 +316,6 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
             search, datetime_search = self.database.apply_datetime_filter(
                 search=search, datetime=aggregate_request.datetime
             )
-        else:
-            datetime_search = {"gte": None, "lte": None}
 
         if aggregate_request.bbox:
             bbox = aggregate_request.bbox
@@ -416,7 +414,7 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
                 geometry_geohash_grid_precision,
                 geometry_geotile_grid_precision,
                 datetime_frequency_interval,
-
+                aggregate_request.datetime,
             )
         except Exception as error:
             if not isinstance(error, IndexError):
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/__init__.py
RENAMED
@@ -30,7 +30,19 @@ Function Naming Conventions:
 """
 
 # Re-export all functions for backward compatibility
-from .datetime import extract_date, extract_first_date_from_index, return_date
+from .catalogs import (
+    search_children_with_pagination_shared,
+    search_collections_by_parent_id_shared,
+    search_sub_catalogs_with_pagination_shared,
+    update_catalog_in_index_shared,
+)
+from .datetime import (
+    extract_date,
+    extract_first_date_from_index,
+    extract_last_date_from_index,
+    is_index_closed,
+    return_date,
+)
 from .document import mk_actions, mk_item_id
 from .index import (
     create_index_templates_shared,
@@ -48,9 +60,21 @@ from .query import (
     apply_intersects_filter_shared,
     populate_sort_shared,
 )
-from .utils import add_bbox_shape_to_collection, get_bool_env, validate_refresh
+from .utils import (
+    ItemAlreadyExistsError,
+    add_bbox_shape_to_collection,
+    check_item_exists_in_alias,
+    check_item_exists_in_alias_sync,
+    get_bool_env,
+    validate_refresh,
+)
 
 __all__ = [
+    # Catalog operations
+    "search_collections_by_parent_id_shared",
+    "search_sub_catalogs_with_pagination_shared",
+    "update_catalog_in_index_shared",
+    "search_children_with_pagination_shared",
     # Index operations
     "create_index_templates_shared",
     "delete_item_index_shared",
@@ -73,8 +97,14 @@ __all__ = [
     "validate_refresh",
     "get_bool_env",
     "add_bbox_shape_to_collection",
+    "check_item_exists_in_alias",
+    "check_item_exists_in_alias_sync",
+    # Errors
+    "ItemAlreadyExistsError",
     # Datetime utilities
     "return_date",
     "extract_date",
     "extract_first_date_from_index",
+    "extract_last_date_from_index",
+    "is_index_closed",
 ]
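
Because the new helpers are re-exported here, downstream code can keep importing from the package root rather than from the individual submodules. A short sketch of the consumer-side imports this hunk enables, with names taken from the `__all__` list above:

```python
# All of these are re-exported by the updated database/__init__.py.
from stac_fastapi.sfeos_helpers.database import (
    ItemAlreadyExistsError,
    extract_last_date_from_index,
    is_index_closed,
    search_children_with_pagination_shared,
)
```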
sfeos_helpers-6.10.1/stac_fastapi/sfeos_helpers/database/catalogs.py
ADDED
@@ -0,0 +1,190 @@
+"""Catalog-related database operations for Elasticsearch/OpenSearch.
+
+This module provides helper functions for catalog operations that require
+direct Elasticsearch/OpenSearch client access. These functions are used by
+the CatalogsExtension to maintain database-agnostic code in the core module.
+"""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX
+
+logger = logging.getLogger(__name__)
+
+
+async def search_collections_by_parent_id_shared(
+    es_client: Any, catalog_id: str, size: int = 10000
+) -> List[Dict[str, Any]]:
+    """Search for collections that have a specific catalog as a parent.
+
+    Args:
+        es_client: Elasticsearch/OpenSearch client instance.
+        catalog_id: The catalog ID to search for in parent_ids.
+        size: Maximum number of results to return (default: 10000).
+
+    Returns:
+        List of collection documents from the search results.
+    """
+    query_body = {"query": {"term": {"parent_ids": catalog_id}}, "size": size}
+    try:
+        search_result = await es_client.search(index=COLLECTIONS_INDEX, body=query_body)
+        return [hit["_source"] for hit in search_result["hits"]["hits"]]
+    except Exception as e:
+        logger.error(f"Error searching for collections with parent {catalog_id}: {e}")
+        return []
+
+
+async def search_sub_catalogs_with_pagination_shared(
+    es_client: Any,
+    catalog_id: str,
+    limit: int = 10,
+    token: Optional[str] = None,
+) -> tuple[List[Dict[str, Any]], int, Optional[str]]:
+    """Search for sub-catalogs with pagination support.
+
+    Args:
+        es_client: Elasticsearch/OpenSearch client instance.
+        catalog_id: The parent catalog ID.
+        limit: Maximum number of results to return (default: 10).
+        token: Pagination token for cursor-based pagination.
+
+    Returns:
+        Tuple of (catalogs, total_count, next_token).
+    """
+    sort_fields: List[Dict[str, Any]] = [{"id": {"order": "asc"}}]
+    query_body: Dict[str, Any] = {
+        "query": {
+            "bool": {
+                "must": [
+                    {"term": {"parent_ids": catalog_id}},
+                    {"term": {"type": "Catalog"}},
+                ]
+            }
+        },
+        "sort": sort_fields,
+        "size": limit,
+    }
+
+    # Handle pagination cursor (token)
+    # Token format: "value1|value2|..." matching the sort fields
+    if token:
+        try:
+            search_after = token.split("|")
+            if len(search_after) == len(sort_fields):
+                query_body["search_after"] = search_after
+        except Exception:
+            logger.debug(f"Invalid pagination token: {token}")
+
+    # Execute the search
+    try:
+        search_result = await es_client.search(index=COLLECTIONS_INDEX, body=query_body)
+    except Exception as e:
+        logger.error(f"Error searching for catalogs with parent {catalog_id}: {e}")
+        search_result = {"hits": {"hits": []}}
+
+    # Process results
+    hits = search_result.get("hits", {}).get("hits", [])
+    total_hits = search_result.get("hits", {}).get("total", {}).get("value", 0)
+
+    catalogs = [hit["_source"] for hit in hits]
+
+    # Generate next token if more results exist
+    next_token = None
+    if len(hits) == limit and len(catalogs) > 0:
+        last_hit_sort = hits[-1].get("sort")
+        if last_hit_sort:
+            next_token = "|".join(str(x) for x in last_hit_sort)
+
+    return catalogs, total_hits, next_token
+
+
+async def update_catalog_in_index_shared(
+    es_client: Any, catalog_id: str, catalog_data: Dict[str, Any]
+) -> None:
+    """Update a catalog document in the index.
+
+    Args:
+        es_client: Elasticsearch/OpenSearch client instance.
+        catalog_id: The catalog ID.
+        catalog_data: The catalog document to update.
+    """
+    try:
+        await es_client.index(
+            index=COLLECTIONS_INDEX,
+            id=catalog_id,
+            body=catalog_data,
+            refresh=True,
+        )
+    except Exception as e:
+        logger.error(f"Error updating catalog {catalog_id} in index: {e}")
+        raise
+
+
+async def search_children_with_pagination_shared(
+    es_client: Any,
+    catalog_id: str,
+    limit: int = 10,
+    token: Optional[str] = None,
+    resource_type: Optional[str] = None,
+) -> tuple[List[Dict[str, Any]], int, Optional[str]]:
+    """Search for children (catalogs and collections) with pagination.
+
+    Args:
+        es_client: Elasticsearch/OpenSearch client instance.
+        catalog_id: The parent catalog ID.
+        limit: Maximum number of results to return (default: 10).
+        token: Pagination token for cursor-based pagination.
+        resource_type: Optional filter by type (Catalog or Collection).
+
+    Returns:
+        Tuple of (children, total_count, next_token).
+    """
+    # Base filter: Parent match
+    filter_queries = [{"term": {"parent_ids": catalog_id}}]
+
+    # Optional filter: Type
+    if resource_type:
+        filter_queries.append({"term": {"type": resource_type}})
+
+    body = {
+        "query": {"bool": {"filter": filter_queries}},
+        "sort": [{"id": {"order": "asc"}}],
+        "size": limit,
+    }
+
+    # Handle search_after token
+    search_after: Optional[List[str]] = None
+    if token:
+        try:
+            search_after_parts = token.split("|")
+            # If the number of sort fields doesn't match token parts, ignore the token
+            if len(search_after_parts) == len(body["sort"]):  # type: ignore
+                search_after = search_after_parts
+        except Exception:
+            search_after = None
+
+    if search_after is not None:
+        body["search_after"] = search_after
+
+    # Execute search
+    try:
+        search_result = await es_client.search(index=COLLECTIONS_INDEX, body=body)
+    except Exception as e:
+        logger.error(f"Error searching for children of catalog {catalog_id}: {e}")
+        search_result = {"hits": {"hits": []}}
+
+    # Process results
+    hits = search_result.get("hits", {}).get("hits", [])
+    total = search_result.get("hits", {}).get("total", {}).get("value", 0)
+
+    children = [hit["_source"] for hit in hits]
+
+    # Generate next token if more results exist
+    next_token = None
+    if len(hits) == limit:
+        next_token_values = hits[-1].get("sort")
+        if next_token_values:
+            next_token = "|".join(str(val) for val in next_token_values)
+
+    return children, total, next_token
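
The pagination contract in this new module is cursor-based: `next_token` is just the `sort` values of a page's last hit joined with `|`, and feeding it back as `token` becomes the `search_after` cursor for the next page. A usage sketch, assuming an already-connected async Elasticsearch/OpenSearch client with the collections index populated:

```python
from stac_fastapi.sfeos_helpers.database import (
    search_sub_catalogs_with_pagination_shared,
)


async def walk_sub_catalogs(es_client, root_id: str = "root") -> list:
    """Collect the ids of all sub-catalogs of `root_id`, page by page."""
    seen = []
    token = None
    while True:
        page, _total, token = await search_sub_catalogs_with_pagination_shared(
            es_client, catalog_id=root_id, limit=10, token=token
        )
        seen.extend(catalog["id"] for catalog in page)
        if token is None:  # short page or no sort values: no more results
            break
    return seen
```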
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/datetime.py
RENAMED
@@ -145,7 +145,7 @@ def extract_date(date_str: str) -> date:
         date_str: ISO format date string
 
     Returns:
-        A date object extracted from the input string.
+        A date object extracted from the input string or None.
     """
     date_str = date_str.replace("Z", "+00:00")
     return datetime_type.fromisoformat(date_str).date()
@@ -186,3 +186,56 @@ def extract_first_date_from_index(index_name: str) -> date:
         raise ValueError(
             f"Invalid date format in index name '{index_name}': '{date_string}'"
         ) from e
+
+
+def is_index_closed(alias_name: str) -> bool:
+    """Check if an index alias is closed (has two dates indicating a date range).
+
+    A closed index has an alias like 'items_start_datetime_collection_2025-11-06-2025-11-08'
+    indicating a fixed date range that should not be modified.
+
+    Args:
+        alias_name: The alias name to check.
+
+    Returns:
+        True if the alias contains two dates (closed), False if it has one date (open).
+    """
+    date_pattern = r"\d{4}-\d{2}-\d{2}"
+    matches = re.findall(date_pattern, alias_name)
+    return len(matches) >= 2
+
+
+def extract_last_date_from_index(index_name: str) -> date:
+    """Extract the last date from an index name containing date patterns.
+
+    Searches for date patterns (YYYY-MM-DD) within the index name string
+    and returns the last found date as a date object.
+
+    Args:
+        index_name: Index name containing date patterns.
+
+    Returns:
+        A date object extracted from the last date pattern found in the index name.
+    """
+    date_pattern = r"\d{4}-\d{2}-\d{2}"
+    matches = re.findall(date_pattern, index_name)
+
+    if not matches:
+        logger.error(f"No date pattern found in index name: '{index_name}'")
+        raise ValueError(
+            f"No date pattern (YYYY-MM-DD) found in index name: '{index_name}'"
+        )
+
+    date_string = matches[-1]
+
+    try:
+        extracted_date = datetime_type.strptime(date_string, "%Y-%m-%d").date()
+        return extracted_date
+    except ValueError as e:
+        logger.error(
+            f"Invalid date format found in index name '{index_name}': "
+            f"'{date_string}' - {str(e)}"
+        )
+        raise ValueError(
+            f"Invalid date format in index name '{index_name}': '{date_string}'"
+        ) from e
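
Both new helpers operate purely on the alias/index name string, so their behavior can be shown with the alias format quoted in the docstrings above:

```python
# Illustrative calls; the alias format comes from the docstrings above.
from stac_fastapi.sfeos_helpers.database import (
    extract_last_date_from_index,
    is_index_closed,
)

alias = "items_start_datetime_collection_2025-11-06-2025-11-08"
assert is_index_closed(alias)  # two dates -> closed (fixed range)
assert not is_index_closed("items_start_datetime_collection_2025-11-06")

# The last YYYY-MM-DD in the name, returned as a datetime.date:
assert str(extract_last_date_from_index(alias)) == "2025-11-08"
```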
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/index.py
RENAMED
@@ -4,11 +4,9 @@ This module provides functions for creating and managing indices in Elasticsearc
 """
 
 import re
-from datetime import datetime
+from datetime import date, datetime
 from functools import lru_cache
-from typing import Any, List, Optional
-
-from dateutil.parser import parse  # type: ignore[import]
+from typing import Any, Dict, List, Optional, Tuple
 
 from stac_fastapi.sfeos_helpers.mappings import (
     _ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE,
@@ -71,54 +69,104 @@ def indices(collection_ids: Optional[List[str]]) -> str:
 
 
 def filter_indexes_by_datetime(
-
+    collection_indexes: List[Tuple[Dict[str, str], ...]],
+    datetime_search: Dict[str, Dict[str, Optional[str]]],
+    use_datetime: bool,
 ) -> List[str]:
-    """
+    """
+    Filter Elasticsearch index aliases based on datetime search criteria.
+
+    Filters a list of collection indexes by matching their datetime, start_datetime, and end_datetime
+    aliases against the provided search criteria. Each criterion can have optional 'gte' (greater than
+    or equal) and 'lte' (less than or equal) bounds.
 
     Args:
-
-
-
+        collection_indexes (List[Tuple[Dict[str, str], ...]]): A list of tuples containing dictionaries
+            with 'datetime', 'start_datetime', and 'end_datetime' aliases.
+        datetime_search (Dict[str, Dict[str, Optional[str]]]): A dictionary with keys 'datetime',
+            'start_datetime', and 'end_datetime', each containing 'gte' and 'lte' criteria as ISO format
+            datetime strings or None.
+        use_datetime (bool): Flag determining which datetime field to filter on:
+            - True: Filters using 'datetime' alias.
+            - False: Filters using 'start_datetime' and 'end_datetime' aliases.
 
     Returns:
-        List of
+        List[str]: A list of start_datetime aliases that match all provided search criteria.
     """
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return
-
-    def
-
+    def extract_date_from_alias(alias: str) -> Optional[tuple[datetime, datetime]]:
+        date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")
+        try:
+            dates = date_pattern.findall(alias)
+
+            if not dates:
+                return None
+
+            if len(dates) >= 2:
+                return datetime.strptime(dates[-2], "%Y-%m-%d"), datetime.strptime(
+                    dates[-1], "%Y-%m-%d"
+                )
+            else:
+                date = datetime.strptime(dates[-1], "%Y-%m-%d")
+                return date, date
+        except (ValueError, IndexError):
+            return None
+
+    def parse_search_date(date_str: Optional[str]) -> Optional[date]:
+        if not date_str:
+            return None
+        date_str = date_str.rstrip("Z")
+        return datetime.fromisoformat(date_str).date()
+
+    def check_criteria(
+        value_begin: datetime, value_end: datetime, criteria: Dict
    ) -> bool:
-
-
-        end_date.date() < gte_dt.date() or start_date.date() > lte_dt.date()
-        )
+        gte = parse_search_date(criteria.get("gte"))
+        lte = parse_search_date(criteria.get("lte"))
 
-
-
+        if gte and value_end.date() < gte:
+            return False
+        if lte and value_begin.date() > lte:
+            return False
+
+        return True
 
     filtered_indexes = []
 
-    for
-
-
-
+    for index_tuple in collection_indexes:
+        if not index_tuple:
+            continue
+
+        index_dict = index_tuple[0]
+        start_datetime_alias = index_dict.get("start_datetime")
+        end_datetime_alias = index_dict.get("end_datetime")
+        datetime_alias = index_dict.get("datetime")
+
+        if start_datetime_alias:
+            start_date = extract_date_from_alias(start_datetime_alias)
+            if not check_criteria(
+                start_date[0], start_date[1], datetime_search.get("start_datetime", {})
+            ):
+                continue
+        if end_datetime_alias:
+            end_date = extract_date_from_alias(end_datetime_alias)
+            if not check_criteria(
+                end_date[0], end_date[1], datetime_search.get("end_datetime", {})
+            ):
+                continue
+        if datetime_alias:
+            datetime_date = extract_date_from_alias(datetime_alias)
+            if not check_criteria(
+                datetime_date[0], datetime_date[1], datetime_search.get("datetime", {})
+            ):
+                continue
+
+        primary_datetime_alias = (
+            datetime_alias if use_datetime else start_datetime_alias
+        )
+
+        if primary_datetime_alias is not None:
+            filtered_indexes.append(primary_datetime_alias)
 
     return filtered_indexes
 
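
A hedged sketch of the input/output shape the new signature and docstring describe; the alias strings below are illustrative, not taken from a real deployment:

```python
from stac_fastapi.sfeos_helpers.database.index import filter_indexes_by_datetime

collection_indexes = [
    (
        {
            "datetime": "items_datetime_sentinel-2_2025-11-06",
            "start_datetime": "items_start_datetime_sentinel-2_2025-11-06",
            "end_datetime": "items_end_datetime_sentinel-2_2025-11-08",
        },
    ),
]
datetime_search = {
    "datetime": {"gte": None, "lte": None},
    "start_datetime": {"gte": "2025-11-01T00:00:00Z", "lte": "2025-11-07T00:00:00Z"},
    "end_datetime": {"gte": None, "lte": None},
}

# use_datetime=False selects on the start/end_datetime aliases and returns
# the matching start_datetime aliases.
print(filter_indexes_by_datetime(collection_indexes, datetime_search, False))
# ['items_start_datetime_sentinel-2_2025-11-06']
```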
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/query.py
RENAMED
@@ -130,7 +130,7 @@ def apply_collections_datetime_filter_shared(
 
 
 def apply_collections_bbox_filter_shared(
-    bbox: Union[str, List[float], None]
+    bbox: Union[str, List[float], None],
 ) -> Optional[Dict[str, Dict]]:
     """Create a geo_shape filter for collections bbox search.
 
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/database/utils.py
RENAMED
@@ -5,7 +5,7 @@ in Elasticsearch/OpenSearch, such as parameter validation.
 """
 
 import logging
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
 
 from stac_fastapi.core.utilities import bbox2polygon, get_bool_env
 from stac_fastapi.extensions.core.transaction.request import (
@@ -14,10 +14,73 @@ from stac_fastapi.extensions.core.transaction.request import (
     PatchRemove,
 )
 from stac_fastapi.sfeos_helpers.models.patch import ElasticPath, ESCommandSet
+from stac_fastapi.types.errors import ConflictError
 
 logger = logging.getLogger(__name__)
 
 
+class ItemAlreadyExistsError(ConflictError):
+    """Error raised when attempting to create an item that already exists.
+
+    Attributes:
+        item_id: The ID of the item that already exists.
+        collection_id: The ID of the collection containing the item.
+    """
+
+    def __init__(self, item_id: str, collection_id: str):
+        """Initialize the error with item and collection IDs."""
+        self.item_id = item_id
+        self.collection_id = collection_id
+        message = f"Item {item_id} in collection {collection_id} already exists"
+        super().__init__(message)
+
+
+async def check_item_exists_in_alias(client: Any, alias: str, doc_id: str) -> bool:
+    """Check if an item exists across all indexes for an alias.
+
+    Args:
+        client: The async Elasticsearch/OpenSearch client.
+        alias: The index alias to search against.
+        doc_id: The document ID to check for existence.
+
+    Returns:
+        bool: True if the item exists in any index under the alias, False otherwise.
+    """
+    resp = await client.search(
+        index=alias,
+        body={
+            "query": {"ids": {"values": [doc_id]}},
+            "_source": False,
+        },
+        size=0,
+        terminate_after=1,
+    )
+    return bool(resp["hits"]["total"]["value"])
+
+
+def check_item_exists_in_alias_sync(client: Any, alias: str, doc_id: str) -> bool:
+    """Check if an item exists across all indexes for an alias (sync).
+
+    Args:
+        client: The sync Elasticsearch/OpenSearch client.
+        alias: The index alias to search against.
+        doc_id: The document ID to check for existence.
+
+    Returns:
+        bool: True if the item exists in any index under the alias, False otherwise.
+    """
+    resp = client.search(
+        index=alias,
+        body={
+            "query": {"ids": {"values": [doc_id]}},
+            "_source": False,
+        },
+        size=0,
+        terminate_after=1,
+    )
+    return bool(resp["hits"]["total"]["value"])
+
+
 def add_bbox_shape_to_collection(collection: Dict[str, Any]) -> bool:
     """Add bbox_shape field to a collection document for spatial queries.
 
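
A sketch of how these two additions are meant to compose on a create path; the alias name and the document-ID scheme here are illustrative assumptions, not code from this package:

```python
from stac_fastapi.sfeos_helpers.database import (
    ItemAlreadyExistsError,
    check_item_exists_in_alias,
)


async def create_item_guarded(client, alias: str, item: dict) -> None:
    """Index an item only if no index under the alias already holds it."""
    doc_id = f"{item['id']}|{item['collection']}"  # illustrative ID scheme
    if await check_item_exists_in_alias(client, alias, doc_id):
        raise ItemAlreadyExistsError(item["id"], item["collection"])
    await client.index(index=alias, id=doc_id, body=item)
```

The `size=0, terminate_after=1` combination in the helpers keeps the existence check cheap: no documents are fetched, and each shard stops after the first hit.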
@@ -354,10 +417,42 @@ def operations_to_script(operations: List, create_nest: bool = False) -> Dict:
             commands=commands, operation=operation, path=path, params=params
         )
 
-
+    source = "".join(commands)
 
     return {
         "source": source,
         "lang": "painless",
         "params": params,
     }
+
+
+def add_hidden_filter(
+    query: Optional[Dict[str, Any]] = None, hide_item_path: Optional[str] = None
+) -> Dict[str, Any]:
+    """Add hidden filter to a query to exclude hidden items.
+
+    Args:
+        query: Optional Elasticsearch query to combine with hidden filter
+        hide_item_path: Path to the hidden field (e.g., "properties._private.hidden")
+            If None or empty, return original query (no filtering)
+
+    Returns:
+        Query with hidden filter applied
+    """
+    if not hide_item_path:
+        return query or {"match_all": {}}
+
+    hidden_filter = {
+        "bool": {
+            "should": [
+                {"term": {hide_item_path: False}},
+                {"bool": {"must_not": {"exists": {"field": hide_item_path}}}},
+            ],
+            "minimum_should_match": 1,
+        }
+    }
+
+    if query:
+        return {"bool": {"must": [query, hidden_filter]}}
+    else:
+        return hidden_filter
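
Note that the helper treats "not hidden" as "flag is False or the field is absent", which keeps documents that predate the flag visible. An illustrative call and the query it builds:

```python
from stac_fastapi.sfeos_helpers.database.utils import add_hidden_filter

base = {"term": {"collection": "sentinel-2"}}
combined = add_hidden_filter(base, "properties._private.hidden")
# combined == {"bool": {"must": [base, {
#     "bool": {
#         "should": [
#             {"term": {"properties._private.hidden": False}},
#             {"bool": {"must_not": {"exists": {"field": "properties._private.hidden"}}}},
#         ],
#         "minimum_should_match": 1,
#     },
# }]}}
```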
{sfeos_helpers-6.9.0 → sfeos_helpers-6.10.1}/stac_fastapi/sfeos_helpers/mappings.py
RENAMED
@@ -268,8 +268,8 @@ _BASE_ITEMS_MAPPINGS = {
     "properties": {
         # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md
         "datetime": {"type": "date_nanos"},
-        "start_datetime": {"type": "date"},
-        "end_datetime": {"type": "date"},
+        "start_datetime": {"type": "date_nanos"},
+        "end_datetime": {"type": "date_nanos"},
         "created": {"type": "date"},
         "updated": {"type": "date"},
         # Satellite Extension https://github.com/stac-extensions/sat