sfeos-helpers 5.0.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sfeos_helpers-5.0.0a0.dist-info/METADATA +558 -0
- sfeos_helpers-5.0.0a0.dist-info/RECORD +20 -0
- sfeos_helpers-5.0.0a0.dist-info/WHEEL +5 -0
- sfeos_helpers-5.0.0a0.dist-info/top_level.txt +1 -0
- stac_fastapi/sfeos_helpers/aggregation/__init__.py +31 -0
- stac_fastapi/sfeos_helpers/aggregation/client.py +469 -0
- stac_fastapi/sfeos_helpers/aggregation/format.py +60 -0
- stac_fastapi/sfeos_helpers/database/__init__.py +71 -0
- stac_fastapi/sfeos_helpers/database/datetime.py +60 -0
- stac_fastapi/sfeos_helpers/database/document.py +48 -0
- stac_fastapi/sfeos_helpers/database/index.py +130 -0
- stac_fastapi/sfeos_helpers/database/mapping.py +38 -0
- stac_fastapi/sfeos_helpers/database/query.py +85 -0
- stac_fastapi/sfeos_helpers/database/utils.py +50 -0
- stac_fastapi/sfeos_helpers/filter/__init__.py +44 -0
- stac_fastapi/sfeos_helpers/filter/client.py +98 -0
- stac_fastapi/sfeos_helpers/filter/cql2.py +39 -0
- stac_fastapi/sfeos_helpers/filter/transform.py +133 -0
- stac_fastapi/sfeos_helpers/mappings.py +262 -0
- stac_fastapi/sfeos_helpers/version.py +2 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Document operations for Elasticsearch/OpenSearch.
|
|
2
|
+
|
|
3
|
+
This module provides functions for working with documents in Elasticsearch/OpenSearch,
|
|
4
|
+
including document ID generation and bulk action creation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List
|
|
8
|
+
|
|
9
|
+
from stac_fastapi.sfeos_helpers.database.index import index_alias_by_collection_id
|
|
10
|
+
from stac_fastapi.types.stac import Item
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def mk_item_id(item_id: str, collection_id: str) -> str:
    """Build the Elasticsearch document id for an Item.

    Args:
        item_id (str): The id of the Item.
        collection_id (str): The id of the Collection the Item belongs to.

    Returns:
        str: The document id for the Item — the Item id and the Collection id
            joined by a `|` character.
    """
    return "|".join((item_id, collection_id))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def mk_actions(collection_id: str, processed_items: List[Item]) -> List[Dict[str, Any]]:
    """Build Elasticsearch bulk actions for a list of processed items.

    Args:
        collection_id (str): The identifier for the collection the items belong to.
        processed_items (List[Item]): The list of processed items to be bulk indexed.

    Returns:
        List[Dict[str, Any]]: The list of bulk actions to be executed, each a
            dictionary with the following keys:
            - `_index`: the index to store the document in.
            - `_id`: the document's identifier.
            - `_source`: the source of the document.
    """
    # All actions in one bulk batch target the same collection alias.
    target_index = index_alias_by_collection_id(collection_id)

    def _bulk_action(item):
        # One bulk-index action per item; the composite id keeps item ids
        # unique across collections.
        return {
            "_index": target_index,
            "_id": mk_item_id(item["id"], item["collection"]),
            "_source": item,
        }

    return [_bulk_action(item) for item in processed_items]
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Index management functions for Elasticsearch/OpenSearch.
|
|
2
|
+
|
|
3
|
+
This module provides functions for creating and managing indices in Elasticsearch/OpenSearch.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from functools import lru_cache
|
|
7
|
+
from typing import Any, List, Optional
|
|
8
|
+
|
|
9
|
+
from stac_fastapi.sfeos_helpers.mappings import (
|
|
10
|
+
_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE,
|
|
11
|
+
COLLECTIONS_INDEX,
|
|
12
|
+
ES_COLLECTIONS_MAPPINGS,
|
|
13
|
+
ES_ITEMS_MAPPINGS,
|
|
14
|
+
ES_ITEMS_SETTINGS,
|
|
15
|
+
ITEM_INDICES,
|
|
16
|
+
ITEMS_INDEX_PREFIX,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@lru_cache(256)
def index_by_collection_id(collection_id: str) -> str:
    """
    Translate a collection id into an Elasticsearch index name.

    Args:
        collection_id (str): The collection id to translate into an index name.

    Returns:
        str: The index name — the items prefix, the sanitized lowercase id, and
            an underscore-separated suffix that is the hex encoding of the
            original id's UTF-8 bytes (so the original id survives sanitizing).
    """
    sanitized = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE)
    hex_suffix = collection_id.encode("utf-8").hex()
    return "".join((ITEMS_INDEX_PREFIX, sanitized.lower(), "_", hex_suffix))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@lru_cache(256)
def index_alias_by_collection_id(collection_id: str) -> str:
    """
    Translate a collection id into an Elasticsearch index alias.

    Unlike index_by_collection_id, the alias keeps the original character case
    and carries no hex suffix.

    Args:
        collection_id (str): The collection id to translate into an index alias.

    Returns:
        str: The index alias derived from the collection id.
    """
    sanitized = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE)
    return ITEMS_INDEX_PREFIX + sanitized
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def indices(collection_ids: Optional[List[str]]) -> str:
    """
    Get a comma-separated string of index names for a given list of collection ids.

    Args:
        collection_ids: A list of collection ids, possibly None or empty.

    Returns:
        A string of comma-separated index aliases. If `collection_ids` is empty
        or None, returns the default item indices.
    """
    if not collection_ids:
        return ITEM_INDICES
    return ",".join(index_alias_by_collection_id(cid) for cid in collection_ids)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
async def create_index_templates_shared(settings: Any) -> None:
    """Create index templates for Elasticsearch/OpenSearch Collection and Item indices.

    Args:
        settings (Any): The settings object containing the client configuration.
            Must have a create_client attribute that returns an Elasticsearch/OpenSearch client.

    Returns:
        None: This function doesn't return any value but creates index templates in the database.

    Notes:
        This function creates two index templates:
        1. A template for the Collections index with the appropriate mappings
        2. A template for the Items indices with both settings and mappings

        These templates ensure that any new indices created with matching patterns
        will automatically have the correct structure.
    """
    # NOTE(review): create_client is accessed as an attribute, not called —
    # presumably a property that constructs a fresh async client; confirm
    # against the settings implementation.
    client = settings.create_client
    # Template applied to any index whose name starts with the collections index name.
    await client.indices.put_index_template(
        name=f"template_{COLLECTIONS_INDEX}",
        body={
            "index_patterns": [f"{COLLECTIONS_INDEX}*"],
            "template": {"mappings": ES_COLLECTIONS_MAPPINGS},
        },
    )
    # Items template additionally carries index settings (the collections one does not).
    await client.indices.put_index_template(
        name=f"template_{ITEMS_INDEX_PREFIX}",
        body={
            "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
            "template": {"settings": ES_ITEMS_SETTINGS, "mappings": ES_ITEMS_MAPPINGS},
        },
    )
    # The client created above is owned by this function, so close it here.
    await client.close()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
async def delete_item_index_shared(settings: Any, collection_id: str) -> None:
    """Delete the index for items in a collection.

    Args:
        settings (Any): The settings object containing the client configuration.
            Must have a create_client attribute that returns an Elasticsearch/OpenSearch client.
        collection_id (str): The ID of the collection whose items index will be deleted.

    Returns:
        None: This function doesn't return any value but deletes an item index in the database.

    Notes:
        This function deletes an item index and its alias. It first resolves the alias to find
        the actual index name, then deletes both the alias and the index.
    """
    # NOTE(review): create_client accessed as an attribute, not called — see
    # create_index_templates_shared; confirm against the settings implementation.
    client = settings.create_client

    name = index_alias_by_collection_id(collection_id)
    resolved = await client.indices.resolve_index(name=name)
    if "aliases" in resolved and resolved["aliases"]:
        # NOTE(review): list-destructuring assumes resolve_index returned
        # exactly one alias entry; with more than one this raises ValueError —
        # confirm a collection alias can never resolve to multiple entries.
        [alias] = resolved["aliases"]
        # Remove the alias first, then the concrete index (or indices) behind it.
        await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
        await client.indices.delete(index=alias["indices"])
    else:
        # No alias found: treat the name as a concrete index and delete it directly.
        await client.indices.delete(index=name)
    await client.close()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Mapping functions for Elasticsearch/OpenSearch.
|
|
2
|
+
|
|
3
|
+
This module provides functions for working with Elasticsearch/OpenSearch mappings.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def get_queryables_mapping_shared(
    mappings: Dict[str, Dict[str, Any]], collection_id: str = "*"
) -> Dict[str, str]:
    """Retrieve mapping of Queryables for search.

    Args:
        mappings (Dict[str, Dict[str, Any]]): The mapping information returned from
            Elasticsearch/OpenSearch client's indices.get_mapping() method.
            Expected structure is {index_name: {"mappings": {...}}}.
        collection_id (str, optional): The id of the Collection the Queryables
            belongs to. Defaults to "*". Currently unused by the body; kept for
            interface compatibility.

    Returns:
        Dict[str, str]: A dictionary containing the Queryables mappings, where keys are
            field names and values are the corresponding paths in the Elasticsearch/OpenSearch
            document structure.
    """
    queryables_mapping: Dict[str, str] = {}

    for mapping in mappings.values():
        fields = mapping["mappings"].get("properties", {})
        # Read (instead of pop) the nested "properties" object so the caller's
        # mapping dict is not mutated as a side effect of this call.
        nested_properties = fields.get("properties", {}).get("properties", {})

        # Top-level fields map to themselves. The "properties" container is
        # skipped: it is not itself a queryable field.
        for field_key in fields:
            if field_key != "properties":
                queryables_mapping[field_key] = field_key

        # Fields nested under Item "properties" are exposed un-prefixed but
        # resolve to their "properties.<name>" document path (overriding any
        # same-named top-level field, as before).
        for property_key in nested_properties:
            queryables_mapping[property_key] = f"properties.{property_key}"

    return queryables_mapping
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Query building functions for Elasticsearch/OpenSearch.
|
|
2
|
+
|
|
3
|
+
This module provides functions for building and manipulating Elasticsearch/OpenSearch queries.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from stac_fastapi.sfeos_helpers.mappings import Geometry
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def apply_free_text_filter_shared(
    search: Any, free_text_queries: Optional[List[str]]
) -> Any:
    """Apply a free text query to an Elasticsearch/OpenSearch search.

    Args:
        search (Any): The search object to apply the query to.
        free_text_queries (Optional[List[str]]): Text strings to search for in
            the item properties; None leaves the search unchanged.

    Returns:
        Any: The search object with the free text query applied, or the original
            search object if free_text_queries is None.

    Notes:
        The strings are combined into a single query_string query over
        `properties.*`, joined with OR operators.
    """
    if free_text_queries is None:
        return search

    joined = '" OR properties.\\*:"'.join(free_text_queries)
    return search.query(
        "query_string", query=f'properties.\\*:"{joined}"'
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def apply_intersects_filter_shared(
    intersects: "Geometry",
) -> Dict[str, Dict]:
    """Create a geo_shape filter for intersecting geometry.

    Args:
        intersects (Geometry): The intersecting geometry, represented as a
            GeoJSON-like object with `type` and `coordinates` attributes.

    Returns:
        Dict[str, Dict]: A dictionary containing the geo_shape filter
            configuration that can be used with Elasticsearch/OpenSearch Q objects.

    Notes:
        The returned dictionary targets the document's `geometry` field with an
        "intersects" relation and should be wrapped in a Q object when applied
        to a search.
    """
    shape = {
        "type": intersects.type.lower(),
        "coordinates": intersects.coordinates,
    }
    return {
        "geo_shape": {
            "geometry": {
                "shape": shape,
                "relation": "intersects",
            }
        }
    }
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
    """Create a sort configuration for Elasticsearch/OpenSearch queries.

    Args:
        sortby (List): Sort specifications, each carrying `field` and
            `direction` attributes.

    Returns:
        Optional[Dict[str, Dict[str, str]]]: A mapping of field names to
            {"order": direction} entries, directly usable in search requests,
            or None when no sort was specified.
    """
    if not sortby:
        return None

    sort_config: Dict[str, Dict[str, str]] = {}
    for spec in sortby:
        sort_config[spec.field] = {"order": spec.direction}
    return sort_config
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Utility functions for database operations in Elasticsearch/OpenSearch.
|
|
2
|
+
|
|
3
|
+
This module provides utility functions for working with database operations
|
|
4
|
+
in Elasticsearch/OpenSearch, such as parameter validation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Union
|
|
9
|
+
|
|
10
|
+
from stac_fastapi.core.utilities import get_bool_env
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def validate_refresh(value: Union[str, bool]) -> str:
    """
    Validate the `refresh` parameter value.

    Args:
        value (Union[str, bool]): The `refresh` parameter value, which can be a string or a boolean.

    Returns:
        str: The validated value of the `refresh` parameter, which can be "true", "false", or "wait_for".
    """
    logger = logging.getLogger(__name__)

    # Boolean-like inputs go through get_bool_env with the given value as the
    # default — NOTE(review): this means a set DATABASE_REFRESH environment
    # variable appears to take precedence over the passed value; confirm that
    # this is the intended behavior of get_bool_env.
    boolean_like = {"true", "false", "1", "0", "yes", "no", "y", "n"}
    if isinstance(value, bool) or value in boolean_like:
        resolved = get_bool_env("DATABASE_REFRESH", default=value)
        return "true" if resolved else "false"

    # Remaining string values are compared case-insensitively.
    value = value.lower()

    # "wait_for" is the only other accepted value.
    if value == "wait_for":
        return "wait_for"

    # Anything else is invalid: warn and fall back to "false".
    logger.warning(
        f"Invalid value for `refresh`: '{value}'. Expected 'true', 'false', or 'wait_for'. Defaulting to 'false'."
    )
    return "false"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Shared filter extension methods for stac-fastapi elasticsearch and opensearch backends.
|
|
2
|
+
|
|
3
|
+
This module provides shared functionality for implementing the STAC API Filter Extension
|
|
4
|
+
with Elasticsearch and OpenSearch. It includes:
|
|
5
|
+
|
|
6
|
+
1. Functions for converting CQL2 queries to Elasticsearch/OpenSearch query DSL
|
|
7
|
+
2. Helper functions for field mapping and query transformation
|
|
8
|
+
3. Base implementation of the AsyncBaseFiltersClient for Elasticsearch/OpenSearch
|
|
9
|
+
|
|
10
|
+
The filter package is organized as follows:
|
|
11
|
+
- cql2.py: CQL2 pattern conversion helpers
|
|
12
|
+
- transform.py: Query transformation functions
|
|
13
|
+
- client.py: Filter client implementation
|
|
14
|
+
|
|
15
|
+
When adding new functionality to this package, consider:
|
|
16
|
+
1. Will this code be used by both Elasticsearch and OpenSearch implementations?
|
|
17
|
+
2. Is the functionality stable and unlikely to diverge between implementations?
|
|
18
|
+
3. Is the function well-documented with clear input/output contracts?
|
|
19
|
+
|
|
20
|
+
Function Naming Conventions:
|
|
21
|
+
- Function names should be descriptive and indicate their purpose
|
|
22
|
+
- Parameter names should be consistent across similar functions
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from .client import EsAsyncBaseFiltersClient
|
|
26
|
+
|
|
27
|
+
# Re-export the main functions and classes for backward compatibility
|
|
28
|
+
from .cql2 import (
|
|
29
|
+
_replace_like_patterns,
|
|
30
|
+
cql2_like_patterns,
|
|
31
|
+
cql2_like_to_es,
|
|
32
|
+
valid_like_substitutions,
|
|
33
|
+
)
|
|
34
|
+
from .transform import to_es, to_es_field
|
|
35
|
+
|
|
36
|
+
# Public names of the filter package, re-exported here for backward
# compatibility (including the underscore-prefixed helper, which is
# deliberately part of the re-exported surface).
__all__ = [
    "cql2_like_patterns",
    "valid_like_substitutions",
    "cql2_like_to_es",
    "_replace_like_patterns",
    "to_es_field",
    "to_es",
    "EsAsyncBaseFiltersClient",
]
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Filter client implementation for Elasticsearch/OpenSearch."""
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
import attr
|
|
7
|
+
|
|
8
|
+
from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
|
|
9
|
+
from stac_fastapi.core.extensions.filter import DEFAULT_QUERYABLES
|
|
10
|
+
from stac_fastapi.extensions.core.filter.client import AsyncBaseFiltersClient
|
|
11
|
+
from stac_fastapi.sfeos_helpers.mappings import ES_MAPPING_TYPE_TO_JSON
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@attr.s
class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient):
    """Defines a pattern for implementing the STAC filter extension."""

    database: BaseDatabaseLogic = attr.ib()

    async def get_queryables(
        self, collection_id: Optional[str] = None, **kwargs
    ) -> Dict[str, Any]:
        """Get the queryables available for the given collection_id.

        If collection_id is None, returns the intersection of all
        queryables over all collections.

        This base implementation returns a blank queryable schema. This is not allowed
        under OGC CQL but it is allowed by the STAC API Filter Extension

        https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter#queryables

        Args:
            collection_id (str, optional): The id of the collection to get queryables for.
            **kwargs: additional keyword arguments

        Returns:
            Dict[str, Any]: A dictionary containing the queryables for the given collection.
        """
        # Copy DEFAULT_QUERYABLES (including its nested schema dicts) instead
        # of referencing it directly: the per-collection logic below writes
        # into `properties` and previously mutated the shared module-level
        # defaults, leaking collection-specific fields across requests.
        # (Assumes DEFAULT_QUERYABLES values are dicts — JSON Schema fragments.)
        properties: Dict[str, Any] = {
            name: dict(schema) for name, schema in DEFAULT_QUERYABLES.items()
        }
        queryables: Dict[str, Any] = {
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "$id": "https://stac-api.example.com/queryables",
            "type": "object",
            "title": "Queryables for STAC API",
            "description": "Queryable names for the STAC API Item Search filter.",
            "properties": properties,
            "additionalProperties": True,
        }
        if not collection_id:
            return queryables

        # For a specific collection, only the queryables discovered from its
        # mapping (plus the defaults) are allowed.
        queryables["additionalProperties"] = False

        mapping_data = await self.database.get_items_mapping(collection_id)
        mapping_properties = next(iter(mapping_data.values()))["mappings"]["properties"]
        stack = deque(mapping_properties.items())

        # Breadth-first walk over the mapping's (possibly nested) fields.
        while stack:
            field_name, field_def = stack.popleft()

            # Iterate over nested fields
            field_properties = field_def.get("properties")
            if field_properties:
                # Fields in Item Properties should be exposed with their un-prefixed names,
                # and not require expressions to prefix them with properties,
                # e.g., eo:cloud_cover instead of properties.eo:cloud_cover.
                if field_name == "properties":
                    stack.extend(field_properties.items())
                else:
                    stack.extend(
                        (f"{field_name}.{k}", v) for k, v in field_properties.items()
                    )

            # Skip non-indexed or disabled fields
            field_type = field_def.get("type")
            if not field_type or not field_def.get("enabled", True):
                continue

            # Generate field properties. Copy the default schema (if any) so
            # the setdefault calls below cannot modify DEFAULT_QUERYABLES
            # entries in place.
            field_result = dict(DEFAULT_QUERYABLES.get(field_name, {}))
            properties[field_name] = field_result

            field_name_human = field_name.replace("_", " ").title()
            field_result.setdefault("title", field_name_human)

            field_type_json = ES_MAPPING_TYPE_TO_JSON.get(field_type, field_type)
            field_result.setdefault("type", field_type_json)

            if field_type in {"date", "date_nanos"}:
                field_result.setdefault("format", "date-time")

        return queryables
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""CQL2 pattern conversion helpers for Elasticsearch/OpenSearch."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Matches a backslash escape pair, a bare LIKE wildcard (% or _), or a
# trailing lone backslash (which is always invalid).
cql2_like_patterns = re.compile(r"\\.|[%_]|\\$")

# Token -> replacement table: escaped characters become themselves literally,
# bare CQL2 wildcards become their Elasticsearch wildcard equivalents.
valid_like_substitutions = {
    "\\\\": "\\",
    "\\%": "%",
    "\\_": "_",
    "%": "*",
    "_": "?",
}


def _replace_like_patterns(match: re.Match) -> str:
    """Translate one matched LIKE token, rejecting unknown escape sequences."""
    token = match.group()
    replacement = valid_like_substitutions.get(token)
    if replacement is None:
        raise ValueError(f"'{token}' is not a valid escape sequence")
    return replacement


def cql2_like_to_es(string: str) -> str:
    """
    Convert CQL2 "LIKE" characters to Elasticsearch "wildcard" characters.

    Args:
        string (str): The string containing CQL2 wildcard characters.

    Returns:
        str: The converted string with Elasticsearch compatible wildcards.

    Raises:
        ValueError: If an invalid escape sequence is encountered.
    """
    return cql2_like_patterns.sub(_replace_like_patterns, string)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Query transformation functions for Elasticsearch/OpenSearch."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from stac_fastapi.core.extensions.filter import (
|
|
6
|
+
AdvancedComparisonOp,
|
|
7
|
+
ComparisonOp,
|
|
8
|
+
LogicalOp,
|
|
9
|
+
SpatialOp,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from .cql2 import cql2_like_to_es
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def to_es_field(queryables_mapping: Dict[str, Any], field: str) -> str:
    """
    Map a given field to its corresponding Elasticsearch field according to a predefined mapping.

    Args:
        queryables_mapping (Dict[str, Any]): Mapping of queryable names to
            Elasticsearch document paths.
        field (str): The field name from a user query or filter.

    Returns:
        str: The mapped field name suitable for Elasticsearch queries, or the
            field itself when no mapping entry exists.
    """
    if field in queryables_mapping:
        return queryables_mapping[field]
    return field
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def to_es(queryables_mapping: Dict[str, Any], query: Dict[str, Any]) -> Dict[str, Any]:
    """
    Transform a simplified CQL2 query structure to an Elasticsearch compatible query DSL.

    Args:
        queryables_mapping (Dict[str, Any]): Mapping of queryable names to
            Elasticsearch document paths, used to resolve property references.
        query (Dict[str, Any]): The query dictionary containing 'op' and 'args'.

    Returns:
        Dict[str, Any]: The corresponding Elasticsearch query in the form of a
            dictionary. An unrecognized 'op' yields an empty dict.

    Raises:
        ValueError: If the IN operator's second argument is not a list.
    """
    # Logical operators map onto an Elasticsearch bool query; sub-queries are
    # translated recursively.
    if query["op"] in [LogicalOp.AND, LogicalOp.OR, LogicalOp.NOT]:
        bool_type = {
            LogicalOp.AND: "must",
            LogicalOp.OR: "should",
            LogicalOp.NOT: "must_not",
        }[query["op"]]
        return {
            "bool": {
                bool_type: [
                    to_es(queryables_mapping, sub_query) for sub_query in query["args"]
                ]
            }
        }

    elif query["op"] in [
        ComparisonOp.EQ,
        ComparisonOp.NEQ,
        ComparisonOp.LT,
        ComparisonOp.LTE,
        ComparisonOp.GT,
        ComparisonOp.GTE,
    ]:
        range_op = {
            ComparisonOp.LT: "lt",
            ComparisonOp.LTE: "lte",
            ComparisonOp.GT: "gt",
            ComparisonOp.GTE: "gte",
        }

        field = to_es_field(queryables_mapping, query["args"][0]["property"])
        value = query["args"][1]
        # CQL2 wraps temporal literals as {"timestamp": ...}; unwrap and use
        # range queries (never term) so date math applies to equality too.
        if isinstance(value, dict) and "timestamp" in value:
            value = value["timestamp"]
            if query["op"] == ComparisonOp.EQ:
                return {"range": {field: {"gte": value, "lte": value}}}
            elif query["op"] == ComparisonOp.NEQ:
                return {
                    "bool": {
                        "must_not": [{"range": {field: {"gte": value, "lte": value}}}]
                    }
                }
            else:
                return {"range": {field: {range_op[query["op"]]: value}}}
        else:
            # Non-temporal values: exact (in)equality uses term queries.
            if query["op"] == ComparisonOp.EQ:
                return {"term": {field: value}}
            elif query["op"] == ComparisonOp.NEQ:
                return {"bool": {"must_not": [{"term": {field: value}}]}}
            else:
                return {"range": {field: {range_op[query["op"]]: value}}}

    elif query["op"] == ComparisonOp.IS_NULL:
        # NULL test: document must not have the field at all.
        field = to_es_field(queryables_mapping, query["args"][0]["property"])
        return {"bool": {"must_not": {"exists": {"field": field}}}}

    elif query["op"] == AdvancedComparisonOp.BETWEEN:
        field = to_es_field(queryables_mapping, query["args"][0]["property"])
        gte, lte = query["args"][1], query["args"][2]
        # Unwrap temporal literals on either bound.
        if isinstance(gte, dict) and "timestamp" in gte:
            gte = gte["timestamp"]
        if isinstance(lte, dict) and "timestamp" in lte:
            lte = lte["timestamp"]
        return {"range": {field: {"gte": gte, "lte": lte}}}

    elif query["op"] == AdvancedComparisonOp.IN:
        field = to_es_field(queryables_mapping, query["args"][0]["property"])
        values = query["args"][1]
        if not isinstance(values, list):
            raise ValueError(f"Arg {values} is not a list")
        return {"terms": {field: values}}

    elif query["op"] == AdvancedComparisonOp.LIKE:
        field = to_es_field(queryables_mapping, query["args"][0]["property"])
        # Convert CQL2 LIKE wildcards (%, _) to Elasticsearch wildcards (*, ?).
        pattern = cql2_like_to_es(query["args"][1])
        return {"wildcard": {field: {"value": pattern, "case_insensitive": True}}}

    elif query["op"] in [
        SpatialOp.S_INTERSECTS,
        SpatialOp.S_CONTAINS,
        SpatialOp.S_WITHIN,
        SpatialOp.S_DISJOINT,
    ]:
        field = to_es_field(queryables_mapping, query["args"][0]["property"])
        geometry = query["args"][1]

        # Spatial predicates map one-to-one onto geo_shape relations.
        relation_mapping = {
            SpatialOp.S_INTERSECTS: "intersects",
            SpatialOp.S_CONTAINS: "contains",
            SpatialOp.S_WITHIN: "within",
            SpatialOp.S_DISJOINT: "disjoint",
        }

        relation = relation_mapping[query["op"]]
        return {"geo_shape": {field: {"shape": geometry, "relation": relation}}}

    # Unknown operator: silently match nothing special (empty query clause).
    return {}
|