sfeos-helpers 6.0.0__tar.gz → 6.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/PKG-INFO +91 -14
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/README.md +90 -13
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/setup.py +1 -1
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/sfeos_helpers.egg-info/PKG-INFO +91 -14
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/sfeos_helpers.egg-info/SOURCES.txt +12 -1
- sfeos_helpers-6.2.0/sfeos_helpers.egg-info/requires.txt +1 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/aggregation/client.py +5 -2
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/database/__init__.py +5 -1
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/database/datetime.py +64 -3
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/database/index.py +59 -2
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/database/query.py +32 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/__init__.py +27 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/base.py +51 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/factory.py +36 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/index_operations.py +167 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/inserters.py +309 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/managers.py +198 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py +15 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/selection/base.py +30 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +127 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/selection/factory.py +37 -0
- sfeos_helpers-6.2.0/stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py +129 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/version.py +1 -1
- sfeos_helpers-6.0.0/sfeos_helpers.egg-info/requires.txt +0 -1
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/setup.cfg +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/sfeos_helpers.egg-info/dependency_links.txt +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/sfeos_helpers.egg-info/not-zip-safe +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/sfeos_helpers.egg-info/top_level.txt +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/aggregation/__init__.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/aggregation/format.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/database/document.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/database/mapping.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/database/utils.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/filter/__init__.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/filter/client.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/filter/cql2.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/filter/transform.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/mappings.py +0 -0
- {sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/models/patch.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sfeos_helpers
|
|
3
|
-
Version: 6.0.0
|
|
3
|
+
Version: 6.2.0
|
|
4
4
|
Summary: Helper library for the Elasticsearch and Opensearch stac-fastapi backends.
|
|
5
5
|
Home-page: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
|
|
6
6
|
License: MIT
|
|
@@ -33,7 +33,7 @@ Description-Content-Type: text/markdown
|
|
|
33
33
|
[](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/network/members)
|
|
34
34
|
[](https://pypi.org/project/stac-fastapi-elasticsearch/)
|
|
35
35
|
[](https://github.com/radiantearth/stac-spec/tree/v1.1.0)
|
|
36
|
-
[](https://github.com/stac-utils/stac-fastapi)
|
|
37
37
|
|
|
38
38
|
## Sponsors & Supporters
|
|
39
39
|
|
|
@@ -103,6 +103,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
|
|
|
103
103
|
- [Auth](#auth)
|
|
104
104
|
- [Aggregation](#aggregation)
|
|
105
105
|
- [Rate Limiting](#rate-limiting)
|
|
106
|
+
- [Datetime-Based Index Management](#datetime-based-index-management)
|
|
106
107
|
|
|
107
108
|
## Documentation & Resources
|
|
108
109
|
|
|
@@ -223,28 +224,105 @@ You can customize additional settings in your `.env` file:
|
|
|
223
224
|
|------------------------------|--------------------------------------------------------------------------------------|--------------------------|---------------------------------------------------------------------------------------------|
|
|
224
225
|
| `ES_HOST` | Hostname for external Elasticsearch/OpenSearch. | `localhost` | Optional |
|
|
225
226
|
| `ES_PORT` | Port for Elasticsearch/OpenSearch. | `9200` (ES) / `9202` (OS)| Optional |
|
|
226
|
-
| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `
|
|
227
|
-
| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `
|
|
227
|
+
| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `true` | Optional |
|
|
228
|
+
| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `true` | Optional |
|
|
229
|
+
| `ES_API_KEY` | API Key for external Elasticsearch/OpenSearch. | N/A | Optional |
|
|
230
|
+
| `ES_TIMEOUT` | Client timeout for Elasticsearch/OpenSearch. | DB client default | Optional |
|
|
228
231
|
| `STAC_FASTAPI_TITLE` | Title of the API in the documentation. | `stac-fastapi-<backend>` | Optional |
|
|
229
232
|
| `STAC_FASTAPI_DESCRIPTION` | Description of the API in the documentation. | N/A | Optional |
|
|
230
233
|
| `STAC_FASTAPI_VERSION` | API version. | `2.1` | Optional |
|
|
231
|
-
| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID
|
|
234
|
+
| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID | `stac-fastapi` | Optional |
|
|
232
235
|
| `APP_HOST` | Server bind address. | `0.0.0.0` | Optional |
|
|
233
|
-
| `APP_PORT` | Server port. | `
|
|
236
|
+
| `APP_PORT` | Server port. | `8000` | Optional |
|
|
234
237
|
| `ENVIRONMENT` | Runtime environment. | `local` | Optional |
|
|
235
238
|
| `WEB_CONCURRENCY` | Number of worker processes. | `10` | Optional |
|
|
236
239
|
| `RELOAD` | Enable auto-reload for development. | `true` | Optional |
|
|
237
240
|
| `STAC_FASTAPI_RATE_LIMIT` | API rate limit per client. | `200/minute` | Optional |
|
|
238
|
-
| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional
|
|
239
|
-
| `ELASTICSEARCH_VERSION`
|
|
240
|
-
| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional
|
|
241
|
-
| `ENABLE_DIRECT_RESPONSE`
|
|
242
|
-
| `RAISE_ON_BULK_ERROR`
|
|
243
|
-
| `DATABASE_REFRESH`
|
|
241
|
+
| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional |
|
|
242
|
+
| `ELASTICSEARCH_VERSION` | Version of Elasticsearch to use. | `8.11.0` | Optional |
|
|
243
|
+
| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional |
|
|
244
|
+
| `ENABLE_DIRECT_RESPONSE` | Enable direct response for maximum performance (disables all FastAPI dependencies, including authentication, custom status codes, and validation) | `false` | Optional |
|
|
245
|
+
| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional |
|
|
246
|
+
| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional |
|
|
244
247
|
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional |
|
|
245
248
|
|
|
246
249
|
> [!NOTE]
|
|
247
|
-
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, and `
|
|
250
|
+
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
|
|
251
|
+
|
|
252
|
+
## Datetime-Based Index Management
|
|
253
|
+
|
|
254
|
+
### Overview
|
|
255
|
+
|
|
256
|
+
SFEOS supports two indexing strategies for managing STAC items:
|
|
257
|
+
|
|
258
|
+
1. **Simple Indexing** (default) - One index per collection
|
|
259
|
+
2. **Datetime-Based Indexing** - Time-partitioned indexes with automatic management
|
|
260
|
+
|
|
261
|
+
The datetime-based indexing strategy is particularly useful for large temporal datasets. When a user provides a datetime parameter in a query, the system knows exactly which index to search, providing **multiple times faster searches** and significantly **reducing database load**.
|
|
262
|
+
|
|
263
|
+
### When to Use
|
|
264
|
+
|
|
265
|
+
**Recommended for:**
|
|
266
|
+
- Systems with large collections containing millions of items
|
|
267
|
+
- Systems requiring high-performance temporal searching
|
|
268
|
+
|
|
269
|
+
**Pros:**
|
|
270
|
+
- Multiple times faster queries with datetime filter
|
|
271
|
+
- Reduced database load - only relevant indexes are searched
|
|
272
|
+
|
|
273
|
+
**Cons:**
|
|
274
|
+
- Slightly longer item indexing time (automatic index management)
|
|
275
|
+
- Greater management complexity
|
|
276
|
+
|
|
277
|
+
### Configuration
|
|
278
|
+
|
|
279
|
+
#### Enabling Datetime-Based Indexing
|
|
280
|
+
|
|
281
|
+
Enable datetime-based indexing by setting the following environment variable:
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
ENABLE_DATETIME_INDEX_FILTERING=true
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Related Configuration Variables
|
|
288
|
+
|
|
289
|
+
| Variable | Description | Default | Example |
|
|
290
|
+
|----------|-------------|---------|---------|
|
|
291
|
+
| `ENABLE_DATETIME_INDEX_FILTERING` | Enables time-based index partitioning | `false` | `true` |
|
|
292
|
+
| `DATETIME_INDEX_MAX_SIZE_GB` | Maximum size limit for datetime indexes (GB) - note: add +20% to target size due to ES/OS compression | `25` | `50` |
|
|
293
|
+
| `STAC_ITEMS_INDEX_PREFIX` | Prefix for item indexes | `items_` | `stac_items_` |
|
|
294
|
+
|
|
295
|
+
## How Datetime-Based Indexing Works
|
|
296
|
+
|
|
297
|
+
### Index and Alias Naming Convention
|
|
298
|
+
|
|
299
|
+
The system uses a precise naming convention:
|
|
300
|
+
|
|
301
|
+
**Physical indexes:**
|
|
302
|
+
```
|
|
303
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{uuid4}
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
**Aliases:**
|
|
307
|
+
```
|
|
308
|
+
{ITEMS_INDEX_PREFIX}{collection-id} # Main collection alias
|
|
309
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime} # Temporal alias
|
|
310
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime}_{end-datetime} # Closed index alias
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
**Example:**
|
|
314
|
+
|
|
315
|
+
*Physical indexes:*
|
|
316
|
+
- `items_sentinel-2-l2a_a1b2c3d4-e5f6-7890-abcd-ef1234567890`
|
|
317
|
+
|
|
318
|
+
*Aliases:*
|
|
319
|
+
- `items_sentinel-2-l2a` - main collection alias
|
|
320
|
+
- `items_sentinel-2-l2a_2024-01-01` - active alias from January 1, 2024
|
|
321
|
+
- `items_sentinel-2-l2a_2024-01-01_2024-03-15` - closed index alias (reached size limit)
|
|
322
|
+
|
|
323
|
+
### Index Size Management
|
|
324
|
+
|
|
325
|
+
**Important - Data Compression:** Elasticsearch and OpenSearch automatically compress data. The configured `DATETIME_INDEX_MAX_SIZE_GB` limit refers to the compressed size on disk. It is recommended to add +20% to the target size to account for compression overhead and metadata.
|
|
248
326
|
|
|
249
327
|
## Interacting with the API
|
|
250
328
|
|
|
@@ -554,4 +632,3 @@ You can customize additional settings in your `.env` file:
|
|
|
554
632
|
- Ensures fair resource allocation among all clients
|
|
555
633
|
|
|
556
634
|
- **Examples**: Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
|
|
557
|
-
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
[](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/network/members)
|
|
16
16
|
[](https://pypi.org/project/stac-fastapi-elasticsearch/)
|
|
17
17
|
[](https://github.com/radiantearth/stac-spec/tree/v1.1.0)
|
|
18
|
-
[](https://github.com/stac-utils/stac-fastapi)
|
|
19
19
|
|
|
20
20
|
## Sponsors & Supporters
|
|
21
21
|
|
|
@@ -85,6 +85,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
|
|
|
85
85
|
- [Auth](#auth)
|
|
86
86
|
- [Aggregation](#aggregation)
|
|
87
87
|
- [Rate Limiting](#rate-limiting)
|
|
88
|
+
- [Datetime-Based Index Management](#datetime-based-index-management)
|
|
88
89
|
|
|
89
90
|
## Documentation & Resources
|
|
90
91
|
|
|
@@ -205,28 +206,105 @@ You can customize additional settings in your `.env` file:
|
|
|
205
206
|
|------------------------------|--------------------------------------------------------------------------------------|--------------------------|---------------------------------------------------------------------------------------------|
|
|
206
207
|
| `ES_HOST` | Hostname for external Elasticsearch/OpenSearch. | `localhost` | Optional |
|
|
207
208
|
| `ES_PORT` | Port for Elasticsearch/OpenSearch. | `9200` (ES) / `9202` (OS)| Optional |
|
|
208
|
-
| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `
|
|
209
|
-
| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `
|
|
209
|
+
| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `true` | Optional |
|
|
210
|
+
| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `true` | Optional |
|
|
211
|
+
| `ES_API_KEY` | API Key for external Elasticsearch/OpenSearch. | N/A | Optional |
|
|
212
|
+
| `ES_TIMEOUT` | Client timeout for Elasticsearch/OpenSearch. | DB client default | Optional |
|
|
210
213
|
| `STAC_FASTAPI_TITLE` | Title of the API in the documentation. | `stac-fastapi-<backend>` | Optional |
|
|
211
214
|
| `STAC_FASTAPI_DESCRIPTION` | Description of the API in the documentation. | N/A | Optional |
|
|
212
215
|
| `STAC_FASTAPI_VERSION` | API version. | `2.1` | Optional |
|
|
213
|
-
| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID
|
|
216
|
+
| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID | `stac-fastapi` | Optional |
|
|
214
217
|
| `APP_HOST` | Server bind address. | `0.0.0.0` | Optional |
|
|
215
|
-
| `APP_PORT` | Server port. | `
|
|
218
|
+
| `APP_PORT` | Server port. | `8000` | Optional |
|
|
216
219
|
| `ENVIRONMENT` | Runtime environment. | `local` | Optional |
|
|
217
220
|
| `WEB_CONCURRENCY` | Number of worker processes. | `10` | Optional |
|
|
218
221
|
| `RELOAD` | Enable auto-reload for development. | `true` | Optional |
|
|
219
222
|
| `STAC_FASTAPI_RATE_LIMIT` | API rate limit per client. | `200/minute` | Optional |
|
|
220
|
-
| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional
|
|
221
|
-
| `ELASTICSEARCH_VERSION`
|
|
222
|
-
| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional
|
|
223
|
-
| `ENABLE_DIRECT_RESPONSE`
|
|
224
|
-
| `RAISE_ON_BULK_ERROR`
|
|
225
|
-
| `DATABASE_REFRESH`
|
|
223
|
+
| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional |
|
|
224
|
+
| `ELASTICSEARCH_VERSION` | Version of Elasticsearch to use. | `8.11.0` | Optional |
|
|
225
|
+
| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional |
|
|
226
|
+
| `ENABLE_DIRECT_RESPONSE` | Enable direct response for maximum performance (disables all FastAPI dependencies, including authentication, custom status codes, and validation) | `false` | Optional |
|
|
227
|
+
| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional |
|
|
228
|
+
| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional |
|
|
226
229
|
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional |
|
|
227
230
|
|
|
228
231
|
> [!NOTE]
|
|
229
|
-
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, and `
|
|
232
|
+
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
|
|
233
|
+
|
|
234
|
+
## Datetime-Based Index Management
|
|
235
|
+
|
|
236
|
+
### Overview
|
|
237
|
+
|
|
238
|
+
SFEOS supports two indexing strategies for managing STAC items:
|
|
239
|
+
|
|
240
|
+
1. **Simple Indexing** (default) - One index per collection
|
|
241
|
+
2. **Datetime-Based Indexing** - Time-partitioned indexes with automatic management
|
|
242
|
+
|
|
243
|
+
The datetime-based indexing strategy is particularly useful for large temporal datasets. When a user provides a datetime parameter in a query, the system knows exactly which index to search, providing **multiple times faster searches** and significantly **reducing database load**.
|
|
244
|
+
|
|
245
|
+
### When to Use
|
|
246
|
+
|
|
247
|
+
**Recommended for:**
|
|
248
|
+
- Systems with large collections containing millions of items
|
|
249
|
+
- Systems requiring high-performance temporal searching
|
|
250
|
+
|
|
251
|
+
**Pros:**
|
|
252
|
+
- Multiple times faster queries with datetime filter
|
|
253
|
+
- Reduced database load - only relevant indexes are searched
|
|
254
|
+
|
|
255
|
+
**Cons:**
|
|
256
|
+
- Slightly longer item indexing time (automatic index management)
|
|
257
|
+
- Greater management complexity
|
|
258
|
+
|
|
259
|
+
### Configuration
|
|
260
|
+
|
|
261
|
+
#### Enabling Datetime-Based Indexing
|
|
262
|
+
|
|
263
|
+
Enable datetime-based indexing by setting the following environment variable:
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
ENABLE_DATETIME_INDEX_FILTERING=true
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
### Related Configuration Variables
|
|
270
|
+
|
|
271
|
+
| Variable | Description | Default | Example |
|
|
272
|
+
|----------|-------------|---------|---------|
|
|
273
|
+
| `ENABLE_DATETIME_INDEX_FILTERING` | Enables time-based index partitioning | `false` | `true` |
|
|
274
|
+
| `DATETIME_INDEX_MAX_SIZE_GB` | Maximum size limit for datetime indexes (GB) - note: add +20% to target size due to ES/OS compression | `25` | `50` |
|
|
275
|
+
| `STAC_ITEMS_INDEX_PREFIX` | Prefix for item indexes | `items_` | `stac_items_` |
|
|
276
|
+
|
|
277
|
+
## How Datetime-Based Indexing Works
|
|
278
|
+
|
|
279
|
+
### Index and Alias Naming Convention
|
|
280
|
+
|
|
281
|
+
The system uses a precise naming convention:
|
|
282
|
+
|
|
283
|
+
**Physical indexes:**
|
|
284
|
+
```
|
|
285
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{uuid4}
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
**Aliases:**
|
|
289
|
+
```
|
|
290
|
+
{ITEMS_INDEX_PREFIX}{collection-id} # Main collection alias
|
|
291
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime} # Temporal alias
|
|
292
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime}_{end-datetime} # Closed index alias
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
**Example:**
|
|
296
|
+
|
|
297
|
+
*Physical indexes:*
|
|
298
|
+
- `items_sentinel-2-l2a_a1b2c3d4-e5f6-7890-abcd-ef1234567890`
|
|
299
|
+
|
|
300
|
+
*Aliases:*
|
|
301
|
+
- `items_sentinel-2-l2a` - main collection alias
|
|
302
|
+
- `items_sentinel-2-l2a_2024-01-01` - active alias from January 1, 2024
|
|
303
|
+
- `items_sentinel-2-l2a_2024-01-01_2024-03-15` - closed index alias (reached size limit)
|
|
304
|
+
|
|
305
|
+
### Index Size Management
|
|
306
|
+
|
|
307
|
+
**Important - Data Compression:** Elasticsearch and OpenSearch automatically compress data. The configured `DATETIME_INDEX_MAX_SIZE_GB` limit refers to the compressed size on disk. It is recommended to add +20% to the target size to account for compression overhead and metadata.
|
|
230
308
|
|
|
231
309
|
## Interacting with the API
|
|
232
310
|
|
|
@@ -536,4 +614,3 @@ You can customize additional settings in your `.env` file:
|
|
|
536
614
|
- Ensures fair resource allocation among all clients
|
|
537
615
|
|
|
538
616
|
- **Examples**: Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
|
|
539
|
-
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sfeos-helpers
|
|
3
|
-
Version: 6.0.0
|
|
3
|
+
Version: 6.2.0
|
|
4
4
|
Summary: Helper library for the Elasticsearch and Opensearch stac-fastapi backends.
|
|
5
5
|
Home-page: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
|
|
6
6
|
License: MIT
|
|
@@ -33,7 +33,7 @@ Description-Content-Type: text/markdown
|
|
|
33
33
|
[](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/network/members)
|
|
34
34
|
[](https://pypi.org/project/stac-fastapi-elasticsearch/)
|
|
35
35
|
[](https://github.com/radiantearth/stac-spec/tree/v1.1.0)
|
|
36
|
-
[](https://github.com/stac-utils/stac-fastapi)
|
|
37
37
|
|
|
38
38
|
## Sponsors & Supporters
|
|
39
39
|
|
|
@@ -103,6 +103,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
|
|
|
103
103
|
- [Auth](#auth)
|
|
104
104
|
- [Aggregation](#aggregation)
|
|
105
105
|
- [Rate Limiting](#rate-limiting)
|
|
106
|
+
- [Datetime-Based Index Management](#datetime-based-index-management)
|
|
106
107
|
|
|
107
108
|
## Documentation & Resources
|
|
108
109
|
|
|
@@ -223,28 +224,105 @@ You can customize additional settings in your `.env` file:
|
|
|
223
224
|
|------------------------------|--------------------------------------------------------------------------------------|--------------------------|---------------------------------------------------------------------------------------------|
|
|
224
225
|
| `ES_HOST` | Hostname for external Elasticsearch/OpenSearch. | `localhost` | Optional |
|
|
225
226
|
| `ES_PORT` | Port for Elasticsearch/OpenSearch. | `9200` (ES) / `9202` (OS)| Optional |
|
|
226
|
-
| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `
|
|
227
|
-
| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `
|
|
227
|
+
| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `true` | Optional |
|
|
228
|
+
| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `true` | Optional |
|
|
229
|
+
| `ES_API_KEY` | API Key for external Elasticsearch/OpenSearch. | N/A | Optional |
|
|
230
|
+
| `ES_TIMEOUT` | Client timeout for Elasticsearch/OpenSearch. | DB client default | Optional |
|
|
228
231
|
| `STAC_FASTAPI_TITLE` | Title of the API in the documentation. | `stac-fastapi-<backend>` | Optional |
|
|
229
232
|
| `STAC_FASTAPI_DESCRIPTION` | Description of the API in the documentation. | N/A | Optional |
|
|
230
233
|
| `STAC_FASTAPI_VERSION` | API version. | `2.1` | Optional |
|
|
231
|
-
| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID
|
|
234
|
+
| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID | `stac-fastapi` | Optional |
|
|
232
235
|
| `APP_HOST` | Server bind address. | `0.0.0.0` | Optional |
|
|
233
|
-
| `APP_PORT` | Server port. | `
|
|
236
|
+
| `APP_PORT` | Server port. | `8000` | Optional |
|
|
234
237
|
| `ENVIRONMENT` | Runtime environment. | `local` | Optional |
|
|
235
238
|
| `WEB_CONCURRENCY` | Number of worker processes. | `10` | Optional |
|
|
236
239
|
| `RELOAD` | Enable auto-reload for development. | `true` | Optional |
|
|
237
240
|
| `STAC_FASTAPI_RATE_LIMIT` | API rate limit per client. | `200/minute` | Optional |
|
|
238
|
-
| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional
|
|
239
|
-
| `ELASTICSEARCH_VERSION`
|
|
240
|
-
| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional
|
|
241
|
-
| `ENABLE_DIRECT_RESPONSE`
|
|
242
|
-
| `RAISE_ON_BULK_ERROR`
|
|
243
|
-
| `DATABASE_REFRESH`
|
|
241
|
+
| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional |
|
|
242
|
+
| `ELASTICSEARCH_VERSION` | Version of Elasticsearch to use. | `8.11.0` | Optional |
|
|
243
|
+
| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional |
|
|
244
|
+
| `ENABLE_DIRECT_RESPONSE` | Enable direct response for maximum performance (disables all FastAPI dependencies, including authentication, custom status codes, and validation) | `false` | Optional |
|
|
245
|
+
| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional |
|
|
246
|
+
| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional |
|
|
244
247
|
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional |
|
|
245
248
|
|
|
246
249
|
> [!NOTE]
|
|
247
|
-
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, and `
|
|
250
|
+
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
|
|
251
|
+
|
|
252
|
+
## Datetime-Based Index Management
|
|
253
|
+
|
|
254
|
+
### Overview
|
|
255
|
+
|
|
256
|
+
SFEOS supports two indexing strategies for managing STAC items:
|
|
257
|
+
|
|
258
|
+
1. **Simple Indexing** (default) - One index per collection
|
|
259
|
+
2. **Datetime-Based Indexing** - Time-partitioned indexes with automatic management
|
|
260
|
+
|
|
261
|
+
The datetime-based indexing strategy is particularly useful for large temporal datasets. When a user provides a datetime parameter in a query, the system knows exactly which index to search, providing **multiple times faster searches** and significantly **reducing database load**.
|
|
262
|
+
|
|
263
|
+
### When to Use
|
|
264
|
+
|
|
265
|
+
**Recommended for:**
|
|
266
|
+
- Systems with large collections containing millions of items
|
|
267
|
+
- Systems requiring high-performance temporal searching
|
|
268
|
+
|
|
269
|
+
**Pros:**
|
|
270
|
+
- Multiple times faster queries with datetime filter
|
|
271
|
+
- Reduced database load - only relevant indexes are searched
|
|
272
|
+
|
|
273
|
+
**Cons:**
|
|
274
|
+
- Slightly longer item indexing time (automatic index management)
|
|
275
|
+
- Greater management complexity
|
|
276
|
+
|
|
277
|
+
### Configuration
|
|
278
|
+
|
|
279
|
+
#### Enabling Datetime-Based Indexing
|
|
280
|
+
|
|
281
|
+
Enable datetime-based indexing by setting the following environment variable:
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
ENABLE_DATETIME_INDEX_FILTERING=true
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Related Configuration Variables
|
|
288
|
+
|
|
289
|
+
| Variable | Description | Default | Example |
|
|
290
|
+
|----------|-------------|---------|---------|
|
|
291
|
+
| `ENABLE_DATETIME_INDEX_FILTERING` | Enables time-based index partitioning | `false` | `true` |
|
|
292
|
+
| `DATETIME_INDEX_MAX_SIZE_GB` | Maximum size limit for datetime indexes (GB) - note: add +20% to target size due to ES/OS compression | `25` | `50` |
|
|
293
|
+
| `STAC_ITEMS_INDEX_PREFIX` | Prefix for item indexes | `items_` | `stac_items_` |
|
|
294
|
+
|
|
295
|
+
## How Datetime-Based Indexing Works
|
|
296
|
+
|
|
297
|
+
### Index and Alias Naming Convention
|
|
298
|
+
|
|
299
|
+
The system uses a precise naming convention:
|
|
300
|
+
|
|
301
|
+
**Physical indexes:**
|
|
302
|
+
```
|
|
303
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{uuid4}
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
**Aliases:**
|
|
307
|
+
```
|
|
308
|
+
{ITEMS_INDEX_PREFIX}{collection-id} # Main collection alias
|
|
309
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime} # Temporal alias
|
|
310
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime}_{end-datetime} # Closed index alias
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
**Example:**
|
|
314
|
+
|
|
315
|
+
*Physical indexes:*
|
|
316
|
+
- `items_sentinel-2-l2a_a1b2c3d4-e5f6-7890-abcd-ef1234567890`
|
|
317
|
+
|
|
318
|
+
*Aliases:*
|
|
319
|
+
- `items_sentinel-2-l2a` - main collection alias
|
|
320
|
+
- `items_sentinel-2-l2a_2024-01-01` - active alias from January 1, 2024
|
|
321
|
+
- `items_sentinel-2-l2a_2024-01-01_2024-03-15` - closed index alias (reached size limit)
|
|
322
|
+
|
|
323
|
+
### Index Size Management
|
|
324
|
+
|
|
325
|
+
**Important - Data Compression:** Elasticsearch and OpenSearch automatically compress data. The configured `DATETIME_INDEX_MAX_SIZE_GB` limit refers to the compressed size on disk. It is recommended to add +20% to the target size to account for compression overhead and metadata.
|
|
248
326
|
|
|
249
327
|
## Interacting with the API
|
|
250
328
|
|
|
@@ -554,4 +632,3 @@ You can customize additional settings in your `.env` file:
|
|
|
554
632
|
- Ensures fair resource allocation among all clients
|
|
555
633
|
|
|
556
634
|
- **Examples**: Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
|
|
557
|
-
|
|
@@ -23,4 +23,15 @@ stac_fastapi/sfeos_helpers/filter/__init__.py
|
|
|
23
23
|
stac_fastapi/sfeos_helpers/filter/client.py
|
|
24
24
|
stac_fastapi/sfeos_helpers/filter/cql2.py
|
|
25
25
|
stac_fastapi/sfeos_helpers/filter/transform.py
|
|
26
|
-
stac_fastapi/sfeos_helpers/models/patch.py
|
|
26
|
+
stac_fastapi/sfeos_helpers/models/patch.py
|
|
27
|
+
stac_fastapi/sfeos_helpers/search_engine/__init__.py
|
|
28
|
+
stac_fastapi/sfeos_helpers/search_engine/base.py
|
|
29
|
+
stac_fastapi/sfeos_helpers/search_engine/factory.py
|
|
30
|
+
stac_fastapi/sfeos_helpers/search_engine/index_operations.py
|
|
31
|
+
stac_fastapi/sfeos_helpers/search_engine/inserters.py
|
|
32
|
+
stac_fastapi/sfeos_helpers/search_engine/managers.py
|
|
33
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py
|
|
34
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/base.py
|
|
35
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py
|
|
36
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/factory.py
|
|
37
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
stac-fastapi.core==6.2.0
|
{sfeos_helpers-6.0.0 → sfeos_helpers-6.2.0}/stac_fastapi/sfeos_helpers/aggregation/client.py
RENAMED
|
@@ -313,9 +313,11 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
|
|
|
313
313
|
)
|
|
314
314
|
|
|
315
315
|
if aggregate_request.datetime:
|
|
316
|
-
search = self.database.apply_datetime_filter(
|
|
317
|
-
search=search,
|
|
316
|
+
search, datetime_search = self.database.apply_datetime_filter(
|
|
317
|
+
search=search, datetime=aggregate_request.datetime
|
|
318
318
|
)
|
|
319
|
+
else:
|
|
320
|
+
datetime_search = {"gte": None, "lte": None}
|
|
319
321
|
|
|
320
322
|
if aggregate_request.bbox:
|
|
321
323
|
bbox = aggregate_request.bbox
|
|
@@ -414,6 +416,7 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
|
|
|
414
416
|
geometry_geohash_grid_precision,
|
|
415
417
|
geometry_geotile_grid_precision,
|
|
416
418
|
datetime_frequency_interval,
|
|
419
|
+
datetime_search,
|
|
417
420
|
)
|
|
418
421
|
except Exception as error:
|
|
419
422
|
if not isinstance(error, IndexError):
|
|
@@ -30,11 +30,12 @@ Function Naming Conventions:
|
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
# Re-export all functions for backward compatibility
|
|
33
|
-
from .datetime import return_date
|
|
33
|
+
from .datetime import extract_date, extract_first_date_from_index, return_date
|
|
34
34
|
from .document import mk_actions, mk_item_id
|
|
35
35
|
from .index import (
|
|
36
36
|
create_index_templates_shared,
|
|
37
37
|
delete_item_index_shared,
|
|
38
|
+
filter_indexes_by_datetime,
|
|
38
39
|
index_alias_by_collection_id,
|
|
39
40
|
index_by_collection_id,
|
|
40
41
|
indices,
|
|
@@ -53,6 +54,7 @@ __all__ = [
|
|
|
53
54
|
"delete_item_index_shared",
|
|
54
55
|
"index_alias_by_collection_id",
|
|
55
56
|
"index_by_collection_id",
|
|
57
|
+
"filter_indexes_by_datetime",
|
|
56
58
|
"indices",
|
|
57
59
|
# Query operations
|
|
58
60
|
"apply_free_text_filter_shared",
|
|
@@ -68,4 +70,6 @@ __all__ = [
|
|
|
68
70
|
"get_bool_env",
|
|
69
71
|
# Datetime utilities
|
|
70
72
|
"return_date",
|
|
73
|
+
"extract_date",
|
|
74
|
+
"extract_first_date_from_index",
|
|
71
75
|
]
|
|
@@ -4,14 +4,19 @@ This module provides datetime utility functions specifically designed for
|
|
|
4
4
|
Elasticsearch and OpenSearch query formatting.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
from datetime import date
|
|
7
10
|
from datetime import datetime as datetime_type
|
|
8
11
|
from typing import Dict, Optional, Union
|
|
9
12
|
|
|
10
13
|
from stac_fastapi.types.rfc3339 import DateTimeType
|
|
11
14
|
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
12
17
|
|
|
13
18
|
def return_date(
|
|
14
|
-
interval: Optional[Union[DateTimeType, str]]
|
|
19
|
+
interval: Optional[Union[DateTimeType, str]],
|
|
15
20
|
) -> Dict[str, Optional[str]]:
|
|
16
21
|
"""
|
|
17
22
|
Convert a date interval to an Elasticsearch/OpenSearch query format.
|
|
@@ -39,8 +44,14 @@ def return_date(
|
|
|
39
44
|
if isinstance(interval, str):
|
|
40
45
|
if "/" in interval:
|
|
41
46
|
parts = interval.split("/")
|
|
42
|
-
result["gte"] =
|
|
43
|
-
|
|
47
|
+
result["gte"] = (
|
|
48
|
+
parts[0] if parts[0] != ".." else datetime_type.min.isoformat() + "Z"
|
|
49
|
+
)
|
|
50
|
+
result["lte"] = (
|
|
51
|
+
parts[1]
|
|
52
|
+
if len(parts) > 1 and parts[1] != ".."
|
|
53
|
+
else datetime_type.max.isoformat() + "Z"
|
|
54
|
+
)
|
|
44
55
|
else:
|
|
45
56
|
converted_time = interval if interval != ".." else None
|
|
46
57
|
result["gte"] = result["lte"] = converted_time
|
|
@@ -58,3 +69,53 @@ def return_date(
|
|
|
58
69
|
result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
|
59
70
|
|
|
60
71
|
return result
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def extract_date(date_str: str) -> date:
    """Extract the calendar date from an ISO 8601 / RFC 3339 string.

    Args:
        date_str: ISO format date or datetime string, e.g. ``"2024-01-01"``
            or ``"2024-01-01T12:30:00Z"``. A trailing UTC designator may be
            written as ``Z`` or ``z``.

    Returns:
        A date object extracted from the input string. The date is taken as
        written; no timezone conversion is applied before truncating.

    Raises:
        ValueError: If the string is not a valid ISO format date/datetime.
    """
    # Only a *trailing* designator means UTC. Rewriting it to an explicit
    # offset keeps parsing working on interpreters whose fromisoformat() does
    # not understand "Z", and also accepts the lowercase form RFC 3339 allows.
    if date_str[-1:] in ("Z", "z"):
        date_str = date_str[:-1] + "+00:00"
    return datetime_type.fromisoformat(date_str).date()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def extract_first_date_from_index(index_name: str) -> date:
    """Return the first YYYY-MM-DD date embedded in an index name.

    The name is scanned left to right for the first substring shaped like
    ``YYYY-MM-DD``; that substring is then parsed as a calendar date.

    Args:
        index_name: Index name containing date patterns.

    Returns:
        A date object built from the first date pattern found in the name.

    Raises:
        ValueError: If the name contains no ``YYYY-MM-DD`` substring, or the
            first such substring is not a valid calendar date.
    """
    found = re.search(r"\d{4}-\d{2}-\d{2}", index_name)
    if found is None:
        logger.error(f"No date pattern found in index name: '{index_name}'")
        raise ValueError(
            f"No date pattern (YYYY-MM-DD) found in index name: '{index_name}'"
        )

    matched_text = found.group(0)
    try:
        # The regex only checks the digit layout; strptime rejects impossible
        # dates such as month 13 or day 32.
        parsed = datetime_type.strptime(matched_text, "%Y-%m-%d")
    except ValueError as e:
        logger.error(
            f"Invalid date format found in index name '{index_name}': "
            f"'{matched_text}' - {str(e)}"
        )
        raise ValueError(
            f"Invalid date format in index name '{index_name}': '{matched_text}'"
        ) from e
    return parsed.date()
|