sfeos-helpers 6.1.0__tar.gz → 6.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sfeos_helpers-6.1.0/sfeos_helpers.egg-info → sfeos_helpers-6.2.1}/PKG-INFO +89 -4
- sfeos_helpers-6.1.0/PKG-INFO → sfeos_helpers-6.2.1/README.md +77 -19
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/setup.py +1 -1
- sfeos_helpers-6.1.0/README.md → sfeos_helpers-6.2.1/sfeos_helpers.egg-info/PKG-INFO +104 -1
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/sfeos_helpers.egg-info/SOURCES.txt +12 -1
- sfeos_helpers-6.2.1/sfeos_helpers.egg-info/requires.txt +1 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/sfeos_helpers.egg-info/top_level.txt +1 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/aggregation/client.py +5 -2
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/database/__init__.py +5 -1
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/database/datetime.py +64 -3
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/database/index.py +59 -2
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/database/query.py +5 -2
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/database/utils.py +75 -38
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/models/patch.py +124 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/__init__.py +27 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/base.py +51 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/factory.py +36 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/index_operations.py +167 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/inserters.py +309 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/managers.py +198 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py +15 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/selection/base.py +30 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +127 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/selection/factory.py +37 -0
- sfeos_helpers-6.2.1/stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py +129 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/version.py +1 -1
- sfeos_helpers-6.1.0/sfeos_helpers.egg-info/requires.txt +0 -1
- sfeos_helpers-6.1.0/stac_fastapi/sfeos_helpers/models/patch.py +0 -166
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/setup.cfg +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/sfeos_helpers.egg-info/dependency_links.txt +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/sfeos_helpers.egg-info/not-zip-safe +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/aggregation/__init__.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/aggregation/format.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/database/document.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/database/mapping.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/filter/__init__.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/filter/client.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/filter/cql2.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/filter/transform.py +0 -0
- {sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/mappings.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name:
|
|
3
|
-
Version: 6.1.0
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sfeos_helpers
|
|
3
|
+
Version: 6.2.1
|
|
4
4
|
Summary: Helper library for the Elasticsearch and Opensearch stac-fastapi backends.
|
|
5
5
|
Home-page: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
|
|
6
6
|
License: MIT
|
|
@@ -15,6 +15,15 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Classifier: License :: OSI Approved :: MIT License
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: stac-fastapi.core==6.2.1
|
|
19
|
+
Dynamic: classifier
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: home-page
|
|
23
|
+
Dynamic: license
|
|
24
|
+
Dynamic: requires-dist
|
|
25
|
+
Dynamic: requires-python
|
|
26
|
+
Dynamic: summary
|
|
18
27
|
|
|
19
28
|
# stac-fastapi-elasticsearch-opensearch
|
|
20
29
|
|
|
@@ -103,6 +112,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
|
|
|
103
112
|
- [Auth](#auth)
|
|
104
113
|
- [Aggregation](#aggregation)
|
|
105
114
|
- [Rate Limiting](#rate-limiting)
|
|
115
|
+
- [Datetime-Based Index Management](#datetime-based-index-management)
|
|
106
116
|
|
|
107
117
|
## Documentation & Resources
|
|
108
118
|
|
|
@@ -244,10 +254,86 @@ You can customize additional settings in your `.env` file:
|
|
|
244
254
|
| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional |
|
|
245
255
|
| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional |
|
|
246
256
|
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional |
|
|
257
|
+
| `STAC_ITEM_LIMIT` | Sets the environment variable for result limiting to SFEOS for the number of returned items and STAC collections. | `10` | Optional |
|
|
247
258
|
|
|
248
259
|
> [!NOTE]
|
|
249
260
|
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
|
|
250
261
|
|
|
262
|
+
## Datetime-Based Index Management
|
|
263
|
+
|
|
264
|
+
### Overview
|
|
265
|
+
|
|
266
|
+
SFEOS supports two indexing strategies for managing STAC items:
|
|
267
|
+
|
|
268
|
+
1. **Simple Indexing** (default) - One index per collection
|
|
269
|
+
2. **Datetime-Based Indexing** - Time-partitioned indexes with automatic management
|
|
270
|
+
|
|
271
|
+
The datetime-based indexing strategy is particularly useful for large temporal datasets. When a user provides a datetime parameter in a query, the system knows exactly which index to search, providing **multiple times faster searches** and significantly **reducing database load**.
|
|
272
|
+
|
|
273
|
+
### When to Use
|
|
274
|
+
|
|
275
|
+
**Recommended for:**
|
|
276
|
+
- Systems with large collections containing millions of items
|
|
277
|
+
- Systems requiring high-performance temporal searching
|
|
278
|
+
|
|
279
|
+
**Pros:**
|
|
280
|
+
- Multiple times faster queries with datetime filter
|
|
281
|
+
- Reduced database load - only relevant indexes are searched
|
|
282
|
+
|
|
283
|
+
**Cons:**
|
|
284
|
+
- Slightly longer item indexing time (automatic index management)
|
|
285
|
+
- Greater management complexity
|
|
286
|
+
|
|
287
|
+
### Configuration
|
|
288
|
+
|
|
289
|
+
#### Enabling Datetime-Based Indexing
|
|
290
|
+
|
|
291
|
+
Enable datetime-based indexing by setting the following environment variable:
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
ENABLE_DATETIME_INDEX_FILTERING=true
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Related Configuration Variables
|
|
298
|
+
|
|
299
|
+
| Variable | Description | Default | Example |
|
|
300
|
+
|----------|-------------|---------|---------|
|
|
301
|
+
| `ENABLE_DATETIME_INDEX_FILTERING` | Enables time-based index partitioning | `false` | `true` |
|
|
302
|
+
| `DATETIME_INDEX_MAX_SIZE_GB` | Maximum size limit for datetime indexes (GB) - note: add +20% to target size due to ES/OS compression | `25` | `50` |
|
|
303
|
+
| `STAC_ITEMS_INDEX_PREFIX` | Prefix for item indexes | `items_` | `stac_items_` |
|
|
304
|
+
|
|
305
|
+
## How Datetime-Based Indexing Works
|
|
306
|
+
|
|
307
|
+
### Index and Alias Naming Convention
|
|
308
|
+
|
|
309
|
+
The system uses a precise naming convention:
|
|
310
|
+
|
|
311
|
+
**Physical indexes:**
|
|
312
|
+
```
|
|
313
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{uuid4}
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
**Aliases:**
|
|
317
|
+
```
|
|
318
|
+
{ITEMS_INDEX_PREFIX}{collection-id} # Main collection alias
|
|
319
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime} # Temporal alias
|
|
320
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime}_{end-datetime} # Closed index alias
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
**Example:**
|
|
324
|
+
|
|
325
|
+
*Physical indexes:*
|
|
326
|
+
- `items_sentinel-2-l2a_a1b2c3d4-e5f6-7890-abcd-ef1234567890`
|
|
327
|
+
|
|
328
|
+
*Aliases:*
|
|
329
|
+
- `items_sentinel-2-l2a` - main collection alias
|
|
330
|
+
- `items_sentinel-2-l2a_2024-01-01` - active alias from January 1, 2024
|
|
331
|
+
- `items_sentinel-2-l2a_2024-01-01_2024-03-15` - closed index alias (reached size limit)
|
|
332
|
+
|
|
333
|
+
### Index Size Management
|
|
334
|
+
|
|
335
|
+
**Important - Data Compression:** Elasticsearch and OpenSearch automatically compress data. The configured `DATETIME_INDEX_MAX_SIZE_GB` limit refers to the compressed size on disk. It is recommended to add +20% to the target size to account for compression overhead and metadata.
|
|
336
|
+
|
|
251
337
|
## Interacting with the API
|
|
252
338
|
|
|
253
339
|
- **Creating a Collection**:
|
|
@@ -556,4 +642,3 @@ You can customize additional settings in your `.env` file:
|
|
|
556
642
|
- Ensures fair resource allocation among all clients
|
|
557
643
|
|
|
558
644
|
- **Examples**: Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
|
|
559
|
-
|
|
@@ -1,21 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: sfeos_helpers
|
|
3
|
-
Version: 6.1.0
|
|
4
|
-
Summary: Helper library for the Elasticsearch and Opensearch stac-fastapi backends.
|
|
5
|
-
Home-page: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
|
|
6
|
-
License: MIT
|
|
7
|
-
Classifier: Intended Audience :: Developers
|
|
8
|
-
Classifier: Intended Audience :: Information Technology
|
|
9
|
-
Classifier: Intended Audience :: Science/Research
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
-
Requires-Python: >=3.9
|
|
17
|
-
Description-Content-Type: text/markdown
|
|
18
|
-
|
|
19
1
|
# stac-fastapi-elasticsearch-opensearch
|
|
20
2
|
|
|
21
3
|
<!-- markdownlint-disable MD033 MD041 -->
|
|
@@ -103,6 +85,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
|
|
|
103
85
|
- [Auth](#auth)
|
|
104
86
|
- [Aggregation](#aggregation)
|
|
105
87
|
- [Rate Limiting](#rate-limiting)
|
|
88
|
+
- [Datetime-Based Index Management](#datetime-based-index-management)
|
|
106
89
|
|
|
107
90
|
## Documentation & Resources
|
|
108
91
|
|
|
@@ -244,10 +227,86 @@ You can customize additional settings in your `.env` file:
|
|
|
244
227
|
| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional |
|
|
245
228
|
| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional |
|
|
246
229
|
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional |
|
|
230
|
+
| `STAC_ITEM_LIMIT` | Sets the environment variable for result limiting to SFEOS for the number of returned items and STAC collections. | `10` | Optional |
|
|
247
231
|
|
|
248
232
|
> [!NOTE]
|
|
249
233
|
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
|
|
250
234
|
|
|
235
|
+
## Datetime-Based Index Management
|
|
236
|
+
|
|
237
|
+
### Overview
|
|
238
|
+
|
|
239
|
+
SFEOS supports two indexing strategies for managing STAC items:
|
|
240
|
+
|
|
241
|
+
1. **Simple Indexing** (default) - One index per collection
|
|
242
|
+
2. **Datetime-Based Indexing** - Time-partitioned indexes with automatic management
|
|
243
|
+
|
|
244
|
+
The datetime-based indexing strategy is particularly useful for large temporal datasets. When a user provides a datetime parameter in a query, the system knows exactly which index to search, providing **multiple times faster searches** and significantly **reducing database load**.
|
|
245
|
+
|
|
246
|
+
### When to Use
|
|
247
|
+
|
|
248
|
+
**Recommended for:**
|
|
249
|
+
- Systems with large collections containing millions of items
|
|
250
|
+
- Systems requiring high-performance temporal searching
|
|
251
|
+
|
|
252
|
+
**Pros:**
|
|
253
|
+
- Multiple times faster queries with datetime filter
|
|
254
|
+
- Reduced database load - only relevant indexes are searched
|
|
255
|
+
|
|
256
|
+
**Cons:**
|
|
257
|
+
- Slightly longer item indexing time (automatic index management)
|
|
258
|
+
- Greater management complexity
|
|
259
|
+
|
|
260
|
+
### Configuration
|
|
261
|
+
|
|
262
|
+
#### Enabling Datetime-Based Indexing
|
|
263
|
+
|
|
264
|
+
Enable datetime-based indexing by setting the following environment variable:
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
ENABLE_DATETIME_INDEX_FILTERING=true
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Related Configuration Variables
|
|
271
|
+
|
|
272
|
+
| Variable | Description | Default | Example |
|
|
273
|
+
|----------|-------------|---------|---------|
|
|
274
|
+
| `ENABLE_DATETIME_INDEX_FILTERING` | Enables time-based index partitioning | `false` | `true` |
|
|
275
|
+
| `DATETIME_INDEX_MAX_SIZE_GB` | Maximum size limit for datetime indexes (GB) - note: add +20% to target size due to ES/OS compression | `25` | `50` |
|
|
276
|
+
| `STAC_ITEMS_INDEX_PREFIX` | Prefix for item indexes | `items_` | `stac_items_` |
|
|
277
|
+
|
|
278
|
+
## How Datetime-Based Indexing Works
|
|
279
|
+
|
|
280
|
+
### Index and Alias Naming Convention
|
|
281
|
+
|
|
282
|
+
The system uses a precise naming convention:
|
|
283
|
+
|
|
284
|
+
**Physical indexes:**
|
|
285
|
+
```
|
|
286
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{uuid4}
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
**Aliases:**
|
|
290
|
+
```
|
|
291
|
+
{ITEMS_INDEX_PREFIX}{collection-id} # Main collection alias
|
|
292
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime} # Temporal alias
|
|
293
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime}_{end-datetime} # Closed index alias
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
**Example:**
|
|
297
|
+
|
|
298
|
+
*Physical indexes:*
|
|
299
|
+
- `items_sentinel-2-l2a_a1b2c3d4-e5f6-7890-abcd-ef1234567890`
|
|
300
|
+
|
|
301
|
+
*Aliases:*
|
|
302
|
+
- `items_sentinel-2-l2a` - main collection alias
|
|
303
|
+
- `items_sentinel-2-l2a_2024-01-01` - active alias from January 1, 2024
|
|
304
|
+
- `items_sentinel-2-l2a_2024-01-01_2024-03-15` - closed index alias (reached size limit)
|
|
305
|
+
|
|
306
|
+
### Index Size Management
|
|
307
|
+
|
|
308
|
+
**Important - Data Compression:** Elasticsearch and OpenSearch automatically compress data. The configured `DATETIME_INDEX_MAX_SIZE_GB` limit refers to the compressed size on disk. It is recommended to add +20% to the target size to account for compression overhead and metadata.
|
|
309
|
+
|
|
251
310
|
## Interacting with the API
|
|
252
311
|
|
|
253
312
|
- **Creating a Collection**:
|
|
@@ -556,4 +615,3 @@ You can customize additional settings in your `.env` file:
|
|
|
556
615
|
- Ensures fair resource allocation among all clients
|
|
557
616
|
|
|
558
617
|
- **Examples**: Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
|
|
559
|
-
|
|
@@ -1,3 +1,30 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sfeos_helpers
|
|
3
|
+
Version: 6.2.1
|
|
4
|
+
Summary: Helper library for the Elasticsearch and Opensearch stac-fastapi backends.
|
|
5
|
+
Home-page: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Intended Audience :: Developers
|
|
8
|
+
Classifier: Intended Audience :: Information Technology
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: stac-fastapi.core==6.2.1
|
|
19
|
+
Dynamic: classifier
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: home-page
|
|
23
|
+
Dynamic: license
|
|
24
|
+
Dynamic: requires-dist
|
|
25
|
+
Dynamic: requires-python
|
|
26
|
+
Dynamic: summary
|
|
27
|
+
|
|
1
28
|
# stac-fastapi-elasticsearch-opensearch
|
|
2
29
|
|
|
3
30
|
<!-- markdownlint-disable MD033 MD041 -->
|
|
@@ -85,6 +112,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
|
|
|
85
112
|
- [Auth](#auth)
|
|
86
113
|
- [Aggregation](#aggregation)
|
|
87
114
|
- [Rate Limiting](#rate-limiting)
|
|
115
|
+
- [Datetime-Based Index Management](#datetime-based-index-management)
|
|
88
116
|
|
|
89
117
|
## Documentation & Resources
|
|
90
118
|
|
|
@@ -226,10 +254,86 @@ You can customize additional settings in your `.env` file:
|
|
|
226
254
|
| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional |
|
|
227
255
|
| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional |
|
|
228
256
|
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional |
|
|
257
|
+
| `STAC_ITEM_LIMIT` | Sets the environment variable for result limiting to SFEOS for the number of returned items and STAC collections. | `10` | Optional |
|
|
229
258
|
|
|
230
259
|
> [!NOTE]
|
|
231
260
|
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
|
|
232
261
|
|
|
262
|
+
## Datetime-Based Index Management
|
|
263
|
+
|
|
264
|
+
### Overview
|
|
265
|
+
|
|
266
|
+
SFEOS supports two indexing strategies for managing STAC items:
|
|
267
|
+
|
|
268
|
+
1. **Simple Indexing** (default) - One index per collection
|
|
269
|
+
2. **Datetime-Based Indexing** - Time-partitioned indexes with automatic management
|
|
270
|
+
|
|
271
|
+
The datetime-based indexing strategy is particularly useful for large temporal datasets. When a user provides a datetime parameter in a query, the system knows exactly which index to search, providing **multiple times faster searches** and significantly **reducing database load**.
|
|
272
|
+
|
|
273
|
+
### When to Use
|
|
274
|
+
|
|
275
|
+
**Recommended for:**
|
|
276
|
+
- Systems with large collections containing millions of items
|
|
277
|
+
- Systems requiring high-performance temporal searching
|
|
278
|
+
|
|
279
|
+
**Pros:**
|
|
280
|
+
- Multiple times faster queries with datetime filter
|
|
281
|
+
- Reduced database load - only relevant indexes are searched
|
|
282
|
+
|
|
283
|
+
**Cons:**
|
|
284
|
+
- Slightly longer item indexing time (automatic index management)
|
|
285
|
+
- Greater management complexity
|
|
286
|
+
|
|
287
|
+
### Configuration
|
|
288
|
+
|
|
289
|
+
#### Enabling Datetime-Based Indexing
|
|
290
|
+
|
|
291
|
+
Enable datetime-based indexing by setting the following environment variable:
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
ENABLE_DATETIME_INDEX_FILTERING=true
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Related Configuration Variables
|
|
298
|
+
|
|
299
|
+
| Variable | Description | Default | Example |
|
|
300
|
+
|----------|-------------|---------|---------|
|
|
301
|
+
| `ENABLE_DATETIME_INDEX_FILTERING` | Enables time-based index partitioning | `false` | `true` |
|
|
302
|
+
| `DATETIME_INDEX_MAX_SIZE_GB` | Maximum size limit for datetime indexes (GB) - note: add +20% to target size due to ES/OS compression | `25` | `50` |
|
|
303
|
+
| `STAC_ITEMS_INDEX_PREFIX` | Prefix for item indexes | `items_` | `stac_items_` |
|
|
304
|
+
|
|
305
|
+
## How Datetime-Based Indexing Works
|
|
306
|
+
|
|
307
|
+
### Index and Alias Naming Convention
|
|
308
|
+
|
|
309
|
+
The system uses a precise naming convention:
|
|
310
|
+
|
|
311
|
+
**Physical indexes:**
|
|
312
|
+
```
|
|
313
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{uuid4}
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
**Aliases:**
|
|
317
|
+
```
|
|
318
|
+
{ITEMS_INDEX_PREFIX}{collection-id} # Main collection alias
|
|
319
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime} # Temporal alias
|
|
320
|
+
{ITEMS_INDEX_PREFIX}{collection-id}_{start-datetime}_{end-datetime} # Closed index alias
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
**Example:**
|
|
324
|
+
|
|
325
|
+
*Physical indexes:*
|
|
326
|
+
- `items_sentinel-2-l2a_a1b2c3d4-e5f6-7890-abcd-ef1234567890`
|
|
327
|
+
|
|
328
|
+
*Aliases:*
|
|
329
|
+
- `items_sentinel-2-l2a` - main collection alias
|
|
330
|
+
- `items_sentinel-2-l2a_2024-01-01` - active alias from January 1, 2024
|
|
331
|
+
- `items_sentinel-2-l2a_2024-01-01_2024-03-15` - closed index alias (reached size limit)
|
|
332
|
+
|
|
333
|
+
### Index Size Management
|
|
334
|
+
|
|
335
|
+
**Important - Data Compression:** Elasticsearch and OpenSearch automatically compress data. The configured `DATETIME_INDEX_MAX_SIZE_GB` limit refers to the compressed size on disk. It is recommended to add +20% to the target size to account for compression overhead and metadata.
|
|
336
|
+
|
|
233
337
|
## Interacting with the API
|
|
234
338
|
|
|
235
339
|
- **Creating a Collection**:
|
|
@@ -538,4 +642,3 @@ You can customize additional settings in your `.env` file:
|
|
|
538
642
|
- Ensures fair resource allocation among all clients
|
|
539
643
|
|
|
540
644
|
- **Examples**: Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
|
|
541
|
-
|
|
@@ -23,4 +23,15 @@ stac_fastapi/sfeos_helpers/filter/__init__.py
|
|
|
23
23
|
stac_fastapi/sfeos_helpers/filter/client.py
|
|
24
24
|
stac_fastapi/sfeos_helpers/filter/cql2.py
|
|
25
25
|
stac_fastapi/sfeos_helpers/filter/transform.py
|
|
26
|
-
stac_fastapi/sfeos_helpers/models/patch.py
|
|
26
|
+
stac_fastapi/sfeos_helpers/models/patch.py
|
|
27
|
+
stac_fastapi/sfeos_helpers/search_engine/__init__.py
|
|
28
|
+
stac_fastapi/sfeos_helpers/search_engine/base.py
|
|
29
|
+
stac_fastapi/sfeos_helpers/search_engine/factory.py
|
|
30
|
+
stac_fastapi/sfeos_helpers/search_engine/index_operations.py
|
|
31
|
+
stac_fastapi/sfeos_helpers/search_engine/inserters.py
|
|
32
|
+
stac_fastapi/sfeos_helpers/search_engine/managers.py
|
|
33
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/__init__.py
|
|
34
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/base.py
|
|
35
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py
|
|
36
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/factory.py
|
|
37
|
+
stac_fastapi/sfeos_helpers/search_engine/selection/selectors.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
stac-fastapi.core==6.2.1
|
{sfeos_helpers-6.1.0 → sfeos_helpers-6.2.1}/stac_fastapi/sfeos_helpers/aggregation/client.py
RENAMED
|
@@ -313,9 +313,11 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
|
|
|
313
313
|
)
|
|
314
314
|
|
|
315
315
|
if aggregate_request.datetime:
|
|
316
|
-
search = self.database.apply_datetime_filter(
|
|
317
|
-
search=search,
|
|
316
|
+
search, datetime_search = self.database.apply_datetime_filter(
|
|
317
|
+
search=search, datetime=aggregate_request.datetime
|
|
318
318
|
)
|
|
319
|
+
else:
|
|
320
|
+
datetime_search = {"gte": None, "lte": None}
|
|
319
321
|
|
|
320
322
|
if aggregate_request.bbox:
|
|
321
323
|
bbox = aggregate_request.bbox
|
|
@@ -414,6 +416,7 @@ class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient):
|
|
|
414
416
|
geometry_geohash_grid_precision,
|
|
415
417
|
geometry_geotile_grid_precision,
|
|
416
418
|
datetime_frequency_interval,
|
|
419
|
+
datetime_search,
|
|
417
420
|
)
|
|
418
421
|
except Exception as error:
|
|
419
422
|
if not isinstance(error, IndexError):
|
|
@@ -30,11 +30,12 @@ Function Naming Conventions:
|
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
# Re-export all functions for backward compatibility
|
|
33
|
-
from .datetime import return_date
|
|
33
|
+
from .datetime import extract_date, extract_first_date_from_index, return_date
|
|
34
34
|
from .document import mk_actions, mk_item_id
|
|
35
35
|
from .index import (
|
|
36
36
|
create_index_templates_shared,
|
|
37
37
|
delete_item_index_shared,
|
|
38
|
+
filter_indexes_by_datetime,
|
|
38
39
|
index_alias_by_collection_id,
|
|
39
40
|
index_by_collection_id,
|
|
40
41
|
indices,
|
|
@@ -53,6 +54,7 @@ __all__ = [
|
|
|
53
54
|
"delete_item_index_shared",
|
|
54
55
|
"index_alias_by_collection_id",
|
|
55
56
|
"index_by_collection_id",
|
|
57
|
+
"filter_indexes_by_datetime",
|
|
56
58
|
"indices",
|
|
57
59
|
# Query operations
|
|
58
60
|
"apply_free_text_filter_shared",
|
|
@@ -68,4 +70,6 @@ __all__ = [
|
|
|
68
70
|
"get_bool_env",
|
|
69
71
|
# Datetime utilities
|
|
70
72
|
"return_date",
|
|
73
|
+
"extract_date",
|
|
74
|
+
"extract_first_date_from_index",
|
|
71
75
|
]
|
|
@@ -4,14 +4,19 @@ This module provides datetime utility functions specifically designed for
|
|
|
4
4
|
Elasticsearch and OpenSearch query formatting.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
from datetime import date
|
|
7
10
|
from datetime import datetime as datetime_type
|
|
8
11
|
from typing import Dict, Optional, Union
|
|
9
12
|
|
|
10
13
|
from stac_fastapi.types.rfc3339 import DateTimeType
|
|
11
14
|
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
12
17
|
|
|
13
18
|
def return_date(
|
|
14
|
-
interval: Optional[Union[DateTimeType, str]]
|
|
19
|
+
interval: Optional[Union[DateTimeType, str]],
|
|
15
20
|
) -> Dict[str, Optional[str]]:
|
|
16
21
|
"""
|
|
17
22
|
Convert a date interval to an Elasticsearch/OpenSearch query format.
|
|
@@ -39,8 +44,14 @@ def return_date(
|
|
|
39
44
|
if isinstance(interval, str):
|
|
40
45
|
if "/" in interval:
|
|
41
46
|
parts = interval.split("/")
|
|
42
|
-
result["gte"] =
|
|
43
|
-
|
|
47
|
+
result["gte"] = (
|
|
48
|
+
parts[0] if parts[0] != ".." else datetime_type.min.isoformat() + "Z"
|
|
49
|
+
)
|
|
50
|
+
result["lte"] = (
|
|
51
|
+
parts[1]
|
|
52
|
+
if len(parts) > 1 and parts[1] != ".."
|
|
53
|
+
else datetime_type.max.isoformat() + "Z"
|
|
54
|
+
)
|
|
44
55
|
else:
|
|
45
56
|
converted_time = interval if interval != ".." else None
|
|
46
57
|
result["gte"] = result["lte"] = converted_time
|
|
@@ -58,3 +69,53 @@ def return_date(
|
|
|
58
69
|
result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
|
59
70
|
|
|
60
71
|
return result
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def extract_date(date_str: str) -> date:
|
|
75
|
+
"""Extract date from ISO format string.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
date_str: ISO format date string
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
A date object extracted from the input string.
|
|
82
|
+
"""
|
|
83
|
+
date_str = date_str.replace("Z", "+00:00")
|
|
84
|
+
return datetime_type.fromisoformat(date_str).date()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def extract_first_date_from_index(index_name: str) -> date:
|
|
88
|
+
"""Extract the first date from an index name containing date patterns.
|
|
89
|
+
|
|
90
|
+
Searches for date patterns (YYYY-MM-DD) within the index name string
|
|
91
|
+
and returns the first found date as a date object.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
index_name: Index name containing date patterns.
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
A date object extracted from the first date pattern found in the index name.
|
|
98
|
+
|
|
99
|
+
"""
|
|
100
|
+
date_pattern = r"\d{4}-\d{2}-\d{2}"
|
|
101
|
+
match = re.search(date_pattern, index_name)
|
|
102
|
+
|
|
103
|
+
if not match:
|
|
104
|
+
logger.error(f"No date pattern found in index name: '{index_name}'")
|
|
105
|
+
raise ValueError(
|
|
106
|
+
f"No date pattern (YYYY-MM-DD) found in index name: '{index_name}'"
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
date_string = match.group(0)
|
|
110
|
+
|
|
111
|
+
try:
|
|
112
|
+
extracted_date = datetime_type.strptime(date_string, "%Y-%m-%d").date()
|
|
113
|
+
return extracted_date
|
|
114
|
+
except ValueError as e:
|
|
115
|
+
logger.error(
|
|
116
|
+
f"Invalid date format found in index name '{index_name}': "
|
|
117
|
+
f"'{date_string}' - {str(e)}"
|
|
118
|
+
)
|
|
119
|
+
raise ValueError(
|
|
120
|
+
f"Invalid date format in index name '{index_name}': '{date_string}'"
|
|
121
|
+
) from e
|
|
@@ -3,9 +3,13 @@
|
|
|
3
3
|
This module provides functions for creating and managing indices in Elasticsearch/OpenSearch.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
import re
|
|
7
|
+
from datetime import datetime
|
|
6
8
|
from functools import lru_cache
|
|
7
9
|
from typing import Any, List, Optional
|
|
8
10
|
|
|
11
|
+
from dateutil.parser import parse # type: ignore[import]
|
|
12
|
+
|
|
9
13
|
from stac_fastapi.sfeos_helpers.mappings import (
|
|
10
14
|
_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE,
|
|
11
15
|
COLLECTIONS_INDEX,
|
|
@@ -66,6 +70,59 @@ def indices(collection_ids: Optional[List[str]]) -> str:
|
|
|
66
70
|
)
|
|
67
71
|
|
|
68
72
|
|
|
73
|
+
def filter_indexes_by_datetime(
    indexes: List[str], gte: Optional[str], lte: Optional[str]
) -> List[str]:
    """Filter indexes based on datetime range extracted from index names.

    Each index name is scanned for ``YYYY-MM-DD`` tokens. The first token is
    the start of the index's range and the second, when present, is its end;
    a name with a single date is treated as open-ended. An index is kept when
    its range overlaps the ``[gte, lte]`` filter range, compared at day
    granularity.

    Index names that contain no date token cannot be excluded on the basis of
    a date, so they are kept rather than aborting the whole filter.

    Args:
        indexes: List of index names containing dates
        gte: Greater than or equal date filter (ISO format, optional 'Z' suffix).
            ``None`` or an empty string means no lower bound.
        lte: Less than or equal date filter (ISO format, optional 'Z' suffix).
            ``None`` or an empty string means no upper bound.

    Returns:
        List of filtered index names, in their original order.

    Raises:
        ValueError: If a date token in an index name is not a valid calendar
            date.
    """

    def parse_datetime(dt_str: str) -> datetime:
        """Parse datetime string, handling both with and without 'Z' suffix."""
        # NOTE(review): tzinfo is dropped without converting to UTC, so a
        # non-UTC offset keeps its local calendar day - confirm callers
        # always pass UTC timestamps.
        return parse(dt_str).replace(tzinfo=None)

    def extract_date_range_from_index(index_name: str) -> Optional[tuple]:
        """Return (start, end) datetimes from index name, or None if undated."""
        dates = re.findall(r"\d{4}-\d{2}-\d{2}", index_name)
        if not dates:
            return None

        start_date = datetime.strptime(dates[0], "%Y-%m-%d")
        if len(dates) == 1:
            # A single date marks an index with no end date.
            return start_date, datetime.max.replace(microsecond=0)
        return start_date, datetime.strptime(dates[1], "%Y-%m-%d")

    def is_index_in_range(
        start_date: datetime, end_date: datetime, gte_dt: datetime, lte_dt: datetime
    ) -> bool:
        """Check if index date range overlaps with filter range."""
        return (
            end_date.date() >= gte_dt.date() and start_date.date() <= lte_dt.date()
        )

    gte_dt = parse_datetime(gte) if gte else datetime.min.replace(microsecond=0)
    lte_dt = parse_datetime(lte) if lte else datetime.max.replace(microsecond=0)

    filtered_indexes = []

    for index in indexes:
        date_range = extract_date_range_from_index(index)
        if date_range is None:
            # No date in the name: the index cannot be ruled out, so keep it.
            filtered_indexes.append(index)
            continue

        start_date, end_date = date_range
        if is_index_in_range(start_date, end_date, gte_dt, lte_dt):
            filtered_indexes.append(index)

    return filtered_indexes
|
|
124
|
+
|
|
125
|
+
|
|
69
126
|
async def create_index_templates_shared(settings: Any) -> None:
|
|
70
127
|
"""Create index templates for Elasticsearch/OpenSearch Collection and Item indices.
|
|
71
128
|
|
|
@@ -120,11 +177,11 @@ async def delete_item_index_shared(settings: Any, collection_id: str) -> None:
|
|
|
120
177
|
client = settings.create_client
|
|
121
178
|
|
|
122
179
|
name = index_alias_by_collection_id(collection_id)
|
|
123
|
-
resolved = await client.indices.resolve_index(name=name)
|
|
180
|
+
resolved = await client.indices.resolve_index(name=name, ignore=[404])
|
|
124
181
|
if "aliases" in resolved and resolved["aliases"]:
|
|
125
182
|
[alias] = resolved["aliases"]
|
|
126
183
|
await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
|
|
127
184
|
await client.indices.delete(index=alias["indices"])
|
|
128
185
|
else:
|
|
129
|
-
await client.indices.delete(index=name)
|
|
186
|
+
await client.indices.delete(index=name, ignore=[404])
|
|
130
187
|
await client.close()
|
|
@@ -80,11 +80,14 @@ def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
|
|
|
80
80
|
This function transforms a list of sort specifications into the format required by
|
|
81
81
|
Elasticsearch/OpenSearch for sorting query results. The returned dictionary can be
|
|
82
82
|
directly used in search requests.
|
|
83
|
+
Always includes 'id' as secondary sort to ensure unique pagination tokens.
|
|
83
84
|
"""
|
|
84
85
|
if sortby:
|
|
85
|
-
|
|
86
|
+
sort_config = {s.field: {"order": s.direction} for s in sortby}
|
|
87
|
+
sort_config.setdefault("id", {"order": "asc"})
|
|
88
|
+
return sort_config
|
|
86
89
|
else:
|
|
87
|
-
return
|
|
90
|
+
return {"id": {"order": "asc"}}
|
|
88
91
|
|
|
89
92
|
|
|
90
93
|
def add_collections_to_body(
|