earthcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- earthcode/__init__.py +0 -0
- earthcode/fairtool.py +577 -0
- earthcode/git_add.py +383 -0
- earthcode/gitclerk_add.py +21 -0
- earthcode/metadata_input_definitions.py +338 -0
- earthcode/search.py +209 -0
- earthcode/static.py +569 -0
- earthcode/validator.py +605 -0
- earthcode-0.1.0.dist-info/METADATA +70 -0
- earthcode-0.1.0.dist-info/RECORD +12 -0
- earthcode-0.1.0.dist-info/WHEEL +4 -0
- earthcode-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ProjectCollectionMetadata(BaseModel):
    """Defines validated inputs used to construct an OSC project collection."""

    # Every field except `eo4society_link` has no default and is therefore required.
    # The docstring and the `description` strings are surfaced in the model's
    # JSON schema, so they are runtime text and must not be reworded casually.

    # --- identity and summary ---
    project_id: str = Field(
        description="Custom project identifier using dashes to separate words, e.g. '4datlantic-ohc'."
    )
    project_title: str = Field(description="Official project title as used in the ESA contract.")
    project_description: str = Field(description="Short textual description of the project.")
    # NOTE(review): typed as a plain str; the 'ongoing'/'completed' expectation
    # in the description is not enforced by this model.
    project_status: str = Field(description="Project status, expected as 'ongoing' or 'completed'.")
    project_license: str = Field(
        description="Overall license used for project outputs; use an allowed SPDX-style value or 'various'."
    )

    # --- spatial and temporal extent ---
    project_bbox: list[list[float]] = Field(
        description="One or more [west, south, east, north] bounding boxes."
    )
    project_start_datetime: datetime = Field(description="Project start date and time.")
    project_end_datetime: datetime = Field(description="Project end date and time.")

    # --- classification ---
    project_themes: list[str] = Field(
        description="One or more OSC themes, e.g. atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth."
    )

    # --- people and links ---
    to_name: str = Field(description="Full name of the ESA Technical Officer supporting the project.")
    to_email: str = Field(description="Email address of the ESA Technical Officer.")
    consortium_members: list[tuple[str, str]] = Field(
        description="Project consortium members as (name, contact_email) tuples."
    )
    website_link: str = Field(description="Project website URL.")
    eo4society_link: Optional[str] = Field(
        default=None,
        description="EO4Society project page URL when available.",
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class ProductCollectionMetadata(BaseModel):
    """Defines validated inputs used to construct an OSC product collection."""

    # Fields declared with `default=None` are optional; all others are required.
    # The docstring and the `description` strings are surfaced in the model's
    # JSON schema, so they are runtime text and must not be reworded casually.

    # --- identity and summary ---
    product_id: str = Field(
        description="Custom product identifier distinct from the project ID, using dashes between words."
    )
    product_title: Optional[str] = Field(default=None, description="Human-readable product title.")
    product_description: str = Field(
        description="General metadata description of the product or dataset."
    )

    # --- spatial and temporal extent ---
    product_bbox: list[list[float]] = Field(
        description="One or more [west, south, east, north] bounding boxes."
    )
    product_start_datetime: datetime = Field(description="Product or dataset start date and time.")
    product_end_datetime: datetime = Field(description="Product or dataset end date and time.")

    # --- licensing, discovery and status ---
    product_license: str = Field(
        description="License for this product; should be a valid allowed OSC license value."
    )
    # NOTE(review): the "up to five" limit is only stated in the description;
    # the annotation itself accepts a list of any length.
    product_keywords: Optional[list[str]] = Field(
        default=None,
        description="Up to five short keywords to improve product discovery.",
    )
    product_status: str = Field(description="Product status, expected as 'ongoing' or 'completed'.")
    product_region: Optional[str] = Field(
        default=None,
        description="Semantic region covered by the product, e.g. Belgium or Global.",
    )

    # --- OSC classification ---
    product_themes: list[str] = Field(description="One or more OSC themes applicable to the product.")
    product_missions: list[str] = Field(
        description="One or more EO mission identifiers used to generate the product."
    )
    product_variables: list[str] = Field(
        description="One or more OSC variable identifiers describing the product."
    )

    # --- owning project ---
    project_id: str = Field(
        description="Identifier of the related project; must match an existing or newly created project."
    )
    project_title: str = Field(description="Title of the related project corresponding to project_id.")

    # --- optional extras and links ---
    product_parameters: Optional[list[str]] = Field(
        default=None,
        description="CF-convention style parameter names associated with the product, e.g. 'leaf_area_index'.",
    )
    product_doi: Optional[str] = Field(
        default=None,
        description="DOI URL assigned to the product, if available.",
    )
    access_link: Optional[str] = Field(
        default=None,
        description="Direct URL for accessing data; this should be a valid URL.",
    )
    documentation_link: Optional[str] = Field(
        default=None,
        description="URL to supporting documentation such as handbook or validation report.",
    )
    license_link: Optional[str] = Field(
        default=None,
        description="URL to license, if its not a standard SPDX one.",
    )
    item_link: Optional[str] = Field(
        default=None,
        description="URL to an external STAC item or collection representing the dataset, if available.",
    )
    item_title: Optional[str] = Field(default=None, description="Title to the item.")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class WorkflowMetadata(BaseModel):
    """Defines validated inputs used to construct an OSC workflow record."""

    # Fields declared with `default=None` are optional; all others are required.
    # The docstring and the `description` strings are surfaced in the model's
    # JSON schema, so they are runtime text and must not be reworded casually.

    # --- identity and summary ---
    workflow_id: str = Field(
        description="Custom workflow identifier, distinct from project and product IDs."
    )
    workflow_title: str = Field(description="Workflow title.")
    workflow_description: str = Field(
        description="Short description of the workflow purpose and content."
    )
    workflow_license: str = Field(
        description="Workflow license value, typically SPDX-style or 'various' when needed."
    )

    # --- discovery and classification ---
    # NOTE(review): the "up to five" limit is only stated in the description;
    # the annotation itself accepts a list of any length.
    workflow_keywords: list[str] = Field(
        description="Up to five short keywords to improve workflow discovery."
    )
    workflow_formats: list[str] = Field(
        description="Input/output data formats used by the workflow, e.g. GeoTIFF or NetCDF."
    )
    workflow_themes: list[str] = Field(
        description="One or more OSC themes associated with the workflow."
    )
    codeurl: str = Field(
        description="Active URL to the repository where the workflow/code can be discovered."
    )

    # --- owning project ---
    project_id: str = Field(
        description="Identifier of the associated project; must match an existing or newly created project."
    )
    project_title: str = Field(
        description="Title of the associated project corresponding to project_id."
    )

    # --- optional DOI and extent ---
    workflow_doi: Optional[str] = Field(
        default=None,
        description="DOI for the workflow record when available.",
    )
    workflow_bbox: Optional[list[list[float]]] = Field(
        default=None,
        description="Optional workflow spatial coverage as [west, south, east, north] bounding boxes.",
    )
    workflow_start_datetime: Optional[datetime] = Field(
        default=None,
        description="Optional workflow validity/execution start datetime.",
    )
    workflow_end_datetime: Optional[datetime] = Field(
        default=None,
        description="Optional workflow validity/execution end datetime.",
    )
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class ExperimentMetadata(BaseModel):
    """Defines validated inputs used to construct an OSC experiment record."""

    # Fields declared with `default=None` are optional; all others are required.
    # The docstring and the `description` strings are surfaced in the model's
    # JSON schema, so they are runtime text and must not be reworded casually.

    # --- identity and summary ---
    experiment_id: str = Field(description="Custom identifier of the experiment record.")
    experiment_title: str = Field(description="Experiment title.")
    experiment_description: str = Field(
        description="Description of the workflow execution that generated the product."
    )
    experiment_license: str = Field(description="License value for the experiment record.")

    # --- discovery and classification ---
    experiment_keywords: list[str] = Field(description="Keywords supporting experiment discovery.")
    experiment_formats: list[str] = Field(
        description="Input/output formats used in the experiment, e.g. GeoTIFF, Zarr, netCDF."
    )
    experiment_themes: list[str] = Field(
        description="One or more OSC themes associated with the experiment."
    )

    # --- reproducibility links ---
    experiment_input_parameters_link: str = Field(
        description="URL to the specification of experiment input parameters."
    )
    # NOTE(review): "enviroment" is misspelled, but the field name is part of the
    # model's interface; renaming it would break existing callers.
    experiment_enviroment_link: str = Field(
        description="URL to the runtime environment description used to execute the experiment."
    )

    # --- associated workflow and product ---
    workflow_id: str = Field(description="Identifier of the associated workflow.")
    workflow_title: str = Field(description="Title of the associated workflow.")
    product_id: str = Field(description="Identifier of the associated product.")
    product_title: str = Field(description="Title of the associated product.")

    # --- optional contacts and extent ---
    contacts: Optional[list[dict]] = Field(
        default=None,
        description="Optional contact objects for the experiment record.",
    )
    experiment_bbox: Optional[list[list[float]]] = Field(
        default=None,
        description="Optional experiment spatial coverage as [west, south, east, north] bounding boxes.",
    )
    experiment_start_datetime: Optional[datetime] = Field(
        default=None,
        description="Optional experiment start datetime.",
    )
    experiment_end_datetime: Optional[datetime] = Field(
        default=None,
        description="Optional experiment end datetime.",
    )
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
class ItemMetadata(BaseModel):
    """Defines validated inputs used to construct an OSC STAC Item."""

    # Only `extra_fields` has a default (None); every other field is required.
    # The docstring and the `description` strings are surfaced in the model's
    # JSON schema, so they are runtime text and must not be reworded casually.

    # --- identity, footprint and time ---
    itemid: str = Field(description="Unique STAC item identifier.")
    geometry: dict = Field(description="GeoJSON geometry describing the item footprint.")
    data_time: datetime = Field(description="Primary timestamp of the item.")
    # Unlike the collection models, this is a single flat box, not a list of boxes.
    bbox: list[float] = Field(description="Item bounding box as [west, south, east, north].")

    # --- parent product and descriptive metadata ---
    product_id: str = Field(
        description="Identifier of the parent product/collection for this item."
    )
    license: str = Field(description="License applicable to this item.")
    description: str = Field(description="Short item description.")

    # --- primary data asset ---
    data_url: str = Field(description="URL to the primary data asset.")
    data_mime_type: str = Field(description="Media type of the primary data asset.")
    data_title: str = Field(description="Human-readable title of the primary data asset.")

    # --- free-form extras ---
    extra_fields: Optional[dict] = Field(
        default=None,
        description="Additional custom STAC fields to add to the item.",
    )
|
earthcode/search.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Search interface for Lance vector store of Open Science Catalog items.
|
|
2
|
+
|
|
3
|
+
Provides semantic search across products, variables, missions, and projects using FastEmbed embeddings.
|
|
4
|
+
|
|
5
|
+
Returns:
|
|
6
|
+
list[pystac.Collection | pystac.Catalog]: Search results as PySTAC objects.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
import shutil
|
|
12
|
+
import tempfile
|
|
13
|
+
import lance
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pystac
|
|
16
|
+
from fastembed import TextEmbedding
|
|
17
|
+
|
|
18
|
+
# URI of the Lance dataset that `search` opens via `lance.dataset`.
LANCE_URI = "s3://pangeo-test-fires/vector_store_v5/"
# Model name handed to `TextEmbedding` when embedding the query text.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Storage options forwarded unchanged to `lance.dataset`.
LANCE_BASE_STORAGE_OPTIONS = dict(region="eu-west-2", aws_skip_signature="true")

OPEN_SCIENCE_CATALOG_LINK = "https://opensciencedata.esa.int/stac-browser/#"
# Per-group URL templates; `{id}` stays a literal placeholder here and is
# filled in later with `str.format(id=...)`.
URL_TO_INJECT = {
    "products": f"{OPEN_SCIENCE_CATALOG_LINK}/products/{{id}}/collection.json",
    "variables": f"{OPEN_SCIENCE_CATALOG_LINK}/variables/{{id}}/catalog.json",
    "projects": f"{OPEN_SCIENCE_CATALOG_LINK}/projects/{{id}}/collection.json",
    "eo-missions": f"{OPEN_SCIENCE_CATALOG_LINK}/eo-missions/{{id}}/catalog.json",
    "themes": f"{OPEN_SCIENCE_CATALOG_LINK}/themes/{{id}}/catalog.json",
    "experiments": f"{OPEN_SCIENCE_CATALOG_LINK}/experiments/{{id}}/record.json",
    "workflows": f"{OPEN_SCIENCE_CATALOG_LINK}/workflows/{{id}}/record.json",
}

# Module-level caches, populated lazily on first use.
_ds = None
_model = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _as_list(value):
    """Return list/tuple/set inputs unchanged and wrap anything else in a list."""
    return value if isinstance(value, (list, tuple, set)) else [value]


def _sql_text(value):
    """Render ``value`` as text that is safe inside a single-quoted SQL literal."""
    # Doubling single quotes is the standard SQL escape. Without it a value
    # such as "O'Brien" closes the literal early and corrupts (or injects
    # into) the filter expression.
    return str(value).replace("'", "''")


def _pipe_list_filter(column, values):
    """Build an OR-group of ``LOWER(column) LIKE '%|value|%'`` clauses.

    Falsy entries are skipped. Returns ``None`` when no clause remains.
    """
    clauses = [
        f"LOWER({column}) LIKE '%|{_sql_text(v).lower()}|%'"
        for v in _as_list(values)
        if v
    ]
    return "(" + " OR ".join(clauses) + ")" if clauses else None


def _bbox_filter(bbox, intersects):
    """Build the bounding-box predicate from the first four values of ``bbox``.

    Raises:
        ValueError: if the four values are not finite numbers.
    """
    import math  # local import: only this helper needs it

    try:
        minx, miny, maxx, maxy = (float(v) for v in bbox[:4])
    except (TypeError, ValueError) as exc:
        raise ValueError(
            f"Invalid bbox {bbox!r}: expected four numeric values [minx, miny, maxx, maxy]."
        ) from exc
    if not all(math.isfinite(v) for v in (minx, miny, maxx, maxy)):
        raise ValueError(
            f"Invalid bbox {bbox!r}: values must be finite numbers."
        )

    if intersects:
        # Rows whose stored box overlaps the query box.
        return (
            f"bbox_minx <= {maxx} AND bbox_maxx >= {minx} "
            f"AND bbox_miny <= {maxy} AND bbox_maxy >= {miny}"
        )
    # Rows whose stored box lies entirely inside the query box.
    return (
        f"bbox_minx >= {minx} AND bbox_maxx <= {maxx} "
        f"AND bbox_miny >= {miny} AND bbox_maxy <= {maxy}"
    )


def search(
    query=None,
    *,
    limit=10,
    bbox=None,
    intersects=True,
    collection_ids=None,
    theme=None,
    variable=None,
    mission=None,
    keyword=None,
    type="products",
):
    """Search the Lance vector store and return matching PySTAC objects.

    Args:
        query: Free text. When non-blank it is embedded and used for a
            nearest-neighbour search on the ``embedding`` column; otherwise
            only the filters are applied.
        limit: Maximum number of rows to return.
        bbox: Sequence whose first four values are ``minx, miny, maxx, maxy``.
            Ignored when empty or shorter than four values.
        intersects: If true, match rows overlapping ``bbox``; otherwise match
            rows fully contained in it.
        collection_ids: One id or an iterable of ids to restrict the ``id``
            column to.
        theme: One theme or a list/tuple/set of themes. Always validated, but
            only used as a filter when ``type`` is "products" or "variables".
        variable: One or more variable ids (filter applied for "products" only).
        mission: One or more mission ids (filter applied for "products" only).
        keyword: One or more keywords matched against title, description and
            the ``keywords`` column.
        type: Group to search: "products", "variables", "eo-missions" or
            "projects".

    Returns:
        list: ``pystac.Collection`` objects for rows whose JSON has
        ``"type": "Collection"``, ``pystac.Catalog`` otherwise. Each carries an
        ``osc_url`` entry in ``extra_fields``.

    Raises:
        ValueError: on an unknown ``type``, an unknown theme, or a ``bbox``
            whose values are not finite numbers.
    """
    global _ds, _model

    # ---- input validation -------------------------------------------------
    if type not in ("products", "variables", "eo-missions", "projects"):
        raise ValueError(
            f"Invalid type '{type}'. Must be one of 'products', 'variables', 'eo-missions', or 'projects'."
        )

    valid_themes = {
        "land",
        "oceans",
        "atmosphere",
        "cryosphere",
        "magnetosphere-ionosphere",
        "solid-earth",
    }
    themes = _as_list(theme) if theme else []
    for t in themes:
        if t not in valid_themes:
            raise ValueError(
                f"Invalid theme '{t}'. Must be one of {sorted(valid_themes)}."
            )

    # ---- filter string ----------------------------------------------------
    # Built before the dataset/model are loaded so that malformed input fails
    # fast, without any network or model-download work.
    # NOTE: '%' and '_' inside user values still act as LIKE wildcards.
    parts = [f"`group` = '{_sql_text(type)}'"]

    if collection_ids:
        if isinstance(collection_ids, str):
            collection_ids = [collection_ids]
        parts.append(
            "id IN (" + ",".join(f"'{_sql_text(c)}'" for c in collection_ids) + ")"
        )

    list_filters = []
    if themes and type in ("products", "variables"):
        list_filters.append(("theme_ids", themes))
    if variable and type == "products":
        list_filters.append(("variable_ids", variable))
    if mission and type == "products":
        list_filters.append(("mission_ids", mission))
    for column, values in list_filters:
        clause = _pipe_list_filter(column, values)
        if clause:
            parts.append(clause)

    if keyword:
        kw_groups = []
        for kw in _as_list(keyword):
            if not kw:
                continue
            needle = _sql_text(kw).lower()
            kw_groups.append(
                "("
                + " OR ".join(
                    [
                        f"LOWER(title) LIKE '%{needle}%'",
                        f"LOWER(description) LIKE '%{needle}%'",
                        f"LOWER(keywords) LIKE '%|{needle}|%'",
                    ]
                )
                + ")"
            )
        if kw_groups:
            parts.append("(" + " OR ".join(kw_groups) + ")")

    if bbox and len(bbox) >= 4:
        parts.append(_bbox_filter(bbox, intersects))

    filt = " AND ".join(parts) if parts else None

    # ---- dataset / model caches --------------------------------------------
    dataset_uri = LANCE_URI.rstrip("/") + "/"
    # NOTE(review): the cache is reused only while `_ds.uri` equals this exact
    # string (trailing slash included) — confirm lance reports it unchanged.
    if _ds is None or getattr(_ds, "uri", None) != dataset_uri:
        _ds = lance.dataset(dataset_uri, storage_options=LANCE_BASE_STORAGE_OPTIONS)
    if _model is None:
        try:
            _model = TextEmbedding(model_name=MODEL_NAME)
        except Exception as exc:
            # Only a missing-file error triggers the retry: the fastembed
            # cache directory is removed and the model is constructed again.
            # Anything else is re-raised.
            if "NO_SUCHFILE" not in str(exc) and "NoSuchFile" not in str(exc):
                raise
            shutil.rmtree(Path(tempfile.gettempdir()) / "fastembed_cache", ignore_errors=True)
            _model = TextEmbedding(model_name=MODEL_NAME)

    # ---- query -------------------------------------------------------------
    cols = [
        "id",
        "group",
        "title",
        "description",
        "keywords",
        "bbox_minx",
        "bbox_miny",
        "bbox_maxx",
        "bbox_maxy",
        "item_json",
    ]

    if query and query.strip():
        vec = next(_model.embed([query]))
        tbl = _ds.scanner(
            columns=cols,
            filter=filt,
            nearest={
                "column": "embedding",
                "q": np.asarray(vec, dtype=np.float32),
                "k": limit,
            },
            prefilter=True,  # apply the filter before the vector search
            limit=limit,
        ).to_table()
    else:
        tbl = _ds.to_table(columns=cols, filter=filt, limit=limit)

    # ---- rows -> PySTAC objects --------------------------------------------
    url_template = URL_TO_INJECT.get(type, "")
    results = []
    for row in tbl.to_pylist():
        item_j = json.loads(row["item_json"])
        stac_cls = pystac.Collection if item_j.get("type") == "Collection" else pystac.Catalog
        item = stac_cls.from_dict(item_j)
        item.extra_fields["osc_url"] = url_template.format(id=row["id"])
        results.append(item)
    return results
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# if __name__ == "__main__":
|
|
204
|
+
# for grp in ["products", "variables", "eo-missions", "projects"]:
|
|
205
|
+
# print(grp, [c.title for c in search("forest fires", type=grp, limit=2)])
|
|
206
|
+
# print(len(search("forest fires", theme="land", limit=2))) # one or more results expected - with theme = land
|
|
207
|
+
# print(len(search("forest fires", theme="ocean", limit=2))) # raises ValueError: 'ocean' is not a valid theme (the valid name is 'oceans')
|
|
208
|
+
# print(search(variable="burned-area")[0].title) # expect something that has a variable of fire
|
|
209
|
+
# print(search(keyword="Seasonal Fire Modeling")[0].title)
|