metadata-crawler 2509.0.0__py3-none-any.whl → 2509.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of metadata-crawler might be problematic. Click here for more details.
- metadata_crawler/_version.py +1 -1
- {metadata_crawler-2509.0.0.dist-info → metadata_crawler-2509.0.1.dist-info}/METADATA +20 -20
- {metadata_crawler-2509.0.0.dist-info → metadata_crawler-2509.0.1.dist-info}/RECORD +6 -6
- {metadata_crawler-2509.0.0.dist-info → metadata_crawler-2509.0.1.dist-info}/WHEEL +0 -0
- {metadata_crawler-2509.0.0.dist-info → metadata_crawler-2509.0.1.dist-info}/entry_points.txt +0 -0
- {metadata_crawler-2509.0.0.dist-info → metadata_crawler-2509.0.1.dist-info}/licenses/LICENSE +0 -0
metadata_crawler/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "2509.0.0"
|
|
1
|
+
__version__ = "2509.0.1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: metadata-crawler
|
|
3
|
-
Version: 2509.0.0
|
|
3
|
+
Version: 2509.0.1
|
|
4
4
|
Summary: Crawl, extract and push climate metadata for indexing.
|
|
5
5
|
Author-email: "DKRZ, Clint" <freva@dkrz.de>
|
|
6
6
|
Requires-Python: >=3.11
|
|
@@ -83,10 +83,10 @@ Requires-Dist: pytest-env ; extra == "tests"
|
|
|
83
83
|
Requires-Dist: requests ; extra == "tests"
|
|
84
84
|
Requires-Dist: pre-commit ; extra == "tests"
|
|
85
85
|
Requires-Dist: toml ; extra == "tests"
|
|
86
|
-
Project-URL: Documentation, https://
|
|
87
|
-
Project-URL: Home, https://github.com/freva-org/
|
|
88
|
-
Project-URL: Issues, https://github.com/freva-org/
|
|
89
|
-
Project-URL: Source, https://github.com/freva-org/
|
|
86
|
+
Project-URL: Documentation, https://metadata-crawler.readthedocs.io
|
|
87
|
+
Project-URL: Home, https://github.com/freva-org/metadata-crawler
|
|
88
|
+
Project-URL: Issues, https://github.com/freva-org/metadata-crawler/issues
|
|
89
|
+
Project-URL: Source, https://github.com/freva-org/metadata-crawler
|
|
90
90
|
Provides-Extra: dev
|
|
91
91
|
Provides-Extra: doc
|
|
92
92
|
Provides-Extra: mkdoc
|
|
@@ -95,25 +95,25 @@ Provides-Extra: tests
|
|
|
95
95
|
# metadata-crawler
|
|
96
96
|
|
|
97
97
|
[](LICENSE)
|
|
98
|
-
[](https://pypi.org/project/metadata-crawler/)
|
|
99
99
|
[](https://metadata-crawler.readthedocs.io/en/latest/?badge=latest)
|
|
100
100
|
[](https://github.com/freva-org/metadata-crawler/actions)
|
|
101
101
|
[](https://codecov.io/gh/freva-org/metadata-crawler)
|
|
102
102
|
|
|
103
103
|
Harvest, normalise, and index climate / earth-system metadata from **POSIX**,
|
|
104
104
|
**S3/MinIO**, and **OpenStack Swift** using configurable **DRS dialects**
|
|
105
|
-
(CMIP6, CMIP5, CORDEX, …). Output to a temporary **catalogue** (
|
|
106
|
-
|
|
105
|
+
(CMIP6, CMIP5, CORDEX, …). Output to a temporary **catalogue** (JSONLines)
|
|
106
|
+
and then **index** into systems such as **Solr** or **MongoDB**.
|
|
107
107
|
Configuration is **TOML** with inheritance, templating, and computed rules.
|
|
108
108
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
109
|
+
## TL;DR
|
|
110
|
+
|
|
111
|
+
- Define datasets + dialects in ``drs_config.toml``
|
|
112
|
+
- ``mdc add`` → write a temporary catalogue (``jsonl.gz``)
|
|
113
|
+
- ``mdc config`` → inspect the (merged) crawler config.
|
|
114
|
+
- ``mdc walk-intake`` → inspect the content of an intake catalogue.
|
|
115
|
+
- ``mdc <backend> index`` → push records from catalogue into your index backend
|
|
116
|
+
- ``mdc <backend> delete`` → remove records by facet match
|
|
117
117
|
|
|
118
118
|
## Features
|
|
119
119
|
|
|
@@ -126,7 +126,7 @@ Configuration is **TOML** with inheritance, templating, and computed rules.
|
|
|
126
126
|
dataset attributes/vars
|
|
127
127
|
- **Special rules**: conditionals, cache lookups and function calls (e.g. CMIP6 realm,
|
|
128
128
|
time aggregation)
|
|
129
|
-
- **Index backends**:
|
|
129
|
+
- **Index backends**: MongoDB (Motor), Solr
|
|
130
130
|
- **Sync + Async APIs** and a clean CLI
|
|
131
131
|
- **Docs**: Sphinx with ``pydata_sphinx_theme``
|
|
132
132
|
|
|
@@ -143,14 +143,14 @@ Configuration is **TOML** with inheritance, templating, and computed rules.
|
|
|
143
143
|
```console
|
|
144
144
|
|
|
145
145
|
# 1) Crawl → write catalogue
|
|
146
|
-
mdc
|
|
146
|
+
mdc add \
|
|
147
147
|
cat.yaml \
|
|
148
148
|
--config-file drs_config.toml \
|
|
149
149
|
--dataset cmip6-fs,obs-fs \
|
|
150
150
|
--threads 4 --batch-size 100
|
|
151
151
|
|
|
152
|
-
# 2) Index from catalogue → Solr (or Mongo
|
|
153
|
-
mdc
|
|
152
|
+
# 2) Index from catalogue → Solr (or Mongo)
|
|
153
|
+
mdc solr index \
|
|
154
154
|
cat.yaml \
|
|
155
155
|
--server localhost:8983
|
|
156
156
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
metadata_crawler/__init__.py,sha256=7gEpJjS9FpR6MHRY_Ztk8ORJ8JQ7WZUTV2TfLkaYgqs,6741
|
|
2
2
|
metadata_crawler/__main__.py,sha256=4m56VOh7bb5xmZqb09fFbquke8g6KZfMbb3CUdBA60M,163
|
|
3
|
-
metadata_crawler/_version.py,sha256=
|
|
3
|
+
metadata_crawler/_version.py,sha256=lJ4gM5yptFnF64LPHdDVhj6Mppmsw36i5KAr1dVXO5Y,25
|
|
4
4
|
metadata_crawler/cli.py,sha256=meY5ZfR5VEW5ZorOPWO_b4MyIIQy0wTTPs9OkJ1WnfA,17180
|
|
5
5
|
metadata_crawler/data_collector.py,sha256=9CVr4arKJspyLNLuF2MfkmY_r8x74Mw8hAaDSMouQUA,8372
|
|
6
6
|
metadata_crawler/logger.py,sha256=5Lc0KdzH2HdWkidW-MASW8Pfy7vTMnzPv1-e2V3Any0,4407
|
|
@@ -27,8 +27,8 @@ metadata_crawler/backends/swift.py,sha256=az3ctF_npadjzAybX65CQbDLGoxRnk0ZR7vByo
|
|
|
27
27
|
metadata_crawler/ingester/__init__.py,sha256=Y-c9VkQWMHDLb9WagwITCaEODlYa4p8xW-BkzzSRZXw,55
|
|
28
28
|
metadata_crawler/ingester/mongo.py,sha256=lpWIZ8mo6S8oY887uz2l6Y9pir0sUVEkfgOdDxrjIMM,6142
|
|
29
29
|
metadata_crawler/ingester/solr.py,sha256=EoKS3kFeDTLf9zP22s2DhQGP81T6rTXVWDNT2wWKFkk,5242
|
|
30
|
-
metadata_crawler-2509.0.
|
|
31
|
-
metadata_crawler-2509.0.
|
|
32
|
-
metadata_crawler-2509.0.
|
|
33
|
-
metadata_crawler-2509.0.
|
|
34
|
-
metadata_crawler-2509.0.
|
|
30
|
+
metadata_crawler-2509.0.1.dist-info/entry_points.txt,sha256=4LzS7pbqwUPTD6C-iW42vuhXdtsOJmKXqFZpdpaKwF8,428
|
|
31
|
+
metadata_crawler-2509.0.1.dist-info/licenses/LICENSE,sha256=GAUualebvSlegSVqb86FUqHrHM8WyM145__Nm2r_dfA,1496
|
|
32
|
+
metadata_crawler-2509.0.1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
33
|
+
metadata_crawler-2509.0.1.dist-info/METADATA,sha256=dT5Kd5_sBAccA_Qj9O64zksuF7u2iaj-DXqqEDtUDqc,12864
|
|
34
|
+
metadata_crawler-2509.0.1.dist-info/RECORD,,
|
|
File without changes
|
{metadata_crawler-2509.0.0.dist-info → metadata_crawler-2509.0.1.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{metadata_crawler-2509.0.0.dist-info → metadata_crawler-2509.0.1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|