metadata-crawler 2510.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of metadata-crawler might be problematic.

Files changed (35)
  1. metadata_crawler/__init__.py +263 -0
  2. metadata_crawler/__main__.py +8 -0
  3. metadata_crawler/_version.py +1 -0
  4. metadata_crawler/api/__init__.py +1 -0
  5. metadata_crawler/api/cli.py +57 -0
  6. metadata_crawler/api/config.py +831 -0
  7. metadata_crawler/api/drs_config.toml +440 -0
  8. metadata_crawler/api/index.py +151 -0
  9. metadata_crawler/api/metadata_stores.py +755 -0
  10. metadata_crawler/api/mixin/__init__.py +7 -0
  11. metadata_crawler/api/mixin/lookup_mixin.py +112 -0
  12. metadata_crawler/api/mixin/lookup_tables.py +10010 -0
  13. metadata_crawler/api/mixin/path_mixin.py +46 -0
  14. metadata_crawler/api/mixin/template_mixin.py +145 -0
  15. metadata_crawler/api/storage_backend.py +277 -0
  16. metadata_crawler/backends/__init__.py +1 -0
  17. metadata_crawler/backends/intake.py +211 -0
  18. metadata_crawler/backends/posix.py +121 -0
  19. metadata_crawler/backends/s3.py +140 -0
  20. metadata_crawler/backends/swift.py +305 -0
  21. metadata_crawler/cli.py +547 -0
  22. metadata_crawler/data_collector.py +278 -0
  23. metadata_crawler/ingester/__init__.py +1 -0
  24. metadata_crawler/ingester/mongo.py +206 -0
  25. metadata_crawler/ingester/solr.py +282 -0
  26. metadata_crawler/logger.py +153 -0
  27. metadata_crawler/py.typed +0 -0
  28. metadata_crawler/run.py +419 -0
  29. metadata_crawler/utils/__init__.py +482 -0
  30. metadata_crawler/utils/cftime_utils.py +207 -0
  31. metadata_crawler-2510.1.0.dist-info/METADATA +401 -0
  32. metadata_crawler-2510.1.0.dist-info/RECORD +35 -0
  33. metadata_crawler-2510.1.0.dist-info/WHEEL +4 -0
  34. metadata_crawler-2510.1.0.dist-info/entry_points.txt +14 -0
  35. metadata_crawler-2510.1.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,401 @@
+ Metadata-Version: 2.4
+ Name: metadata-crawler
+ Version: 2510.1.0
+ Summary: Crawl, extract and push climate metadata for indexing.
+ Author-email: "DKRZ, Clint" <freva@dkrz.de>
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Environment :: Console
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: BSD License
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ License-File: LICENSE
+ Requires-Dist: aiohttp
+ Requires-Dist: appdirs
+ Requires-Dist: anyio
+ Requires-Dist: ciso8601
+ Requires-Dist: fsspec
+ Requires-Dist: diskcache
+ Requires-Dist: s3fs
+ Requires-Dist: jinja2
+ Requires-Dist: intake
+ Requires-Dist: intake-xarray
+ Requires-Dist: intake-esm
+ Requires-Dist: pandas
+ Requires-Dist: python-dateutil
+ Requires-Dist: numpy
+ Requires-Dist: orjson
+ Requires-Dist: pyarrow
+ Requires-Dist: h5netcdf
+ Requires-Dist: pydantic<2.12
+ Requires-Dist: pyarrow
+ Requires-Dist: rich
+ Requires-Dist: rich-argparse
+ Requires-Dist: tomli
+ Requires-Dist: tomlkit
+ Requires-Dist: typing_extensions
+ Requires-Dist: zarr
+ Requires-Dist: xarray
+ Requires-Dist: httpx
+ Requires-Dist: uvloop
+ Requires-Dist: motor
+ Requires-Dist: flit ; extra == "dev"
+ Requires-Dist: tox ; extra == "dev"
+ Requires-Dist: codespell ; extra == "doc"
+ Requires-Dist: blacken-docs ; extra == "doc"
+ Requires-Dist: numpydoc ; extra == "doc"
+ Requires-Dist: sphinx ; extra == "doc"
+ Requires-Dist: sphinxcontrib_github_alt ; extra == "doc"
+ Requires-Dist: sphinx-execute-code-python3 ; extra == "doc"
+ Requires-Dist: sphinx-copybutton ; extra == "doc"
+ Requires-Dist: sphinx-sitemap ; extra == "doc"
+ Requires-Dist: sphinx-togglebutton ; extra == "doc"
+ Requires-Dist: sphinxext-opengraph[social-cards] ; extra == "doc"
+ Requires-Dist: pydata-sphinx-theme ; extra == "doc"
+ Requires-Dist: myst-parser ; extra == "doc"
+ Requires-Dist: mkdocs ; extra == "mkdoc"
+ Requires-Dist: mkdocs-material ; extra == "mkdoc"
+ Requires-Dist: mkdocstrings[python] ; extra == "mkdoc"
+ Requires-Dist: mkdocs-macros-plugin ; extra == "mkdoc"
+ Requires-Dist: mkdocs-minify-plugin ; extra == "mkdoc"
+ Requires-Dist: mkdocs-redirects ; extra == "mkdoc"
+ Requires-Dist: mike ; extra == "mkdoc"
+ Requires-Dist: codespell ; extra == "tests"
+ Requires-Dist: pydocstyle ; extra == "tests"
+ Requires-Dist: types-appdirs ; extra == "tests"
+ Requires-Dist: black ; extra == "tests"
+ Requires-Dist: isort ; extra == "tests"
+ Requires-Dist: mock ; extra == "tests"
+ Requires-Dist: mypy ; extra == "tests"
+ Requires-Dist: netcdf4 ; extra == "tests"
+ Requires-Dist: pandas ; extra == "tests"
+ Requires-Dist: intake-parquet ; extra == "tests"
+ Requires-Dist: pytest-asyncio ; extra == "tests"
+ Requires-Dist: pytest-cov ; extra == "tests"
+ Requires-Dist: pytest-env ; extra == "tests"
+ Requires-Dist: requests ; extra == "tests"
+ Requires-Dist: pre-commit ; extra == "tests"
+ Requires-Dist: toml ; extra == "tests"
+ Project-URL: Documentation, https://metadata-crawler.readthedocs.io
+ Project-URL: Home, https://github.com/freva-org/metadata-crawler
+ Project-URL: Issues, https://github.com/freva-org/metadata-crawler/issues
+ Project-URL: Source, https://github.com/freva-org/metadata-crawler
+ Provides-Extra: dev
+ Provides-Extra: doc
+ Provides-Extra: mkdoc
+ Provides-Extra: tests
+
+ # metadata-crawler
+
+ [![License](https://img.shields.io/badge/License-BSD-purple.svg)](LICENSE)
+ [![PyPI](https://img.shields.io/pypi/pyversions/metadata-crawler.svg)](https://pypi.org/project/metadata-crawler/)
+ [![Conda Version](https://img.shields.io/conda/vn/conda-forge/metadata-crawler.svg)](https://anaconda.org/conda-forge/metadata-crawler)
+ [![Docs](https://readthedocs.org/projects/metadata-crawler/badge/?version=latest)](https://metadata-crawler.readthedocs.io/en/latest/?badge=latest)
+ [![Tests](https://github.com/freva-org/metadata-crawler/actions/workflows/ci_job.yml/badge.svg)](https://github.com/freva-org/metadata-crawler/actions)
+ [![Test-Coverage](https://codecov.io/gh/freva-org/metadata-crawler/graph/badge.svg?token=W2YziDnh2N)](https://codecov.io/gh/freva-org/metadata-crawler)
+
+
+ Harvest, normalise, and index climate / earth-system metadata from **POSIX**,
+ **S3/MinIO**, and **OpenStack Swift** using configurable **DRS dialects**
+ (CMIP6, CMIP5, CORDEX, …). Output to a temporary **catalogue** (JSONLines)
+ and then **index** into systems such as **Solr** or **MongoDB**.
+ Configuration is **TOML** with inheritance, templating, and computed rules.
+
+ ## TL;DR
+
+ - Define datasets + dialects in ``drs_config.toml``
+ - ``mdc add`` → write a temporary catalogue (``jsonl.gz``)
+ - ``mdc config`` → inspect the (merged) crawler config.
+ - ``mdc walk-intake`` → inspect the content of an intake catalogue.
+ - ``mdc <backend> index`` → push records from the catalogue into your index backend
+ - ``mdc <backend> delete`` → remove records by facet match
+
+ ## Features
+
+ - **Multi-backend discovery**: POSIX, S3/MinIO, Swift (async REST), Intake
+ - **Two-stage pipeline**: *crawl → catalogue* then *catalogue → index*
+ - **Schema driven**: strong types (e.g. ``string``, ``datetime[2]``,
+   ``float[4]``, ``string[]``)
+ - **DRS dialects**: packaged CMIP6/CMIP5/CORDEX; build your own via inheritance
+ - **Path specs & data specs**: parse directory/filename parts and/or read
+   dataset attributes/vars
+ - **Special rules**: conditionals, cache lookups and function calls (e.g. CMIP6 realm,
+   time aggregation)
+ - **Index backends**: MongoDB (Motor), Solr
+ - **Sync + Async APIs** and a clean CLI
+ - **Docs**: Sphinx with ``pydata_sphinx_theme``
+
+ ## Install
+
+ ```console
+
+ pip install metadata-crawler
+ conda install -c conda-forge metadata-crawler
+ ```
+
+ ## Quickstart (CLI)
+
+ ```console
+
+ # 1) Crawl → write catalogue
+ mdc add \
+     cat.yaml \
+     --config-file drs_config.toml \
+     --dataset cmip6-fs,obs-fs \
+     --threads 4 --batch-size 100
+
+ # 2) Index from catalogue → Solr (or Mongo)
+ mdc solr index \
+     cat.yaml \
+     --server localhost:8983
+
+ # 3) Delete by facets (supports globs on values)
+ mdc solr delete \
+     --server localhost:8983 \
+     --facets "file *.nc" --facets "project CMIP6"
+ ```
+
+ > [!NOTE]
+ > The CLI is a **custom framework** inspired by Typer (not Typer itself).
+ > Use ``--help`` on any subcommand to see all options.
+
+ ## Minimal config (``drs_config.toml``)
+
+ ```toml
+
+ # === Canonical schema ===
+ [drs_settings.schema.file]
+ key = "file"
+ type = "path"
+ required = true
+ indexed = true
+ unique = true
+
+ [drs_settings.schema.uri]
+ key = "uri"
+ type = "uri"
+ required = true
+ indexed = true
+
+ [drs_settings.schema.variable]
+ key = "variable"
+ type = "string[]"
+ multi_valued = true
+ indexed = true
+
+ [drs_settings.schema.time]
+ key = "time"
+ type = "datetime[2]" # [start, end]
+ indexed = true
+ default = []
+
+ [drs_settings.schema.bbox]
+ key = "bbox"
+ type = "float[4]" # [W,E,S,N]
+ default = [0, 360, -90, 90]
+
+ # === Dialect: CMIP6 (example) ===
+ [drs_settings.dialect.cmip6]
+ sources = ["path","data"] # path | data | storage
+ defaults.grid_label = "gn"
+ specs_dir = ["mip_era","activity_id","institution_id","source_id","experiment_id","member_id","table_id","variable_id","grid_label","version"]
+ specs_file = ["variable_id","table_id","source_id","experiment_id","member_id","grid_label","time"]
+
+ [drs_settings.dialect.cmip6.special.realm]
+ type = "method"
+ method = "_get_realm"
+ args = ["table_id","variable_id","__file_name__"]
+
+ [drs_settings.dialect.cmip6.special.time_aggregation]
+ type = "method"
+ method = "_get_aggregation"
+ args = ["table_id","variable_id","__file_name__"]
+
+ # === Dialect: CORDEX (bbox by domain) ===
+ [drs_settings.dialect.cordex]
+ sources = ["path","data"]
+ specs_dir = ["project","product","domain","institution","driving_model","experiment","ensemble","rcm_name","rcm_version","time_frequency","variable","version"]
+ specs_file = ["variable","domain","driving_model","experiment","ensemble","rcm_name","rcm_version","time_frequency","time"]
+
+ [drs_settings.dialect.cordex.special.bbox]
+ type = "call"
+ method = "dialect['cordex']['domains'].get('{{domain | upper }}', [0,360,-90,90])"
+
+ [drs_settings.dialect.cordex.domains]
+ EUR-11 = [-44.14, 64.40, 22.20, 72.42]
+ AFR-44 = [-24.64, 60.28, -45.76, 42.24]
+
+ # === Datasets ===
+ [cmip6-fs]
+ root_path = "/data/model/global/cmip6"
+ drs_format = "cmip6" # dialect name
+ fs_type = "posix"
+
+ [cmip6-s3]
+ root_path = "s3://test-bucket/data/model/global/cmip6"
+ drs_format = "cmip6"
+ fs_type = "s3"
+ storage_options.endpoint_url = "http://127.0.0.1:9000"
+ storage_options.aws_access_key_id = "minioadmin"
+ storage_options.aws_secret_access_key = "minioadmin"
+ storage_options.region_name = "us-east-1"
+ storage_options.url_style = "path"
+ storage_options.use_ssl = false
+
+ [obs-fs]
+ root_path = "/arch/observations"
+ drs_format = "custom"
+ # define your specs_dir/specs_file or inherit from another dialect
+ ```
+
+ ## Concepts
+
+ ### Schema (facet definitions)
+
+ Each canonical facet describes:
+
+ - ``key``: where to read the value from (e.g. ``"project"``, ``"variable"``)
+ - ``type``: ``string``, ``integer``, ``float``, ``datetime``, with arrays like
+   ``float[4]``, ``string[]``, ``datetime[2]``, or special types like ``file``,
+   ``uri``, ``fs_type``, ``dataset``, ``fmt``
+ - ``required``, ``default``, ``indexed``, ``unique``, ``multi_valued``
+
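+ For orientation, here is a minimal sketch of what one catalogue record could look like
+ under this schema. The exact layout of the ``jsonl.gz`` catalogue is an assumption; only
+ the facet keys and types come from the schema above, and the path and values are made up:
+
+ ```python
+ import gzip
+ import json
+
+ # Hypothetical record: one JSON object per file, keyed by the canonical facets above.
+ record = {
+     "file": "/data/model/global/cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/"
+             "r1i1p1f1/Amon/tas/gn/v20190710/"
+             "tas_Amon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc",
+     "variable": ["tas"],                                      # string[]  -> multi-valued
+     "time": ["1850-01-16T12:00:00", "1869-12-16T12:00:00"],   # datetime[2]
+     "bbox": [0, 360, -90, 90],                                # float[4]  -> schema default
+ }
+ record["uri"] = "file://" + record["file"]                    # uri facet derived from the path
+
+ # Append the record to a gzipped JSONLines catalogue.
+ with gzip.open("cat.jsonl.gz", "at") as stream:
+     stream.write(json.dumps(record) + "\n")
+ ```
+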
+ ### Dialects
+
+ A dialect tells the crawler how to **interpret paths** and **read data**:
+
+ - ``sources``: which sources to consult (``path``, ``data``, ``storage``), in priority order
+ - ``specs_dir`` / ``specs_file``: ordered facet names encoded in directory and file names
+ - ``data_specs``: pull values from dataset content (attrs/variables); supports
+   ``__variable__`` and templated specs
+ - ``special``: computed fields (``conditional`` | ``method`` | ``function``)
+ - Optional lookups (e.g., CORDEX ``domains`` for bbox)
+
+ ### Path specs vs data specs
+
+ - **Path specs** parse segments from the path, e.g.:
+   ``/project/product/institute/model/experiment/.../variable_time.nc``
+ - **Data specs** read from the dataset itself (e.g., xarray global attributes, variable
+   attributes, per-variable stats). Example: gather all variables via ``__variable__``, then
+   read their units with a templated selector.
+
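+ To make path specs concrete, here is a small illustrative sketch (not the crawler's
+ actual implementation) that pairs the ordered ``specs_dir``/``specs_file`` facet names of
+ the CMIP6 dialect above with the segments of a hypothetical file path:
+
+ ```python
+ from pathlib import PurePosixPath
+
+ # Ordered facet names, as declared for the cmip6 dialect in the minimal config above.
+ specs_dir = ["mip_era", "activity_id", "institution_id", "source_id", "experiment_id",
+              "member_id", "table_id", "variable_id", "grid_label", "version"]
+ specs_file = ["variable_id", "table_id", "source_id", "experiment_id",
+               "member_id", "grid_label", "time"]
+
+ path = PurePosixPath(
+     "/data/model/global/cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/"
+     "r1i1p1f1/Amon/tas/gn/v20190710/"
+     "tas_Amon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc"
+ )
+
+ # Directory facets: the last len(specs_dir) directory levels below the dataset root.
+ dir_facets = dict(zip(specs_dir, path.parent.parts[-len(specs_dir):]))
+ # Filename facets: underscore-separated parts of the file name (suffix stripped).
+ file_facets = dict(zip(specs_file, path.stem.split("_")))
+ print({**dir_facets, **file_facets})
+ ```
+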
+ ### Inheritance
+
+ Create new dialects/datasets by inheriting:
+
+ ```toml
+
+ [drs_settings.dialect.reana]
+ inherits_from = "cmip5"
+ sources = ["path","data"]
+ [drs_settings.dialect.reana.data_specs.read_kws]
+ engine = "h5netcdf"
+ ```
+
+ ## Python API
+
+ ### Async
+
+ ```python
+
+ import asyncio
+ from metadata_crawler.run import async_add, async_index, async_delete
+
+ async def main():
+     # crawl → catalogue
+     await async_add(
+         "cat.yaml",
+         config_file="drs_config.toml",
+         dataset_names=["cmip6-fs"],
+         threads=4,
+         batch_size=100,
+     )
+     # index → backend
+     await async_index(
+         "solr",
+         "cat.yaml",
+         config_file="drs_config.toml",
+         server="localhost:8983",
+     )
+     # delete by facets
+     await async_delete(
+         config_path="drs_config.toml",
+         index_store="solr",
+         facets=[("file", "*.nc")],
+     )
+
+ asyncio.run(main())
+ ```
+
+
+ ### Sync (simple wrapper)
+
+ ```python
+
+ from metadata_crawler import add
+
+ add(
+     store="cat.yaml",
+     config_file="drs_config.toml",
+     dataset_names=["cmip6-fs"],
+ )
+ ```
+
+ ## Index backends
+
+ - **MongoDB** (Motor): upserts by unique facet (e.g., ``file``), bulk deletes (glob → regex)
+ - **Solr**: fields align with managed schema; supports multi-valued facets
+
+
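+ The same two-stage flow shown above can target MongoDB instead of Solr. A sketch using
+ the documented ``async_index`` call; the ``mongo`` backend name matches the packaged
+ entry points, but the connection-string format passed via ``server`` is an assumption:
+
+ ```python
+ import asyncio
+
+ from metadata_crawler.run import async_index
+
+ async def main() -> None:
+     # Push records from the catalogue into MongoDB rather than Solr.
+     await async_index(
+         "mongo",                             # backend name
+         "cat.yaml",
+         config_file="drs_config.toml",
+         server="mongodb://localhost:27017",  # assumed connection string
+     )
+
+ asyncio.run(main())
+ ```
+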
+ ## Contributing
+
+ Development install:
+
+ ```console
+
+ git clone https://github.com/freva-org/metadata-crawler.git
+ cd metadata-crawler
+ pip install -e .
+
+ ```
+
+ PRs and issues welcome. Please add tests and keep examples minimal & reproducible
+ (use the MinIO compose stack). Run:
+
+
+ ```console
+ python -m pip install tox
+ tox -e test,lint,types
+ ```
+
+ ### Benchmarks
+ For benchmarking you can create a directory tree with roughly 1.5 M files by
+ calling the ``create-cordex.sh`` script in the ``dev-env`` folder:
+
+ ```console
+ ./dev-env/create-cordex.sh
+ python dev-env/benchmark.py --max-files 20000
+ ```
+
+
+ See ``code-of-conduct.rst`` and ``whatsnew.rst`` for guidelines and changelog.
+
+ Use MinIO or LocalStack via ``docker-compose`` and seed a bucket (e.g., ``test-bucket``).
+ Then set a dataset’s ``fs_type = "s3"`` and configure its ``storage_options``.
+
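+ As a rough sketch of the seeding step (endpoint and credentials mirror the ``cmip6-s3``
+ example above; the local source path is a placeholder for your own test data):
+
+ ```python
+ import s3fs
+
+ # MinIO defaults taken from the cmip6-s3 dataset example above.
+ fs = s3fs.S3FileSystem(
+     key="minioadmin",
+     secret="minioadmin",
+     client_kwargs={"endpoint_url": "http://127.0.0.1:9000"},
+ )
+ if not fs.exists("test-bucket"):
+     fs.mkdir("test-bucket")
+ # Upload a local CMIP6 tree so the s3 dataset has something to crawl.
+ fs.put("/local/cmip6", "test-bucket/data/model/global/cmip6", recursive=True)
+ ```
+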
+ ### Documentation
+
+ Built with Sphinx + ``pydata_sphinx_theme``. Build locally:
+
+ ```console
+ tox -e docs
+ ```
+
@@ -0,0 +1,35 @@
1
+ metadata_crawler/__init__.py,sha256=dT4ZOngmtO-7fiWqdo80JmeRacG09fy1T8C0bZpFR6Q,7167
2
+ metadata_crawler/__main__.py,sha256=4m56VOh7bb5xmZqb09fFbquke8g6KZfMbb3CUdBA60M,163
3
+ metadata_crawler/_version.py,sha256=_KJS3jBkMy--QJjEfRuFFutGcU0bPMFJLUgB2EZNym4,25
4
+ metadata_crawler/cli.py,sha256=qi77QXtuwO1N3MvLbacdaOZwzpT22FJMpnnp1k6yj-Y,17347
5
+ metadata_crawler/data_collector.py,sha256=7N0zQcxjsqITUVr0JnkFu_beMzrTW-paaw69ESC9rkQ,9063
6
+ metadata_crawler/logger.py,sha256=wNImwUVw0ycvIYrxzthWAgOCujJZhVDCSiCH5KKX5EA,4743
7
+ metadata_crawler/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ metadata_crawler/run.py,sha256=_6mx29Co1HwfPNFWtzTR65CNlopqubj-McmavRM7i80,12869
9
+ metadata_crawler/api/__init__.py,sha256=UUF0_FKgfqgcXYmknxB0Wt1jaLNaf-w_q0tWVJhgV0M,28
10
+ metadata_crawler/api/cli.py,sha256=pgj3iB_Irt74VbG3ZKStLRHKYY_I4bZpbOW1famKDnQ,1498
11
+ metadata_crawler/api/config.py,sha256=8C-qJC8lPmms61vHgmv3Dijppjjrtfnu35fD_SOjk68,29225
12
+ metadata_crawler/api/drs_config.toml,sha256=c3Gc8MGH22xlDOLH_y2TXiiEydmhjzvish-fQi5aGRA,10622
13
+ metadata_crawler/api/index.py,sha256=0yqtXYOyWJJKKkCkIJbUUVG1w2Wt_icYJjXJPZZjSvU,4715
14
+ metadata_crawler/api/metadata_stores.py,sha256=UekPl16KlaF7xiD4X7KVo3EMWz9KE-MT7gKxvgZyvXU,24016
15
+ metadata_crawler/api/storage_backend.py,sha256=jdZZ_3SZcP3gJgw_NmPPdpDEx4D7qfLJDABfupTH9p0,7803
16
+ metadata_crawler/api/mixin/__init__.py,sha256=4Y0T1eM4vLlgFazuC1q2briqx67LyfeCpY_pCICTnjk,197
17
+ metadata_crawler/api/mixin/lookup_mixin.py,sha256=WxJ-ZNs8DcIXS9ThSoIZiepD07jfmLlzyTp65-Z1fLc,3558
18
+ metadata_crawler/api/mixin/lookup_tables.py,sha256=za63xfZB0EvAm66uTTYo52zC0z7Y6VL8DUrP6CJ-DnQ,308683
19
+ metadata_crawler/api/mixin/path_mixin.py,sha256=WKpesEjlwVSJ-VdoYYLEY5oBSAQTsvuv1B38ragAVIM,1247
20
+ metadata_crawler/api/mixin/template_mixin.py,sha256=hxQXiP_JND3fuxBNcs1pZ7cvP-k-lTm5MQg40t0kF54,5105
21
+ metadata_crawler/backends/__init__.py,sha256=yrk1L00ubQlMj3yXI73PPbhAahDKp792PJB-xcXUJIM,35
22
+ metadata_crawler/backends/intake.py,sha256=TkvzBU8Rk49L0Y8e7Exz2nE3iLSWrBAwZnpEJtdlNR8,6595
23
+ metadata_crawler/backends/posix.py,sha256=6sjAoCQHiOOjp_Hvwxn247wHBnoAJYUGequqphyZWaA,3409
24
+ metadata_crawler/backends/s3.py,sha256=2ki-O_rRIb5dJVS9KyMmDDPczGOQTBUa-hmImllqeeE,4602
25
+ metadata_crawler/backends/swift.py,sha256=az3ctF_npadjzAybX65CQbDLGoxRnk0ZR7vByo6lQOM,10954
26
+ metadata_crawler/ingester/__init__.py,sha256=Y-c9VkQWMHDLb9WagwITCaEODlYa4p8xW-BkzzSRZXw,55
27
+ metadata_crawler/ingester/mongo.py,sha256=Ntt3zKVtAX6wDB5aQYCoYrkVWrnvJU2oJJyfYGW30lU,6546
28
+ metadata_crawler/ingester/solr.py,sha256=WrdyOdwMiutmOE1lP_3rOx7h99gbvDjkxU1FMG9zmbs,9560
29
+ metadata_crawler/utils/__init__.py,sha256=VSIoAtorPSiGkkwjnEcO6gKZJzXlOewDzSNUMsGAoo0,14125
30
+ metadata_crawler/utils/cftime_utils.py,sha256=gd64D3kEKOtGmQ7wHnnSJc7Emnw2_LflV52bCZlhTwU,5586
31
+ metadata_crawler-2510.1.0.dist-info/entry_points.txt,sha256=4LzS7pbqwUPTD6C-iW42vuhXdtsOJmKXqFZpdpaKwF8,428
32
+ metadata_crawler-2510.1.0.dist-info/licenses/LICENSE,sha256=GAUualebvSlegSVqb86FUqHrHM8WyM145__Nm2r_dfA,1496
33
+ metadata_crawler-2510.1.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
34
+ metadata_crawler-2510.1.0.dist-info/METADATA,sha256=hmv6Gvv9c-rmGSy59SHJnK2Nn9fq4sNQwlXF35-GnC4,13006
35
+ metadata_crawler-2510.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: flit 3.12.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
@@ -0,0 +1,14 @@
+ [console_scripts]
+ mdc=metadata_crawler.cli:cli
+ metadata-crawler=metadata_crawler.cli:cli
+
+ [metadata_crawler.ingester]
+ mongo=metadata_crawler.ingester.mongo:MongoIndex
+ solr=metadata_crawler.ingester.solr:SolrIndex
+
+ [metadata_crawler.storage]
+ intake=metadata_crawler.backends.intake:IntakePath
+ posix=metadata_crawler.backends.posix:PosixPath
+ s3=metadata_crawler.backends.s3:S3Path
+ swift=metadata_crawler.backends.swift:SwiftPath
+
@@ -0,0 +1,28 @@
+ BSD 3-Clause License
+
+ Copyright (c) 2025, freva-org
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.