metadata_crawler-2509.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)
  1. metadata_crawler/__init__.py +248 -0
  2. metadata_crawler/__main__.py +8 -0
  3. metadata_crawler/_version.py +1 -0
  4. metadata_crawler/api/__init__.py +1 -0
  5. metadata_crawler/api/cli.py +57 -0
  6. metadata_crawler/api/config.py +801 -0
  7. metadata_crawler/api/drs_config.toml +439 -0
  8. metadata_crawler/api/index.py +132 -0
  9. metadata_crawler/api/metadata_stores.py +749 -0
  10. metadata_crawler/api/mixin/__init__.py +7 -0
  11. metadata_crawler/api/mixin/lookup_mixin.py +112 -0
  12. metadata_crawler/api/mixin/lookup_tables.py +10010 -0
  13. metadata_crawler/api/mixin/path_mixin.py +46 -0
  14. metadata_crawler/api/mixin/template_mixin.py +145 -0
  15. metadata_crawler/api/storage_backend.py +277 -0
  16. metadata_crawler/backends/__init__.py +1 -0
  17. metadata_crawler/backends/intake.py +211 -0
  18. metadata_crawler/backends/posix.py +121 -0
  19. metadata_crawler/backends/s3.py +136 -0
  20. metadata_crawler/backends/swift.py +305 -0
  21. metadata_crawler/cli.py +539 -0
  22. metadata_crawler/data_collector.py +258 -0
  23. metadata_crawler/ingester/__init__.py +1 -0
  24. metadata_crawler/ingester/mongo.py +193 -0
  25. metadata_crawler/ingester/solr.py +152 -0
  26. metadata_crawler/logger.py +142 -0
  27. metadata_crawler/py.typed +0 -0
  28. metadata_crawler/run.py +373 -0
  29. metadata_crawler/utils.py +411 -0
  30. metadata_crawler-2509.0.0.dist-info/METADATA +399 -0
  31. metadata_crawler-2509.0.0.dist-info/RECORD +34 -0
  32. metadata_crawler-2509.0.0.dist-info/WHEEL +4 -0
  33. metadata_crawler-2509.0.0.dist-info/entry_points.txt +14 -0
  34. metadata_crawler-2509.0.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,399 @@
+ Metadata-Version: 2.4
+ Name: metadata-crawler
+ Version: 2509.0.0
+ Summary: Crawl, extract and push climate metadata for indexing.
+ Author-email: "DKRZ, Clint" <freva@dkrz.de>
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Environment :: Console
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: BSD License
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ License-File: LICENSE
+ Requires-Dist: aiohttp
+ Requires-Dist: appdirs
+ Requires-Dist: anyio
+ Requires-Dist: ciso8601
+ Requires-Dist: fsspec
+ Requires-Dist: diskcache
+ Requires-Dist: s3fs
+ Requires-Dist: jinja2
+ Requires-Dist: intake
+ Requires-Dist: intake-xarray
+ Requires-Dist: intake-esm
+ Requires-Dist: pandas
+ Requires-Dist: python-dateutil
+ Requires-Dist: numpy
+ Requires-Dist: orjson
+ Requires-Dist: pyarrow
+ Requires-Dist: h5netcdf
+ Requires-Dist: pydantic
+ Requires-Dist: pyarrow
+ Requires-Dist: rich
+ Requires-Dist: rich-argparse
+ Requires-Dist: tomli
+ Requires-Dist: tomlkit
+ Requires-Dist: typing_extensions
+ Requires-Dist: zarr
+ Requires-Dist: xarray
+ Requires-Dist: httpx
+ Requires-Dist: uvloop
+ Requires-Dist: motor
+ Requires-Dist: flit ; extra == "dev"
+ Requires-Dist: tox ; extra == "dev"
+ Requires-Dist: codespell ; extra == "doc"
+ Requires-Dist: blacken-docs ; extra == "doc"
+ Requires-Dist: numpydoc ; extra == "doc"
+ Requires-Dist: sphinx ; extra == "doc"
+ Requires-Dist: sphinxcontrib_github_alt ; extra == "doc"
+ Requires-Dist: sphinx-execute-code-python3 ; extra == "doc"
+ Requires-Dist: sphinx-copybutton ; extra == "doc"
+ Requires-Dist: sphinx-sitemap ; extra == "doc"
+ Requires-Dist: sphinx-togglebutton ; extra == "doc"
+ Requires-Dist: sphinxext-opengraph[social-cards] ; extra == "doc"
+ Requires-Dist: pydata-sphinx-theme ; extra == "doc"
+ Requires-Dist: myst-parser ; extra == "doc"
+ Requires-Dist: mkdocs ; extra == "mkdoc"
+ Requires-Dist: mkdocs-material ; extra == "mkdoc"
+ Requires-Dist: mkdocstrings[python] ; extra == "mkdoc"
+ Requires-Dist: mkdocs-macros-plugin ; extra == "mkdoc"
+ Requires-Dist: mkdocs-minify-plugin ; extra == "mkdoc"
+ Requires-Dist: mkdocs-redirects ; extra == "mkdoc"
+ Requires-Dist: mike ; extra == "mkdoc"
+ Requires-Dist: codespell ; extra == "tests"
+ Requires-Dist: pydocstyle ; extra == "tests"
+ Requires-Dist: types-appdirs ; extra == "tests"
+ Requires-Dist: black ; extra == "tests"
+ Requires-Dist: isort ; extra == "tests"
+ Requires-Dist: mock ; extra == "tests"
+ Requires-Dist: mypy ; extra == "tests"
+ Requires-Dist: netcdf4 ; extra == "tests"
+ Requires-Dist: pandas ; extra == "tests"
+ Requires-Dist: intake-parquet ; extra == "tests"
+ Requires-Dist: pytest-asyncio ; extra == "tests"
+ Requires-Dist: pytest-cov ; extra == "tests"
+ Requires-Dist: pytest-env ; extra == "tests"
+ Requires-Dist: requests ; extra == "tests"
+ Requires-Dist: pre-commit ; extra == "tests"
+ Requires-Dist: toml ; extra == "tests"
+ Project-URL: Documentation, https://github.com/freva-org/freva-admin
+ Project-URL: Home, https://github.com/freva-org/freva-admin
+ Project-URL: Issues, https://github.com/freva-org/freva-admin/issues
+ Project-URL: Source, https://github.com/freva-org/freva-admin
+ Provides-Extra: dev
+ Provides-Extra: doc
+ Provides-Extra: mkdoc
+ Provides-Extra: tests
+
+ # metadata-crawler
+
+ [![License](https://img.shields.io/badge/License-BSD-purple.svg)](LICENSE)
+ [![PyPI](https://img.shields.io/pypi/pyversions/metadata-crawler.svg)](https://pypi.org/project/metadata-crawler/)
+ [![Docs](https://readthedocs.org/projects/metadata-crawler/badge/?version=latest)](https://metadata-crawler.readthedocs.io/en/latest/?badge=latest)
+ [![Tests](https://github.com/freva-org/metadata-crawler/actions/workflows/ci_job.yml/badge.svg)](https://github.com/freva-org/metadata-crawler/actions)
+ [![Test-Coverage](https://codecov.io/gh/freva-org/metadata-crawler/graph/badge.svg?token=W2YziDnh2N)](https://codecov.io/gh/freva-org/metadata-crawler)
+
+ Harvest, normalise, and index climate / earth-system metadata from **POSIX**,
+ **S3/MinIO**, and **OpenStack Swift** using configurable **DRS dialects**
+ (CMIP6, CMIP5, CORDEX, …). Output to a temporary **catalogue** (DuckDB or
+ JSONLines) and then **index** into systems such as **Solr** or **MongoDB**.
+ Configuration is **TOML** with inheritance, templating, and computed rules.
+
+ > [!TIP]
+ > **TL;DR**
+ > - Define datasets + dialects in ``drs_config.toml``
+ > - ``mdc crawl`` → write a temporary catalogue (``jsonl.gz`` or **DuckDB**)
+ > - ``mdc config`` → inspect the (merged) crawler config
+ > - ``mdc walk-intake`` → inspect the content of an intake catalogue
+ > - ``mdc <backend> index`` → push records from the catalogue into your index backend
+ > - ``mdc <backend> delete`` → remove records by facet match
+
+ ## Features
+
+ - **Multi-backend discovery**: POSIX, S3/MinIO, Swift (async REST), Intake
+ - **Two-stage pipeline**: *crawl → catalogue* then *catalogue → index*
+ - **Schema driven**: strong types (e.g. ``string``, ``datetime[2]``,
+   ``float[4]``, ``string[]``)
+ - **DRS dialects**: packaged CMIP6/CMIP5/CORDEX; build your own via inheritance
+ - **Path specs & data specs**: parse directory/filename parts and/or read
+   dataset attributes/vars
+ - **Special rules**: conditionals, cache lookups and function calls (e.g. CMIP6 realm,
+   time aggregation)
+ - **Index backends**: DuckDB (local/S3), MongoDB (Motor), Solr
+ - **Sync + Async APIs** and a clean CLI
+ - **Docs**: Sphinx with ``pydata_sphinx_theme``
+
+ ## Install
+
+ ```console
+ # via pip
+ pip install metadata-crawler
+ # or via conda
+ conda install -c conda-forge metadata-crawler
+ ```
+
+ ## Quickstart (CLI)
+
+ ```console
+ # 1) Crawl → write catalogue
+ mdc crawl \
+     cat.yaml \
+     --config-file drs_config.toml \
+     --dataset cmip6-fs,obs-fs \
+     --threads 4 --batch-size 100
+
+ # 2) Index from catalogue → Solr (or Mongo/DuckDB)
+ mdc solr index \
+     cat.yaml \
+     --server localhost:8983
+
+ # 3) Delete by facets (supports globs on values)
+ mdc solr delete \
+     --server localhost:8983 \
+     --facets "file *.nc" --facets "project CMIP6"
+ ```
+
+ > [!NOTE]
+ > The CLI is a **custom framework** inspired by Typer (not Typer itself).
+ > Use ``--help`` on any subcommand to see all options.
+
+ ## Minimal config (``drs_config.toml``)
+
+ ```toml
+ # === Canonical schema ===
+ [drs_settings.schema.file]
+ key = "file"
+ type = "path"
+ required = true
+ indexed = true
+ unique = true
+
+ [drs_settings.schema.uri]
+ key = "uri"
+ type = "uri"
+ required = true
+ indexed = true
+
+ [drs_settings.schema.variable]
+ key = "variable"
+ type = "string[]"
+ multi_valued = true
+ indexed = true
+
+ [drs_settings.schema.time]
+ key = "time"
+ type = "datetime[2]"  # [start, end]
+ indexed = true
+ default = []
+
+ [drs_settings.schema.bbox]
+ key = "bbox"
+ type = "float[4]"  # [W,E,S,N]
+ default = [0, 360, -90, 90]
+
+ # === Dialect: CMIP6 (example) ===
+ [drs_settings.dialect.cmip6]
+ sources = ["path","data"]  # path | data | storage
+ defaults.grid_label = "gn"
+ specs_dir = ["mip_era","activity_id","institution_id","source_id","experiment_id","member_id","table_id","variable_id","grid_label","version"]
+ specs_file = ["variable_id","table_id","source_id","experiment_id","member_id","grid_label","time"]
+
+ [drs_settings.dialect.cmip6.special.realm]
+ type = "method"
+ method = "_get_realm"
+ args = ["table_id","variable_id","__file_name__"]
+
+ [drs_settings.dialect.cmip6.special.time_aggregation]
+ type = "method"
+ method = "_get_aggregation"
+ args = ["table_id","variable_id","__file_name__"]
+
+ # === Dialect: CORDEX (bbox by domain) ===
+ [drs_settings.dialect.cordex]
+ sources = ["path","data"]
+ specs_dir = ["project","product","domain","institution","driving_model","experiment","ensemble","rcm_name","rcm_version","time_frequency","variable","version"]
+ specs_file = ["variable","domain","driving_model","experiment","ensemble","rcm_name","rcm_version","time_frequency","time"]
+
+ [drs_settings.dialect.cordex.special.bbox]
+ type = "call"
+ method = "dialect['cordex']['domains'].get('{{ domain | upper }}', [0,360,-90,90])"
+
+ [drs_settings.dialect.cordex.domains]
+ EUR-11 = [-44.14, 64.40, 22.20, 72.42]
+ AFR-44 = [-24.64, 60.28, -45.76, 42.24]
+
+ # === Datasets ===
+ [cmip6-fs]
+ root_path = "/data/model/global/cmip6"
+ drs_format = "cmip6"  # dialect name
+ fs_type = "posix"
+
+ [cmip6-s3]
+ root_path = "s3://test-bucket/data/model/global/cmip6"
+ drs_format = "cmip6"
+ fs_type = "s3"
+ storage_options.endpoint_url = "http://127.0.0.1:9000"
+ storage_options.aws_access_key_id = "minioadmin"
+ storage_options.aws_secret_access_key = "minioadmin"
+ storage_options.region_name = "us-east-1"
+ storage_options.url_style = "path"
+ storage_options.use_ssl = false
+
+ [obs-fs]
+ root_path = "/arch/observations"
+ drs_format = "custom"
+ # define your specs_dir/specs_file or inherit from another dialect
+ ```
+
+ ## Concepts
+
+ ### Schema (facet definitions)
+
+ Each canonical facet describes:
+
+ - ``key``: where to read the value from (e.g. ``"project"``, ``"variable"``)
+ - ``type``: ``string``, ``integer``, ``float``, ``datetime``, with arrays like
+   ``float[4]``, ``string[]``, ``datetime[2]``, or special types like ``file``,
+   ``uri``, ``fs_type``, ``dataset``, ``fmt`` (see the sketch below)
+ - ``required``, ``default``, ``indexed``, ``unique``, ``multi_valued``
+
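+ The array suffix on a ``type`` is just positional information. As an
+ illustration only (the helper below is hypothetical and not part of the
+ package API), a spec such as ``float[4]`` or ``string[]`` could be
+ decomposed like this:
+
+ ```python
+ import re
+
+ def parse_facet_type(spec: str) -> tuple[str, int | None]:
+     """Split a type spec like 'float[4]' into (base, arity).
+
+     Arity is None for scalars and 0 for unbounded arrays ('string[]').
+     """
+     match = re.fullmatch(r"(\w+)(?:\[(\d*)\])?", spec)
+     if match is None:
+         raise ValueError(f"invalid type spec: {spec!r}")
+     base, arity = match.groups()
+     if arity is None:  # plain scalar, e.g. 'string'
+         return base, None
+     return base, int(arity) if arity else 0
+
+ assert parse_facet_type("datetime[2]") == ("datetime", 2)
+ assert parse_facet_type("string[]") == ("string", 0)
+ assert parse_facet_type("float") == ("float", None)
+ ```
+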
+ ### Dialects
+
+ A dialect tells the crawler how to **interpret paths** and **read data**:
+
+ - ``sources``: which sources to consult (``path``, ``data``, ``storage``), in priority order
+ - ``specs_dir`` / ``specs_file``: ordered facet names encoded in directory and file names
+ - ``data_specs``: pull values from dataset content (attrs/variables); supports
+   ``__variable__`` and templated specs
+ - ``special``: computed fields (``conditional`` | ``method`` | ``function``)
+ - Optional lookups (e.g., CORDEX ``domains`` for bbox); see the sketch below
+
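+ To make the CORDEX ``special.bbox`` rule from the minimal config concrete:
+ conceptually, the template is rendered against the facets parsed so far and
+ the result is used as a key into the dialect's lookup table. A standalone
+ sketch of that idea (names and flow are illustrative, not the crawler's
+ internals; Jinja2 is a declared dependency):
+
+ ```python
+ from jinja2 import Template
+
+ DOMAINS = {  # from [drs_settings.dialect.cordex.domains]
+     "EUR-11": [-44.14, 64.40, 22.20, 72.42],
+     "AFR-44": [-24.64, 60.28, -45.76, 42.24],
+ }
+ DEFAULT_BBOX = [0, 360, -90, 90]
+
+ def bbox_for(facets: dict[str, str]) -> list[float]:
+     """Render the '{{ domain | upper }}' selector, then look up the bbox."""
+     key = Template("{{ domain | upper }}").render(**facets)
+     return DOMAINS.get(key, DEFAULT_BBOX)
+
+ print(bbox_for({"domain": "eur-11"}))   # [-44.14, 64.4, 22.2, 72.42]
+ print(bbox_for({"domain": "unknown"}))  # falls back to [0, 360, -90, 90]
+ ```
+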
+ ### Path specs vs data specs
+
+ - **Path specs** parse segments from the path (see the sketch below), e.g.:
+   ``/project/product/institute/model/experiment/.../variable_time.nc``
+ - **Data specs** read from the dataset itself (e.g., xarray global attributes,
+   variable attributes, per-var stats). Example: gather all variables via
+   ``__variable__``, then their units with a templated selector.
+
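+ How path specs map onto facets, as a minimal sketch using the CMIP6
+ ``specs_dir``/``specs_file`` shown earlier (the function is illustrative,
+ not the crawler's actual parser):
+
+ ```python
+ from pathlib import PurePosixPath
+
+ SPECS_DIR = ["mip_era", "activity_id", "institution_id", "source_id",
+              "experiment_id", "member_id", "table_id", "variable_id",
+              "grid_label", "version"]
+ SPECS_FILE = ["variable_id", "table_id", "source_id", "experiment_id",
+               "member_id", "grid_label", "time"]
+
+ def facets_from_path(path: str, root: str) -> dict[str, str]:
+     """Zip path segments against the dialect's ordered facet names."""
+     rel = PurePosixPath(path).relative_to(root)
+     facets = dict(zip(SPECS_DIR, rel.parts[:-1]))          # directory parts
+     facets |= dict(zip(SPECS_FILE, rel.stem.split("_")))   # file-name parts
+     return facets
+
+ print(facets_from_path(
+     "/data/model/global/cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/"
+     "r1i1p1f1/Amon/tas/gn/v20190710/"
+     "tas_Amon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc",
+     root="/data/model/global/cmip6",
+ ))
+ # {'mip_era': 'CMIP6', ..., 'variable_id': 'tas', ..., 'time': '185001-186912'}
+ ```
+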
+ ### Inheritance
+
+ Create new dialects/datasets by inheriting:
+
+ ```toml
+ [drs_settings.dialect.reana]
+ inherits_from = "cmip5"
+ sources = ["path","data"]
+
+ [drs_settings.dialect.reana.data_specs.read_kws]
+ engine = "h5netcdf"
+ ```
+
+ ## Python API
+
+ ### Async
+
+ ```python
+ import asyncio
+ from metadata_crawler.run import async_add, async_index, async_delete
+
+ async def main():
+     # crawl → catalogue
+     await async_add(
+         "cat.yaml",
+         config_file="drs_config.toml",
+         dataset_names=["cmip6-fs"],
+         threads=4,
+         batch_size=100,
+     )
+     # index → backend
+     await async_index(
+         "solr",
+         "cat.yaml",
+         config_file="drs_config.toml",
+         server="localhost:8983",
+     )
+     # delete by facets
+     await async_delete(
+         config_path="drs_config.toml",
+         index_store="solr",
+         facets=[("file", "*.nc")],
+     )
+
+ asyncio.run(main())
+ ```
+
+ ### Sync (simple wrapper)
+
+ ```python
+ from metadata_crawler import add
+
+ add(
+     store="cat.yaml",
+     config_file="drs_config.toml",
+     dataset_names=["cmip6-fs"],
+ )
+ ```
+
+ ## Index backends
+
+ - **MongoDB** (Motor): upserts by unique facet (e.g., ``file``), bulk deletes
+   (glob → regex; see the sketch below)
+ - **Solr**: fields align with the managed schema; supports multi-valued facets
+
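+ The glob → regex translation used for delete-by-facet can be expressed with
+ the standard library; a minimal sketch of the idea (not the exact filter the
+ Mongo ingester builds):
+
+ ```python
+ import fnmatch
+
+ def facet_filter(facet: str, pattern: str) -> dict:
+     """Turn a glob like '*.nc' into a Mongo-style regex filter."""
+     return {facet: {"$regex": fnmatch.translate(pattern)}}
+
+ print(facet_filter("file", "*.nc"))
+ # {'file': {'$regex': '(?s:.*\\.nc)\\Z'}}  (exact regex varies by Python version)
+ ```
+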
+ ## Contributing
+
+ Development install:
+
+ ```console
+ git clone https://github.com/freva-org/metadata-crawler.git
+ cd metadata-crawler
+ pip install -e .
+ ```
+
+ PRs and issues welcome. Please add tests and keep examples minimal & reproducible
+ (use the MinIO compose stack). Run:
+
+ ```console
+ python -m pip install tox
+ tox -e test,lint,types
+ ```
+
+ ### Benchmarks
+
+ For benchmarking you can create a directory tree with roughly 1.5 million files by
+ calling the ``create-cordex.sh`` script in the ``dev-env`` folder:
+
+ ```console
+ ./dev-env/create-cordex.sh
+ python dev-env/benchmark.py --max-files 20000
+ ```
+
+ See ``code-of-conduct.rst`` and ``whatsnew.rst`` for guidelines and the changelog.
+
+ Use MinIO or LocalStack via ``docker-compose`` and seed a bucket (e.g., ``test-bucket``).
+ Then point a dataset's ``fs_type = "s3"`` and set ``storage_options``.
+
+ ### Documentation
+
+ Built with Sphinx + ``pydata_sphinx_theme``. Build locally:
+
+ ```console
+ tox -e docs
+ ```
+
@@ -0,0 +1,34 @@
+ metadata_crawler/__init__.py,sha256=7gEpJjS9FpR6MHRY_Ztk8ORJ8JQ7WZUTV2TfLkaYgqs,6741
+ metadata_crawler/__main__.py,sha256=4m56VOh7bb5xmZqb09fFbquke8g6KZfMbb3CUdBA60M,163
+ metadata_crawler/_version.py,sha256=Z6_4SgU9Dpc127xJlyvGKjeWd_Q1ONlOHQO123XGv30,25
+ metadata_crawler/cli.py,sha256=meY5ZfR5VEW5ZorOPWO_b4MyIIQy0wTTPs9OkJ1WnfA,17180
+ metadata_crawler/data_collector.py,sha256=9CVr4arKJspyLNLuF2MfkmY_r8x74Mw8hAaDSMouQUA,8372
+ metadata_crawler/logger.py,sha256=5Lc0KdzH2HdWkidW-MASW8Pfy7vTMnzPv1-e2V3Any0,4407
+ metadata_crawler/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ metadata_crawler/run.py,sha256=w1kV4D63dS3mdgDTQj2ngzeSCjZPphWg1HwIJeJ6ATE,11345
+ metadata_crawler/utils.py,sha256=QNr_9jZkuuQOrkuO46PrFhUfwLmfCJCq9gWUwwARfyM,11580
+ metadata_crawler/api/__init__.py,sha256=UUF0_FKgfqgcXYmknxB0Wt1jaLNaf-w_q0tWVJhgV0M,28
+ metadata_crawler/api/cli.py,sha256=pgj3iB_Irt74VbG3ZKStLRHKYY_I4bZpbOW1famKDnQ,1498
+ metadata_crawler/api/config.py,sha256=j__JDKYTOR8kYC--HaHlYXfz38rzEhtUvHdO5Bh_j2E,28250
+ metadata_crawler/api/drs_config.toml,sha256=90lQaSC2VdJ8OUoc6j27kg6d2OnfxR5a_KZH3W-FZV4,10603
+ metadata_crawler/api/index.py,sha256=8g5HdSxluKtCwU45P0w_7LDIaSf200JbB-ekGJiI18c,4130
+ metadata_crawler/api/metadata_stores.py,sha256=oWewL6XRmNZ6i5WxYI8Lm2jfpwLqBCGP2p4j3wLLNpQ,23735
+ metadata_crawler/api/storage_backend.py,sha256=jdZZ_3SZcP3gJgw_NmPPdpDEx4D7qfLJDABfupTH9p0,7803
+ metadata_crawler/api/mixin/__init__.py,sha256=4Y0T1eM4vLlgFazuC1q2briqx67LyfeCpY_pCICTnjk,197
+ metadata_crawler/api/mixin/lookup_mixin.py,sha256=WxJ-ZNs8DcIXS9ThSoIZiepD07jfmLlzyTp65-Z1fLc,3558
+ metadata_crawler/api/mixin/lookup_tables.py,sha256=za63xfZB0EvAm66uTTYo52zC0z7Y6VL8DUrP6CJ-DnQ,308683
+ metadata_crawler/api/mixin/path_mixin.py,sha256=WKpesEjlwVSJ-VdoYYLEY5oBSAQTsvuv1B38ragAVIM,1247
+ metadata_crawler/api/mixin/template_mixin.py,sha256=_qDp5n_CPnSYPMBsTia44b1ybBqrJEi-M1NaRkQ0z3U,5106
+ metadata_crawler/backends/__init__.py,sha256=yrk1L00ubQlMj3yXI73PPbhAahDKp792PJB-xcXUJIM,35
+ metadata_crawler/backends/intake.py,sha256=TkvzBU8Rk49L0Y8e7Exz2nE3iLSWrBAwZnpEJtdlNR8,6595
+ metadata_crawler/backends/posix.py,sha256=6sjAoCQHiOOjp_Hvwxn247wHBnoAJYUGequqphyZWaA,3409
+ metadata_crawler/backends/s3.py,sha256=DPz_bOyOlUveCwkSLVatwU_mcxUbFvygU_Id1AZVIMA,4455
+ metadata_crawler/backends/swift.py,sha256=az3ctF_npadjzAybX65CQbDLGoxRnk0ZR7vByo6lQOM,10954
+ metadata_crawler/ingester/__init__.py,sha256=Y-c9VkQWMHDLb9WagwITCaEODlYa4p8xW-BkzzSRZXw,55
+ metadata_crawler/ingester/mongo.py,sha256=lpWIZ8mo6S8oY887uz2l6Y9pir0sUVEkfgOdDxrjIMM,6142
+ metadata_crawler/ingester/solr.py,sha256=EoKS3kFeDTLf9zP22s2DhQGP81T6rTXVWDNT2wWKFkk,5242
+ metadata_crawler-2509.0.0.dist-info/entry_points.txt,sha256=4LzS7pbqwUPTD6C-iW42vuhXdtsOJmKXqFZpdpaKwF8,428
+ metadata_crawler-2509.0.0.dist-info/licenses/LICENSE,sha256=GAUualebvSlegSVqb86FUqHrHM8WyM145__Nm2r_dfA,1496
+ metadata_crawler-2509.0.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+ metadata_crawler-2509.0.0.dist-info/METADATA,sha256=Dk0trqXYleepz1L8HXwKF-vAdSQww1zBm4Q014G4aOU,12938
+ metadata_crawler-2509.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: flit 3.12.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
@@ -0,0 +1,14 @@
+ [console_scripts]
+ mdc=metadata_crawler.cli:cli
+ metadata-crawler=metadata_crawler.cli:cli
+
+ [metadata_crawler.ingester]
+ mongo=metadata_crawler.ingester.mongo:MongoIndex
+ solr=metadata_crawler.ingester.solr:SolrIndex
+
+ [metadata_crawler.storage]
+ intake=metadata_crawler.backends.intake:IntakePath
+ posix=metadata_crawler.backends.posix:PosixPath
+ s3=metadata_crawler.backends.s3:S3Path
+ swift=metadata_crawler.backends.swift:SwiftPath
+
@@ -0,0 +1,28 @@
+ BSD 3-Clause License
+
+ Copyright (c) 2025, freva-org
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.