ddigraph 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE +21 -0
- NOTICE +2 -0
- README.md +160 -0
- ddigraph/__init__.py +108 -0
- ddigraph/api.py +296 -0
- ddigraph/cli.py +694 -0
- ddigraph/config.py +202 -0
- ddigraph/graph/__init__.py +19 -0
- ddigraph/graph/bootstrap.py +158 -0
- ddigraph/ingest/__init__.py +31 -0
- ddigraph/ingest/_compose.py +175 -0
- ddigraph/ingest/_composition_specs.py +581 -0
- ddigraph/ingest/cdi_loader.py +860 -0
- ddigraph/ingest/fragment_loader.py +1261 -0
- ddigraph/ingest/loader.py +3591 -0
- ddigraph/logging.py +49 -0
- ddigraph/metrics.py +40 -0
- ddigraph/paths.py +41 -0
- ddigraph/py.typed +1 -0
- ddigraph/resources.py +47 -0
- ddigraph/schema/__init__.py +21 -0
- ddigraph/schema/_generated/__init__.py +21 -0
- ddigraph/schema/_generated/cdi.py +1549 -0
- ddigraph/schema/_generated/codebook.py +128 -0
- ddigraph/schema/_generated/lifecycle.py +516 -0
- ddigraph/schema/_overrides/__init__.py +14 -0
- ddigraph/schema/_overrides/_loader.py +149 -0
- ddigraph/schema/_overrides/schema_overrides.toml +220 -0
- ddigraph/schema/adapter.py +79 -0
- ddigraph/schema/ddi_graph.py +2127 -0
- ddigraph/schema/definitions/__init__.py +166 -0
- ddigraph/schema/definitions/_dataclasses.py +60 -0
- ddigraph/schema/definitions/cdi.py +22 -0
- ddigraph/schema/definitions/codebook.py +1053 -0
- ddigraph/schema/definitions/lifecycle.py +1515 -0
- ddigraph/schema/neo4j_adapter.py +961 -0
- ddigraph/schemas/README.md +59 -0
- ddigraph/schemas/__init__.py +0 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-attribs-1.xsd +82 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-bdo-1.xsd +85 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-blkphras-1.xsd +206 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-blkpres-1.xsd +42 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-blkstruct-1.xsd +57 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-charent-1.xsd +41 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-datatypes-1.xsd +147 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-framework-1.xsd +74 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-hypertext-1.xsd +51 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlphras-1.xsd +220 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlpres-1.xsd +56 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlstruct-1.xsd +60 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlstyle-1.xsd +27 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-lat1.ent +121 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-list-1.xsd +128 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-notations-1.xsd +105 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-pres-1.xsd +53 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-special.ent +82 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-symbol.ent +204 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-table-1.xsd +323 -0
- ddigraph/schemas/ddi/v3_1/XHTML/xhtml-text-1.xsd +70 -0
- ddigraph/schemas/ddi/v3_1/archive.xsd +1797 -0
- ddigraph/schemas/ddi/v3_1/comparative.xsd +570 -0
- ddigraph/schemas/ddi/v3_1/conceptualcomponent.xsd +1517 -0
- ddigraph/schemas/ddi/v3_1/datacollection.xsd +7300 -0
- ddigraph/schemas/ddi/v3_1/dataset.xsd +243 -0
- ddigraph/schemas/ddi/v3_1/dc.xsd +119 -0
- ddigraph/schemas/ddi/v3_1/dcmitype.xsd +53 -0
- ddigraph/schemas/ddi/v3_1/dcterms.xsd +383 -0
- ddigraph/schemas/ddi/v3_1/ddi-xhtml11-model-1.xsd +462 -0
- ddigraph/schemas/ddi/v3_1/ddi-xhtml11-modules-1.xsd +537 -0
- ddigraph/schemas/ddi/v3_1/ddi-xhtml11.xsd +83 -0
- ddigraph/schemas/ddi/v3_1/ddiprofile.xsd +227 -0
- ddigraph/schemas/ddi/v3_1/group.xsd +1403 -0
- ddigraph/schemas/ddi/v3_1/instance_3_1.xsd +406 -0
- ddigraph/schemas/ddi/v3_1/logicalproduct.xsd +3488 -0
- ddigraph/schemas/ddi/v3_1/physicaldataproduct.xsd +813 -0
- ddigraph/schemas/ddi/v3_1/physicaldataproduct_ncube_inline.xsd +221 -0
- ddigraph/schemas/ddi/v3_1/physicaldataproduct_ncube_normal.xsd +237 -0
- ddigraph/schemas/ddi/v3_1/physicaldataproduct_ncube_tabular.xsd +283 -0
- ddigraph/schemas/ddi/v3_1/physicaldataproduct_proprietary.xsd +237 -0
- ddigraph/schemas/ddi/v3_1/physicalinstance.xsd +833 -0
- ddigraph/schemas/ddi/v3_1/reusable.xsd +8337 -0
- ddigraph/schemas/ddi/v3_1/studyunit.xsd +325 -0
- ddigraph/schemas/ddi/v3_1/xml.xsd +76 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-attribs-1.xsd +82 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-bdo-1.xsd +85 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-blkphras-1.xsd +206 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-blkpres-1.xsd +42 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-blkstruct-1.xsd +57 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-charent-1.xsd +41 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-datatypes-1.xsd +147 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-framework-1.xsd +74 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-hypertext-1.xsd +51 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlphras-1.xsd +220 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlpres-1.xsd +56 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlstruct-1.xsd +60 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlstyle-1.xsd +27 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-lat1.ent +121 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-list-1.xsd +128 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-notations-1.xsd +105 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-pres-1.xsd +53 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-special.ent +82 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-symbol.ent +204 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-table-1.xsd +323 -0
- ddigraph/schemas/ddi/v3_2/XHTML/xhtml-text-1.xsd +70 -0
- ddigraph/schemas/ddi/v3_2/archive.xsd +1797 -0
- ddigraph/schemas/ddi/v3_2/comparative.xsd +570 -0
- ddigraph/schemas/ddi/v3_2/conceptualcomponent.xsd +1517 -0
- ddigraph/schemas/ddi/v3_2/datacollection.xsd +7300 -0
- ddigraph/schemas/ddi/v3_2/dataset.xsd +243 -0
- ddigraph/schemas/ddi/v3_2/dc.xsd +119 -0
- ddigraph/schemas/ddi/v3_2/dcmitype.xsd +53 -0
- ddigraph/schemas/ddi/v3_2/dcterms.xsd +383 -0
- ddigraph/schemas/ddi/v3_2/ddi-xhtml11-model-1.xsd +462 -0
- ddigraph/schemas/ddi/v3_2/ddi-xhtml11-modules-1.xsd +537 -0
- ddigraph/schemas/ddi/v3_2/ddi-xhtml11.xsd +83 -0
- ddigraph/schemas/ddi/v3_2/ddiprofile.xsd +227 -0
- ddigraph/schemas/ddi/v3_2/group.xsd +1403 -0
- ddigraph/schemas/ddi/v3_2/instance_3_2.xsd +406 -0
- ddigraph/schemas/ddi/v3_2/logicalproduct.xsd +3488 -0
- ddigraph/schemas/ddi/v3_2/physicaldataproduct.xsd +813 -0
- ddigraph/schemas/ddi/v3_2/physicaldataproduct_ncube_inline.xsd +221 -0
- ddigraph/schemas/ddi/v3_2/physicaldataproduct_ncube_normal.xsd +237 -0
- ddigraph/schemas/ddi/v3_2/physicaldataproduct_ncube_tabular.xsd +283 -0
- ddigraph/schemas/ddi/v3_2/physicaldataproduct_proprietary.xsd +237 -0
- ddigraph/schemas/ddi/v3_2/physicalinstance.xsd +833 -0
- ddigraph/schemas/ddi/v3_2/reusable.xsd +8337 -0
- ddigraph/schemas/ddi/v3_2/studyunit.xsd +325 -0
- ddigraph/schemas/ddi/v3_2/xml.xsd +76 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-attribs-1.xsd +82 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-bdo-1.xsd +85 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-blkphras-1.xsd +206 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-blkpres-1.xsd +42 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-blkstruct-1.xsd +57 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-charent-1.xsd +41 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-datatypes-1.xsd +147 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-framework-1.xsd +74 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-hypertext-1.xsd +51 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlphras-1.xsd +220 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlpres-1.xsd +56 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlstruct-1.xsd +60 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlstyle-1.xsd +27 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-lat1.ent +121 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-list-1.xsd +128 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-notations-1.xsd +105 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-pres-1.xsd +53 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-special.ent +82 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-symbol.ent +204 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-table-1.xsd +323 -0
- ddigraph/schemas/ddi/v3_3/XHTML/xhtml-text-1.xsd +70 -0
- ddigraph/schemas/ddi/v3_3/archive.xsd +1797 -0
- ddigraph/schemas/ddi/v3_3/comparative.xsd +570 -0
- ddigraph/schemas/ddi/v3_3/conceptualcomponent.xsd +1517 -0
- ddigraph/schemas/ddi/v3_3/datacollection.xsd +7300 -0
- ddigraph/schemas/ddi/v3_3/dataset.xsd +243 -0
- ddigraph/schemas/ddi/v3_3/dc.xsd +119 -0
- ddigraph/schemas/ddi/v3_3/dcmitype.xsd +53 -0
- ddigraph/schemas/ddi/v3_3/dcterms.xsd +383 -0
- ddigraph/schemas/ddi/v3_3/ddi-xhtml11-model-1.xsd +462 -0
- ddigraph/schemas/ddi/v3_3/ddi-xhtml11-modules-1.xsd +537 -0
- ddigraph/schemas/ddi/v3_3/ddi-xhtml11.xsd +83 -0
- ddigraph/schemas/ddi/v3_3/ddiprofile.xsd +227 -0
- ddigraph/schemas/ddi/v3_3/group.xsd +1403 -0
- ddigraph/schemas/ddi/v3_3/instance_3_3.xsd +406 -0
- ddigraph/schemas/ddi/v3_3/logicalproduct.xsd +3488 -0
- ddigraph/schemas/ddi/v3_3/physicaldataproduct.xsd +813 -0
- ddigraph/schemas/ddi/v3_3/physicaldataproduct_ncube_inline.xsd +221 -0
- ddigraph/schemas/ddi/v3_3/physicaldataproduct_ncube_normal.xsd +237 -0
- ddigraph/schemas/ddi/v3_3/physicaldataproduct_ncube_tabular.xsd +283 -0
- ddigraph/schemas/ddi/v3_3/physicaldataproduct_proprietary.xsd +237 -0
- ddigraph/schemas/ddi/v3_3/physicalinstance.xsd +833 -0
- ddigraph/schemas/ddi/v3_3/reusable.xsd +8337 -0
- ddigraph/schemas/ddi/v3_3/studyunit.xsd +325 -0
- ddigraph/schemas/ddi/v3_3/xml.xsd +76 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-attribs-1.xsd +82 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-bdo-1.xsd +85 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-blkphras-1.xsd +206 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-blkpres-1.xsd +42 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-blkstruct-1.xsd +57 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-charent-1.xsd +41 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-datatypes-1.xsd +147 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-framework-1.xsd +74 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-hypertext-1.xsd +51 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-inlphras-1.xsd +220 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-inlpres-1.xsd +56 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-inlstruct-1.xsd +60 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-inlstyle-1.xsd +27 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-lat1.ent +121 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-list-1.xsd +128 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-notations-1.xsd +105 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-pres-1.xsd +53 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-special.ent +82 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-symbol.ent +204 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-table-1.xsd +323 -0
- ddigraph/schemas/ddi-c/XHTML/xhtml-text-1.xsd +70 -0
- ddigraph/schemas/ddi-c/codebook.xsd +11702 -0
- ddigraph/schemas/ddi-c/dc.xsd +118 -0
- ddigraph/schemas/ddi-c/dcmitype.xsd +50 -0
- ddigraph/schemas/ddi-c/dcterms.xsd +382 -0
- ddigraph/schemas/ddi-c/ddi-xhtml11-model-1.xsd +493 -0
- ddigraph/schemas/ddi-c/ddi-xhtml11-modules-1.xsd +581 -0
- ddigraph/schemas/ddi-c/ddi-xhtml11.xsd +116 -0
- ddigraph/schemas/ddi-c/xml.xsd +80 -0
- ddigraph/schemas/ddi-cdi/ontology/Agents.onto.ttl +456 -0
- ddigraph/schemas/ddi-cdi/ontology/Classes.onto.ttl +28 -0
- ddigraph/schemas/ddi-cdi/ontology/Components.onto.ttl +352 -0
- ddigraph/schemas/ddi-cdi/ontology/Conceptual.onto.ttl +1315 -0
- ddigraph/schemas/ddi-cdi/ontology/DDICDILibrary.onto.ttl +24 -0
- ddigraph/schemas/ddi-cdi/ontology/DataDescription.onto.ttl +530 -0
- ddigraph/schemas/ddi-cdi/ontology/DataTypes.onto.ttl +91 -0
- ddigraph/schemas/ddi-cdi/ontology/Dimensional.onto.ttl +315 -0
- ddigraph/schemas/ddi-cdi/ontology/Enumerations.onto.ttl +554 -0
- ddigraph/schemas/ddi-cdi/ontology/FormatDescription.onto.ttl +1775 -0
- ddigraph/schemas/ddi-cdi/ontology/KeyValue.onto.ttl +118 -0
- ddigraph/schemas/ddi-cdi/ontology/Long.onto.ttl +202 -0
- ddigraph/schemas/ddi-cdi/ontology/Process.onto.ttl +698 -0
- ddigraph/schemas/ddi-cdi/ontology/Representations.onto.ttl +1998 -0
- ddigraph/schemas/ddi-cdi/ontology/StructuredDataTypes.onto.ttl +2022 -0
- ddigraph/schemas/ddi-cdi/ontology/Wide.onto.ttl +66 -0
- ddigraph/schemas/ddi-cdi/ontology/ddi-cdi.onto.ttl +23 -0
- ddigraph/schemas/ddi-cdi/xml-schema/ddi-cdi.xsd +21290 -0
- ddigraph/schemas/ddi-cdi/xml-schema/xml.xsd +76 -0
- ddigraph/schemas/license.txt +117 -0
- ddigraph/schemas/manifest.json +215 -0
- ddigraph/schemas/readme.txt +169 -0
- ddigraph/scripts/update_schemas.py +409 -0
- ddigraph/utils/__init__.py +39 -0
- ddigraph/utils/chunking.py +48 -0
- ddigraph/utils/parsing.py +398 -0
- ddigraph/utils/retry.py +77 -0
- ddigraph-0.4.0.dist-info/METADATA +256 -0
- ddigraph-0.4.0.dist-info/RECORD +235 -0
- ddigraph-0.4.0.dist-info/WHEEL +4 -0
- ddigraph-0.4.0.dist-info/entry_points.txt +2 -0
- ddigraph-0.4.0.dist-info/licenses/LICENSE +21 -0
- ddigraph-0.4.0.dist-info/licenses/NOTICE +2 -0
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Philippe Bisson
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
NOTICE
ADDED
README.md
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# ddigraph
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/pbisson44/ddigraph/actions)
|
|
4
|
+
[Coverage](https://codecov.io/gh/pbisson44/ddigraph)
|
|
5
|
+
[PyPI](https://pypi.org/project/ddigraph/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
[Python](pyproject.toml)
|
|
8
|
+
[Neo4j](https://neo4j.com/docs/)
|
|
9
|
+
[Ruff](https://docs.astral.sh/ruff/)
|
|
10
|
+
[mypy](https://mypy-lang.org/)
|
|
11
|
+
|
|
12
|
+
A modern Python toolkit that transforms [DDI](https://ddialliance.org/) (Data Documentation
|
|
13
|
+
Initiative) XML metadata into knowledge graphs. Supports **DDI Codebook** and **DDI-L
|
|
14
|
+
FragmentInstance** formats with streaming parsing, batched writes, and full async I/O across
|
|
15
|
+
multiple graph backends.
|
|
16
|
+
|
|
17
|
+
[Documentation](https://pbisson44.github.io/ddigraph/) |
|
|
18
|
+
[Getting Started](https://pbisson44.github.io/ddigraph/getting-started/installation/) |
|
|
19
|
+
[PyPI](https://pypi.org/project/ddigraph/) |
|
|
20
|
+
[Source Code](https://github.com/pbisson44/ddigraph)
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Features
|
|
25
|
+
|
|
26
|
+
- **Multi-backend support** -- Neo4j, RDF/SPARQL, Gremlin, NetworkX, and pandas
|
|
27
|
+
- **Streaming XML processing** -- Memory-bounded `iterparse` for files of any size
|
|
28
|
+
- **Batched writes** -- UNWIND-based Cypher for 10-100x fewer database round trips
|
|
29
|
+
- **Async I/O** -- Concurrent parsing and writing with back-pressure control
|
|
30
|
+
- **Format auto-detection** -- Automatically identifies DDI Codebook vs Lifecycle format
|
|
31
|
+
- **Unified schema** -- Single source of truth for all node and relationship definitions
|
|
32
|
+
- **Adapter pattern** -- Plug in custom graph backends via `GraphWriteAdapter` protocol
|
|
33
|
+
- **Production-ready** -- Retry logic, observability hooks, pydantic-based configuration
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
### Install
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install ddigraph
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Load DDI metadata (CLI)
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Set Neo4j connection
|
|
47
|
+
export DDIGRAPH_NEO4J_URI=bolt://localhost:7687
|
|
48
|
+
export DDIGRAPH_NEO4J_USER=neo4j
|
|
49
|
+
export DDIGRAPH_NEO4J_PASSWORD=secret
|
|
50
|
+
|
|
51
|
+
# Bootstrap schema and load data (format is auto-detected)
|
|
52
|
+
ddigraph bootstrap
|
|
53
|
+
ddigraph load survey.xml --dataset-id my-survey
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Load DDI metadata (Python)
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import asyncio
|
|
60
|
+
from neo4j import AsyncGraphDatabase
|
|
61
|
+
from ddigraph import DDILoader, DDIFragmentLoader, detect_ddi_format
|
|
62
|
+
from ddigraph.config import Settings
|
|
63
|
+
|
|
64
|
+
async def main():
|
|
65
|
+
settings = Settings()
|
|
66
|
+
driver = AsyncGraphDatabase.driver(
|
|
67
|
+
settings.neo4j_uri,
|
|
68
|
+
auth=(settings.neo4j_user, settings.neo4j_password.get_secret_value()),
|
|
69
|
+
)
|
|
70
|
+
path = "survey.xml"
|
|
71
|
+
if detect_ddi_format(path) == "lifecycle":
|
|
72
|
+
loader = DDIFragmentLoader(driver, settings=settings)
|
|
73
|
+
result = await loader.load(path)
|
|
74
|
+
else:
|
|
75
|
+
loader = DDILoader(driver, settings=settings)
|
|
76
|
+
result = await loader.load(path, dataset_id="my-survey")
|
|
77
|
+
print(result) # {'Instrument': 1, 'Sequence': 388, 'QuestionItem': 373, ...}
|
|
78
|
+
await driver.close()
|
|
79
|
+
|
|
80
|
+
asyncio.run(main())
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Supported Formats
|
|
84
|
+
|
|
85
|
+
| Format | Description | Use Case |
|
|
86
|
+
| ------ | ----------- | -------- |
|
|
87
|
+
| **DDI Codebook** | Traditional flat format with central Dataset node | Survey archives, data catalogs |
|
|
88
|
+
| **DDI-L FragmentInstance** | Lifecycle 3.x format with reusable fragments | Questionnaire design, CAPI/CAWI instruments |
|
|
89
|
+
| **DDI-CDI 1.0** | Cross-Domain Integration metadata | Data integration, statistical production |
|
|
90
|
+
|
|
91
|
+
### XSD Coverage
|
|
92
|
+
|
|
93
|
+
`ddigraph` ships with 100 % coverage of every concrete identifiable element
|
|
94
|
+
declared in the bundled XSD schemas (`schemas/`). Coverage is enforced by the
|
|
95
|
+
audit script and a pytest guardrail so new schema releases surface any gaps:
|
|
96
|
+
|
|
97
|
+
| Flavor | Scope | Target | Covered |
|
|
98
|
+
| ----------- | --------------------------------------------------------------------- | -----: | ------: |
|
|
99
|
+
| DDI-L 3.x | Concrete Maintainable + Versionable + Identifiable elements | 189 | 100 % |
|
|
100
|
+
| DDI-C 2.x | Codebook elements with the `GLOBALS` attribute group (no layout tags) | 73 | 100 % |
|
|
101
|
+
| DDI-CDI 1.0 | Concrete top-level entity elements (associations excluded) | 210 | 100 % |
|
|
102
|
+
|
|
103
|
+
Run `python scripts/xsd_coverage.py` to regenerate the audit or
|
|
104
|
+
`python scripts/xsd_coverage.py --json` for machine-readable output.
|
|
105
|
+
|
|
106
|
+
## Supported Backends
|
|
107
|
+
|
|
108
|
+
| Backend | Description | Use Case |
|
|
109
|
+
| ------- | ----------- | -------- |
|
|
110
|
+
| **Neo4j** | Native graph database (Bolt) | Production deployments, complex queries |
|
|
111
|
+
| **RDF/SPARQL** | Semantic web triplestores | Linked data, ontology integration |
|
|
112
|
+
| **Gremlin** | Graph traversal language | JanusGraph, Neptune, Cosmos DB |
|
|
113
|
+
| **NetworkX** | Python graph library | Local analysis, prototyping |
|
|
114
|
+
| **pandas** | DataFrame-based | Tabular analysis, Excel export |
|
|
115
|
+
|
|
116
|
+
## Docker Quick Start
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
docker run --rm --name neo4j-demo \
|
|
120
|
+
-p 7474:7474 -p 7687:7687 \
|
|
121
|
+
-e NEO4J_AUTH=neo4j/password \
|
|
122
|
+
neo4j:5
|
|
123
|
+
|
|
124
|
+
export DDIGRAPH_NEO4J_URI=bolt://localhost:7687
|
|
125
|
+
export DDIGRAPH_NEO4J_USER=neo4j
|
|
126
|
+
export DDIGRAPH_NEO4J_PASSWORD=password
|
|
127
|
+
|
|
128
|
+
ddigraph bootstrap
|
|
129
|
+
ddigraph load your-file.xml --dataset-id demo
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Documentation
|
|
133
|
+
|
|
134
|
+
Full documentation is available at **[pbisson44.github.io/ddigraph](https://pbisson44.github.io/ddigraph/)** in English and French.
|
|
135
|
+
|
|
136
|
+
- [Getting Started](https://pbisson44.github.io/ddigraph/getting-started/installation/) -- Installation, quick start, 10-minute tutorial
|
|
137
|
+
- [User Guide](https://pbisson44.github.io/ddigraph/user-guide/architecture/) -- Architecture, DDI formats, relationships, adapters
|
|
138
|
+
- [Graph Backends](https://pbisson44.github.io/ddigraph/backends/neo4j/) -- Neo4j, RDF/SPARQL, Gremlin, NetworkX
|
|
139
|
+
- [Reference](https://pbisson44.github.io/ddigraph/reference/cli/) -- CLI commands, configuration
|
|
140
|
+
- [Advanced](https://pbisson44.github.io/ddigraph/advanced/tuning/) -- Performance tuning, AI readiness, standards interoperability
|
|
141
|
+
- [Contributing](https://pbisson44.github.io/ddigraph/project/contributing/) -- How to contribute
|
|
142
|
+
|
|
143
|
+
## Development
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
git clone https://github.com/pbisson44/ddigraph.git
|
|
147
|
+
cd ddigraph
|
|
148
|
+
pip install -e ".[dev,docs]"
|
|
149
|
+
|
|
150
|
+
ruff check . && ruff format .
|
|
151
|
+
# Docstring linting is currently enforced for src/ddigraph only.
|
|
152
|
+
pydocstyle src/ddigraph
|
|
153
|
+
mypy .
|
|
154
|
+
pytest
|
|
155
|
+
mkdocs serve
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## License
|
|
159
|
+
|
|
160
|
+
MIT -- see [LICENSE](LICENSE) for details.
|
ddigraph/__init__.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""ddigraph - DDI to Knowledge Graph transformation toolkit.
|
|
2
|
+
|
|
3
|
+
This package transforms DDI (Data Documentation Initiative) XML
|
|
4
|
+
metadata into a Neo4j knowledge graph. Streaming parsers also emit
|
|
5
|
+
records that can drive other backends through the parser tier --
|
|
6
|
+
see ``demo/load_rdf.py``, ``demo/load_gremlin.py``,
|
|
7
|
+
``demo/load_networkx.py``, and ``demo/load_pandas.py`` for examples.
|
|
8
|
+
The high-level entry points are:
|
|
9
|
+
|
|
10
|
+
* :func:`ddigraph.load` -- sync load of a DDI file into a Neo4j target.
|
|
11
|
+
* :func:`ddigraph.aload` -- async equivalent of ``load``.
|
|
12
|
+
* :func:`ddigraph.detect` -- identify the DDI flavor (codebook,
|
|
13
|
+
lifecycle, cdi) of a file without loading.
|
|
14
|
+
* :func:`ddigraph.bootstrap` -- create the indexes/constraints DDI
|
|
15
|
+
ingestion needs.
|
|
16
|
+
|
|
17
|
+
Typical usage::
|
|
18
|
+
|
|
19
|
+
import ddigraph
|
|
20
|
+
|
|
21
|
+
ddigraph.bootstrap(target="bolt://localhost:7687")
|
|
22
|
+
result = ddigraph.load("survey.xml", target="bolt://localhost:7687")
|
|
23
|
+
print(result.nodes_written, "nodes,", result.relationships_written, "relationships")
|
|
24
|
+
|
|
25
|
+
When ``target`` is omitted, connection details come from the env-driven
|
|
26
|
+
:class:`~ddigraph.config.Settings` model (``DDIGRAPH_NEO4J_URI``,
|
|
27
|
+
``DDIGRAPH_NEO4J_USER``, ``DDIGRAPH_NEO4J_PASSWORD``).
|
|
28
|
+
|
|
29
|
+
The public API surface ships in two tiers:
|
|
30
|
+
|
|
31
|
+
* **Supported** -- ``load``, ``aload``, ``detect``, ``bootstrap``,
|
|
32
|
+
``abootstrap``, ``LoadResult``, ``Settings``, ``__version__``.
|
|
33
|
+
These names follow semantic versioning across minor releases.
|
|
34
|
+
* **Power-user** -- ``DDILoader``, ``DDIFragmentLoader``,
|
|
35
|
+
``DDIFragmentParser``, ``DDIBatch``, ``CDIBatch``,
|
|
36
|
+
``CDIBatchStream``, ``DDISchema``, ``Fragment``,
|
|
37
|
+
``FragmentReference``, ``FlavorName``, ``detect_ddi_format``,
|
|
38
|
+
``is_cdi_format``, ``parse_ddi_batches``, ``parse_cdi_batches``.
|
|
39
|
+
Importable from ``ddigraph`` for fine-grained control, but they
|
|
40
|
+
carry no stability guarantee across minor releases. Pin a version
|
|
41
|
+
if you depend on them.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
45
|
+
|
|
46
|
+
from ddigraph.api import (
|
|
47
|
+
FlavorName,
|
|
48
|
+
LoadResult,
|
|
49
|
+
abootstrap,
|
|
50
|
+
aload,
|
|
51
|
+
bootstrap,
|
|
52
|
+
detect,
|
|
53
|
+
load,
|
|
54
|
+
)
|
|
55
|
+
from ddigraph.config import Settings
|
|
56
|
+
from ddigraph.ingest.cdi_loader import (
|
|
57
|
+
CDIBatch,
|
|
58
|
+
CDIBatchStream,
|
|
59
|
+
is_cdi_format,
|
|
60
|
+
parse_cdi_batches,
|
|
61
|
+
)
|
|
62
|
+
from ddigraph.ingest.fragment_loader import (
|
|
63
|
+
DDIFragmentLoader,
|
|
64
|
+
DDIFragmentParser,
|
|
65
|
+
Fragment,
|
|
66
|
+
FragmentReference,
|
|
67
|
+
detect_ddi_format,
|
|
68
|
+
)
|
|
69
|
+
from ddigraph.ingest.loader import DDIBatch, DDILoader, parse_ddi_batches
|
|
70
|
+
from ddigraph.schema.definitions import DDISchema
|
|
71
|
+
|
|
72
|
+
try:
|
|
73
|
+
__version__ = version("ddigraph")
|
|
74
|
+
except PackageNotFoundError:
|
|
75
|
+
# Package not installed (development mode)
|
|
76
|
+
__version__ = "0.4.0"
|
|
77
|
+
|
|
78
|
+
# Intentionally split into two tiers (supported / power-user) with a
|
|
79
|
+
# blank-line break instead of alphabetised; see the module docstring.
|
|
80
|
+
__all__ = [ # noqa: RUF022 (tier ordering is intentional)
|
|
81
|
+
# Supported public API -- the 90 % case, semver-stable across
|
|
82
|
+
# minor releases. See the module docstring for details.
|
|
83
|
+
"LoadResult",
|
|
84
|
+
"Settings",
|
|
85
|
+
"__version__",
|
|
86
|
+
"abootstrap",
|
|
87
|
+
"aload",
|
|
88
|
+
"bootstrap",
|
|
89
|
+
"detect",
|
|
90
|
+
"load",
|
|
91
|
+
# Power-user surface -- the parser tier, batch types, and the
|
|
92
|
+
# shared schema container. Importable from ``ddigraph`` but carries
|
|
93
|
+
# no stability guarantee across minor releases.
|
|
94
|
+
"CDIBatch",
|
|
95
|
+
"CDIBatchStream",
|
|
96
|
+
"DDIBatch",
|
|
97
|
+
"DDIFragmentLoader",
|
|
98
|
+
"DDIFragmentParser",
|
|
99
|
+
"DDILoader",
|
|
100
|
+
"DDISchema",
|
|
101
|
+
"FlavorName",
|
|
102
|
+
"Fragment",
|
|
103
|
+
"FragmentReference",
|
|
104
|
+
"detect_ddi_format",
|
|
105
|
+
"is_cdi_format",
|
|
106
|
+
"parse_cdi_batches",
|
|
107
|
+
"parse_ddi_batches",
|
|
108
|
+
]
|
ddigraph/api.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""CRUD-simple public API for ``ddigraph``.
|
|
2
|
+
|
|
3
|
+
The public functions below cover the 90 % case. Power users still have
|
|
4
|
+
:class:`~ddigraph.ingest.loader.DDILoader`,
|
|
5
|
+
:class:`~ddigraph.ingest.fragment_loader.DDIFragmentLoader`, and the
|
|
6
|
+
CDI ``parse_cdi_batches`` family available for fine-grained control;
|
|
7
|
+
this module just spares ordinary callers from building drivers, picking
|
|
8
|
+
a flavor-specific loader, and chaining ``asyncio.run`` themselves.
|
|
9
|
+
|
|
10
|
+
Typical usage::
|
|
11
|
+
|
|
12
|
+
import ddigraph
|
|
13
|
+
|
|
14
|
+
# Bootstrap the target's schema once.
|
|
15
|
+
ddigraph.bootstrap(target="bolt://localhost:7687")
|
|
16
|
+
|
|
17
|
+
# Stream a DDI file into the target. Format auto-detected.
|
|
18
|
+
result = ddigraph.load("survey.xml", target="bolt://localhost:7687")
|
|
19
|
+
print(result.nodes_written, "nodes,", result.relationships_written, "relationships")
|
|
20
|
+
|
|
21
|
+
Connection credentials default to the env-driven
|
|
22
|
+
:class:`~ddigraph.config.Settings` model when ``target`` is omitted, so
|
|
23
|
+
the existing ``DDIGRAPH_NEO4J_*`` (or legacy ``NEO4J_*``) variables
|
|
24
|
+
continue to work.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import asyncio
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from time import perf_counter
|
|
33
|
+
from typing import Literal
|
|
34
|
+
|
|
35
|
+
from neo4j import AsyncDriver, AsyncGraphDatabase
|
|
36
|
+
|
|
37
|
+
from ddigraph.config import Settings
|
|
38
|
+
from ddigraph.graph.bootstrap import ensure_schema as _ensure_schema
|
|
39
|
+
from ddigraph.ingest.fragment_loader import (
|
|
40
|
+
DDIFragmentLoader,
|
|
41
|
+
detect_ddi_format,
|
|
42
|
+
)
|
|
43
|
+
from ddigraph.ingest.loader import DDILoader
|
|
44
|
+
|
|
45
|
+
type FlavorName = Literal["codebook", "lifecycle", "cdi", "unknown"]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
|
|
49
|
+
class LoadResult:
|
|
50
|
+
"""Summary of a single ``ddigraph.load`` invocation.
|
|
51
|
+
|
|
52
|
+
Attributes:
|
|
53
|
+
flavor: One of ``"codebook"`` or ``"lifecycle"`` (CDI ingestion
|
|
54
|
+
is not yet covered by this entry point).
|
|
55
|
+
target: The connection URL the load wrote to.
|
|
56
|
+
dataset_id: Identifier assigned to the ingested dataset (the
|
|
57
|
+
codebook flavor always sets one; lifecycle leaves it
|
|
58
|
+
``None``).
|
|
59
|
+
nodes_written: Number of graph nodes the loader recorded.
|
|
60
|
+
relationships_written: Number of relationships recorded.
|
|
61
|
+
duration_s: Wall-clock seconds the load took.
|
|
62
|
+
dry_run: True if the load ran in dry-run mode (no writes).
|
|
63
|
+
totals: The raw per-entity counts the underlying loader returned.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
flavor: FlavorName
|
|
67
|
+
target: str
|
|
68
|
+
dataset_id: str | None
|
|
69
|
+
nodes_written: int
|
|
70
|
+
relationships_written: int
|
|
71
|
+
duration_s: float
|
|
72
|
+
dry_run: bool
|
|
73
|
+
totals: dict[str, int]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def detect(path: str | Path) -> FlavorName:
|
|
77
|
+
"""Return the DDI flavor of ``path``.
|
|
78
|
+
|
|
79
|
+
Thin typed wrapper over
|
|
80
|
+
:func:`ddigraph.ingest.fragment_loader.detect_ddi_format` so callers
|
|
81
|
+
get a real ``Literal`` instead of a free-form string.
|
|
82
|
+
"""
|
|
83
|
+
raw = detect_ddi_format(path)
|
|
84
|
+
if raw in ("codebook", "lifecycle", "cdi"):
|
|
85
|
+
return raw # type: ignore[return-value]
|
|
86
|
+
return "unknown"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _resolve_settings(target: str | None, settings: Settings | None) -> tuple[Settings, str]:
|
|
90
|
+
"""Materialise a ``Settings`` instance and the URI it points at.
|
|
91
|
+
|
|
92
|
+
Uses ``settings`` when given, otherwise a fresh ``Settings()`` built from
|
|
93
|
+
the environment; a non-``None`` ``target`` then overrides ``neo4j_uri``.
|
|
94
|
+
"""
|
|
95
|
+
base = settings or Settings()
|
|
96
|
+
if target is not None:
|
|
97
|
+
# Copy instead of mutating the caller's Settings; model_copy(update=...) skips validation.
|
|
98
|
+
base = base.model_copy(update={"neo4j_uri": target})
|
|
99
|
+
return base, base.neo4j_uri
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _driver(settings: Settings) -> AsyncDriver:
|
|
103
|
+
"""Build an ``AsyncDriver`` from a ``Settings`` instance."""
|
|
104
|
+
return AsyncGraphDatabase.driver(
|
|
105
|
+
settings.neo4j_uri,
|
|
106
|
+
auth=(settings.neo4j_user, settings.neo4j_password.get_secret_value()),
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _default_dataset_id(path: str | Path) -> str:
|
|
111
|
+
"""Derive a dataset identifier from a file name (codebook flavor only)."""
|
|
112
|
+
stem = Path(path).stem.replace(" ", "_")
|
|
113
|
+
# A path ending in ``.xml`` with no real stem (e.g. ``/path/.xml``)
|
|
114
|
+
# produces a leading-dot stem; treat that as a missing identifier.
|
|
115
|
+
if not stem or stem.startswith("."):
|
|
116
|
+
return "default"
|
|
117
|
+
return stem
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
async def aload(
    path: str | Path,
    *,
    target: str | None = None,
    dataset_id: str | None = None,
    dataset_name: str | None = None,
    dry_run: bool = False,
    replace: bool = False,
    settings: Settings | None = None,
) -> LoadResult:
    """Async load of a DDI file into the configured Neo4j target.

    Format auto-detection picks DDI-Codebook or DDI-L Lifecycle and
    dispatches to the matching loader. DDI-CDI is parsed but not yet
    persisted by this entry point (use ``ddigraph.parse_cdi_batches``).
    Non-Neo4j backends (RDF, Gremlin, NetworkX, pandas) are not driven
    through ``load``; use the parser tier and a backend-specific
    adapter (see the ``demo/load_*.py`` examples).

    Args:
        path: Filesystem path to the DDI XML.
        target: Neo4j URL (``bolt://...`` or ``neo4j://...``). When
            omitted the env-driven ``DDIGRAPH_NEO4J_URI`` is used.
        dataset_id: Dataset identifier (codebook flavor). Defaults to
            the file stem when not supplied.
        dataset_name: Human-readable dataset name (codebook flavor).
        dry_run: When True, parse and validate without writing. Only
            forwarded to the codebook loader; see the review note in
            the lifecycle branch below.
        replace: When True, purge existing content before loading.
            Passed as ``replace`` to the codebook loader and as
            ``clear_first`` to the lifecycle loader.
        settings: Optional pre-built ``Settings`` instance.

    Returns:
        A :class:`LoadResult` describing the load outcome. Its
        ``dataset_id`` is ``None`` for the lifecycle flavor, and
        ``duration_s`` covers the load itself, not format detection.

    Raises:
        ValueError: If ``path`` does not point at a readable XML file.
        NotImplementedError: If ``path`` is a CDI document (not yet
            persisted by this entry point).
    """
    resolved_settings, target_uri = _resolve_settings(target, settings)
    flavor = detect(path)
    if flavor == "cdi":
        raise NotImplementedError(
            "ddigraph.load/aload does not yet persist DDI-CDI documents. "
            "Use ddigraph.parse_cdi_batches and a custom adapter."
        )

    start = perf_counter()
    totals: dict[str, int]
    resolved_dataset_id: str | None = None
    if flavor == "lifecycle":
        # Only this branch uses the async driver, so it is created (and
        # closed) here rather than unconditionally for every flavor.
        driver = _driver(resolved_settings)
        try:
            loader = DDIFragmentLoader(driver, settings=resolved_settings)
            # NOTE(review): ``dry_run`` is not forwarded to the lifecycle
            # loader, yet the returned LoadResult echoes the caller's
            # ``dry_run`` flag. Confirm whether DDIFragmentLoader.load can
            # honour a dry run, especially together with ``replace=True``.
            totals = await loader.load(path=path, clear_first=replace)
        finally:
            await driver.close()
    else:
        # Every other flavor that reaches this point ("codebook", and also
        # "unknown") goes through DDILoader, which is handed a synchronous
        # driver.
        from neo4j import GraphDatabase

        sync_driver = GraphDatabase.driver(
            resolved_settings.neo4j_uri,
            auth=(
                resolved_settings.neo4j_user,
                resolved_settings.neo4j_password.get_secret_value(),
            ),
        )
        try:
            codebook_loader = DDILoader(sync_driver, settings=resolved_settings)
            resolved_dataset_id = dataset_id or _default_dataset_id(path)
            # ``load`` is awaited, so it is expected to be a coroutine even
            # though the loader wraps a sync driver.
            totals = await codebook_loader.load(
                path=path,
                dataset_id=resolved_dataset_id,
                dataset_name=dataset_name,
                dry_run=dry_run,
                replace=replace,
            )
        finally:
            sync_driver.close()

    # Counters whose key mentions "relationship" (case-insensitive) are
    # tallied as relationships; every other counter is tallied as nodes.
    nodes = 0
    rels = 0
    for key, count in totals.items():
        if "relationship" in key.lower():
            rels += count
        else:
            nodes += count

    return LoadResult(
        flavor=flavor,
        target=target_uri,
        dataset_id=resolved_dataset_id,
        nodes_written=nodes,
        relationships_written=rels,
        duration_s=perf_counter() - start,
        dry_run=dry_run,
        totals=totals,
    )
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def load(
    path: str | Path,
    *,
    target: str | None = None,
    dataset_id: str | None = None,
    dataset_name: str | None = None,
    dry_run: bool = False,
    replace: bool = False,
    settings: Settings | None = None,
) -> LoadResult:
    """Blocking counterpart of :func:`aload`.

    Builds the :func:`aload` coroutine with the same arguments and runs
    it to completion with :func:`asyncio.run`. Code that is already
    running inside an event loop should await :func:`aload` instead.

    All arguments are forwarded unchanged; see :func:`aload` for their
    meaning.
    """
    pending_load = aload(
        path,
        target=target,
        dataset_id=dataset_id,
        dataset_name=dataset_name,
        dry_run=dry_run,
        replace=replace,
        settings=settings,
    )
    return asyncio.run(pending_load)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
async def abootstrap(
    *,
    target: str | None = None,
    include_fragments: bool = True,
    settings: Settings | None = None,
) -> None:
    """Async equivalent of :func:`bootstrap`.

    Resolves the effective settings, opens an async driver, and awaits
    ``_ensure_schema`` against ``neo4j_database`` from those settings.
    The driver is closed afterwards whether or not the schema call
    succeeds.
    """
    effective, _uri = _resolve_settings(target, settings)
    connection = _driver(effective)
    try:
        await _ensure_schema(
            connection,
            database=effective.neo4j_database,
            include_fragments=include_fragments,
        )
    finally:
        await connection.close()
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def bootstrap(
    *,
    target: str | None = None,
    include_fragments: bool = True,
    settings: Settings | None = None,
) -> None:
    """Create the indexes and constraints DDI ingestion needs.

    Blocking wrapper that runs :func:`abootstrap` to completion via
    :func:`asyncio.run`.

    Args:
        target: Neo4j URL. Defaults to env-driven settings.
        include_fragments: When True, also create DDI-L Lifecycle
            constraints alongside the Codebook ones.
        settings: Optional pre-built ``Settings`` instance.
    """
    pending_bootstrap = abootstrap(
        target=target,
        include_fragments=include_fragments,
        settings=settings,
    )
    asyncio.run(pending_bootstrap)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
__all__ = [
|
|
289
|
+
"FlavorName",
|
|
290
|
+
"LoadResult",
|
|
291
|
+
"abootstrap",
|
|
292
|
+
"aload",
|
|
293
|
+
"bootstrap",
|
|
294
|
+
"detect",
|
|
295
|
+
"load",
|
|
296
|
+
]
|