ddigraph 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. LICENSE +21 -0
  2. NOTICE +2 -0
  3. README.md +160 -0
  4. ddigraph/__init__.py +108 -0
  5. ddigraph/api.py +296 -0
  6. ddigraph/cli.py +694 -0
  7. ddigraph/config.py +202 -0
  8. ddigraph/graph/__init__.py +19 -0
  9. ddigraph/graph/bootstrap.py +158 -0
  10. ddigraph/ingest/__init__.py +31 -0
  11. ddigraph/ingest/_compose.py +175 -0
  12. ddigraph/ingest/_composition_specs.py +581 -0
  13. ddigraph/ingest/cdi_loader.py +860 -0
  14. ddigraph/ingest/fragment_loader.py +1261 -0
  15. ddigraph/ingest/loader.py +3591 -0
  16. ddigraph/logging.py +49 -0
  17. ddigraph/metrics.py +40 -0
  18. ddigraph/paths.py +41 -0
  19. ddigraph/py.typed +1 -0
  20. ddigraph/resources.py +47 -0
  21. ddigraph/schema/__init__.py +21 -0
  22. ddigraph/schema/_generated/__init__.py +21 -0
  23. ddigraph/schema/_generated/cdi.py +1549 -0
  24. ddigraph/schema/_generated/codebook.py +128 -0
  25. ddigraph/schema/_generated/lifecycle.py +516 -0
  26. ddigraph/schema/_overrides/__init__.py +14 -0
  27. ddigraph/schema/_overrides/_loader.py +149 -0
  28. ddigraph/schema/_overrides/schema_overrides.toml +220 -0
  29. ddigraph/schema/adapter.py +79 -0
  30. ddigraph/schema/ddi_graph.py +2127 -0
  31. ddigraph/schema/definitions/__init__.py +166 -0
  32. ddigraph/schema/definitions/_dataclasses.py +60 -0
  33. ddigraph/schema/definitions/cdi.py +22 -0
  34. ddigraph/schema/definitions/codebook.py +1053 -0
  35. ddigraph/schema/definitions/lifecycle.py +1515 -0
  36. ddigraph/schema/neo4j_adapter.py +961 -0
  37. ddigraph/schemas/README.md +59 -0
  38. ddigraph/schemas/__init__.py +0 -0
  39. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-attribs-1.xsd +82 -0
  40. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-bdo-1.xsd +85 -0
  41. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-blkphras-1.xsd +206 -0
  42. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-blkpres-1.xsd +42 -0
  43. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-blkstruct-1.xsd +57 -0
  44. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-charent-1.xsd +41 -0
  45. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-datatypes-1.xsd +147 -0
  46. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-framework-1.xsd +74 -0
  47. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-hypertext-1.xsd +51 -0
  48. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlphras-1.xsd +220 -0
  49. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlpres-1.xsd +56 -0
  50. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlstruct-1.xsd +60 -0
  51. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-inlstyle-1.xsd +27 -0
  52. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-lat1.ent +121 -0
  53. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-list-1.xsd +128 -0
  54. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-notations-1.xsd +105 -0
  55. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-pres-1.xsd +53 -0
  56. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-special.ent +82 -0
  57. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-symbol.ent +204 -0
  58. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-table-1.xsd +323 -0
  59. ddigraph/schemas/ddi/v3_1/XHTML/xhtml-text-1.xsd +70 -0
  60. ddigraph/schemas/ddi/v3_1/archive.xsd +1797 -0
  61. ddigraph/schemas/ddi/v3_1/comparative.xsd +570 -0
  62. ddigraph/schemas/ddi/v3_1/conceptualcomponent.xsd +1517 -0
  63. ddigraph/schemas/ddi/v3_1/datacollection.xsd +7300 -0
  64. ddigraph/schemas/ddi/v3_1/dataset.xsd +243 -0
  65. ddigraph/schemas/ddi/v3_1/dc.xsd +119 -0
  66. ddigraph/schemas/ddi/v3_1/dcmitype.xsd +53 -0
  67. ddigraph/schemas/ddi/v3_1/dcterms.xsd +383 -0
  68. ddigraph/schemas/ddi/v3_1/ddi-xhtml11-model-1.xsd +462 -0
  69. ddigraph/schemas/ddi/v3_1/ddi-xhtml11-modules-1.xsd +537 -0
  70. ddigraph/schemas/ddi/v3_1/ddi-xhtml11.xsd +83 -0
  71. ddigraph/schemas/ddi/v3_1/ddiprofile.xsd +227 -0
  72. ddigraph/schemas/ddi/v3_1/group.xsd +1403 -0
  73. ddigraph/schemas/ddi/v3_1/instance_3_1.xsd +406 -0
  74. ddigraph/schemas/ddi/v3_1/logicalproduct.xsd +3488 -0
  75. ddigraph/schemas/ddi/v3_1/physicaldataproduct.xsd +813 -0
  76. ddigraph/schemas/ddi/v3_1/physicaldataproduct_ncube_inline.xsd +221 -0
  77. ddigraph/schemas/ddi/v3_1/physicaldataproduct_ncube_normal.xsd +237 -0
  78. ddigraph/schemas/ddi/v3_1/physicaldataproduct_ncube_tabular.xsd +283 -0
  79. ddigraph/schemas/ddi/v3_1/physicaldataproduct_proprietary.xsd +237 -0
  80. ddigraph/schemas/ddi/v3_1/physicalinstance.xsd +833 -0
  81. ddigraph/schemas/ddi/v3_1/reusable.xsd +8337 -0
  82. ddigraph/schemas/ddi/v3_1/studyunit.xsd +325 -0
  83. ddigraph/schemas/ddi/v3_1/xml.xsd +76 -0
  84. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-attribs-1.xsd +82 -0
  85. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-bdo-1.xsd +85 -0
  86. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-blkphras-1.xsd +206 -0
  87. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-blkpres-1.xsd +42 -0
  88. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-blkstruct-1.xsd +57 -0
  89. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-charent-1.xsd +41 -0
  90. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-datatypes-1.xsd +147 -0
  91. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-framework-1.xsd +74 -0
  92. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-hypertext-1.xsd +51 -0
  93. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlphras-1.xsd +220 -0
  94. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlpres-1.xsd +56 -0
  95. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlstruct-1.xsd +60 -0
  96. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-inlstyle-1.xsd +27 -0
  97. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-lat1.ent +121 -0
  98. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-list-1.xsd +128 -0
  99. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-notations-1.xsd +105 -0
  100. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-pres-1.xsd +53 -0
  101. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-special.ent +82 -0
  102. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-symbol.ent +204 -0
  103. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-table-1.xsd +323 -0
  104. ddigraph/schemas/ddi/v3_2/XHTML/xhtml-text-1.xsd +70 -0
  105. ddigraph/schemas/ddi/v3_2/archive.xsd +1797 -0
  106. ddigraph/schemas/ddi/v3_2/comparative.xsd +570 -0
  107. ddigraph/schemas/ddi/v3_2/conceptualcomponent.xsd +1517 -0
  108. ddigraph/schemas/ddi/v3_2/datacollection.xsd +7300 -0
  109. ddigraph/schemas/ddi/v3_2/dataset.xsd +243 -0
  110. ddigraph/schemas/ddi/v3_2/dc.xsd +119 -0
  111. ddigraph/schemas/ddi/v3_2/dcmitype.xsd +53 -0
  112. ddigraph/schemas/ddi/v3_2/dcterms.xsd +383 -0
  113. ddigraph/schemas/ddi/v3_2/ddi-xhtml11-model-1.xsd +462 -0
  114. ddigraph/schemas/ddi/v3_2/ddi-xhtml11-modules-1.xsd +537 -0
  115. ddigraph/schemas/ddi/v3_2/ddi-xhtml11.xsd +83 -0
  116. ddigraph/schemas/ddi/v3_2/ddiprofile.xsd +227 -0
  117. ddigraph/schemas/ddi/v3_2/group.xsd +1403 -0
  118. ddigraph/schemas/ddi/v3_2/instance_3_2.xsd +406 -0
  119. ddigraph/schemas/ddi/v3_2/logicalproduct.xsd +3488 -0
  120. ddigraph/schemas/ddi/v3_2/physicaldataproduct.xsd +813 -0
  121. ddigraph/schemas/ddi/v3_2/physicaldataproduct_ncube_inline.xsd +221 -0
  122. ddigraph/schemas/ddi/v3_2/physicaldataproduct_ncube_normal.xsd +237 -0
  123. ddigraph/schemas/ddi/v3_2/physicaldataproduct_ncube_tabular.xsd +283 -0
  124. ddigraph/schemas/ddi/v3_2/physicaldataproduct_proprietary.xsd +237 -0
  125. ddigraph/schemas/ddi/v3_2/physicalinstance.xsd +833 -0
  126. ddigraph/schemas/ddi/v3_2/reusable.xsd +8337 -0
  127. ddigraph/schemas/ddi/v3_2/studyunit.xsd +325 -0
  128. ddigraph/schemas/ddi/v3_2/xml.xsd +76 -0
  129. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-attribs-1.xsd +82 -0
  130. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-bdo-1.xsd +85 -0
  131. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-blkphras-1.xsd +206 -0
  132. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-blkpres-1.xsd +42 -0
  133. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-blkstruct-1.xsd +57 -0
  134. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-charent-1.xsd +41 -0
  135. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-datatypes-1.xsd +147 -0
  136. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-framework-1.xsd +74 -0
  137. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-hypertext-1.xsd +51 -0
  138. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlphras-1.xsd +220 -0
  139. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlpres-1.xsd +56 -0
  140. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlstruct-1.xsd +60 -0
  141. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-inlstyle-1.xsd +27 -0
  142. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-lat1.ent +121 -0
  143. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-list-1.xsd +128 -0
  144. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-notations-1.xsd +105 -0
  145. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-pres-1.xsd +53 -0
  146. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-special.ent +82 -0
  147. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-symbol.ent +204 -0
  148. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-table-1.xsd +323 -0
  149. ddigraph/schemas/ddi/v3_3/XHTML/xhtml-text-1.xsd +70 -0
  150. ddigraph/schemas/ddi/v3_3/archive.xsd +1797 -0
  151. ddigraph/schemas/ddi/v3_3/comparative.xsd +570 -0
  152. ddigraph/schemas/ddi/v3_3/conceptualcomponent.xsd +1517 -0
  153. ddigraph/schemas/ddi/v3_3/datacollection.xsd +7300 -0
  154. ddigraph/schemas/ddi/v3_3/dataset.xsd +243 -0
  155. ddigraph/schemas/ddi/v3_3/dc.xsd +119 -0
  156. ddigraph/schemas/ddi/v3_3/dcmitype.xsd +53 -0
  157. ddigraph/schemas/ddi/v3_3/dcterms.xsd +383 -0
  158. ddigraph/schemas/ddi/v3_3/ddi-xhtml11-model-1.xsd +462 -0
  159. ddigraph/schemas/ddi/v3_3/ddi-xhtml11-modules-1.xsd +537 -0
  160. ddigraph/schemas/ddi/v3_3/ddi-xhtml11.xsd +83 -0
  161. ddigraph/schemas/ddi/v3_3/ddiprofile.xsd +227 -0
  162. ddigraph/schemas/ddi/v3_3/group.xsd +1403 -0
  163. ddigraph/schemas/ddi/v3_3/instance_3_3.xsd +406 -0
  164. ddigraph/schemas/ddi/v3_3/logicalproduct.xsd +3488 -0
  165. ddigraph/schemas/ddi/v3_3/physicaldataproduct.xsd +813 -0
  166. ddigraph/schemas/ddi/v3_3/physicaldataproduct_ncube_inline.xsd +221 -0
  167. ddigraph/schemas/ddi/v3_3/physicaldataproduct_ncube_normal.xsd +237 -0
  168. ddigraph/schemas/ddi/v3_3/physicaldataproduct_ncube_tabular.xsd +283 -0
  169. ddigraph/schemas/ddi/v3_3/physicaldataproduct_proprietary.xsd +237 -0
  170. ddigraph/schemas/ddi/v3_3/physicalinstance.xsd +833 -0
  171. ddigraph/schemas/ddi/v3_3/reusable.xsd +8337 -0
  172. ddigraph/schemas/ddi/v3_3/studyunit.xsd +325 -0
  173. ddigraph/schemas/ddi/v3_3/xml.xsd +76 -0
  174. ddigraph/schemas/ddi-c/XHTML/xhtml-attribs-1.xsd +82 -0
  175. ddigraph/schemas/ddi-c/XHTML/xhtml-bdo-1.xsd +85 -0
  176. ddigraph/schemas/ddi-c/XHTML/xhtml-blkphras-1.xsd +206 -0
  177. ddigraph/schemas/ddi-c/XHTML/xhtml-blkpres-1.xsd +42 -0
  178. ddigraph/schemas/ddi-c/XHTML/xhtml-blkstruct-1.xsd +57 -0
  179. ddigraph/schemas/ddi-c/XHTML/xhtml-charent-1.xsd +41 -0
  180. ddigraph/schemas/ddi-c/XHTML/xhtml-datatypes-1.xsd +147 -0
  181. ddigraph/schemas/ddi-c/XHTML/xhtml-framework-1.xsd +74 -0
  182. ddigraph/schemas/ddi-c/XHTML/xhtml-hypertext-1.xsd +51 -0
  183. ddigraph/schemas/ddi-c/XHTML/xhtml-inlphras-1.xsd +220 -0
  184. ddigraph/schemas/ddi-c/XHTML/xhtml-inlpres-1.xsd +56 -0
  185. ddigraph/schemas/ddi-c/XHTML/xhtml-inlstruct-1.xsd +60 -0
  186. ddigraph/schemas/ddi-c/XHTML/xhtml-inlstyle-1.xsd +27 -0
  187. ddigraph/schemas/ddi-c/XHTML/xhtml-lat1.ent +121 -0
  188. ddigraph/schemas/ddi-c/XHTML/xhtml-list-1.xsd +128 -0
  189. ddigraph/schemas/ddi-c/XHTML/xhtml-notations-1.xsd +105 -0
  190. ddigraph/schemas/ddi-c/XHTML/xhtml-pres-1.xsd +53 -0
  191. ddigraph/schemas/ddi-c/XHTML/xhtml-special.ent +82 -0
  192. ddigraph/schemas/ddi-c/XHTML/xhtml-symbol.ent +204 -0
  193. ddigraph/schemas/ddi-c/XHTML/xhtml-table-1.xsd +323 -0
  194. ddigraph/schemas/ddi-c/XHTML/xhtml-text-1.xsd +70 -0
  195. ddigraph/schemas/ddi-c/codebook.xsd +11702 -0
  196. ddigraph/schemas/ddi-c/dc.xsd +118 -0
  197. ddigraph/schemas/ddi-c/dcmitype.xsd +50 -0
  198. ddigraph/schemas/ddi-c/dcterms.xsd +382 -0
  199. ddigraph/schemas/ddi-c/ddi-xhtml11-model-1.xsd +493 -0
  200. ddigraph/schemas/ddi-c/ddi-xhtml11-modules-1.xsd +581 -0
  201. ddigraph/schemas/ddi-c/ddi-xhtml11.xsd +116 -0
  202. ddigraph/schemas/ddi-c/xml.xsd +80 -0
  203. ddigraph/schemas/ddi-cdi/ontology/Agents.onto.ttl +456 -0
  204. ddigraph/schemas/ddi-cdi/ontology/Classes.onto.ttl +28 -0
  205. ddigraph/schemas/ddi-cdi/ontology/Components.onto.ttl +352 -0
  206. ddigraph/schemas/ddi-cdi/ontology/Conceptual.onto.ttl +1315 -0
  207. ddigraph/schemas/ddi-cdi/ontology/DDICDILibrary.onto.ttl +24 -0
  208. ddigraph/schemas/ddi-cdi/ontology/DataDescription.onto.ttl +530 -0
  209. ddigraph/schemas/ddi-cdi/ontology/DataTypes.onto.ttl +91 -0
  210. ddigraph/schemas/ddi-cdi/ontology/Dimensional.onto.ttl +315 -0
  211. ddigraph/schemas/ddi-cdi/ontology/Enumerations.onto.ttl +554 -0
  212. ddigraph/schemas/ddi-cdi/ontology/FormatDescription.onto.ttl +1775 -0
  213. ddigraph/schemas/ddi-cdi/ontology/KeyValue.onto.ttl +118 -0
  214. ddigraph/schemas/ddi-cdi/ontology/Long.onto.ttl +202 -0
  215. ddigraph/schemas/ddi-cdi/ontology/Process.onto.ttl +698 -0
  216. ddigraph/schemas/ddi-cdi/ontology/Representations.onto.ttl +1998 -0
  217. ddigraph/schemas/ddi-cdi/ontology/StructuredDataTypes.onto.ttl +2022 -0
  218. ddigraph/schemas/ddi-cdi/ontology/Wide.onto.ttl +66 -0
  219. ddigraph/schemas/ddi-cdi/ontology/ddi-cdi.onto.ttl +23 -0
  220. ddigraph/schemas/ddi-cdi/xml-schema/ddi-cdi.xsd +21290 -0
  221. ddigraph/schemas/ddi-cdi/xml-schema/xml.xsd +76 -0
  222. ddigraph/schemas/license.txt +117 -0
  223. ddigraph/schemas/manifest.json +215 -0
  224. ddigraph/schemas/readme.txt +169 -0
  225. ddigraph/scripts/update_schemas.py +409 -0
  226. ddigraph/utils/__init__.py +39 -0
  227. ddigraph/utils/chunking.py +48 -0
  228. ddigraph/utils/parsing.py +398 -0
  229. ddigraph/utils/retry.py +77 -0
  230. ddigraph-0.4.0.dist-info/METADATA +256 -0
  231. ddigraph-0.4.0.dist-info/RECORD +235 -0
  232. ddigraph-0.4.0.dist-info/WHEEL +4 -0
  233. ddigraph-0.4.0.dist-info/entry_points.txt +2 -0
  234. ddigraph-0.4.0.dist-info/licenses/LICENSE +21 -0
  235. ddigraph-0.4.0.dist-info/licenses/NOTICE +2 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Philippe Bisson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
NOTICE ADDED
@@ -0,0 +1,2 @@
1
+ ddigraph (formerly neo4ddi) is distributed under the MIT License.
2
+ See the accompanying LICENSE file for the full license text.
README.md ADDED
@@ -0,0 +1,160 @@
1
+ # ddigraph
2
+
3
+ [![CI](https://img.shields.io/github/actions/workflow/status/pbisson44/ddigraph/ci.yml?label=CI&logo=github)](https://github.com/pbisson44/ddigraph/actions)
4
+ [![codecov](https://codecov.io/gh/pbisson44/ddigraph/branch/main/graph/badge.svg)](https://codecov.io/gh/pbisson44/ddigraph)
5
+ [![PyPI](https://img.shields.io/pypi/v/ddigraph?logo=pypi&logoColor=white)](https://pypi.org/project/ddigraph/)
6
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
7
+ [![Python](https://img.shields.io/badge/Python-3.12%E2%80%933.14-blue?logo=python)](pyproject.toml)
8
+ [![Neo4j](https://img.shields.io/badge/Neo4j-5.x-green?logo=neo4j)](https://neo4j.com/docs/)
9
+ [![Code style](https://img.shields.io/badge/code%20style-ruff-000000?logo=ruff&logoColor=white)](https://docs.astral.sh/ruff/)
10
+ [![Type checking](https://img.shields.io/badge/type%20checking-mypy-1678be?logo=mypy&logoColor=white)](https://mypy-lang.org/)
11
+
12
+ A modern Python toolkit that transforms [DDI](https://ddialliance.org/) (Data Documentation
13
+ Initiative) XML metadata into knowledge graphs. Supports **DDI Codebook** and **DDI-L
14
+ FragmentInstance** formats with streaming parsing, batched writes, and full async I/O across
15
+ multiple graph backends.
16
+
17
+ [Documentation](https://pbisson44.github.io/ddigraph/) |
18
+ [Getting Started](https://pbisson44.github.io/ddigraph/getting-started/installation/) |
19
+ [PyPI](https://pypi.org/project/ddigraph/) |
20
+ [Source Code](https://github.com/pbisson44/ddigraph)
21
+
22
+ ---
23
+
24
+ ## Features
25
+
26
+ - **Multi-backend support** -- Neo4j, RDF/SPARQL, Gremlin, NetworkX, and pandas
27
+ - **Streaming XML processing** -- Memory-bounded `iterparse` for files of any size
28
+ - **Batched writes** -- UNWIND-based Cypher for 10-100x fewer database round trips
29
+ - **Async I/O** -- Concurrent parsing and writing with back-pressure control
30
+ - **Format auto-detection** -- Automatically identifies DDI Codebook vs Lifecycle format
31
+ - **Unified schema** -- Single source of truth for all node and relationship definitions
32
+ - **Adapter pattern** -- Plug in custom graph backends via `GraphWriteAdapter` protocol
33
+ - **Production-ready** -- Retry logic, observability hooks, pydantic-based configuration
34
+
35
+ ## Quick Start
36
+
37
+ ### Install
38
+
39
+ ```bash
40
+ pip install ddigraph
41
+ ```
42
+
43
+ ### Load DDI metadata (CLI)
44
+
45
+ ```bash
46
+ # Set Neo4j connection
47
+ export DDIGRAPH_NEO4J_URI=bolt://localhost:7687
48
+ export DDIGRAPH_NEO4J_USER=neo4j
49
+ export DDIGRAPH_NEO4J_PASSWORD=secret
50
+
51
+ # Bootstrap schema and load data (format is auto-detected)
52
+ ddigraph bootstrap
53
+ ddigraph load survey.xml --dataset-id my-survey
54
+ ```
55
+
56
+ ### Load DDI metadata (Python)
57
+
58
+ ```python
59
+ import asyncio
60
+ from neo4j import AsyncGraphDatabase
61
+ from ddigraph import DDILoader, DDIFragmentLoader, detect_ddi_format
62
+ from ddigraph.config import Settings
63
+
64
+ async def main():
65
+ settings = Settings()
66
+ driver = AsyncGraphDatabase.driver(
67
+ settings.neo4j_uri,
68
+ auth=(settings.neo4j_user, settings.neo4j_password.get_secret_value()),
69
+ )
70
+ path = "survey.xml"
71
+ if detect_ddi_format(path) == "lifecycle":
72
+ loader = DDIFragmentLoader(driver, settings=settings)
73
+ result = await loader.load(path)
74
+ else:
75
+ loader = DDILoader(driver, settings=settings)
76
+ result = await loader.load(path, dataset_id="my-survey")
77
+ print(result) # {'Instrument': 1, 'Sequence': 388, 'QuestionItem': 373, ...}
78
+ await driver.close()
79
+
80
+ asyncio.run(main())
81
+ ```
82
+
83
+ ## Supported Formats
84
+
85
+ | Format | Description | Use Case |
86
+ | ------ | ----------- | -------- |
87
+ | **DDI Codebook** | Traditional flat format with central Dataset node | Survey archives, data catalogs |
88
+ | **DDI-L FragmentInstance** | Lifecycle 3.x format with reusable fragments | Questionnaire design, CAPI/CAWI instruments |
89
+ | **DDI-CDI 1.0** | Cross-Domain Integration metadata | Data integration, statistical production |
90
+
91
+ ### XSD Coverage
92
+
93
+ `ddigraph` ships with 100 % coverage of every concrete identifiable element
94
+ declared in the bundled XSD schemas (`schemas/`). Coverage is enforced by the
95
+ audit script and a pytest guardrail so new schema releases surface any gaps:
96
+
97
+ | Flavor | Scope | Target | Covered |
98
+ | ----------- | --------------------------------------------------------------------- | -----: | ------: |
99
+ | DDI-L 3.x | Concrete Maintainable + Versionable + Identifiable elements | 189 | 100 % |
100
+ | DDI-C 2.x | Codebook elements with the `GLOBALS` attribute group (no layout tags) | 73 | 100 % |
101
+ | DDI-CDI 1.0 | Concrete top-level entity elements (associations excluded) | 210 | 100 % |
102
+
103
+ Run `python scripts/xsd_coverage.py` to regenerate the audit or
104
+ `python scripts/xsd_coverage.py --json` for machine-readable output.
105
+
106
+ ## Supported Backends
107
+
108
+ | Backend | Description | Use Case |
109
+ | ------- | ----------- | -------- |
110
+ | **Neo4j** | Native graph database (Bolt) | Production deployments, complex queries |
111
+ | **RDF/SPARQL** | Semantic web triplestores | Linked data, ontology integration |
112
+ | **Gremlin** | Graph traversal language | JanusGraph, Neptune, Cosmos DB |
113
+ | **NetworkX** | Python graph library | Local analysis, prototyping |
114
+ | **pandas** | DataFrame-based | Tabular analysis, Excel export |
115
+
116
+ ## Docker Quick Start
117
+
118
+ ```bash
119
+ docker run --rm --name neo4j-demo \
120
+ -p 7474:7474 -p 7687:7687 \
121
+ -e NEO4J_AUTH=neo4j/password \
122
+ neo4j:5
123
+
124
+ export DDIGRAPH_NEO4J_URI=bolt://localhost:7687
125
+ export DDIGRAPH_NEO4J_USER=neo4j
126
+ export DDIGRAPH_NEO4J_PASSWORD=password
127
+
128
+ ddigraph bootstrap
129
+ ddigraph load your-file.xml --dataset-id demo
130
+ ```
131
+
132
+ ## Documentation
133
+
134
+ Full documentation is available at **[pbisson44.github.io/ddigraph](https://pbisson44.github.io/ddigraph/)** in English and French.
135
+
136
+ - [Getting Started](https://pbisson44.github.io/ddigraph/getting-started/installation/) -- Installation, quick start, 10-minute tutorial
137
+ - [User Guide](https://pbisson44.github.io/ddigraph/user-guide/architecture/) -- Architecture, DDI formats, relationships, adapters
138
+ - [Graph Backends](https://pbisson44.github.io/ddigraph/backends/neo4j/) -- Neo4j, RDF/SPARQL, Gremlin, NetworkX
139
+ - [Reference](https://pbisson44.github.io/ddigraph/reference/cli/) -- CLI commands, configuration
140
+ - [Advanced](https://pbisson44.github.io/ddigraph/advanced/tuning/) -- Performance tuning, AI readiness, standards interoperability
141
+ - [Contributing](https://pbisson44.github.io/ddigraph/project/contributing/) -- How to contribute
142
+
143
+ ## Development
144
+
145
+ ```bash
146
+ git clone https://github.com/pbisson44/ddigraph.git
147
+ cd ddigraph
148
+ pip install -e ".[dev,docs]"
149
+
150
+ ruff check . && ruff format .
151
+ # Docstring linting is currently enforced for src/ddigraph only.
152
+ pydocstyle src/ddigraph
153
+ mypy .
154
+ pytest
155
+ mkdocs serve
156
+ ```
157
+
158
+ ## License
159
+
160
+ MIT -- see [LICENSE](LICENSE) for details.
ddigraph/__init__.py ADDED
@@ -0,0 +1,108 @@
1
+ """ddigraph - DDI to Knowledge Graph transformation toolkit.
2
+
3
+ This package transforms DDI (Data Documentation Initiative) XML
4
+ metadata into a Neo4j knowledge graph. Streaming parsers also emit
5
+ records that can drive other backends through the parser tier --
6
+ see ``demo/load_rdf.py``, ``demo/load_gremlin.py``,
7
+ ``demo/load_networkx.py``, and ``demo/load_pandas.py`` for examples.
8
+ The high-level entry points are:
9
+
10
+ * :func:`ddigraph.load` -- sync load of a DDI file into a Neo4j target.
11
+ * :func:`ddigraph.aload` -- async equivalent of ``load``.
12
+ * :func:`ddigraph.detect` -- identify the DDI flavor (codebook,
13
+ lifecycle, cdi) of a file without loading.
14
+ * :func:`ddigraph.bootstrap` -- create the indexes/constraints DDI
15
+ ingestion needs.
16
+
17
+ Typical usage::
18
+
19
+ import ddigraph
20
+
21
+ ddigraph.bootstrap(target="bolt://localhost:7687")
22
+ result = ddigraph.load("survey.xml", target="bolt://localhost:7687")
23
+ print(result.nodes_written, "nodes,", result.relationships_written, "relationships")
24
+
25
+ When ``target`` is omitted, connection details come from the env-driven
26
+ :class:`~ddigraph.config.Settings` model (``DDIGRAPH_NEO4J_URI``,
27
+ ``DDIGRAPH_NEO4J_USER``, ``DDIGRAPH_NEO4J_PASSWORD``).
28
+
29
+ The public API surface ships in two tiers:
30
+
31
+ * **Supported** -- ``load``, ``aload``, ``detect``, ``bootstrap``,
32
+ ``abootstrap``, ``LoadResult``, ``Settings``, ``__version__``.
33
+ These names follow semantic versioning across minor releases.
34
+ * **Power-user** -- ``DDILoader``, ``DDIFragmentLoader``,
35
+ ``DDIFragmentParser``, ``DDIBatch``, ``CDIBatch``,
36
+ ``CDIBatchStream``, ``DDISchema``, ``Fragment``,
37
+ ``FragmentReference``, ``FlavorName``, ``detect_ddi_format``,
38
+ ``is_cdi_format``, ``parse_ddi_batches``, ``parse_cdi_batches``.
39
+ Importable from ``ddigraph`` for fine-grained control, but they
40
+ carry no stability guarantee across minor releases. Pin a version
41
+ if you depend on them.
42
+ """
43
+
44
+ from importlib.metadata import PackageNotFoundError, version
45
+
46
+ from ddigraph.api import (
47
+ FlavorName,
48
+ LoadResult,
49
+ abootstrap,
50
+ aload,
51
+ bootstrap,
52
+ detect,
53
+ load,
54
+ )
55
+ from ddigraph.config import Settings
56
+ from ddigraph.ingest.cdi_loader import (
57
+ CDIBatch,
58
+ CDIBatchStream,
59
+ is_cdi_format,
60
+ parse_cdi_batches,
61
+ )
62
+ from ddigraph.ingest.fragment_loader import (
63
+ DDIFragmentLoader,
64
+ DDIFragmentParser,
65
+ Fragment,
66
+ FragmentReference,
67
+ detect_ddi_format,
68
+ )
69
+ from ddigraph.ingest.loader import DDIBatch, DDILoader, parse_ddi_batches
70
+ from ddigraph.schema.definitions import DDISchema
71
+
72
+ try:
73
+ __version__ = version("ddigraph")
74
+ except PackageNotFoundError:
75
+ # Package not installed (development mode)
76
+ __version__ = "0.4.0"
77
+
78
+ # Intentionally split into two tiers (supported / power-user) with a
79
+ # blank-line break instead of alphabetised; see the module docstring.
80
+ __all__ = [ # noqa: RUF022 (tier ordering is intentional)
81
+ # Supported public API -- the 90 % case, semver-stable across
82
+ # minor releases. See the module docstring for details.
83
+ "LoadResult",
84
+ "Settings",
85
+ "__version__",
86
+ "abootstrap",
87
+ "aload",
88
+ "bootstrap",
89
+ "detect",
90
+ "load",
91
+ # Power-user surface -- the parser tier, batch types, and the
92
+ # shared schema container. Importable from ``ddigraph`` but carries
93
+ # no stability guarantee across minor releases.
94
+ "CDIBatch",
95
+ "CDIBatchStream",
96
+ "DDIBatch",
97
+ "DDIFragmentLoader",
98
+ "DDIFragmentParser",
99
+ "DDILoader",
100
+ "DDISchema",
101
+ "FlavorName",
102
+ "Fragment",
103
+ "FragmentReference",
104
+ "detect_ddi_format",
105
+ "is_cdi_format",
106
+ "parse_cdi_batches",
107
+ "parse_ddi_batches",
108
+ ]
ddigraph/api.py ADDED
@@ -0,0 +1,296 @@
1
+ """CRUD-simple public API for ``ddigraph``.
2
+
3
+ The four functions below cover the 90 % case. Power users still have
4
+ :class:`~ddigraph.ingest.loader.DDILoader`,
5
+ :class:`~ddigraph.ingest.fragment_loader.DDIFragmentLoader`, and the
6
+ CDI ``parse_cdi_batches`` family available for fine-grained control;
7
+ this module just spares ordinary callers from building drivers, picking
8
+ a flavor-specific loader, and chaining ``asyncio.run`` themselves.
9
+
10
+ Typical usage::
11
+
12
+ import ddigraph
13
+
14
+ # Bootstrap the target's schema once.
15
+ ddigraph.bootstrap(target="bolt://localhost:7687")
16
+
17
+ # Stream a DDI file into the target. Format auto-detected.
18
+ result = ddigraph.load("survey.xml", target="bolt://localhost:7687")
19
+ print(result.nodes_written, "nodes,", result.relationships_written, "relationships")
20
+
21
+ Connection credentials default to the env-driven
22
+ :class:`~ddigraph.config.Settings` model when ``target`` is omitted, so
23
+ the existing ``DDIGRAPH_NEO4J_*`` (or legacy ``NEO4J_*``) variables
24
+ continue to work.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import asyncio
30
+ from dataclasses import dataclass
31
+ from pathlib import Path
32
+ from time import perf_counter
33
+ from typing import Literal
34
+
35
+ from neo4j import AsyncDriver, AsyncGraphDatabase
36
+
37
+ from ddigraph.config import Settings
38
+ from ddigraph.graph.bootstrap import ensure_schema as _ensure_schema
39
+ from ddigraph.ingest.fragment_loader import (
40
+ DDIFragmentLoader,
41
+ detect_ddi_format,
42
+ )
43
+ from ddigraph.ingest.loader import DDILoader
44
+
45
+ type FlavorName = Literal["codebook", "lifecycle", "cdi", "unknown"]
46
+
47
+
48
+ @dataclass(slots=True)
49
+ class LoadResult:
50
+ """Summary of a single ``ddigraph.load`` invocation.
51
+
52
+ Attributes:
53
+ flavor: One of ``"codebook"`` or ``"lifecycle"`` (CDI ingestion
54
+ is not yet covered by this entry point).
55
+ target: The connection URL the load wrote to.
56
+ dataset_id: Identifier assigned to the ingested dataset (the
57
+ codebook flavor always sets one; lifecycle leaves it
58
+ ``None``).
59
+ nodes_written: Number of graph nodes the loader recorded.
60
+ relationships_written: Number of relationships recorded.
61
+ duration_s: Wall-clock seconds the load took.
62
+ dry_run: True if the load ran in dry-run mode (no writes).
63
+ totals: The raw per-entity counts the underlying loader returned.
64
+ """
65
+
66
+ flavor: FlavorName
67
+ target: str
68
+ dataset_id: str | None
69
+ nodes_written: int
70
+ relationships_written: int
71
+ duration_s: float
72
+ dry_run: bool
73
+ totals: dict[str, int]
74
+
75
+
76
+ def detect(path: str | Path) -> FlavorName:
77
+ """Return the DDI flavor of ``path``.
78
+
79
+ Thin typed wrapper over
80
+ :func:`ddigraph.ingest.fragment_loader.detect_ddi_format` so callers
81
+ get a real ``Literal`` instead of a free-form string.
82
+ """
83
+ raw = detect_ddi_format(path)
84
+ if raw in ("codebook", "lifecycle", "cdi"):
85
+ return raw # type: ignore[return-value]
86
+ return "unknown"
87
+
88
+
89
+ def _resolve_settings(target: str | None, settings: Settings | None) -> tuple[Settings, str]:
90
+ """Materialise a ``Settings`` instance and the URI it points at.
91
+
92
+ Uses ``settings`` when given, otherwise a fresh env-driven ``Settings()``;
93
+ a non-``None`` ``target`` then overrides ``neo4j_uri`` in either case.
94
+ """
95
+ base = settings or Settings()
96
+ if target is not None:
97
+ # Rebuild via model_copy so the caller's Settings instance is never mutated.
98
+ base = base.model_copy(update={"neo4j_uri": target})
99
+ return base, base.neo4j_uri
100
+
101
+
102
+ def _driver(settings: Settings) -> AsyncDriver:
103
+ """Build an ``AsyncDriver`` from a ``Settings`` instance."""
104
+ return AsyncGraphDatabase.driver(
105
+ settings.neo4j_uri,
106
+ auth=(settings.neo4j_user, settings.neo4j_password.get_secret_value()),
107
+ )
108
+
109
+
110
+ def _default_dataset_id(path: str | Path) -> str:
111
+ """Derive a dataset identifier from a file name (codebook flavor only)."""
112
+ stem = Path(path).stem.replace(" ", "_")
113
+ # A path ending in ``.xml`` with no real stem (e.g. ``/path/.xml``)
114
+ # produces a leading-dot stem; treat that as a missing identifier.
115
+ if not stem or stem.startswith("."):
116
+ return "default"
117
+ return stem
118
+
119
+
120
async def aload(
    path: str | Path,
    *,
    target: str | None = None,
    dataset_id: str | None = None,
    dataset_name: str | None = None,
    dry_run: bool = False,
    replace: bool = False,
    settings: Settings | None = None,
) -> LoadResult:
    """Async load of a DDI file into the configured Neo4j target.

    Format auto-detection picks DDI-Codebook or DDI-L Lifecycle and
    dispatches to the matching loader. DDI-CDI is parsed but not yet
    persisted by this entry point (use ``ddigraph.parse_cdi_batches``).
    Non-Neo4j backends (RDF, Gremlin, NetworkX, pandas) are not driven
    through ``load``; use the parser tier and a backend-specific
    adapter (see the ``demo/load_*.py`` examples).

    Args:
        path: Filesystem path to the DDI XML.
        target: Neo4j URL (``bolt://...`` or ``neo4j://...``). When
            omitted the env-driven ``DDIGRAPH_NEO4J_URI`` is used.
        dataset_id: Dataset identifier, forwarded to the codebook
            loader only. Defaults to the file stem when not supplied.
            The result's ``dataset_id`` is ``None`` for lifecycle loads.
        dataset_name: Human-readable dataset name, forwarded to the
            codebook loader only.
        dry_run: When True, parse and validate without writing. Only
            the codebook loader receives this flag; the lifecycle
            loader is invoked without it.
        replace: When True, purge existing content before loading.
            Forwarded as ``replace`` to the codebook loader and as
            ``clear_first`` to the lifecycle loader.
        settings: Optional pre-built ``Settings`` instance.

    Returns:
        A :class:`LoadResult` describing the load outcome. Totals whose
        key contains ``"relationship"`` (case-insensitive) are summed
        into ``relationships_written``; all others into ``nodes_written``.

    Raises:
        ValueError: If ``path`` does not point at a readable XML file
            (presumably raised by ``detect``; confirm against it).
        NotImplementedError: If ``path`` is a CDI document (not yet
            persisted by this entry point).
    """
    resolved_settings, target_uri = _resolve_settings(target, settings)
    flavor = detect(path)
    if flavor == "cdi":
        raise NotImplementedError(
            "ddigraph.load/aload does not yet persist DDI-CDI documents. "
            "Use ddigraph.parse_cdi_batches and a custom adapter."
        )

    start = perf_counter()
    totals: dict[str, int]
    resolved_dataset_id: str | None = None
    if flavor == "lifecycle":
        # Only the lifecycle loader uses the async driver, so it is
        # created (and closed) solely on this branch.
        driver = _driver(resolved_settings)
        try:
            loader = DDIFragmentLoader(driver, settings=resolved_settings)
            # NOTE(review): ``dry_run`` is not forwarded here, so a
            # lifecycle load writes even when dry_run=True - confirm
            # whether DDIFragmentLoader.load can honour it.
            totals = await loader.load(path=path, clear_first=replace)
        finally:
            await driver.close()
    else:
        # Codebook flavor: DDILoader is handed a synchronous driver.
        from neo4j import GraphDatabase

        sync_driver = GraphDatabase.driver(
            resolved_settings.neo4j_uri,
            auth=(
                resolved_settings.neo4j_user,
                resolved_settings.neo4j_password.get_secret_value(),
            ),
        )
        try:
            codebook_loader = DDILoader(sync_driver, settings=resolved_settings)
            resolved_dataset_id = dataset_id or _default_dataset_id(path)
            totals = await codebook_loader.load(
                path=path,
                dataset_id=resolved_dataset_id,
                dataset_name=dataset_name,
                dry_run=dry_run,
                replace=replace,
            )
        finally:
            sync_driver.close()

    # Split the per-key counters into node and relationship totals.
    nodes = 0
    rels = 0
    for key, count in totals.items():
        if "relationship" in key.lower():
            rels += count
        else:
            nodes += count

    return LoadResult(
        flavor=flavor,
        target=target_uri,
        dataset_id=resolved_dataset_id,
        nodes_written=nodes,
        relationships_written=rels,
        duration_s=perf_counter() - start,
        dry_run=dry_run,
        totals=totals,
    )
214
+
215
+
216
def load(
    path: str | Path,
    *,
    target: str | None = None,
    dataset_id: str | None = None,
    dataset_name: str | None = None,
    dry_run: bool = False,
    replace: bool = False,
    settings: Settings | None = None,
) -> LoadResult:
    """Blocking wrapper around :func:`aload`.

    Every argument is passed through unchanged to :func:`aload`, and the
    resulting coroutine is executed with :func:`asyncio.run`. Code that
    is already running inside an event loop should await :func:`aload`
    instead of calling this function.

    See :func:`aload` for the meaning of each argument and the shape of
    the returned :class:`LoadResult`.
    """
    pending = aload(
        path,
        target=target,
        dataset_id=dataset_id,
        dataset_name=dataset_name,
        dry_run=dry_run,
        replace=replace,
        settings=settings,
    )
    return asyncio.run(pending)
244
+
245
+
246
async def abootstrap(
    *,
    target: str | None = None,
    include_fragments: bool = True,
    settings: Settings | None = None,
) -> None:
    """Async equivalent of :func:`bootstrap`.

    Resolves the effective settings, opens an async driver, and awaits
    ``_ensure_schema`` against ``neo4j_database`` from those settings.
    The driver is closed afterwards whether or not the schema call
    succeeds.

    Args:
        target: Optional Neo4j URL overriding the settings' URI.
        include_fragments: Passed through to ``_ensure_schema``.
        settings: Optional pre-built ``Settings`` instance.
    """
    effective, _unused_uri = _resolve_settings(target, settings)
    connection = _driver(effective)
    try:
        await _ensure_schema(
            connection,
            database=effective.neo4j_database,
            include_fragments=include_fragments,
        )
    finally:
        # Always release the driver, including on failure.
        await connection.close()
263
+
264
+
265
def bootstrap(
    *,
    target: str | None = None,
    include_fragments: bool = True,
    settings: Settings | None = None,
) -> None:
    """Create the indexes and constraints DDI ingestion needs.

    Blocking wrapper that runs :func:`abootstrap` via
    :func:`asyncio.run`.

    Args:
        target: Neo4j URL. Defaults to env-driven settings.
        include_fragments: When True, also create DDI-L Lifecycle
            constraints alongside the Codebook ones.
        settings: Optional pre-built ``Settings`` instance.
    """
    pending = abootstrap(
        target=target,
        include_fragments=include_fragments,
        settings=settings,
    )
    asyncio.run(pending)
286
+
287
+
288
# Names exported by ``from <module> import *``.
__all__ = [
    "FlavorName",
    "LoadResult",
    "abootstrap",
    "aload",
    "bootstrap",
    "detect",
    "load",
]