artifact-parser 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. artifact_parser-1.0.0/.gitignore +218 -0
  2. artifact_parser-1.0.0/LICENSE +21 -0
  3. artifact_parser-1.0.0/PKG-INFO +171 -0
  4. artifact_parser-1.0.0/README.md +145 -0
  5. artifact_parser-1.0.0/pyproject.toml +106 -0
  6. artifact_parser-1.0.0/src/artifact_parser/__init__.py +53 -0
  7. artifact_parser-1.0.0/src/artifact_parser/core/__init__.py +19 -0
  8. artifact_parser-1.0.0/src/artifact_parser/core/base.py +13 -0
  9. artifact_parser-1.0.0/src/artifact_parser/core/exceptions.py +13 -0
  10. artifact_parser-1.0.0/src/artifact_parser/core/parser.py +28 -0
  11. artifact_parser-1.0.0/src/artifact_parser/core/registry.py +72 -0
  12. artifact_parser-1.0.0/src/artifact_parser/dbt/__init__.py +31 -0
  13. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/__init__.py +9 -0
  14. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/__init__.py +1 -0
  15. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/catalog/__init__.py +1 -0
  16. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/catalog/catalog_v1.py +86 -0
  17. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/__init__.py +1 -0
  18. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v1.py +1487 -0
  19. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v10.py +1601 -0
  20. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v11.py +3717 -0
  21. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v12.py +5024 -0
  22. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v2.py +1492 -0
  23. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v3.py +1504 -0
  24. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v4.py +1685 -0
  25. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v5.py +1700 -0
  26. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v6.py +1736 -0
  27. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v7.py +1844 -0
  28. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v8.py +1210 -0
  29. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/manifest/manifest_v9.py +1361 -0
  30. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/run_results/__init__.py +1 -0
  31. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/run_results/run_results_v1.py +74 -0
  32. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/run_results/run_results_v2.py +75 -0
  33. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/run_results/run_results_v3.py +146 -0
  34. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/run_results/run_results_v4.py +150 -0
  35. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/run_results/run_results_v5.py +79 -0
  36. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/run_results/run_results_v6.py +91 -0
  37. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/sources/__init__.py +1 -0
  38. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/sources/sources_v1.py +87 -0
  39. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/sources/sources_v2.py +99 -0
  40. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/models/sources/sources_v3.py +108 -0
  41. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/parser.py +239 -0
  42. artifact_parser-1.0.0/src/artifact_parser/dbt/generated/version_map.py +112 -0
  43. artifact_parser-1.0.0/src/artifact_parser/dbt/plugin.py +37 -0
  44. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/catalog/catalog_v1.json +435 -0
  45. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v1.json +5073 -0
  46. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v10.json +5692 -0
  47. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v11.json +19837 -0
  48. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v12.json +27316 -0
  49. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v2.json +5127 -0
  50. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v3.json +5225 -0
  51. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v4.json +5939 -0
  52. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v5.json +5984 -0
  53. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v6.json +6209 -0
  54. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v7.json +6569 -0
  55. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v8.json +4434 -0
  56. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/manifest/manifest_v9.json +4965 -0
  57. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/run-results/run-results_v1.json +182 -0
  58. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/run-results/run-results_v2.json +189 -0
  59. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/run-results/run-results_v3.json +381 -0
  60. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/run-results/run-results_v4.json +400 -0
  61. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/run-results/run-results_v5.json +216 -0
  62. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/run-results/run-results_v6.json +275 -0
  63. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/sources/sources_v1.json +211 -0
  64. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/sources/sources_v2.json +261 -0
  65. artifact_parser-1.0.0/src/artifact_parser/dbt/resources/sources/sources_v3.json +290 -0
  66. artifact_parser-1.0.0/src/artifact_parser/dbt/utils.py +39 -0
  67. artifact_parser-1.0.0/src/artifact_parser/py.typed +0 -0
  68. artifact_parser-1.0.0/src/codegen/__init__.py +10 -0
  69. artifact_parser-1.0.0/src/codegen/__main__.py +6 -0
  70. artifact_parser-1.0.0/src/codegen/cli.py +93 -0
  71. artifact_parser-1.0.0/src/codegen/dbt/__init__.py +6 -0
  72. artifact_parser-1.0.0/src/codegen/dbt/artifact_spec.py +55 -0
  73. artifact_parser-1.0.0/src/codegen/dbt/generator.py +330 -0
  74. artifact_parser-1.0.0/src/codegen/dbt/paths.py +26 -0
  75. artifact_parser-1.0.0/src/codegen/dbt/templates/generated_init.py.jinja +9 -0
  76. artifact_parser-1.0.0/src/codegen/dbt/templates/parser.py.jinja +72 -0
  77. artifact_parser-1.0.0/src/codegen/dbt/templates/version_map.py.jinja +31 -0
  78. artifact_parser-1.0.0/src/codegen/dbt/templates/versions.py.jinja +12 -0
  79. artifact_parser-1.0.0/src/codegen/dbt/versions.py +37 -0
@@ -0,0 +1,218 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py.cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+ cover/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+ db.sqlite3
61
+ db.sqlite3-journal
62
+
63
+ # Flask stuff:
64
+ instance/
65
+ .webassets-cache
66
+
67
+ # Scrapy stuff:
68
+ .scrapy
69
+
70
+ # Sphinx documentation
71
+ docs/_build/
72
+
73
+ # PyBuilder
74
+ .pybuilder/
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ # For a library or package, you might want to ignore these files since the code is
86
+ # intended to run in multiple environments; otherwise, check them in:
87
+ # .python-version
88
+
89
+ # pipenv
90
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
92
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
93
+ # install all needed dependencies.
94
+ # Pipfile.lock
95
+
96
+ # UV
97
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
98
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
99
+ # commonly ignored for libraries.
100
+ # uv.lock
101
+
102
+ # poetry
103
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107
+ # poetry.lock
108
+ # poetry.toml
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
113
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
114
+ # pdm.lock
115
+ # pdm.toml
116
+ .pdm-python
117
+ .pdm-build/
118
+
119
+ # pixi
120
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
121
+ # pixi.lock
122
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
123
+ # in the .venv directory. It is recommended not to include this directory in version control.
124
+ .pixi
125
+
126
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
127
+ __pypackages__/
128
+
129
+ # Celery stuff
130
+ celerybeat-schedule
131
+ celerybeat.pid
132
+
133
+ # Redis
134
+ *.rdb
135
+ *.aof
136
+ *.pid
137
+
138
+ # RabbitMQ
139
+ mnesia/
140
+ rabbitmq/
141
+ rabbitmq-data/
142
+
143
+ # ActiveMQ
144
+ activemq-data/
145
+
146
+ # SageMath parsed files
147
+ *.sage.py
148
+
149
+ # Environments
150
+ .env
151
+ .envrc
152
+ .venv
153
+ env/
154
+ venv/
155
+ ENV/
156
+ env.bak/
157
+ venv.bak/
158
+
159
+ # Spyder project settings
160
+ .spyderproject
161
+ .spyproject
162
+
163
+ # Rope project settings
164
+ .ropeproject
165
+
166
+ # mkdocs documentation
167
+ /site
168
+
169
+ # mypy
170
+ .mypy_cache/
171
+ .dmypy.json
172
+ dmypy.json
173
+
174
+ # Pyre type checker
175
+ .pyre/
176
+
177
+ # pytype static type analyzer
178
+ .pytype/
179
+
180
+ # Cython debug symbols
181
+ cython_debug/
182
+
183
+ # PyCharm
184
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
185
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
186
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
187
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
188
+ # .idea/
189
+
190
+ # Abstra
191
+ # Abstra is an AI-powered process automation framework.
192
+ # Ignore directories containing user credentials, local state, and settings.
193
+ # Learn more at https://abstra.io/docs
194
+ .abstra/
195
+
196
+ # Visual Studio Code
197
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
198
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
199
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
200
+ # you could uncomment the following to ignore the entire vscode folder
201
+ # .vscode/
202
+ # Temporary file for partial code execution
203
+ tempCodeRunnerFile.py
204
+
205
+ # Ruff stuff:
206
+ .ruff_cache/
207
+
208
+ # PyPI configuration file
209
+ .pypirc
210
+
211
+ # Marimo
212
+ marimo/_static/
213
+ marimo/_lsp/
214
+ __marimo__/
215
+
216
+ # Streamlit
217
+ .streamlit/secrets.toml
218
+ .task/checksum/codegen-check
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dat Nguyen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,171 @@
1
+ Metadata-Version: 2.4
2
+ Name: artifact-parser
3
+ Version: 1.0.0
4
+ Summary: A pluggable framework for parsing data tool artifacts into typed Python models — dbt-core first.
5
+ Project-URL: Homepage, https://github.com/datnguye/artifact-parser
6
+ Project-URL: Repository, https://github.com/datnguye/artifact-parser
7
+ Author-email: Dat Nguyen <datnguyen.it09@gmail.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: artifacts,catalog,dbt,manifest,parser,pydantic
11
+ Classifier: Environment :: Console
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: pydantic>=2.0
20
+ Provides-Extra: codegen
21
+ Requires-Dist: datamodel-code-generator>=0.63.0; extra == 'codegen'
22
+ Requires-Dist: jinja2>=3.1; extra == 'codegen'
23
+ Requires-Dist: typer>=0.26.7; extra == 'codegen'
24
+ Provides-Extra: dbt
25
+ Description-Content-Type: text/markdown
26
+
27
+ # artifact-parser
28
+
29
+ A small, pluggable framework for turning the JSON artifacts that data tools spit
30
+ out into typed, validated Python objects. Point it at a blob, get back a pydantic
31
+ model — no manual key-spelunking, no guessing which schema version you're holding.
32
+
33
+ The framework is deliberately source-agnostic. Each **plugin** owns one family of
34
+ artifacts and registers itself with a shared registry. The first one ships in the
35
+ box: a full **dbt-core** parser (catalog, manifest, run-results, sources).
36
+
37
+ ## Install
38
+
39
+ ```bash
40
+ uv add artifact-parser # or: pip install artifact-parser
41
+ ```
42
+
43
+ ## Quick start
44
+
45
+ The headline entry point sniffs any supported artifact and routes it to the right
46
+ plugin — you don't have to know what you're holding:
47
+
48
+ ```python
49
+ import json
50
+ from artifact_parser import parse
51
+
52
+ artifact = json.loads(open("target/manifest.json").read())
53
+ model = parse(artifact) # -> a ManifestV12 (or whatever version it is)
54
+ print(model.metadata.dbt_schema_version)
55
+ ```
56
+
57
+ When you *do* know the artifact family, the dbt plugin's typed helpers are more
58
+ precise (and give better editor autocomplete):
59
+
60
+ ```python
61
+ from artifact_parser.dbt import parse_manifest, parse_catalog
62
+
63
+ manifest = parse_manifest(json.loads(open("target/manifest.json").read()))
64
+ catalog = parse_catalog(json.loads(open("target/catalog.json").read()))
65
+ ```
66
+
67
+ Hand it something it doesn't recognise and it tells you so, loudly, instead of
68
+ returning a half-populated object:
69
+
70
+ ```python
71
+ from artifact_parser import parse, UnknownArtifactError
72
+
73
+ try:
74
+ parse({"metadata": {"dbt_schema_version": "made-up/v99.json"}})
75
+ except UnknownArtifactError as exc:
76
+ print(exc) # No registered parser recognises this artifact. Tried: dbt.
77
+ ```
78
+
79
+ ## Supported dbt artifacts
80
+
81
+ | Artifact | Versions | Generic parser | Version-pinned parsers |
82
+ |---------------|----------|-----------------------|---------------------------------------|
83
+ | `catalog` | v1 | `parse_catalog` | `parse_catalog_v1` |
84
+ | `manifest` | v1–v12 | `parse_manifest` | `parse_manifest_v1` … `_v12` |
85
+ | `run-results` | v1–v6 | `parse_run_results` | `parse_run_results_v1` … `_v6` |
86
+ | `sources` | v1–v3 | `parse_sources` | `parse_sources_v1` … `_v3` |
87
+
88
+ ## Architecture
89
+
90
+ ```
91
+ src/artifact_parser/
92
+ ├── core/ # the framework — no knowledge of any specific tool
93
+ │ ├── base.py # BaseArtifactModel (shared pydantic root)
94
+ │ ├── parser.py # ArtifactParser (the plugin contract)
95
+ │ ├── registry.py # ParserRegistry + the shared `registry` instance
96
+ │ └── exceptions.py # ArtifactParserError + friends
97
+ └── dbt/ # the first plugin: dbt-core artifacts
98
+ ├── plugin.py # DbtArtifactParser (implements ArtifactParser)
99
+ ├── utils.py # schema-version sniffing
100
+ ├── resources/ # committed dbt-core JSON schemas (codegen input)
101
+ └── generated/ # droppable, rebuilt by `codegen dbt`
102
+ ├── parser.py # parse_<artifact>[_vN] public API
103
+ ├── version_map.py# schema-version URL -> model class
104
+ └── models/ # typed pydantic models, one module per version
105
+ ```
106
+
107
+ The generated code is walled off in `generated/`. You can `rm -rf` that whole
108
+ directory and rebuild it with `codegen dbt` (the package still imports while it's
109
+ gone — the dbt plugin just sits out until you regenerate).
110
+
111
+ The flow: a plugin answers *"is this mine?"* (`can_parse`) and *"make it typed"*
112
+ (`parse`). The registry tries plugins in registration order and returns the first
113
+ match. dbt registers itself on import, so `parse(...)` works out of the box.
114
+
115
+ ## Adding a new parser
116
+
117
+ The whole point of the `core/` framework is that the second parser is cheap.
118
+ By hand:
119
+
120
+ 1. Create `src/artifact_parser/<tool>/`.
121
+ 2. Define your models on `BaseArtifactModel`.
122
+ 3. Implement `ArtifactParser` (`name`, `can_parse`, `parse`) in `plugin.py`.
123
+ 4. Register it in your plugin package's `__init__.py` (`src/artifact_parser/<tool>/__init__.py`): `registry.register(MyParser())`.
124
+ 5. Import your plugin package in the top-level `artifact_parser/__init__.py` so it registers itself on import.
125
+
126
+ That's it — `parse()` now routes matching artifacts to your plugin.
127
+
128
+ ## Development
129
+
130
+ This project uses [uv](https://docs.astral.sh/uv/) and
131
+ [Task](https://taskfile.dev/). Common targets:
132
+
133
+ | Goal | Task |
134
+ |---------------------------------|----------------|
135
+ | Sync the environment | `task install` |
136
+ | Format + autofix | `task format` |
137
+ | Lint (format-check + ruff) | `task lint` |
138
+ | Run tests at 100% coverage | `task test` |
139
+
140
+ `task --list` shows everything. The test suite enforces **100% coverage** of the
141
+ framework and dbt dispatch code (the generated dbt models are excluded — they're
142
+ schema, not logic). Beyond the synthetic fixtures, real artifacts from a live dbt
143
+ build are kept in `tests/data/` and round-trip through the public `parse()` in
144
+ `tests/artifact_parser/dbt/test_roundtrip.py` — the only tests that exercise
145
+ populated nodes end to end.
146
+
147
+ One non-obvious rule the generator enforces: the generated models are relaxed to
148
+ pydantic `extra="ignore"` (not the `extra="forbid"` dbt's schemas imply), because
149
+ real artifacts carry fields the published schema omits. A strict model would
150
+ reject a perfectly good `manifest.json`. See `CLAUDE.md` for the why.
151
+
152
+ ### CI
153
+
154
+ GitHub Actions back the same gates:
155
+
156
+ | Workflow | What it does |
157
+ |---------------------|-----------------------------------------------------------------------------|
158
+ | `ci.yml` | Lint + 100%-coverage tests on Python 3.10–3.13, plus a **codegen-in-sync** job that fails if the committed `generated/` drifts from a fresh regen. |
159
+ | `schema-watch.yml` | Weekly (and on demand): probes dbt's published schemas, regenerates, and opens a PR if a new version appeared. |
160
+ | `release.yml` | Build + coverage gate, then PyPI Trusted Publishing on a published Release (or TestPyPI via manual dispatch). |
161
+
162
+ Action versions and Python deps are kept current by Dependabot.
163
+
164
+ ## Agentic setup
165
+
166
+ This repo is wired for [Claude Code](https://claude.com/claude-code): a project
167
+ `CLAUDE.md`, a `parser-author` subagent that owns `src/`, slash commands
168
+ (`/test`, `/codegen`), secret-blocking and post-edit lint hooks, and the
169
+ [context7](https://github.com/upstash/context7) MCP for pulling fresh library
170
+ docs. See `CLAUDE.md` for the full tour. It will not write your code for you, but
171
+ it tries hard to keep you from shipping a failing coverage gate.
@@ -0,0 +1,145 @@
1
+ # artifact-parser
2
+
3
+ A small, pluggable framework for turning the JSON artifacts that data tools spit
4
+ out into typed, validated Python objects. Point it at a blob, get back a pydantic
5
+ model — no manual key-spelunking, no guessing which schema version you're holding.
6
+
7
+ The framework is deliberately source-agnostic. Each **plugin** owns one family of
8
+ artifacts and registers itself with a shared registry. The first one ships in the
9
+ box: a full **dbt-core** parser (catalog, manifest, run-results, sources).
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ uv add artifact-parser # or: pip install artifact-parser
15
+ ```
16
+
17
+ ## Quick start
18
+
19
+ The headline entry point sniffs any supported artifact and routes it to the right
20
+ plugin — you don't have to know what you're holding:
21
+
22
+ ```python
23
+ import json
24
+ from artifact_parser import parse
25
+
26
+ artifact = json.loads(open("target/manifest.json").read())
27
+ model = parse(artifact) # -> a ManifestV12 (or whatever version it is)
28
+ print(model.metadata.dbt_schema_version)
29
+ ```
30
+
31
+ When you *do* know the artifact family, the dbt plugin's typed helpers are more
32
+ precise (and give better editor autocomplete):
33
+
34
+ ```python
35
+ from artifact_parser.dbt import parse_manifest, parse_catalog
36
+
37
+ manifest = parse_manifest(json.loads(open("target/manifest.json").read()))
38
+ catalog = parse_catalog(json.loads(open("target/catalog.json").read()))
39
+ ```
40
+
41
+ Hand it something it doesn't recognise and it tells you so, loudly, instead of
42
+ returning a half-populated object:
43
+
44
+ ```python
45
+ from artifact_parser import parse, UnknownArtifactError
46
+
47
+ try:
48
+ parse({"metadata": {"dbt_schema_version": "made-up/v99.json"}})
49
+ except UnknownArtifactError as exc:
50
+ print(exc) # No registered parser recognises this artifact. Tried: dbt.
51
+ ```
52
+
53
+ ## Supported dbt artifacts
54
+
55
+ | Artifact | Versions | Generic parser | Version-pinned parsers |
56
+ |---------------|----------|-----------------------|---------------------------------------|
57
+ | `catalog` | v1 | `parse_catalog` | `parse_catalog_v1` |
58
+ | `manifest` | v1–v12 | `parse_manifest` | `parse_manifest_v1` … `_v12` |
59
+ | `run-results` | v1–v6 | `parse_run_results` | `parse_run_results_v1` … `_v6` |
60
+ | `sources` | v1–v3 | `parse_sources` | `parse_sources_v1` … `_v3` |
61
+
62
+ ## Architecture
63
+
64
+ ```
65
+ src/artifact_parser/
66
+ ├── core/ # the framework — no knowledge of any specific tool
67
+ │ ├── base.py # BaseArtifactModel (shared pydantic root)
68
+ │ ├── parser.py # ArtifactParser (the plugin contract)
69
+ │ ├── registry.py # ParserRegistry + the shared `registry` instance
70
+ │ └── exceptions.py # ArtifactParserError + friends
71
+ └── dbt/ # the first plugin: dbt-core artifacts
72
+ ├── plugin.py # DbtArtifactParser (implements ArtifactParser)
73
+ ├── utils.py # schema-version sniffing
74
+ ├── resources/ # committed dbt-core JSON schemas (codegen input)
75
+ └── generated/ # droppable, rebuilt by `codegen dbt`
76
+ ├── parser.py # parse_<artifact>[_vN] public API
77
+ ├── version_map.py# schema-version URL -> model class
78
+ └── models/ # typed pydantic models, one module per version
79
+ ```
80
+
81
+ The generated code is walled off in `generated/`. You can `rm -rf` that whole
82
+ directory and rebuild it with `codegen dbt` (the package still imports while it's
83
+ gone — the dbt plugin just sits out until you regenerate).
84
+
85
+ The flow: a plugin answers *"is this mine?"* (`can_parse`) and *"make it typed"*
86
+ (`parse`). The registry tries plugins in registration order and returns the first
87
+ match. dbt registers itself on import, so `parse(...)` works out of the box.
88
+
89
+ ## Adding a new parser
90
+
91
+ The whole point of the `core/` framework is that the second parser is cheap.
92
+ By hand:
93
+
94
+ 1. Create `src/artifact_parser/<tool>/`.
95
+ 2. Define your models on `BaseArtifactModel`.
96
+ 3. Implement `ArtifactParser` (`name`, `can_parse`, `parse`) in `plugin.py`.
97
+ 4. Register it in your plugin package's `__init__.py` (`src/artifact_parser/<tool>/__init__.py`): `registry.register(MyParser())`.
98
+ 5. Import your plugin package in the top-level `artifact_parser/__init__.py` so it registers itself on import.
99
+
100
+ That's it — `parse()` now routes matching artifacts to your plugin.
101
+
102
+ ## Development
103
+
104
+ This project uses [uv](https://docs.astral.sh/uv/) and
105
+ [Task](https://taskfile.dev/). Common targets:
106
+
107
+ | Goal | Task |
108
+ |---------------------------------|----------------|
109
+ | Sync the environment | `task install` |
110
+ | Format + autofix | `task format` |
111
+ | Lint (format-check + ruff) | `task lint` |
112
+ | Run tests at 100% coverage | `task test` |
113
+
114
+ `task --list` shows everything. The test suite enforces **100% coverage** of the
115
+ framework and dbt dispatch code (the generated dbt models are excluded — they're
116
+ schema, not logic). Beyond the synthetic fixtures, real artifacts from a live dbt
117
+ build are kept in `tests/data/` and round-trip through the public `parse()` in
118
+ `tests/artifact_parser/dbt/test_roundtrip.py` — the only tests that exercise
119
+ populated nodes end to end.
120
+
121
+ One non-obvious rule the generator enforces: the generated models are relaxed to
122
+ pydantic `extra="ignore"` (not the `extra="forbid"` dbt's schemas imply), because
123
+ real artifacts carry fields the published schema omits. A strict model would
124
+ reject a perfectly good `manifest.json`. See `CLAUDE.md` for the why.
125
+
126
+ ### CI
127
+
128
+ GitHub Actions back the same gates:
129
+
130
+ | Workflow | What it does |
131
+ |---------------------|-----------------------------------------------------------------------------|
132
+ | `ci.yml` | Lint + 100%-coverage tests on Python 3.10–3.13, plus a **codegen-in-sync** job that fails if the committed `generated/` drifts from a fresh regen. |
133
+ | `schema-watch.yml` | Weekly (and on demand): probes dbt's published schemas, regenerates, and opens a PR if a new version appeared. |
134
+ | `release.yml` | Build + coverage gate, then PyPI Trusted Publishing on a published Release (or TestPyPI via manual dispatch). |
135
+
136
+ Action versions and Python deps are kept current by Dependabot.
137
+
138
+ ## Agentic setup
139
+
140
+ This repo is wired for [Claude Code](https://claude.com/claude-code): a project
141
+ `CLAUDE.md`, a `parser-author` subagent that owns `src/`, slash commands
142
+ (`/test`, `/codegen`), secret-blocking and post-edit lint hooks, and the
143
+ [context7](https://github.com/upstash/context7) MCP for pulling fresh library
144
+ docs. See `CLAUDE.md` for the full tour. It will not write your code for you, but
145
+ it tries hard to keep you from shipping a failing coverage gate.
@@ -0,0 +1,106 @@
1
+ [project]
2
+ name = "artifact-parser"
3
+ dynamic = ["version"]
4
+ description = "A pluggable framework for parsing data tool artifacts into typed Python models — dbt-core first."
5
+ readme = "README.md"
6
+ license = { text = "MIT" }
7
+ requires-python = ">=3.10"
8
+ authors = [{ name = "Dat Nguyen", email = "datnguyen.it09@gmail.com" }]
9
+ keywords = ["dbt", "artifacts", "manifest", "catalog", "parser", "pydantic"]
10
+ classifiers = [
11
+ "Environment :: Console",
12
+ "Operating System :: OS Independent",
13
+ "Programming Language :: Python :: 3.10",
14
+ "Programming Language :: Python :: 3.11",
15
+ "Programming Language :: Python :: 3.12",
16
+ "Programming Language :: Python :: 3.13",
17
+ "Topic :: Software Development :: Libraries :: Python Modules",
18
+ ]
19
+ dependencies = [
20
+ "pydantic>=2.0",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ dbt = []
25
+ codegen = [
26
+ "datamodel-code-generator>=0.63.0",
27
+ "jinja2>=3.1",
28
+ "typer>=0.26.7",
29
+ ]
30
+
31
+ [project.scripts]
32
+ codegen = "codegen.cli:main"
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/datnguye/artifact-parser"
36
+ Repository = "https://github.com/datnguye/artifact-parser"
37
+
38
+ [build-system]
39
+ requires = ["hatchling", "hatch-vcs"]
40
+ build-backend = "hatchling.build"
41
+
42
+ [tool.hatch.version]
43
+ source = "vcs"
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ packages = ["src/artifact_parser", "src/codegen"]
47
+ artifacts = [
48
+ "src/artifact_parser/py.typed",
49
+ "src/artifact_parser/dbt/resources/**/*.json",
50
+ "src/codegen/dbt/templates/*.jinja",
51
+ ]
52
+
53
+ [tool.hatch.build.targets.sdist]
54
+ include = [
55
+ "src/artifact_parser",
56
+ "src/codegen",
57
+ "README.md",
58
+ "LICENSE",
59
+ ]
60
+
61
+ [dependency-groups]
62
+ dev = [
63
+ "artifact-parser[codegen,dbt]",
64
+ "pre-commit>=4.6.0",
65
+ "pytest>=8.0",
66
+ "pytest-cov>=5.0",
67
+ "ruff>=0.4",
68
+ ]
69
+
70
+ [tool.ruff]
71
+ line-length = 88
72
+ target-version = "py310"
73
+
74
+ [tool.ruff.lint]
75
+ select = ["E", "F", "I", "UP", "B", "SIM", "PLC0415"]
76
+
77
+ [tool.ruff.lint.isort]
78
+ force-single-line = true
79
+ known-first-party = ["artifact_parser", "codegen"]
80
+
81
+ [tool.ruff.lint.per-file-ignores]
82
+ "src/artifact_parser/dbt/generated/**/*.py" = ["E501", "I", "UP", "B", "SIM"]
83
+ "src/codegen/cli.py" = ["B008"]
84
+
85
+ [tool.pytest.ini_options]
86
+ testpaths = ["tests"]
87
+ addopts = "-ra --strict-markers --strict-config"
88
+
89
+ [tool.coverage.run]
90
+ source = ["artifact_parser", "codegen"]
91
+ branch = true
92
+ omit = [
93
+ "src/artifact_parser/dbt/generated/*.py",
94
+ "src/artifact_parser/dbt/generated/models/*/*.py",
95
+ "src/codegen/__main__.py",
96
+ ]
97
+
98
+ [tool.coverage.report]
99
+ fail_under = 100
100
+ show_missing = true
101
+ exclude_also = [
102
+ "if __name__ == .__main__.:",
103
+ "if TYPE_CHECKING:",
104
+ "\\.\\.\\.",
105
+ "pragma: no cover",
106
+ ]