artifact-parser 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- artifact_parser/__init__.py +53 -0
- artifact_parser/core/__init__.py +19 -0
- artifact_parser/core/base.py +13 -0
- artifact_parser/core/exceptions.py +13 -0
- artifact_parser/core/parser.py +28 -0
- artifact_parser/core/registry.py +72 -0
- artifact_parser/dbt/__init__.py +31 -0
- artifact_parser/dbt/generated/__init__.py +9 -0
- artifact_parser/dbt/generated/models/__init__.py +1 -0
- artifact_parser/dbt/generated/models/catalog/__init__.py +1 -0
- artifact_parser/dbt/generated/models/catalog/catalog_v1.py +86 -0
- artifact_parser/dbt/generated/models/manifest/__init__.py +1 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v1.py +1487 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v10.py +1601 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v11.py +3717 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v12.py +5024 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v2.py +1492 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v3.py +1504 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v4.py +1685 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v5.py +1700 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v6.py +1736 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v7.py +1844 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v8.py +1210 -0
- artifact_parser/dbt/generated/models/manifest/manifest_v9.py +1361 -0
- artifact_parser/dbt/generated/models/run_results/__init__.py +1 -0
- artifact_parser/dbt/generated/models/run_results/run_results_v1.py +74 -0
- artifact_parser/dbt/generated/models/run_results/run_results_v2.py +75 -0
- artifact_parser/dbt/generated/models/run_results/run_results_v3.py +146 -0
- artifact_parser/dbt/generated/models/run_results/run_results_v4.py +150 -0
- artifact_parser/dbt/generated/models/run_results/run_results_v5.py +79 -0
- artifact_parser/dbt/generated/models/run_results/run_results_v6.py +91 -0
- artifact_parser/dbt/generated/models/sources/__init__.py +1 -0
- artifact_parser/dbt/generated/models/sources/sources_v1.py +87 -0
- artifact_parser/dbt/generated/models/sources/sources_v2.py +99 -0
- artifact_parser/dbt/generated/models/sources/sources_v3.py +108 -0
- artifact_parser/dbt/generated/parser.py +239 -0
- artifact_parser/dbt/generated/version_map.py +112 -0
- artifact_parser/dbt/plugin.py +37 -0
- artifact_parser/dbt/resources/catalog/catalog_v1.json +435 -0
- artifact_parser/dbt/resources/manifest/manifest_v1.json +5073 -0
- artifact_parser/dbt/resources/manifest/manifest_v10.json +5692 -0
- artifact_parser/dbt/resources/manifest/manifest_v11.json +19837 -0
- artifact_parser/dbt/resources/manifest/manifest_v12.json +27316 -0
- artifact_parser/dbt/resources/manifest/manifest_v2.json +5127 -0
- artifact_parser/dbt/resources/manifest/manifest_v3.json +5225 -0
- artifact_parser/dbt/resources/manifest/manifest_v4.json +5939 -0
- artifact_parser/dbt/resources/manifest/manifest_v5.json +5984 -0
- artifact_parser/dbt/resources/manifest/manifest_v6.json +6209 -0
- artifact_parser/dbt/resources/manifest/manifest_v7.json +6569 -0
- artifact_parser/dbt/resources/manifest/manifest_v8.json +4434 -0
- artifact_parser/dbt/resources/manifest/manifest_v9.json +4965 -0
- artifact_parser/dbt/resources/run-results/run-results_v1.json +182 -0
- artifact_parser/dbt/resources/run-results/run-results_v2.json +189 -0
- artifact_parser/dbt/resources/run-results/run-results_v3.json +381 -0
- artifact_parser/dbt/resources/run-results/run-results_v4.json +400 -0
- artifact_parser/dbt/resources/run-results/run-results_v5.json +216 -0
- artifact_parser/dbt/resources/run-results/run-results_v6.json +275 -0
- artifact_parser/dbt/resources/sources/sources_v1.json +211 -0
- artifact_parser/dbt/resources/sources/sources_v2.json +261 -0
- artifact_parser/dbt/resources/sources/sources_v3.json +290 -0
- artifact_parser/dbt/utils.py +39 -0
- artifact_parser/py.typed +0 -0
- artifact_parser-1.0.0.dist-info/METADATA +171 -0
- artifact_parser-1.0.0.dist-info/RECORD +79 -0
- artifact_parser-1.0.0.dist-info/WHEEL +4 -0
- artifact_parser-1.0.0.dist-info/entry_points.txt +2 -0
- artifact_parser-1.0.0.dist-info/licenses/LICENSE +21 -0
- codegen/__init__.py +10 -0
- codegen/__main__.py +6 -0
- codegen/cli.py +93 -0
- codegen/dbt/__init__.py +6 -0
- codegen/dbt/artifact_spec.py +55 -0
- codegen/dbt/generator.py +330 -0
- codegen/dbt/paths.py +26 -0
- codegen/dbt/templates/generated_init.py.jinja +9 -0
- codegen/dbt/templates/parser.py.jinja +72 -0
- codegen/dbt/templates/version_map.py.jinja +31 -0
- codegen/dbt/templates/versions.py.jinja +12 -0
- codegen/dbt/versions.py +37 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""artifact-parser — a pluggable framework for parsing data tool artifacts.
|
|
2
|
+
|
|
3
|
+
The framework is source-agnostic: each plugin owns one family of artifacts and
|
|
4
|
+
registers itself with the shared :data:`~artifact_parser.core.registry.registry`.
|
|
5
|
+
The first (and currently only) plugin parses dbt-core artifacts.
|
|
6
|
+
|
|
7
|
+
The headline entry point is :func:`parse`, which sniffs any supported artifact
|
|
8
|
+
and routes it to the right plugin::
|
|
9
|
+
|
|
10
|
+
from artifact_parser import parse
|
|
11
|
+
model = parse(json.loads(manifest_path.read_text()))
|
|
12
|
+
|
|
13
|
+
For dbt-specific, version-pinned parsing, import from
|
|
14
|
+
:mod:`artifact_parser.dbt` directly.
|
|
15
|
+
|
|
16
|
+
The dbt plugin's generated code lives under ``artifact_parser/dbt/generated/``
|
|
17
|
+
and may be deleted and rebuilt with ``codegen dbt``. While it is absent the dbt
|
|
18
|
+
plugin simply does not register — the framework (and the codegen CLI it needs to
|
|
19
|
+
rebuild itself) still imports cleanly.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import importlib
|
|
23
|
+
import warnings
|
|
24
|
+
|
|
25
|
+
from artifact_parser.core import ArtifactParser
|
|
26
|
+
from artifact_parser.core import ArtifactParserError
|
|
27
|
+
from artifact_parser.core import BaseArtifactModel
|
|
28
|
+
from artifact_parser.core import ParserRegistrationError
|
|
29
|
+
from artifact_parser.core import ParserRegistry
|
|
30
|
+
from artifact_parser.core import UnknownArtifactError
|
|
31
|
+
from artifact_parser.core import registry
|
|
32
|
+
|
|
33
|
+
# Best-effort registration of the bundled dbt plugin. Importing the package is
# what registers it; when the import fails the framework stays importable so
# that the codegen CLI can still run and rebuild the generated code.
try:
    importlib.import_module("artifact_parser.dbt")  # registers the dbt plugin
except ImportError as exc:  # pragma: no cover - only when dbt/generated/ is dropped
    # Surface the real cause: an ImportError can also come from a broken
    # generated module, and the fixed hint alone would hide that.
    warnings.warn(
        "artifact_parser.dbt is unavailable (generated code missing). "
        "Run `codegen dbt` to rebuild it. "
        f"Underlying import error: {exc}",
        stacklevel=2,
    )

# Headline entry point: the bound ``parse`` method of the shared registry.
parse = registry.parse
|
|
43
|
+
|
|
44
|
+
# Public API of the top-level package: the names re-exported from
# ``artifact_parser.core`` plus the module-level ``parse`` alias.
__all__ = [
    "ArtifactParser",
    "ArtifactParserError",
    "BaseArtifactModel",
    "ParserRegistrationError",
    "ParserRegistry",
    "UnknownArtifactError",
    "parse",
    "registry",
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Core parser framework: base model, plugin protocol, and the registry."""
|
|
2
|
+
|
|
3
|
+
from artifact_parser.core.base import BaseArtifactModel
|
|
4
|
+
from artifact_parser.core.exceptions import ArtifactParserError
|
|
5
|
+
from artifact_parser.core.exceptions import ParserRegistrationError
|
|
6
|
+
from artifact_parser.core.exceptions import UnknownArtifactError
|
|
7
|
+
from artifact_parser.core.parser import ArtifactParser
|
|
8
|
+
from artifact_parser.core.registry import ParserRegistry
|
|
9
|
+
from artifact_parser.core.registry import registry
|
|
10
|
+
|
|
11
|
+
# Public API of the core package: every name imported above is re-exported.
__all__ = [
    "ArtifactParser",
    "ArtifactParserError",
    "BaseArtifactModel",
    "ParserRegistrationError",
    "ParserRegistry",
    "UnknownArtifactError",
    "registry",
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Shared base model for every parsed artifact, regardless of source."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# NOTE(review): the docstring below is deliberately left word-for-word —
# pydantic exposes a model's docstring as its JSON-schema ``description``, so
# rewording it would change runtime output; confirm before editing.
class BaseArtifactModel(BaseModel):
    """Base class for all artifact models across every parser plugin.

    Each plugin (dbt-core today, others tomorrow) builds its typed models on
    top of this so the framework has a single, predictable root type to reason
    about. It is intentionally empty — it exists to be an anchor, not a place to
    smuggle behaviour into.
    """

    # No fields, validators or config are declared here; subclasses supply
    # their own ``model_config`` and fields.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Exception types raised by the parser framework."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ArtifactParserError(Exception):
    """Root of this package's exception hierarchy.

    Catching this type handles any error the parser framework raises.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UnknownArtifactError(ArtifactParserError):
    """Signals that no registered parser recognises the given artifact.

    ``ParserRegistry.get`` and ``ParserRegistry.unregister`` also raise it
    when asked for a plugin name that is not registered.
    """
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ParserRegistrationError(ArtifactParserError):
    """Signals an attempt to register a parser under a name already in use."""
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""The :class:`ArtifactParser` protocol every plugin implements."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from abc import abstractmethod
|
|
5
|
+
|
|
6
|
+
from artifact_parser.core.base import BaseArtifactModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ArtifactParser(ABC):
    """Abstract interface implemented by each parser plugin.

    One plugin is responsible for one *family* of artifacts (the dbt-core
    plugin, for example). It exposes a recognition check, :meth:`can_parse`,
    and a conversion step, :meth:`parse`. Because the shared
    :data:`~artifact_parser.core.registry.registry` hands an artifact to the
    first plugin whose check succeeds, :meth:`can_parse` ought to be
    inexpensive and narrowly targeted.
    """

    #: Unique, stable plugin identifier (e.g. ``"dbt"``); the registry keys on it.
    name: str

    @abstractmethod
    def can_parse(self, artifact: dict) -> bool:
        """Tell whether ``artifact`` belongs to this plugin's family.

        Returns:
            ``True`` when the plugin recognises ``artifact``.
        """

    @abstractmethod
    def parse(self, artifact: dict) -> BaseArtifactModel:
        """Convert ``artifact`` into its typed model.

        Implementations raise when ``artifact`` does not match.
        """
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""A small registry that routes an artifact to the plugin that owns it."""
|
|
2
|
+
|
|
3
|
+
from artifact_parser.core.base import BaseArtifactModel
|
|
4
|
+
from artifact_parser.core.exceptions import ParserRegistrationError
|
|
5
|
+
from artifact_parser.core.exceptions import UnknownArtifactError
|
|
6
|
+
from artifact_parser.core.parser import ArtifactParser
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ParserRegistry:
    """Ordered collection of parser plugins with first-match dispatch.

    Plugins are stored by name in insertion order. :meth:`parse` walks them in
    that order and delegates to the first plugin that claims the artifact. A
    single module-level instance (:data:`registry`) is shared by the package;
    building a separate one is rarely necessary.
    """

    def __init__(self) -> None:
        # Insertion order of this dict doubles as the dispatch priority.
        self._parsers: dict[str, ArtifactParser] = {}

    def _ensure_registered(self, name: str) -> None:
        """Raise :class:`UnknownArtifactError` unless ``name`` is registered."""
        if name in self._parsers:
            return
        raise UnknownArtifactError(f"No parser named {name!r} is registered.")

    def register(self, parser: ArtifactParser) -> None:
        """Store ``parser`` under its ``name`` attribute.

        Raises:
            ParserRegistrationError: if another parser already uses that name.
        """
        plugin_name = parser.name
        if plugin_name in self._parsers:
            raise ParserRegistrationError(
                f"A parser named {plugin_name!r} is already registered."
            )
        self._parsers[plugin_name] = parser

    def unregister(self, name: str) -> None:
        """Drop the parser stored under ``name``.

        Raises:
            UnknownArtifactError: if ``name`` is not registered.
        """
        self._ensure_registered(name)
        del self._parsers[name]

    def get(self, name: str) -> ArtifactParser:
        """Look up the parser stored under ``name``.

        Raises:
            UnknownArtifactError: if ``name`` is not registered.
        """
        self._ensure_registered(name)
        return self._parsers[name]

    def names(self) -> list[str]:
        """List the registered plugin names in registration order."""
        return [*self._parsers]

    def parse(self, artifact: dict) -> BaseArtifactModel:
        """Delegate ``artifact`` to the first plugin whose ``can_parse`` accepts it.

        Raises:
            UnknownArtifactError: if none of the registered plugins claims it.
        """
        for candidate in self._parsers.values():
            if not candidate.can_parse(artifact):
                continue
            return candidate.parse(artifact)

        tried = ", ".join(self._parsers) or "(none registered)"
        raise UnknownArtifactError(
            "No registered parser recognises this artifact. "
            f"Tried: {tried}."
        )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
#: The process-wide registry. Plugins register themselves against this
#: (for example, ``artifact_parser.dbt`` calls ``registry.register`` on import).
registry = ParserRegistry()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""dbt-core artifact parser — the first plugin of the framework.
|
|
2
|
+
|
|
3
|
+
Importing this package registers :class:`DbtArtifactParser` with the shared
|
|
4
|
+
:data:`~artifact_parser.core.registry.registry`, so ``registry.parse(blob)``
|
|
5
|
+
works as soon as ``artifact_parser`` is imported.
|
|
6
|
+
|
|
7
|
+
The version-dependent code (the typed models, the schema-version lookup table,
|
|
8
|
+
and the ``parse_*`` dispatch) is generated and lives under
|
|
9
|
+
:mod:`artifact_parser.dbt.generated` — a directory you can delete and rebuild
|
|
10
|
+
with ``codegen dbt``. The public ``parse_*`` names are re-exported here so that
|
|
11
|
+
import path never changes.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from artifact_parser.core.registry import registry
|
|
15
|
+
from artifact_parser.dbt.generated.parser import parse_catalog
|
|
16
|
+
from artifact_parser.dbt.generated.parser import parse_manifest
|
|
17
|
+
from artifact_parser.dbt.generated.parser import parse_run_results
|
|
18
|
+
from artifact_parser.dbt.generated.parser import parse_sources
|
|
19
|
+
from artifact_parser.dbt.plugin import DbtArtifactParser
|
|
20
|
+
|
|
21
|
+
# Module-level plugin instance, created once when this package is imported.
dbt_parser = DbtArtifactParser()
# Import-time side effect: adds the plugin to the shared registry.
# ``registry.register`` raises ParserRegistrationError if the plugin's name is
# already taken.
registry.register(dbt_parser)

# Public API: the plugin class, its registered instance, and the generated
# ``parse_*`` functions re-exported from ``artifact_parser.dbt.generated.parser``.
__all__ = [
    "DbtArtifactParser",
    "dbt_parser",
    "parse_catalog",
    "parse_manifest",
    "parse_run_results",
    "parse_sources",
]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Generated dbt code — DO NOT EDIT BY HAND.
|
|
2
|
+
|
|
3
|
+
Everything in this package (the typed models, ``version_map.py``, and
|
|
4
|
+
``parser.py``) is produced by ``codegen dbt`` from dbt-core's JSON schemas. The
|
|
5
|
+
whole directory is safe to delete and rebuild::
|
|
6
|
+
|
|
7
|
+
rm -rf src/artifact_parser/dbt/generated
|
|
8
|
+
codegen dbt --skip-download # or drop the flag to pull fresh schemas
|
|
9
|
+
"""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Typed pydantic models for every supported dbt artifact schema version."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Generated dbt artifact models."""
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# generated by datamodel-codegen:
|
|
2
|
+
# filename: catalog_v1.json
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from pydantic import ConfigDict, Field
|
|
9
|
+
|
|
10
|
+
from artifact_parser.core.base import BaseArtifactModel
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Artifact-level metadata; used as the type of ``CatalogV1.metadata``
# (schema title "CatalogMetadata"). Every field has a default, so all keys
# are optional.
class Metadata(BaseArtifactModel):
    # extra="ignore": keys not declared here are dropped during validation
    # instead of being rejected.
    model_config = ConfigDict(
        extra="ignore",
    )
    dbt_schema_version: str | None = None
    # NOTE(review): unlike its siblings this defaults to a concrete version
    # string rather than None — presumably taken from the source JSON schema
    # by the generator; confirm.
    dbt_version: str | None = "1.11.6"
    generated_at: str | None = None
    invocation_id: str | None = None
    invocation_started_at: str | None = None
    env: dict[str, str] | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Per-table metadata; used as the type of ``Nodes.metadata`` and
# ``Sources.metadata`` (schema title "TableMetadata").
class Metadata1(BaseArtifactModel):
    # extra="ignore": keys not declared here are dropped during validation.
    model_config = ConfigDict(
        extra="ignore",
    )
    # Required fields (no default): type, schema, name.
    type: str
    # Populated from the "schema" key through the alias.
    # NOTE(review): presumably renamed with a trailing underscore to avoid
    # clashing with a BaseModel attribute — confirm.
    schema_: str = Field(..., alias="schema")
    name: str
    database: str | None = None
    comment: str | None = None
    owner: str | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# One column entry; the value type of the ``columns`` mapping on ``Nodes``
# and ``Sources``.
class Columns(BaseArtifactModel):
    # extra="ignore": keys not declared here are dropped during validation.
    model_config = ConfigDict(
        extra="ignore",
    )
    # type, index and name are required; comment is optional.
    type: str
    index: int
    name: str
    comment: str | None = None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# One statistic entry; the value type of the ``stats`` mapping on ``Nodes``
# and ``Sources``.
class Stats(BaseArtifactModel):
    # extra="ignore": keys not declared here are dropped during validation.
    model_config = ConfigDict(
        extra="ignore",
    )
    id: str
    label: str
    # No default, so the key must be present, although None is an accepted value.
    value: bool | str | float | None
    include: bool
    description: str | None = None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# One catalog entry under ``CatalogV1.nodes``. Declares the same fields as
# ``Sources``.
class Nodes(BaseArtifactModel):
    # extra="ignore": keys not declared here are dropped during validation.
    model_config = ConfigDict(
        extra="ignore",
    )
    metadata: Metadata1 = Field(..., title="TableMetadata")
    # Both mappings are required; keys are strings, values are typed entries.
    columns: dict[str, Columns]
    stats: dict[str, Stats]
    unique_id: str | None = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# One catalog entry under ``CatalogV1.sources``. Declares the same fields as
# ``Nodes``.
class Sources(BaseArtifactModel):
    # extra="ignore": keys not declared here are dropped during validation.
    model_config = ConfigDict(
        extra="ignore",
    )
    metadata: Metadata1 = Field(..., title="TableMetadata")
    # Both mappings are required; keys are strings, values are typed entries.
    columns: dict[str, Columns]
    stats: dict[str, Stats]
    unique_id: str | None = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Root model of this module (generated from ``catalog_v1.json``): metadata
# plus the ``nodes`` and ``sources`` mappings, all three required.
class CatalogV1(BaseArtifactModel):
    # extra="ignore": keys not declared here are dropped during validation.
    model_config = ConfigDict(
        extra="ignore",
    )
    metadata: Metadata = Field(..., title="CatalogMetadata")
    nodes: dict[str, Nodes]
    sources: dict[str, Sources]
    errors: list[str] | None = None
    # Populated from the "_compile_results" key through the alias; untyped
    # (Any) and defaults to None.
    field_compile_results: Any = Field(None, alias="_compile_results")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Generated dbt artifact models."""
|