artifact-parser 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. artifact_parser/__init__.py +53 -0
  2. artifact_parser/core/__init__.py +19 -0
  3. artifact_parser/core/base.py +13 -0
  4. artifact_parser/core/exceptions.py +13 -0
  5. artifact_parser/core/parser.py +28 -0
  6. artifact_parser/core/registry.py +72 -0
  7. artifact_parser/dbt/__init__.py +31 -0
  8. artifact_parser/dbt/generated/__init__.py +9 -0
  9. artifact_parser/dbt/generated/models/__init__.py +1 -0
  10. artifact_parser/dbt/generated/models/catalog/__init__.py +1 -0
  11. artifact_parser/dbt/generated/models/catalog/catalog_v1.py +86 -0
  12. artifact_parser/dbt/generated/models/manifest/__init__.py +1 -0
  13. artifact_parser/dbt/generated/models/manifest/manifest_v1.py +1487 -0
  14. artifact_parser/dbt/generated/models/manifest/manifest_v10.py +1601 -0
  15. artifact_parser/dbt/generated/models/manifest/manifest_v11.py +3717 -0
  16. artifact_parser/dbt/generated/models/manifest/manifest_v12.py +5024 -0
  17. artifact_parser/dbt/generated/models/manifest/manifest_v2.py +1492 -0
  18. artifact_parser/dbt/generated/models/manifest/manifest_v3.py +1504 -0
  19. artifact_parser/dbt/generated/models/manifest/manifest_v4.py +1685 -0
  20. artifact_parser/dbt/generated/models/manifest/manifest_v5.py +1700 -0
  21. artifact_parser/dbt/generated/models/manifest/manifest_v6.py +1736 -0
  22. artifact_parser/dbt/generated/models/manifest/manifest_v7.py +1844 -0
  23. artifact_parser/dbt/generated/models/manifest/manifest_v8.py +1210 -0
  24. artifact_parser/dbt/generated/models/manifest/manifest_v9.py +1361 -0
  25. artifact_parser/dbt/generated/models/run_results/__init__.py +1 -0
  26. artifact_parser/dbt/generated/models/run_results/run_results_v1.py +74 -0
  27. artifact_parser/dbt/generated/models/run_results/run_results_v2.py +75 -0
  28. artifact_parser/dbt/generated/models/run_results/run_results_v3.py +146 -0
  29. artifact_parser/dbt/generated/models/run_results/run_results_v4.py +150 -0
  30. artifact_parser/dbt/generated/models/run_results/run_results_v5.py +79 -0
  31. artifact_parser/dbt/generated/models/run_results/run_results_v6.py +91 -0
  32. artifact_parser/dbt/generated/models/sources/__init__.py +1 -0
  33. artifact_parser/dbt/generated/models/sources/sources_v1.py +87 -0
  34. artifact_parser/dbt/generated/models/sources/sources_v2.py +99 -0
  35. artifact_parser/dbt/generated/models/sources/sources_v3.py +108 -0
  36. artifact_parser/dbt/generated/parser.py +239 -0
  37. artifact_parser/dbt/generated/version_map.py +112 -0
  38. artifact_parser/dbt/plugin.py +37 -0
  39. artifact_parser/dbt/resources/catalog/catalog_v1.json +435 -0
  40. artifact_parser/dbt/resources/manifest/manifest_v1.json +5073 -0
  41. artifact_parser/dbt/resources/manifest/manifest_v10.json +5692 -0
  42. artifact_parser/dbt/resources/manifest/manifest_v11.json +19837 -0
  43. artifact_parser/dbt/resources/manifest/manifest_v12.json +27316 -0
  44. artifact_parser/dbt/resources/manifest/manifest_v2.json +5127 -0
  45. artifact_parser/dbt/resources/manifest/manifest_v3.json +5225 -0
  46. artifact_parser/dbt/resources/manifest/manifest_v4.json +5939 -0
  47. artifact_parser/dbt/resources/manifest/manifest_v5.json +5984 -0
  48. artifact_parser/dbt/resources/manifest/manifest_v6.json +6209 -0
  49. artifact_parser/dbt/resources/manifest/manifest_v7.json +6569 -0
  50. artifact_parser/dbt/resources/manifest/manifest_v8.json +4434 -0
  51. artifact_parser/dbt/resources/manifest/manifest_v9.json +4965 -0
  52. artifact_parser/dbt/resources/run-results/run-results_v1.json +182 -0
  53. artifact_parser/dbt/resources/run-results/run-results_v2.json +189 -0
  54. artifact_parser/dbt/resources/run-results/run-results_v3.json +381 -0
  55. artifact_parser/dbt/resources/run-results/run-results_v4.json +400 -0
  56. artifact_parser/dbt/resources/run-results/run-results_v5.json +216 -0
  57. artifact_parser/dbt/resources/run-results/run-results_v6.json +275 -0
  58. artifact_parser/dbt/resources/sources/sources_v1.json +211 -0
  59. artifact_parser/dbt/resources/sources/sources_v2.json +261 -0
  60. artifact_parser/dbt/resources/sources/sources_v3.json +290 -0
  61. artifact_parser/dbt/utils.py +39 -0
  62. artifact_parser/py.typed +0 -0
  63. artifact_parser-1.0.0.dist-info/METADATA +171 -0
  64. artifact_parser-1.0.0.dist-info/RECORD +79 -0
  65. artifact_parser-1.0.0.dist-info/WHEEL +4 -0
  66. artifact_parser-1.0.0.dist-info/entry_points.txt +2 -0
  67. artifact_parser-1.0.0.dist-info/licenses/LICENSE +21 -0
  68. codegen/__init__.py +10 -0
  69. codegen/__main__.py +6 -0
  70. codegen/cli.py +93 -0
  71. codegen/dbt/__init__.py +6 -0
  72. codegen/dbt/artifact_spec.py +55 -0
  73. codegen/dbt/generator.py +330 -0
  74. codegen/dbt/paths.py +26 -0
  75. codegen/dbt/templates/generated_init.py.jinja +9 -0
  76. codegen/dbt/templates/parser.py.jinja +72 -0
  77. codegen/dbt/templates/version_map.py.jinja +31 -0
  78. codegen/dbt/templates/versions.py.jinja +12 -0
  79. codegen/dbt/versions.py +37 -0
@@ -0,0 +1,53 @@
1
+ """artifact-parser — a pluggable framework for parsing data tool artifacts.
2
+
3
+ The framework is source-agnostic: each plugin owns one family of artifacts and
4
+ registers itself with the shared :data:`~artifact_parser.core.registry.registry`.
5
+ The first (and currently only) plugin parses dbt-core artifacts.
6
+
7
+ The headline entry point is :func:`parse`, which sniffs any supported artifact
8
+ and routes it to the right plugin::
9
+
10
+ from artifact_parser import parse
11
+ model = parse(json.loads(manifest_path.read_text()))
12
+
13
+ For dbt-specific, version-pinned parsing, import from
14
+ :mod:`artifact_parser.dbt` directly.
15
+
16
+ The dbt plugin's generated code lives under ``artifact_parser/dbt/generated/``
17
+ and may be deleted and rebuilt with ``codegen dbt``. While it is absent the dbt
18
+ plugin simply does not register — the framework (and the codegen CLI it needs to
19
+ rebuild itself) still imports cleanly.
20
+ """
21
+
22
+ import importlib
23
+ import warnings
24
+
25
+ from artifact_parser.core import ArtifactParser
26
+ from artifact_parser.core import ArtifactParserError
27
+ from artifact_parser.core import BaseArtifactModel
28
+ from artifact_parser.core import ParserRegistrationError
29
+ from artifact_parser.core import ParserRegistry
30
+ from artifact_parser.core import UnknownArtifactError
31
+ from artifact_parser.core import registry
32
+
33
# Importing the dbt package registers its plugin with the shared registry as a
# side effect. The import is best-effort: ``artifact_parser/dbt/generated/`` may
# have been deleted, and the framework must still import cleanly in that case.
try:
    importlib.import_module("artifact_parser.dbt")  # registers the dbt plugin
except ImportError as exc:  # pragma: no cover - only when dbt/generated/ is dropped
    # Include the underlying error in the warning. An ImportError can come from
    # something other than a missing generated/ directory, and the fixed
    # message on its own would hide the real cause.
    warnings.warn(
        "artifact_parser.dbt is unavailable (generated code missing). "
        f"Run `codegen dbt` to rebuild it. Import error: {exc}",
        stacklevel=2,
    )

# Headline entry point: hand any supported artifact to the shared registry,
# which routes it to the first plugin that claims it.
parse = registry.parse
43
+
44
+ __all__ = [
45
+ "ArtifactParser",
46
+ "ArtifactParserError",
47
+ "BaseArtifactModel",
48
+ "ParserRegistrationError",
49
+ "ParserRegistry",
50
+ "UnknownArtifactError",
51
+ "parse",
52
+ "registry",
53
+ ]
@@ -0,0 +1,19 @@
1
+ """Core parser framework: base model, plugin protocol, and the registry."""
2
+
3
+ from artifact_parser.core.base import BaseArtifactModel
4
+ from artifact_parser.core.exceptions import ArtifactParserError
5
+ from artifact_parser.core.exceptions import ParserRegistrationError
6
+ from artifact_parser.core.exceptions import UnknownArtifactError
7
+ from artifact_parser.core.parser import ArtifactParser
8
+ from artifact_parser.core.registry import ParserRegistry
9
+ from artifact_parser.core.registry import registry
10
+
11
+ __all__ = [
12
+ "ArtifactParser",
13
+ "ArtifactParserError",
14
+ "BaseArtifactModel",
15
+ "ParserRegistrationError",
16
+ "ParserRegistry",
17
+ "UnknownArtifactError",
18
+ "registry",
19
+ ]
@@ -0,0 +1,13 @@
1
+ """Shared base model for every parsed artifact, regardless of source."""
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class BaseArtifactModel(BaseModel):
    """Common root type for the models produced by every parser plugin.

    Plugin models (dbt-core at present, possibly others later) derive from
    this class, which gives the framework one predictable base type to work
    with. The class deliberately declares nothing of its own: it is a marker,
    not a home for shared behaviour.
    """
@@ -0,0 +1,13 @@
1
+ """Exception types raised by the parser framework."""
2
+
3
+
4
class ArtifactParserError(Exception):
    """Root of the package's exception hierarchy.

    Every error this package raises derives from this class, so catching it
    handles all of them at once.
    """
6
+
7
+
8
class UnknownArtifactError(ArtifactParserError):
    """Signals that nothing registered can handle the request.

    Raised when no registered parser recognises the given artifact. The
    registry also raises it when a parser is looked up or removed by a name
    that is not registered.
    """
10
+
11
+
12
class ParserRegistrationError(ArtifactParserError):
    """Signals an attempt to register a parser under a name already in use."""
@@ -0,0 +1,28 @@
1
+ """The :class:`ArtifactParser` protocol every plugin implements."""
2
+
3
+ from abc import ABC
4
+ from abc import abstractmethod
5
+
6
+ from artifact_parser.core.base import BaseArtifactModel
7
+
8
+
9
class ArtifactParser(ABC):
    """Abstract interface implemented by each parser plugin (e.g. dbt-core).

    Every plugin is responsible for a single *family* of artifacts and must
    provide two operations: :meth:`can_parse`, which reports whether a blob
    belongs to the plugin, and :meth:`parse`, which converts the blob into a
    typed model. The :data:`~artifact_parser.core.registry.registry` hands an
    artifact to the first plugin whose :meth:`can_parse` accepts it, so that
    check should be inexpensive and precise.
    """

    #: Stable, unique identifier for the plugin (e.g. ``"dbt"``).
    name: str

    @abstractmethod
    def can_parse(self, artifact: dict) -> bool:
        """Report whether ``artifact`` belongs to this plugin."""

    @abstractmethod
    def parse(self, artifact: dict) -> BaseArtifactModel:
        """Convert ``artifact`` into a typed model; raise if it does not match."""
@@ -0,0 +1,72 @@
1
+ """A small registry that routes an artifact to the plugin that owns it."""
2
+
3
+ from artifact_parser.core.base import BaseArtifactModel
4
+ from artifact_parser.core.exceptions import ParserRegistrationError
5
+ from artifact_parser.core.exceptions import UnknownArtifactError
6
+ from artifact_parser.core.parser import ArtifactParser
7
+
8
+
9
class ParserRegistry:
    """Ordered collection of parser plugins with first-match dispatch.

    Plugins are kept in the order they were registered; :meth:`parse` walks
    them in that order and uses the first one that accepts the artifact. A
    single module-level instance (:data:`registry`) serves most callers, who
    rarely need to construct their own.
    """

    def __init__(self) -> None:
        # A dict preserves insertion order, which doubles as dispatch order.
        self._parsers: dict[str, ArtifactParser] = {}

    def register(self, parser: ArtifactParser) -> None:
        """Store ``parser`` under its ``name``.

        Raises:
            ParserRegistrationError: if that name is already in use.
        """
        if parser.name not in self._parsers:
            self._parsers[parser.name] = parser
            return
        raise ParserRegistrationError(
            f"A parser named {parser.name!r} is already registered."
        )

    def unregister(self, name: str) -> None:
        """Drop the parser stored under ``name``.

        Raises:
            UnknownArtifactError: if nothing is registered under ``name``.
        """
        if name in self._parsers:
            del self._parsers[name]
            return
        raise UnknownArtifactError(f"No parser named {name!r} is registered.")

    def get(self, name: str) -> ArtifactParser:
        """Look up the parser stored under ``name``.

        Raises:
            UnknownArtifactError: if nothing is registered under ``name``.
        """
        if name in self._parsers:
            return self._parsers[name]
        raise UnknownArtifactError(f"No parser named {name!r} is registered.")

    def names(self) -> list[str]:
        """List the registered plugin names in registration order."""
        return [*self._parsers]

    def parse(self, artifact: dict) -> BaseArtifactModel:
        """Hand ``artifact`` to the first plugin that accepts it.

        Raises:
            UnknownArtifactError: if no registered plugin accepts it.
        """
        for candidate in self._parsers.values():
            if not candidate.can_parse(artifact):
                continue
            return candidate.parse(artifact)

        # Nobody claimed the artifact: list what was consulted to aid debugging.
        tried = ", ".join(self._parsers) or "(none registered)"
        raise UnknownArtifactError(
            "No registered parser recognises this artifact. "
            f"Tried: {tried}."
        )


#: The process-wide registry. Plugins register themselves against this.
registry = ParserRegistry()
@@ -0,0 +1,31 @@
1
+ """dbt-core artifact parser — the first plugin of the framework.
2
+
3
+ Importing this package registers :class:`DbtArtifactParser` with the shared
4
+ :data:`~artifact_parser.core.registry.registry`, so ``registry.parse(blob)``
5
+ works as soon as ``artifact_parser`` is imported.
6
+
7
+ The version-dependent code (the typed models, the schema-version lookup table,
8
+ and the ``parse_*`` dispatch) is generated and lives under
9
+ :mod:`artifact_parser.dbt.generated` — a directory you can delete and rebuild
10
+ with ``codegen dbt``. The public ``parse_*`` names are re-exported here so that
11
+ import path never changes.
12
+ """
13
+
14
+ from artifact_parser.core.registry import registry
15
+ from artifact_parser.dbt.generated.parser import parse_catalog
16
+ from artifact_parser.dbt.generated.parser import parse_manifest
17
+ from artifact_parser.dbt.generated.parser import parse_run_results
18
+ from artifact_parser.dbt.generated.parser import parse_sources
19
+ from artifact_parser.dbt.plugin import DbtArtifactParser
20
+
21
# Build the plugin once at import time and add it to the shared registry, so
# that ``registry.parse`` can dispatch dbt artifacts once this package is loaded.
dbt_parser = DbtArtifactParser()
registry.register(parser=dbt_parser)
23
+
24
+ __all__ = [
25
+ "DbtArtifactParser",
26
+ "dbt_parser",
27
+ "parse_catalog",
28
+ "parse_manifest",
29
+ "parse_run_results",
30
+ "parse_sources",
31
+ ]
@@ -0,0 +1,9 @@
1
+ """Generated dbt code — DO NOT EDIT BY HAND.
2
+
3
+ Everything in this package (the typed models, ``version_map.py``, and
4
+ ``parser.py``) is produced by ``codegen dbt`` from dbt-core's JSON schemas. The
5
+ whole directory is safe to delete and rebuild::
6
+
7
+ rm -rf src/artifact_parser/dbt/generated
8
+ codegen dbt --skip-download # or drop the flag to pull fresh schemas
9
+ """
@@ -0,0 +1 @@
1
+ """Typed pydantic models for every supported dbt artifact schema version."""
@@ -0,0 +1 @@
1
+ """Generated dbt artifact models."""
@@ -0,0 +1,86 @@
1
+ # generated by datamodel-codegen:
2
+ # filename: catalog_v1.json
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import Any
7
+
8
+ from pydantic import ConfigDict, Field
9
+
10
+ from artifact_parser.core.base import BaseArtifactModel
11
+
12
+
13
class Metadata(BaseArtifactModel):
    # Catalog-level metadata; used as ``CatalogV1.metadata`` (schema title
    # "CatalogMetadata"). Keys not declared below are ignored on input.
    model_config = ConfigDict(extra="ignore")

    dbt_schema_version: str | None = None
    # NOTE(review): the version default below was presumably baked in from
    # catalog_v1.json at generation time — confirm before relying on it.
    dbt_version: str | None = "1.11.6"
    generated_at: str | None = None
    invocation_id: str | None = None
    invocation_started_at: str | None = None
    env: dict[str, str] | None = None
23
+
24
+
25
class Metadata1(BaseArtifactModel):
    # Per-table metadata; used as the ``metadata`` field of ``Nodes`` and
    # ``Sources`` (schema title "TableMetadata"). Keys not declared below are
    # ignored on input.
    model_config = ConfigDict(extra="ignore")

    type: str
    # Populated from the input key "schema"; the attribute carries a trailing
    # underscore.
    schema_: str = Field(..., alias="schema")
    name: str
    database: str | None = None
    comment: str | None = None
    owner: str | None = None
35
+
36
+
37
class Columns(BaseArtifactModel):
    # One column entry; ``Nodes.columns`` and ``Sources.columns`` map a string
    # key to one of these. Keys not declared below are ignored on input.
    model_config = ConfigDict(extra="ignore")

    type: str
    index: int
    name: str
    comment: str | None = None
45
+
46
+
47
class Stats(BaseArtifactModel):
    # One statistic entry; ``Nodes.stats`` and ``Sources.stats`` map a string
    # key to one of these. Keys not declared below are ignored on input.
    model_config = ConfigDict(extra="ignore")

    id: str
    label: str
    # No default is declared, so the key must be present; None is an accepted
    # value alongside bool, str and float.
    value: bool | str | float | None
    include: bool
    description: str | None = None
56
+
57
+
58
class Nodes(BaseArtifactModel):
    # One entry of ``CatalogV1.nodes``. Declares the same fields as ``Sources``.
    # Keys not declared below are ignored on input.
    model_config = ConfigDict(extra="ignore")

    metadata: Metadata1 = Field(..., title="TableMetadata")
    columns: dict[str, Columns]
    stats: dict[str, Stats]
    unique_id: str | None = None
66
+
67
+
68
class Sources(BaseArtifactModel):
    # One entry of ``CatalogV1.sources``. Declares the same fields as ``Nodes``.
    # Keys not declared below are ignored on input.
    model_config = ConfigDict(extra="ignore")

    metadata: Metadata1 = Field(..., title="TableMetadata")
    columns: dict[str, Columns]
    stats: dict[str, Stats]
    unique_id: str | None = None
76
+
77
+
78
class CatalogV1(BaseArtifactModel):
    # Root model of this module (the file header names catalog_v1.json as the
    # generator input). Keys not declared below are ignored on input.
    model_config = ConfigDict(extra="ignore")

    metadata: Metadata = Field(..., title="CatalogMetadata")
    nodes: dict[str, Nodes]
    sources: dict[str, Sources]
    errors: list[str] | None = None
    # Populated from the input key "_compile_results"; defaults to None.
    field_compile_results: Any = Field(None, alias="_compile_results")
@@ -0,0 +1 @@
1
+ """Generated dbt artifact models."""