databao-context-engine 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/plugins/dbt/__init__.py +0 -0
- databao_context_engine/plugins/dbt/dbt_chunker.py +47 -0
- databao_context_engine/plugins/dbt/dbt_context_extractor.py +106 -0
- databao_context_engine/plugins/dbt/dbt_plugin.py +25 -0
- databao_context_engine/plugins/dbt/types.py +44 -0
- databao_context_engine/plugins/dbt/types_artifacts.py +58 -0
- databao_context_engine/plugins/plugin_loader.py +2 -1
- {databao_context_engine-0.1.5.dist-info → databao_context_engine-0.1.6.dist-info}/METADATA +1 -1
- {databao_context_engine-0.1.5.dist-info → databao_context_engine-0.1.6.dist-info}/RECORD +11 -5
- {databao_context_engine-0.1.5.dist-info → databao_context_engine-0.1.6.dist-info}/WHEEL +0 -0
- {databao_context_engine-0.1.5.dist-info → databao_context_engine-0.1.6.dist-info}/entry_points.txt +0 -0
|
File without changes
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from databao_context_engine.pluginlib.build_plugin import EmbeddableChunk
|
|
4
|
+
from databao_context_engine.plugins.dbt.types import DbtColumn, DbtContext, DbtModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class DbtColumnChunkContent:
    """Content attached to the chunk built for a single dbt column.

    Besides the column itself, it records where the owning model lives so the
    column can be identified without carrying the whole model.
    """

    # Database of the model that owns the column.
    database_name: str
    # Schema of the model that owns the column.
    schema_name: str
    # Name of the model that owns the column.
    model_name: str
    # The column described by the chunk.
    column: DbtColumn
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_dbt_chunks(context: DbtContext) -> list[EmbeddableChunk]:
    """Flatten a dbt context into the list of chunks to embed.

    Every model contributes one model-level chunk, immediately followed by one
    chunk for each of its columns, in the order the models and columns appear.
    """
    result: list[EmbeddableChunk] = []

    for dbt_model in context.models:
        result.append(_create_model_chunk(dbt_model))
        result.extend(_create_column_chunk(dbt_model, dbt_column) for dbt_column in dbt_model.columns)

    return result
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _create_model_chunk(model: DbtModel) -> EmbeddableChunk:
    """Build the chunk for a model: its descriptive sentence is embedded and the model itself is the content."""
    text = _build_model_chunk_text(model)
    return EmbeddableChunk(embeddable_text=text, content=model)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _build_model_chunk_text(model: DbtModel) -> str:
    """Return the sentence embedded for a model: its name, database, schema and unique id."""
    # TODO: Use description and potentially other infos?
    location = f"in database {model.database} and schema {model.schema}"
    return f"Model {model.name} {location}, with unique id {model.id}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _create_column_chunk(model: DbtModel, column: DbtColumn) -> EmbeddableChunk:
    """Build the chunk for one column of a model.

    The chunk content stores the owning model's database, schema and name
    together with the column, so the column can be located on its own.
    """
    chunk_content = DbtColumnChunkContent(
        database_name=model.database,
        schema_name=model.schema,
        model_name=model.name,
        column=column,
    )
    return EmbeddableChunk(embeddable_text=_build_column_chunk_text(model, column), content=chunk_content)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _build_column_chunk_text(model: DbtModel, column: DbtColumn) -> str:
    """Return the sentence embedded for a column: the column name and the unique id of its model."""
    # TODO: Use description and potentially other infos?
    column_name, model_id = column.name, model.id
    return f"Column {column_name} in model {model_id}"
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from databao_context_engine.plugins.dbt.types import (
|
|
4
|
+
DbtColumn,
|
|
5
|
+
DbtConfigFile,
|
|
6
|
+
DbtContext,
|
|
7
|
+
DbtMaterialization,
|
|
8
|
+
DbtModel,
|
|
9
|
+
)
|
|
10
|
+
from databao_context_engine.plugins.dbt.types_artifacts import (
|
|
11
|
+
DbtArtifacts,
|
|
12
|
+
DbtCatalog,
|
|
13
|
+
DbtCatalogColumn,
|
|
14
|
+
DbtCatalogNode,
|
|
15
|
+
DbtManifest,
|
|
16
|
+
DbtManifestColumn,
|
|
17
|
+
DbtManifestModel,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def check_connection(config_file: DbtConfigFile) -> None:
    """Check that the configured dbt target folder can be read.

    The artifacts are read and then discarded; any error raised while reading
    them propagates to the caller.
    """
    target_folder = config_file.dbt_target_folder_path.expanduser()
    _read_dbt_artifacts(target_folder)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def extract_context(config_file: DbtConfigFile) -> DbtContext:
    """Read the dbt artifacts from the configured target folder and convert them into a ``DbtContext``."""
    target_folder = config_file.dbt_target_folder_path.expanduser()
    return _extract_context_from_artifacts(_read_dbt_artifacts(target_folder))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _read_dbt_artifacts(dbt_target_folder_path: Path) -> DbtArtifacts:
    """Load the dbt artifacts stored in a dbt target folder.

    ``manifest.json`` is required. ``catalog.json`` is optional: when the file
    is absent the returned artifacts carry ``catalog=None``.

    Args:
        dbt_target_folder_path: Folder expected to contain the artifacts.

    Returns:
        The parsed manifest together with the parsed catalog, if any.

    Raises:
        ValueError: If the path is not a directory or holds no ``manifest.json``
            file. Errors raised while reading or validating the files propagate.
    """
    if not dbt_target_folder_path.is_dir():
        raise ValueError(f'Invalid "dbt_target_folder_path": not a directory ({dbt_target_folder_path})')

    # TODO: Check the manifest schema version?
    manifest_file = dbt_target_folder_path / "manifest.json"
    if not manifest_file.is_file():
        raise ValueError(f'Invalid "dbt_target_folder_path": missing manifest.json file ({manifest_file})')

    # JSON is exchanged as UTF-8: decode explicitly instead of relying on the platform's locale encoding.
    manifest = DbtManifest.model_validate_json(manifest_file.read_text(encoding="utf-8"))

    catalog_file = dbt_target_folder_path / "catalog.json"
    catalog = (
        DbtCatalog.model_validate_json(catalog_file.read_text(encoding="utf-8")) if catalog_file.is_file() else None
    )

    return DbtArtifacts(manifest=manifest, catalog=catalog)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _extract_context_from_artifacts(artifacts: DbtArtifacts) -> DbtContext:
    """Convert parsed artifacts into a ``DbtContext`` with one ``DbtModel`` per manifest model node.

    Manifest nodes that are not ``DbtManifestModel`` instances are ignored.
    When a catalog is available, each model is paired with the catalog node
    registered under the same unique id (``None`` when there is no such node).
    """
    catalog_nodes = artifacts.catalog.nodes if artifacts.catalog else {}

    # TODO: Extract the stages? Or at least the "highest-level" models (= marts?)
    # TODO: Extract the constraints
    # TODO: Organize the models by schemas? Or by stages?
    models = []
    for node in artifacts.manifest.nodes.values():
        if not isinstance(node, DbtManifestModel):
            continue
        models.append(_manifest_model_to_dbt_model(node, catalog_nodes.get(node.unique_id)))

    return DbtContext(models=models)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _manifest_model_to_dbt_model(manifest_model: DbtManifestModel, catalog_node: DbtCatalogNode | None) -> DbtModel:
    """Build a ``DbtModel`` from a manifest model and, when available, its catalog node.

    Catalog columns are looked up by the manifest column's ``name``. A missing
    ``config`` yields no materialization and a missing ``depends_on`` (or one
    without a ``"nodes"`` entry) yields an empty dependency list.
    """
    catalog_columns = catalog_node.columns if catalog_node else {}
    columns = [
        _manifest_column_to_dbt_column(manifest_column, catalog_columns.get(manifest_column.name))
        for manifest_column in manifest_model.columns.values()
    ]

    config = manifest_model.config
    materialization = _manifest_materialization_to_dbt_materializaton(config.materialized if config else None)

    depends_on = manifest_model.depends_on
    depends_on_nodes = depends_on.get("nodes", []) if depends_on else []

    return DbtModel(
        id=manifest_model.unique_id,
        name=manifest_model.name,
        database=manifest_model.database,
        schema=manifest_model.schema_,
        description=manifest_model.description,
        columns=columns,
        materialization=materialization,
        primary_key=manifest_model.primary_key,
        depends_on_nodes=depends_on_nodes,
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _manifest_column_to_dbt_column(
    manifest_column: DbtManifestColumn, catalog_column: DbtCatalogColumn | None
) -> DbtColumn:
    """Build a ``DbtColumn`` from a manifest column.

    The type comes from the catalog column when one is given; otherwise the
    manifest's own ``data_type`` (possibly ``None``) is used.
    """
    if catalog_column:
        column_type = catalog_column.type
    else:
        column_type = manifest_column.data_type

    return DbtColumn(
        name=manifest_column.name,
        description=manifest_column.description,
        type=column_type,
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _manifest_materialization_to_dbt_materializaton(materialized: str | None) -> DbtMaterialization | None:
    """Map a manifest ``materialized`` value to a ``DbtMaterialization``.

    Returns ``None`` when no value is given or when the value is not one of the
    enum's members.
    """
    if materialized is None:
        return None

    try:
        materialization = DbtMaterialization(materialized)
    except ValueError:
        # Value not covered by the enum: reported as "no materialization".
        materialization = None
    return materialization
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from databao_context_engine import BuildDatasourcePlugin
|
|
4
|
+
from databao_context_engine.pluginlib.build_plugin import EmbeddableChunk
|
|
5
|
+
from databao_context_engine.plugins.dbt.dbt_chunker import build_dbt_chunks
|
|
6
|
+
from databao_context_engine.plugins.dbt.dbt_context_extractor import check_connection, extract_context
|
|
7
|
+
from databao_context_engine.plugins.dbt.types import DbtConfigFile
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DbtPlugin(BuildDatasourcePlugin[DbtConfigFile]):
    """Datasource plugin that builds its context from dbt artifacts."""

    id = "jetbrains/dbt"
    name = "Dbt Plugin"
    config_file_type = DbtConfigFile

    def supported_types(self) -> set[str]:
        """Return the datasource types handled by this plugin: only ``"dbt"``."""
        return {"dbt"}

    def build_context(self, full_type: str, datasource_name: str, file_config: DbtConfigFile) -> Any:
        """Extract the dbt context described by ``file_config``.

        ``full_type`` and ``datasource_name`` are not used.
        """
        return extract_context(file_config)

    def check_connection(self, full_type: str, datasource_name: str, file_config: DbtConfigFile) -> None:
        """Check that the artifacts referenced by ``file_config`` can be read.

        Delegates to the module-level ``check_connection`` imported from the
        context extractor; ``full_type`` and ``datasource_name`` are not used.
        """
        check_connection(file_config)

    def divide_context_into_chunks(self, context: Any) -> list[EmbeddableChunk]:
        """Split a context produced by ``build_context`` into embeddable chunks."""
        return build_dbt_chunks(context)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DbtConfigFile(BaseModel):
    """Configuration of a dbt datasource."""

    # Optional name; defaults to None.
    name: str | None = Field(default=None)
    # Datasource type; defaults to "dbt".
    type: str = Field(default="dbt")
    # Required: folder holding the dbt artifacts (manifest.json and, optionally, catalog.json).
    dbt_target_folder_path: Path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DbtMaterialization(str, Enum):
    """Materializations recognised for a dbt model; each member is also a plain ``str``."""

    TABLE = "table"
    VIEW = "view"

    def __str__(self):
        """Render the member as its raw value (e.g. ``"table"``)."""
        return self.value
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(kw_only=True)
|
|
23
|
+
class DbtColumn:
|
|
24
|
+
name: str
|
|
25
|
+
type: str | None = None
|
|
26
|
+
description: str | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(kw_only=True)
|
|
30
|
+
class DbtModel:
|
|
31
|
+
id: str
|
|
32
|
+
name: str
|
|
33
|
+
database: str
|
|
34
|
+
schema: str
|
|
35
|
+
columns: list[DbtColumn]
|
|
36
|
+
description: str | None = None
|
|
37
|
+
materialization: DbtMaterialization | None = None
|
|
38
|
+
primary_key: list[str] | None = None
|
|
39
|
+
depends_on_nodes: list[str]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(kw_only=True)
|
|
43
|
+
class DbtContext:
|
|
44
|
+
models: list[DbtModel]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Annotated, Literal
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Discriminator, Field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DbtManifestNodeConfig(BaseModel):
    """Subset of a manifest node's ``config`` that is read from the manifest."""

    # Raw materialization value, kept as a string.
    materialized: str
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DbtManifestColumn(BaseModel):
    """A column as declared on a manifest model."""

    # Column name.
    name: str
    # Column description, when provided.
    description: str | None = None
    # Declared data type, when provided.
    data_type: str | None = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DbtManifestModel(BaseModel):
    """A manifest node whose ``resource_type`` is ``"model"``."""

    # Discriminator value selecting this class among the manifest node types.
    resource_type: Literal["model"]
    # Unique id of the node.
    unique_id: str
    # Model name.
    name: str
    # Database the model belongs to.
    database: str
    # Schema the model belongs to; populated from the "schema" key through the alias.
    schema_: str = Field(alias="schema")
    # Model description, when provided.
    description: str | None = None
    # Node configuration, when provided.
    config: DbtManifestNodeConfig | None = None
    # Declared columns, keyed by string.
    columns: dict[str, DbtManifestColumn]
    # Dependency lists keyed by kind, when provided (the "nodes" entry is the one read by the extractor).
    depends_on: dict[str, list[str]] | None = None
    # Primary key column names, when provided.
    primary_key: list[str] | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DbtManifestOtherNode(BaseModel):
    """A manifest node of one of the listed non-model resource types; only the discriminator is declared."""

    # Discriminator value; any of these selects this class instead of DbtManifestModel.
    resource_type: Literal["seed", "analysis", "test", "operation", "sql_operation", "snapshot"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Tagged union of manifest nodes: the "resource_type" field selects which class is used to parse a node.
DbtManifestNode = Annotated[DbtManifestModel | DbtManifestOtherNode, Discriminator("resource_type")]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DbtManifest(BaseModel):
    """Subset of ``manifest.json`` that is parsed: its nodes."""

    # Manifest nodes, keyed by string.
    nodes: dict[str, DbtManifestNode]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DbtCatalogColumn(BaseModel):
    """A column as described by a catalog node."""

    # Column name.
    name: str
    # Column type.
    type: str
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class DbtCatalogNode(BaseModel):
    """A node of the catalog together with its columns."""

    # Unique id of the node, when provided.
    unique_id: str | None = None
    # Columns of the node, keyed by string.
    columns: dict[str, DbtCatalogColumn]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class DbtCatalog(BaseModel):
    """Subset of ``catalog.json`` that is parsed: its nodes."""

    # Catalog nodes, keyed by string (the extractor looks them up by a model's unique id).
    nodes: dict[str, DbtCatalogNode]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(kw_only=True)
|
|
56
|
+
class DbtArtifacts:
|
|
57
|
+
manifest: DbtManifest
|
|
58
|
+
catalog: DbtCatalog | None
|
|
@@ -45,6 +45,7 @@ def _load_builtin_datasource_plugins() -> list[BuildDatasourcePlugin]:
|
|
|
45
45
|
"""Statically register built-in plugins."""
|
|
46
46
|
from databao_context_engine.plugins.databases.duckdb.duckdb_db_plugin import DuckDbPlugin
|
|
47
47
|
from databao_context_engine.plugins.databases.sqlite.sqlite_db_plugin import SQLiteDbPlugin
|
|
48
|
+
from databao_context_engine.plugins.dbt.dbt_plugin import DbtPlugin
|
|
48
49
|
from databao_context_engine.plugins.resources.parquet_plugin import ParquetPlugin
|
|
49
50
|
|
|
50
51
|
# optional plugins are added to the python environment via extras
|
|
@@ -91,7 +92,7 @@ def _load_builtin_datasource_plugins() -> list[BuildDatasourcePlugin]:
|
|
|
91
92
|
except ImportError:
|
|
92
93
|
pass
|
|
93
94
|
|
|
94
|
-
required_plugins: list[BuildDatasourcePlugin] = [DuckDbPlugin(), ParquetPlugin(), SQLiteDbPlugin()]
|
|
95
|
+
required_plugins: list[BuildDatasourcePlugin] = [DuckDbPlugin(), ParquetPlugin(), SQLiteDbPlugin(), DbtPlugin()]
|
|
95
96
|
return required_plugins + optional_plugins
|
|
96
97
|
|
|
97
98
|
|
|
@@ -82,10 +82,16 @@ databao_context_engine/plugins/databases/snowflake/snowflake_introspector.py,sha
|
|
|
82
82
|
databao_context_engine/plugins/databases/sqlite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
83
|
databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py,sha256=MI896G7Yq8NcQ8sTWErAFD7YK8qfDpZZprxoszJ46l8,460
|
|
84
84
|
databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py,sha256=kKFNzclp1NmQ6BF3ylGn86R5PuQh6aqevj6E7_zAKmQ,8361
|
|
85
|
+
databao_context_engine/plugins/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
86
|
+
databao_context_engine/plugins/dbt/dbt_chunker.py,sha256=5OQo0Y9ouelZV8KQBA0Kqksx72wRhTdRztUOSCKwLZc,1516
|
|
87
|
+
databao_context_engine/plugins/dbt/dbt_context_extractor.py,sha256=8ac2Fx5vv0t_AGqNbx9BXj73l9ZqjDK8r-VAAQukcP0,3748
|
|
88
|
+
databao_context_engine/plugins/dbt/dbt_plugin.py,sha256=-oAVh4u9YZ7j1b6w2K2lwcZT_bOdkN-lL1RRBWgB20k,1026
|
|
89
|
+
databao_context_engine/plugins/dbt/types.py,sha256=yJUVSZa5Ohhx6Znl_20gKwjvgxkBBcxlKrAFFV6tfgY,881
|
|
90
|
+
databao_context_engine/plugins/dbt/types_artifacts.py,sha256=OC2_DL7srcniVqrj9uj5XxNV-7VcA974xqgU1Rzfd5A,1349
|
|
85
91
|
databao_context_engine/plugins/duckdb_tools.py,sha256=46rctnTxDPAhHtaiTp1DxMuuDuRKrtKWJFSSM2w7uUU,645
|
|
86
92
|
databao_context_engine/plugins/files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
93
|
databao_context_engine/plugins/files/unstructured_files_plugin.py,sha256=eqs1anQhYBZh7xu4CwhfkqXQjGE5gJnKEwyJbtUR78E,2384
|
|
88
|
-
databao_context_engine/plugins/plugin_loader.py,sha256=
|
|
94
|
+
databao_context_engine/plugins/plugin_loader.py,sha256=x5cZ8pUwrEYbadqlPddZvBzU2BpdyBWYI4KlEtffFUY,4159
|
|
89
95
|
databao_context_engine/plugins/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
96
|
databao_context_engine/plugins/resources/parquet_chunker.py,sha256=R9WCOBqpKRTVN6t5eeOm_mmnKBOxvjIiQ9zTc8vnUb4,848
|
|
91
97
|
databao_context_engine/plugins/resources/parquet_introspector.py,sha256=Cn_yh6E-dOTOZstlavEGAsV6ZRKZXJraVAl_pzJJuGs,5629
|
|
@@ -129,7 +135,7 @@ databao_context_engine/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
|
|
|
129
135
|
databao_context_engine/system/properties.py,sha256=mQ7-_PZeYSESYn1cMUQ0IK7rJEnbhc7t4WesFjAgo-Q,429
|
|
130
136
|
databao_context_engine/templating/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
137
|
databao_context_engine/templating/renderer.py,sha256=W2-0IGStAp6oxANmsKs_Z-UoIR6Gt_c4ILYFa3Hruo4,662
|
|
132
|
-
databao_context_engine-0.1.
|
|
133
|
-
databao_context_engine-0.1.
|
|
134
|
-
databao_context_engine-0.1.
|
|
135
|
-
databao_context_engine-0.1.
|
|
138
|
+
databao_context_engine-0.1.6.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
139
|
+
databao_context_engine-0.1.6.dist-info/entry_points.txt,sha256=5EeQJ1W8zEFh4HuF1bs2zBeoP408oiwuM9UrkJiurgI,138
|
|
140
|
+
databao_context_engine-0.1.6.dist-info/METADATA,sha256=weN9iS4ZtRnnt3tTJY-epHUgNdpMo-dWD4E3F-8dXyk,7773
|
|
141
|
+
databao_context_engine-0.1.6.dist-info/RECORD,,
|
|
File without changes
|
{databao_context_engine-0.1.5.dist-info → databao_context_engine-0.1.6.dist-info}/entry_points.txt
RENAMED
|
File without changes
|