odibi 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odibi/__init__.py +32 -0
- odibi/__main__.py +8 -0
- odibi/catalog.py +3011 -0
- odibi/cli/__init__.py +11 -0
- odibi/cli/__main__.py +6 -0
- odibi/cli/catalog.py +553 -0
- odibi/cli/deploy.py +69 -0
- odibi/cli/doctor.py +161 -0
- odibi/cli/export.py +66 -0
- odibi/cli/graph.py +150 -0
- odibi/cli/init_pipeline.py +242 -0
- odibi/cli/lineage.py +259 -0
- odibi/cli/main.py +215 -0
- odibi/cli/run.py +98 -0
- odibi/cli/schema.py +208 -0
- odibi/cli/secrets.py +232 -0
- odibi/cli/story.py +379 -0
- odibi/cli/system.py +132 -0
- odibi/cli/test.py +286 -0
- odibi/cli/ui.py +31 -0
- odibi/cli/validate.py +39 -0
- odibi/config.py +3541 -0
- odibi/connections/__init__.py +9 -0
- odibi/connections/azure_adls.py +499 -0
- odibi/connections/azure_sql.py +709 -0
- odibi/connections/base.py +28 -0
- odibi/connections/factory.py +322 -0
- odibi/connections/http.py +78 -0
- odibi/connections/local.py +119 -0
- odibi/connections/local_dbfs.py +61 -0
- odibi/constants.py +17 -0
- odibi/context.py +528 -0
- odibi/diagnostics/__init__.py +12 -0
- odibi/diagnostics/delta.py +520 -0
- odibi/diagnostics/diff.py +169 -0
- odibi/diagnostics/manager.py +171 -0
- odibi/engine/__init__.py +20 -0
- odibi/engine/base.py +334 -0
- odibi/engine/pandas_engine.py +2178 -0
- odibi/engine/polars_engine.py +1114 -0
- odibi/engine/registry.py +54 -0
- odibi/engine/spark_engine.py +2362 -0
- odibi/enums.py +7 -0
- odibi/exceptions.py +297 -0
- odibi/graph.py +426 -0
- odibi/introspect.py +1214 -0
- odibi/lineage.py +511 -0
- odibi/node.py +3341 -0
- odibi/orchestration/__init__.py +0 -0
- odibi/orchestration/airflow.py +90 -0
- odibi/orchestration/dagster.py +77 -0
- odibi/patterns/__init__.py +24 -0
- odibi/patterns/aggregation.py +599 -0
- odibi/patterns/base.py +94 -0
- odibi/patterns/date_dimension.py +423 -0
- odibi/patterns/dimension.py +696 -0
- odibi/patterns/fact.py +748 -0
- odibi/patterns/merge.py +128 -0
- odibi/patterns/scd2.py +148 -0
- odibi/pipeline.py +2382 -0
- odibi/plugins.py +80 -0
- odibi/project.py +581 -0
- odibi/references.py +151 -0
- odibi/registry.py +246 -0
- odibi/semantics/__init__.py +71 -0
- odibi/semantics/materialize.py +392 -0
- odibi/semantics/metrics.py +361 -0
- odibi/semantics/query.py +743 -0
- odibi/semantics/runner.py +430 -0
- odibi/semantics/story.py +507 -0
- odibi/semantics/views.py +432 -0
- odibi/state/__init__.py +1203 -0
- odibi/story/__init__.py +55 -0
- odibi/story/doc_story.py +554 -0
- odibi/story/generator.py +1431 -0
- odibi/story/lineage.py +1043 -0
- odibi/story/lineage_utils.py +324 -0
- odibi/story/metadata.py +608 -0
- odibi/story/renderers.py +453 -0
- odibi/story/templates/run_story.html +2520 -0
- odibi/story/themes.py +216 -0
- odibi/testing/__init__.py +13 -0
- odibi/testing/assertions.py +75 -0
- odibi/testing/fixtures.py +85 -0
- odibi/testing/source_pool.py +277 -0
- odibi/transformers/__init__.py +122 -0
- odibi/transformers/advanced.py +1472 -0
- odibi/transformers/delete_detection.py +610 -0
- odibi/transformers/manufacturing.py +1029 -0
- odibi/transformers/merge_transformer.py +778 -0
- odibi/transformers/relational.py +675 -0
- odibi/transformers/scd.py +579 -0
- odibi/transformers/sql_core.py +1356 -0
- odibi/transformers/validation.py +165 -0
- odibi/ui/__init__.py +0 -0
- odibi/ui/app.py +195 -0
- odibi/utils/__init__.py +66 -0
- odibi/utils/alerting.py +667 -0
- odibi/utils/config_loader.py +343 -0
- odibi/utils/console.py +231 -0
- odibi/utils/content_hash.py +202 -0
- odibi/utils/duration.py +43 -0
- odibi/utils/encoding.py +102 -0
- odibi/utils/extensions.py +28 -0
- odibi/utils/hashing.py +61 -0
- odibi/utils/logging.py +203 -0
- odibi/utils/logging_context.py +740 -0
- odibi/utils/progress.py +429 -0
- odibi/utils/setup_helpers.py +302 -0
- odibi/utils/telemetry.py +140 -0
- odibi/validation/__init__.py +62 -0
- odibi/validation/engine.py +765 -0
- odibi/validation/explanation_linter.py +155 -0
- odibi/validation/fk.py +547 -0
- odibi/validation/gate.py +252 -0
- odibi/validation/quarantine.py +605 -0
- odibi/writers/__init__.py +15 -0
- odibi/writers/sql_server_writer.py +2081 -0
- odibi-2.5.0.dist-info/METADATA +255 -0
- odibi-2.5.0.dist-info/RECORD +124 -0
- odibi-2.5.0.dist-info/WHEEL +5 -0
- odibi-2.5.0.dist-info/entry_points.txt +2 -0
- odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
- odibi-2.5.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
|
|
5
|
+
from odibi.config import ProjectConfig
|
|
6
|
+
|
|
7
|
+
AIRFLOW_DAG_TEMPLATE = """
|
|
8
|
+
from airflow import DAG
|
|
9
|
+
from airflow.operators.bash import BashOperator
|
|
10
|
+
from datetime import datetime, timedelta
|
|
11
|
+
|
|
12
|
+
# Generated by Odibi
|
|
13
|
+
# Project: {{ project_name }}
|
|
14
|
+
# Pipeline: {{ pipeline_name }}
|
|
15
|
+
|
|
16
|
+
default_args = {
|
|
17
|
+
'owner': '{{ owner }}',
|
|
18
|
+
'depends_on_past': False,
|
|
19
|
+
'start_date': datetime(2023, 1, 1),
|
|
20
|
+
'email_on_failure': False,
|
|
21
|
+
'email_on_retry': False,
|
|
22
|
+
'retries': {{ retries }},
|
|
23
|
+
'retry_delay': timedelta(minutes=5),
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
with DAG(
|
|
27
|
+
'{{ dag_id }}',
|
|
28
|
+
default_args=default_args,
|
|
29
|
+
description='{{ description }}',
|
|
30
|
+
schedule_interval=None, # Set schedule manually or via config
|
|
31
|
+
catchup=False,
|
|
32
|
+
tags=['odibi', '{{ layer }}'],
|
|
33
|
+
) as dag:
|
|
34
|
+
|
|
35
|
+
# --- Nodes ---
|
|
36
|
+
{% for node in nodes %}
|
|
37
|
+
{{ node.safe_name }} = BashOperator(
|
|
38
|
+
task_id='{{ node.name }}',
|
|
39
|
+
bash_command='odibi run --pipeline {{ pipeline_name }} --node {{ node.name }}',
|
|
40
|
+
)
|
|
41
|
+
{% endfor %}
|
|
42
|
+
|
|
43
|
+
# --- Dependencies ---
|
|
44
|
+
{% for node in nodes %}
|
|
45
|
+
{% if node.upstream_vars %}
|
|
46
|
+
# {{ node.name }} depends on {{ node.depends_on }}
|
|
47
|
+
[{{ node.upstream_vars|join(', ') }}] >> {{ node.safe_name }}
|
|
48
|
+
{% endif %}
|
|
49
|
+
{% endfor %}
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class AirflowExporter:
    """Render an Airflow DAG module for one pipeline of an Odibi project.

    The pipeline's nodes are handed to ``AIRFLOW_DAG_TEMPLATE``, which emits
    one ``BashOperator`` variable per node and one ``>>`` dependency line per
    node that has upstream nodes.
    """

    # Replacements that keep a value valid between the hard-coded single
    # quotes the template puts around owner, dag_id, description and layer.
    _LITERAL_ESCAPES = str.maketrans(
        {"\\": "\\\\", "'": "\\'", "\n": "\\n", "\r": "\\r"}
    )

    def __init__(self, config: "ProjectConfig"):
        """Store the project configuration the DAG is generated from."""
        self.config = config

    @staticmethod
    def _escape_literal(value) -> str:
        """Escape ``value`` for use inside a single-quoted Python literal.

        Backslashes, single quotes and line breaks are replaced by their
        escape sequences so the rendered DAG file remains valid Python even
        for multi-line or quoted descriptions.
        """
        return str(value).translate(AirflowExporter._LITERAL_ESCAPES)

    def _sanitize(self, name: str) -> str:
        """Turn a node name into a valid Python variable name.

        Every character outside ``[a-zA-Z0-9_]`` becomes ``_``. If the result
        is empty, starts with a digit, or is a Python keyword, it is prefixed
        with ``_`` because the template uses it as an assignment target.
        """
        import keyword

        safe = re.sub(r"[^a-zA-Z0-9_]", "_", name)
        if not safe or safe[0].isdigit() or keyword.iskeyword(safe):
            safe = f"_{safe}"
        return safe

    def generate_code(self, pipeline_name: str) -> str:
        """Return the source of an Airflow DAG module for ``pipeline_name``.

        Args:
            pipeline_name: Value of the ``pipeline`` attribute of the pipeline
                to export.

        Returns:
            The rendered ``AIRFLOW_DAG_TEMPLATE`` as a string.

        Raises:
            ValueError: If no configured pipeline has that name.
        """
        pipeline = next(
            (p for p in self.config.pipelines if p.pipeline == pipeline_name),
            None,
        )
        if pipeline is None:
            raise ValueError(f"Pipeline '{pipeline_name}' not found in config")

        nodes_ctx = [
            {
                "name": node.name,
                "safe_name": self._sanitize(node.name),
                "depends_on": node.depends_on,
                # Upstream names go through the same sanitizer so they match
                # the variables created for those nodes.
                "upstream_vars": [
                    self._sanitize(dep) for dep in (node.depends_on or [])
                ],
            }
            for node in pipeline.nodes
        ]

        retry = self.config.retry
        escape = self._escape_literal

        # project_name and pipeline_name are passed through unchanged: the
        # template uses them in comments and in the shell command, not only
        # inside quoted Python literals.
        return Template(AIRFLOW_DAG_TEMPLATE).render(
            project_name=self.config.project,
            pipeline_name=pipeline.pipeline,
            dag_id=escape(f"odibi_{pipeline.pipeline}"),
            owner=escape(self.config.owner or "airflow"),
            description=escape(pipeline.description or "Odibi Pipeline"),
            layer=escape(pipeline.layer or "default"),
            retries=retry.max_attempts if retry.enabled else 0,
            nodes=nodes_ctx,
        )
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from odibi.config import ProjectConfig
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from dagster import (
|
|
5
|
+
AssetExecutionContext,
|
|
6
|
+
Definitions,
|
|
7
|
+
asset,
|
|
8
|
+
)
|
|
9
|
+
except ImportError:
|
|
10
|
+
# Dagster is optional
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DagsterFactory:
    """
    Factory to create Dagster definitions from Odibi configuration.

    Every node of every configured pipeline becomes one Dagster asset whose
    body runs ``odibi run --pipeline <pipeline> --node <node>`` in a
    subprocess.

    Usage in definitions.py:
        from odibi.config import load_config
        from odibi.orchestration.dagster import DagsterFactory

        config = load_config("odibi.yaml")
        defs = DagsterFactory(config).create_definitions()
    """

    # Names bound by this module's optional ``from dagster import ...``.
    _DAGSTER_NAMES = ("AssetExecutionContext", "Definitions", "asset")

    def __init__(self, config: "ProjectConfig"):
        """Store the project configuration the assets are built from."""
        self.config = config

    def create_definitions(self) -> "Definitions":
        """Build a ``Definitions`` object with one asset per pipeline node.

        Returns:
            ``Definitions(assets=...)`` containing the generated assets.

        Raises:
            ImportError: If the Dagster names could not be imported.
        """
        # The module imports names *from* dagster, so ``dagster`` itself is
        # never a module global; check for the imported names instead.
        module_globals = globals()
        if any(name not in module_globals for name in self._DAGSTER_NAMES):
            raise ImportError("Dagster not installed. Run 'pip install dagster'")

        all_assets = [
            self._build_asset(pipeline, node)
            for pipeline in self.config.pipelines
            for node in pipeline.nodes
        ]
        return Definitions(assets=all_assets)

    @staticmethod
    def _build_asset(pipeline, node):
        """Create the Dagster asset that runs a single Odibi node.

        The pipeline and node names are captured in locals here so each asset
        function is bound to its own pair.
        """
        p_name = pipeline.pipeline
        n_name = node.name

        @asset(
            # Dashes are replaced for the asset name and its dependencies;
            # the op tags and the CLI call keep the original node name.
            name=n_name.replace("-", "_"),
            deps=[dep.replace("-", "_") for dep in (node.depends_on or [])],
            group_name=p_name,
            description=node.description,
            compute_kind="odibi",
            op_tags={"odibi/pipeline": p_name, "odibi/node": n_name},
        )
        def _asset_fn(context: AssetExecutionContext):
            # Shell out to the CLI rather than importing the runner, so each
            # node executes in a clean, isolated process.
            import subprocess

            context.log.info(f"Running Odibi node: {n_name} in pipeline {p_name}")
            cmd = ["odibi", "run", "--pipeline", p_name, "--node", n_name]

            result = subprocess.run(cmd, capture_output=True, text=True, check=False)

            if result.stdout:
                context.log.info(result.stdout)
            if result.stderr:
                context.log.error(result.stderr)

            if result.returncode != 0:
                raise RuntimeError(f"Odibi execution failed: {result.stderr}")

        return _asset_fn
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from typing import Dict, Type
|
|
2
|
+
|
|
3
|
+
from odibi.patterns.aggregation import AggregationPattern
|
|
4
|
+
from odibi.patterns.base import Pattern
|
|
5
|
+
from odibi.patterns.date_dimension import DateDimensionPattern
|
|
6
|
+
from odibi.patterns.dimension import DimensionPattern
|
|
7
|
+
from odibi.patterns.fact import FactPattern
|
|
8
|
+
from odibi.patterns.merge import MergePattern
|
|
9
|
+
from odibi.patterns.scd2 import SCD2Pattern
|
|
10
|
+
|
|
11
|
+
# Registry of the built-in patterns: maps each pattern name accepted by
# get_pattern_class() to the class that implements it.
_PATTERNS: Dict[str, Type[Pattern]] = {
    "scd2": SCD2Pattern,
    "merge": MergePattern,
    "dimension": DimensionPattern,
    "date_dimension": DateDimensionPattern,
    "aggregation": AggregationPattern,
    "fact": FactPattern,
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_pattern_class(name: str) -> Type[Pattern]:
    """Return the pattern class registered under ``name``.

    Args:
        name: Key to look up in the ``_PATTERNS`` registry.

    Returns:
        The class stored for ``name``.

    Raises:
        ValueError: If ``name`` is not a registered pattern; the message
            lists the available names.
    """
    if name in _PATTERNS:
        return _PATTERNS[name]

    available = list(_PATTERNS.keys())
    raise ValueError(f"Unknown pattern: '{name}'. Available: {available}")
|