odibi 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odibi/__init__.py +32 -0
- odibi/__main__.py +8 -0
- odibi/catalog.py +3011 -0
- odibi/cli/__init__.py +11 -0
- odibi/cli/__main__.py +6 -0
- odibi/cli/catalog.py +553 -0
- odibi/cli/deploy.py +69 -0
- odibi/cli/doctor.py +161 -0
- odibi/cli/export.py +66 -0
- odibi/cli/graph.py +150 -0
- odibi/cli/init_pipeline.py +242 -0
- odibi/cli/lineage.py +259 -0
- odibi/cli/main.py +215 -0
- odibi/cli/run.py +98 -0
- odibi/cli/schema.py +208 -0
- odibi/cli/secrets.py +232 -0
- odibi/cli/story.py +379 -0
- odibi/cli/system.py +132 -0
- odibi/cli/test.py +286 -0
- odibi/cli/ui.py +31 -0
- odibi/cli/validate.py +39 -0
- odibi/config.py +3541 -0
- odibi/connections/__init__.py +9 -0
- odibi/connections/azure_adls.py +499 -0
- odibi/connections/azure_sql.py +709 -0
- odibi/connections/base.py +28 -0
- odibi/connections/factory.py +322 -0
- odibi/connections/http.py +78 -0
- odibi/connections/local.py +119 -0
- odibi/connections/local_dbfs.py +61 -0
- odibi/constants.py +17 -0
- odibi/context.py +528 -0
- odibi/diagnostics/__init__.py +12 -0
- odibi/diagnostics/delta.py +520 -0
- odibi/diagnostics/diff.py +169 -0
- odibi/diagnostics/manager.py +171 -0
- odibi/engine/__init__.py +20 -0
- odibi/engine/base.py +334 -0
- odibi/engine/pandas_engine.py +2178 -0
- odibi/engine/polars_engine.py +1114 -0
- odibi/engine/registry.py +54 -0
- odibi/engine/spark_engine.py +2362 -0
- odibi/enums.py +7 -0
- odibi/exceptions.py +297 -0
- odibi/graph.py +426 -0
- odibi/introspect.py +1214 -0
- odibi/lineage.py +511 -0
- odibi/node.py +3341 -0
- odibi/orchestration/__init__.py +0 -0
- odibi/orchestration/airflow.py +90 -0
- odibi/orchestration/dagster.py +77 -0
- odibi/patterns/__init__.py +24 -0
- odibi/patterns/aggregation.py +599 -0
- odibi/patterns/base.py +94 -0
- odibi/patterns/date_dimension.py +423 -0
- odibi/patterns/dimension.py +696 -0
- odibi/patterns/fact.py +748 -0
- odibi/patterns/merge.py +128 -0
- odibi/patterns/scd2.py +148 -0
- odibi/pipeline.py +2382 -0
- odibi/plugins.py +80 -0
- odibi/project.py +581 -0
- odibi/references.py +151 -0
- odibi/registry.py +246 -0
- odibi/semantics/__init__.py +71 -0
- odibi/semantics/materialize.py +392 -0
- odibi/semantics/metrics.py +361 -0
- odibi/semantics/query.py +743 -0
- odibi/semantics/runner.py +430 -0
- odibi/semantics/story.py +507 -0
- odibi/semantics/views.py +432 -0
- odibi/state/__init__.py +1203 -0
- odibi/story/__init__.py +55 -0
- odibi/story/doc_story.py +554 -0
- odibi/story/generator.py +1431 -0
- odibi/story/lineage.py +1043 -0
- odibi/story/lineage_utils.py +324 -0
- odibi/story/metadata.py +608 -0
- odibi/story/renderers.py +453 -0
- odibi/story/templates/run_story.html +2520 -0
- odibi/story/themes.py +216 -0
- odibi/testing/__init__.py +13 -0
- odibi/testing/assertions.py +75 -0
- odibi/testing/fixtures.py +85 -0
- odibi/testing/source_pool.py +277 -0
- odibi/transformers/__init__.py +122 -0
- odibi/transformers/advanced.py +1472 -0
- odibi/transformers/delete_detection.py +610 -0
- odibi/transformers/manufacturing.py +1029 -0
- odibi/transformers/merge_transformer.py +778 -0
- odibi/transformers/relational.py +675 -0
- odibi/transformers/scd.py +579 -0
- odibi/transformers/sql_core.py +1356 -0
- odibi/transformers/validation.py +165 -0
- odibi/ui/__init__.py +0 -0
- odibi/ui/app.py +195 -0
- odibi/utils/__init__.py +66 -0
- odibi/utils/alerting.py +667 -0
- odibi/utils/config_loader.py +343 -0
- odibi/utils/console.py +231 -0
- odibi/utils/content_hash.py +202 -0
- odibi/utils/duration.py +43 -0
- odibi/utils/encoding.py +102 -0
- odibi/utils/extensions.py +28 -0
- odibi/utils/hashing.py +61 -0
- odibi/utils/logging.py +203 -0
- odibi/utils/logging_context.py +740 -0
- odibi/utils/progress.py +429 -0
- odibi/utils/setup_helpers.py +302 -0
- odibi/utils/telemetry.py +140 -0
- odibi/validation/__init__.py +62 -0
- odibi/validation/engine.py +765 -0
- odibi/validation/explanation_linter.py +155 -0
- odibi/validation/fk.py +547 -0
- odibi/validation/gate.py +252 -0
- odibi/validation/quarantine.py +605 -0
- odibi/writers/__init__.py +15 -0
- odibi/writers/sql_server_writer.py +2081 -0
- odibi-2.5.0.dist-info/METADATA +255 -0
- odibi-2.5.0.dist-info/RECORD +124 -0
- odibi-2.5.0.dist-info/WHEEL +5 -0
- odibi-2.5.0.dist-info/entry_points.txt +2 -0
- odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
- odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/cli/doctor.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import os
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from odibi.pipeline import PipelineManager
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def check_dependencies() -> bool:
    """Check installed dependencies.

    Imports each known dependency and prints its version. Missing
    required packages mark the environment unhealthy; missing optional
    packages are reported but tolerated.

    Returns:
        True if every *required* dependency is importable, else False.
    """
    print("Checking dependencies...")
    deps = [
        ("pandas", "Pandas"),
        ("duckdb", "DuckDB (Local SQL Engine)"),
        ("pyspark", "PySpark (Distributed Engine)"),
        ("fastapi", "FastAPI (UI Backend)"),
        ("uvicorn", "Uvicorn (UI Server)"),
        ("openlineage.client", "OpenLineage (Governance)"),
        ("azure.storage.blob", "Azure Blob Storage"),
        ("delta", "Delta Lake"),
    ]

    # Hoisted out of the loop (was rebuilt inside the except branch per
    # failing import): these are nice-to-have; everything else is required.
    optional = {"pyspark", "openlineage.client", "azure.storage.blob", "delta"}

    all_good = True
    for module, name in deps:
        try:
            mod = importlib.import_module(module)
            version = getattr(mod, "__version__", "installed")
            print(f" [OK] {name}: {version}")
        except ImportError:
            if module in optional:
                print(f" [OPTIONAL] {name}: Not installed")
            else:
                print(f" [MISSING] {name}")
                all_good = False

    return all_good
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def check_config() -> Optional[PipelineManager]:
    """Locate and validate the project configuration file.

    Searches the working directory for the first known config filename
    and, when one exists, asks PipelineManager to parse it.

    Returns:
        A PipelineManager on success; None when no config file exists or
        parsing fails.
    """
    print("\nChecking configuration...")
    candidates = ("odibi.yaml", "project.yaml", "odibi.yml", "project.yml")
    found_config = next((name for name in candidates if os.path.exists(name)), None)

    if found_config is None:
        print(" [FAIL] No configuration file found (odibi.yaml or project.yaml)")
        return None

    print(f" [OK] Found {found_config}")

    try:
        # PipelineManager does the actual parse + validation work.
        manager = PipelineManager.from_yaml(found_config)
    except Exception as e:
        print(f" [FAIL] Invalid configuration: {e}")
        return None

    print(" [OK] Configuration is valid")
    return manager
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def check_connections(manager: "Optional[PipelineManager]") -> bool:
    """Validate every configured connection.

    Args:
        manager: Loaded pipeline manager, or None when config loading
            failed (checks are then skipped). The annotation is Optional
            because doctor_command passes check_config()'s result, which
            may be None.

    Returns:
        True when every connection validates; False on any failure or
        when no manager is available.
    """
    print("\nChecking connections...")
    if not manager:
        print(" [SKIP] Skipping connection checks (no config)")
        return False

    all_good = True
    for name, conn in manager.connections.items():
        try:
            conn.validate()
            print(f" [OK] {name} ({conn.__class__.__name__})")
        except Exception as e:
            # Report and continue so every connection gets checked.
            print(f" [FAIL] {name}: {e}")
            all_good = False

    return all_good
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def check_system_catalog(manager: "Optional[PipelineManager]") -> bool:
    """Check system catalog connectivity and integrity.

    Verifies that every required meta table exists at its catalog path.

    Args:
        manager: Loaded pipeline manager, or None when config loading
            failed. Optional because callers pass check_config()'s result.

    Returns:
        True when the catalog is healthy or simply not configured
        (local-only mode is valid); False when a required table is missing.
    """
    print("\nChecking System Catalog...")

    # No catalog configured is a supported local-only setup, not an error.
    if not manager or not manager.catalog_manager:
        print(" [SKIP] System Catalog not configured (Local Only Mode)")
        return True

    print(f" [INFO] Catalog Path: {manager.catalog_manager.base_path}")

    all_good = True
    required_tables = [
        "meta_tables",
        "meta_runs",
        "meta_patterns",
        "meta_metrics",
        "meta_state",
        "meta_pipelines",
        "meta_nodes",
    ]

    for table in required_tables:
        path = manager.catalog_manager.tables.get(table)
        # Access internal method for check (acceptable for doctor/diagnostic tool)
        if manager.catalog_manager._table_exists(path):
            print(f" [OK] {table}")
        else:
            print(f" [FAIL] {table} not found at {path}")
            all_good = False

    return all_good
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def doctor_command(args) -> int:
    """Run the doctor health checks.

    Executes dependency, configuration, connection, and catalog checks
    in order, then reports only the first failing category.

    Args:
        args: Parsed CLI arguments (unused; present for CLI dispatch).

    Returns:
        0 when the environment is healthy, 1 otherwise.
    """
    try:
        version = __import__("odibi").__version__
    except Exception:
        version = "unknown"

    print(f"Odibi Doctor (v{version})")
    print("=" * 40)

    deps_ok = check_dependencies()
    manager = check_config()
    conns_ok = check_connections(manager)
    catalog_ok = check_system_catalog(manager) if manager else True

    # Report the first failing category, mirroring the check order above.
    outcomes = [
        (deps_ok, "\n[WARNING] Some required dependencies are missing."),
        (bool(manager), "\n[WARNING] Configuration issues found."),
        (conns_ok, "\n[WARNING] Some connections failed validation."),
        (catalog_ok, "\n[WARNING] System Catalog issues found."),
    ]
    for ok, warning in outcomes:
        if not ok:
            print(warning)
            return 1

    print("\n[SUCCESS] You are ready to run pipelines!")
    return 0
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def add_doctor_parser(subparsers):
    """Register the ``doctor`` subcommand and return its parser."""
    return subparsers.add_parser("doctor", help="Check environment health")
|
odibi/cli/export.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from odibi.config import load_config_from_file
|
|
4
|
+
from odibi.orchestration.airflow import AirflowExporter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def add_export_parser(subparsers):
    """Register the ``export`` subcommand and its arguments."""
    parser = subparsers.add_parser("export", help="Export pipeline to orchestration code")
    parser.add_argument(
        "--target", choices=["airflow", "dagster"], required=True, help="Export target"
    )
    parser.add_argument("--pipeline", help="Pipeline name (required for Airflow)")
    parser.add_argument("--out", required=True, help="Output file path")
    parser.add_argument("--config", default="odibi.yaml", help="Path to odibi.yaml")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def export_command(args):
    """Export pipeline to orchestration code.

    Args:
        args: Parsed CLI arguments carrying ``config``, ``target``,
            ``pipeline`` and ``out``.

    Returns:
        0 on success, 1 on any error.
    """
    config_path = args.config
    if not os.path.exists(config_path):
        print(f"Error: Config file '{config_path}' not found.")
        return 1

    try:
        project_config = load_config_from_file(config_path)
    except Exception as e:
        print(f"Error loading config: {e}")
        return 1

    if args.target == "airflow":
        if not args.pipeline:
            print("Error: --pipeline is required for Airflow export.")
            return 1

        exporter = AirflowExporter(project_config)
        try:
            code = exporter.generate_code(args.pipeline)
            with open(args.out, "w", encoding="utf-8") as f:
                f.write(code)
            print(f"Successfully exported Airflow DAG to {args.out}")
        except ValueError as e:
            print(f"Error: {e}")
            return 1

    elif args.target == "dagster":
        # Generate a definitions.py scaffolding.
        # BUGFIX: embed the config path with !r so backslashes (Windows
        # paths) or quotes produce a valid Python string literal in the
        # generated file; raw interpolation inside "..." broke the output.
        content = f"""
# Generated by Odibi
import os
from dagster import Definitions
from odibi.config import load_config_from_file
from odibi.orchestration.dagster import DagsterFactory

# Load project config
# Ensure odibi.yaml is in the same directory or adjust path
config_path = os.path.join(os.path.dirname(__file__), {config_path!r})
project_config = load_config_from_file(config_path)

# Create definitions
defs = DagsterFactory(project_config).create_definitions()
"""
        with open(args.out, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"Successfully created Dagster definitions at {args.out}")

    return 0
|
odibi/cli/graph.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graph CLI Command
|
|
3
|
+
=================
|
|
4
|
+
|
|
5
|
+
Visualizes the pipeline dependency graph.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from odibi.graph import DependencyGraph
|
|
9
|
+
from odibi.pipeline import PipelineManager
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def graph_command(args):
    """
    Handle graph subcommand.

    Loads the configured pipelines, picks the requested (or first)
    pipeline, and prints its dependency graph in the chosen format.

    Args:
        args: Parsed command-line arguments

    Returns:
        Exit code
    """
    try:
        manager = PipelineManager.from_yaml(args.config, env=getattr(args, "env", None))

        # Fall back to the first configured pipeline when none was named.
        pipeline_name = args.pipeline
        if not pipeline_name:
            available = manager.list_pipelines()
            if not available:
                print("❌ No pipelines found in configuration")
                return 1
            pipeline_name = available[0]

        try:
            pipeline = manager.get_pipeline(pipeline_name)
        except ValueError:
            print(f"❌ Pipeline '{pipeline_name}' not found")
            return 1

        # Render in the requested output format.
        if args.format == "ascii":
            print(pipeline.visualize())
        elif args.format == "dot":
            print(_generate_dot(pipeline.graph, pipeline_name, manager.catalog_manager))
        elif args.format == "mermaid":
            print(_generate_mermaid(pipeline.graph, pipeline_name, manager.catalog_manager))

        return 0

    except Exception as e:
        # Broad catch is deliberate: this is a CLI boundary.
        print(f"❌ Error generating graph: {e}")
        if args.verbose:
            import traceback

            traceback.print_exc()
        return 1
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _generate_dot(graph: DependencyGraph, pipeline_name: str, catalog_manager=None) -> str:
|
|
64
|
+
"""Generate DOT (Graphviz) representation."""
|
|
65
|
+
lines = []
|
|
66
|
+
lines.append(f'digraph "{pipeline_name}" {{')
|
|
67
|
+
lines.append(" rankdir=LR;")
|
|
68
|
+
lines.append(' node [shape=box, style=rounded, fontname="Helvetica"];')
|
|
69
|
+
lines.append(' edge [fontname="Helvetica"];')
|
|
70
|
+
lines.append("")
|
|
71
|
+
|
|
72
|
+
for node_name in graph.nodes:
|
|
73
|
+
# Add node
|
|
74
|
+
node = graph.nodes[node_name]
|
|
75
|
+
op_type = "unknown"
|
|
76
|
+
if node.read:
|
|
77
|
+
op_type = "read"
|
|
78
|
+
color = "lightblue"
|
|
79
|
+
elif node.write:
|
|
80
|
+
op_type = "write"
|
|
81
|
+
color = "lightgreen"
|
|
82
|
+
elif node.transform:
|
|
83
|
+
op_type = "transform"
|
|
84
|
+
color = "lightyellow"
|
|
85
|
+
|
|
86
|
+
# Enrich with Catalog Stats
|
|
87
|
+
stats_text = ""
|
|
88
|
+
if catalog_manager:
|
|
89
|
+
try:
|
|
90
|
+
avg_rows = catalog_manager.get_average_volume(node_name)
|
|
91
|
+
if avg_rows is not None:
|
|
92
|
+
stats_text = f"\\n~{int(avg_rows)} rows"
|
|
93
|
+
except Exception:
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
label = f"{node_name}\\n({op_type}){stats_text}"
|
|
97
|
+
lines.append(f' "{node_name}" [label="{label}", style="filled", fillcolor="{color}"];')
|
|
98
|
+
|
|
99
|
+
# Add edges
|
|
100
|
+
for dep in node.depends_on:
|
|
101
|
+
lines.append(f' "{dep}" -> "{node_name}";')
|
|
102
|
+
|
|
103
|
+
lines.append("}")
|
|
104
|
+
return "\n".join(lines)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _generate_mermaid(graph: DependencyGraph, pipeline_name: str, catalog_manager=None) -> str:
    """Generate Mermaid diagram.

    Renders each node with a type-specific shape and CSS class
    (read=circle, write=parallelogram, transform=box) and, when a
    catalog manager is supplied, annotates labels with average row
    counts on a best-effort basis.
    """
    out = ["graph LR"]

    # Define styles
    out.append(" classDef read fill:lightblue,stroke:#333,stroke-width:1px;")
    out.append(" classDef write fill:lightgreen,stroke:#333,stroke-width:1px;")
    out.append(" classDef transform fill:lightyellow,stroke:#333,stroke-width:1px;")

    for node_name in graph.nodes:
        node = graph.nodes[node_name]

        # Pick shape delimiters and class from the node's category.
        if node.read:
            shape, end_shape, style_class = "((", "))", "read"
        elif node.write:
            shape, end_shape, style_class = "[/", "/]", "write"
        else:
            shape, end_shape, style_class = "[", "]", "transform"

        # Enrich with Catalog Stats
        label = node_name
        if catalog_manager:
            try:
                avg_rows = catalog_manager.get_average_volume(node_name)
                if avg_rows is not None:
                    label = f"{node_name}<br/>~{int(avg_rows)} rows"
            except Exception:
                pass

        out.append(f' {node_name}{shape}"{label}"{end_shape}:::{style_class}')

        # Edges
        for dep in node.depends_on:
            out.append(f" {dep} --> {node_name}")

    return "\n".join(out)
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
# Map template names to their relative paths in the repo
|
|
9
|
+
# Keys are the names users pass via --template; values are paths
# relative to the repository root (see _select_template / resolution
# logic in init_pipeline_command).
TEMPLATE_MAP = {
    "kitchen": "examples/templates/kitchen_sink.odibi.yaml",
    "full": "examples/templates/template_full.yaml",
    "local": "examples/templates/simple_local.yaml",
    # NOTE(review): shares simple_local.yaml — presumably a dedicated
    # medallion template is planned; confirm.
    "local-medallion": "examples/templates/simple_local.yaml",
    "azure": "examples/templates/azure_spark.yaml",
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def add_init_parser(subparsers):
    """Register the init-pipeline command and its arguments."""
    parser = subparsers.add_parser(
        "init-pipeline",
        aliases=["create", "init", "generate-project"],
        help="Initialize a new Odibi project from a template",
    )
    parser.add_argument("name", help="Name of the project directory to create")
    parser.add_argument(
        "--template",
        choices=list(TEMPLATE_MAP),
        default=None,
        help="Template to use (default: prompt user)",
    )
    # --force allows clobbering an existing project directory.
    parser.add_argument(
        "--force", action="store_true", help="Overwrite existing directory if it exists"
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
_DOCKERFILE_CONTENT = """FROM python:3.11-slim

WORKDIR /app

# Install system dependencies if needed (e.g., for pyodbc)
# RUN apt-get update && apt-get install -y unixodbc-dev

# Install Odibi
RUN pip install odibi[all]

# Copy project files
COPY . /app

# Default command
CMD ["odibi", "run", "odibi.yaml"]
"""

_CI_YAML_CONTENT = """name: Odibi CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v3

    - name: Set up Python 3.11
      uses: actions/setup-python@v3
      with:
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install odibi[all] pytest

    - name: Check Health (Doctor)
      run: odibi doctor

    - name: Validate Configuration
      run: odibi validate odibi.yaml

    - name: Run Unit Tests
      run: odibi test

    # Optional: Dry Run
    # - name: Dry Run Pipeline
    #   run: odibi run odibi.yaml --dry-run
"""


def _select_template(template_name):
    """Return a template key, prompting interactively when none was given.

    Returns None (after logging) on an invalid numeric selection;
    non-numeric / EOF / interrupt input falls back to "local".
    """
    if template_name is not None:
        return template_name

    print("\nSelect a project template:")
    templates = list(TEMPLATE_MAP.keys())
    for i, t in enumerate(templates):
        print(f" {i + 1}. {t}")

    try:
        choice = input(f"\nEnter number (default: 1 [{templates[0]}]): ").strip()
        if not choice:
            return templates[0]
        idx = int(choice) - 1
        if 0 <= idx < len(templates):
            return templates[idx]
        logger.error("Invalid selection.")
        return None
    except (ValueError, EOFError, KeyboardInterrupt):
        # Fallback for non-interactive sessions.
        template_name = "local"
        logger.info(f"Using default template: {template_name}")
        return template_name


def _resolve_template_path(template_name):
    """Locate the template YAML in the repo; log and return None if missing."""
    # This file lives at odibi/cli/init_pipeline.py, so the repo root is
    # three levels up from here.
    repo_root = Path(__file__).resolve().parent.parent.parent
    source_path = repo_root / TEMPLATE_MAP[template_name]
    if not source_path.exists():
        # Fallback for packaged templates is not implemented; fail clearly.
        logger.error(f"Template file not found at: {source_path}")
        logger.error(
            "Ensure you are running Odibi from the repository root or templates are correctly installed."
        )
        return None
    return source_path


def _write_sample_data(target_dir):
    """Create a small CSV so the local templates run out of the box."""
    with open(target_dir / "data/raw/sample_data.csv", "w") as f:
        f.write("id,name,value,updated_at\n")
        f.write("1,Item A,100,2023-01-01 10:00:00\n")
        f.write("2,Item B,200,2023-01-01 11:00:00\n")
        f.write("1,Item A (Old),90,2023-01-01 09:00:00\n")


def _readme_content(project_name):
    """Render the generated project's README."""
    return f"""# {project_name}

A data engineering project built with [Odibi](https://github.com/henryodibi11/Odibi).

## Getting Started

### Prerequisites
- Python 3.9+
- Odibi (`pip install odibi`)

### Project Structure
- `odibi.yaml`: Main pipeline configuration
- `data/`: Local data storage
- `stories/`: Execution reports (HTML)

### Commands

**1. Validate Configuration**
```bash
odibi validate odibi.yaml
```

**2. Check Health**
```bash
odibi doctor
```

**3. Run Pipeline**
```bash
odibi run odibi.yaml
```

**4. View Results**
```bash
odibi ui
```

## CI/CD
A GitHub Actions workflow is included in `.github/workflows/ci.yaml` that validates the project on every push.
"""


def _scaffold_project(target_dir, source_path, template_name, project_name):
    """Create the project directory tree, config copy, and support files."""
    os.makedirs(target_dir)

    # Copy the template to odibi.yaml
    shutil.copy2(source_path, target_dir / "odibi.yaml")

    # Create standard directories
    os.makedirs(target_dir / "data", exist_ok=True)
    os.makedirs(target_dir / "data/raw", exist_ok=True)
    os.makedirs(target_dir / "logs", exist_ok=True)
    os.makedirs(target_dir / ".github/workflows", exist_ok=True)

    # Sample data only for the local templates.
    if template_name in ["local", "local-medallion"]:
        _write_sample_data(target_dir)

    with open(target_dir / "Dockerfile", "w") as f:
        f.write(_DOCKERFILE_CONTENT)

    with open(target_dir / ".github/workflows/ci.yaml", "w") as f:
        f.write(_CI_YAML_CONTENT)

    with open(target_dir / ".dockerignore", "w") as f:
        f.write("data/\nlogs/\n.git/\n__pycache__/\n*.pyc\n")

    with open(target_dir / ".gitignore", "w") as f:
        f.write("data/\nlogs/\n__pycache__/\n*.pyc\n.odibi/\nstories/\n")

    with open(target_dir / "README.md", "w") as f:
        f.write(_readme_content(project_name))


def init_pipeline_command(args):
    """Execute the init-pipeline command.

    Resolves the template (prompting if needed), prepares the target
    directory, and scaffolds a ready-to-run project.

    Args:
        args: Parsed CLI arguments with ``name``, ``template``, ``force``.

    Returns:
        0 on success, 1 on any error.
    """
    project_name = args.name
    force = args.force

    template_name = _select_template(args.template)
    if template_name is None:
        return 1

    # 1. Determine Target Path
    target_dir = Path(os.getcwd()) / project_name
    if target_dir.exists():
        if not force:
            logger.error(f"Directory '{project_name}' already exists. Use --force to overwrite.")
            return 1
        logger.warning(f"Overwriting existing directory '{project_name}'...")
        shutil.rmtree(target_dir)

    # 2. Find Template File
    source_path = _resolve_template_path(template_name)
    if source_path is None:
        return 1

    # 3. Create Project Structure
    try:
        _scaffold_project(target_dir, source_path, template_name, project_name)

        logger.info(f"Created new project '{project_name}' using '{template_name}' template.")
        logger.info(f"Location: {target_dir}")
        logger.info(f"Next step: cd {project_name} && odibi run odibi.yaml")

        return 0

    except Exception as e:
        logger.error(f"Failed to create project: {str(e)}")
        return 1
|