odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/cli/doctor.py ADDED
@@ -0,0 +1,161 @@
1
+ import importlib
2
+ import os
3
+ from typing import Optional
4
+
5
+ from odibi.pipeline import PipelineManager
6
+
7
+
8
def check_dependencies() -> bool:
    """Check installed dependencies and print a per-package report.

    Each dependency is imported via ``importlib`` and its ``__version__``
    (or "installed") is printed. Optional dependencies that are missing are
    reported as ``[OPTIONAL]`` and do not fail the check; missing required
    dependencies are reported as ``[MISSING]`` and make the result False.

    Returns:
        True if every required (non-optional) dependency is importable.
    """
    print("Checking dependencies...")
    deps = [
        ("pandas", "Pandas"),
        ("duckdb", "DuckDB (Local SQL Engine)"),
        ("pyspark", "PySpark (Distributed Engine)"),
        ("fastapi", "FastAPI (UI Backend)"),
        ("uvicorn", "Uvicorn (UI Server)"),
        ("openlineage.client", "OpenLineage (Governance)"),
        ("azure.storage.blob", "Azure Blob Storage"),
        ("delta", "Delta Lake"),
    ]
    # Optional vs required split. Hoisted out of the loop: the original
    # rebuilt this list inside the except handler on every ImportError.
    optional = {"pyspark", "openlineage.client", "azure.storage.blob", "delta"}

    all_good = True
    for module, name in deps:
        try:
            mod = importlib.import_module(module)
            version = getattr(mod, "__version__", "installed")
            print(f" [OK] {name}: {version}")
        except ImportError:
            if module in optional:
                print(f" [OPTIONAL] {name}: Not installed")
            else:
                print(f" [MISSING] {name}")
                all_good = False

    return all_good
39
+
40
+
41
def check_config() -> Optional[PipelineManager]:
    """Locate the project configuration file and validate it.

    Searches the working directory for the first of the known config
    filenames and parses it through PipelineManager (which doubles as
    schema validation).

    Returns:
        A PipelineManager on success, or None if no config file was found
        or the configuration failed to parse.
    """
    print("\nChecking configuration...")
    candidates = ("odibi.yaml", "project.yaml", "odibi.yml", "project.yml")
    found_config = next((name for name in candidates if os.path.exists(name)), None)

    if found_config is None:
        print(" [FAIL] No configuration file found (odibi.yaml or project.yaml)")
        return None

    print(f" [OK] Found {found_config}")

    try:
        # Parsing via PipelineManager validates the configuration as a side effect.
        manager = PipelineManager.from_yaml(found_config)
    except Exception as e:
        print(f" [FAIL] Invalid configuration: {e}")
        return None

    print(" [OK] Configuration is valid")
    return manager
65
+
66
+
67
def check_connections(manager: PipelineManager) -> bool:
    """Validate every configured connection, printing one line per entry.

    Args:
        manager: Loaded pipeline manager, or a falsy value when no valid
            configuration is available (checks are skipped).

    Returns:
        True if all connections validated; False on any failure or skip.
    """
    print("\nChecking connections...")
    if not manager:
        print(" [SKIP] Skipping connection checks (no config)")
        return False

    failures = 0
    for conn_name, connection in manager.connections.items():
        try:
            connection.validate()
        except Exception as exc:
            print(f" [FAIL] {conn_name}: {exc}")
            failures += 1
        else:
            print(f" [OK] {conn_name} ({connection.__class__.__name__})")

    return failures == 0
84
+
85
+
86
def check_system_catalog(manager: PipelineManager) -> bool:
    """Check that the system catalog is configured and its tables exist.

    A missing catalog configuration is treated as success (local-only
    mode); otherwise each required meta table must be present.

    Args:
        manager: Loaded pipeline manager (may be falsy / lack a catalog).

    Returns:
        True if the catalog is absent or every required table exists.
    """
    print("\nChecking System Catalog...")

    if not manager or not manager.catalog_manager:
        print(" [SKIP] System Catalog not configured (Local Only Mode)")
        return True

    catalog = manager.catalog_manager
    print(f" [INFO] Catalog Path: {catalog.base_path}")

    required_tables = (
        "meta_tables",
        "meta_runs",
        "meta_patterns",
        "meta_metrics",
        "meta_state",
        "meta_pipelines",
        "meta_nodes",
    )

    missing = 0
    for table in required_tables:
        path = catalog.tables.get(table)
        # Touching the internal _table_exists is acceptable for a
        # diagnostic (doctor) tool.
        if catalog._table_exists(path):
            print(f" [OK] {table}")
        else:
            print(f" [FAIL] {table} not found at {path}")
            missing += 1

    return missing == 0
118
+
119
+
120
def doctor_command(args) -> int:
    """Run all environment health checks and report overall status.

    Checks run in order: dependencies, configuration, connections, and
    (when configured) the system catalog. The first failing check wins
    and produces a non-zero exit code.

    Args:
        args: Parsed CLI namespace (unused; kept for a uniform command API).

    Returns:
        0 when everything passes, 1 on the first failure.
    """
    try:
        version = __import__("odibi").__version__
    except Exception:
        version = "unknown"

    print(f"Odibi Doctor (v{version})")
    print("=" * 40)

    deps_ok = check_dependencies()
    manager = check_config()
    conns_ok = check_connections(manager)
    catalog_ok = check_system_catalog(manager) if manager else True

    # Ordered (status, message) pairs: first failure short-circuits.
    outcomes = [
        (deps_ok, "Some required dependencies are missing."),
        (bool(manager), "Configuration issues found."),
        (conns_ok, "Some connections failed validation."),
        (catalog_ok, "System Catalog issues found."),
    ]
    for ok, message in outcomes:
        if not ok:
            print(f"\n[WARNING] {message}")
            return 1

    print("\n[SUCCESS] You are ready to run pipelines!")
    return 0
156
+
157
+
158
def add_doctor_parser(subparsers):
    """Register the ``doctor`` subcommand on the given subparsers object."""
    return subparsers.add_parser("doctor", help="Check environment health")
odibi/cli/export.py ADDED
@@ -0,0 +1,66 @@
1
+ import os
2
+
3
+ from odibi.config import load_config_from_file
4
+ from odibi.orchestration.airflow import AirflowExporter
5
+
6
+
7
def add_export_parser(subparsers):
    """Register the ``export`` subcommand and its arguments."""
    parser = subparsers.add_parser("export", help="Export pipeline to orchestration code")
    # Declarative (flag, options) spec keeps the argument table compact.
    arg_specs = [
        ("--target", {"choices": ["airflow", "dagster"], "required": True, "help": "Export target"}),
        ("--pipeline", {"help": "Pipeline name (required for Airflow)"}),
        ("--out", {"required": True, "help": "Output file path"}),
        ("--config", {"default": "odibi.yaml", "help": "Path to odibi.yaml"}),
    ]
    for flag, options in arg_specs:
        parser.add_argument(flag, **options)
15
+
16
+
17
def export_command(args):
    """Export a pipeline to orchestration code (Airflow DAG or Dagster defs).

    Args:
        args: Parsed CLI namespace with ``config``, ``target``,
            ``pipeline`` (Airflow only) and ``out``.

    Returns:
        0 on success, 1 on any error (missing config, bad pipeline, ...).
    """
    config_path = args.config
    if not os.path.exists(config_path):
        print(f"Error: Config file '{config_path}' not found.")
        return 1

    try:
        project_config = load_config_from_file(config_path)
    except Exception as e:
        print(f"Error loading config: {e}")
        return 1

    if args.target == "airflow":
        # Airflow exports one DAG per named pipeline.
        if not args.pipeline:
            print("Error: --pipeline is required for Airflow export.")
            return 1

        exporter = AirflowExporter(project_config)
        try:
            dag_code = exporter.generate_code(args.pipeline)
        except ValueError as e:
            print(f"Error: {e}")
            return 1
        with open(args.out, "w", encoding="utf-8") as f:
            f.write(dag_code)
        print(f"Successfully exported Airflow DAG to {args.out}")

    elif args.target == "dagster":
        # Dagster gets a definitions.py scaffold that loads the project
        # config at import time and builds Definitions from it.
        scaffold = f"""
# Generated by Odibi
import os
from dagster import Definitions
from odibi.config import load_config_from_file
from odibi.orchestration.dagster import DagsterFactory

# Load project config
# Ensure odibi.yaml is in the same directory or adjust path
config_path = os.path.join(os.path.dirname(__file__), "{config_path}")
project_config = load_config_from_file(config_path)

# Create definitions
defs = DagsterFactory(project_config).create_definitions()
"""
        with open(args.out, "w", encoding="utf-8") as f:
            f.write(scaffold)
        print(f"Successfully created Dagster definitions at {args.out}")

    return 0
odibi/cli/graph.py ADDED
@@ -0,0 +1,150 @@
1
+ """
2
+ Graph CLI Command
3
+ =================
4
+
5
+ Visualizes the pipeline dependency graph.
6
+ """
7
+
8
+ from odibi.graph import DependencyGraph
9
+ from odibi.pipeline import PipelineManager
10
+
11
+
12
def graph_command(args):
    """
    Handle the ``graph`` subcommand: render a pipeline's dependency graph.

    Args:
        args: Parsed command-line arguments (config, pipeline, format,
            verbose, optional env).

    Returns:
        Process exit code: 0 on success, 1 on any failure.
    """
    try:
        manager = PipelineManager.from_yaml(args.config, env=getattr(args, "env", None))

        # Resolve the pipeline to visualize; default to the first defined one.
        target = args.pipeline
        if not target:
            available = manager.list_pipelines()
            if not available:
                print("❌ No pipelines found in configuration")
                return 1
            target = available[0]

        try:
            pipeline = manager.get_pipeline(target)
        except ValueError:
            print(f"❌ Pipeline '{target}' not found")
            return 1

        # Emit the requested representation.
        fmt = args.format
        if fmt == "ascii":
            print(pipeline.visualize())
        elif fmt == "dot":
            print(_generate_dot(pipeline.graph, target, manager.catalog_manager))
        elif fmt == "mermaid":
            print(_generate_mermaid(pipeline.graph, target, manager.catalog_manager))

        return 0

    except Exception as e:
        print(f"❌ Error generating graph: {e}")
        if args.verbose:
            import traceback

            traceback.print_exc()
        return 1
61
+
62
+
63
+ def _generate_dot(graph: DependencyGraph, pipeline_name: str, catalog_manager=None) -> str:
64
+ """Generate DOT (Graphviz) representation."""
65
+ lines = []
66
+ lines.append(f'digraph "{pipeline_name}" {{')
67
+ lines.append(" rankdir=LR;")
68
+ lines.append(' node [shape=box, style=rounded, fontname="Helvetica"];')
69
+ lines.append(' edge [fontname="Helvetica"];')
70
+ lines.append("")
71
+
72
+ for node_name in graph.nodes:
73
+ # Add node
74
+ node = graph.nodes[node_name]
75
+ op_type = "unknown"
76
+ if node.read:
77
+ op_type = "read"
78
+ color = "lightblue"
79
+ elif node.write:
80
+ op_type = "write"
81
+ color = "lightgreen"
82
+ elif node.transform:
83
+ op_type = "transform"
84
+ color = "lightyellow"
85
+
86
+ # Enrich with Catalog Stats
87
+ stats_text = ""
88
+ if catalog_manager:
89
+ try:
90
+ avg_rows = catalog_manager.get_average_volume(node_name)
91
+ if avg_rows is not None:
92
+ stats_text = f"\\n~{int(avg_rows)} rows"
93
+ except Exception:
94
+ pass
95
+
96
+ label = f"{node_name}\\n({op_type}){stats_text}"
97
+ lines.append(f' "{node_name}" [label="{label}", style="filled", fillcolor="{color}"];')
98
+
99
+ # Add edges
100
+ for dep in node.depends_on:
101
+ lines.append(f' "{dep}" -> "{node_name}";')
102
+
103
+ lines.append("}")
104
+ return "\n".join(lines)
105
+
106
+
107
+ def _generate_mermaid(graph: DependencyGraph, pipeline_name: str, catalog_manager=None) -> str:
108
+ """Generate Mermaid diagram."""
109
+ lines = []
110
+ lines.append("graph LR")
111
+
112
+ # Define styles
113
+ lines.append(" classDef read fill:lightblue,stroke:#333,stroke-width:1px;")
114
+ lines.append(" classDef write fill:lightgreen,stroke:#333,stroke-width:1px;")
115
+ lines.append(" classDef transform fill:lightyellow,stroke:#333,stroke-width:1px;")
116
+
117
+ for node_name in graph.nodes:
118
+ node = graph.nodes[node_name]
119
+ style_class = "transform"
120
+
121
+ # Node styling based on type
122
+ if node.read:
123
+ shape = "((" # Circle
124
+ end_shape = "))"
125
+ style_class = "read"
126
+ elif node.write:
127
+ shape = "[/" # Parallelogram
128
+ end_shape = "/]"
129
+ style_class = "write"
130
+ else:
131
+ shape = "[" # Box
132
+ end_shape = "]"
133
+
134
+ # Enrich with Catalog Stats
135
+ label = node_name
136
+ if catalog_manager:
137
+ try:
138
+ avg_rows = catalog_manager.get_average_volume(node_name)
139
+ if avg_rows is not None:
140
+ label = f"{node_name}<br/>~{int(avg_rows)} rows"
141
+ except Exception:
142
+ pass
143
+
144
+ lines.append(f' {node_name}{shape}"{label}"{end_shape}:::{style_class}')
145
+
146
+ # Edges
147
+ for dep in node.depends_on:
148
+ lines.append(f" {dep} --> {node_name}")
149
+
150
+ return "\n".join(lines)
@@ -0,0 +1,242 @@
1
+ import logging
2
+ import os
3
+ import shutil
4
+ from pathlib import Path
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
# Map template names to their relative paths in the repo.
# Paths are resolved against the repository root at runtime (see
# init_pipeline_command). "local" and "local-medallion" intentionally
# point at the same template file. Insertion order matters: it drives
# both the --template choices and the interactive prompt numbering.
TEMPLATE_MAP = {
    "kitchen": "examples/templates/kitchen_sink.odibi.yaml",
    "full": "examples/templates/template_full.yaml",
    "local": "examples/templates/simple_local.yaml",
    "local-medallion": "examples/templates/simple_local.yaml",
    "azure": "examples/templates/azure_spark.yaml",
}
16
+
17
+
18
def add_init_parser(subparsers):
    """Register the ``init-pipeline`` subcommand (aliases: create, init,
    generate-project) and its arguments."""
    init_parser = subparsers.add_parser(
        "init-pipeline",
        aliases=["create", "init", "generate-project"],
        help="Initialize a new Odibi project from a template",
    )
    init_parser.add_argument("name", help="Name of the project directory to create")
    init_parser.add_argument(
        "--template",
        choices=list(TEMPLATE_MAP),  # same keys, insertion order preserved
        default=None,
        help="Template to use (default: prompt user)",
    )
    # Allow clobbering an existing project directory.
    init_parser.add_argument(
        "--force", action="store_true", help="Overwrite existing directory if it exists"
    )
36
+
37
+
38
def init_pipeline_command(args):
    """Execute the init-pipeline command.

    Creates a new project directory from a bundled template, seeds sample
    data for local templates, and scaffolds housekeeping files: Dockerfile,
    GitHub Actions CI workflow, .dockerignore, .gitignore and a README.

    Args:
        args: Parsed CLI namespace with ``name``, ``template`` and ``force``.

    Returns:
        0 on success, 1 on any failure (invalid selection, existing
        directory without --force, missing template file, or I/O error).
    """
    project_name = args.name
    template_name = args.template
    force = args.force

    # Interactive Prompt: no --template given, so ask the user to pick one.
    if template_name is None:
        print("\nSelect a project template:")
        templates = list(TEMPLATE_MAP.keys())
        for i, t in enumerate(templates):
            print(f" {i + 1}. {t}")

        try:
            choice = input(f"\nEnter number (default: 1 [{templates[0]}]): ").strip()
            if not choice:
                template_name = templates[0]
            else:
                idx = int(choice) - 1
                if 0 <= idx < len(templates):
                    template_name = templates[idx]
                else:
                    logger.error("Invalid selection.")
                    return 1
        except (ValueError, EOFError, KeyboardInterrupt):
            # Fallback for non-interactive environments (EOF) and malformed
            # numeric input: default to the "local" template rather than abort.
            template_name = "local"
            logger.info(f"Using default template: {template_name}")

    # 1. Determine Target Path (always relative to the current working dir).
    target_dir = Path(os.getcwd()) / project_name

    if target_dir.exists():
        if not force:
            logger.error(f"Directory '{project_name}' already exists. Use --force to overwrite.")
            return 1
        else:
            logger.warning(f"Overwriting existing directory '{project_name}'...")
            shutil.rmtree(target_dir)

    # 2. Find Template File
    # Assuming we are running from within the installed package or repo.
    # Try to find the repo root relative to this file:
    # this file is odibi/cli/init_pipeline.py, so repo root is ../../../
    # NOTE(review): this is fragile for site-packages installs where the
    # examples/ directory is not shipped — the existence check below
    # catches that case and fails with guidance.
    current_file = Path(__file__).resolve()
    repo_root = current_file.parent.parent.parent

    template_rel_path = TEMPLATE_MAP[template_name]
    source_path = repo_root / template_rel_path

    if not source_path.exists():
        # Fallback: check if we are installed and templates are packaged
        # (not likely in this env but good practice).
        # For now, just fail if not found in repo structure.
        logger.error(f"Template file not found at: {source_path}")
        logger.error(
            "Ensure you are running Odibi from the repository root or templates are correctly installed."
        )
        return 1

    # 3. Create Project Structure
    try:
        os.makedirs(target_dir)

        # Copy the template to odibi.yaml
        target_file = target_dir / "odibi.yaml"
        shutil.copy2(source_path, target_file)

        # Create standard directories
        os.makedirs(target_dir / "data", exist_ok=True)
        os.makedirs(target_dir / "data/raw", exist_ok=True)
        os.makedirs(target_dir / "logs", exist_ok=True)
        os.makedirs(target_dir / ".github/workflows", exist_ok=True)

        # Create sample data for local templates. Includes a stale duplicate
        # row for id=1 so dedup/merge examples have something to resolve.
        if template_name in ["local", "local-medallion"]:
            with open(target_dir / "data/raw/sample_data.csv", "w") as f:
                f.write("id,name,value,updated_at\n")
                f.write("1,Item A,100,2023-01-01 10:00:00\n")
                f.write("2,Item B,200,2023-01-01 11:00:00\n")
                f.write("1,Item A (Old),90,2023-01-01 09:00:00\n")

        # Create Dockerfile
        dockerfile_content = """FROM python:3.11-slim

WORKDIR /app

# Install system dependencies if needed (e.g., for pyodbc)
# RUN apt-get update && apt-get install -y unixodbc-dev

# Install Odibi
RUN pip install odibi[all]

# Copy project files
COPY . /app

# Default command
CMD ["odibi", "run", "odibi.yaml"]
"""
        with open(target_dir / "Dockerfile", "w") as f:
            f.write(dockerfile_content)

        # Create GitHub CI Workflow
        ci_yaml_content = """name: Odibi CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v3
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install odibi[all] pytest

      - name: Check Health (Doctor)
        run: odibi doctor

      - name: Validate Configuration
        run: odibi validate odibi.yaml

      - name: Run Unit Tests
        run: odibi test

      # Optional: Dry Run
      # - name: Dry Run Pipeline
      #   run: odibi run odibi.yaml --dry-run
"""
        with open(target_dir / ".github/workflows/ci.yaml", "w") as f:
            f.write(ci_yaml_content)

        # Create .dockerignore
        with open(target_dir / ".dockerignore", "w") as f:
            f.write("data/\nlogs/\n.git/\n__pycache__/\n*.pyc\n")

        # Create .gitignore
        with open(target_dir / ".gitignore", "w") as f:
            f.write("data/\nlogs/\n__pycache__/\n*.pyc\n.odibi/\nstories/\n")

        # Generate README.md
        readme_content = f"""# {project_name}

A data engineering project built with [Odibi](https://github.com/henryodibi11/Odibi).

## Getting Started

### Prerequisites
- Python 3.9+
- Odibi (`pip install odibi`)

### Project Structure
- `odibi.yaml`: Main pipeline configuration
- `data/`: Local data storage
- `stories/`: Execution reports (HTML)

### Commands

**1. Validate Configuration**
```bash
odibi validate odibi.yaml
```

**2. Check Health**
```bash
odibi doctor
```

**3. Run Pipeline**
```bash
odibi run odibi.yaml
```

**4. View Results**
```bash
odibi ui
```

## CI/CD
A GitHub Actions workflow is included in `.github/workflows/ci.yaml` that validates the project on every push.
"""
        with open(target_dir / "README.md", "w") as f:
            f.write(readme_content)

        logger.info(f"Created new project '{project_name}' using '{template_name}' template.")
        logger.info(f"Location: {target_dir}")
        logger.info(f"Next step: cd {project_name} && odibi run odibi.yaml")

        return 0

    except Exception as e:
        logger.error(f"Failed to create project: {str(e)}")
        return 1