odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/cli/schema.py ADDED
@@ -0,0 +1,208 @@
1
+ """CLI commands for schema tracking."""
2
+
3
+ import json
4
+ from datetime import datetime
5
+ from typing import Optional
6
+
7
+ from odibi.config import load_config_from_file
8
+
9
+
10
def add_schema_parser(subparsers) -> None:
    """Register the ``schema`` command and its subcommands on the CLI.

    Two subcommands are added, and the chosen one is stored on the parsed
    namespace as ``schema_command``:

    * ``history <table>`` with ``--config``, ``--limit`` and ``--format``
    * ``diff <table>`` with ``--config``, ``--from-version`` and ``--to-version``

    Args:
        subparsers: Subparsers action of the parent parser to attach to.
    """
    parser = subparsers.add_parser("schema", help="Schema version tracking commands")
    commands = parser.add_subparsers(dest="schema_command")

    # odibi schema history <table> [--config PATH] [--limit N] [--format table|json]
    history = commands.add_parser("history", help="Show schema version history")
    history.add_argument("table", help="Table path (e.g., silver/customers)")
    history.add_argument("--config", help="Path to YAML config file")
    history.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Maximum versions to show (default: 10)",
    )
    history.add_argument(
        "--format",
        choices=["table", "json"],
        default="table",
        help="Output format (default: table)",
    )

    # odibi schema diff <table> [--config PATH] [--from-version N] [--to-version N]
    diff = commands.add_parser("diff", help="Compare two schema versions")
    diff.add_argument("table", help="Table path (e.g., silver/customers)")
    diff.add_argument("--config", help="Path to YAML config file")
    version_flags = (
        ("--from-version", "Source version number"),
        ("--to-version", "Target version number"),
    )
    for flag, description in version_flags:
        diff.add_argument(flag, type=int, help=description)
35
+
36
+
37
def schema_command(args) -> int:
    """Route a parsed ``odibi schema`` invocation to its handler.

    Args:
        args: Parsed CLI arguments; ``schema_command`` names the subcommand.

    Returns:
        The handler's exit code, or 1 (after printing a hint) when the
        subcommand is missing or not recognised.
    """
    selected = getattr(args, "schema_command", None)
    if not selected:
        print("Usage: odibi schema <command> [options]")
        print("Commands: history, diff")
        return 1

    if selected == "history":
        return _schema_history(args)
    if selected == "diff":
        return _schema_diff(args)

    print(f"Unknown schema command: {selected}")
    return 1
51
+
52
+
53
def _get_catalog_manager(config_path: Optional[str]):
    """Build a CatalogManager for the system catalog named in a project config.

    Args:
        config_path: Path to the project YAML config file (required).

    Returns:
        The CatalogManager, or None after printing an error when the path is
        missing, the file cannot be found, or anything else fails while
        loading the config or constructing the manager.
    """
    if not config_path:
        print("Error: --config is required")
        return None

    try:
        project_config = load_config_from_file(config_path)

        # These imports run inside the try block, so an import failure is
        # reported through the generic handler below.
        from odibi.catalog import CatalogManager
        from odibi.engine import get_engine

        engine_instance = get_engine(project_config.engine)
        connection = project_config.connections.get(project_config.system.connection)

        # Connections that expose a base_path get the system path appended to
        # it; otherwise the system path is used on its own.
        base_path = (
            f"{connection.base_path.rstrip('/')}/{project_config.system.path}"
            if hasattr(connection, "base_path")
            else project_config.system.path
        )

        return CatalogManager(
            spark=None,
            config=project_config.system,
            base_path=base_path,
            engine=engine_instance,
            connection=connection,
        )

    except FileNotFoundError:
        print(f"Error: Config file not found: {config_path}")
        return None
    except Exception as e:
        print(f"Error loading config: {e}")
        return None
89
+
90
+
91
def _format_captured_at(value) -> str:
    """Render a history record's capture timestamp as display text."""
    if value is None:
        return ""
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d %H:%M:%S")
    # Anything else is stringified so the width format spec used by the
    # caller cannot raise TypeError.
    return str(value)


def _history_name_list(value) -> list:
    """Return a record's column-name field as a list of strings."""
    if value is None:
        return []
    if isinstance(value, str):
        return [value] if value else []
    # Iterate instead of testing truthiness so array-like containers work.
    return [str(name) for name in value]


def _summarize_history_changes(record) -> str:
    """Build the 'Changes' cell for one schema-history record."""
    parts = []
    for marker, key in (
        ("+", "columns_added"),
        ("-", "columns_removed"),
        ("~", "columns_type_changed"),
    ):
        names = _history_name_list(record.get(key))
        if names:
            # Show at most three names per category, with an ellipsis if cut.
            suffix = "..." if len(names) > 3 else ""
            parts.append(f"{marker}{', '.join(names[:3])}{suffix}")

    if not parts:
        if record.get("schema_version") == 1:
            # "columns" may be missing, None, a JSON string, or already decoded.
            raw = record.get("columns") or "{}"
            columns = json.loads(raw) if isinstance(raw, (str, bytes, bytearray)) else raw
            parts.append(f"Initial schema ({len(columns)} columns)")
        else:
            parts.append("(no changes detected)")

    return "; ".join(parts)


def _schema_history(args) -> int:
    """Show schema version history for a table.

    Args:
        args: Parsed CLI arguments providing ``config``, ``table``, ``limit``
            and ``format``.

    Returns:
        1 when the catalog manager could not be created, otherwise 0
        (including when no history exists for the table).
    """
    catalog = _get_catalog_manager(args.config)
    if not catalog:
        return 1

    history = catalog.get_schema_history(args.table, limit=args.limit)

    if not history:
        print(f"No schema history found for: {args.table}")
        return 0

    if args.format == "json":
        # default=str stringifies values json cannot encode natively.
        print(json.dumps(history, indent=2, default=str))
        return 0

    # Table format output
    print(f"\nSchema History: {args.table}")
    print("=" * 80)
    print(f"{'Version':<10} {'Captured At':<22} {'Changes'}")
    print("-" * 80)

    for record in history:
        version = f"v{record.get('schema_version', '?')}"
        captured_at = _format_captured_at(record.get("captured_at"))
        print(f"{version:<10} {captured_at:<22} {_summarize_history_changes(record)}")

    print()
    return 0
142
+
143
+
144
def _decode_schema_columns(record) -> dict:
    """Return a history record's ``columns`` field as a dict.

    The field may be missing, None, a JSON string, or already decoded.
    """
    raw = record.get("columns")
    if not raw:
        return {}
    if isinstance(raw, (str, bytes, bytearray)):
        # assumes the JSON document is an object keyed by column name — TODO confirm
        return json.loads(raw)
    return dict(raw)


def _schema_diff(args) -> int:
    """Compare two schema versions of a table and print a per-column diff.

    With neither ``--from-version`` nor ``--to-version`` given, the first two
    records of the history are compared. Only the records returned by
    ``get_schema_history(..., limit=100)`` are searched.

    Args:
        args: Parsed CLI arguments providing ``config``, ``table``,
            ``from_version`` and ``to_version``.

    Returns:
        0 when the diff was printed, 1 on any error (no catalog, no history,
        incomplete version pair, or a version that is not in the history).
    """
    catalog = _get_catalog_manager(args.config)
    if not catalog:
        return 1

    history = catalog.get_schema_history(args.table, limit=100)

    if not history:
        print(f"No schema history found for: {args.table}")
        return 1

    from_version = args.from_version
    to_version = args.to_version

    if from_version is None and to_version is None:
        # Default to comparing the latest two versions.
        # assumes get_schema_history returns newest first — TODO confirm
        if len(history) < 2:
            print("Need at least 2 versions to compare")
            return 1
        to_version = history[0].get("schema_version")
        from_version = history[1].get("schema_version")
    elif from_version is None or to_version is None:
        # A half-specified pair used to fall through to "Version vNone not found".
        print(
            "Error: --from-version and --to-version must be given together "
            "(omit both to compare the latest two versions)"
        )
        return 1

    # If a version number repeats, the last record wins, as in a linear scan.
    records_by_version = {record.get("schema_version"): record for record in history}
    from_record = records_by_version.get(from_version)
    to_record = records_by_version.get(to_version)

    if from_record is None:
        print(f"Version v{from_version} not found")
        return 1
    if to_record is None:
        print(f"Version v{to_version} not found")
        return 1

    from_cols = _decode_schema_columns(from_record)
    to_cols = _decode_schema_columns(to_record)

    print(f"\nSchema Diff: {args.table}")
    print(f"From v{from_version} → v{to_version}")
    print("=" * 60)

    for col in sorted(set(from_cols) | set(to_cols)):
        in_from = col in from_cols
        in_to = col in to_cols

        # Types are passed through str() before padding: the width format
        # spec raises TypeError for values such as None, dict or list.
        if in_from and in_to:
            if from_cols[col] == to_cols[col]:
                print(f"  {col:<30} {str(to_cols[col]):<20} (unchanged)")
            else:
                print(f"~ {col:<30} {from_cols[col]} → {to_cols[col]}")
        elif in_to:
            print(f"+ {col:<30} {str(to_cols[col]):<20} (added in v{to_version})")
        else:
            print(f"- {col:<30} {str(from_cols[col]):<20} (removed in v{to_version})")

    print()
    return 0
odibi/cli/secrets.py ADDED
@@ -0,0 +1,232 @@
1
+ """Secrets management CLI commands."""
2
+
3
+ import os
4
+ import re
5
+ from typing import Set
6
+
7
+ from dotenv import load_dotenv
8
+
9
+ from odibi.utils.config_loader import load_yaml_with_env
10
+ from odibi.utils.setup_helpers import fetch_keyvault_secrets_parallel
11
+
12
# Pattern to match ${VAR} or ${env:VAR}; group 1 captures the variable name.
# Per the original note, this is the same pattern as in
# odibi/utils/config_loader.py.
ENV_PATTERN = re.compile(r"\$\{(?:env:)?([A-Za-z0-9_]+)\}")


def extract_env_vars(config_path: str) -> Set[str]:
    """Extract environment variables used in config file.

    The file is scanned as raw text, so every ``${VAR}`` / ``${env:VAR}``
    occurrence counts wherever it appears in the file.

    Args:
        config_path: Path to YAML config file

    Returns:
        Set of variable names

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
    """
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config file not found: {config_path}")

    # Read as UTF-8 rather than the platform's locale encoding. Undecodable
    # bytes are replaced instead of raising: only ASCII names are matched,
    # so they cannot change the result.
    with open(config_path, "r", encoding="utf-8", errors="replace") as f:
        content = f.read()

    return set(ENV_PATTERN.findall(content))
34
+
35
+
36
def init_command(args) -> int:
    """Create .env.template from config file usage.

    Args:
        args: Parsed arguments providing ``config``, ``output`` and ``force``

    Returns:
        Exit code: 0 when the template was written or the config references
        no variables; 1 when the output exists without ``--force`` or any
        error occurs.
    """
    try:
        # Extract variables
        vars_found = extract_env_vars(args.config)

        if not vars_found:
            print(f"No environment variables found in {args.config}")
            return 0

        output_path = args.output

        # Refuse to clobber an existing file unless --force was given
        if os.path.exists(output_path) and not args.force:
            print(f"Error: {output_path} already exists. Use --force to overwrite.")
            return 1

        # Header first, then one empty "NAME=" assignment per variable in
        # sorted order so the output is deterministic.
        template_lines = [
            "# Odibi Environment Variables Template",
            f"# Generated from {args.config}",
            "",
            *(f"{var}=" for var in sorted(vars_found)),
        ]

        # Write as UTF-8 explicitly: the header embeds the user-supplied
        # config path, which the locale default encoding may not be able to
        # represent.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(template_lines) + "\n")

        print(f"Created {output_path} with {len(vars_found)} variables.")
        return 0

    except Exception as e:
        # CLI boundary: report the problem and signal failure via exit code.
        print(f"Error creating secrets template: {e}")
        return 1
80
+
81
+
82
class SimpleConnection:
    """Simple connection wrapper for Key Vault checking.

    Reads the Key Vault name, secret name and account from one connection's
    config mapping. The Key Vault fields may be given at the top level or
    nested under an ``auth`` mapping.
    """

    def __init__(self, name: str, data: dict):
        """Capture the Key Vault related fields of one connection.

        Args:
            name: Connection name as it appears in the config.
            data: The connection's config mapping.
        """
        self.name = name

        # An ``auth:`` key left empty in YAML loads as None, and hand-written
        # configs may hold a non-mapping value there; both mean "no auth block".
        auth = data.get("auth")
        if not isinstance(auth, dict):
            auth = {}

        # Support flat or auth dict structure (the flat value wins when set)
        self.key_vault_name = data.get("key_vault_name") or auth.get("key_vault_name")
        self.secret_name = data.get("secret_name") or auth.get("secret_name")
        self.account = data.get("account_name") or data.get("account") or "unknown"
93
+
94
+
95
def check_keyvault_access(config_path: str) -> bool:
    """Check if Key Vault secrets are accessible.

    Every connection that names both a Key Vault and a secret is collected
    and handed to ``fetch_keyvault_secrets_parallel``; each failed result is
    reported.

    Args:
        config_path: Path to config file

    Returns:
        True if all checks pass (or there is nothing to check), False if any
        secret could not be fetched or an error occurred.
    """
    print("\nChecking Azure Key Vault access...")
    try:
        # An empty config file or an empty ``connections:`` key yields None;
        # treat both as "no connections" rather than a failure.
        config_dict = load_yaml_with_env(config_path) or {}
        connections_data = config_dict.get("connections") or {}

        connections_to_check = {}
        for name, data in connections_data.items():
            if not isinstance(data, dict):
                continue
            conn = SimpleConnection(name, data)
            if conn.key_vault_name and conn.secret_name:
                connections_to_check[name] = conn

        if not connections_to_check:
            print(" [OK] No Key Vault connections found.")
            return True

        # Run parallel fetch
        results = fetch_keyvault_secrets_parallel(connections_to_check, verbose=True)

        failures = [result for result in results.values() if not result.success]
        if failures:
            print(f"\nFailed to access {len(failures)} Key Vault secrets:")
            for failure in failures:
                print(f" [FAIL] Connection '{failure.connection_name}': {failure.error}")
            return False

        return True

    except Exception as e:
        # CLI boundary: report the problem and treat it as a failed check.
        print(f"Error checking Key Vaults: {e}")
        return False
137
+
138
+
139
def validate_command(args) -> int:
    """Verify the config's environment variables and Key Vault secrets.

    Environment variables referenced by the config are checked first. The
    Key Vault check runs only when that first step succeeded.

    Args:
        args: Parsed arguments providing ``config``

    Returns:
        Exit code: 0 when everything is available, 1 otherwise
    """
    exit_code = 0

    print("Checking Environment Variables...")
    try:
        # Pick up values from a .env file, if one exists
        load_dotenv()

        vars_required = extract_env_vars(args.config)
        missing_vars = sorted(name for name in vars_required if name not in os.environ)

        if not vars_required:
            print(f" [OK] No environment variables found in {args.config}")
        elif missing_vars:
            print(f" [MISSING] Missing {len(missing_vars)} environment variables:")
            for name in missing_vars:
                print(f" - {name}")
            print(" Please set these variables in your environment or .env file.")
            exit_code = 1
        else:
            print(" [OK] All required environment variables are set.")

    except Exception as e:
        print(f"Error validating secrets: {e}")
        exit_code = 1

    # The Key Vault check loads the config, which might fail if the
    # environment variable step did not pass, so it is skipped in that case.
    if exit_code != 0:
        print("\nSkipping Key Vault check due to missing environment variables.")
    elif not check_keyvault_access(args.config):
        exit_code = 1

    return exit_code
189
+
190
+
191
def add_secrets_parser(subparsers):
    """Register the ``secrets`` command and its subcommands on the CLI.

    The chosen subcommand (``init`` or ``validate``) is stored on the parsed
    namespace as ``secrets_command``.

    Args:
        subparsers: Main subparsers object
    """
    config_help = "Path to YAML config file"

    parser = subparsers.add_parser("secrets", help="Manage secrets and environment variables")
    commands = parser.add_subparsers(dest="secrets_command", help="Secrets commands")

    # secrets init <config> [-o OUTPUT] [-f]
    init = commands.add_parser("init", help="Create .env.template from config")
    init.add_argument("config", help=config_help)
    init.add_argument(
        "-o",
        "--output",
        default=".env.template",
        help="Output file path (default: .env.template)",
    )
    init.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Overwrite existing file",
    )

    # secrets validate <config>
    validate = commands.add_parser("validate", help="Check required variables")
    validate.add_argument("config", help=config_help)
215
+
216
+
217
def secrets_command(args) -> int:
    """Route a parsed ``odibi secrets`` invocation to its handler.

    Args:
        args: Parsed arguments; ``secrets_command`` names the subcommand

    Returns:
        The handler's exit code, or 1 when no known subcommand was given
    """
    chosen = args.secrets_command

    if chosen == "init":
        return init_command(args)
    if chosen == "validate":
        return validate_command(args)

    print("Error: No secrets command specified")
    return 1