odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,128 @@
1
+ import time
2
+ from typing import Any
3
+
4
+ from odibi.context import EngineContext
5
+ from odibi.patterns.base import Pattern
6
+ from odibi.transformers.merge_transformer import MergeParams, merge
7
+ from odibi.utils.logging_context import get_logging_context
8
+
9
+
10
class MergePattern(Pattern):
    """
    Merge Pattern: Upsert/Merge logic.

    Validates the merge configuration, then delegates the work to the
    ``merge`` transformer while logging timing and source row counts.

    Configuration Options (via params dict):
    - **target** (str): Target table/path. ``path`` is accepted as an alias.
    - **keys** (list): Join keys.
    - **strategy** (str): 'upsert', 'append_only', 'delete_match'.
      Reported as 'upsert' in logs when omitted.
    """

    def validate(self) -> None:
        """Check that the required merge parameters are present.

        Raises:
            ValueError: If neither 'target' nor 'path' is set, or if 'keys'
                is missing or empty.
        """
        ctx = get_logging_context()

        # Support both 'target' and 'path' for compatibility with merge transformer
        target = self.params.get("target") or self.params.get("path")
        keys = self.params.get("keys")

        ctx.debug(
            "MergePattern validation starting",
            pattern="MergePattern",
            target=target,
            keys=keys,
            strategy=self.params.get("strategy"),
        )

        if not target:
            ctx.error(
                "MergePattern validation failed: 'target' or 'path' is required",
                pattern="MergePattern",
            )
            # Only list params that were actually given a value.
            provided = [name for name, value in self.params.items() if value is not None]
            raise ValueError(
                "MergePattern: 'target' or 'path' is required. "
                "Expected: A target table path string. "
                f"Provided params: {provided}. "
                "Fix: Add 'target' or 'path' to your pattern configuration."
            )

        if not keys:
            ctx.error(
                "MergePattern validation failed: 'keys' is required",
                pattern="MergePattern",
            )
            # Best-effort hint for the message. Building it must never raise,
            # otherwise the ValueError below would be masked by an
            # AttributeError/TypeError from a missing or empty source.
            columns = getattr(getattr(self, "source", None), "columns", None)
            source_columns = list(columns) if columns is not None else []
            raise ValueError(
                "MergePattern: 'keys' is required. "
                "Expected: A list of column names to match source and target rows for merge. "
                f"Available source columns: {source_columns}. "
                "Fix: Add 'keys' with columns that uniquely identify rows (e.g., keys=['id'])."
            )

        ctx.debug(
            "MergePattern validation passed",
            pattern="MergePattern",
            # Log the resolved value so configs using 'path' do not show None.
            target=target,
            keys=keys,
            strategy=self.params.get("strategy", "upsert"),
        )

    def execute(self, context: EngineContext) -> Any:
        """Run the merge transformer against ``context.df``.

        Args:
            context: Engine context providing ``engine_type`` and ``df``.

        Returns:
            ``context.df`` as it is after the ``merge`` call.

        Raises:
            Exception: Any error raised by ``merge`` is logged and re-raised
                unchanged.
        """
        ctx = get_logging_context()
        # perf_counter is monotonic, so elapsed time is immune to clock changes.
        started = time.perf_counter()

        # Support both 'target' and 'path' for compatibility
        target = self.params.get("target") or self.params.get("path")
        keys = self.params.get("keys")
        strategy = self.params.get("strategy", "upsert")

        ctx.debug(
            "Merge pattern starting",
            pattern="MergePattern",
            target=target,
            keys=keys,
            strategy=strategy,
        )

        # Row count is informational only; a failure here must not abort the merge.
        source_count = None
        try:
            if context.engine_type == "spark":
                source_count = context.df.count()
            else:
                source_count = len(context.df)
            ctx.debug(
                "Merge source data loaded",
                pattern="MergePattern",
                source_rows=source_count,
            )
        except Exception:
            ctx.debug("Merge could not determine source row count", pattern="MergePattern")

        # Forward only the params that MergeParams declares as fields.
        valid_keys = MergeParams.model_fields.keys()
        filtered_params = {k: v for k, v in self.params.items() if k in valid_keys}

        try:
            # NOTE(review): the return value of merge() is discarded and the
            # context's frame is returned instead - confirm this is intended.
            merge(context, context.df, **filtered_params)
        except Exception as e:
            elapsed_ms = (time.perf_counter() - started) * 1000
            ctx.error(
                f"Merge pattern execution failed: {e}",
                pattern="MergePattern",
                error_type=type(e).__name__,
                elapsed_ms=round(elapsed_ms, 2),
                target=target,
                keys=keys,
                strategy=strategy,
            )
            raise

        elapsed_ms = (time.perf_counter() - started) * 1000

        ctx.info(
            "Merge pattern completed",
            pattern="MergePattern",
            elapsed_ms=round(elapsed_ms, 2),
            source_rows=source_count,
            target=target,
            keys=keys,
            strategy=strategy,
        )

        return context.df
odibi/patterns/scd2.py ADDED
@@ -0,0 +1,148 @@
1
+ import time
2
+ from typing import Any
3
+
4
+ from odibi.context import EngineContext
5
+ from odibi.patterns.base import Pattern
6
+ from odibi.transformers.scd import SCD2Params, scd2
7
+ from odibi.utils.logging_context import get_logging_context
8
+
9
+
10
class SCD2Pattern(Pattern):
    """
    SCD2 Pattern: Slowly Changing Dimension Type 2.

    Tracks history by creating new rows for updates.

    Configuration Options (via params dict):
    - **keys** (list): Business keys.
    - **target** (str): Target table name or path (required).
    - **time_col** (str): Timestamp column for versioning (default: current time).
    - **valid_from_col** (str): Name of start date column (default: valid_from).
    - **valid_to_col** (str): Name of end date column (default: valid_to).
    - **is_current_col** (str): Name of current flag column (default: is_current).
    """

    def validate(self) -> None:
        """Check that the required SCD2 parameters are present.

        Raises:
            ValueError: If 'keys' or 'target' is missing or empty.
        """
        ctx = get_logging_context()
        keys = self.params.get("keys")
        target = self.params.get("target")

        ctx.debug(
            "SCD2Pattern validation starting",
            pattern="SCD2Pattern",
            keys=keys,
            target=target,
        )

        if not keys:
            ctx.error(
                "SCD2Pattern validation failed: 'keys' parameter is required",
                pattern="SCD2Pattern",
            )
            raise ValueError(
                "SCD2Pattern: 'keys' parameter is required. "
                f"Expected a list of business key column names, but got: {keys!r}. "
                f"Available params: {list(self.params.keys())}. "
                "Fix: Provide 'keys' as a list, e.g., keys=['customer_id']."
            )

        if not target:
            ctx.error(
                "SCD2Pattern validation failed: 'target' parameter is required",
                pattern="SCD2Pattern",
            )
            raise ValueError(
                "SCD2Pattern: 'target' parameter is required. "
                f"Expected a table name or path string, but got: {target!r}. "
                "Fix: Provide 'target' as a string, e.g., target='dim_customer'."
            )

        ctx.debug(
            "SCD2Pattern validation passed",
            pattern="SCD2Pattern",
            keys=keys,
            target=target,
        )

    def execute(self, context: EngineContext) -> Any:
        """Build ``SCD2Params`` from the config and run the ``scd2`` transformer.

        Args:
            context: Engine context providing ``engine_type`` and ``df``.

        Returns:
            The ``df`` of the context returned by ``scd2``.

        Raises:
            ValueError: If ``SCD2Params`` rejects the supplied parameters; the
                original error is attached as the cause.
            Exception: Any error raised by ``scd2`` is logged and re-raised
                unchanged.
        """
        ctx = get_logging_context()
        # perf_counter is monotonic, so elapsed time is immune to clock changes.
        started = time.perf_counter()

        keys = self.params.get("keys")
        target = self.params.get("target")
        valid_from_col = self.params.get("valid_from_col", "valid_from")
        valid_to_col = self.params.get("valid_to_col", "valid_to")
        is_current_col = self.params.get("is_current_col", "is_current")
        track_cols = self.params.get("track_cols")

        ctx.debug(
            "SCD2 pattern starting",
            pattern="SCD2Pattern",
            keys=keys,
            target=target,
            valid_from_col=valid_from_col,
            valid_to_col=valid_to_col,
            is_current_col=is_current_col,
            track_cols=track_cols,
        )

        # Row count is informational only; a failure here must not abort the run.
        source_count = None
        try:
            if context.engine_type == "spark":
                source_count = context.df.count()
            else:
                source_count = len(context.df)
            ctx.debug("SCD2 source data loaded", pattern="SCD2Pattern", source_rows=source_count)
        except Exception:
            ctx.debug("SCD2 could not determine source row count", pattern="SCD2Pattern")

        # Forward only the params that SCD2Params declares as fields.
        valid_keys = SCD2Params.model_fields.keys()
        filtered_params = {k: v for k, v in self.params.items() if k in valid_keys}

        try:
            scd_params = SCD2Params(**filtered_params)
        except Exception as e:
            ctx.error(
                f"SCD2 invalid parameters: {e}",
                pattern="SCD2Pattern",
                error_type=type(e).__name__,
                params=filtered_params,
            )
            # Chain explicitly so the underlying validation error stays
            # attached as the cause of the user-facing ValueError.
            raise ValueError(
                f"Invalid SCD2 parameters: {e}. "
                f"Provided params: {filtered_params}. "
                f"Valid param names: {list(valid_keys)}."
            ) from e

        try:
            result_ctx = scd2(context, scd_params)
        except Exception as e:
            elapsed_ms = (time.perf_counter() - started) * 1000
            ctx.error(
                f"SCD2 pattern execution failed: {e}",
                pattern="SCD2Pattern",
                error_type=type(e).__name__,
                elapsed_ms=round(elapsed_ms, 2),
            )
            raise

        result_df = result_ctx.df
        # Stop the clock before counting rows so the reported duration covers
        # only parameter building and the scd2 call.
        elapsed_ms = (time.perf_counter() - started) * 1000

        result_count = None
        try:
            if context.engine_type == "spark":
                result_count = result_df.count()
            else:
                result_count = len(result_df)
        except Exception:
            # Still best-effort, but leave a trace instead of failing silently.
            ctx.debug("SCD2 could not determine result row count", pattern="SCD2Pattern")

        ctx.info(
            "SCD2 pattern completed",
            pattern="SCD2Pattern",
            elapsed_ms=round(elapsed_ms, 2),
            source_rows=source_count,
            result_rows=result_count,
            keys=keys,
            target=target,
            valid_from_col=valid_from_col,
            valid_to_col=valid_to_col,
        )

        return result_df