kelpmesh-core 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. kelpmesh/__init__.py +6 -0
  2. kelpmesh/__main__.py +3 -0
  3. kelpmesh/adapters/__init__.py +51 -0
  4. kelpmesh/adapters/athena.py +347 -0
  5. kelpmesh/adapters/base.py +139 -0
  6. kelpmesh/adapters/bigquery.py +211 -0
  7. kelpmesh/adapters/clickhouse.py +277 -0
  8. kelpmesh/adapters/databricks.py +222 -0
  9. kelpmesh/adapters/duckdb.py +328 -0
  10. kelpmesh/adapters/fabric.py +236 -0
  11. kelpmesh/adapters/hive.py +366 -0
  12. kelpmesh/adapters/mysql.py +221 -0
  13. kelpmesh/adapters/postgres.py +214 -0
  14. kelpmesh/adapters/redshift.py +221 -0
  15. kelpmesh/adapters/snowflake.py +229 -0
  16. kelpmesh/adapters/spark.py +375 -0
  17. kelpmesh/adapters/sqlserver.py +285 -0
  18. kelpmesh/adapters/trino.py +251 -0
  19. kelpmesh/cli/__init__.py +0 -0
  20. kelpmesh/cli/build.py +132 -0
  21. kelpmesh/cli/ci.py +589 -0
  22. kelpmesh/cli/clean.py +30 -0
  23. kelpmesh/cli/compare.py +62 -0
  24. kelpmesh/cli/compile.py +135 -0
  25. kelpmesh/cli/create_test.py +190 -0
  26. kelpmesh/cli/debug.py +237 -0
  27. kelpmesh/cli/deps.py +93 -0
  28. kelpmesh/cli/diff.py +55 -0
  29. kelpmesh/cli/docs.py +71 -0
  30. kelpmesh/cli/export.py +78 -0
  31. kelpmesh/cli/exposures.py +39 -0
  32. kelpmesh/cli/format.py +206 -0
  33. kelpmesh/cli/freshness.py +156 -0
  34. kelpmesh/cli/generate.py +151 -0
  35. kelpmesh/cli/history.py +79 -0
  36. kelpmesh/cli/import_dbt.py +621 -0
  37. kelpmesh/cli/init.py +104 -0
  38. kelpmesh/cli/lint.py +619 -0
  39. kelpmesh/cli/ls.py +45 -0
  40. kelpmesh/cli/main.py +172 -0
  41. kelpmesh/cli/mesh.py +269 -0
  42. kelpmesh/cli/metrics.py +89 -0
  43. kelpmesh/cli/orchestrate.py +51 -0
  44. kelpmesh/cli/package_cli.py +66 -0
  45. kelpmesh/cli/plan.py +202 -0
  46. kelpmesh/cli/pre_commit.py +54 -0
  47. kelpmesh/cli/preview.py +50 -0
  48. kelpmesh/cli/rollback.py +105 -0
  49. kelpmesh/cli/run.py +171 -0
  50. kelpmesh/cli/scan.py +147 -0
  51. kelpmesh/cli/schedule.py +176 -0
  52. kelpmesh/cli/schema.py +68 -0
  53. kelpmesh/cli/security.py +318 -0
  54. kelpmesh/cli/seed.py +244 -0
  55. kelpmesh/cli/serve.py +34 -0
  56. kelpmesh/cli/sources.py +136 -0
  57. kelpmesh/cli/studio.py +72 -0
  58. kelpmesh/cli/test.py +220 -0
  59. kelpmesh/core/__init__.py +0 -0
  60. kelpmesh/core/audits.py +151 -0
  61. kelpmesh/core/ci.py +79 -0
  62. kelpmesh/core/config.py +102 -0
  63. kelpmesh/core/contracts.py +130 -0
  64. kelpmesh/core/crypto.py +72 -0
  65. kelpmesh/core/errors.py +69 -0
  66. kelpmesh/core/executor.py +641 -0
  67. kelpmesh/core/graph.py +95 -0
  68. kelpmesh/core/macros.py +639 -0
  69. kelpmesh/core/model.py +55 -0
  70. kelpmesh/core/packages.py +184 -0
  71. kelpmesh/core/project.py +187 -0
  72. kelpmesh/core/python_runner.py +190 -0
  73. kelpmesh/core/scheduler.py +296 -0
  74. kelpmesh/core/schema_yaml.py +96 -0
  75. kelpmesh/core/substitutions.py +150 -0
  76. kelpmesh/core/versioning.py +106 -0
  77. kelpmesh/diff/__init__.py +0 -0
  78. kelpmesh/diff/comparison.py +62 -0
  79. kelpmesh/diff/engine.py +55 -0
  80. kelpmesh/docs/__init__.py +0 -0
  81. kelpmesh/docs/generator.py +394 -0
  82. kelpmesh/integrations/__init__.py +1 -0
  83. kelpmesh/integrations/bitbucket.py +97 -0
  84. kelpmesh/integrations/github.py +128 -0
  85. kelpmesh/integrations/gitlab.py +93 -0
  86. kelpmesh/mesh/__init__.py +21 -0
  87. kelpmesh/mesh/access.py +110 -0
  88. kelpmesh/mesh/config.py +91 -0
  89. kelpmesh/mesh/contracts.py +200 -0
  90. kelpmesh/mesh/health.py +178 -0
  91. kelpmesh/mesh/resolver.py +143 -0
  92. kelpmesh/observability/__init__.py +0 -0
  93. kelpmesh/observability/alerts.py +106 -0
  94. kelpmesh/observability/anomaly.py +67 -0
  95. kelpmesh/observability/history.py +120 -0
  96. kelpmesh/orchestrate/engine.py +89 -0
  97. kelpmesh/parser/__init__.py +0 -0
  98. kelpmesh/parser/lineage.py +58 -0
  99. kelpmesh/parser/python.py +28 -0
  100. kelpmesh/parser/sql.py +96 -0
  101. kelpmesh/schema/__init__.py +0 -0
  102. kelpmesh/schema/drift.py +98 -0
  103. kelpmesh/security/__init__.py +1 -0
  104. kelpmesh/security/audit.py +94 -0
  105. kelpmesh/security/classifier.py +111 -0
  106. kelpmesh/security/erasure.py +135 -0
  107. kelpmesh/security/masking.py +81 -0
  108. kelpmesh/security/rls.py +93 -0
  109. kelpmesh/semantic/__init__.py +263 -0
  110. kelpmesh/semantic/exporters/__init__.py +26 -0
  111. kelpmesh/semantic/exporters/base.py +62 -0
  112. kelpmesh/semantic/exporters/looker.py +118 -0
  113. kelpmesh/semantic/exporters/manifest.py +73 -0
  114. kelpmesh/semantic/exporters/powerbi.py +148 -0
  115. kelpmesh/semantic/exporters/qlik.py +139 -0
  116. kelpmesh/semantic/exporters/tableau.py +109 -0
  117. kelpmesh/semantic/serve.py +128 -0
  118. kelpmesh/state/__init__.py +0 -0
  119. kelpmesh/state/engine.py +392 -0
  120. kelpmesh/studio/__init__.py +1 -0
  121. kelpmesh/studio/app.py +378 -0
  122. kelpmesh/testing/__init__.py +0 -0
  123. kelpmesh/testing/fixtures.py +247 -0
  124. kelpmesh/testing/runner.py +82 -0
  125. kelpmesh/testing/schema_tests.py +96 -0
  126. kelpmesh_core-1.0.0.dist-info/METADATA +402 -0
  127. kelpmesh_core-1.0.0.dist-info/RECORD +130 -0
  128. kelpmesh_core-1.0.0.dist-info/WHEEL +4 -0
  129. kelpmesh_core-1.0.0.dist-info/entry_points.txt +2 -0
  130. kelpmesh_core-1.0.0.dist-info/licenses/LICENSE +201 -0
kelpmesh/__init__.py ADDED
@@ -0,0 +1,6 @@
1
"""
kelpmesh — Code-native data transformation platform (SQL & Python models).
Zero telemetry. Zero analytics. Zero phone-home.
"""

# Package-level flag stating that the package does not phone home.
# NOTE(review): nothing in the visible source reads this value — confirm
# whether any code consumes it or whether it is purely declarative.
__phone_home__ = False
kelpmesh/__main__.py ADDED
@@ -0,0 +1,3 @@
1
"""Entry point for ``python -m kelpmesh``: delegates to the CLI ``main``."""

from kelpmesh.cli.main import main

# Guarded so that importing this module (e.g. by tooling that walks the
# package) does not launch the CLI as a side effect. ``python -m kelpmesh``
# still runs it because the module is then executed as ``__main__``.
if __name__ == "__main__":
    main()
kelpmesh/adapters/__init__.py ADDED
@@ -0,0 +1,51 @@
1
+ from kelpmesh.adapters.base import WarehouseAdapter
2
+ from kelpmesh.core.config import WarehouseConfig
3
+
4
+
5
def get_adapter(config: WarehouseConfig, project_path: str | None = None) -> WarehouseAdapter:
    """Instantiate the warehouse adapter selected by ``config.type``.

    Each adapter module is imported inside its own branch, so only the
    module for the selected warehouse is loaded.

    Args:
        config: Warehouse configuration; its ``type`` attribute picks the
            adapter.
        project_path: Forwarded to ``DuckDBAdapter`` only; the other
            adapters are constructed from ``config`` alone.

    Returns:
        A new adapter instance. ``"duckdb"`` and every unrecognised type
        both yield a ``DuckDBAdapter``.
    """
    kind = config.type

    if kind == "snowflake":
        from kelpmesh.adapters.snowflake import SnowflakeAdapter
        return SnowflakeAdapter(config)
    if kind == "bigquery":
        from kelpmesh.adapters.bigquery import BigQueryAdapter
        return BigQueryAdapter(config)
    if kind == "postgres":
        from kelpmesh.adapters.postgres import PostgresAdapter
        return PostgresAdapter(config)
    if kind == "redshift":
        from kelpmesh.adapters.redshift import RedshiftAdapter
        return RedshiftAdapter(config)
    if kind == "databricks":
        from kelpmesh.adapters.databricks import DatabricksAdapter
        return DatabricksAdapter(config)
    if kind == "fabric":
        from kelpmesh.adapters.fabric import FabricAdapter
        return FabricAdapter(config)
    if kind in ("mysql", "mariadb"):
        from kelpmesh.adapters.mysql import MySQLAdapter
        return MySQLAdapter(config)
    if kind in ("trino", "presto"):
        from kelpmesh.adapters.trino import TrinoAdapter
        return TrinoAdapter(config)
    if kind == "clickhouse":
        from kelpmesh.adapters.clickhouse import ClickHouseAdapter
        return ClickHouseAdapter(config)
    if kind == "spark":
        from kelpmesh.adapters.spark import SparkAdapter
        return SparkAdapter(config)
    if kind == "athena":
        from kelpmesh.adapters.athena import AthenaAdapter
        return AthenaAdapter(config)
    if kind == "hive":
        from kelpmesh.adapters.hive import HiveAdapter
        return HiveAdapter(config)
    if kind in ("sqlserver", "mssql", "synapse", "azuresynapse"):
        from kelpmesh.adapters.sqlserver import SQLServerAdapter
        return SQLServerAdapter(config)

    # Reached for "duckdb" and for any type not listed above.
    from kelpmesh.adapters.duckdb import DuckDBAdapter
    return DuckDBAdapter(config, project_path=project_path)
kelpmesh/adapters/athena.py ADDED
@@ -0,0 +1,347 @@
1
+ """Amazon Athena adapter for KelpMesh.
2
+
3
+ Install the driver:
4
+ pip install kelpmesh[athena]
5
+
6
+ kelpmesh.yml:
7
+ warehouse:
8
+ type: athena
9
+ host: us-east-1 # AWS region
10
+ database: my_glue_database
11
+ path: "s3://my-bucket/athena-results/" # S3 staging dir for query results
12
+ user: "{{ env_var('AWS_ACCESS_KEY_ID') }}"
13
+ password: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}"
14
+
15
+ Notes:
16
+ - `host` maps to the AWS region (e.g. us-east-1).
17
+ - `path` is the S3 staging directory used by Athena for result output.
18
+ - `user` / `password` are the AWS access key id / secret access key.
19
+ Leave both unset to use the default credential chain (IAM role, env
20
+ vars, ~/.aws/credentials, etc.).
21
+ - Athena does not support INSERT INTO on CTAS tables; incremental runs
22
+ use a CTAS-then-rename workaround.
23
+ - External table creation requires `s3_location` to be passed as a
24
+ keyword argument to execute_model via the `extra` dict (not yet
25
+ exposed in the standard interface — extend as needed).
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ from kelpmesh.adapters.base import WarehouseAdapter, sanitize_name
31
+ from kelpmesh.core.config import WarehouseConfig
32
+
33
+
34
class AthenaAdapter(WarehouseAdapter):
    """Warehouse adapter that runs models on Amazon Athena through PyAthena.

    Config mapping (read in ``connect``): ``host`` is the AWS region,
    ``path`` the S3 staging directory, ``user`` / ``password`` the access
    key pair, and ``database`` the schema used to qualify table names.

    Two quoting styles are used on purpose: ``DROP`` / ``ALTER`` statements
    keep backtick-quoted names, while anything parsed as a query (``SELECT``,
    ``CREATE TABLE ... AS``, ``CREATE VIEW``) uses double-quoted names.
    """

    # Bookkeeping columns appended to every SCD Type 2 snapshot table, in the
    # order they are created by execute_snapshot.
    _AUDIT_COLUMNS = (
        "_scd_id",
        "_valid_from",
        "_valid_to",
        "_is_current",
        "_dbt_updated_at",
    )

    # Cast explicitly so every timestamp written by execute_snapshot has the
    # same TIMESTAMP type as the ``CAST(NULL AS TIMESTAMP)`` / ``CAST(... AS
    # TIMESTAMP)`` columns it is combined with in CASE and UNION ALL.
    _NOW = "CAST(current_timestamp AS TIMESTAMP)"

    def __init__(self, config: WarehouseConfig) -> None:
        """Store the configuration; the connection is opened lazily."""
        self.config = config
        self.conn = None

    # ------------------------------------------------------------------
    # Connection helpers
    # ------------------------------------------------------------------

    def connect(self) -> None:
        """Open a PyAthena connection built from the warehouse config.

        Raises:
            ImportError: if PyAthena is not installed (chained to the
                original import failure).
        """
        try:
            import pyathena
        except ImportError as exc:
            raise ImportError(
                "PyAthena not installed. Run: pip install kelpmesh[athena]"
            ) from exc

        kwargs: dict = {
            "s3_staging_dir": self.config.path or "",
            "region_name": self.config.host or "us-east-1",
        }
        # Credentials and schema are only passed when configured.
        if self.config.user:
            kwargs["aws_access_key_id"] = self.config.user
        if self.config.password:
            kwargs["aws_secret_access_key"] = self.config.password
        if self.config.database:
            kwargs["schema_name"] = self.config.database

        self.conn = pyathena.connect(**kwargs)

    def disconnect(self) -> None:
        """Close the adapter-owned connection, if one is open."""
        if self.conn:
            self.conn.close()
            self.conn = None

    def _ensure_conn(self, conn=None):
        """Return ``conn`` if given, else the adapter connection (opening it)."""
        c = conn or self.conn
        if not c:
            self.connect()
            return self.conn
        return c

    # ------------------------------------------------------------------
    # Name / literal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _quote(identifier: str) -> str:
        """Double-quote an identifier for use inside a query."""
        return '"' + str(identifier).replace('"', '""') + '"'

    @staticmethod
    def _literal(value: str) -> str:
        """Render ``value`` as a single-quoted SQL string literal."""
        return "'" + str(value).replace("'", "''") + "'"

    def _hive_name(self, table_name: str) -> str:
        """Backtick-quoted, database-qualified name for DROP / ALTER statements."""
        db = self.config.database or ""
        return f"`{db}`.`{table_name}`" if db else f"`{table_name}`"

    def _query_name(self, table_name: str) -> str:
        """Double-quoted, database-qualified name for SELECT / CTAS / views."""
        db = self.config.database or ""
        quoted = self._quote(table_name)
        return f"{self._quote(db)}.{quoted}" if db else quoted

    # ------------------------------------------------------------------
    # execute
    # ------------------------------------------------------------------

    def execute(self, sql: str, conn=None) -> list[dict]:
        """Run ``sql`` and return the result rows as column-name dicts.

        Returns an empty list for statements that produce no result set.
        The cursor is always closed, including when execution raises.
        """
        c = self._ensure_conn(conn)
        cursor = c.cursor()
        try:
            cursor.execute(sql)
            if cursor.description:
                cols = [d[0] for d in cursor.description]
                return [dict(zip(cols, row)) for row in cursor.fetchall()]
            return []
        finally:
            cursor.close()

    # ------------------------------------------------------------------
    # execute_model
    # ------------------------------------------------------------------

    def execute_model(
        self,
        sql: str,
        table_name: str,
        materialized: str = "view",
        conn=None,
        unique_key: str | None = None,
        incremental_strategy: str = "append",
    ) -> None:
        """Materialize ``sql`` as ``table_name``.

        Args:
            sql: The model's SELECT statement.
            table_name: Target relation name (qualified with the configured
                database when one is set).
            materialized: ``"incremental"``, ``"table"``, ``"ephemeral"``
                (nothing is created) or anything else for a view.
            conn: Optional connection to use instead of the adapter's own.
            unique_key: For incremental runs with a non-``"append"``
                strategy, existing rows whose key also appears in the new
                result are replaced instead of kept.
            incremental_strategy: ``"append"`` keeps every existing row.
        """
        c = self._ensure_conn(conn)
        target = self._query_name(table_name)

        if materialized == "incremental":
            if not self.table_exists(table_name, conn=c):
                self.execute(f"CREATE TABLE {target} AS {sql}", conn=c)
                return

            # The table is rebuilt through a CTAS swap. The rebuilt table
            # must contain the existing rows as well as the new ones;
            # building it from ``sql`` alone would drop all history.
            tmp = f"_km_inc_{table_name}"
            try:
                # Best-effort removal of a temp table left by a failed run;
                # if it really is in the way the CTAS below fails loudly.
                self.execute(
                    f"DROP TABLE IF EXISTS {self._hive_name(tmp)}", conn=c
                )
            except Exception:
                pass

            existing = f"SELECT * FROM {target} _km_old"
            if unique_key and incremental_strategy != "append":
                key = self._quote(unique_key)
                # NOT EXISTS rather than NOT IN: a NULL key in the new data
                # would make NOT IN discard every existing row.
                existing += (
                    f" WHERE NOT EXISTS (SELECT 1 FROM (\n{sql}\n) _km_new"
                    f" WHERE _km_new.{key} = _km_old.{key})"
                )
            # UNION ALL is positional: this relies on the model still
            # producing the same columns, in the same order, as the table.
            self.execute(
                f"CREATE TABLE {self._query_name(tmp)} AS "
                f"{existing} UNION ALL "
                f"SELECT * FROM (\n{sql}\n) _km_incoming",
                conn=c,
            )
            self.execute(
                f"DROP TABLE IF EXISTS {self._hive_name(table_name)}", conn=c
            )
            self.execute(
                f"ALTER TABLE {self._hive_name(tmp)} RENAME TO `{table_name}`",
                conn=c,
            )
            return

        self.drop_table(table_name, materialized, conn=c)
        if materialized == "table":
            self.execute(f"CREATE TABLE {target} AS {sql}", conn=c)
        elif materialized != "ephemeral":
            self.execute(f"CREATE OR REPLACE VIEW {target} AS {sql}", conn=c)

    # ------------------------------------------------------------------
    # table_exists
    # ------------------------------------------------------------------

    def table_exists(self, table_name: str, conn=None) -> bool:
        """Return True if ``information_schema.tables`` lists ``table_name``.

        The lookup is restricted to the configured database when one is
        set. Any query failure is reported as "does not exist".
        """
        db = self.config.database or ""
        c = self._ensure_conn(conn)
        where = f"table_name = {self._literal(table_name)}"
        if db:
            where = f"table_schema = {self._literal(db)} AND {where}"
        try:
            rows = self.execute(
                "SELECT COUNT(*) AS cnt FROM information_schema.tables "
                f"WHERE {where}",
                conn=c,
            )
            return (rows[0].get("cnt") or 0) > 0 if rows else False
        except Exception:
            return False

    # ------------------------------------------------------------------
    # table_schema
    # ------------------------------------------------------------------

    def table_schema(self, table_name: str, conn=None) -> list[dict]:
        """Return ``column_name`` / ``data_type`` / ``is_nullable`` rows.

        Rows come from ``information_schema.columns`` in ordinal order; an
        empty list is returned when nothing matches.
        """
        db = self.config.database or ""
        c = self._ensure_conn(conn)
        where = f"table_name = {self._literal(table_name)}"
        if db:
            where = f"table_schema = {self._literal(db)} AND {where}"
        rows = self.execute(
            "SELECT column_name, data_type, is_nullable "
            "FROM information_schema.columns "
            f"WHERE {where} "
            "ORDER BY ordinal_position",
            conn=c,
        )
        return rows or []

    # ------------------------------------------------------------------
    # drop_table
    # ------------------------------------------------------------------

    def drop_table(self, table_name: str, materialized: str = "view", conn=None) -> None:
        """Drop the relation: a view when ``materialized == "view"``, else a table."""
        safe = self._hive_name(table_name)
        c = self._ensure_conn(conn)
        if materialized == "view":
            self.execute(f"DROP VIEW IF EXISTS {safe}", conn=c)
        else:
            self.execute(f"DROP TABLE IF EXISTS {safe}", conn=c)

    # ------------------------------------------------------------------
    # execute_snapshot (full-rebuild CTAS, then swap)
    # ------------------------------------------------------------------

    def execute_snapshot(
        self,
        sql: str,
        table_name: str,
        unique_key: str,
        strategy: str = "timestamp",
        updated_at: str = "updated_at",
        conn=None,
    ) -> None:
        """SCD Type 2 snapshot implemented as a full rebuild.

        First run: the snapshot table is created from ``sql`` plus the audit
        columns. Later runs:

        1. Stage the incoming data as a view.
        2. Rebuild the snapshot into a new table: existing rows are carried
           over, with ``_valid_to`` / ``_is_current`` rewritten for current
           rows whose key arrived with changes; changed and brand-new keys
           get a fresh current row.
        3. Drop the original and rename the rebuilt table into place.

        Both halves of the rebuild list their columns explicitly (source
        columns in table order, then the audit columns), so they always
        project the same shape. Source columns are taken from the existing
        table; columns added to the model later are not picked up.

        Args:
            sql: The snapshot's SELECT statement.
            table_name: Snapshot table name.
            unique_key: Column identifying a record across runs.
            strategy: ``"timestamp"`` compares ``updated_at`` with the stored
                ``_dbt_updated_at``; any other value compares every
                non-key source column.
            updated_at: Timestamp column used by the timestamp strategy.
            conn: Optional connection to use instead of the adapter's own.

        Raises:
            RuntimeError: if the existing table's columns cannot be listed.
            Exception: any query failure is re-raised after best-effort
                cleanup of the staging view and the rebuilt table.
        """
        c = self._ensure_conn(conn)
        target = self._query_name(table_name)
        stage = f"_km_snap_{table_name}"
        new_table = f"_km_snap_new_{table_name}"
        stage_q = self._query_name(stage)
        key = self._quote(unique_key)
        updated = self._quote(updated_at)
        now = self._NOW
        by_timestamp = strategy == "timestamp"

        try:
            if not self.table_exists(table_name, conn=c):
                dbt_updated_expr = (
                    f"CAST({updated} AS TIMESTAMP)" if by_timestamp else now
                )
                self.execute(f"""
                    CREATE TABLE {target} AS
                    SELECT *,
                        to_hex(md5(to_utf8(CAST({key} AS VARCHAR)))) AS _scd_id,
                        {now} AS _valid_from,
                        CAST(NULL AS TIMESTAMP) AS _valid_to,
                        TRUE AS _is_current,
                        {dbt_updated_expr} AS _dbt_updated_at
                    FROM ({sql}) _src
                """, conn=c)
                return

            data_cols = [
                r["column_name"]
                for r in self.table_schema(table_name, conn=c)
                if r["column_name"] not in self._AUDIT_COLUMNS
            ]
            if not data_cols:
                raise RuntimeError(
                    f"Cannot rebuild snapshot {table_name!r}: no source "
                    "columns found in information_schema.columns"
                )

            # Stage incoming data as a view; OR REPLACE also clears a view
            # left behind by a previous failed run.
            self.execute(f"CREATE OR REPLACE VIEW {stage_q} AS {sql}", conn=c)

            if by_timestamp:
                changed_cond = (
                    f"CAST(n.{updated} AS TIMESTAMP) > s._dbt_updated_at"
                )
                dbt_updated_insert = f"CAST(n.{updated} AS TIMESTAMP)"
            else:
                check_cols = [col for col in data_cols if col != unique_key]
                changed_cond = " OR ".join(
                    f"n.{self._quote(col)} IS DISTINCT FROM s.{self._quote(col)}"
                    for col in check_cols
                ) or "FALSE"
                dbt_updated_insert = now

            kept_cols = ", ".join(f"s.{self._quote(col)}" for col in data_cols)
            incoming_cols = ", ".join(
                f"n.{self._quote(col)}" for col in data_cols
            )
            # True only for a current row whose key arrived with changes; the
            # join below matches ``n`` to current rows only.
            expires = f"n.{key} IS NOT NULL AND ({changed_cond})"

            try:
                # Best-effort removal of a rebuilt table left by a failed run.
                self.execute(
                    f"DROP TABLE IF EXISTS {self._hive_name(new_table)}",
                    conn=c,
                )
            except Exception:
                pass

            # First SELECT: existing rows, expiring the changed ones.
            # Second SELECT: new current rows for changed or brand-new keys.
            self.execute(f"""
                CREATE TABLE {self._query_name(new_table)} AS
                SELECT {kept_cols},
                    s._scd_id AS _scd_id,
                    s._valid_from AS _valid_from,
                    CASE WHEN {expires} THEN {now}
                         ELSE s._valid_to END AS _valid_to,
                    CASE WHEN {expires} THEN FALSE
                         ELSE s._is_current END AS _is_current,
                    s._dbt_updated_at AS _dbt_updated_at
                FROM {target} s
                LEFT JOIN {stage_q} n ON n.{key} = s.{key}
                    AND s._is_current = TRUE

                UNION ALL

                SELECT {incoming_cols},
                    to_hex(md5(to_utf8(CAST(n.{key} AS VARCHAR)))) AS _scd_id,
                    {now} AS _valid_from,
                    CAST(NULL AS TIMESTAMP) AS _valid_to,
                    TRUE AS _is_current,
                    {dbt_updated_insert} AS _dbt_updated_at
                FROM {stage_q} n
                WHERE NOT EXISTS (
                    SELECT 1 FROM {target} s
                    WHERE s.{key} = n.{key} AND s._is_current = TRUE
                )
                OR EXISTS (
                    SELECT 1 FROM {target} s
                    WHERE s.{key} = n.{key}
                        AND s._is_current = TRUE
                        AND ({changed_cond})
                )
            """, conn=c)

            # Swap: drop original, rename new, then remove the staging view.
            self.execute(
                f"DROP TABLE IF EXISTS {self._hive_name(table_name)}", conn=c
            )
            self.execute(
                f"ALTER TABLE {self._hive_name(new_table)} "
                f"RENAME TO `{table_name}`",
                conn=c,
            )
            self.execute(
                f"DROP VIEW IF EXISTS {self._hive_name(stage)}", conn=c
            )

        except Exception:
            # Best-effort cleanup of the intermediates; the original error is
            # what the caller needs to see, so cleanup failures are ignored.
            try:
                self.execute(
                    f"DROP VIEW IF EXISTS {self._hive_name(stage)}", conn=c
                )
            except Exception:
                pass
            try:
                self.execute(
                    f"DROP TABLE IF EXISTS {self._hive_name(new_table)}",
                    conn=c,
                )
            except Exception:
                pass
            raise
kelpmesh/adapters/base.py ADDED
@@ -0,0 +1,139 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any
4
+
5
_IDENTIFIER_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")


def sanitize_name(name: str) -> str:
    """Validate and quote a SQL identifier to prevent injection.

    Args:
        name: Candidate identifier; must start with a letter or underscore
            and contain only letters, digits and underscores.

    Returns:
        ``name`` wrapped in double quotes.

    Raises:
        ValueError: if ``name`` is not a plain identifier.
    """
    # fullmatch, not match: with match(), the trailing ``$`` also matches
    # just before a final newline, so "abc\n" would pass validation.
    if not _IDENTIFIER_RE.fullmatch(name):
        raise ValueError(f"Invalid SQL identifier: {name!r}")
    return f'"{name}"'
13
+
14
+
15
+ class WarehouseAdapter(ABC):
16
+ @abstractmethod
17
+ def connect(self) -> None:
18
+ ...
19
+
20
+ @abstractmethod
21
+ def disconnect(self) -> None:
22
+ ...
23
+
24
+ @abstractmethod
25
+ def execute(self, sql: str, conn=None) -> Any:
26
+ ...
27
+
28
+ @abstractmethod
29
+ def execute_model(
30
+ self, sql: str, table_name: str, materialized: str = "view",
31
+ conn=None, unique_key: str | None = None,
32
+ incremental_strategy: str = "append",
33
+ ) -> None:
34
+ ...
35
+
36
+ @abstractmethod
37
+ def table_exists(self, table_name: str, conn=None) -> bool:
38
+ ...
39
+
40
+ @abstractmethod
41
+ def table_schema(self, table_name: str, conn=None) -> list[dict]:
42
+ ...
43
+
44
+ @abstractmethod
45
+ def drop_table(self, table_name: str, materialized: str = "view", conn=None) -> None:
46
+ ...
47
+
48
+ def acquire_conn(self) -> Any:
49
+ return None
50
+
51
+ def release_conn(self, conn: Any) -> None:
52
+ pass
53
+
54
+ def preview(self, sql: str, limit: int = 100, conn=None) -> list[dict]:
55
+ wrapped = f"SELECT * FROM ({sql}) AS _km_preview LIMIT {limit}"
56
+ return self.execute(wrapped, conn=conn)
57
+
58
+ def fetch_row_count(self, table_name: str, conn=None) -> int:
59
+ result = self.execute(f"SELECT COUNT(*) AS cnt FROM {table_name}", conn=conn)
60
+ if result and len(result) > 0:
61
+ return result[0]["cnt"]
62
+ return 0
63
+
64
+ def load_csv(self, path: str, table_name: str, delimiter: str = ",") -> None:
65
+ """Load a CSV/TSV file into a table. Override for warehouse-native ingest."""
66
+ import pandas as pd
67
+ df = pd.read_csv(path, sep=delimiter)
68
+ self._write_df(df, table_name)
69
+
70
+ def execute_snapshot(
71
+ self,
72
+ sql: str,
73
+ table_name: str,
74
+ unique_key: str,
75
+ strategy: str = "timestamp",
76
+ updated_at: str = "updated_at",
77
+ conn=None,
78
+ ) -> None:
79
+ """SCD Type 2 snapshot. Override per adapter."""
80
+ raise NotImplementedError(
81
+ f"Snapshots are not yet implemented for {self.__class__.__name__}. "
82
+ f"Supported: DuckDB, Postgres, Snowflake, BigQuery, Databricks, Fabric, Redshift."
83
+ )
84
+
85
+ def execute_materialized_view(
86
+ self,
87
+ sql: str,
88
+ table_name: str,
89
+ conn=None,
90
+ ) -> None:
91
+ """Create or refresh a materialized view. Falls back to table if unsupported."""
92
+ # Default: fall back to regular table (DuckDB, MySQL, Hive don't support MV natively)
93
+ self.drop_table(table_name, materialized="table", conn=conn)
94
+ self.execute_model(sql, table_name, materialized="table", conn=conn)
95
+
96
+ def _write_df(self, df, table_name: str) -> None:
97
+ """Write a pandas DataFrame to the warehouse as a table."""
98
+ import pandas as pd
99
+ from io import StringIO
100
+ buf = StringIO()
101
+ df.to_csv(buf, index=False)
102
+ buf.seek(0)
103
+ lines = []
104
+ for _ in range(20):
105
+ line = buf.readline()
106
+ if not line:
107
+ break
108
+ lines.append(line.strip())
109
+ header = lines[0].split(",") if lines else []
110
+ sample = []
111
+ for line in lines[1:]:
112
+ vals = line.split(",")
113
+ if len(vals) == len(header):
114
+ sample.append(vals)
115
+ import re
116
+ clean = re.sub(r"[^a-zA-Z0-9_]", "_", table_name)
117
+ col_defs = ", ".join(
118
+ f'"{c}" VARCHAR' for c in header
119
+ )
120
+ stmt = f"CREATE TABLE IF NOT EXISTS \"{clean}\" ({col_defs})"
121
+ self.execute(stmt)
122
+ placeholders = ", ".join("?" for _ in header)
123
+ insert = f'INSERT INTO "{clean}" VALUES ({placeholders})'
124
+ for row in sample:
125
+ self.execute(insert, list(row))
126
+ # Rest via chunked INSERT from buffered CSV
127
+ buf.seek(0)
128
+ next(buf) # skip header
129
+ chunk = []
130
+ for line in buf:
131
+ vals = line.strip().split(",")
132
+ if len(vals) == len(header):
133
+ chunk.append(vals)
134
+ if len(chunk) >= 500:
135
+ for row in chunk:
136
+ self.execute(insert, row)
137
+ chunk = []
138
+ for row in chunk:
139
+ self.execute(insert, row)