datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,336 @@
1
+ """Snowflake connector — pulls schema from information_schema."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import warnings
7
+ warnings.filterwarnings("ignore", message=".*incompatible version of 'pyarrow'.*")
8
+
9
+ from datetime import date
10
+ from typing import Any, Dict, List, Tuple
11
+
12
+ from datalex_core.connectors.base import BaseConnector, ConnectorConfig, ConnectorResult, infer_primary_keys, infer_relationships
13
+
14
+
15
def _load_private_key(path: str, passphrase: str | None = None) -> bytes:
    """Load a PEM private key file and return unencrypted PKCS#8 DER bytes.

    Handles header/content mismatches (e.g. an 'ENCRYPTED PRIVATE KEY' header
    around an unencrypted key body) by trying several parsing strategies in
    order and returning the result of the first one that succeeds.

    Args:
        path: Location of the PEM file; a leading ``~`` is expanded.
        passphrase: Optional passphrase for an encrypted key. Empty or ``None``
            means "no passphrase".

    Returns:
        The key serialized as DER / PKCS#8 without encryption.

    Raises:
        FileNotFoundError: If ``path`` does not exist (raised by ``open``).
        Exception: If no strategy can parse the key, the error from the
            *first* strategy is re-raised. That attempt uses the data and
            passphrase exactly as supplied, so its message describes the real
            problem instead of a failure caused by a later fallback.
    """
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import serialization

    with open(os.path.expanduser(path), "rb") as f:
        pem_data = f.read()

    pw = passphrase.encode() if passphrase else None

    # Strategy 1: parse as-is with the provided passphrase (or none).
    attempts = [(pem_data, pw)]
    # Strategy 2: retry without the passphrase (the key body may be
    # unencrypted). Only meaningful when a passphrase was actually supplied;
    # otherwise it would repeat strategy 1.
    if pw is not None:
        attempts.append((pem_data, None))

    # Strategy 3: the header claims ENCRYPTED; rewrite it to a plain
    # PRIVATE KEY header and retry without a passphrase.
    text = pem_data.decode("utf-8", errors="replace")
    if "ENCRYPTED PRIVATE KEY" in text:
        fixed = text.replace(
            "BEGIN ENCRYPTED PRIVATE KEY", "BEGIN PRIVATE KEY"
        ).replace(
            "END ENCRYPTED PRIVATE KEY", "END PRIVATE KEY"
        ).encode("utf-8")
        attempts.append((fixed, None))

    first_err: Exception | None = None
    for data, password in attempts:
        try:
            private_key = serialization.load_pem_private_key(
                data, password=password, backend=default_backend(),
            )
            return private_key.private_bytes(
                encoding=serialization.Encoding.DER,
                format=serialization.PrivateFormat.PKCS8,
                encryption_algorithm=serialization.NoEncryption(),
            )
        except Exception as e:
            # Remember only the first failure; later fallbacks fail with
            # less specific errors.
            if first_err is None:
                first_err = e
            continue

    # `attempts` always holds at least one entry, so first_err is set here.
    raise first_err  # type: ignore[misc]
63
+
64
+
65
# Snowflake data type names grouped by the type string they translate to.
# Groups are listed in the order the resulting mapping should be populated.
_SF_TYPE_GROUPS = (
    ("decimal", ("NUMBER", "DECIMAL", "NUMERIC")),
    ("integer", ("INT", "INTEGER")),
    ("bigint", ("BIGINT",)),
    ("smallint", ("SMALLINT",)),
    ("tinyint", ("TINYINT", "BYTEINT")),
    ("float", ("FLOAT", "FLOAT4", "FLOAT8", "DOUBLE", "DOUBLE PRECISION", "REAL")),
    ("string", ("VARCHAR", "CHAR", "CHARACTER", "STRING")),
    ("text", ("TEXT",)),
    ("binary", ("BINARY", "VARBINARY")),
    ("boolean", ("BOOLEAN",)),
    ("date", ("DATE",)),
    ("timestamp", ("DATETIME",)),
    ("time", ("TIME",)),
    ("timestamp", ("TIMESTAMP", "TIMESTAMP_LTZ", "TIMESTAMP_NTZ", "TIMESTAMP_TZ")),
    ("json", ("VARIANT", "OBJECT", "ARRAY")),
    ("string", ("GEOGRAPHY", "GEOMETRY")),
)

# Upper-case Snowflake data type name -> type string stored on pulled fields.
_SF_TYPE_MAP = {
    sf_name: mapped_type
    for mapped_type, sf_names in _SF_TYPE_GROUPS
    for sf_name in sf_names
}
102
+
103
+
104
class SnowflakeConnector(BaseConnector):
    """Connector that pulls tables, columns and key constraints from Snowflake."""

    connector_type = "snowflake"
    display_name = "Snowflake"
    required_package = "snowflake.connector"

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return ``name`` as a double-quoted SQL identifier with ``"`` escaped."""
        return '"' + name.replace('"', '""') + '"'

    @staticmethod
    def _column_positions(cursor: Any) -> Dict[str, int]:
        """Map lower-cased result column names to their index via ``cursor.description``.

        Returns an empty mapping when the cursor exposes no description, in
        which case callers fall back to fixed positions.
        """
        description = getattr(cursor, "description", None) or []
        return {str(col[0]).lower(): idx for idx, col in enumerate(description)}

    @staticmethod
    def _row_value(row: Any, positions: Dict[str, int], column: str, fallback_index: int) -> Any:
        """Read ``column`` from ``row`` by name, or by ``fallback_index`` if the name is unknown.

        Returns ``None`` when the row is too short to contain the column.
        """
        index = positions.get(column, fallback_index)
        return row[index] if len(row) > index else None

    def _build_connect_params(self, config: ConnectorConfig) -> Dict[str, Any]:
        """Build connection kwargs, using RSA key-pair auth when private_key_path is set."""
        params: Dict[str, Any] = {
            "account": config.host,
            "user": config.user,
            "warehouse": config.warehouse,
            "database": config.database,
            "schema": config.schema or "PUBLIC",
        }
        if config.private_key_path:
            # The password doubles as the optional passphrase for the key file.
            passphrase = config.password if config.password else None
            params["private_key"] = _load_private_key(config.private_key_path, passphrase)
        else:
            params["password"] = config.password
        return params

    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
        """Open and close a connection; return ``(ok, message)`` instead of raising."""
        try:
            import snowflake.connector
            conn = snowflake.connector.connect(**self._build_connect_params(config))
            conn.close()
            return True, "Connection successful"
        except ImportError:
            return False, "snowflake-connector-python not installed. Run: pip install snowflake-connector-python"
        except FileNotFoundError:
            return False, f"Private key file not found: {config.private_key_path}"
        except Exception as e:
            return False, f"Connection failed: {e}"

    def _connect(self, config: ConnectorConfig):
        """Open a connection and make a best-effort attempt to resume the warehouse."""
        import snowflake.connector
        conn = snowflake.connector.connect(**self._build_connect_params(config))
        # Auto-resume the warehouse if it is suspended.
        if config.warehouse:
            try:
                resume_cur = conn.cursor()
                try:
                    resume_cur.execute(f"ALTER WAREHOUSE IF EXISTS {config.warehouse} RESUME IF SUSPENDED")
                finally:
                    resume_cur.close()
            except Exception:
                # Deliberately best-effort: permission denied or a missing
                # warehouse is left for the main query to surface.
                pass
        return conn

    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """Return ``{"name", "table_count"}`` for each schema in the configured database.

        INFORMATION_SCHEMA itself is skipped. A schema whose table count
        cannot be queried is reported with a count of 0.
        """
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            cur.execute(f"SHOW SCHEMAS IN DATABASE {config.database}")
            rows = cur.fetchall()
            results = []
            for row in rows:
                schema_name = row[1]  # name is second column in SHOW SCHEMAS
                if schema_name.upper() == "INFORMATION_SCHEMA":
                    continue
                # Count tables in this schema
                try:
                    cur.execute(f"SELECT COUNT(*) FROM {config.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = %s", (schema_name,))
                    count = cur.fetchone()[0]
                except Exception:
                    count = 0
                results.append({"name": schema_name, "table_count": count})
            return results
        finally:
            conn.close()

    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """Return name, type, column count and row count for each table or view in the schema."""
        schema = config.schema or "PUBLIC"
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            cur.execute("""
                SELECT TABLE_NAME, TABLE_TYPE,
                       (SELECT COUNT(*) FROM INFORMATION_SCHEMA.COLUMNS c
                        WHERE c.TABLE_SCHEMA = t.TABLE_SCHEMA AND c.TABLE_NAME = t.TABLE_NAME) AS COL_COUNT,
                       ROW_COUNT
                FROM INFORMATION_SCHEMA.TABLES t
                WHERE TABLE_SCHEMA = %s
                ORDER BY TABLE_NAME
            """, (schema.upper(),))
            results = []
            for row in cur.fetchall():
                ttype = "view" if "VIEW" in (row[1] or "").upper() else "table"
                results.append({"name": row[0], "type": ttype, "column_count": row[2], "row_count": row[3]})
            return results
        finally:
            conn.close()

    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
        """Connect, pull the schema model, and always close the connection."""
        conn = self._connect(config)
        try:
            return self._pull(conn, config)
        finally:
            conn.close()

    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
        """Read tables, columns, primary keys and foreign keys into a model.

        Constraint lookups are best-effort per table: a failure is recorded as
        a warning and the remaining tables are still processed. When no
        primary keys or no relationships are found, the inference helpers
        fill them in.
        """
        model = self._build_model(config)
        schema_filter = (config.schema or "PUBLIC").upper()
        db_name = (config.database or "").upper()
        # Named pull_warnings so the module-level `warnings` import is not shadowed.
        pull_warnings: List[str] = []

        # Qualify with the database only when one is configured; otherwise
        # rely on the connection's current database rather than emitting
        # an invalid ".INFORMATION_SCHEMA" reference.
        info_schema = "INFORMATION_SCHEMA"
        schema_path = self._quote_identifier(schema_filter)
        if db_name:
            already_quoted = len(db_name) >= 2 and db_name.startswith('"') and db_name.endswith('"')
            quoted_db = db_name if already_quoted else self._quote_identifier(db_name)
            info_schema = f"{quoted_db}.INFORMATION_SCHEMA"
            schema_path = f"{quoted_db}.{schema_path}"

        cur = conn.cursor()
        try:
            table_entities = self._fetch_table_entities(cur, config, info_schema, db_name, schema_filter)
            total_columns = self._fetch_columns(cur, info_schema, schema_filter, table_entities)
            self._mark_primary_keys(cur, schema_path, table_entities, pull_warnings)
            relationships = self._fetch_foreign_keys(cur, schema_path, table_entities, pull_warnings)
        finally:
            cur.close()

        entities_list = list(table_entities.values())

        # --- Inference: fill in PKs and FKs when constraints are missing ---
        has_any_pk = any(
            f.get("primary_key") for ent in entities_list for f in ent.get("fields", [])
        )
        if not has_any_pk:
            entities_list, pk_msgs = infer_primary_keys(entities_list)
            pull_warnings.extend(pk_msgs)

        if not relationships:
            inferred_rels, fk_msgs = infer_relationships(entities_list, relationships)
            relationships.extend(inferred_rels)
            pull_warnings.extend(fk_msgs)
            if inferred_rels:
                pull_warnings.insert(0, f"No FK constraints found — inferred {len(inferred_rels)} relationships from column naming patterns.")

        model["entities"] = entities_list
        model["relationships"] = relationships

        return ConnectorResult(
            model=model,
            tables_found=len(table_entities),
            columns_found=total_columns,
            relationships_found=len(relationships),
            indexes_found=0,
            warnings=pull_warnings,
        )

    def _fetch_table_entities(
        self,
        cur: Any,
        config: ConnectorConfig,
        info_schema: str,
        db_name: str,
        schema_filter: str,
    ) -> Dict[str, Dict[str, Any]]:
        """Return an entity dict per included base table/view, keyed by physical table name."""
        # The schema name is passed as a bind parameter, not interpolated.
        cur.execute(
            f"""
            SELECT TABLE_NAME, TABLE_TYPE
            FROM {info_schema}.TABLES
            WHERE TABLE_SCHEMA = %s
              AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
            ORDER BY TABLE_NAME
            """,
            (schema_filter,),
        )
        table_entities: Dict[str, Dict[str, Any]] = {}
        for table_name, table_type in cur.fetchall():
            if not self._should_include_table(table_name, config):
                continue
            table_entities[table_name] = {
                "name": self._entity_name(table_name),
                "physical_name": table_name,
                "type": "view" if table_type == "VIEW" else "table",
                "description": f"Pulled from Snowflake {db_name}.{schema_filter}.{table_name} on {date.today().isoformat()}",
                "fields": [],
                "schema": schema_filter,
                "database": db_name,
            }
        return table_entities

    def _fetch_columns(
        self,
        cur: Any,
        info_schema: str,
        schema_filter: str,
        table_entities: Dict[str, Dict[str, Any]],
    ) -> int:
        """Append a field dict per column to its entity; return the number of fields added."""
        cur.execute(
            f"""
            SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE,
                   COLUMN_DEFAULT, CHARACTER_MAXIMUM_LENGTH,
                   NUMERIC_PRECISION, NUMERIC_SCALE
            FROM {info_schema}.COLUMNS
            WHERE TABLE_SCHEMA = %s
            ORDER BY TABLE_NAME, ORDINAL_POSITION
            """,
            (schema_filter,),
        )
        total_columns = 0
        for row in cur.fetchall():
            tname, col_name, data_type, is_nullable, col_default, _char_max_len, num_prec, num_scale = row
            if tname not in table_entities:
                continue  # column of a table that was filtered out

            type_key = (data_type or "").upper()
            dl_type = _SF_TYPE_MAP.get(type_key, "string")
            if type_key in ("NUMBER", "DECIMAL", "NUMERIC") and num_prec:
                dl_type = f"decimal({num_prec},{num_scale or 0})"

            field: Dict[str, Any] = {
                "name": col_name.lower(),
                "type": dl_type,
                "nullable": is_nullable == "YES",
            }
            if col_default is not None:
                field["default"] = str(col_default)

            table_entities[tname]["fields"].append(field)
            total_columns += 1
        return total_columns

    def _mark_primary_keys(
        self,
        cur: Any,
        schema_path: str,
        table_entities: Dict[str, Dict[str, Any]],
        pull_warnings: List[str],
    ) -> None:
        """Flag primary-key fields in place; a failing table only adds a warning."""
        for tname, entity in table_entities.items():
            try:
                # Quote the table name so mixed-case / special-character names resolve.
                cur.execute(f"SHOW PRIMARY KEYS IN TABLE {schema_path}.{self._quote_identifier(tname)}")
                positions = self._column_positions(cur)
                pk_rows = cur.fetchall()
            except Exception as e:
                pull_warnings.append(f"Could not fetch primary keys for {tname}: {e}")
                continue
            for pk_row in pk_rows:
                # column_name is the fifth output column of SHOW PRIMARY KEYS.
                pk_col = self._row_value(pk_row, positions, "column_name", 4)
                if not pk_col:
                    continue
                for f in entity["fields"]:
                    if f["name"] == pk_col.lower():
                        f["primary_key"] = True
                        f["nullable"] = False

    def _fetch_foreign_keys(
        self,
        cur: Any,
        schema_path: str,
        table_entities: Dict[str, Dict[str, Any]],
        pull_warnings: List[str],
    ) -> List[Dict[str, Any]]:
        """Flag foreign-key fields in place and return one relationship per FK column."""
        relationships: List[Dict[str, Any]] = []
        for tname, entity in table_entities.items():
            try:
                cur.execute(f"SHOW IMPORTED KEYS IN TABLE {schema_path}.{self._quote_identifier(tname)}")
                positions = self._column_positions(cur)
                fk_rows = cur.fetchall()
            except Exception as e:
                pull_warnings.append(f"Could not fetch foreign keys for {tname}: {e}")
                continue
            for fk_row in fk_rows:
                # Resolve columns by name; the numeric fallbacks follow the
                # documented SHOW IMPORTED KEYS output order (created_on,
                # pk_database_name, pk_schema_name, pk_table_name,
                # pk_column_name, fk_database_name, fk_schema_name,
                # fk_table_name, fk_column_name, key_sequence, update_rule,
                # delete_rule, fk_name, ...).
                parent_table = self._row_value(fk_row, positions, "pk_table_name", 3)
                parent_col = self._row_value(fk_row, positions, "pk_column_name", 4)
                child_col = self._row_value(fk_row, positions, "fk_column_name", 8)
                fk_name = self._row_value(fk_row, positions, "fk_name", 12)
                if not (parent_table and parent_col and child_col):
                    continue
                for f in entity["fields"]:
                    if f["name"] == child_col.lower():
                        f["foreign_key"] = True
                parent_entity = self._entity_name(parent_table)
                child_entity = self._entity_name(tname)
                relationships.append({
                    "name": fk_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_col.lower()}_fk",
                    "from": f"{parent_entity}.{parent_col.lower()}",
                    "to": f"{child_entity}.{child_col.lower()}",
                    "cardinality": "one_to_many",
                })
        return relationships