datalex-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalex_cli/__init__.py +1 -0
- datalex_cli/datalex_cli.py +658 -0
- datalex_cli/main.py +2925 -0
- datalex_cli-0.1.1.dist-info/METADATA +228 -0
- datalex_cli-0.1.1.dist-info/RECORD +64 -0
- datalex_cli-0.1.1.dist-info/WHEEL +5 -0
- datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
- datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
- datalex_core/__init__.py +94 -0
- datalex_core/_schemas/datalex/common.schema.json +127 -0
- datalex_core/_schemas/datalex/domain.schema.json +24 -0
- datalex_core/_schemas/datalex/entity.schema.json +158 -0
- datalex_core/_schemas/datalex/model.schema.json +141 -0
- datalex_core/_schemas/datalex/policy.schema.json +70 -0
- datalex_core/_schemas/datalex/project.schema.json +82 -0
- datalex_core/_schemas/datalex/snippet.schema.json +24 -0
- datalex_core/_schemas/datalex/source.schema.json +104 -0
- datalex_core/_schemas/datalex/term.schema.json +30 -0
- datalex_core/canonical.py +166 -0
- datalex_core/completion.py +204 -0
- datalex_core/connectors/__init__.py +39 -0
- datalex_core/connectors/base.py +417 -0
- datalex_core/connectors/bigquery.py +229 -0
- datalex_core/connectors/databricks.py +262 -0
- datalex_core/connectors/mysql.py +266 -0
- datalex_core/connectors/postgres.py +309 -0
- datalex_core/connectors/redshift.py +298 -0
- datalex_core/connectors/snowflake.py +336 -0
- datalex_core/connectors/sqlserver.py +425 -0
- datalex_core/datalex/__init__.py +26 -0
- datalex_core/datalex/diff.py +188 -0
- datalex_core/datalex/errors.py +85 -0
- datalex_core/datalex/loader.py +512 -0
- datalex_core/datalex/migrate_layout.py +382 -0
- datalex_core/datalex/parse_cache.py +102 -0
- datalex_core/datalex/project.py +214 -0
- datalex_core/datalex/types.py +224 -0
- datalex_core/dbt/__init__.py +18 -0
- datalex_core/dbt/emit.py +344 -0
- datalex_core/dbt/manifest.py +329 -0
- datalex_core/dbt/profiles.py +185 -0
- datalex_core/dbt/sync.py +279 -0
- datalex_core/dbt/warehouse.py +215 -0
- datalex_core/dialects/__init__.py +15 -0
- datalex_core/dialects/_common.py +48 -0
- datalex_core/dialects/base.py +47 -0
- datalex_core/dialects/postgres.py +164 -0
- datalex_core/dialects/registry.py +36 -0
- datalex_core/dialects/snowflake.py +129 -0
- datalex_core/diffing.py +358 -0
- datalex_core/docs_generator.py +797 -0
- datalex_core/doctor.py +181 -0
- datalex_core/generators.py +478 -0
- datalex_core/importers.py +1176 -0
- datalex_core/issues.py +23 -0
- datalex_core/loader.py +21 -0
- datalex_core/migrate.py +316 -0
- datalex_core/modeling.py +679 -0
- datalex_core/packages.py +430 -0
- datalex_core/policy.py +1037 -0
- datalex_core/resolver.py +456 -0
- datalex_core/schema.py +54 -0
- datalex_core/semantic.py +1561 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""Snowflake connector — pulls schema from information_schema."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import warnings
|
|
7
|
+
warnings.filterwarnings("ignore", message=".*incompatible version of 'pyarrow'.*")
|
|
8
|
+
|
|
9
|
+
from datetime import date
|
|
10
|
+
from typing import Any, Dict, List, Tuple
|
|
11
|
+
|
|
12
|
+
from datalex_core.connectors.base import BaseConnector, ConnectorConfig, ConnectorResult, infer_primary_keys, infer_relationships
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _load_private_key(path: str, passphrase: str | None = None) -> bytes:
|
|
16
|
+
"""Load an RSA private key from a PEM file and return DER bytes for Snowflake.
|
|
17
|
+
|
|
18
|
+
Handles header/content mismatches (e.g. 'ENCRYPTED PRIVATE KEY' header
|
|
19
|
+
with an unencrypted key body) by trying multiple parsing strategies.
|
|
20
|
+
"""
|
|
21
|
+
from cryptography.hazmat.backends import default_backend
|
|
22
|
+
from cryptography.hazmat.primitives import serialization
|
|
23
|
+
|
|
24
|
+
with open(os.path.expanduser(path), "rb") as f:
|
|
25
|
+
pem_data = f.read()
|
|
26
|
+
|
|
27
|
+
pw = passphrase.encode() if passphrase else None
|
|
28
|
+
|
|
29
|
+
# Strategy 1: try as-is with provided passphrase
|
|
30
|
+
# Strategy 2: try without passphrase (header may say ENCRYPTED but body isn't)
|
|
31
|
+
# Strategy 3: fix header to match actual content and retry
|
|
32
|
+
attempts = [
|
|
33
|
+
(pem_data, pw),
|
|
34
|
+
(pem_data, None),
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
# If header says ENCRYPTED but no passphrase, also try fixing the header
|
|
38
|
+
text = pem_data.decode("utf-8", errors="replace")
|
|
39
|
+
if "ENCRYPTED PRIVATE KEY" in text:
|
|
40
|
+
fixed = text.replace(
|
|
41
|
+
"BEGIN ENCRYPTED PRIVATE KEY", "BEGIN PRIVATE KEY"
|
|
42
|
+
).replace(
|
|
43
|
+
"END ENCRYPTED PRIVATE KEY", "END PRIVATE KEY"
|
|
44
|
+
).encode("utf-8")
|
|
45
|
+
attempts.append((fixed, None))
|
|
46
|
+
|
|
47
|
+
last_err = None
|
|
48
|
+
for data, password in attempts:
|
|
49
|
+
try:
|
|
50
|
+
private_key = serialization.load_pem_private_key(
|
|
51
|
+
data, password=password, backend=default_backend(),
|
|
52
|
+
)
|
|
53
|
+
return private_key.private_bytes(
|
|
54
|
+
encoding=serialization.Encoding.DER,
|
|
55
|
+
format=serialization.PrivateFormat.PKCS8,
|
|
56
|
+
encryption_algorithm=serialization.NoEncryption(),
|
|
57
|
+
)
|
|
58
|
+
except Exception as e:
|
|
59
|
+
last_err = e
|
|
60
|
+
continue
|
|
61
|
+
|
|
62
|
+
raise last_err # type: ignore[misc]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Lookup from upper-case Snowflake data type names to the type names this
# connector emits. Built from (target type, Snowflake names) groups; the groups
# are listed so that the resulting key order matches a flat literal.
_SF_TYPE_MAP = {
    sf_type: dl_type
    for dl_type, sf_types in (
        # Fixed-point and integer types
        ("decimal", ("NUMBER", "DECIMAL", "NUMERIC")),
        ("integer", ("INT", "INTEGER")),
        ("bigint", ("BIGINT",)),
        ("smallint", ("SMALLINT",)),
        ("tinyint", ("TINYINT", "BYTEINT")),
        # Floating-point types
        ("float", ("FLOAT", "FLOAT4", "FLOAT8", "DOUBLE", "DOUBLE PRECISION", "REAL")),
        # Character and binary types
        ("string", ("VARCHAR", "CHAR", "CHARACTER", "STRING")),
        ("text", ("TEXT",)),
        ("binary", ("BINARY", "VARBINARY")),
        ("boolean", ("BOOLEAN",)),
        # Date and time types
        ("date", ("DATE",)),
        ("timestamp", ("DATETIME",)),
        ("time", ("TIME",)),
        ("timestamp", ("TIMESTAMP", "TIMESTAMP_LTZ", "TIMESTAMP_NTZ", "TIMESTAMP_TZ")),
        # Semi-structured types
        ("json", ("VARIANT", "OBJECT", "ARRAY")),
        # Geospatial types are carried as strings
        ("string", ("GEOGRAPHY", "GEOMETRY")),
    )
    for sf_type in sf_types
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class SnowflakeConnector(BaseConnector):
    """Snowflake connector that reads tables, columns and key constraints.

    Metadata is read from ``INFORMATION_SCHEMA`` views and from the
    ``SHOW SCHEMAS``, ``SHOW PRIMARY KEYS`` and ``SHOW IMPORTED KEYS`` commands.
    """

    connector_type = "snowflake"
    display_name = "Snowflake"
    required_package = "snowflake.connector"

    @staticmethod
    def _quote_ident(identifier: str) -> str:
        """Return *identifier* double-quoted, with embedded quotes doubled."""
        return '"' + identifier.replace('"', '""') + '"'

    @staticmethod
    def _column_positions(cur: Any) -> Dict[str, int]:
        """Map lower-cased result column names of *cur* to their positions.

        Returns an empty mapping when the cursor exposes no ``description``.
        """
        description = getattr(cur, "description", None) or ()
        return {str(col[0]).lower(): pos for pos, col in enumerate(description)}

    @staticmethod
    def _row_value(row: Any, positions: Dict[str, int], column: str, fallback_index: int) -> Any:
        """Return ``row[column]`` by name, or by *fallback_index* if the name is unknown.

        Returns ``None`` when the resolved index lies outside the row.
        """
        index = positions.get(column, fallback_index)
        return row[index] if 0 <= index < len(row) else None

    def _build_connect_params(self, config: ConnectorConfig) -> Dict[str, Any]:
        """Build connection kwargs, using RSA key-pair auth when private_key_path is set."""
        params: Dict[str, Any] = {
            "account": config.host,
            "user": config.user,
            "warehouse": config.warehouse,
            "database": config.database,
            "schema": config.schema or "PUBLIC",
        }
        if config.private_key_path:
            # Use password as the optional passphrase for the key file
            passphrase = config.password if config.password else None
            params["private_key"] = _load_private_key(config.private_key_path, passphrase)
        else:
            params["password"] = config.password
        return params

    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
        """Open and immediately close a connection.

        Returns:
            ``(True, message)`` on success, otherwise ``(False, reason)``; no
            exception derived from ``Exception`` escapes this method.
        """
        try:
            import snowflake.connector
            conn = snowflake.connector.connect(**self._build_connect_params(config))
            conn.close()
            return True, "Connection successful"
        except ImportError:
            return False, "snowflake-connector-python not installed. Run: pip install snowflake-connector-python"
        except FileNotFoundError:
            return False, f"Private key file not found: {config.private_key_path}"
        except Exception as e:
            return False, f"Connection failed: {e}"

    def _connect(self, config: ConnectorConfig):
        """Open a connection and try to resume the configured warehouse."""
        import snowflake.connector
        conn = snowflake.connector.connect(**self._build_connect_params(config))
        # Auto-resume the warehouse if it is suspended
        if config.warehouse:
            try:
                conn.cursor().execute(f"ALTER WAREHOUSE IF EXISTS {config.warehouse} RESUME IF SUSPENDED")
            except Exception:
                pass  # permission denied or warehouse doesn't exist — let the main query surface the error
        return conn

    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """List schemas of the configured database with a table count for each.

        ``INFORMATION_SCHEMA`` itself is skipped. A schema whose count query
        fails is reported with ``table_count`` 0 rather than aborting the listing.
        """
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            cur.execute(f"SHOW SCHEMAS IN DATABASE {config.database}")
            rows = cur.fetchall()
            results = []
            for row in rows:
                schema_name = row[1]  # name is second column in SHOW SCHEMAS
                if schema_name.upper() in ("INFORMATION_SCHEMA",):
                    continue
                # Count tables in this schema
                try:
                    cur.execute(f"SELECT COUNT(*) FROM {config.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = %s", (schema_name,))
                    count = cur.fetchone()[0]
                except Exception:
                    count = 0
                results.append({"name": schema_name, "table_count": count})
            return results
        finally:
            conn.close()

    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """List tables and views of the configured schema (default ``PUBLIC``).

        Each entry carries ``name``, ``type`` ("table" or "view"),
        ``column_count`` and ``row_count``.
        """
        schema = config.schema or "PUBLIC"
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            cur.execute("""
                SELECT TABLE_NAME, TABLE_TYPE,
                       (SELECT COUNT(*) FROM INFORMATION_SCHEMA.COLUMNS c
                        WHERE c.TABLE_SCHEMA = t.TABLE_SCHEMA AND c.TABLE_NAME = t.TABLE_NAME) AS COL_COUNT,
                       ROW_COUNT
                FROM INFORMATION_SCHEMA.TABLES t
                WHERE TABLE_SCHEMA = %s
                ORDER BY TABLE_NAME
            """, (schema.upper(),))
            results = []
            for row in cur.fetchall():
                ttype = "view" if "VIEW" in (row[1] or "").upper() else "table"
                results.append({"name": row[0], "type": ttype, "column_count": row[2], "row_count": row[3]})
            return results
        finally:
            conn.close()

    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
        """Connect, pull the schema, and always close the connection."""
        conn = self._connect(config)
        try:
            return self._pull(conn, config)
        finally:
            conn.close()

    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
        """Read tables, columns, primary keys and foreign keys into a model.

        Args:
            conn: An open connection; it is not closed here.
            config: Connector configuration; ``schema`` defaults to ``PUBLIC``
                and both schema and database names are upper-cased.

        Returns:
            A ``ConnectorResult`` holding the populated model, counts, and any
            non-fatal warnings (constraint lookups that failed, inference notes).
        """
        model = self._build_model(config)
        schema_filter = (config.schema or "PUBLIC").upper()
        db_name = (config.database or "").upper()
        # Without a configured database, fall back to the session's current
        # database instead of emitting an invalid ".INFORMATION_SCHEMA" path.
        info_schema = f"{db_name}.INFORMATION_SCHEMA" if db_name else "INFORMATION_SCHEMA"
        namespace = ".".join(part for part in (db_name, schema_filter) if part)
        # Named ``messages`` so the module-level ``warnings`` import is not shadowed.
        messages: List[str] = []

        cur = conn.cursor()
        try:
            # --- Tables ---
            cur.execute(
                f"""
                SELECT TABLE_NAME, TABLE_TYPE
                FROM {info_schema}.TABLES
                WHERE TABLE_SCHEMA = %s
                  AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
                ORDER BY TABLE_NAME
                """,
                (schema_filter,),
            )
            tables = cur.fetchall()

            table_entities: Dict[str, Dict[str, Any]] = {}
            for table_name, table_type in tables:
                if not self._should_include_table(table_name, config):
                    continue
                entity_name = self._entity_name(table_name)
                entity_type = "view" if table_type == "VIEW" else "table"
                table_entities[table_name] = {
                    "name": entity_name,
                    "physical_name": table_name,
                    "type": entity_type,
                    "description": f"Pulled from Snowflake {db_name}.{schema_filter}.{table_name} on {date.today().isoformat()}",
                    "fields": [],
                    "schema": schema_filter,
                    "database": db_name,
                }

            # --- Columns ---
            cur.execute(
                f"""
                SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE,
                       COLUMN_DEFAULT, CHARACTER_MAXIMUM_LENGTH,
                       NUMERIC_PRECISION, NUMERIC_SCALE
                FROM {info_schema}.COLUMNS
                WHERE TABLE_SCHEMA = %s
                ORDER BY TABLE_NAME, ORDINAL_POSITION
                """,
                (schema_filter,),
            )
            columns = cur.fetchall()
            total_columns = 0

            for row in columns:
                tname, col_name, data_type, is_nullable, col_default, _char_max_len, num_prec, num_scale = row
                if tname not in table_entities:
                    continue

                upper_type = (data_type or "").upper()
                dl_type = _SF_TYPE_MAP.get(upper_type, "string")
                if upper_type in ("NUMBER", "DECIMAL", "NUMERIC") and num_prec:
                    dl_type = f"decimal({num_prec},{num_scale or 0})"

                field: Dict[str, Any] = {
                    "name": col_name.lower(),
                    "type": dl_type,
                    "nullable": is_nullable == "YES",
                }
                if col_default is not None:
                    field["default"] = str(col_default)

                table_entities[tname]["fields"].append(field)
                total_columns += 1

            # --- Primary keys ---
            try:
                for tname in table_entities:
                    # The table name comes from the catalog verbatim, so quote it
                    # to keep mixed-case and special-character names working.
                    cur.execute(f"SHOW PRIMARY KEYS IN TABLE {namespace}.{self._quote_ident(tname)}")
                    positions = self._column_positions(cur)
                    for pk_row in cur.fetchall():
                        pk_col = self._row_value(pk_row, positions, "column_name", 4)
                        if not pk_col:
                            continue
                        pk_field = pk_col.lower()
                        for f in table_entities[tname]["fields"]:
                            if f["name"] == pk_field:
                                f["primary_key"] = True
                                f["nullable"] = False
            except Exception as e:
                messages.append(f"Could not fetch primary keys: {e}")

            # --- Foreign keys ---
            relationships: List[Dict[str, Any]] = []
            try:
                for tname in table_entities:
                    cur.execute(f"SHOW IMPORTED KEYS IN TABLE {namespace}.{self._quote_ident(tname)}")
                    # SHOW IMPORTED KEYS rows begin with created_on, so the
                    # columns are resolved by name; the positional fallbacks
                    # follow Snowflake's documented output order.
                    positions = self._column_positions(cur)
                    for fk_row in cur.fetchall():
                        parent_table = self._row_value(fk_row, positions, "pk_table_name", 3)
                        parent_col = self._row_value(fk_row, positions, "pk_column_name", 4)
                        child_col = self._row_value(fk_row, positions, "fk_column_name", 8)
                        fk_name = self._row_value(fk_row, positions, "fk_name", 12)
                        if not (parent_table and parent_col and child_col):
                            continue
                        child_field = child_col.lower()
                        for f in table_entities[tname]["fields"]:
                            if f["name"] == child_field:
                                f["foreign_key"] = True
                        parent_entity = self._entity_name(parent_table)
                        child_entity = self._entity_name(tname)
                        relationships.append({
                            "name": fk_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_field}_fk",
                            "from": f"{parent_entity}.{parent_col.lower()}",
                            "to": f"{child_entity}.{child_field}",
                            "cardinality": "one_to_many",
                        })
            except Exception as e:
                messages.append(f"Could not fetch foreign keys: {e}")

            entities_list = list(table_entities.values())

            # --- Inference: fill in PKs and FKs when constraints are missing ---
            has_any_pk = any(
                f.get("primary_key") for ent in entities_list for f in ent.get("fields", [])
            )
            if not has_any_pk:
                entities_list, pk_msgs = infer_primary_keys(entities_list)
                messages.extend(pk_msgs)

            if not relationships:
                inferred_rels, fk_msgs = infer_relationships(entities_list, relationships)
                relationships.extend(inferred_rels)
                messages.extend(fk_msgs)
                if inferred_rels:
                    messages.insert(0, f"No FK constraints found — inferred {len(inferred_rels)} relationships from column naming patterns.")

            model["entities"] = entities_list
            model["relationships"] = relationships
        finally:
            # Release the cursor even when a metadata query raises.
            cur.close()

        return ConnectorResult(
            model=model,
            tables_found=len(table_entities),
            columns_found=total_columns,
            relationships_found=len(relationships),
            indexes_found=0,
            warnings=messages,
        )
|