datalex-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalex_cli/__init__.py +1 -0
- datalex_cli/datalex_cli.py +658 -0
- datalex_cli/main.py +2925 -0
- datalex_cli-0.1.1.dist-info/METADATA +228 -0
- datalex_cli-0.1.1.dist-info/RECORD +64 -0
- datalex_cli-0.1.1.dist-info/WHEEL +5 -0
- datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
- datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
- datalex_core/__init__.py +94 -0
- datalex_core/_schemas/datalex/common.schema.json +127 -0
- datalex_core/_schemas/datalex/domain.schema.json +24 -0
- datalex_core/_schemas/datalex/entity.schema.json +158 -0
- datalex_core/_schemas/datalex/model.schema.json +141 -0
- datalex_core/_schemas/datalex/policy.schema.json +70 -0
- datalex_core/_schemas/datalex/project.schema.json +82 -0
- datalex_core/_schemas/datalex/snippet.schema.json +24 -0
- datalex_core/_schemas/datalex/source.schema.json +104 -0
- datalex_core/_schemas/datalex/term.schema.json +30 -0
- datalex_core/canonical.py +166 -0
- datalex_core/completion.py +204 -0
- datalex_core/connectors/__init__.py +39 -0
- datalex_core/connectors/base.py +417 -0
- datalex_core/connectors/bigquery.py +229 -0
- datalex_core/connectors/databricks.py +262 -0
- datalex_core/connectors/mysql.py +266 -0
- datalex_core/connectors/postgres.py +309 -0
- datalex_core/connectors/redshift.py +298 -0
- datalex_core/connectors/snowflake.py +336 -0
- datalex_core/connectors/sqlserver.py +425 -0
- datalex_core/datalex/__init__.py +26 -0
- datalex_core/datalex/diff.py +188 -0
- datalex_core/datalex/errors.py +85 -0
- datalex_core/datalex/loader.py +512 -0
- datalex_core/datalex/migrate_layout.py +382 -0
- datalex_core/datalex/parse_cache.py +102 -0
- datalex_core/datalex/project.py +214 -0
- datalex_core/datalex/types.py +224 -0
- datalex_core/dbt/__init__.py +18 -0
- datalex_core/dbt/emit.py +344 -0
- datalex_core/dbt/manifest.py +329 -0
- datalex_core/dbt/profiles.py +185 -0
- datalex_core/dbt/sync.py +279 -0
- datalex_core/dbt/warehouse.py +215 -0
- datalex_core/dialects/__init__.py +15 -0
- datalex_core/dialects/_common.py +48 -0
- datalex_core/dialects/base.py +47 -0
- datalex_core/dialects/postgres.py +164 -0
- datalex_core/dialects/registry.py +36 -0
- datalex_core/dialects/snowflake.py +129 -0
- datalex_core/diffing.py +358 -0
- datalex_core/docs_generator.py +797 -0
- datalex_core/doctor.py +181 -0
- datalex_core/generators.py +478 -0
- datalex_core/importers.py +1176 -0
- datalex_core/issues.py +23 -0
- datalex_core/loader.py +21 -0
- datalex_core/migrate.py +316 -0
- datalex_core/modeling.py +679 -0
- datalex_core/packages.py +430 -0
- datalex_core/policy.py +1037 -0
- datalex_core/resolver.py +456 -0
- datalex_core/schema.py +54 -0
- datalex_core/semantic.py +1561 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Databricks / Spark SQL connector — pulls schema from Unity Catalog or Hive Metastore."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import date
|
|
6
|
+
from typing import Any, Dict, List, Tuple
|
|
7
|
+
|
|
8
|
+
from datalex_core.connectors.base import BaseConnector, ConnectorConfig, ConnectorResult, infer_primary_keys, infer_relationships
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
_SPARK_TYPE_MAP = {
|
|
12
|
+
"string": "string",
|
|
13
|
+
"int": "integer",
|
|
14
|
+
"integer": "integer",
|
|
15
|
+
"bigint": "bigint",
|
|
16
|
+
"smallint": "smallint",
|
|
17
|
+
"tinyint": "tinyint",
|
|
18
|
+
"float": "float",
|
|
19
|
+
"double": "float",
|
|
20
|
+
"decimal": "decimal",
|
|
21
|
+
"boolean": "boolean",
|
|
22
|
+
"date": "date",
|
|
23
|
+
"timestamp": "timestamp",
|
|
24
|
+
"timestamp_ntz": "timestamp",
|
|
25
|
+
"binary": "binary",
|
|
26
|
+
"array": "json",
|
|
27
|
+
"map": "json",
|
|
28
|
+
"struct": "json",
|
|
29
|
+
"void": "string",
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DatabricksConnector(BaseConnector):
    """Pull table, column and constraint metadata from Databricks SQL.

    Tables are listed with ``SHOW TABLES`` and columns with ``DESCRIBE TABLE``.
    Primary and foreign keys are read from the catalog's
    ``information_schema``; when none come back, keys and relationships are
    inferred with ``infer_primary_keys`` / ``infer_relationships``.

    The catalog defaults to ``main`` and the schema to ``default`` when the
    config leaves them empty.
    """

    connector_type = "databricks"
    display_name = "Databricks (Unity Catalog / Hive)"
    required_package = "databricks.sql"

    @staticmethod
    def _quote_ident(name: str) -> str:
        """Return *name* as a backtick-delimited SQL identifier.

        A name that is already a well-formed backtick-quoted identifier is
        returned unchanged so callers who pre-quoted their config keep working.
        """
        text = str(name)
        already_quoted = (
            len(text) >= 2
            and text[0] == "`"
            and text[-1] == "`"
            # Inside a quoted identifier a literal backtick is written ``.
            and "`" not in text[1:-1].replace("``", "")
        )
        if already_quoted:
            return text
        return "`" + text.replace("`", "``") + "`"

    @staticmethod
    def _string_literal(value: str) -> str:
        """Return *value* as a single-quoted SQL string literal (backslash-escaped)."""
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    def _describe_columns(self, cur: Any, qualified_table: str) -> List[Tuple[str, str, Any]]:
        """Run ``DESCRIBE TABLE`` and return ``(name, type, comment)`` per real column.

        The column list is followed, for partitioned tables, by marker rows
        (blank or starting with ``#``) and then the partition columns repeated.
        Reading stops at the first marker row so those repeats are not
        reported as extra columns.
        """
        cur.execute(f"DESCRIBE TABLE {qualified_table}")
        columns: List[Tuple[str, str, Any]] = []
        for row in cur.fetchall():
            col_name = row[0]
            if not col_name or col_name.startswith(("#", "---")):
                break
            col_type = row[1] if len(row) > 1 and row[1] else "string"
            comment = row[2] if len(row) > 2 else None
            columns.append((col_name, col_type, comment))
        return columns

    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
        """Open and immediately close a connection.

        Returns:
            ``(True, message)`` on success, ``(False, reason)`` when the driver
            is missing or the connection attempt raises.
        """
        try:
            self._connect(config).close()
        except ImportError:
            return False, "databricks-sql-connector not installed. Run: pip install databricks-sql-connector"
        except Exception as e:
            return False, f"Connection failed: {e}"
        return True, "Connection successful"

    def _connect(self, config: ConnectorConfig):
        """Open a connection from ``config.host``, ``config.extra['http_path']`` and ``config.token``."""
        # Imported lazily so the module loads without the optional driver.
        from databricks import sql

        return sql.connect(
            server_hostname=config.host,
            http_path=config.extra.get("http_path", ""),
            access_token=config.token,
        )

    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """List schemas of the configured catalog with their table counts.

        ``information_schema`` is omitted. A schema whose tables cannot be
        listed is still returned, with ``table_count`` 0.
        """
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            try:
                catalog_sql = self._quote_ident(config.catalog or "main")
                cur.execute(f"SHOW SCHEMAS IN {catalog_sql}")
                results = []
                for row in cur.fetchall():
                    schema_name = row[0]
                    if schema_name.lower() == "information_schema":
                        continue
                    try:
                        cur.execute(f"SHOW TABLES IN {catalog_sql}.{self._quote_ident(schema_name)}")
                        count = len(cur.fetchall())
                    except Exception:
                        # Best effort: keep the schema in the listing even if
                        # its tables cannot be enumerated.
                        count = 0
                    results.append({"name": schema_name, "table_count": count})
                return results
            finally:
                cur.close()
        finally:
            conn.close()

    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """List tables of the configured schema, sorted by name.

        Each entry carries ``name``, ``type`` (always ``"table"``),
        ``column_count`` (0 when the table cannot be described) and
        ``row_count`` (always ``None``).
        """
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            try:
                catalog_sql = self._quote_ident(config.catalog or "main")
                schema_sql = f"{catalog_sql}.{self._quote_ident(config.schema or 'default')}"
                cur.execute(f"SHOW TABLES IN {schema_sql}")
                results = []
                for row in cur.fetchall():
                    # SHOW TABLES returns (database, tableName, isTemporary)
                    table_name = row[1] if len(row) > 1 else row[0]
                    try:
                        col_count = len(
                            self._describe_columns(cur, f"{schema_sql}.{self._quote_ident(table_name)}")
                        )
                    except Exception:
                        # Best effort: list the table even if DESCRIBE fails.
                        col_count = 0
                    results.append({"name": table_name, "type": "table", "column_count": col_count, "row_count": None})
                return sorted(results, key=lambda x: x["name"])
            finally:
                cur.close()
        finally:
            conn.close()

    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
        """Connect, pull the configured schema, and always close the connection."""
        conn = self._connect(config)
        try:
            return self._pull(conn, config)
        finally:
            conn.close()

    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
        """Build the model for ``config.catalog`` / ``config.schema`` using *conn*.

        Failures on a single table's ``DESCRIBE`` and failures reading
        constraints are reported through ``ConnectorResult.warnings`` rather
        than raised; a failing ``SHOW TABLES`` propagates to the caller.
        """
        model = self._build_model(config)
        catalog_name = config.catalog or "main"
        schema_name = config.schema or "default"
        catalog_sql = self._quote_ident(catalog_name)
        schema_sql = f"{catalog_sql}.{self._quote_ident(schema_name)}"
        info_schema = f"{catalog_sql}.information_schema"

        warnings: List[str] = []
        relationships: List[Dict[str, Any]] = []
        table_entities: Dict[str, Dict[str, Any]] = {}
        total_columns = 0

        cur = conn.cursor()
        try:
            # --- Tables ---
            cur.execute(f"SHOW TABLES IN {schema_sql}")
            for row in cur.fetchall():
                # SHOW TABLES returns (database, tableName, isTemporary)
                table_name = row[1] if len(row) > 1 else row[0]
                if not self._should_include_table(table_name, config):
                    continue
                table_entities[table_name] = {
                    "name": self._entity_name(table_name),
                    "physical_name": table_name,
                    "type": "table",
                    "description": f"Pulled from Databricks {catalog_name}.{schema_name}.{table_name} on {date.today().isoformat()}",
                    "fields": [],
                    "schema": schema_name,
                    "database": catalog_name,
                }

            # --- Columns via DESCRIBE ---
            for table_name, entity in table_entities.items():
                try:
                    columns = self._describe_columns(cur, f"{schema_sql}.{self._quote_ident(table_name)}")
                except Exception as e:
                    warnings.append(f"Could not describe table {table_name}: {e}")
                    continue
                for col_name, col_type, comment in columns:
                    # "decimal(10,2)" -> "decimal", "array<int>" -> "array"
                    base_type = col_type.lower().split("(")[0].split("<")[0].strip()
                    dl_type = _SPARK_TYPE_MAP.get(base_type, "string")
                    if base_type == "decimal" and "(" in col_type:
                        # Keep precision and scale for decimals.
                        dl_type = col_type.lower()
                    field: Dict[str, Any] = {
                        "name": col_name,
                        "type": dl_type,
                        "nullable": True,
                    }
                    if comment:
                        field["description"] = comment
                    entity["fields"].append(field)
                    total_columns += 1

            # --- Constraints (information_schema) ---
            # One schema-wide query per constraint kind instead of one per
            # table. Joining on constraint_schema as well as constraint_name
            # keeps same-named constraints from other schemas out.
            # NOTE(review): information_schema is presumably absent on the
            # legacy Hive metastore; that now surfaces as one warning.
            try:
                schema_literal = self._string_literal(schema_name)

                cur.execute(f"""
                    SELECT tc.table_name, kcu.column_name
                    FROM {info_schema}.table_constraints tc
                    JOIN {info_schema}.key_column_usage kcu
                        ON tc.constraint_schema = kcu.constraint_schema
                        AND tc.constraint_name = kcu.constraint_name
                    WHERE tc.table_schema = {schema_literal}
                        AND tc.constraint_type = 'PRIMARY KEY'
                """)
                for table_name, pk_col in cur.fetchall():
                    entity = table_entities.get(table_name)
                    if entity is None:
                        continue
                    for f in entity["fields"]:
                        if f["name"] == pk_col:
                            f["primary_key"] = True
                            f["nullable"] = False

                # NOTE(review): for multi-column foreign keys this join pairs
                # every child column with every parent column; pairing by
                # ordinal position would need referential_constraints.
                cur.execute(f"""
                    SELECT
                        tc.table_name AS child_table,
                        kcu.column_name AS child_column,
                        ccu.table_name AS parent_table,
                        ccu.column_name AS parent_column,
                        tc.constraint_name
                    FROM {info_schema}.table_constraints tc
                    JOIN {info_schema}.key_column_usage kcu
                        ON tc.constraint_schema = kcu.constraint_schema
                        AND tc.constraint_name = kcu.constraint_name
                    JOIN {info_schema}.constraint_column_usage ccu
                        ON tc.constraint_schema = ccu.constraint_schema
                        AND tc.constraint_name = ccu.constraint_name
                    WHERE tc.table_schema = {schema_literal}
                        AND tc.constraint_type = 'FOREIGN KEY'
                """)
                for child_table, child_col, parent_table, parent_col, fk_name in cur.fetchall():
                    entity = table_entities.get(child_table)
                    if entity is None:
                        continue
                    parent_entity = self._entity_name(parent_table)
                    child_entity = self._entity_name(child_table)
                    for f in entity["fields"]:
                        if f["name"] == child_col:
                            f["foreign_key"] = True
                    relationships.append({
                        "name": fk_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_col}_fk",
                        "from": f"{parent_entity}.{parent_col}",
                        "to": f"{child_entity}.{child_col}",
                        "cardinality": "one_to_many",
                    })
            except Exception as e:
                warnings.append(f"Could not fetch constraints: {e}")
        finally:
            # Release the cursor on the error path too.
            cur.close()

        entities_list = list(table_entities.values())

        # --- Inference: fill in PKs and FKs when constraints are missing ---
        has_any_pk = any(
            f.get("primary_key") for ent in entities_list for f in ent.get("fields", [])
        )
        if not has_any_pk:
            entities_list, pk_msgs = infer_primary_keys(entities_list)
            warnings.extend(pk_msgs)

        if not relationships:
            inferred_rels, fk_msgs = infer_relationships(entities_list, relationships)
            relationships.extend(inferred_rels)
            warnings.extend(fk_msgs)
            if inferred_rels:
                warnings.insert(0, f"No FK constraints found — inferred {len(inferred_rels)} relationships from column naming patterns.")

        model["entities"] = entities_list
        model["relationships"] = relationships

        return ConnectorResult(
            model=model,
            tables_found=len(table_entities),
            columns_found=total_columns,
            relationships_found=len(relationships),
            indexes_found=0,
            warnings=warnings,
        )
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""MySQL connector — pulls schema from information_schema."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import date
|
|
6
|
+
from typing import Any, Dict, List, Tuple
|
|
7
|
+
|
|
8
|
+
from datalex_core.connectors.base import BaseConnector, ConnectorConfig, ConnectorResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
_MYSQL_TYPE_MAP = {
|
|
12
|
+
"int": "integer",
|
|
13
|
+
"integer": "integer",
|
|
14
|
+
"bigint": "bigint",
|
|
15
|
+
"smallint": "smallint",
|
|
16
|
+
"tinyint": "tinyint",
|
|
17
|
+
"mediumint": "integer",
|
|
18
|
+
"float": "float",
|
|
19
|
+
"double": "float",
|
|
20
|
+
"decimal": "decimal",
|
|
21
|
+
"numeric": "decimal",
|
|
22
|
+
"varchar": "string",
|
|
23
|
+
"char": "string",
|
|
24
|
+
"text": "text",
|
|
25
|
+
"mediumtext": "text",
|
|
26
|
+
"longtext": "text",
|
|
27
|
+
"tinytext": "text",
|
|
28
|
+
"blob": "binary",
|
|
29
|
+
"mediumblob": "binary",
|
|
30
|
+
"longblob": "binary",
|
|
31
|
+
"tinyblob": "binary",
|
|
32
|
+
"date": "date",
|
|
33
|
+
"datetime": "timestamp",
|
|
34
|
+
"timestamp": "timestamp",
|
|
35
|
+
"time": "time",
|
|
36
|
+
"year": "integer",
|
|
37
|
+
"boolean": "boolean",
|
|
38
|
+
"bool": "boolean",
|
|
39
|
+
"json": "json",
|
|
40
|
+
"binary": "binary",
|
|
41
|
+
"varbinary": "binary",
|
|
42
|
+
"enum": "string",
|
|
43
|
+
"set": "string",
|
|
44
|
+
"bit": "string",
|
|
45
|
+
"geometry": "string",
|
|
46
|
+
"point": "string",
|
|
47
|
+
"linestring": "string",
|
|
48
|
+
"polygon": "string",
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class MySQLConnector(BaseConnector):
    """Pull table, column, foreign-key and index metadata from MySQL.

    All metadata is read from ``information_schema``. The schema inspected is
    ``config.schema`` when set, otherwise ``config.database``.
    """

    connector_type = "mysql"
    display_name = "MySQL"
    required_package = "mysql.connector"

    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
        """Open and immediately close a connection.

        Returns:
            ``(True, message)`` on success, ``(False, reason)`` when the driver
            is missing or the connection attempt raises.
        """
        try:
            self._connect(config).close()
        except ImportError:
            return False, "mysql-connector-python not installed. Run: pip install mysql-connector-python"
        except Exception as e:
            return False, f"Connection failed: {e}"
        return True, "Connection successful"

    def _connect(self, config: ConnectorConfig):
        """Open a connection from the host, port (default 3306), database, user and password."""
        # Imported lazily so the module loads without the optional driver.
        import mysql.connector

        return mysql.connector.connect(
            host=config.host,
            port=config.port or 3306,
            database=config.database,
            user=config.user,
            password=config.password,
        )

    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """List non-system schemas with the number of tables and views in each."""
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            try:
                cur.execute("""
                    SELECT s.schema_name,
                           COUNT(t.table_name) AS table_count
                    FROM information_schema.schemata s
                    LEFT JOIN information_schema.tables t
                        ON t.table_schema = s.schema_name
                        AND t.table_type IN ('BASE TABLE', 'VIEW')
                    WHERE s.schema_name NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys')
                    GROUP BY s.schema_name
                    ORDER BY s.schema_name
                """)
                return [{"name": row[0], "table_count": row[1]} for row in cur.fetchall()]
            finally:
                cur.close()
        finally:
            conn.close()

    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """List tables and views of the selected schema, ordered by name.

        Each entry carries ``name``, ``type`` (``"table"`` or ``"view"``),
        ``column_count`` and ``row_count`` (the ``table_rows`` value reported
        by ``information_schema.tables``).
        """
        schema = config.schema or config.database
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            try:
                cur.execute("""
                    SELECT t.table_name, t.table_type,
                           (SELECT COUNT(*) FROM information_schema.columns c
                            WHERE c.table_schema = t.table_schema AND c.table_name = t.table_name) AS col_count,
                           t.table_rows
                    FROM information_schema.tables t
                    WHERE t.table_schema = %s
                        AND t.table_type IN ('BASE TABLE', 'VIEW')
                    ORDER BY t.table_name
                """, (schema,))
                return [
                    {
                        "name": row[0],
                        "type": "view" if "VIEW" in row[1] else "table",
                        "column_count": row[2],
                        "row_count": row[3],
                    }
                    for row in cur.fetchall()
                ]
            finally:
                cur.close()
        finally:
            conn.close()

    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
        """Connect, pull the selected schema, and always close the connection."""
        conn = self._connect(config)
        try:
            return self._pull(conn, config)
        finally:
            conn.close()

    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
        """Build the model (entities, relationships, indexes) using *conn*.

        The schema is chosen the same way ``list_tables`` chooses it
        (``config.schema or config.database``), so a schema browsed there is
        the one that gets pulled. Query errors propagate to the caller.
        """
        model = self._build_model(config)
        db_name = config.schema or config.database
        warnings: List[str] = []
        table_entities: Dict[str, Dict[str, Any]] = {}
        relationships: List[Dict[str, Any]] = []
        idx_map: Dict[str, Dict[str, Any]] = {}
        total_columns = 0

        cur = conn.cursor()
        try:
            # --- Tables ---
            cur.execute("""
                SELECT TABLE_NAME, TABLE_TYPE
                FROM information_schema.TABLES
                WHERE TABLE_SCHEMA = %s
                    AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
                ORDER BY TABLE_NAME
            """, (db_name,))
            for table_name, table_type in cur.fetchall():
                if not self._should_include_table(table_name, config):
                    continue
                table_entities[table_name] = {
                    "name": self._entity_name(table_name),
                    "physical_name": table_name,
                    "type": "view" if table_type == "VIEW" else "table",
                    "description": f"Pulled from MySQL {db_name}.{table_name} on {date.today().isoformat()}",
                    "fields": [],
                }

            # --- Columns ---
            cur.execute("""
                SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE,
                       COLUMN_DEFAULT, CHARACTER_MAXIMUM_LENGTH,
                       NUMERIC_PRECISION, NUMERIC_SCALE, COLUMN_TYPE, COLUMN_KEY
                FROM information_schema.COLUMNS
                WHERE TABLE_SCHEMA = %s
                ORDER BY TABLE_NAME, ORDINAL_POSITION
            """, (db_name,))
            for row in cur.fetchall():
                # COLUMN_TYPE (position 8) is selected but not used.
                (tname, col_name, data_type, is_nullable, col_default,
                 char_max_len, num_prec, num_scale, _column_type, col_key) = row
                entity = table_entities.get(tname)
                if entity is None:
                    continue

                type_key = data_type.lower()
                dl_type = _MYSQL_TYPE_MAP.get(type_key, "string")
                if type_key in ("decimal", "numeric") and num_prec:
                    dl_type = f"decimal({num_prec},{num_scale or 0})"
                if type_key == "varchar" and char_max_len:
                    dl_type = f"varchar({char_max_len})"

                field: Dict[str, Any] = {
                    "name": col_name,
                    "type": dl_type,
                    "nullable": is_nullable == "YES",
                }
                if col_default is not None:
                    field["default"] = str(col_default)
                if col_key == "PRI":
                    field["primary_key"] = True
                    field["nullable"] = False
                if col_key == "UNI":
                    field["unique"] = True

                entity["fields"].append(field)
                total_columns += 1

            # --- Foreign keys ---
            cur.execute("""
                SELECT
                    TABLE_NAME, COLUMN_NAME,
                    REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME,
                    CONSTRAINT_NAME
                FROM information_schema.KEY_COLUMN_USAGE
                WHERE TABLE_SCHEMA = %s
                    AND REFERENCED_TABLE_NAME IS NOT NULL
            """, (db_name,))
            for child_table, child_col, parent_table, parent_col, constraint_name in cur.fetchall():
                child = table_entities.get(child_table)
                if child is None:
                    # Child table was filtered out: a relationship would
                    # point at an entity that is not in the model.
                    continue
                for f in child["fields"]:
                    if f["name"] == child_col:
                        f["foreign_key"] = True
                parent_entity = self._entity_name(parent_table)
                child_entity = self._entity_name(child_table)
                relationships.append({
                    "name": constraint_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_col}_fk",
                    "from": f"{parent_entity}.{parent_col}",
                    "to": f"{child_entity}.{child_col}",
                    "cardinality": "one_to_many",
                })

            # --- Indexes ---
            cur.execute("""
                SELECT INDEX_NAME, TABLE_NAME, NON_UNIQUE, COLUMN_NAME
                FROM information_schema.STATISTICS
                WHERE TABLE_SCHEMA = %s
                    AND INDEX_NAME != 'PRIMARY'
                ORDER BY TABLE_NAME, INDEX_NAME, SEQ_IN_INDEX
            """, (db_name,))
            for idx_name, tname, non_unique, col_name in cur.fetchall():
                if tname not in table_entities:
                    continue
                # One entry per (table, index); rows arrive in SEQ_IN_INDEX
                # order, so fields are appended in index column order.
                key = f"{tname}.{idx_name}"
                if key not in idx_map:
                    idx_map[key] = {
                        "name": idx_name,
                        "entity": self._entity_name(tname),
                        "fields": [],
                        "unique": non_unique == 0,
                    }
                idx_map[key]["fields"].append(col_name)
        finally:
            # Release the cursor on the error path too.
            cur.close()

        indexes = list(idx_map.values())

        model["entities"] = list(table_entities.values())
        model["relationships"] = relationships
        model["indexes"] = indexes

        return ConnectorResult(
            model=model,
            tables_found=len(table_entities),
            columns_found=total_columns,
            relationships_found=len(relationships),
            indexes_found=len(indexes),
            warnings=warnings,
        )
|