datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,48 @@
1
+ """Helpers shared by dialect plugins."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from datalex_core.datalex.types import LogicalType
8
+
9
+
10
def physical_override(column: Dict[str, Any], dialect: str) -> Optional[str]:
    """Return the per-dialect physical type override for a column, or None.

    Reads ``column["physical"][dialect]["type"]``.

    Args:
        column: Column definition mapping.
        dialect: Dialect name used as the key under ``physical``.

    Returns:
        The value stored under ``type`` for ``dialect``, or ``None`` when the
        column has no ``physical`` mapping, no entry for ``dialect``, or
        either level is not a mapping.
    """
    physical = column.get("physical")
    # A malformed per-dialect entry is already treated as "no override";
    # treat a malformed ``physical`` value (e.g. a bare string) the same way
    # instead of failing on ``.get``.
    if not isinstance(physical, dict):
        return None
    entry = physical.get(dialect)
    if isinstance(entry, dict):
        return entry.get("type")
    return None
17
+
18
+
19
def physical_raw_ddl(column: Dict[str, Any], dialect: str) -> Optional[str]:
    """Return the per-dialect ``raw_ddl`` value for a column, or None.

    Reads ``column["physical"][dialect]["raw_ddl"]``.

    Args:
        column: Column definition mapping.
        dialect: Dialect name used as the key under ``physical``.

    Returns:
        The value stored under ``raw_ddl`` for ``dialect``, or ``None`` when
        the column has no ``physical`` mapping, no entry for ``dialect``, or
        either level is not a mapping.
    """
    physical = column.get("physical")
    # Mirror the tolerance applied to a malformed per-dialect entry: a
    # non-mapping ``physical`` value means "nothing configured", not a crash.
    if not isinstance(physical, dict):
        return None
    entry = physical.get(dialect)
    if isinstance(entry, dict):
        return entry.get("raw_ddl")
    return None
25
+
26
+
27
def qualified_table_name(entity: Dict[str, Any], quote, dialect: str) -> str:
    """Build the dotted, quoted table name for an entity.

    The table part is ``entity["physical_name"]`` when set, otherwise
    ``entity["name"]``. Non-empty ``database`` and ``schema`` values are
    prepended in that order. Each part is passed through ``quote`` and the
    results are joined with ``.``.

    Args:
        entity: Entity definition mapping.
        quote: Callable that quotes a single identifier string.
        dialect: Dialect name; accepted for signature parity with the other
            helpers and not used here.

    Returns:
        The quoted, dot-separated name.

    Raises:
        ValueError: If the entity has neither ``physical_name`` nor ``name``.
    """
    table = entity.get("physical_name") or entity.get("name")
    if not table:
        # Without this guard the missing name would be stringified and the
        # generated SQL would target a table literally called "None".
        raise ValueError("entity has neither 'physical_name' nor 'name'")

    namespace = [str(entity[key]) for key in ("database", "schema") if entity.get(key)]
    return ".".join(quote(part) for part in [*namespace, str(table)])
36
+
37
+
38
def primary_key_columns(entity: Dict[str, Any]) -> List[str]:
    """List the names of an entity's primary-key columns, in column order.

    A column counts as part of the primary key when its ``primary_key`` value
    is truthy, or when any item in its ``constraints`` list has
    ``type == "primary_key"``. Each column contributes its name at most once.
    """

    def _is_primary_key(column: Dict[str, Any]) -> bool:
        if column.get("primary_key"):
            return True
        return any(
            constraint.get("type") == "primary_key"
            for constraint in column.get("constraints") or []
        )

    return [
        column["name"]
        for column in entity.get("columns", []) or []
        if _is_primary_key(column)
    ]
@@ -0,0 +1,47 @@
1
+ """DialectPlugin protocol.
2
+
3
+ A dialect plugin is a pure function bundle: given a DataLex entity (dict) it
4
+ renders DDL, migration ALTERs, GRANTs, and type strings. No hidden state.
5
+
6
+ Each plugin exposes:
7
+ * `name` — canonical lowercase dialect name (`postgres`, `snowflake`, ...)
8
+ * `render_type(logical, ctx)` — map a logical type to a physical type string; `ctx` is a `RenderContext`
9
+ * `render_entity(entity)` — emit CREATE TABLE / VIEW etc.
10
+ * `render_alter(old_entity, new_entity)` — emit ALTER statements for a diff
11
+ * `render_grant(policy, entity)` — emit GRANT statements for an access policy
12
+ * `quote(identifier)` — dialect-correct identifier quoting
13
+
14
+ Each dialect module calls `register_dialect(plugin)` at import time; downstream code
15
+ calls `get_dialect(name)` to retrieve it.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from dataclasses import dataclass
21
+ from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
22
+
23
+ from datalex_core.datalex.types import LogicalType
24
+
25
+
26
@dataclass
class RenderContext:
    """Optional hints handed to ``render_type``; a dialect plugin may ignore them.

    Both fields default to ``None``.
    """

    # Entity mapping the column belongs to, when the caller supplies it.
    entity: Optional[Dict[str, Any]] = None
    # Column mapping whose type is being rendered, when the caller supplies it.
    column: Optional[Dict[str, Any]] = None
31
+
32
+
33
@runtime_checkable
class DialectPlugin(Protocol):
    """Structural interface every dialect plugin provides."""

    # Canonical lowercase dialect name, e.g. "postgres" or "snowflake".
    name: str

    def quote(self, identifier: str) -> str:
        """Return ``identifier`` quoted for this dialect."""

    def render_type(self, logical: LogicalType, ctx: RenderContext) -> str:
        """Map a logical type to a physical type string."""

    def render_entity(self, entity: Dict[str, Any]) -> str:
        """Emit the DDL that creates ``entity``."""

    def render_alter(
        self,
        old_entity: Optional[Dict[str, Any]],
        new_entity: Optional[Dict[str, Any]],
    ) -> List[str]:
        """Emit the statements that move ``old_entity`` to ``new_entity``."""

    def render_grant(self, policy: Dict[str, Any], entity: Dict[str, Any]) -> List[str]:
        """Emit GRANT statements for an access policy on ``entity``."""
@@ -0,0 +1,164 @@
1
+ """Postgres dialect plugin."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from datalex_core.datalex.types import LogicalType
8
+ from datalex_core.dialects.base import DialectPlugin, RenderContext
9
+ from datalex_core.dialects.registry import register_dialect
10
+ from datalex_core.dialects._common import (
11
+ physical_override,
12
+ physical_raw_ddl,
13
+ primary_key_columns,
14
+ qualified_table_name,
15
+ )
16
+
17
+
18
# Logical primitive kind -> PostgreSQL column type. A kind that is missing
# from this table is rendered by upper-casing the kind name (see render_type).
_PRIMITIVE_MAP: Dict[str, str] = {
    "string": "TEXT",
    "text": "TEXT",
    "integer": "INTEGER",
    "bigint": "BIGINT",
    "float": "DOUBLE PRECISION",
    "boolean": "BOOLEAN",
    "date": "DATE",
    "timestamp": "TIMESTAMP",
    "timestamp_tz": "TIMESTAMPTZ",
    "interval": "INTERVAL",
    "uuid": "UUID",
    "json": "JSONB",
    "binary": "BYTEA",
}
33
+
34
+
35
class PostgresDialect:
    """Dialect plugin that renders DataLex entities as PostgreSQL statements."""

    name = "postgres"

    def quote(self, identifier: str) -> str:
        """Return ``identifier`` wrapped in double quotes.

        Embedded double quotes are doubled so the result stays one quoted
        identifier.
        """
        escaped = identifier.replace('"', '""')
        return f'"{escaped}"'

    def render_type(self, logical: LogicalType, ctx: RenderContext) -> str:
        """Map a logical type to a PostgreSQL type string.

        A per-column ``physical`` override for this dialect (``type`` first,
        then ``raw_ddl``) wins over every rule below.

        Args:
            logical: Parsed logical type; ``kind``, ``params`` and
                ``children`` are read.
            ctx: Render context; only ``ctx.column`` is consulted.

        Returns:
            The physical type string.
        """
        column = ctx.column or {}
        explicit = physical_override(column, self.name) or physical_raw_ddl(column, self.name)
        if explicit:
            return explicit

        kind = logical.kind
        if kind == "array":
            # The element type is rendered with the same context and suffixed
            # with ``[]``.
            inner = self.render_type(logical.children[0], ctx)
            return f"{inner}[]"
        if kind in ("map", "struct"):
            return "JSONB"
        if kind == "decimal":
            if logical.params:
                return f"NUMERIC({','.join(str(p) for p in logical.params)})"
            return "NUMERIC"
        if kind == "string" and logical.params:
            return f"VARCHAR({logical.params[0]})"

        # ``binary`` needs no special case: its params are ignored and the
        # table below already maps it to BYTEA.
        return _PRIMITIVE_MAP.get(kind, kind.upper())

    def render_entity(self, entity: Dict[str, Any]) -> str:
        """Render the DDL that creates ``entity``.

        The output is a ``CREATE TABLE`` statement, followed (after one blank
        line) by one ``ALTER TABLE ... ADD CONSTRAINT`` per column reference
        and one ``CREATE INDEX`` per entry in ``entity["indexes"]``.

        Args:
            entity: Entity definition mapping.

        Returns:
            The statements joined by newlines, ending with a single newline.
        """
        table = qualified_table_name(entity, self.quote, self.name)

        body = self._column_lines(entity)
        pks = primary_key_columns(entity)
        if pks:
            cols = ", ".join(self.quote(c) for c in pks)
            body.append(f" PRIMARY KEY ({cols})")

        lines: List[str] = [f"CREATE TABLE {table} (", ",\n".join(body), ");"]

        trailing = self._foreign_key_statements(entity, table) + self._index_statements(
            entity, table
        )
        if trailing:
            lines.append("")
            lines.extend(trailing)
        return "\n".join(lines).rstrip() + "\n"

    def _column_lines(self, entity: Dict[str, Any]) -> List[str]:
        """Render one column-definition line per column of ``entity``."""
        from datalex_core.datalex.types import parse_type  # local to avoid cycles

        rendered: List[str] = []
        for col in entity.get("columns", []) or []:
            logical = parse_type(col.get("type", "string"))
            sql_type = self.render_type(logical, RenderContext(entity=entity, column=col))
            piece = f" {self.quote(col['name'])} {sql_type}"
            if col.get("nullable") is False or col.get("primary_key"):
                piece += " NOT NULL"
            default = col.get("default")
            if default is not None:
                piece += f" DEFAULT {_format_default(default)}"
            rendered.append(piece)
        return rendered

    def _foreign_key_statements(self, entity: Dict[str, Any], table: str) -> List[str]:
        """Render one ``ADD CONSTRAINT ... FOREIGN KEY`` per column reference."""
        statements: List[str] = []
        for col in entity.get("columns", []) or []:
            ref = col.get("references")
            if not ref:
                continue
            target = ref.get("entity")
            if not target:
                # Nothing valid can be emitted without a referenced table, and
                # quoting a missing name would raise.
                continue
            fk_name = f"fk_{entity['name']}_{col['name']}"
            fk = (
                f"ALTER TABLE {table} ADD CONSTRAINT "
                f"{self.quote(fk_name)} "
                f"FOREIGN KEY ({self.quote(col['name'])}) "
                f"REFERENCES {self.quote(target)}"
            )
            target_col = ref.get("column")
            if target_col:
                fk += f" ({self.quote(target_col)})"
            # With no column list PostgreSQL references the target's primary key.
            on_delete = ref.get("on_delete")
            if on_delete:
                fk += f" ON DELETE {on_delete.upper().replace('_', ' ')}"
            statements.append(fk + ";")
        return statements

    def _index_statements(self, entity: Dict[str, Any], table: str) -> List[str]:
        """Render one ``CREATE [UNIQUE] INDEX`` per entry in ``entity["indexes"]``."""
        statements: List[str] = []
        for idx in entity.get("indexes", []) or []:
            unique = "UNIQUE " if idx.get("unique") else ""
            cols = ", ".join(self.quote(c) for c in idx.get("columns", []))
            statements.append(
                f"CREATE {unique}INDEX {self.quote(idx['name'])} ON {table} ({cols});"
            )
        return statements

    def render_alter(
        self,
        old_entity: Optional[Dict[str, Any]],
        new_entity: Optional[Dict[str, Any]],
    ) -> List[str]:
        """Render statements for an entity being created or dropped.

        Returns:
            The ``render_entity`` output when only ``new_entity`` is given, a
            ``DROP TABLE`` when only ``old_entity`` is given, and an empty
            list otherwise (changes between two existing entities are not
            rendered here).
        """
        # Minimal first pass — rely on the diff engine for richer output in Phase B.
        if old_entity is None and new_entity is not None:
            return [self.render_entity(new_entity)]
        if new_entity is None and old_entity is not None:
            return [f"DROP TABLE {qualified_table_name(old_entity, self.quote, self.name)};"]
        return []

    def render_grant(self, policy: Dict[str, Any], entity: Dict[str, Any]) -> List[str]:
        """Render one ``GRANT`` statement per usable entry in ``policy["grants"]``.

        Entries without a principal or without any privilege are skipped,
        because either would produce a statement that is not valid SQL. A
        ``privileges`` value given as a single string is treated as one
        privilege.

        Args:
            policy: Policy mapping; ``grants`` is read.
            entity: Entity the privileges apply to.

        Returns:
            The GRANT statements in policy order.
        """
        out: List[str] = []
        target = qualified_table_name(entity, self.quote, self.name)
        for grant in policy.get("grants", []) or []:
            principal = grant.get("principal")
            privileges = grant.get("privileges") or []
            if isinstance(privileges, str):
                # Joining a bare string would split it into characters.
                privileges = [privileges]
            if not principal or not privileges:
                continue
            out.append(f"GRANT {', '.join(privileges)} ON {target} TO {principal};")
        return out
152
+
153
+
154
+ def _format_default(value: Any) -> str:
155
+ if isinstance(value, bool):
156
+ return "TRUE" if value else "FALSE"
157
+ if isinstance(value, (int, float)):
158
+ return str(value)
159
+ if value is None:
160
+ return "NULL"
161
+ return "'" + str(value).replace("'", "''") + "'"
162
+
163
+
164
+ register_dialect(PostgresDialect())
@@ -0,0 +1,36 @@
1
+ """Runtime registry of DialectPlugin instances.
2
+
3
+ Dialects self-register at import time by calling `register_dialect(...)`.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Dict, List, Optional
9
+
10
+ from datalex_core.dialects.base import DialectPlugin
11
+
12
+
13
# Module-level table of registered dialect plugins, keyed by each plugin's `name`.
_REGISTRY: Dict[str, DialectPlugin] = {}
14
+
15
+
16
def register_dialect(plugin: DialectPlugin) -> None:
    """Store ``plugin`` in the registry under ``plugin.name``.

    A plugin stored earlier under the same name is replaced.

    Raises:
        TypeError: If ``plugin`` fails the ``isinstance`` check against
            ``DialectPlugin``.
    """
    if isinstance(plugin, DialectPlugin):
        _REGISTRY[plugin.name] = plugin
        return
    raise TypeError(f"{plugin!r} does not implement DialectPlugin")
20
+
21
+
22
def get_dialect(name: str) -> Optional[DialectPlugin]:
    """Return the plugin registered under ``name``, or ``None`` if there is none."""
    return _REGISTRY[name] if name in _REGISTRY else None
24
+
25
+
26
def known_dialects() -> List[str]:
    """Return the registered dialect names in sorted order."""
    names = list(_REGISTRY)
    names.sort()
    return names
28
+
29
+
30
def require_dialect(name: str) -> DialectPlugin:
    """Return the plugin registered under ``name``.

    Raises:
        KeyError: If no plugin is registered under ``name``; the message
            lists the names that are registered.
    """
    plugin = get_dialect(name)
    if plugin is not None:
        return plugin
    known = ", ".join(known_dialects())
    raise KeyError(f"Dialect '{name}' is not registered. Known: {known}")
@@ -0,0 +1,129 @@
1
+ """Snowflake dialect plugin."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from datalex_core.datalex.types import LogicalType
8
+ from datalex_core.dialects.base import DialectPlugin, RenderContext
9
+ from datalex_core.dialects.registry import register_dialect
10
+ from datalex_core.dialects._common import (
11
+ physical_override,
12
+ physical_raw_ddl,
13
+ primary_key_columns,
14
+ qualified_table_name,
15
+ )
16
+
17
+
18
# Logical primitive kind -> Snowflake column type. A kind that is missing
# from this table is rendered by upper-casing the kind name (see render_type).
_PRIMITIVE_MAP: Dict[str, str] = {
    "string": "VARCHAR",
    "text": "VARCHAR",
    "integer": "NUMBER(38,0)",
    "bigint": "NUMBER(38,0)",
    "float": "FLOAT",
    "boolean": "BOOLEAN",
    "date": "DATE",
    "timestamp": "TIMESTAMP_NTZ",
    "timestamp_tz": "TIMESTAMP_TZ",
    "interval": "VARCHAR",
    "uuid": "VARCHAR",
    "json": "VARIANT",
    "binary": "BINARY",
}
33
+
34
+
35
class SnowflakeDialect:
    """Dialect plugin that renders DataLex entities as Snowflake statements."""

    name = "snowflake"

    def quote(self, identifier: str) -> str:
        """Return ``identifier`` upper-cased and wrapped in double quotes.

        Embedded double quotes are doubled before quoting.
        """
        # Snowflake identifiers are case-sensitive when quoted; prefer uppercase.
        escaped = identifier.replace('"', '""')
        return f'"{escaped.upper()}"'

    def render_type(self, logical: LogicalType, ctx: RenderContext) -> str:
        """Map a logical type to a Snowflake type string.

        A per-column ``physical`` override for this dialect (``type`` first,
        then ``raw_ddl``) wins over every rule below.

        Args:
            logical: Parsed logical type; ``kind`` and ``params`` are read.
            ctx: Render context; only ``ctx.column`` is consulted.

        Returns:
            The physical type string.
        """
        column = ctx.column or {}
        explicit = physical_override(column, self.name) or physical_raw_ddl(column, self.name)
        if explicit:
            return explicit

        kind = logical.kind
        if kind == "array":
            return "ARRAY"
        if kind in ("map", "struct"):
            return "OBJECT"
        if kind == "decimal":
            if logical.params:
                return f"NUMBER({','.join(str(p) for p in logical.params)})"
            return "NUMBER"
        if kind == "string" and logical.params:
            return f"VARCHAR({logical.params[0]})"

        return _PRIMITIVE_MAP.get(kind, kind.upper())

    def render_entity(self, entity: Dict[str, Any]) -> str:
        """Render a ``CREATE OR REPLACE TABLE`` statement for ``entity``.

        Args:
            entity: Entity definition mapping.

        Returns:
            The statement, ending with a single newline.
        """
        from datalex_core.datalex.types import parse_type

        table = qualified_table_name(entity, self.quote, self.name)

        body: List[str] = []
        for col in entity.get("columns", []) or []:
            logical = parse_type(col.get("type", "string"))
            sql_type = self.render_type(logical, RenderContext(entity=entity, column=col))
            piece = f" {self.quote(col['name'])} {sql_type}"
            if col.get("nullable") is False or col.get("primary_key"):
                piece += " NOT NULL"
            default = col.get("default")
            if default is not None:
                piece += f" DEFAULT {_format_default(default)}"
            body.append(piece)

        pks = primary_key_columns(entity)
        if pks:
            body.append(f" PRIMARY KEY ({', '.join(self.quote(c) for c in pks)})")

        lines: List[str] = [f"CREATE OR REPLACE TABLE {table} (", ",\n".join(body), ");"]
        return "\n".join(lines) + "\n"

    def render_alter(
        self,
        old_entity: Optional[Dict[str, Any]],
        new_entity: Optional[Dict[str, Any]],
    ) -> List[str]:
        """Render statements for an entity being created or dropped.

        Returns:
            The ``render_entity`` output when only ``new_entity`` is given, a
            ``DROP TABLE IF EXISTS`` when only ``old_entity`` is given, and an
            empty list otherwise.
        """
        if old_entity is None and new_entity is not None:
            return [self.render_entity(new_entity)]
        if new_entity is None and old_entity is not None:
            return [f"DROP TABLE IF EXISTS {qualified_table_name(old_entity, self.quote, self.name)};"]
        return []

    def render_grant(self, policy: Dict[str, Any], entity: Dict[str, Any]) -> List[str]:
        """Render one ``GRANT ... TO ROLE`` per usable entry in ``policy["grants"]``.

        Entries without a principal or without any privilege are skipped,
        because either would produce a statement that is not valid SQL. A
        ``privileges`` value given as a single string is treated as one
        privilege.

        Args:
            policy: Policy mapping; ``grants`` is read.
            entity: Entity the privileges apply to.

        Returns:
            The GRANT statements in policy order.
        """
        out: List[str] = []
        target = qualified_table_name(entity, self.quote, self.name)
        for grant in policy.get("grants", []) or []:
            principal = grant.get("principal")
            privileges = grant.get("privileges") or []
            if isinstance(privileges, str):
                # Joining a bare string would split it into characters.
                privileges = [privileges]
            if not principal or not privileges:
                continue
            out.append(f"GRANT {', '.join(privileges)} ON TABLE {target} TO ROLE {principal};")
        return out
117
+
118
+
119
+ def _format_default(value: Any) -> str:
120
+ if isinstance(value, bool):
121
+ return "TRUE" if value else "FALSE"
122
+ if isinstance(value, (int, float)):
123
+ return str(value)
124
+ if value is None:
125
+ return "NULL"
126
+ return "'" + str(value).replace("'", "''") + "'"
127
+
128
+
129
+ register_dialect(SnowflakeDialect())