datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,166 @@
1
+ from copy import deepcopy
2
+ from typing import Any, Dict, List
3
+
4
+ from datalex_core.modeling import normalize_model
5
+
6
+
7
+ def _sort_fields(fields: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
8
+ return sorted(fields, key=lambda item: item.get("name", ""))
9
+
10
+
11
def _sort_entities(entities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return deep-copied entities in canonical order.

    Each entity copy gets its ``fields`` sorted by name (an empty list is
    stored when the key is absent), its flat list attributes sorted, and its
    key-set attributes normalised: every inner list is sorted, non-list
    members are dropped, and the outer list is ordered by the resulting
    tuples. The entities themselves are ordered by ``name``.
    """
    # Attributes holding a flat list of sortable values.
    flat_list_keys = (
        "grain",
        "tags",
        "subtypes",
        "dimension_refs",
        "link_refs",
        "partition_by",
        "cluster_by",
        "hash_diff_fields",
    )
    # Attributes holding a list of key sets (lists of field names).
    keyset_list_keys = ("candidate_keys", "business_keys")

    canonical_entities = []
    for original in entities:
        entity = deepcopy(original)
        entity["fields"] = _sort_fields(entity.get("fields", []))

        for key in flat_list_keys:
            if key in entity and isinstance(entity[key], list):
                entity[key] = sorted(entity[key])

        for key in keyset_list_keys:
            if key not in entity or not isinstance(entity[key], list):
                continue
            keysets = [sorted(keyset) for keyset in entity[key] if isinstance(keyset, list)]
            keysets.sort(key=tuple)
            entity[key] = keysets

        canonical_entities.append(entity)

    canonical_entities.sort(key=lambda entity: entity.get("name", ""))
    return canonical_entities
44
+
45
+
46
+ def _sort_relationships(relationships: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
47
+ return sorted(
48
+ relationships,
49
+ key=lambda item: (
50
+ item.get("name", ""),
51
+ item.get("from", ""),
52
+ item.get("to", ""),
53
+ item.get("cardinality", ""),
54
+ ),
55
+ )
56
+
57
+
58
+ def _sort_rules(rules: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
59
+ return sorted(rules, key=lambda item: (item.get("name", ""), item.get("target", "")))
60
+
61
+
62
+ def _sort_indexes(indexes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
63
+ return sorted(
64
+ deepcopy(indexes),
65
+ key=lambda item: (item.get("name", ""), item.get("entity", "")),
66
+ )
67
+
68
+
69
+ def _sort_glossary(glossary: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
70
+ sorted_terms = []
71
+ for term in glossary:
72
+ cloned = deepcopy(term)
73
+ if "related_fields" in cloned and isinstance(cloned["related_fields"], list):
74
+ cloned["related_fields"] = sorted(cloned["related_fields"])
75
+ if "tags" in cloned and isinstance(cloned["tags"], list):
76
+ cloned["tags"] = sorted(cloned["tags"])
77
+ sorted_terms.append(cloned)
78
+ return sorted(sorted_terms, key=lambda item: item.get("term", ""))
79
+
80
+
81
+ def _sort_metrics(metrics: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
82
+ sorted_metrics = []
83
+ for metric in metrics:
84
+ cloned = deepcopy(metric)
85
+ if "grain" in cloned and isinstance(cloned["grain"], list):
86
+ cloned["grain"] = sorted(cloned["grain"])
87
+ if "dimensions" in cloned and isinstance(cloned["dimensions"], list):
88
+ cloned["dimensions"] = sorted(cloned["dimensions"])
89
+ if "tags" in cloned and isinstance(cloned["tags"], list):
90
+ cloned["tags"] = sorted(cloned["tags"])
91
+ sorted_metrics.append(cloned)
92
+ return sorted(sorted_metrics, key=lambda item: item.get("name", ""))
93
+
94
+
95
+ def _sort_domains(domains: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
96
+ sorted_domains = []
97
+ for domain in domains:
98
+ cloned = deepcopy(domain)
99
+ if "tags" in cloned and isinstance(cloned["tags"], list):
100
+ cloned["tags"] = sorted(cloned["tags"])
101
+ if "examples" in cloned and isinstance(cloned["examples"], list):
102
+ cloned["examples"] = sorted(cloned["examples"], key=lambda item: str(item))
103
+ sorted_domains.append(cloned)
104
+ return sorted(sorted_domains, key=lambda item: item.get("name", ""))
105
+
106
+
107
+ def _sort_enums(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
108
+ sorted_enums = []
109
+ for item in items:
110
+ cloned = deepcopy(item)
111
+ if "values" in cloned and isinstance(cloned["values"], list):
112
+ cloned["values"] = sorted(cloned["values"])
113
+ sorted_enums.append(cloned)
114
+ return sorted(sorted_enums, key=lambda item: item.get("name", ""))
115
+
116
+
117
def _sort_templates(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return deep-copied templates ordered by ``name``.

    Each copy gets its ``fields`` sorted via ``_sort_fields`` (an empty list
    is stored when the key is absent) and its ``tags`` sorted when it is a
    list. A template without ``name`` is ordered as if it were ``""``.
    """

    def _canonical_template(template: Dict[str, Any]) -> Dict[str, Any]:
        entry = deepcopy(template)
        entry["fields"] = _sort_fields(entry.get("fields", []))
        if "tags" in entry and isinstance(entry["tags"], list):
            entry["tags"] = sorted(entry["tags"])
        return entry

    templates = [_canonical_template(template) for template in items]
    templates.sort(key=lambda entry: entry.get("name", ""))
    return templates
126
+
127
+
128
+ def _sort_subject_areas(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
129
+ return sorted(deepcopy(items), key=lambda item: item.get("name", ""))
130
+
131
+
132
def compile_model(model: Dict[str, Any]) -> Dict[str, Any]:
    """Build the canonical form of a model.

    The model is first passed through ``normalize_model``. The result is a
    new dict whose keys are inserted in this order: ``model``, ``entities``,
    ``relationships``, ``indexes``, ``rules``, ``metrics``, ``governance``,
    ``glossary``, ``domains``, ``enums``, ``templates``, ``subject_areas``,
    ``naming_rules``, ``display``.

    List sections go through their ``_sort_*`` helper. In ``governance`` the
    ``classification`` and ``stewards`` mappings are rebuilt with sorted
    keys, and ``model.owners`` is sorted when it is a list.
    """
    normalized = normalize_model(model)

    header = deepcopy(normalized.get("model", {}))
    canonical: Dict[str, Any] = {"model": header}

    # Sections emitted before ``governance``.
    leading_sections = (
        ("entities", _sort_entities),
        ("relationships", _sort_relationships),
        ("indexes", _sort_indexes),
        ("rules", _sort_rules),
        ("metrics", _sort_metrics),
    )
    for key, sorter in leading_sections:
        canonical[key] = sorter(normalized.get(key, []))

    governance = deepcopy(normalized.get("governance", {}))
    for key in ("classification", "stewards"):
        mapping = governance.get(key)
        if isinstance(mapping, dict):
            # Rebuild the mapping so its keys appear in sorted order.
            governance[key] = {name: mapping[name] for name in sorted(mapping)}
    canonical["governance"] = governance

    # Sections emitted after ``governance``.
    trailing_sections = (
        ("glossary", _sort_glossary),
        ("domains", _sort_domains),
        ("enums", _sort_enums),
        ("templates", _sort_templates),
        ("subject_areas", _sort_subject_areas),
    )
    for key, sorter in trailing_sections:
        canonical[key] = sorter(normalized.get(key, []))

    for key in ("naming_rules", "display"):
        canonical[key] = deepcopy(normalized.get(key, {}))

    owners = header.get("owners")
    if isinstance(owners, list):
        header["owners"] = sorted(owners)

    return canonical
@@ -0,0 +1,204 @@
1
+ """Shell completion generators for bash, zsh, and fish."""
2
+
3
+ from typing import List
4
+
5
+ _COMMANDS = [
6
+ "init", "validate", "lint", "compile", "diff", "validate-all",
7
+ "gate", "policy-check", "generate", "import", "resolve",
8
+ "resolve-project", "diff-all", "transform", "standards", "sync",
9
+ "fmt", "stats", "print-schema", "print-policy-schema", "doctor",
10
+ "migrate", "apply", "watch",
11
+ ]
12
+
13
+ _GENERATE_SUBCOMMANDS = ["sql", "dbt", "metadata", "docs", "changelog"]
14
+ _IMPORT_SUBCOMMANDS = ["sql", "dbml", "json-schema", "dbt", "avro"]
15
+ _TRANSFORM_SUBCOMMANDS = ["conceptual-to-logical", "logical-to-physical"]
16
+ _STANDARDS_SUBCOMMANDS = ["check", "fix"]
17
+ _SYNC_SUBCOMMANDS = ["compare", "merge", "pull"]
18
+ _DIALECTS = ["postgres", "snowflake", "bigquery", "databricks"]
19
+
20
+
21
def generate_bash_completion() -> str:
    """Return a bash completion script for the DataLex CLI.

    The script defines ``_dm_completions`` which completes, in order:
    the subcommand word of ``generate``/``import``/``transform``/
    ``standards``/``sync`` (second word only), the value following
    ``--dialect``, ``--format``, ``--policy`` or ``--schema``, the top-level
    command (first word), and otherwise file names.

    The function is registered for both ``dm`` and ``datalex``: the install
    hint embedded in the script invokes the CLI as ``datalex``, so
    registering ``dm`` alone would leave that command without completion.
    """
    cmds = " ".join(_COMMANDS)
    gen_subs = " ".join(_GENERATE_SUBCOMMANDS)
    imp_subs = " ".join(_IMPORT_SUBCOMMANDS)
    transform_subs = " ".join(_TRANSFORM_SUBCOMMANDS)
    standards_subs = " ".join(_STANDARDS_SUBCOMMANDS)
    sync_subs = " ".join(_SYNC_SUBCOMMANDS)
    dialects = " ".join(_DIALECTS)

    # Doubled braces are literal braces in the emitted shell script.
    return f'''# bash completion for dm (DataLex CLI)
# Add to ~/.bashrc: eval "$(datalex completion bash)"

_dm_completions() {{
    local cur prev commands
    COMPREPLY=()
    cur="${{COMP_WORDS[COMP_CWORD]}}"
    prev="${{COMP_WORDS[COMP_CWORD-1]}}"
    commands="{cmds}"

    case "${{COMP_WORDS[1]}}" in
        generate)
            if [[ $COMP_CWORD -eq 2 ]]; then
                COMPREPLY=( $(compgen -W "{gen_subs}" -- "$cur") )
                return 0
            fi
            ;;
        import)
            if [[ $COMP_CWORD -eq 2 ]]; then
                COMPREPLY=( $(compgen -W "{imp_subs}" -- "$cur") )
                return 0
            fi
            ;;
        transform)
            if [[ $COMP_CWORD -eq 2 ]]; then
                COMPREPLY=( $(compgen -W "{transform_subs}" -- "$cur") )
                return 0
            fi
            ;;
        standards)
            if [[ $COMP_CWORD -eq 2 ]]; then
                COMPREPLY=( $(compgen -W "{standards_subs}" -- "$cur") )
                return 0
            fi
            ;;
        sync)
            if [[ $COMP_CWORD -eq 2 ]]; then
                COMPREPLY=( $(compgen -W "{sync_subs}" -- "$cur") )
                return 0
            fi
            ;;
    esac

    case "$prev" in
        --dialect)
            COMPREPLY=( $(compgen -W "{dialects}" -- "$cur") )
            return 0
            ;;
        --format)
            COMPREPLY=( $(compgen -W "json yaml table" -- "$cur") )
            return 0
            ;;
        --policy)
            COMPREPLY=( $(compgen -f -X '!*.policy.yaml' -- "$cur") )
            return 0
            ;;
        --schema)
            COMPREPLY=( $(compgen -f -X '!*.json' -- "$cur") )
            return 0
            ;;
    esac

    if [[ $COMP_CWORD -eq 1 ]]; then
        COMPREPLY=( $(compgen -W "$commands" -- "$cur") )
        return 0
    fi

    COMPREPLY=( $(compgen -f -- "$cur") )
    return 0
}}

complete -F _dm_completions dm datalex
'''
103
+
104
+
105
def generate_zsh_completion() -> str:
    """Return a zsh completion script for the DataLex CLI.

    The script defines ``_dm``, which offers the top-level commands for the
    first argument, the subcommand words after ``generate``/``import``/
    ``transform``/``standards``/``sync``, and for other commands the value
    following ``--dialect``, ``--format``, ``--policy`` or ``--schema``
    (falling back to file names).

    The script works both when autoloaded from ``fpath`` and when eval-ed as
    its own install hint recommends. ``_dm`` is only invoked when the script
    is itself running as the ``_dm`` function; otherwise the function is
    registered with ``compdef`` for ``dm`` and ``datalex``. Running ``_dm``
    unconditionally would call the completion function outside a completion
    context when the script is eval-ed, and nothing would be registered.
    """
    cmds_list = "\n        ".join([f"'{c}:{c} command'" for c in _COMMANDS])
    gen_subs = " ".join(_GENERATE_SUBCOMMANDS)
    imp_subs = " ".join(_IMPORT_SUBCOMMANDS)
    transform_subs = " ".join(_TRANSFORM_SUBCOMMANDS)
    standards_subs = " ".join(_STANDARDS_SUBCOMMANDS)
    sync_subs = " ".join(_SYNC_SUBCOMMANDS)
    dialects = " ".join(_DIALECTS)

    # Doubled braces are literal braces in the emitted shell script; the
    # doubled backslashes become single line-continuation backslashes.
    return f'''#compdef dm datalex
# zsh completion for dm (DataLex CLI)
# Add to ~/.zshrc: eval "$(datalex completion zsh)"

_dm() {{
    local -a commands
    commands=(
        {cmds_list}
    )

    _arguments -C \\
        '1:command:->command' \\
        '*::arg:->args'

    case $state in
        command)
            _describe 'dm commands' commands
            ;;
        args)
            case $words[1] in
                generate)
                    _values 'subcommand' {gen_subs}
                    ;;
                import)
                    _values 'subcommand' {imp_subs}
                    ;;
                transform)
                    _values 'subcommand' {transform_subs}
                    ;;
                standards)
                    _values 'subcommand' {standards_subs}
                    ;;
                sync)
                    _values 'subcommand' {sync_subs}
                    ;;
                *)
                    case $words[-2] in
                        --dialect)
                            _values 'dialect' {dialects}
                            ;;
                        --format)
                            _values 'format' json yaml table
                            ;;
                        --policy)
                            _files -g '*.policy.yaml'
                            ;;
                        --schema)
                            _files -g '*.json'
                            ;;
                        *)
                            _files
                            ;;
                    esac
                    ;;
            esac
            ;;
    esac
}}

# Autoloaded from fpath: this file is the body of _dm, so run it.
# Sourced or eval-ed: only register the function (requires compinit).
if [ "$funcstack[1]" = "_dm" ]; then
    _dm "$@"
else
    compdef _dm dm datalex
fi
'''
175
+
176
+
177
def generate_fish_completion() -> str:
    """Return a fish completion script for the ``dm`` command.

    The script consists of a comment header, one ``complete`` line per
    top-level command, one per subcommand of ``generate``/``import``/
    ``transform``/``standards``/``sync``, one per ``--dialect`` value and a
    single line for ``--format``. Sections are separated by blank lines and
    the script ends with a newline.
    """
    # Parent command -> words offered once that command has been seen.
    subcommand_groups = (
        ("generate", _GENERATE_SUBCOMMANDS),
        ("import", _IMPORT_SUBCOMMANDS),
        ("transform", _TRANSFORM_SUBCOMMANDS),
        ("standards", _STANDARDS_SUBCOMMANDS),
        ("sync", _SYNC_SUBCOMMANDS),
    )

    script = [
        "# fish completion for dm (DataLex CLI)",
        "# Add to ~/.config/fish/completions/dm.fish",
        "",
    ]

    script.extend(
        f"complete -c dm -n '__fish_use_subcommand' -a '{cmd}' -d '{cmd} command'"
        for cmd in _COMMANDS
    )

    script.append("")
    for parent, subcommands in subcommand_groups:
        script.extend(
            f"complete -c dm -n '__fish_seen_subcommand_from {parent}' -a '{sub}'"
            for sub in subcommands
        )

    script.append("")
    script.extend(f"complete -c dm -l dialect -a '{d}'" for d in _DIALECTS)
    script.append("complete -c dm -l format -a 'json yaml table'")

    return "\n".join(script) + "\n"
@@ -0,0 +1,39 @@
1
+ """Database connectors for pulling schema from live databases.
2
+
3
+ Each connector implements the same interface:
4
+ pull_schema(connection_string, schema=None, tables=None, **kwargs) -> Dict[str, Any]
5
+
6
+ Returns a DataLex model dict ready for use.
7
+ """
8
+
9
+ from datalex_core.connectors.base import (
10
+ BaseConnector,
11
+ ConnectorConfig,
12
+ ConnectorResult,
13
+ get_connector,
14
+ list_connectors,
15
+ )
16
+ from datalex_core.connectors.postgres import PostgresConnector
17
+ from datalex_core.connectors.mysql import MySQLConnector
18
+ from datalex_core.connectors.snowflake import SnowflakeConnector
19
+ from datalex_core.connectors.bigquery import BigQueryConnector
20
+ from datalex_core.connectors.databricks import DatabricksConnector
21
+ from datalex_core.connectors.sqlserver import SQLServerConnector, AzureSQLConnector, AzureFabricConnector
22
+ from datalex_core.connectors.redshift import RedshiftConnector
23
+
24
# Names exported by ``from datalex_core.connectors import *``; every entry
# is imported at the top of this module.
__all__ = [
    # Base types and concrete connector classes.
    "BaseConnector",
    "BigQueryConnector",
    "ConnectorConfig",
    "ConnectorResult",
    "DatabricksConnector",
    "MySQLConnector",
    "PostgresConnector",
    "SnowflakeConnector",
    # The three connectors imported from the sqlserver module.
    "SQLServerConnector",
    "AzureSQLConnector",
    "AzureFabricConnector",
    "RedshiftConnector",
    # Helper functions imported from the base module.
    "get_connector",
    "list_connectors",
]