PyPI - datalex-cli - Versions diffs - 0.1.1__py3-none-any.whl - Mend

datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

datalex_cli/__init__.py +1 -0
datalex_cli/datalex_cli.py +658 -0
datalex_cli/main.py +2925 -0
datalex_cli-0.1.1.dist-info/METADATA +228 -0
datalex_cli-0.1.1.dist-info/RECORD +64 -0
datalex_cli-0.1.1.dist-info/WHEEL +5 -0
datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
datalex_core/__init__.py +94 -0
datalex_core/_schemas/datalex/common.schema.json +127 -0
datalex_core/_schemas/datalex/domain.schema.json +24 -0
datalex_core/_schemas/datalex/entity.schema.json +158 -0
datalex_core/_schemas/datalex/model.schema.json +141 -0
datalex_core/_schemas/datalex/policy.schema.json +70 -0
datalex_core/_schemas/datalex/project.schema.json +82 -0
datalex_core/_schemas/datalex/snippet.schema.json +24 -0
datalex_core/_schemas/datalex/source.schema.json +104 -0
datalex_core/_schemas/datalex/term.schema.json +30 -0
datalex_core/canonical.py +166 -0
datalex_core/completion.py +204 -0
datalex_core/connectors/__init__.py +39 -0
datalex_core/connectors/base.py +417 -0
datalex_core/connectors/bigquery.py +229 -0
datalex_core/connectors/databricks.py +262 -0
datalex_core/connectors/mysql.py +266 -0
datalex_core/connectors/postgres.py +309 -0
datalex_core/connectors/redshift.py +298 -0
datalex_core/connectors/snowflake.py +336 -0
datalex_core/connectors/sqlserver.py +425 -0
datalex_core/datalex/__init__.py +26 -0
datalex_core/datalex/diff.py +188 -0
datalex_core/datalex/errors.py +85 -0
datalex_core/datalex/loader.py +512 -0
datalex_core/datalex/migrate_layout.py +382 -0
datalex_core/datalex/parse_cache.py +102 -0
datalex_core/datalex/project.py +214 -0
datalex_core/datalex/types.py +224 -0
datalex_core/dbt/__init__.py +18 -0
datalex_core/dbt/emit.py +344 -0
datalex_core/dbt/manifest.py +329 -0
datalex_core/dbt/profiles.py +185 -0
datalex_core/dbt/sync.py +279 -0
datalex_core/dbt/warehouse.py +215 -0
datalex_core/dialects/__init__.py +15 -0
datalex_core/dialects/_common.py +48 -0
datalex_core/dialects/base.py +47 -0
datalex_core/dialects/postgres.py +164 -0
datalex_core/dialects/registry.py +36 -0
datalex_core/dialects/snowflake.py +129 -0
datalex_core/diffing.py +358 -0
datalex_core/docs_generator.py +797 -0
datalex_core/doctor.py +181 -0
datalex_core/generators.py +478 -0
datalex_core/importers.py +1176 -0
datalex_core/issues.py +23 -0
datalex_core/loader.py +21 -0
datalex_core/migrate.py +316 -0
datalex_core/modeling.py +679 -0
datalex_core/packages.py +430 -0
datalex_core/policy.py +1037 -0
datalex_core/resolver.py +456 -0
datalex_core/schema.py +54 -0
datalex_core/semantic.py +1561 -0

datalex_core/canonical.py ADDED Viewed

@@ -0,0 +1,166 @@
+from copy import deepcopy
+from typing import Any, Dict, List
+from datalex_core.modeling import normalize_model
+def _sort_fields(fields: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return a new list holding the same field dicts, ordered by "name".
+    A field with no "name" key is ordered as if its name were "". Only the
+    list is new; the dicts inside it are the caller's own objects.
+    """
+    def _name_of(field: Dict[str, Any]) -> Any:
+        return field.get("name", "")
+    ordered = list(fields)
+    ordered.sort(key=_name_of)
+    return ordered
+def _sort_entities(entities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the entity dicts in canonical order.
+    In each copy "fields" is replaced by its name-sorted form (an empty list
+    when absent), every flat list attribute named below is sorted, and the
+    key-set attributes are sorted both inside each key set and across key
+    sets (entries that are not lists are dropped). Entities are finally
+    ordered by "name", with a missing name treated as "".
+    """
+    # Attributes whose value, when it is a list, is sorted as-is.
+    flat_list_keys = (
+        "grain",
+        "tags",
+        "subtypes",
+        "dimension_refs",
+        "link_refs",
+        "partition_by",
+        "cluster_by",
+        "hash_diff_fields",
+    )
+    # Attributes holding a list of key sets (each itself a list).
+    keyset_keys = ("candidate_keys", "business_keys")
+    canonical_entities = []
+    for entity in entities:
+        cloned = deepcopy(entity)
+        cloned["fields"] = _sort_fields(cloned.get("fields", []))
+        for key in flat_list_keys:
+            current = cloned.get(key)
+            if isinstance(current, list):
+                cloned[key] = sorted(current)
+        for key in keyset_keys:
+            current = cloned.get(key)
+            if isinstance(current, list):
+                keysets = [sorted(keyset) for keyset in current if isinstance(keyset, list)]
+                keysets.sort(key=tuple)
+                cloned[key] = keysets
+        canonical_entities.append(cloned)
+    canonical_entities.sort(key=lambda item: item.get("name", ""))
+    return canonical_entities
+def _sort_relationships(relationships: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the relationship dicts in canonical order.
+    The ordering key is the tuple (name, from, to, cardinality); a missing
+    key is treated as "". The input is deep-copied first, as _sort_indexes
+    and _sort_subject_areas already do, so that later mutation of the
+    canonical output cannot leak back into the caller's model.
+    """
+    return sorted(
+        deepcopy(relationships),
+        key=lambda item: (
+            item.get("name", ""),
+            item.get("from", ""),
+            item.get("to", ""),
+            item.get("cardinality", ""),
+        ),
+    )
+def _sort_rules(rules: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the rule dicts ordered by (name, target).
+    A missing "name" or "target" is treated as "". The input is deep-copied
+    first, as _sort_indexes and _sort_subject_areas already do, so that
+    later mutation of the canonical output cannot leak back into the
+    caller's model.
+    """
+    return sorted(
+        deepcopy(rules),
+        key=lambda item: (item.get("name", ""), item.get("target", "")),
+    )
+def _sort_indexes(indexes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the index dicts ordered by (name, entity).
+    A missing "name" or "entity" is treated as "".
+    """
+    def _index_key(index: Dict[str, Any]) -> Any:
+        return (index.get("name", ""), index.get("entity", ""))
+    copies = list(deepcopy(indexes))
+    copies.sort(key=_index_key)
+    return copies
+def _sort_glossary(glossary: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the glossary terms in canonical order.
+    In each copy "related_fields" and "tags" are sorted when they are lists;
+    the terms are then ordered by "term", a missing one treated as "".
+    """
+    canonical_terms = []
+    for entry in glossary:
+        term_copy = deepcopy(entry)
+        for list_key in ("related_fields", "tags"):
+            members = term_copy.get(list_key)
+            if isinstance(members, list):
+                term_copy[list_key] = sorted(members)
+        canonical_terms.append(term_copy)
+    canonical_terms.sort(key=lambda entry: entry.get("term", ""))
+    return canonical_terms
+def _sort_metrics(metrics: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the metric dicts in canonical order.
+    In each copy "grain", "dimensions" and "tags" are sorted when they are
+    lists; the metrics are then ordered by "name", a missing one treated
+    as "".
+    """
+    canonical_metrics = []
+    for entry in metrics:
+        metric_copy = deepcopy(entry)
+        for list_key in ("grain", "dimensions", "tags"):
+            members = metric_copy.get(list_key)
+            if isinstance(members, list):
+                metric_copy[list_key] = sorted(members)
+        canonical_metrics.append(metric_copy)
+    canonical_metrics.sort(key=lambda entry: entry.get("name", ""))
+    return canonical_metrics
+def _sort_domains(domains: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the domain dicts in canonical order.
+    In each copy "tags" is sorted when it is a list, and "examples" is
+    sorted by the text form of each example when it is a list. The domains
+    are then ordered by "name", a missing one treated as "".
+    """
+    canonical_domains = []
+    for entry in domains:
+        domain_copy = deepcopy(entry)
+        tags = domain_copy.get("tags")
+        if isinstance(tags, list):
+            domain_copy["tags"] = sorted(tags)
+        examples = domain_copy.get("examples")
+        if isinstance(examples, list):
+            # Examples may mix types, so they are compared via str().
+            domain_copy["examples"] = sorted(examples, key=str)
+        canonical_domains.append(domain_copy)
+    canonical_domains.sort(key=lambda entry: entry.get("name", ""))
+    return canonical_domains
+def _sort_enums(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the enum dicts in canonical order.
+    In each copy "values" is sorted when it is a list. Values that compare
+    with one another keep their natural ordering; if they cannot be
+    compared (for example booleans or numbers mixed with strings), they are
+    ordered by their text form instead of raising TypeError. The enums are
+    then ordered by "name", a missing one treated as "".
+    """
+    sorted_enums = []
+    for item in items:
+        cloned = deepcopy(item)
+        values = cloned.get("values")
+        if isinstance(values, list):
+            try:
+                cloned["values"] = sorted(values)
+            except TypeError:
+                # Unorderable mix of types: fall back to the str() ordering
+                # that _sort_domains applies to "examples".
+                cloned["values"] = sorted(values, key=str)
+        sorted_enums.append(cloned)
+    return sorted(sorted_enums, key=lambda item: item.get("name", ""))
+def _sort_templates(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the template dicts in canonical order.
+    In each copy "fields" is replaced by its name-sorted form (an empty list
+    when absent) and "tags" is sorted when it is a list. The templates are
+    then ordered by "name", a missing one treated as "".
+    """
+    canonical_templates = []
+    for template in items:
+        template_copy = deepcopy(template)
+        template_copy["fields"] = _sort_fields(template_copy.get("fields", []))
+        tags = template_copy.get("tags")
+        if isinstance(tags, list):
+            template_copy["tags"] = sorted(tags)
+        canonical_templates.append(template_copy)
+    canonical_templates.sort(key=lambda template: template.get("name", ""))
+    return canonical_templates
+def _sort_subject_areas(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the subject-area dicts ordered by "name".
+    A missing "name" is treated as "".
+    """
+    areas = list(deepcopy(items))
+    areas.sort(key=lambda area: area.get("name", ""))
+    return areas
+def compile_model(model: Dict[str, Any]) -> Dict[str, Any]:
+    """Build the canonical form of a model dict.
+    The model is first passed through normalize_model. The result is a new
+    dict with the keys, in this order: "model", "entities", "relationships",
+    "indexes", "rules", "metrics", "governance", "glossary", "domains",
+    "enums", "templates", "subject_areas", "naming_rules" and "display".
+    List sections go through their section sorter, the "classification" and
+    "stewards" mappings under "governance" are rebuilt with sorted keys, and
+    a list-valued "owners" entry under "model" is sorted.
+    """
+    normalized = normalize_model(model)
+    canonical: Dict[str, Any] = {"model": deepcopy(normalized.get("model", {}))}
+    leading_sections = (
+        ("entities", _sort_entities),
+        ("relationships", _sort_relationships),
+        ("indexes", _sort_indexes),
+        ("rules", _sort_rules),
+        ("metrics", _sort_metrics),
+    )
+    for section, sorter in leading_sections:
+        canonical[section] = sorter(normalized.get(section, []))
+    governance = deepcopy(normalized.get("governance", {}))
+    for mapping_key in ("classification", "stewards"):
+        mapping = governance.get(mapping_key)
+        if isinstance(mapping, dict):
+            # Rebuild the mapping so its keys are inserted in sorted order.
+            governance[mapping_key] = {name: mapping[name] for name in sorted(mapping)}
+    canonical["governance"] = governance
+    trailing_sections = (
+        ("glossary", _sort_glossary),
+        ("domains", _sort_domains),
+        ("enums", _sort_enums),
+        ("templates", _sort_templates),
+        ("subject_areas", _sort_subject_areas),
+    )
+    for section, sorter in trailing_sections:
+        canonical[section] = sorter(normalized.get(section, []))
+    for passthrough in ("naming_rules", "display"):
+        canonical[passthrough] = deepcopy(normalized.get(passthrough, {}))
+    header = canonical["model"]
+    owners = header.get("owners")
+    if isinstance(owners, list):
+        header["owners"] = sorted(owners)
+    return canonical

datalex_core/completion.py ADDED Viewed

@@ -0,0 +1,204 @@
+"""Shell completion generators for bash, zsh, and fish."""
+from typing import List
+# Top-level CLI commands, offered when completing the first argument.
+_COMMANDS = [
+    "init",
+    "validate",
+    "lint",
+    "compile",
+    "diff",
+    "validate-all",
+    "gate",
+    "policy-check",
+    "generate",
+    "import",
+    "resolve",
+    "resolve-project",
+    "diff-all",
+    "transform",
+    "standards",
+    "sync",
+    "fmt",
+    "stats",
+    "print-schema",
+    "print-policy-schema",
+    "doctor",
+    "migrate",
+    "apply",
+    "watch",
+]
+# Second-level words offered after the command each list is named for.
+_GENERATE_SUBCOMMANDS = [
+    "sql",
+    "dbt",
+    "metadata",
+    "docs",
+    "changelog",
+]
+_IMPORT_SUBCOMMANDS = [
+    "sql",
+    "dbml",
+    "json-schema",
+    "dbt",
+    "avro",
+]
+_TRANSFORM_SUBCOMMANDS = [
+    "conceptual-to-logical",
+    "logical-to-physical",
+]
+_STANDARDS_SUBCOMMANDS = [
+    "check",
+    "fix",
+]
+_SYNC_SUBCOMMANDS = [
+    "compare",
+    "merge",
+    "pull",
+]
+# Values offered for the --dialect option.
+_DIALECTS = [
+    "postgres",
+    "snowflake",
+    "bigquery",
+    "databricks",
+]
+def generate_bash_completion() -> str:
+    """Return the text of a bash completion script for the `dm` command.
+    The script completes the top-level command names at the first argument,
+    the matching sub-command names at the second argument of generate,
+    import, transform, standards and sync, fixed value lists after
+    --dialect and --format, filtered file names after --policy and
+    --schema, and plain file names everywhere else.
+    """
+    # NOTE(review): the emitted header advertises `datalex completion bash`
+    # while the completion is registered for `dm` - confirm which
+    # executable name is actually installed.
+    as_words = " ".join
+    command_words = as_words(_COMMANDS)
+    generate_words = as_words(_GENERATE_SUBCOMMANDS)
+    import_words = as_words(_IMPORT_SUBCOMMANDS)
+    transform_words = as_words(_TRANSFORM_SUBCOMMANDS)
+    standards_words = as_words(_STANDARDS_SUBCOMMANDS)
+    sync_words = as_words(_SYNC_SUBCOMMANDS)
+    dialect_words = as_words(_DIALECTS)
+    # Doubled braces in the template are literal braces of the shell script.
+    script = f'''# bash completion for dm (DataLex CLI)
+# Add to ~/.bashrc: eval "$(datalex completion bash)"
+_dm_completions() {{
+    local cur prev commands
+    COMPREPLY=()
+    cur="${{COMP_WORDS[COMP_CWORD]}}"
+    prev="${{COMP_WORDS[COMP_CWORD-1]}}"
+    commands="{command_words}"
+    case "${{COMP_WORDS[1]}}" in
+        generate)
+            if [[ $COMP_CWORD -eq 2 ]]; then
+                COMPREPLY=( $(compgen -W "{generate_words}" -- "$cur") )
+                return 0
+            fi
+            ;;
+        import)
+            if [[ $COMP_CWORD -eq 2 ]]; then
+                COMPREPLY=( $(compgen -W "{import_words}" -- "$cur") )
+                return 0
+            fi
+            ;;
+        transform)
+            if [[ $COMP_CWORD -eq 2 ]]; then
+                COMPREPLY=( $(compgen -W "{transform_words}" -- "$cur") )
+                return 0
+            fi
+            ;;
+        standards)
+            if [[ $COMP_CWORD -eq 2 ]]; then
+                COMPREPLY=( $(compgen -W "{standards_words}" -- "$cur") )
+                return 0
+            fi
+            ;;
+        sync)
+            if [[ $COMP_CWORD -eq 2 ]]; then
+                COMPREPLY=( $(compgen -W "{sync_words}" -- "$cur") )
+                return 0
+            fi
+            ;;
+    esac
+    case "$prev" in
+        --dialect)
+            COMPREPLY=( $(compgen -W "{dialect_words}" -- "$cur") )
+            return 0
+            ;;
+        --format)
+            COMPREPLY=( $(compgen -W "json yaml table" -- "$cur") )
+            return 0
+            ;;
+        --policy)
+            COMPREPLY=( $(compgen -f -X '!*.policy.yaml' -- "$cur") )
+            return 0
+            ;;
+        --schema)
+            COMPREPLY=( $(compgen -f -X '!*.json' -- "$cur") )
+            return 0
+            ;;
+    esac
+    if [[ $COMP_CWORD -eq 1 ]]; then
+        COMPREPLY=( $(compgen -W "$commands" -- "$cur") )
+        return 0
+    fi
+    COMPREPLY=( $(compgen -f -- "$cur") )
+    return 0
+}}
+complete -F _dm_completions dm
+'''
+    return script
+def generate_zsh_completion() -> str:
+    """Return the text of a zsh completion script for the `dm` command.
+    The script defines the completion function `_dm`, which describes the
+    top-level commands at the first argument and, afterwards, offers the
+    sub-command names of generate, import, transform, standards and sync,
+    or option values / file names for any other command. The script works
+    both when autoloaded from $fpath (via its `#compdef dm` first line) and
+    when evaluated from a startup file as its header suggests: it calls
+    `_dm` only when it is itself running as that function, and otherwise
+    registers the function with `compdef`.
+    """
+    # NOTE(review): the emitted header advertises `datalex completion zsh`
+    # while the completion is registered for `dm` - confirm which
+    # executable name is actually installed.
+    cmds_list = "\n            ".join([f"'{c}:{c} command'" for c in _COMMANDS])
+    gen_subs = " ".join(_GENERATE_SUBCOMMANDS)
+    imp_subs = " ".join(_IMPORT_SUBCOMMANDS)
+    transform_subs = " ".join(_TRANSFORM_SUBCOMMANDS)
+    standards_subs = " ".join(_STANDARDS_SUBCOMMANDS)
+    sync_subs = " ".join(_SYNC_SUBCOMMANDS)
+    dialects = " ".join(_DIALECTS)
+    # Doubled braces in the template are literal braces of the shell script.
+    # The trailing guard replaces an unconditional `_dm "$@"`, which under
+    # `eval` ran the completion helpers outside a completion context and
+    # never registered the function for `dm`.
+    return f'''#compdef dm
+# zsh completion for dm (DataLex CLI)
+# Add to ~/.zshrc: eval "$(datalex completion zsh)"
+_dm() {{
+    local -a commands
+    commands=(
+            {cmds_list}
+    )
+    _arguments -C \\
+        '1:command:->command' \\
+        '*::arg:->args'
+    case $state in
+        command)
+            _describe 'dm commands' commands
+            ;;
+        args)
+            case $words[1] in
+                generate)
+                    _values 'subcommand' {gen_subs}
+                    ;;
+                import)
+                    _values 'subcommand' {imp_subs}
+                    ;;
+                transform)
+                    _values 'subcommand' {transform_subs}
+                    ;;
+                standards)
+                    _values 'subcommand' {standards_subs}
+                    ;;
+                sync)
+                    _values 'subcommand' {sync_subs}
+                    ;;
+                *)
+                    case $words[-2] in
+                        --dialect)
+                            _values 'dialect' {dialects}
+                            ;;
+                        --format)
+                            _values 'format' json yaml table
+                            ;;
+                        --policy)
+                            _files -g '*.policy.yaml'
+                            ;;
+                        --schema)
+                            _files -g '*.json'
+                            ;;
+                        *)
+                            _files
+                            ;;
+                    esac
+                    ;;
+            esac
+            ;;
+    esac
+}}
+if [ "$funcstack[1]" = "_dm" ]; then
+    _dm "$@"
+else
+    compdef _dm dm
+fi
+'''
+def generate_fish_completion() -> str:
+    """Return the text of a fish completion script for the `dm` command.
+    The script is a series of `complete -c dm` lines: one per top-level
+    command, one per sub-command of generate, import, transform, standards
+    and sync, one per --dialect value, and one for the --format values.
+    The returned text ends with a newline.
+    """
+    script = [
+        "# fish completion for dm (DataLex CLI)",
+        "# Add to ~/.config/fish/completions/dm.fish",
+        "",
+    ]
+    script.extend(
+        f"complete -c dm -n '__fish_use_subcommand' -a '{cmd}' -d '{cmd} command'"
+        for cmd in _COMMANDS
+    )
+    script.append("")
+    # Parent command paired with the sub-commands offered once it was typed.
+    nested = (
+        ("generate", _GENERATE_SUBCOMMANDS),
+        ("import", _IMPORT_SUBCOMMANDS),
+        ("transform", _TRANSFORM_SUBCOMMANDS),
+        ("standards", _STANDARDS_SUBCOMMANDS),
+        ("sync", _SYNC_SUBCOMMANDS),
+    )
+    for parent, children in nested:
+        script.extend(
+            f"complete -c dm -n '__fish_seen_subcommand_from {parent}' -a '{child}'"
+            for child in children
+        )
+    script.append("")
+    script.extend(f"complete -c dm -l dialect -a '{dialect}'" for dialect in _DIALECTS)
+    script.append("complete -c dm -l format -a 'json yaml table'")
+    return "\n".join(script) + "\n"

datalex_core/connectors/__init__.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""Database connectors for pulling schema from live databases.
+Each connector implements the same interface:
+  pull_schema(connection_string, schema=None, tables=None, **kwargs) -> Dict[str, Any]
+Returns a DataLex model dict ready for use.
+"""
+from datalex_core.connectors.base import (
+    BaseConnector,
+    ConnectorConfig,
+    ConnectorResult,
+    get_connector,
+    list_connectors,
+)
+from datalex_core.connectors.postgres import PostgresConnector
+from datalex_core.connectors.mysql import MySQLConnector
+from datalex_core.connectors.snowflake import SnowflakeConnector
+from datalex_core.connectors.bigquery import BigQueryConnector
+from datalex_core.connectors.databricks import DatabricksConnector
+from datalex_core.connectors.sqlserver import SQLServerConnector, AzureSQLConnector, AzureFabricConnector
+from datalex_core.connectors.redshift import RedshiftConnector
+# Public names of this package: the base connector types and lookup helpers
+# imported above, plus one connector class per supported database. This list
+# is what `from datalex_core.connectors import *` binds.
+__all__ = [
+    "BaseConnector",
+    "BigQueryConnector",
+    "ConnectorConfig",
+    "ConnectorResult",
+    "DatabricksConnector",
+    "MySQLConnector",
+    "PostgresConnector",
+    "SnowflakeConnector",
+    "SQLServerConnector",
+    "AzureSQLConnector",
+    "AzureFabricConnector",
+    "RedshiftConnector",
+    "get_connector",
+    "list_connectors",
+]