PyPI - datalex-cli - Versions diffs - 0.1.1__py3-none-any.whl - Mend

datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

datalex_cli/__init__.py +1 -0
datalex_cli/datalex_cli.py +658 -0
datalex_cli/main.py +2925 -0
datalex_cli-0.1.1.dist-info/METADATA +228 -0
datalex_cli-0.1.1.dist-info/RECORD +64 -0
datalex_cli-0.1.1.dist-info/WHEEL +5 -0
datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
datalex_core/__init__.py +94 -0
datalex_core/_schemas/datalex/common.schema.json +127 -0
datalex_core/_schemas/datalex/domain.schema.json +24 -0
datalex_core/_schemas/datalex/entity.schema.json +158 -0
datalex_core/_schemas/datalex/model.schema.json +141 -0
datalex_core/_schemas/datalex/policy.schema.json +70 -0
datalex_core/_schemas/datalex/project.schema.json +82 -0
datalex_core/_schemas/datalex/snippet.schema.json +24 -0
datalex_core/_schemas/datalex/source.schema.json +104 -0
datalex_core/_schemas/datalex/term.schema.json +30 -0
datalex_core/canonical.py +166 -0
datalex_core/completion.py +204 -0
datalex_core/connectors/__init__.py +39 -0
datalex_core/connectors/base.py +417 -0
datalex_core/connectors/bigquery.py +229 -0
datalex_core/connectors/databricks.py +262 -0
datalex_core/connectors/mysql.py +266 -0
datalex_core/connectors/postgres.py +309 -0
datalex_core/connectors/redshift.py +298 -0
datalex_core/connectors/snowflake.py +336 -0
datalex_core/connectors/sqlserver.py +425 -0
datalex_core/datalex/__init__.py +26 -0
datalex_core/datalex/diff.py +188 -0
datalex_core/datalex/errors.py +85 -0
datalex_core/datalex/loader.py +512 -0
datalex_core/datalex/migrate_layout.py +382 -0
datalex_core/datalex/parse_cache.py +102 -0
datalex_core/datalex/project.py +214 -0
datalex_core/datalex/types.py +224 -0
datalex_core/dbt/__init__.py +18 -0
datalex_core/dbt/emit.py +344 -0
datalex_core/dbt/manifest.py +329 -0
datalex_core/dbt/profiles.py +185 -0
datalex_core/dbt/sync.py +279 -0
datalex_core/dbt/warehouse.py +215 -0
datalex_core/dialects/__init__.py +15 -0
datalex_core/dialects/_common.py +48 -0
datalex_core/dialects/base.py +47 -0
datalex_core/dialects/postgres.py +164 -0
datalex_core/dialects/registry.py +36 -0
datalex_core/dialects/snowflake.py +129 -0
datalex_core/diffing.py +358 -0
datalex_core/docs_generator.py +797 -0
datalex_core/doctor.py +181 -0
datalex_core/generators.py +478 -0
datalex_core/importers.py +1176 -0
datalex_core/issues.py +23 -0
datalex_core/loader.py +21 -0
datalex_core/migrate.py +316 -0
datalex_core/modeling.py +679 -0
datalex_core/packages.py +430 -0
datalex_core/policy.py +1037 -0
datalex_core/resolver.py +456 -0
datalex_core/schema.py +54 -0
datalex_core/semantic.py +1561 -0

datalex_core/connectors/postgres.py ADDED Viewed

@@ -0,0 +1,309 @@
+"""PostgreSQL connector — pulls schema from information_schema."""
+from __future__ import annotations
+from datetime import date
+from typing import Any, Dict, List, Tuple
+from datalex_core.connectors.base import BaseConnector, ConnectorConfig, ConnectorResult
+# Maps the ``data_type`` text reported by ``information_schema.columns`` to the
+# type names used in the generated model. Names that are missing here fall
+# back to "string" at the lookup site.
+_PG_TYPE_MAP = {
+    "integer": "integer",
+    "bigint": "bigint",
+    "smallint": "smallint",
+    "serial": "integer",
+    "bigserial": "bigint",
+    "numeric": "decimal",
+    "real": "float",
+    "double precision": "float",
+    "boolean": "boolean",
+    "character varying": "string",
+    "varchar": "string",
+    "character": "string",
+    "char": "string",
+    "text": "text",
+    "date": "date",
+    "timestamp without time zone": "timestamp",
+    "timestamp with time zone": "timestamp",
+    "time without time zone": "time",
+    "time with time zone": "time",
+    "uuid": "uuid",
+    "json": "json",
+    "jsonb": "json",
+    "bytea": "binary",
+    "inet": "string",
+    "cidr": "string",
+    "macaddr": "string",
+    "interval": "string",
+    "array": "json",
+    # information_schema spells the array marker in upper case, so the
+    # lower-case key above is never hit by a case-sensitive lookup.
+    "ARRAY": "json",
+    "xml": "string",
+    "money": "decimal",
+    "bit": "string",
+    "bit varying": "string",
+    "point": "string",
+    "line": "string",
+    "polygon": "string",
+    "tsvector": "string",
+    "tsquery": "string",
+}
+class PostgresConnector(BaseConnector):
+    """Connector that reads table, column, key and index metadata from PostgreSQL.
+
+    Metadata is read from ``information_schema`` and the ``pg_indexes`` view.
+    Table-level calls inspect a single schema: ``config.schema`` when set,
+    otherwise ``public``.
+    """
+    connector_type = "postgres"
+    display_name = "PostgreSQL"
+    required_package = "psycopg2"
+    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
+        """Open and immediately close a connection to check that ``config`` works.
+
+        Returns:
+            ``(True, message)`` on success, otherwise ``(False, reason)``. A
+            missing ``psycopg2`` driver is reported with an install hint
+            instead of raising.
+        """
+        try:
+            # Go through _connect so the probe uses exactly the same connection
+            # arguments as the real metadata calls.
+            conn = self._connect(config)
+            conn.close()
+            return True, "Connection successful"
+        except ImportError:
+            return False, "psycopg2 not installed. Run: pip install psycopg2-binary"
+        except Exception as e:
+            return False, f"Connection failed: {e}"
+    def _connect(self, config: ConnectorConfig):
+        """Return a new psycopg2 connection built from ``config`` (port defaults to 5432)."""
+        # Imported lazily so the module can be imported without the driver.
+        import psycopg2
+        return psycopg2.connect(
+            host=config.host,
+            port=config.port or 5432,
+            dbname=config.database,
+            user=config.user,
+            password=config.password,
+        )
+    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
+        """List schemas with the number of tables and views each one contains.
+
+        ``pg_catalog``, ``information_schema`` and ``pg_toast`` are excluded.
+
+        Returns:
+            One ``{"name": ..., "table_count": ...}`` dict per schema, ordered
+            by schema name.
+        """
+        conn = self._connect(config)
+        try:
+            cur = conn.cursor()
+            cur.execute("""
+                SELECT s.schema_name,
+                       COUNT(t.table_name) AS table_count
+                FROM information_schema.schemata s
+                LEFT JOIN information_schema.tables t
+                  ON t.table_schema = s.schema_name
+                  AND t.table_type IN ('BASE TABLE', 'VIEW')
+                WHERE s.schema_name NOT IN ('pg_catalog', 'information_schema', 'pg_toast')
+                GROUP BY s.schema_name
+                ORDER BY s.schema_name
+            """)
+            return [{"name": row[0], "table_count": row[1]} for row in cur.fetchall()]
+        finally:
+            conn.close()
+    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
+        """List the tables and views of ``config.schema`` (default ``public``).
+
+        Returns:
+            One dict per relation with ``name``, ``type`` (``"table"`` or
+            ``"view"``), ``column_count`` and ``row_count``. ``row_count`` is
+            always ``None``; no row counting is performed here.
+        """
+        schema = config.schema or "public"
+        conn = self._connect(config)
+        try:
+            cur = conn.cursor()
+            cur.execute("""
+                SELECT t.table_name, t.table_type,
+                       (SELECT COUNT(*) FROM information_schema.columns c
+                        WHERE c.table_schema = t.table_schema AND c.table_name = t.table_name) AS col_count
+                FROM information_schema.tables t
+                WHERE t.table_schema = %s
+                  AND t.table_type IN ('BASE TABLE', 'VIEW')
+                ORDER BY t.table_name
+            """, (schema,))
+            results = []
+            for row in cur.fetchall():
+                ttype = "view" if "VIEW" in row[1] else "table"
+                results.append({"name": row[0], "type": ttype, "column_count": row[2], "row_count": None})
+            return results
+        finally:
+            conn.close()
+    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
+        """Connect, pull the full model for the configured schema, and disconnect."""
+        conn = self._connect(config)
+        try:
+            return self._pull(conn, config)
+        finally:
+            conn.close()
+    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
+        """Build a ``ConnectorResult`` from an already open connection.
+
+        The connection itself is left open for the caller to close; only the
+        cursor created here is released.
+        """
+        cur = conn.cursor()
+        try:
+            return self._pull_with_cursor(cur, config)
+        finally:
+            # Release the cursor even when one of the metadata queries fails.
+            cur.close()
+    @staticmethod
+    def _index_columns(idx_def: str) -> List[str]:
+        """Extract the key column list from a ``CREATE INDEX`` statement.
+
+        The first parenthesised group is read with balanced-parenthesis
+        tracking, so expression keys such as ``lower((name)::text)`` are kept
+        intact instead of being cut at their first closing parenthesis.
+        Plain column entries are reduced to their first word, which drops
+        trailing ordering words such as ``DESC``.
+        """
+        start = idx_def.find("(")
+        if start == -1:
+            return []
+        depth = 0
+        parts: List[str] = []
+        current: List[str] = []
+        for ch in idx_def[start + 1:]:
+            if ch == "(":
+                depth += 1
+            elif ch == ")":
+                if depth == 0:
+                    # Closing parenthesis of the column list itself.
+                    break
+                depth -= 1
+            elif ch == "," and depth == 0:
+                parts.append("".join(current))
+                current = []
+                continue
+            current.append(ch)
+        parts.append("".join(current))
+        columns: List[str] = []
+        for part in parts:
+            part = part.strip()
+            if not part:
+                continue
+            # NOTE(review): quoted identifiers containing spaces are still
+            # split on whitespace here, as before.
+            columns.append(part if "(" in part else part.split()[0])
+        return columns
+    def _pull_with_cursor(self, cur: Any, config: ConnectorConfig) -> ConnectorResult:
+        """Run the metadata queries on ``cur`` and assemble the model.
+
+        Collects, in order: tables/views, columns, primary keys, unique
+        constraints, foreign keys (as relationships) and indexes for the
+        schema named by ``config.schema`` (default ``public``).
+        """
+        model = self._build_model(config)
+        schema_filter = config.schema or "public"
+        warnings: List[str] = []
+        # --- Tables ---
+        cur.execute("""
+            SELECT table_name, table_type
+            FROM information_schema.tables
+            WHERE table_schema = %s
+              AND table_type IN ('BASE TABLE', 'VIEW')
+            ORDER BY table_name
+        """, (schema_filter,))
+        tables = cur.fetchall()
+        # Keyed by physical table name so later queries can attach to it.
+        table_entities: Dict[str, Dict[str, Any]] = {}
+        for table_name, table_type in tables:
+            if not self._should_include_table(table_name, config):
+                continue
+            entity_name = self._entity_name(table_name)
+            entity_type = "view" if table_type == "VIEW" else "table"
+            table_entities[table_name] = {
+                "name": entity_name,
+                "physical_name": table_name,
+                "type": entity_type,
+                "description": f"Pulled from PostgreSQL {config.database}.{schema_filter}.{table_name} on {date.today().isoformat()}",
+                "fields": [],
+            }
+            # The schema is only recorded when it is not the default one.
+            if schema_filter != "public":
+                table_entities[table_name]["schema"] = schema_filter
+        # --- Columns ---
+        cur.execute("""
+            SELECT table_name, column_name, data_type, is_nullable,
+                   column_default, character_maximum_length,
+                   numeric_precision, numeric_scale, udt_name
+            FROM information_schema.columns
+            WHERE table_schema = %s
+            ORDER BY table_name, ordinal_position
+        """, (schema_filter,))
+        columns = cur.fetchall()
+        total_columns = 0
+        for row in columns:
+            tname, col_name, data_type, is_nullable, col_default, char_max_len, num_prec, num_scale, udt_name = row
+            if tname not in table_entities:
+                continue
+            # Case-insensitive lookup: information_schema reports array
+            # columns as upper-case "ARRAY", which an exact match on the
+            # lower-case map keys would miss.
+            dl_type = _PG_TYPE_MAP.get((data_type or "").lower(), "string")
+            if data_type == "numeric" and num_prec:
+                dl_type = f"decimal({num_prec},{num_scale or 0})"
+            if data_type in ("character varying", "varchar") and char_max_len:
+                dl_type = f"varchar({char_max_len})"
+            if data_type == "USER-DEFINED":
+                # Enums, domains, extension types: keep the underlying name.
+                dl_type = udt_name or "string"
+            field: Dict[str, Any] = {
+                "name": col_name,
+                "type": dl_type,
+                "nullable": is_nullable == "YES",
+            }
+            if col_default is not None:
+                # Drop a trailing ``::type`` cast and surrounding quotes.
+                cleaned = str(col_default).split("::")[0].strip("'")
+                # Sequence-backed defaults are not carried into the model.
+                if not cleaned.startswith("nextval("):
+                    field["default"] = cleaned
+            table_entities[tname]["fields"].append(field)
+            total_columns += 1
+        # --- Primary keys ---
+        # Constraint names are not guaranteed unique within a PostgreSQL
+        # schema, so the join also matches on table name to avoid pairing a
+        # constraint with another table's columns.
+        cur.execute("""
+            SELECT tc.table_name, kcu.column_name
+            FROM information_schema.table_constraints tc
+            JOIN information_schema.key_column_usage kcu
+              ON tc.constraint_name = kcu.constraint_name
+              AND tc.table_schema = kcu.table_schema
+              AND tc.table_name = kcu.table_name
+            WHERE tc.constraint_type = 'PRIMARY KEY'
+              AND tc.table_schema = %s
+        """, (schema_filter,))
+        for tname, col_name in cur.fetchall():
+            if tname in table_entities:
+                for f in table_entities[tname]["fields"]:
+                    if f["name"] == col_name:
+                        f["primary_key"] = True
+                        f["nullable"] = False
+        # --- Unique constraints ---
+        # NOTE(review): every column of a multi-column UNIQUE constraint is
+        # flagged individually; confirm that is what consumers expect.
+        cur.execute("""
+            SELECT tc.table_name, kcu.column_name
+            FROM information_schema.table_constraints tc
+            JOIN information_schema.key_column_usage kcu
+              ON tc.constraint_name = kcu.constraint_name
+              AND tc.table_schema = kcu.table_schema
+              AND tc.table_name = kcu.table_name
+            WHERE tc.constraint_type = 'UNIQUE'
+              AND tc.table_schema = %s
+        """, (schema_filter,))
+        for tname, col_name in cur.fetchall():
+            if tname in table_entities:
+                for f in table_entities[tname]["fields"]:
+                    if f["name"] == col_name:
+                        f["unique"] = True
+        # --- Foreign keys ---
+        # NOTE(review): constraint_column_usage carries no column ordering, so
+        # a multi-column foreign key yields every child/parent column pairing
+        # rather than only the matching ones.
+        cur.execute("""
+            SELECT
+                kcu.table_name AS child_table,
+                kcu.column_name AS child_column,
+                ccu.table_name AS parent_table,
+                ccu.column_name AS parent_column,
+                tc.constraint_name
+            FROM information_schema.table_constraints tc
+            JOIN information_schema.key_column_usage kcu
+              ON tc.constraint_name = kcu.constraint_name
+              AND tc.table_schema = kcu.table_schema
+              AND tc.table_name = kcu.table_name
+            JOIN information_schema.constraint_column_usage ccu
+              ON tc.constraint_name = ccu.constraint_name
+              AND tc.table_schema = ccu.table_schema
+            WHERE tc.constraint_type = 'FOREIGN KEY'
+              AND tc.table_schema = %s
+        """, (schema_filter,))
+        fk_rows = cur.fetchall()
+        relationships: List[Dict[str, Any]] = []
+        for child_table, child_col, parent_table, parent_col, constraint_name in fk_rows:
+            if child_table in table_entities:
+                for f in table_entities[child_table]["fields"]:
+                    if f["name"] == child_col:
+                        f["foreign_key"] = True
+                parent_entity = self._entity_name(parent_table)
+                child_entity = self._entity_name(child_table)
+                # Relationships point from the referenced (parent) column to
+                # the referencing (child) column.
+                relationships.append({
+                    "name": constraint_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_col}_fk",
+                    "from": f"{parent_entity}.{parent_col}",
+                    "to": f"{child_entity}.{child_col}",
+                    "cardinality": "one_to_many",
+                })
+        # --- Indexes ---
+        cur.execute("""
+            SELECT indexname, tablename, indexdef
+            FROM pg_indexes
+            WHERE schemaname = %s
+            ORDER BY tablename, indexname
+        """, (schema_filter,))
+        indexes: List[Dict[str, Any]] = []
+        for idx_name, tname, idx_def in cur.fetchall():
+            if tname not in table_entities:
+                continue
+            # Primary-key indexes are already represented on the fields.
+            if "_pkey" in idx_name:
+                continue
+            definition = idx_def or ""
+            # Look only at the statement prefix: a plain substring test would
+            # also fire for tables or columns whose name contains "unique".
+            is_unique = definition.upper().startswith("CREATE UNIQUE INDEX")
+            cols = self._index_columns(definition)
+            entity_name = self._entity_name(tname)
+            indexes.append({
+                "name": idx_name,
+                "entity": entity_name,
+                "fields": cols,
+                "unique": is_unique,
+            })
+        model["entities"] = list(table_entities.values())
+        model["relationships"] = relationships
+        model["indexes"] = indexes
+        return ConnectorResult(
+            model=model,
+            tables_found=len(table_entities),
+            columns_found=total_columns,
+            relationships_found=len(relationships),
+            indexes_found=len(indexes),
+            warnings=warnings,
+        )

datalex_core/connectors/redshift.py ADDED Viewed

@@ -0,0 +1,298 @@
+"""Amazon Redshift connector — pulls schema from information_schema with inference fallback."""
+from __future__ import annotations
+from datetime import date
+from typing import Any, Dict, List, Tuple
+from datalex_core.connectors.base import (
+    BaseConnector,
+    ConnectorConfig,
+    ConnectorResult,
+    infer_primary_keys,
+    infer_relationships,
+)
+# Lookup from the lower-cased Redshift ``data_type`` text to the type name used
+# in the generated model. Aliases are grouped by target; the resulting key
+# order matches a flat literal listing them one by one.
+_RS_TYPE_MAP = {
+    **{int_type: int_type for int_type in ("smallint", "integer", "bigint")},
+    **dict.fromkeys(("decimal", "numeric"), "decimal"),
+    **dict.fromkeys(("real", "double precision"), "float"),
+    "boolean": "boolean",
+    **dict.fromkeys(("character varying", "varchar", "character", "char"), "string"),
+    "text": "text",
+    "date": "date",
+    **dict.fromkeys(("timestamp without time zone", "timestamp with time zone"), "timestamp"),
+    **dict.fromkeys(("time without time zone", "time with time zone"), "time"),
+    "super": "json",
+    **dict.fromkeys(("varbyte", "binary varying"), "binary"),
+    **dict.fromkeys(("geometry", "geography", "hllsketch"), "string"),
+}
+class RedshiftConnector(BaseConnector):
+    """Connector that reads table, column and key metadata from Amazon Redshift.
+
+    Metadata is read from ``information_schema`` (and ``pg_namespace`` for the
+    schema listing). When the catalog declares no primary keys or no foreign
+    keys, the ``infer_primary_keys`` / ``infer_relationships`` helpers from
+    ``datalex_core.connectors.base`` are used as a fallback. Indexes are not
+    collected; the result always reports zero.
+    """
+    connector_type = "redshift"
+    display_name = "Amazon Redshift"
+    required_package = "redshift_connector"
+    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
+        """Open and immediately close a connection to check that ``config`` works.
+
+        Returns:
+            ``(True, message)`` on success, otherwise ``(False, reason)``. A
+            missing ``redshift_connector`` driver is reported with an install
+            hint instead of raising.
+        """
+        try:
+            conn = self._connect(config)
+            conn.close()
+            return True, "Connection successful"
+        except ImportError:
+            return False, "redshift-connector not installed. Run: pip install redshift-connector"
+        except Exception as e:
+            return False, f"Connection failed: {e}"
+    def _connect(self, config: ConnectorConfig):
+        """Return a new ``redshift_connector`` connection (port defaults to 5439)."""
+        # Imported lazily so the module can be imported without the driver.
+        import redshift_connector
+        return redshift_connector.connect(
+            host=config.host,
+            port=config.port or 5439,
+            database=config.database,
+            user=config.user,
+            password=config.password,
+            timeout=10,
+        )
+    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
+        """List schemas with the number of tables and views each one contains.
+
+        ``pg_catalog``, ``information_schema``, ``pg_internal`` and schemas
+        matching ``pg_temp_%`` are excluded.
+
+        Returns:
+            One ``{"name": ..., "table_count": ...}`` dict per schema, ordered
+            by schema name.
+        """
+        conn = self._connect(config)
+        try:
+            cur = conn.cursor()
+            cur.execute(
+                """
+                SELECT n.nspname AS schema_name,
+                       COUNT(t.table_name) AS table_count
+                FROM pg_namespace n
+                LEFT JOIN information_schema.tables t
+                  ON t.table_schema = n.nspname
+                 AND t.table_type IN ('BASE TABLE', 'VIEW')
+                WHERE n.nspname NOT IN ('pg_catalog', 'information_schema', 'pg_internal')
+                  AND n.nspname NOT LIKE 'pg_temp_%'
+                GROUP BY n.nspname
+                ORDER BY n.nspname
+                """
+            )
+            return [{"name": row[0], "table_count": int(row[1] or 0)} for row in cur.fetchall()]
+        finally:
+            conn.close()
+    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
+        """List the tables and views of ``config.schema`` (default ``public``).
+
+        Returns:
+            One dict per relation with ``name``, ``type`` (``"table"`` or
+            ``"view"``), ``column_count`` and ``row_count``. ``row_count`` is
+            always ``None``; no row counting is performed here.
+        """
+        schema = config.schema or "public"
+        conn = self._connect(config)
+        try:
+            cur = conn.cursor()
+            cur.execute(
+                """
+                SELECT t.table_name, t.table_type,
+                       (
+                         SELECT COUNT(*)
+                         FROM information_schema.columns c
+                         WHERE c.table_schema = t.table_schema
+                           AND c.table_name = t.table_name
+                       ) AS col_count
+                FROM information_schema.tables t
+                WHERE t.table_schema = %s
+                  AND t.table_type IN ('BASE TABLE', 'VIEW')
+                ORDER BY t.table_name
+                """,
+                (schema,),
+            )
+            results = []
+            for row in cur.fetchall():
+                ttype = "view" if "VIEW" in str(row[1]).upper() else "table"
+                results.append({
+                    "name": row[0],
+                    "type": ttype,
+                    "column_count": int(row[2] or 0),
+                    "row_count": None,
+                })
+            return results
+        finally:
+            conn.close()
+    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
+        """Connect, pull the full model for the configured schema, and disconnect."""
+        conn = self._connect(config)
+        try:
+            return self._pull(conn, config)
+        finally:
+            conn.close()
+    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
+        """Build a ``ConnectorResult`` from an already open connection.
+
+        The connection itself is left open for the caller to close; only the
+        cursor created here is released.
+        """
+        cur = conn.cursor()
+        try:
+            return self._pull_with_cursor(cur, config)
+        finally:
+            # Release the cursor even when one of the metadata queries fails.
+            cur.close()
+    def _pull_with_cursor(self, cur: Any, config: ConnectorConfig) -> ConnectorResult:
+        """Run the metadata queries on ``cur`` and assemble the model.
+
+        Collects, in order: tables/views, columns, primary keys and foreign
+        keys for the schema named by ``config.schema`` (default ``public``),
+        then applies the inference fallbacks when no keys were declared.
+        """
+        model = self._build_model(config)
+        schema_filter = config.schema or "public"
+        warnings: List[str] = []
+        # --- Tables ---
+        cur.execute(
+            """
+            SELECT table_name, table_type
+            FROM information_schema.tables
+            WHERE table_schema = %s
+              AND table_type IN ('BASE TABLE', 'VIEW')
+            ORDER BY table_name
+            """,
+            (schema_filter,),
+        )
+        tables = cur.fetchall()
+        # Keyed by physical table name so later queries can attach to it.
+        table_entities: Dict[str, Dict[str, Any]] = {}
+        for table_name, table_type in tables:
+            if not self._should_include_table(table_name, config):
+                continue
+            entity_name = self._entity_name(table_name)
+            entity_type = "view" if str(table_type).upper() == "VIEW" else "table"
+            table_entities[table_name] = {
+                "name": entity_name,
+                "physical_name": table_name,
+                "type": entity_type,
+                "description": f"Pulled from Redshift {config.database}.{schema_filter}.{table_name} on {date.today().isoformat()}",
+                "fields": [],
+            }
+            # The schema is only recorded when it is not the default one.
+            if schema_filter != "public":
+                table_entities[table_name]["schema"] = schema_filter
+        # --- Columns ---
+        cur.execute(
+            """
+            SELECT table_name, column_name, data_type, is_nullable,
+                   column_default, character_maximum_length,
+                   numeric_precision, numeric_scale
+            FROM information_schema.columns
+            WHERE table_schema = %s
+            ORDER BY table_name, ordinal_position
+            """,
+            (schema_filter,),
+        )
+        total_columns = 0
+        for row in cur.fetchall():
+            tname, col_name, data_type, is_nullable, col_default, char_max_len, num_prec, num_scale = row
+            if tname not in table_entities:
+                continue
+            dl_type = _RS_TYPE_MAP.get((data_type or "").lower(), "string")
+            if str(data_type).lower() in ("decimal", "numeric") and num_prec:
+                dl_type = f"decimal({int(num_prec)},{int(num_scale or 0)})"
+            if str(data_type).lower() in ("character varying", "varchar") and char_max_len:
+                try:
+                    dl_type = f"varchar({int(char_max_len)})"
+                except (TypeError, ValueError, OverflowError):
+                    # Length could not be read as an integer; keep the
+                    # unparameterised type instead of failing the pull.
+                    dl_type = "string"
+            field: Dict[str, Any] = {
+                "name": col_name,
+                "type": dl_type,
+                "nullable": str(is_nullable).upper() == "YES",
+            }
+            if col_default is not None:
+                # Drop a trailing ``::type`` cast and surrounding quotes.
+                cleaned = str(col_default).split("::")[0].strip("'")
+                if cleaned:
+                    field["default"] = cleaned
+            table_entities[tname]["fields"].append(field)
+            total_columns += 1
+        # --- Primary keys ---
+        # The join also matches on table name so that two tables carrying a
+        # constraint with the same name cannot be paired with each other's
+        # columns.
+        cur.execute(
+            """
+            SELECT tc.table_name, kcu.column_name
+            FROM information_schema.table_constraints tc
+            JOIN information_schema.key_column_usage kcu
+              ON tc.constraint_name = kcu.constraint_name
+             AND tc.table_schema = kcu.table_schema
+             AND tc.table_name = kcu.table_name
+            WHERE tc.constraint_type = 'PRIMARY KEY'
+              AND tc.table_schema = %s
+            """,
+            (schema_filter,),
+        )
+        for tname, col_name in cur.fetchall():
+            if tname in table_entities:
+                for f in table_entities[tname]["fields"]:
+                    if f["name"] == col_name:
+                        f["primary_key"] = True
+                        f["nullable"] = False
+        # --- Foreign keys ---
+        # NOTE(review): constraint_column_usage carries no column ordering, so
+        # a multi-column foreign key yields every child/parent column pairing
+        # rather than only the matching ones.
+        cur.execute(
+            """
+            SELECT
+                kcu.table_name AS child_table,
+                kcu.column_name AS child_column,
+                ccu.table_name AS parent_table,
+                ccu.column_name AS parent_column,
+                tc.constraint_name
+            FROM information_schema.table_constraints tc
+            JOIN information_schema.key_column_usage kcu
+              ON tc.constraint_name = kcu.constraint_name
+             AND tc.table_schema = kcu.table_schema
+             AND tc.table_name = kcu.table_name
+            JOIN information_schema.constraint_column_usage ccu
+              ON tc.constraint_name = ccu.constraint_name
+             AND tc.table_schema = ccu.table_schema
+            WHERE tc.constraint_type = 'FOREIGN KEY'
+              AND tc.table_schema = %s
+            """,
+            (schema_filter,),
+        )
+        fk_rows = cur.fetchall()
+        relationships: List[Dict[str, Any]] = []
+        for child_table, child_col, parent_table, parent_col, constraint_name in fk_rows:
+            if child_table in table_entities:
+                for f in table_entities[child_table]["fields"]:
+                    if f["name"] == child_col:
+                        f["foreign_key"] = True
+                parent_entity = self._entity_name(parent_table)
+                child_entity = self._entity_name(child_table)
+                # Relationships point from the referenced (parent) column to
+                # the referencing (child) column.
+                relationships.append(
+                    {
+                        "name": constraint_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_col}_fk",
+                        "from": f"{parent_entity}.{parent_col}",
+                        "to": f"{child_entity}.{child_col}",
+                        "cardinality": "one_to_many",
+                    }
+                )
+        # --- Inference fallbacks ---
+        entities_list = list(table_entities.values())
+        # Primary keys are inferred only when not a single declared key was
+        # found anywhere in the schema.
+        has_any_pk = any(
+            f.get("primary_key") for ent in entities_list for f in ent.get("fields", [])
+        )
+        if not has_any_pk:
+            entities_list, pk_msgs = infer_primary_keys(entities_list)
+            warnings.extend(pk_msgs)
+        if not relationships:
+            inferred_rels, fk_msgs = infer_relationships(entities_list, relationships)
+            relationships.extend(inferred_rels)
+            warnings.extend(fk_msgs)
+            if inferred_rels:
+                # Put the summary first so it is read before the details.
+                warnings.insert(
+                    0,
+                    f"No FK constraints found — inferred {len(inferred_rels)} relationships from column naming patterns.",
+                )
+        model["entities"] = entities_list
+        model["relationships"] = relationships
+        model["indexes"] = []
+        return ConnectorResult(
+            model=model,
+            tables_found=len(table_entities),
+            columns_found=total_columns,
+            relationships_found=len(relationships),
+            indexes_found=0,
+            warnings=warnings,
+        )