PyPI - clearmetric-core - Versions diffs - 0.2.0__py3-none-any.whl - Mend

clearmetric-core 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

clearmetric/cli/__init__.py +157 -0
clearmetric/cli/__main__.py +8 -0
clearmetric/core/__init__.py +78 -0
clearmetric/core/_version.py +3 -0
clearmetric/core/aliases.py +91 -0
clearmetric/core/errors.py +19 -0
clearmetric/core/ids.py +172 -0
clearmetric/core/interop.py +126 -0
clearmetric/core/merge.py +147 -0
clearmetric/core/models.py +68 -0
clearmetric/core/serialize.py +10 -0
clearmetric/lineage/__init__.py +36 -0
clearmetric/lineage/_version.py +5 -0
clearmetric/lineage/api.py +86 -0
clearmetric/lineage/build.py +961 -0
clearmetric/lineage/coverage.py +198 -0
clearmetric/lineage/errors.py +15 -0
clearmetric/lineage/graph.py +143 -0
clearmetric/lineage/loaders.py +249 -0
clearmetric/lineage/models.py +34 -0
clearmetric/lineage/render/json.py +10 -0
clearmetric/lineage/render/mermaid.py +40 -0
clearmetric/lineage/render/text.py +108 -0
clearmetric/lineage/sql_analyzer.py +409 -0
clearmetric/powerbi/__init__.py +28 -0
clearmetric/powerbi/_version.py +5 -0
clearmetric/powerbi/api.py +55 -0
clearmetric/powerbi/build.py +369 -0
clearmetric/powerbi/discovery.py +109 -0
clearmetric/powerbi/errors.py +20 -0
clearmetric/powerbi/m_parser.py +242 -0
clearmetric/powerbi/models.py +63 -0
clearmetric/powerbi/native_sql.py +35 -0
clearmetric/powerbi/render/json.py +15 -0
clearmetric/powerbi/render/text.py +31 -0
clearmetric/powerbi/report_parser.py +245 -0
clearmetric/powerbi/tmdl.py +59 -0
clearmetric/query/__init__.py +39 -0
clearmetric/query/_version.py +5 -0
clearmetric/query/api.py +39 -0
clearmetric/query/ast_utils.py +81 -0
clearmetric/query/build.py +263 -0
clearmetric/query/ctes.py +127 -0
clearmetric/query/errors.py +15 -0
clearmetric/query/models.py +93 -0
clearmetric/query/parser.py +67 -0
clearmetric/query/relations.py +229 -0
clearmetric/query/render/__init__.py +1 -0
clearmetric/query/render/json.py +10 -0
clearmetric/query/render/text.py +42 -0
clearmetric_core-0.2.0.dist-info/METADATA +91 -0
clearmetric_core-0.2.0.dist-info/RECORD +55 -0
clearmetric_core-0.2.0.dist-info/WHEEL +5 -0
clearmetric_core-0.2.0.dist-info/entry_points.txt +2 -0
clearmetric_core-0.2.0.dist-info/top_level.txt +1 -0

clearmetric/cli/__init__.py ADDED Viewed

@@ -0,0 +1,157 @@
+"""ClearMetric Core CLI — ``cm`` command router."""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from clearmetric.core import __version__, render_json
+from clearmetric.lineage import (
+    build_catalog_artifact,
+    build_lineage_map,
+    trace_downstream,
+    trace_upstream,
+)
+from clearmetric.lineage.errors import LineageError
+from clearmetric.lineage.render.mermaid import render_traversal_mermaid
+from clearmetric.lineage.render.text import render_text, render_traversal_tree
+def _build_root_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="cm",
+        description="ClearMetric Core — local compiler, graph engine, and CLI.",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"cm {__version__} (ClearMetric Core)",
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    compile_parser = subparsers.add_parser(
+        "compile",
+        help="Compile project input into a catalog graph artifact (JSON).",
+    )
+    compile_parser.add_argument(
+        "project_input",
+        help="Path to a dbt manifest.json file or a folder of UTF-8 .sql files.",
+    )
+    compile_parser.add_argument(
+        "--dialect",
+        required=True,
+        help="sqlglot dialect name, for example postgres, snowflake, tsql, or bigquery.",
+    )
+    compile_parser.add_argument(
+        "--format",
+        choices=("json", "text"),
+        default="json",
+        help="Output format (default: json).",
+    )
+    impact_parser = subparsers.add_parser(
+        "impact",
+        help="Trace upstream or downstream column lineage for one selection.",
+    )
+    impact_parser.add_argument(
+        "selection",
+        help="Dataset column selection, for example orders.amount.",
+    )
+    impact_parser.add_argument(
+        "project_input",
+        help="Path to a dbt manifest.json file or a folder of UTF-8 .sql files.",
+    )
+    impact_parser.add_argument(
+        "--dialect",
+        required=True,
+        help="sqlglot dialect name, for example postgres, snowflake, tsql, or bigquery.",
+    )
+    traversal = impact_parser.add_mutually_exclusive_group(required=True)
+    traversal.add_argument(
+        "--upstream",
+        action="store_true",
+        help="Trace upstream lineage for the selection.",
+    )
+    traversal.add_argument(
+        "--downstream",
+        action="store_true",
+        help="Trace downstream impact for the selection.",
+    )
+    impact_parser.add_argument(
+        "--format",
+        choices=("text", "json", "mermaid"),
+        default="text",
+        help="Output format (default: text).",
+    )
+    return parser
+def _run_compile(args: argparse.Namespace) -> int:
+    if args.format == "json":
+        artifact = build_catalog_artifact(args.project_input, dialect=args.dialect)
+        print(json.dumps(render_json(artifact), indent=2, sort_keys=False))
+    else:
+        lineage_map = build_lineage_map(args.project_input, dialect=args.dialect)
+        print(render_text(lineage_map))
+    return 0
+def _run_impact(args: argparse.Namespace) -> int:
+    direction = "upstream" if args.upstream else "downstream"
+    if direction == "upstream":
+        result = trace_upstream(
+            args.project_input,
+            dialect=args.dialect,
+            selection=args.selection,
+        )
+    else:
+        result = trace_downstream(
+            args.project_input,
+            dialect=args.dialect,
+            selection=args.selection,
+        )
+    if args.format == "json":
+        print(json.dumps(result.model_dump(mode="json"), indent=2, sort_keys=False))
+        return 0
+    artifact = build_catalog_artifact(args.project_input, dialect=args.dialect)
+    if args.format == "mermaid":
+        print(
+            render_traversal_mermaid(
+                result.selection_id,
+                artifact,
+                direction=direction,
+            )
+        )
+        return 0
+    print(
+        render_traversal_tree(
+            result,
+            artifact,
+            direction=direction,
+        )
+    )
+    return 0
+def main(argv: list[str] | None = None) -> int:
+    parser = _build_root_parser()
+    args = parser.parse_args(argv)
+    try:
+        if args.command == "compile":
+            return _run_compile(args)
+        if args.command == "impact":
+            return _run_impact(args)
+    except LineageError as exc:
+        print(f"cm error: {exc}", file=sys.stderr)
+        return 1
+    print(f"cm: unknown command {args.command!r}", file=sys.stderr)
+    return 1
+# Only the ``main`` entry point is exported from the CLI package.
+__all__ = ["main"]

clearmetric/cli/__main__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""``python -m clearmetric.cli`` entry."""
+from __future__ import annotations
+from . import main
+# Run the CLI only when this module is executed directly; the integer returned
+# by main() is handed to SystemExit and so becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

clearmetric/core/__init__.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""Public package surface for clearmetric-core."""
+from __future__ import annotations
+from ._version import __version__
+from .aliases import load_table_alias_map
+from .errors import (
+    AliasMapError,
+    CanonicalIdError,
+    ClearMetricError,
+    MergeConflictError,
+)
+from .ids import (
+    asset_id,
+    column_id,
+    cte_id,
+    leaf_name,
+    measure_id,
+    model_id,
+    normalize_identifier,
+    normalize_identifier_part,
+    normalize_identifier_parts,
+    page_id,
+    report_id,
+    schema_name,
+    split_qualified_identifier,
+    table_id,
+    visual_id,
+)
+from .interop import (
+    AliasMap,
+    apply_alias_map,
+    normalize_fqn_for_matching,
+    resolve_table_match,
+    warehouse_table_fqn_candidates,
+    warehouse_table_fqn_candidates_from_name,
+)
+from .merge import merge
+from .models import CatalogArtifact, Edge, Evidence, MatchStatus, Node, Warning
+from .serialize import render_json
+# Public API of ``clearmetric.core``: every name listed here is imported above.
+# NOTE: ``Warning`` is the model imported from ``.models``; a star-import of this
+# package therefore rebinds the builtin ``Warning`` name in the importing module.
+__all__ = [
+    "__version__",
+    "AliasMap",
+    "AliasMapError",
+    "MatchStatus",
+    "apply_alias_map",
+    "asset_id",
+    "CatalogArtifact",
+    "ClearMetricError",
+    "CanonicalIdError",
+    "column_id",
+    "cte_id",
+    "Edge",
+    "Evidence",
+    "leaf_name",
+    "load_table_alias_map",
+    "measure_id",
+    "merge",
+    "normalize_fqn_for_matching",
+    "MergeConflictError",
+    "model_id",
+    "Node",
+    "page_id",
+    "normalize_identifier",
+    "normalize_identifier_part",
+    "normalize_identifier_parts",
+    "render_json",
+    "report_id",
+    "resolve_table_match",
+    "schema_name",
+    "split_qualified_identifier",
+    "table_id",
+    "visual_id",
+    "warehouse_table_fqn_candidates",
+    "warehouse_table_fqn_candidates_from_name",
+    "Warning",
+]

clearmetric/core/_version.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Package version."""
+# Version string re-exported by ``clearmetric.core`` and shown by ``cm --version``.
+__version__ = "0.2.0"

clearmetric/core/aliases.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""Load versioned table alias files for cross-graph matching."""
+from __future__ import annotations
+from pathlib import Path
+from .errors import AliasMapError
+from .interop import AliasMap, normalize_fqn_for_matching
+# The only ``version:`` value load_table_alias_map accepts; compared as a string
+# against the text after the colon.
+_SUPPORTED_VERSION = "1"
+def load_table_alias_map(path: str | Path) -> AliasMap:
+    """
+    Load a version-1 alias file into an ``AliasMap``.
+    Expected format::
+        version: 1
+        table_aliases:
+          salesmart.dbo.orders: orders
+    """
+    file_path = Path(path).expanduser().resolve()
+    if not file_path.is_file():
+        raise AliasMapError(f"Alias file does not exist: {file_path}")
+    version: str | None = None
+    aliases: AliasMap = {}
+    in_table_aliases = False
+    for line_number, raw_line in enumerate(
+        file_path.read_text(encoding="utf-8").splitlines(), start=1
+    ):
+        line = raw_line.strip()
+        if not line or line.startswith("#"):
+            continue
+        if line.startswith("version:"):
+            version = line.split(":", 1)[1].strip()
+            if version != _SUPPORTED_VERSION:
+                raise AliasMapError(
+                    f"Unsupported alias file version {version!r} at {file_path}:{line_number}; "
+                    f"expected {_SUPPORTED_VERSION!r}."
+                )
+            continue
+        if line == "table_aliases:":
+            in_table_aliases = True
+            continue
+        if not in_table_aliases:
+            raise AliasMapError(
+                f"Unexpected content at {file_path}:{line_number}; "
+                "expected 'version:' and 'table_aliases:' sections."
+            )
+        if ":" not in line:
+            raise AliasMapError(
+                f"Invalid alias entry at {file_path}:{line_number}; "
+                "expected 'source: target' form."
+            )
+        source, target = line.split(":", 1)
+        source = source.strip()
+        target = target.strip()
+        if not source or not target:
+            raise AliasMapError(
+                f"Invalid alias entry at {file_path}:{line_number}; "
+                "source and target must be non-empty."
+            )
+        normalized_source = normalize_fqn_for_matching(source)
+        normalized_target = normalize_fqn_for_matching(target)
+        if (
+            normalized_source in aliases
+            and aliases[normalized_source] != normalized_target
+        ):
+            raise AliasMapError(
+                f"Duplicate alias key {normalized_source!r} with conflicting targets "
+                f"at {file_path}:{line_number}."
+            )
+        aliases[normalized_source] = normalized_target
+    if version is None:
+        raise AliasMapError(f"Missing 'version:' in alias file: {file_path}")
+    if not in_table_aliases:
+        raise AliasMapError(
+            f"Missing 'table_aliases:' section in alias file: {file_path}"
+        )
+    return aliases

clearmetric/core/errors.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""Shared errors for clearmetric-core."""
+from __future__ import annotations
+class ClearMetricError(Exception):
+    """Base class for clearmetric-core failures.
+    The other error classes in this module derive from it, so catching this
+    type also catches them.
+    """
+class CanonicalIdError(ClearMetricError):
+    """Raised when an identifier cannot be normalized into a canonical ID.
+    Examples in this package: empty identifiers or segments, a bare ``*``
+    wildcard, and identifiers with an unclosed quote.
+    """
+class MergeConflictError(ClearMetricError):
+    """Raised when artifacts cannot be merged without losing information.
+    NOTE(review): the raising code lives outside this module; consult the merge
+    implementation for the exact conflict conditions.
+    """
+class AliasMapError(ClearMetricError):
+    """Raised when a table alias file is invalid or unsupported.
+    ``load_table_alias_map`` uses it for a missing file, an unsupported or
+    absent version, a missing ``table_aliases:`` section, malformed entries,
+    and conflicting duplicate keys.
+    """

clearmetric/core/ids.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""Canonical identifier normalization and ID builders."""
+from __future__ import annotations
+from collections.abc import Iterable
+from .errors import CanonicalIdError
+# Opening identifier-quote character -> the character that closes it.
+# Double quotes and backticks close with themselves; ``[`` closes with ``]``.
+# Single quotes are not listed, so they are treated as ordinary characters.
+_QUOTE_PAIRS = {
+    '"': '"',
+    "`": "`",
+    "[": "]",
+}
+def _strip_matching_quotes(value: str) -> str:
+    if len(value) < 2:
+        return value
+    first = value[0]
+    last = value[-1]
+    expected_last = _QUOTE_PAIRS.get(first)
+    if expected_last == last:
+        return value[1:-1]
+    return value
+def normalize_identifier(value: str) -> str:
+    """Normalize a possibly qualified identifier into canonical dotted form."""
+    parts = split_qualified_identifier(value)
+    return normalize_identifier_parts(parts)
+def normalize_identifier_parts(parts: Iterable[str]) -> str:
+    """Normalize already separated identifier parts into canonical dotted form."""
+    normalized_parts = [
+        normalize_identifier_part(part) for part in parts if str(part).strip()
+    ]
+    if not normalized_parts:
+        raise CanonicalIdError("Identifier must contain at least one non-empty part.")
+    return ".".join(normalized_parts)
+def normalize_identifier_part(part: str) -> str:
+    """Normalize one identifier segment."""
+    value = str(part).strip()
+    if not value:
+        raise CanonicalIdError("Identifier part cannot be empty.")
+    if value == "*":
+        raise CanonicalIdError("Wildcard identifiers cannot be canonicalized.")
+    unquoted = _strip_matching_quotes(value).strip()
+    if not unquoted:
+        raise CanonicalIdError("Identifier part cannot be empty after unquoting.")
+    return unquoted.lower()
+def split_qualified_identifier(value: str) -> list[str]:
+    """Split a qualified identifier on dots while respecting quoted segments."""
+    text = str(value).strip()
+    if not text:
+        raise CanonicalIdError("Identifier cannot be empty.")
+    parts: list[str] = []
+    current: list[str] = []
+    quote_stack: list[str] = []
+    for char in text:
+        if quote_stack:
+            current.append(char)
+            if char == quote_stack[-1]:
+                quote_stack.pop()
+            continue
+        if char in _QUOTE_PAIRS:
+            quote_stack.append(_QUOTE_PAIRS[char])
+            current.append(char)
+            continue
+        if char == ".":
+            part = "".join(current).strip()
+            if not part:
+                raise CanonicalIdError(f"Invalid qualified identifier {value!r}.")
+            parts.append(part)
+            current = []
+            continue
+        current.append(char)
+    if quote_stack:
+        raise CanonicalIdError(f"Unclosed quote in identifier {value!r}.")
+    final_part = "".join(current).strip()
+    if not final_part:
+        raise CanonicalIdError(f"Invalid qualified identifier {value!r}.")
+    parts.append(final_part)
+    return parts
+def table_id(qualified_name: str) -> str:
+    return f"table:{normalize_identifier(qualified_name)}"
+def cte_id(name: str) -> str:
+    return f"cte:{normalize_identifier_part(name)}"
+def column_id(parent_qualified_name: str, column_name: str) -> str:
+    parent = normalize_identifier(parent_qualified_name)
+    column = normalize_identifier_part(column_name)
+    return f"column:{parent}.{column}"
+def model_id(qualified_name: str) -> str:
+    return f"model:{normalize_identifier(qualified_name)}"
+def report_id(qualified_name: str) -> str:
+    return f"report:{normalize_identifier(qualified_name)}"
+def asset_id(qualified_name: str) -> str:
+    return f"asset:{normalize_identifier(qualified_name)}"
+def visual_id(report_qualified_name: str, page_id: str, visual_id_value: str) -> str:
+    parent = normalize_identifier_parts(
+        [report_qualified_name, page_id, visual_id_value]
+    )
+    return f"visual:{parent}"
+def page_id(report_qualified_name: str, page_id_value: str) -> str:
+    parent = normalize_identifier_parts([report_qualified_name, page_id_value])
+    return f"page:{parent}"
+def measure_id(table_qualified_name: str, measure_name: str) -> str:
+    parent = normalize_identifier(table_qualified_name)
+    measure = normalize_identifier_part(measure_name)
+    return f"measure:{parent}.{measure}"
+def schema_name(qualified_name: str) -> str | None:
+    normalized = normalize_identifier(qualified_name)
+    parts = normalized.split(".")
+    if len(parts) <= 1:
+        return None
+    return ".".join(parts[:-1])
+def leaf_name(qualified_name: str) -> str:
+    normalized = normalize_identifier(qualified_name)
+    return normalized.split(".")[-1]
+# Public names of this module: the ID builders plus the normalization and
+# splitting helpers. ``_QUOTE_PAIRS`` and ``_strip_matching_quotes`` stay private.
+__all__ = [
+    "asset_id",
+    "column_id",
+    "cte_id",
+    "leaf_name",
+    "measure_id",
+    "model_id",
+    "normalize_identifier",
+    "normalize_identifier_part",
+    "normalize_identifier_parts",
+    "page_id",
+    "report_id",
+    "schema_name",
+    "split_qualified_identifier",
+    "table_id",
+    "visual_id",
+]

clearmetric/core/interop.py ADDED Viewed

@@ -0,0 +1,126 @@
+"""Cross-graph interop: FQN matching, alias maps, and match status."""
+from __future__ import annotations
+from .errors import CanonicalIdError
+from .ids import normalize_identifier
+from .models import MatchStatus
+# Alias lookup table keyed by normalized source name; apply_alias_map looks a
+# normalized name up here and re-normalizes the mapped value it finds.
+AliasMap = dict[str, str]
+def normalize_fqn_for_matching(value: str) -> str:
+    """Normalize a fully-qualified name for case-insensitive cross-graph comparison."""
+    return normalize_identifier(value)
+def warehouse_table_fqn_candidates(
+    *,
+    database: str | None = None,
+    schema: str | None = None,
+    table: str,
+) -> list[str]:
+    """Build ordered FQN candidates for matching a warehouse table reference."""
+    if not str(table).strip():
+        raise CanonicalIdError(
+            "Table name is required to build warehouse FQN candidates."
+        )
+    parts: list[str] = []
+    if database and str(database).strip():
+        parts.append(str(database).strip())
+    if schema and str(schema).strip():
+        parts.append(str(schema).strip())
+    parts.append(str(table).strip())
+    candidates: list[str] = []
+    if len(parts) == 3:
+        candidates.append(normalize_fqn_for_matching(".".join(parts)))
+    if len(parts) >= 2:
+        candidates.append(normalize_fqn_for_matching(".".join(parts[-2:])))
+    candidates.append(normalize_fqn_for_matching(parts[-1]))
+    seen: set[str] = set()
+    ordered: list[str] = []
+    for candidate in candidates:
+        if candidate in seen:
+            continue
+        seen.add(candidate)
+        ordered.append(candidate)
+    return ordered
+def warehouse_table_fqn_candidates_from_name(normalized_fqn: str) -> list[str]:
+    """Rebuild ordered warehouse FQN candidates from one normalized dotted name."""
+    parts = normalized_fqn.split(".")
+    if len(parts) == 3:
+        return warehouse_table_fqn_candidates(
+            database=parts[0],
+            schema=parts[1],
+            table=parts[2],
+        )
+    if len(parts) == 2:
+        return warehouse_table_fqn_candidates(schema=parts[0], table=parts[1])
+    if len(parts) == 1:
+        return warehouse_table_fqn_candidates(table=parts[0])
+    raise CanonicalIdError(
+        f"Cannot derive warehouse FQN candidates from name: {normalized_fqn!r}"
+    )
+def apply_alias_map(name: str, alias_map: AliasMap | None) -> str:
+    """Resolve a name through the alias map, returning normalized form."""
+    normalized = normalize_fqn_for_matching(name)
+    if not alias_map:
+        return normalized
+    mapped = alias_map.get(normalized)
+    if mapped is None:
+        return normalized
+    return normalize_fqn_for_matching(mapped)
+def resolve_table_match(
+    source_candidates: list[str],
+    target_table_ids: set[str],
+    *,
+    alias_map: AliasMap | None = None,
+) -> tuple[str | None, MatchStatus]:
+    """
+    Match source FQN candidates against canonical ``table:`` node IDs.
+    Returns the matched ``table:...`` ID and match status.
+    """
+    if not source_candidates:
+        return None, "unresolved"
+    target_by_normalized = {
+        normalize_fqn_for_matching(tid.removeprefix("table:")): tid
+        for tid in target_table_ids
+    }
+    matches: list[str] = []
+    for raw_candidate in source_candidates:
+        candidate = apply_alias_map(raw_candidate, alias_map)
+        for normalized_target, table_id in target_by_normalized.items():
+            if candidate == normalized_target or normalized_target.endswith(
+                f".{candidate}"
+            ):
+                matches.append(table_id)
+    unique_matches = sorted(set(matches))
+    if len(unique_matches) == 1:
+        return unique_matches[0], "resolved"
+    if len(unique_matches) > 1:
+        return unique_matches[0], "ambiguous"
+    return None, "unresolved"
+# Public names of this module. ``MatchStatus`` is imported from ``.models`` and
+# re-exported here alongside the matching helpers defined above.
+__all__ = [
+    "AliasMap",
+    "MatchStatus",
+    "apply_alias_map",
+    "normalize_fqn_for_matching",
+    "resolve_table_match",
+    "warehouse_table_fqn_candidates",
+    "warehouse_table_fqn_candidates_from_name",
+]