infotracker-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
infotracker/__init__.py ADDED
@@ -0,0 +1,6 @@
+ __all__ = [
+     "__version__",
+ ]
+
+ __version__ = "0.1.0"
+
infotracker/__main__.py ADDED
@@ -0,0 +1,6 @@
+ """Entry point for the InfoTracker package."""
+
+ from .cli import entrypoint
+
+ if __name__ == "__main__":
+     entrypoint()
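For reference, a minimal sketch of what this entry module enables: `python -m infotracker` executes it, which delegates to `entrypoint()` from `infotracker.cli` (shown further below). The snippet assumes the wheel is installed:

```python
# Run the package as a module, equivalent to `python -m infotracker`.
# With no arguments the CLI prints help and exits (no_args_is_help=True),
# so a SystemExit here is expected.
import runpy

runpy.run_module("infotracker", run_name="__main__")
```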
@@ -0,0 +1,65 @@
+ from __future__ import annotations
+ import json
+ import logging
+ from typing import Protocol, Dict, Any, Optional
+ from .parser import SqlParser
+ from .lineage import OpenLineageGenerator
+
+ logger = logging.getLogger(__name__)
+
+ class Adapter(Protocol):
+     name: str
+     dialect: str
+     def extract_lineage(self, sql: str, object_hint: Optional[str] = None) -> str: ...
+
+ class MssqlAdapter:
+     name = "mssql"
+     dialect = "tsql"
+
+     def __init__(self):
+         self.parser = SqlParser(dialect=self.dialect)
+         self.lineage_generator = OpenLineageGenerator()
+
+     def extract_lineage(self, sql: str, object_hint: Optional[str] = None) -> str:
+         """Extract lineage from SQL and return OpenLineage JSON as a string."""
+         try:
+             obj_info = self.parser.parse_sql_file(sql, object_hint)
+             job_name = f"warehouse/sql/{object_hint}.sql" if object_hint else None
+             json_str = self.lineage_generator.generate(
+                 obj_info, job_name=job_name, object_hint=object_hint
+             )
+             return json_str
+         except Exception as exc:
+             logger.error(f"Failed to extract lineage from SQL: {exc}")
+             error_payload = {
+                 "eventType": "COMPLETE",
+                 "eventTime": "2025-01-01T00:00:00Z",
+                 "run": {"runId": "00000000-0000-0000-0000-000000000000"},
+                 "job": {"namespace": "infotracker/examples",
+                         "name": f"warehouse/sql/{(object_hint or 'unknown')}.sql"},
+                 "inputs": [],
+                 "outputs": [{
+                     "namespace": "mssql://localhost/InfoTrackerDW",
+                     "name": object_hint or "unknown",
+                     "facets": {
+                         "schema": {
+                             "_producer": "https://github.com/OpenLineage/OpenLineage",
+                             "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json",
+                             "fields": [
+                                 {"name": "error", "type": "string", "description": f"Error: {exc}"}
+                             ],
+                         }
+                     },
+                 }],
+             }
+             return json.dumps(error_payload, indent=2, ensure_ascii=False)
+
+ _ADAPTERS: Dict[str, Adapter] = {
+     "mssql": MssqlAdapter(),
+ }
+
+
+ def get_adapter(name: str) -> Adapter:
+     if name not in _ADAPTERS:
+         raise KeyError(f"Unknown adapter '{name}'. Available: {', '.join(_ADAPTERS)}")
+     return _ADAPTERS[name]
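A minimal usage sketch for this adapter registry. The import path `infotracker.adapters` is an assumption (the file header for this hunk is missing from the diff view), and the SQL text is illustrative only:

```python
from infotracker.adapters import get_adapter  # module path assumed from this diff

adapter = get_adapter("mssql")                # registered in _ADAPTERS above
event_json = adapter.extract_lineage(
    "CREATE VIEW dbo.v_orders AS SELECT order_id FROM dbo.orders;",
    object_hint="dbo.v_orders",
)
print(event_json)  # OpenLineage event as JSON, or the error payload on parse failure

try:
    get_adapter("postgres")                   # only "mssql" is registered
except KeyError as err:
    print(err)                                # Unknown adapter 'postgres'. Available: mssql
```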
infotracker/cli.py ADDED
@@ -0,0 +1,150 @@
+ from __future__ import annotations
+
+ import logging
+ import json
+ import sys
+ from pathlib import Path
+ from typing import Optional
+
+ import typer
+ from rich.console import Console
+ from rich.table import Table
+
+ from .config import load_config, RuntimeConfig
+ from .engine import ExtractRequest, ImpactRequest, DiffRequest, Engine
+
+
+ app = typer.Typer(add_completion=False, no_args_is_help=True, help="InfoTracker CLI")
+ console = Console()
+
+
+ def version_callback(value: bool):
+     from . import __version__
+
+     if value:
+         console.print(f"infotracker {__version__}")
+         raise typer.Exit()
+
+
+ @app.callback()
+ def main(
+     ctx: typer.Context,
+     config: Optional[Path] = typer.Option(None, exists=True, dir_okay=False, help="Path to infotracker.yml"),
+     log_level: str = typer.Option("info", help="log level: debug|info|warn|error"),
+     format: str = typer.Option("text", "--format", help="Output format: text|json", show_choices=True),
+     version: bool = typer.Option(False, "--version", callback=version_callback, is_eager=True, help="Show version and exit"),
+ ):
+     ctx.ensure_object(dict)
+     cfg = load_config(config)
+     # CLI flags take precedence over config-file values
+     cfg.log_level = log_level
+     cfg.output_format = format
+     ctx.obj["cfg"] = cfg
+     level = getattr(logging, cfg.log_level.upper(), logging.INFO)
+     logging.basicConfig(level=level)
+
+
+ @app.command()
+ def extract(
+     ctx: typer.Context,
+     sql_dir: Optional[Path] = typer.Option(None, exists=True, file_okay=False),
+     out_dir: Optional[Path] = typer.Option(None, file_okay=False),
+     adapter: Optional[str] = typer.Option(None),
+     catalog: Optional[Path] = typer.Option(None, exists=True),
+     fail_on_warn: bool = typer.Option(False),
+     include: list[str] = typer.Option([], "--include", help="Glob include pattern"),
+     exclude: list[str] = typer.Option([], "--exclude", help="Glob exclude pattern"),
+ ):
+     cfg: RuntimeConfig = ctx.obj["cfg"]
+     engine = Engine(cfg)
+     req = ExtractRequest(
+         sql_dir=sql_dir or Path(cfg.sql_dir),
+         out_dir=out_dir or Path(cfg.out_dir),
+         adapter=adapter or cfg.default_adapter,
+         catalog=catalog,
+         include=include or cfg.include,
+         exclude=exclude or cfg.exclude,
+         fail_on_warn=fail_on_warn,
+     )
+     result = engine.run_extract(req)
+     _emit(result, cfg.output_format)
+
+     # Handle fail_on_warn
+     if fail_on_warn and result.get("warnings", 0) > 0:
+         console.print(f"[red]ERROR: {result['warnings']} warnings detected with --fail-on-warn enabled[/red]")
+         raise typer.Exit(1)
+
+
+ @app.command()
+ def impact(
+     ctx: typer.Context,
+     selector: str = typer.Option(..., "-s", "--selector", help="[+]db.schema.object.column[+] - use + to indicate direction"),
+     max_depth: Optional[int] = typer.Option(None),
+     out: Optional[Path] = typer.Option(None),
+     graph_dir: Optional[Path] = typer.Option(None, "--graph-dir", help="Directory containing column_graph.json"),
+ ):
+     cfg: RuntimeConfig = ctx.obj["cfg"]
+     engine = Engine(cfg)
+     req = ImpactRequest(selector=selector, max_depth=max_depth or 2, graph_dir=graph_dir)
+     result = engine.run_impact(req)
+     _emit(result, cfg.output_format, out)
+
+
+ @app.command()
+ def diff(
+     ctx: typer.Context,
+     base: str = typer.Option(..., help="git ref name for base"),
+     head: str = typer.Option(..., help="git ref name for head"),
+     sql_dir: Optional[Path] = typer.Option(None, exists=True, file_okay=False),
+     adapter: Optional[str] = typer.Option(None),
+     severity_threshold: str = typer.Option("BREAKING"),
+ ):
+     cfg: RuntimeConfig = ctx.obj["cfg"]
+     engine = Engine(cfg)
+     req = DiffRequest(
+         base=base,
+         head=head,
+         sql_dir=sql_dir or Path(cfg.sql_dir),
+         adapter=adapter or cfg.default_adapter,
+         severity_threshold=severity_threshold,
+     )
+     result = engine.run_diff(req)
+     _emit(result, cfg.output_format)
+     raise typer.Exit(code=result.get("exit_code", 0))
+
+
+ def _emit(payload: dict, fmt: str, out_path: Optional[Path] = None) -> None:
+     from rich.table import Table
+     from rich.console import Console
+     import json
+
+     console = Console()
+
+     if fmt == "json":
+         console.print_json(json.dumps(payload, ensure_ascii=False))
+         return
+
+     # fmt == "text"
+     table = Table(show_header=True, header_style="bold")
+     cols = payload.get("columns", [])
+     for k in cols:
+         table.add_column(str(k))
+
+     for r in payload.get("rows", []):
+         if isinstance(r, dict):
+             table.add_row(*[str(r.get(c, "")) for c in cols])
+         else:
+             # list/tuple: match by position, padding or trimming to the column count
+             table.add_row(*[str(x) for x in (list(r) + [""] * max(0, len(cols) - len(r)))][:len(cols)])
+
+     console.print(table)
+
+
+
+ def entrypoint() -> None:
+     app()
+
+
+ if __name__ == "__main__":
+     entrypoint()
+
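A short sketch of exercising this CLI in-process with typer's test runner. It assumes the wheel and its `Engine` dependencies are importable; the selector value is illustrative:

```python
from typer.testing import CliRunner

from infotracker.cli import app

runner = CliRunner()

# Global options belong to the callback, so they precede the subcommand.
result = runner.invoke(app, ["--version"])
print(result.output)        # e.g. "infotracker 0.1.0"

# Equivalent to: infotracker --format json impact -s "InfoTrackerDW.dbo.orders.order_id+"
result = runner.invoke(
    app, ["--format", "json", "impact", "-s", "InfoTrackerDW.dbo.orders.order_id+"]
)
print(result.exit_code)     # nonzero if the engine raised or the graph is missing
```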
infotracker/config.py ADDED
@@ -0,0 +1,57 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import List, Optional
+
+ import yaml
+
+
+ @dataclass
+ class RuntimeConfig:
+     default_adapter: str = "mssql"
+     default_database: Optional[str] = None
+     sql_dir: str = "examples/warehouse/sql"
+     out_dir: str = "build/lineage"
+     include: List[str] = field(default_factory=lambda: ["*.sql"])
+     exclude: List[str] = field(default_factory=list)
+     severity_threshold: str = "BREAKING"
+     ignore: List[str] = field(default_factory=list)
+     catalog: Optional[str] = None
+     log_level: str = "info"
+     output_format: str = "text"
+
+
+ def load_config(path: Optional[Path]) -> RuntimeConfig:
+     cfg = RuntimeConfig()
+     if path is None:
+         # Try repo root default
+         default = Path("infotracker.yml")
+         if default.exists():
+             path = default
+     if path and path.exists():
+         data = yaml.safe_load(path.read_text()) or {}
+         for k, v in data.items():
+             if hasattr(cfg, k):
+                 setattr(cfg, k, v)
+
+     # Load .infotrackerignore if it exists
+     ignore_file = Path(".infotrackerignore")
+     patterns: list[str] = []
+
+     if ignore_file.exists():
+         try:
+             for line in ignore_file.read_text(encoding="utf-8").splitlines():
+                 # strip inline comments and surrounding whitespace
+                 line = line.split("#", 1)[0].strip()
+                 if line:
+                     patterns.append(line)
+         except Exception as e:
+             print(f"Warning: failed to load .infotrackerignore: {e}")
+
+     # merge with the ignore patterns from the config file
+     base = list(getattr(cfg, "ignore", []) or [])
+     cfg.ignore = sorted(set(base + patterns))
+
+     return cfg
+
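A brief sketch of `load_config` resolution, assuming an `infotracker.yml` in the current working directory. Note that unknown keys are silently ignored, since only attributes already present on `RuntimeConfig` are set:

```python
from pathlib import Path

from infotracker.config import load_config

# Write a sample config in the working directory for the demonstration.
Path("infotracker.yml").write_text(
    "default_adapter: mssql\n"
    "sql_dir: warehouse/sql\n"
    "ignore:\n"
    "  - scratch/*.sql\n",
    encoding="utf-8",
)

cfg = load_config(None)   # no explicit path: falls back to ./infotracker.yml
print(cfg.sql_dir)        # "warehouse/sql"
print(cfg.ignore)         # config patterns merged and sorted with .infotrackerignore
```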
infotracker/diff.py ADDED
@@ -0,0 +1,291 @@
+ """
+ Breaking change detection for InfoTracker.
+ """
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from enum import Enum
+ from typing import Dict, List, Optional, Set, Any
+
+ from .models import ObjectInfo, ColumnSchema, ColumnLineage, TransformationType
+
+
+ class ChangeType(Enum):
+     """Types of changes that can be detected."""
+     COLUMN_ADDED = "COLUMN_ADDED"
+     COLUMN_REMOVED = "COLUMN_REMOVED"
+     COLUMN_RENAMED = "COLUMN_RENAMED"
+     COLUMN_TYPE_CHANGED = "COLUMN_TYPE_CHANGED"
+     COLUMN_NULLABILITY_CHANGED = "COLUMN_NULLABILITY_CHANGED"
+     COLUMN_ORDER_CHANGED = "COLUMN_ORDER_CHANGED"
+     LINEAGE_CHANGED = "LINEAGE_CHANGED"
+     OBJECT_ADDED = "OBJECT_ADDED"
+     OBJECT_REMOVED = "OBJECT_REMOVED"
+     OBJECT_TYPE_CHANGED = "OBJECT_TYPE_CHANGED"
+
+
+ class Severity(Enum):
+     """Severity levels for changes."""
+     BREAKING = "BREAKING"
+     POTENTIALLY_BREAKING = "POTENTIALLY_BREAKING"
+     NON_BREAKING = "NON_BREAKING"
+
+
+ @dataclass
+ class Change:
+     """Represents a single change between two versions."""
+     change_type: ChangeType
+     severity: Severity
+     object_name: str
+     column_name: Optional[str] = None
+     old_value: Any = None
+     new_value: Any = None
+     description: str = ""
+     impact_count: int = 0  # Number of downstream columns affected
+
+
+ class BreakingChangeDetector:
+     """Detects breaking changes between two sets of object information."""
+
+     def __init__(self):
+         self.changes: List[Change] = []
+
+     def detect_changes(self, base_objects: List[ObjectInfo], head_objects: List[ObjectInfo]) -> List[Change]:
+         """Detect changes between base and head object lists."""
+         self.changes = []
+
+         # Create lookup dictionaries
+         base_map = {obj.name.lower(): obj for obj in base_objects}
+         head_map = {obj.name.lower(): obj for obj in head_objects}
+
+         # Find object-level changes
+         self._detect_object_changes(base_map, head_map)
+
+         # Find schema changes for existing objects
+         common_objects = set(base_map.keys()) & set(head_map.keys())
+         for obj_name in common_objects:
+             self._detect_schema_changes(base_map[obj_name], head_map[obj_name])
+             self._detect_lineage_changes(base_map[obj_name], head_map[obj_name])
+
+         return self.changes
+
+     def _detect_object_changes(self, base_map: Dict[str, ObjectInfo], head_map: Dict[str, ObjectInfo]) -> None:
+         """Detect object additions, removals, and type changes."""
+         base_names = set(base_map.keys())
+         head_names = set(head_map.keys())
+
+         # Object additions
+         for added_name in head_names - base_names:
+             obj = head_map[added_name]
+             self.changes.append(Change(
+                 change_type=ChangeType.OBJECT_ADDED,
+                 severity=Severity.NON_BREAKING,
+                 object_name=obj.name,
+                 description=f"Added {obj.object_type} '{obj.name}'"
+             ))
+
+         # Object removals
+         for removed_name in base_names - head_names:
+             obj = base_map[removed_name]
+             self.changes.append(Change(
+                 change_type=ChangeType.OBJECT_REMOVED,
+                 severity=Severity.BREAKING,
+                 object_name=obj.name,
+                 description=f"Removed {obj.object_type} '{obj.name}'"
+             ))
+
+         # Object type changes
+         for common_name in base_names & head_names:
+             base_obj = base_map[common_name]
+             head_obj = head_map[common_name]
+
+             if base_obj.object_type != head_obj.object_type:
+                 self.changes.append(Change(
+                     change_type=ChangeType.OBJECT_TYPE_CHANGED,
+                     severity=Severity.BREAKING,
+                     object_name=base_obj.name,
+                     old_value=base_obj.object_type,
+                     new_value=head_obj.object_type,
+                     description=f"Changed object type from {base_obj.object_type} to {head_obj.object_type}"
+                 ))
+
+     def _detect_schema_changes(self, base_obj: ObjectInfo, head_obj: ObjectInfo) -> None:
+         """Detect schema changes within an object."""
+         base_columns = {col.name.lower(): col for col in base_obj.schema.columns}
+         head_columns = {col.name.lower(): col for col in head_obj.schema.columns}
+
+         base_names = set(base_columns.keys())
+         head_names = set(head_columns.keys())
+
+         # Column additions
+         for added_name in head_names - base_names:
+             col = head_columns[added_name]
+             severity = Severity.POTENTIALLY_BREAKING  # Could affect SELECT *
+             self.changes.append(Change(
+                 change_type=ChangeType.COLUMN_ADDED,
+                 severity=severity,
+                 object_name=base_obj.name,
+                 column_name=col.name,
+                 new_value=f"{col.data_type} {'NULL' if col.nullable else 'NOT NULL'}",
+                 description=f"Added column '{col.name}' ({col.data_type})"
+             ))
+
+         # Column removals
+         for removed_name in base_names - head_names:
+             col = base_columns[removed_name]
+             self.changes.append(Change(
+                 change_type=ChangeType.COLUMN_REMOVED,
+                 severity=Severity.BREAKING,
+                 object_name=base_obj.name,
+                 column_name=col.name,
+                 old_value=f"{col.data_type} {'NULL' if col.nullable else 'NOT NULL'}",
+                 description=f"Removed column '{col.name}'"
+             ))
+
+         # Column changes for existing columns
+         for common_name in base_names & head_names:
+             base_col = base_columns[common_name]
+             head_col = head_columns[common_name]
+
+             # Type changes
+             if base_col.data_type != head_col.data_type:
+                 severity = self._classify_type_change_severity(base_col.data_type, head_col.data_type)
+                 self.changes.append(Change(
+                     change_type=ChangeType.COLUMN_TYPE_CHANGED,
+                     severity=severity,
+                     object_name=base_obj.name,
+                     column_name=base_col.name,
+                     old_value=base_col.data_type,
+                     new_value=head_col.data_type,
+                     description=f"Changed column '{base_col.name}' type from {base_col.data_type} to {head_col.data_type}"
+                 ))
+
+             # Nullability changes
+             if base_col.nullable != head_col.nullable:
+                 severity = Severity.BREAKING if not head_col.nullable else Severity.POTENTIALLY_BREAKING
+                 self.changes.append(Change(
+                     change_type=ChangeType.COLUMN_NULLABILITY_CHANGED,
+                     severity=severity,
+                     object_name=base_obj.name,
+                     column_name=base_col.name,
+                     old_value="NULL" if base_col.nullable else "NOT NULL",
+                     new_value="NULL" if head_col.nullable else "NOT NULL",
+                     description=f"Changed column '{base_col.name}' nullability"
+                 ))
+
+             # Ordinal changes (column order)
+             if base_col.ordinal != head_col.ordinal:
+                 self.changes.append(Change(
+                     change_type=ChangeType.COLUMN_ORDER_CHANGED,
+                     severity=Severity.POTENTIALLY_BREAKING,
+                     object_name=base_obj.name,
+                     column_name=base_col.name,
+                     old_value=base_col.ordinal,
+                     new_value=head_col.ordinal,
+                     description=f"Changed column '{base_col.name}' position from {base_col.ordinal} to {head_col.ordinal}"
+                 ))
+
+     def _detect_lineage_changes(self, base_obj: ObjectInfo, head_obj: ObjectInfo) -> None:
+         """Detect lineage changes for columns."""
+         base_lineage = {lin.output_column.lower(): lin for lin in base_obj.lineage}
+         head_lineage = {lin.output_column.lower(): lin for lin in head_obj.lineage}
+
+         # Check for lineage changes in common columns
+         for column_name in set(base_lineage.keys()) & set(head_lineage.keys()):
+             base_lin = base_lineage[column_name]
+             head_lin = head_lineage[column_name]
+
+             # Compare transformation type
+             if base_lin.transformation_type != head_lin.transformation_type:
+                 self.changes.append(Change(
+                     change_type=ChangeType.LINEAGE_CHANGED,
+                     severity=Severity.POTENTIALLY_BREAKING,
+                     object_name=base_obj.name,
+                     column_name=base_lin.output_column,
+                     old_value=base_lin.transformation_type.value,
+                     new_value=head_lin.transformation_type.value,
+                     description=f"Changed transformation type for '{base_lin.output_column}'"
+                 ))
+
+             # Compare input fields
+             base_inputs = {(ref.table_name, ref.column_name) for ref in base_lin.input_fields}
+             head_inputs = {(ref.table_name, ref.column_name) for ref in head_lin.input_fields}
+
+             if base_inputs != head_inputs:
+                 self.changes.append(Change(
+                     change_type=ChangeType.LINEAGE_CHANGED,
+                     severity=Severity.POTENTIALLY_BREAKING,
+                     object_name=base_obj.name,
+                     column_name=base_lin.output_column,
+                     old_value=len(base_inputs),
+                     new_value=len(head_inputs),
+                     description=f"Changed input dependencies for '{base_lin.output_column}'"
+                 ))
+
+     def _classify_type_change_severity(self, old_type: str, new_type: str) -> Severity:
+         """Classify the severity of a type change."""
+         old_type = old_type.upper()
+         new_type = new_type.upper()
+
+         # Common safe widenings
+         safe_widenings = [
+             ("INT", "BIGINT"),
+             ("DECIMAL(10,2)", "DECIMAL(18,2)"),
+             ("VARCHAR(50)", "VARCHAR(100)"),
+             ("NVARCHAR(50)", "NVARCHAR(100)"),
+         ]
+
+         if (old_type, new_type) in safe_widenings:
+             return Severity.NON_BREAKING
+
+         # Same-family changes (VARCHAR -> VARCHAR, DECIMAL -> DECIMAL) may narrow
+         if ("VARCHAR" in old_type and "VARCHAR" in new_type or
+                 "DECIMAL" in old_type and "DECIMAL" in new_type):
+             return Severity.POTENTIALLY_BREAKING
+
+         # Default to breaking for type changes
+         return Severity.BREAKING
+
+     def classify_by_severity(self) -> Dict[Severity, List[Change]]:
+         """Group changes by severity level."""
+         result = {severity: [] for severity in Severity}
+         for change in self.changes:
+             result[change.severity].append(change)
+         return result
+
+     def get_breaking_count(self) -> int:
+         """Get count of breaking changes."""
+         return len([c for c in self.changes if c.severity == Severity.BREAKING])
+
+     def get_summary(self) -> Dict[str, Any]:
+         """Get summary of changes."""
+         by_severity = self.classify_by_severity()
+         return {
+             "total_changes": len(self.changes),
+             "breaking": len(by_severity[Severity.BREAKING]),
+             "potentially_breaking": len(by_severity[Severity.POTENTIALLY_BREAKING]),
+             "non_breaking": len(by_severity[Severity.NON_BREAKING]),
+             "changes_by_type": self._count_by_type(),
+             "changes": [self._change_to_dict(c) for c in self.changes]
+         }
+
+     def _count_by_type(self) -> Dict[str, int]:
+         """Count changes by type."""
+         counts = {}
+         for change in self.changes:
+             change_type = change.change_type.value
+             counts[change_type] = counts.get(change_type, 0) + 1
+         return counts
+
+     def _change_to_dict(self, change: Change) -> Dict[str, Any]:
+         """Convert a change to a dictionary for JSON serialization."""
+         return {
+             "change_type": change.change_type.value,
+             "severity": change.severity.value,
+             "object_name": change.object_name,
+             "column_name": change.column_name,
+             "old_value": change.old_value,
+             "new_value": change.new_value,
+             "description": change.description,
+             "impact_count": change.impact_count
+         }
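Finally, a sketch of driving `BreakingChangeDetector` directly. The real `ObjectInfo`/`ColumnSchema` types live in `infotracker.models`, which this diff does not show, so the stand-in dataclasses below are hypothetical and only mimic the attributes the detector actually reads:

```python
from dataclasses import dataclass, field
from typing import List

from infotracker.diff import BreakingChangeDetector

@dataclass
class StubColumn:            # hypothetical stand-in for ColumnSchema
    name: str
    data_type: str
    nullable: bool = True
    ordinal: int = 0

@dataclass
class StubSchema:
    columns: List[StubColumn] = field(default_factory=list)

@dataclass
class StubObject:            # hypothetical stand-in for ObjectInfo
    name: str
    object_type: str
    schema: StubSchema
    lineage: list = field(default_factory=list)

base = [StubObject("dbo.orders", "table",
                   StubSchema([StubColumn("order_id", "INT", False, 0)]))]
head = [StubObject("dbo.orders", "table",
                   StubSchema([StubColumn("order_id", "BIGINT", False, 0)]))]

detector = BreakingChangeDetector()
detector.detect_changes(base, head)
summary = detector.get_summary()
print(summary["non_breaking"])   # 1: INT -> BIGINT is listed as a safe widening
print(detector.get_breaking_count())  # 0
```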