InfoTracker 0.3.1 → 0.4.0 (py3-none-any.whl)
- infotracker/__init__.py +1 -1
- infotracker/cli.py +11 -0
- infotracker/engine.py +27 -11
- infotracker/infotracker.yml +1 -1
- infotracker/io_utils.py +312 -0
- infotracker/lineage.py +143 -13
- infotracker/models.py +4 -0
- infotracker/openlineage_utils.py +16 -0
- infotracker/parser.py +1748 -229
- {infotracker-0.3.1.dist-info → infotracker-0.4.0.dist-info}/METADATA +1 -1
- infotracker-0.4.0.dist-info/RECORD +17 -0
- infotracker-0.3.1.dist-info/RECORD +0 -16
- {infotracker-0.3.1.dist-info → infotracker-0.4.0.dist-info}/WHEEL +0 -0
- {infotracker-0.3.1.dist-info → infotracker-0.4.0.dist-info}/entry_points.txt +0 -0
infotracker/__init__.py
CHANGED
infotracker/cli.py
CHANGED
@@ -12,11 +12,13 @@ from rich.table import Table
 
 from .config import load_config, RuntimeConfig
 from .engine import ExtractRequest, ImpactRequest, DiffRequest, Engine
+from .io_utils import get_supported_encodings
 
 
 app = typer.Typer(add_completion=False, no_args_is_help=True, help="InfoTracker CLI")
 console = Console()
 
+logging.getLogger("sqlglot").setLevel(logging.ERROR)
 
 def version_callback(value: bool):
     from . import __version__
@@ -54,8 +56,16 @@ def extract(
     fail_on_warn: bool = typer.Option(False),
     include: list[str] = typer.Option([], "--include", help="Glob include pattern"),
     exclude: list[str] = typer.Option([], "--exclude", help="Glob exclude pattern"),
+    encoding: str = typer.Option("auto", "--encoding", "-e", help="File encoding for SQL files", show_choices=True),
 ):
     cfg: RuntimeConfig = ctx.obj["cfg"]
+
+    # Validate encoding
+    supported = get_supported_encodings()
+    if encoding not in supported:
+        console.print(f"[red]ERROR: Unsupported encoding '{encoding}'. Supported: {', '.join(supported)}[/red]")
+        raise typer.Exit(1)
+
     engine = Engine(cfg)
     req = ExtractRequest(
         sql_dir=sql_dir or Path(cfg.sql_dir),
@@ -65,6 +75,7 @@ def extract(
         include=include or cfg.include,
         exclude=exclude or cfg.exclude,
         fail_on_warn=fail_on_warn,
+        encoding=encoding,
     )
     result = engine.run_extract(req)
     _emit(result, cfg.output_format)
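
For context, the new --encoding value is checked against get_supported_encodings() before the engine runs. A minimal sketch of the same check outside Typer, assuming the 0.4.0 package is installed:

    # Sketch only: mirrors the CLI-side validation added above.
    from infotracker.io_utils import get_supported_encodings

    def check_encoding(encoding: str) -> None:
        supported = get_supported_encodings()
        if encoding not in supported:
            raise SystemExit(f"Unsupported encoding '{encoding}'. Supported: {', '.join(supported)}")

    check_encoding("cp1250")      # accepted: cp1250 is in COMMON_ENCODINGS
    # check_encoding("latin-1")   # would exit: not in the supported list
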
infotracker/engine.py
CHANGED
@@ -11,12 +11,14 @@ from fnmatch import fnmatch
 import yaml
 
 from .adapters import get_adapter
+from .io_utils import read_text_safely
+from .lineage import emit_ol_from_object
 from .models import (
-    ObjectInfo,
+    ObjectInfo,
+    ColumnNode,
     ColumnSchema,
     TableSchema,
     ColumnGraph,
-    ColumnNode,
     ColumnEdge,
     TransformationType,
 )
@@ -35,6 +37,7 @@ class ExtractRequest:
     include: Optional[List[str]] = None
     exclude: Optional[List[str]] = None
     fail_on_warn: bool = False
+    encoding: str = "auto"
 
 
 @dataclass
@@ -145,11 +148,11 @@ class Engine:
         sql_file_map: Dict[str, Path] = {}  # object_name -> file_path
 
         ignore_patterns: List[str] = list(getattr(self.config, "ignore", []) or [])
-
+
         # Phase 1: Parse all SQL files and collect objects
        for sql_path in sql_files:
             try:
-                sql_text = sql_path.
+                sql_text = read_text_safely(sql_path, encoding=req.encoding)
                 obj_info: ObjectInfo = parser.parse_sql_file(sql_text, object_hint=sql_path.stem)
 
                 # Store mapping for later processing
@@ -179,7 +182,7 @@ class Engine:
 
             sql_path = sql_file_map[obj_name]
             try:
-                sql_text = sql_path.
+                sql_text = read_text_safely(sql_path, encoding=req.encoding)
 
                 # Parse with updated schema registry (now has dependencies resolved)
                 obj_info: ObjectInfo = parser.parse_sql_file(sql_text, object_hint=sql_path.stem)
@@ -191,9 +194,12 @@ class Engine:
                 # Also register in adapter's parser for lineage generation
                 adapter.parser.schema_registry.register(obj_info.schema)
 
-                # Generate OpenLineage
-
-
+                # Generate OpenLineage directly from resolved ObjectInfo
+                ol_payload = emit_ol_from_object(
+                    obj_info,
+                    quality_metrics=True,
+                    virtual_proc_outputs=getattr(self.config, "virtual_proc_outputs", True),
+                )
 
                 # Save to file
                 target = out_dir / f"{sql_path.stem}.json"
@@ -201,15 +207,25 @@ class Engine:
 
                 outputs.append([str(sql_path), str(target)])
 
-                # Check for warnings
+                # Check for warnings with enhanced diagnostics
                 out0 = (ol_payload.get("outputs") or [])
                 out0 = out0[0] if out0 else {}
                 facets = out0.get("facets", {})
                 has_schema_fields = bool(facets.get("schema", {}).get("fields"))
                 has_col_lineage = bool(facets.get("columnLineage", {}).get("fields"))
 
-
+                # Enhanced warning classification
+                warning_reason = None
+                if getattr(obj_info, "object_type", "unknown") == "unknown":
+                    warning_reason = "UNKNOWN_OBJECT_TYPE"
+                elif hasattr(obj_info, 'no_output_reason') and obj_info.no_output_reason:
+                    warning_reason = obj_info.no_output_reason
+                elif not (has_schema_fields or has_col_lineage):
+                    warning_reason = "NO_SCHEMA_OR_LINEAGE"
+
+                if warning_reason:
                     warnings += 1
+                    logger.warning("Object %s: %s", obj_info.name, warning_reason)
 
             except Exception as e:
                 warnings += 1
@@ -287,7 +303,7 @@ class Engine:
             if not ready:
                 # Circular dependency or missing dependency - process remaining arbitrarily
                 ready = [next(iter(remaining.keys()))]
-                logger.
+                logger.info("Circular or missing dependencies detected, processing: %s", ready[0])
 
             # Process ready nodes
             for node in ready:
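
For context, run_extract now records a reason code for every object whose OpenLineage payload is unusable. A minimal sketch of that classification applied to a stand-in object (SimpleNamespace replaces the real ObjectInfo here):

    # Sketch only: the warning classification added above, on a stand-in object.
    from types import SimpleNamespace

    def classify(obj, has_schema_fields: bool, has_col_lineage: bool):
        if getattr(obj, "object_type", "unknown") == "unknown":
            return "UNKNOWN_OBJECT_TYPE"
        if getattr(obj, "no_output_reason", None):
            return obj.no_output_reason
        if not (has_schema_fields or has_col_lineage):
            return "NO_SCHEMA_OR_LINEAGE"
        return None

    obj = SimpleNamespace(object_type="view", no_output_reason=None)
    print(classify(obj, has_schema_fields=False, has_col_lineage=False))  # NO_SCHEMA_OR_LINEAGE
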
infotracker/infotracker.yml
CHANGED
infotracker/io_utils.py
ADDED
@@ -0,0 +1,312 @@
+"""
+I/O utilities for safe text file reading with encoding detection.
+"""
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Optional, List
+
+logger = logging.getLogger(__name__)
+
+# Common encodings to try in fallback order
+COMMON_ENCODINGS = [
+    'utf-8',
+    'utf-8-sig',
+    'utf-16le',
+    'utf-16be',
+    'cp1250'
+]
+
+
+def read_text_safely(path: str | Path, encoding: str = "auto") -> str:
+    """
+    Safely read text file with encoding detection.
+
+    Args:
+        path: Path to the file to read
+        encoding: Encoding to use. If "auto", will attempt to detect encoding.
+                  Supported: "auto", "utf-8", "utf-8-sig", "utf-16", "utf-16le", "utf-16be", "cp1250"
+
+    Returns:
+        File content as string with normalized line endings
+
+    Raises:
+        UnicodeDecodeError: If file cannot be decoded with specified/detected encoding
+        FileNotFoundError: If file doesn't exist
+        IOError: If file cannot be read
+    """
+    file_path = Path(path)
+
+    try:
+        # Read file as binary first
+        with open(file_path, 'rb') as f:
+            raw_content = f.read()
+    except Exception as e:
+        raise IOError(f"Cannot read file {file_path}: {e}")
+
+    if not raw_content:
+        return ""
+
+    if encoding != "auto":
+        # If user forced non-UTF-8 but bytes look like UTF-8, fail early with a clear message
+        if encoding.lower() not in ("utf-8", "utf-8-sig") and _looks_like_utf8(raw_content):
+            raise UnicodeDecodeError(
+                encoding, raw_content, 0, len(raw_content),
+                f"File {file_path} appears to be UTF-8 but '{encoding}' was forced. "
+                f"Try --encoding auto or --encoding utf-8."
+            )
+        # Use specified encoding
+        # 1) Decoding - catch only DECODING errors here
+        try:
+            content = raw_content.decode(encoding, errors="strict")
+        except UnicodeDecodeError as e:
+            raise UnicodeDecodeError(
+                encoding, raw_content, e.start, e.end,
+                f"Cannot decode {file_path} with {encoding}. "
+                f"Try --encoding auto or specify different encoding (e.g., --encoding cp1250)"
+            )
+        # 2) Validation - outside the try/except so it does not overwrite the "looks malformed" message
+        _validate_forced_encoding(raw_content, encoding, content, file_path)
+        logger.debug(f"Successfully read {file_path} with encoding {encoding}")
+    else:
+        # Auto-detect encoding
+        content = _detect_and_decode(raw_content, file_path)
+
+    # Normalize line endings and remove BOM artifacts
+    content = _normalize_content(content)
+
+    return content
+
+
+def _detect_and_decode(raw_content: bytes, file_path: Path) -> str:
+    """
+    Detect encoding and decode content.
+
+    Args:
+        raw_content: Raw file bytes
+        file_path: File path for logging
+
+    Returns:
+        Decoded content string
+
+    Raises:
+        UnicodeDecodeError: If no encoding works
+    """
+
+    # Quick BOM check first
+    bom_encoding = _detect_bom(raw_content)
+    if bom_encoding:
+        try:
+            content = raw_content.decode(bom_encoding, errors="strict")
+            logger.debug(f"Detected BOM encoding {bom_encoding} for {file_path}")
+            return content
+        except UnicodeDecodeError:
+            pass  # Fall back to other methods
+
+    guess = _looks_like_utf16(raw_content)
+    if guess:
+        try:
+            content = raw_content.decode(guess, errors="strict")
+            logger.debug(f"Heuristic detected {guess} for {file_path}")
+            return content
+        except UnicodeDecodeError:
+            pass
+
+    # Try common encodings
+    last_error = None
+    for encoding in COMMON_ENCODINGS:
+        try:
+            content = raw_content.decode(encoding, errors="strict")
+            logger.debug(f"Detected encoding {encoding} for {file_path}")
+            return content
+        except UnicodeDecodeError as e:
+            last_error = e
+            continue
+
+    # If charset-normalizer is available, try it as last resort
+    try:
+        import charset_normalizer
+        result = charset_normalizer.from_bytes(raw_content)
+        if result and result.best():
+            encoding = result.best().encoding
+            content = str(result.best())
+            logger.debug(f"charset-normalizer detected encoding {encoding} for {file_path}")
+            return content
+    except ImportError:
+        pass  # charset-normalizer not available, continue with error
+    except Exception:
+        pass  # charset-normalizer failed, continue with error
+
+    # All attempts failed
+    raise UnicodeDecodeError(
+        "auto-detect", raw_content, 0, len(raw_content),
+        f"Cannot decode {file_path} with any common encoding. "
+        f"Try specifying encoding explicitly (e.g., --encoding cp1250, --encoding utf-16)"
+    )
+
+
+def _looks_like_utf16(raw: bytes) -> Optional[str]:
+    # Heuristic: if more than 20% of the bytes are NUL, the file is almost certainly UTF-16.
+    if not raw:
+        return None
+    null_count = raw.count(0)
+    if null_count / len(raw) < 0.20:
+        return None
+
+    even_nulls = sum(1 for i in range(0, len(raw), 2) if raw[i] == 0)
+    odd_nulls = sum(1 for i in range(1, len(raw), 2) if raw[i] == 0)
+
+    # If one side clearly dominates, pick that endianness
+    if even_nulls > odd_nulls * 1.5:
+        return "utf-16be"
+    if odd_nulls > even_nulls * 1.5:
+        return "utf-16le"
+    return None
+
+def _looks_like_utf8(raw: bytes) -> bool:
+    """
+    Check if bytes look like UTF-8 encoded text with non-ASCII characters.
+
+    Args:
+        raw: Raw bytes to check
+
+    Returns:
+        True if bytes strictly decode as UTF-8 and contain non-ASCII chars
+    """
+    if not raw:
+        return False
+
+    try:
+        decoded = raw.decode('utf-8', errors='strict')
+        # Check if it contains non-ASCII characters (indicating it's likely UTF-8)
+        return any(ord(c) > 127 for c in decoded)
+    except UnicodeDecodeError:
+        return False
+
+
+def _text_quality_score(s: str) -> float:
+    """
+    Calculate text quality score based on printable/whitespace character ratio.
+
+    Args:
+        s: Text string to analyze
+
+    Returns:
+        Score from 0.0 to 1.0, where 1.0 means all characters are printable/whitespace
+    """
+    if not s:
+        return 1.0
+
+    printable_count = sum(1 for c in s if c.isprintable() or c.isspace())
+    return printable_count / len(s)
+
+
+def _looks_like_sql(s: str) -> bool:
+    """
+    Check if text contains common SQL tokens.
+
+    Args:
+        s: Text string to check
+
+    Returns:
+        True if text contains SQL-like tokens
+    """
+    import re
+
+    sql_tokens = [
+        r'\bSELECT\b', r'\bFROM\b', r'\bCREATE\b', r'\bTABLE\b',
+        r'\bVIEW\b', r'\bWHERE\b', r'\bJOIN\b', r'\bINSERT\b',
+        r'\bINTO\b', r'\bEXEC\b', r'\bPROCEDURE\b', r'\bFUNCTION\b',
+        r'\bALTER\b', r'\bUPDATE\b', r'\bDELETE\b'
+    ]
+
+    # Check if any SQL tokens are present (case-insensitive)
+    text_upper = s.upper()
+    return any(re.search(token, text_upper) for token in sql_tokens)
+
+
+def _validate_forced_encoding(raw: bytes, forced: str, decoded: str, file_path: Path):
+    """
+    Validate that forced encoding makes sense for the given content.
+
+    Args:
+        raw: Raw file bytes
+        forced: Forced encoding name
+        decoded: Decoded text content
+        file_path: File path for error messages
+
+    Raises:
+        UnicodeDecodeError: If forced encoding appears to be wrong
+    """
+    # If forced encoding is not UTF-8 but file looks like UTF-8, warn user
+    if forced.lower() not in ['utf-8', 'utf-8-sig'] and _looks_like_utf8(raw):
+        raise UnicodeDecodeError(
+            forced, raw, 0, len(raw),
+            f"File {file_path} appears to be UTF-8 but '{forced}' was forced. "
+            f"Try --encoding auto or --encoding utf-8."
+        )
+
+    # Check text quality and SQL-like content
+    quality_score = _text_quality_score(decoded)
+    has_sql_tokens = _looks_like_sql(decoded)
+
+    # If quality is poor and no SQL tokens found, likely wrong encoding
+    if quality_score < 0.90 and not has_sql_tokens:
+        raise UnicodeDecodeError(
+            forced, raw, 0, len(raw),
+            f"Decoded text with '{forced}' looks malformed (quality={quality_score:.2f}). "
+            f"Try --encoding auto."
+        )
+
+
+def _detect_bom(raw_content: bytes) -> Optional[str]:
+    """
+    Detect BOM (Byte Order Mark) and return appropriate encoding.
+
+    Args:
+        raw_content: Raw file bytes
+
+    Returns:
+        Encoding name if BOM detected, None otherwise
+    """
+    if raw_content.startswith(b'\xef\xbb\xbf'):
+        return 'utf-8-sig'
+    elif raw_content.startswith(b'\xff\xfe'):
+        # Could be UTF-16 LE or UTF-32 LE, check for UTF-32
+        if len(raw_content) >= 4 and raw_content[2:4] == b'\x00\x00':
+            return None  # UTF-32 LE, not supported in common encodings
+        return 'utf-16le'
+    elif raw_content.startswith(b'\xfe\xff'):
+        return 'utf-16be'
+    elif raw_content.startswith(b'\x00\x00\xfe\xff'):
+        return None  # UTF-32 BE, not supported in common encodings
+    elif raw_content.startswith(b'\xff\xfe\x00\x00'):
+        return None  # UTF-32 LE, not supported in common encodings
+
+    return None
+
+
+def _normalize_content(content: str) -> str:
+    """
+    Normalize content by fixing line endings and removing BOM artifacts.
+
+    Args:
+        content: Decoded content string
+
+    Returns:
+        Normalized content string
+    """
+    # Normalize line endings to \n
+    content = content.replace('\r\n', '\n').replace('\r', '\n')
+
+    # Remove BOM character if present (shouldn't happen with utf-8-sig but just in case)
+    if content.startswith('\ufeff'):
+        content = content[1:]
+
+    return content
+
+
+def get_supported_encodings() -> List[str]:
+    """Get list of supported encodings."""
+    return ["auto"] + COMMON_ENCODINGS
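
In short, read_text_safely reads raw bytes, honors a forced encoding (with a sanity check against UTF-8-looking input), otherwise auto-detects via BOM, a NUL-byte UTF-16 heuristic, the COMMON_ENCODINGS list and, if installed, charset-normalizer, and finally normalizes line endings. A small usage sketch; the example file and its bytes are made up:

    # Sketch only: expected behaviour on a UTF-8 file with BOM and CRLF line endings.
    from pathlib import Path
    from infotracker.io_utils import read_text_safely, get_supported_encodings

    p = Path("example.sql")
    p.write_bytes(b"\xef\xbb\xbfSELECT 1;\r\n")    # hypothetical test file

    text = read_text_safely(p, encoding="auto")    # BOM detected -> decoded as utf-8-sig
    assert text == "SELECT 1;\n"                   # BOM stripped, line endings normalized
    print(get_supported_encodings())               # ['auto', 'utf-8', 'utf-8-sig', 'utf-16le', 'utf-16be', 'cp1250']
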
infotracker/lineage.py
CHANGED
@@ -10,6 +10,21 @@ from typing import Dict, List, Any, Optional
 from .models import ObjectInfo, ColumnLineage, TransformationType
 
 
+def _ns_for_dep(dep: str, default_ns: str) -> str:
+    """Determine namespace for a dependency based on its database context."""
+    d = (dep or "").strip()
+    dl = d.lower()
+    if dl.startswith("tempdb..#") or dl.startswith("#"):
+        return "mssql://localhost/tempdb"
+    parts = d.split(".")
+    db = parts[0] if len(parts) >= 3 else None
+    return f"mssql://localhost/{db}" if db else (default_ns or "mssql://localhost/InfoTrackerDW")
+
+def _strip_db_prefix(name: str) -> str:
+    parts = (name or "").split(".")
+    return ".".join(parts[-2:]) if len(parts) >= 2 else (name or "")
+
+
 class OpenLineageGenerator:
     """Generates OpenLineage-compliant JSON from ObjectInfo."""
 
@@ -26,7 +41,7 @@ class OpenLineageGenerator:
         # Build the OpenLineage event
         event = {
             "eventType": "COMPLETE",
-            "eventTime":
+            "eventTime": datetime.now().isoformat()[:19] + "Z",
             "run": {"runId": run_id},
             "job": {
                 "namespace": job_namespace,
@@ -52,19 +67,29 @@ class OpenLineageGenerator:
     def _build_inputs(self, obj_info: ObjectInfo) -> List[Dict[str, Any]]:
         """Build inputs array from object dependencies."""
         inputs = []
-
         for dep_name in sorted(obj_info.dependencies):
-
-
-
-
+            # tempdb: fixed namespace
+            if dep_name.startswith('tempdb..#'):
+                namespace = "mssql://localhost/tempdb"
+            else:
+                parts = dep_name.split('.')
+                db = parts[0] if len(parts) >= 3 else None
+                namespace = f"mssql://localhost/{db}" if db else self.namespace
+            # keep schema.table in the name (drop the DB prefix)
+            name = ".".join(dep_name.split(".")[-2:]) if "." in dep_name else dep_name
+            inputs.append({"namespace": namespace, "name": name})
+
 
         return inputs
 
     def _build_outputs(self, obj_info: ObjectInfo) -> List[Dict[str, Any]]:
         """Build outputs array with schema and lineage facets."""
-        # Use
-
+        # Use consistent temp table namespace
+        if obj_info.schema.name.startswith('tempdb..#'):
+            output_namespace = "mssql://localhost/tempdb"
+        else:
+            # Use schema's namespace if available, otherwise default namespace
+            output_namespace = obj_info.schema.namespace if obj_info.schema.namespace else self.namespace
 
         output = {
             "namespace": output_namespace,
@@ -72,9 +97,13 @@ class OpenLineageGenerator:
             "facets": {}
         }
 
-        # Add schema facet for
-
-
+        # Add schema facet for tables and procedures with columns
+        # Views should only have columnLineage, not schema
+        if (obj_info.schema and obj_info.schema.columns and
+            obj_info.object_type in ['table', 'temp_table', 'procedure']):
+            schema_facet = self._build_schema_facet(obj_info)
+            if schema_facet:  # Only add if not None (fallback objects)
+                output["facets"]["schema"] = schema_facet
 
         # Add column lineage facet only if we have lineage (views, not tables)
         if obj_info.lineage:
@@ -82,8 +111,12 @@ class OpenLineageGenerator:
 
         return [output]
 
-    def _build_schema_facet(self, obj_info: ObjectInfo) -> Dict[str, Any]:
+    def _build_schema_facet(self, obj_info: ObjectInfo) -> Optional[Dict[str, Any]]:
         """Build schema facet from table schema."""
+        # Skip schema facet for fallback objects to match expected format
+        if getattr(obj_info, 'is_fallback', False) and obj_info.object_type not in ('table', 'temp_table'):
+            return None
+
         fields = []
 
         for col in obj_info.schema.columns:
@@ -106,8 +139,14 @@ class OpenLineageGenerator:
             input_fields = []
 
             for input_ref in lineage.input_fields:
+                # Use consistent temp table namespace for inputs
+                if input_ref.table_name.startswith('tempdb..#'):
+                    namespace = "mssql://localhost/tempdb"
+                else:
+                    namespace = input_ref.namespace
+
                 input_fields.append({
-                    "namespace":
+                    "namespace": namespace,
                     "name": input_ref.table_name,
                     "field": input_ref.column_name
                 })
@@ -123,3 +162,94 @@ class OpenLineageGenerator:
             "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ColumnLineageDatasetFacet.json",
             "fields": fields
         }
+
+
+def emit_ol_from_object(obj: ObjectInfo, job_name: str | None = None, quality_metrics: bool = False, virtual_proc_outputs: bool = False) -> dict:
+    """Emit OpenLineage JSON directly from ObjectInfo without re-parsing."""
+    ns = obj.schema.namespace if obj.schema else "mssql://localhost/InfoTrackerDW"
+    name = obj.schema.name if obj.schema else obj.name
+
+    # Handle virtual procedure outputs
+    if obj.object_type == "procedure" and virtual_proc_outputs and obj.schema and obj.schema.columns:
+        name = f"procedures.{obj.name}"
+
+    # Build inputs from dependencies with per-dependency namespaces
+    if obj.lineage:
+        input_pairs = {
+            (f.namespace, f.table_name)
+            for ln in obj.lineage
+            for f in ln.input_fields
+            if getattr(f, "namespace", None) and getattr(f, "table_name", None)
+        }
+        if input_pairs:
+            inputs = [{"namespace": ns2, "name": nm2} for (ns2, nm2) in sorted(input_pairs)]
+        else:
+            inputs = [{"namespace": _ns_for_dep(dep, ns), "name": _strip_db_prefix(dep)}
+                      for dep in sorted(obj.dependencies)]
+    else:
+        inputs = [{"namespace": _ns_for_dep(dep, ns), "name": _strip_db_prefix(dep)}
+                  for dep in sorted(obj.dependencies)]
+
+    # Build output facets
+    facets = {}
+
+    # Add schema facet if we have columns and it's not a fallback object
+    if (obj.object_type in ('table', 'temp_table', 'procedure')
+            and obj.schema and obj.schema.columns
+            and not getattr(obj, 'is_fallback', False)):
+        facets["schema"] = {
+            "_producer": "https://github.com/OpenLineage/OpenLineage",
+            "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json",
+            "fields": [{"name": c.name, "type": c.data_type} for c in obj.schema.columns]
+        }
+
+    # Add column lineage facet if we have lineage
+    if obj.lineage:
+        lineage_fields = {}
+        for ln in obj.lineage:
+            lineage_fields[ln.output_column] = {
+                "inputFields": [
+                    {"namespace": f.namespace, "name": f.table_name, "field": f.column_name}
+                    for f in ln.input_fields
+                ],
+                "transformationType": ln.transformation_type.value,
+                "transformationDescription": ln.transformation_description
+            }
+
+        facets["columnLineage"] = {
+            "_producer": "https://github.com/OpenLineage/OpenLineage",
+            "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ColumnLineageDatasetFacet.json",
+            "fields": lineage_fields
+        }
+
+    # Add quality metrics if requested
+    if quality_metrics:
+        covered = 0
+        if obj.schema and obj.schema.columns:
+            covered = sum(1 for c in obj.schema.columns
+                          if any(ln.output_column == c.name and ln.input_fields for ln in obj.lineage))
+
+        facets["quality"] = {
+            "lineageCoverage": (covered / max(1, len(obj.schema.columns) if obj.schema else 1)),
+            "isFallback": bool(getattr(obj, 'is_fallback', False)),
+            "reasonCode": getattr(obj, 'no_output_reason', None)
+        }
+
+    # Build the complete event
+    event = {
+        "eventType": "COMPLETE",
+        "eventTime": datetime.now().isoformat()[:19] + "Z",
+        "run": {"runId": "00000000-0000-0000-0000-000000000000"},
+        "job": {
+            "namespace": "infotracker/examples",
+            "name": job_name or getattr(obj, "job_name", f"warehouse/sql/{obj.name}.sql")
+        },
+        "inputs": inputs,
+        "outputs": [{
+            "namespace": ns,
+            "name": name,
+            "facets": facets
+        }]
+    }
+
+    return event
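
Taken together, emit_ol_from_object assembles the COMPLETE event straight from a resolved ObjectInfo: inputs come from lineage namespaces when available, otherwise from dependencies via the new module-level helpers, and the single output carries the schema, columnLineage and optional quality facets. A small sketch of the dependency-to-input mapping only; the dependency names are made up, and tempdb temp tables map to the fixed mssql://localhost/tempdb namespace:

    # Sketch only: how the new helpers resolve dependency names into input datasets.
    from infotracker.lineage import _ns_for_dep, _strip_db_prefix

    default_ns = "mssql://localhost/InfoTrackerDW"
    for dep in ["SalesDB.dbo.Orders", "dbo.Customers"]:
        print(_ns_for_dep(dep, default_ns), _strip_db_prefix(dep))
    # mssql://localhost/SalesDB dbo.Orders           (DB prefix moves into the namespace)
    # mssql://localhost/InfoTrackerDW dbo.Customers  (two-part name keeps the default namespace)
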