PyPI - InfoTracker - Versions diffs - 0.2.6__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

InfoTracker 0.2.6 (py3-none-any.whl) → 0.3.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

infotracker/__init__.py +1 -1
infotracker/cli.py +9 -1
infotracker/engine.py +3 -2
infotracker/infotracker.yml +1 -1
infotracker/models.py +140 -45
infotracker/parser.py +53 -26
infotracker-0.3.1.dist-info/METADATA +301 -0
infotracker-0.3.1.dist-info/RECORD +16 -0
infotracker-0.2.6.dist-info/METADATA +0 -285
infotracker-0.2.6.dist-info/RECORD +0 -16
{infotracker-0.2.6.dist-info → infotracker-0.3.1.dist-info}/WHEEL +0 -0
{infotracker-0.2.6.dist-info → infotracker-0.3.1.dist-info}/entry_points.txt +0 -0

infotracker/__init__.py CHANGED Viewed

@@ -2,5 +2,5 @@ __all__ = [
     "__version__",
 ]
-__version__ = "0.1.0"
+__version__ = "0.3.1"

infotracker/cli.py CHANGED Viewed

@@ -96,6 +96,7 @@ def diff(
     base: Optional[Path] = typer.Option(None, "--base", help="Directory containing base OpenLineage artifacts"),
     head: Optional[Path] = typer.Option(None, "--head", help="Directory containing head OpenLineage artifacts"),
     format: str = typer.Option("text", "--format", help="Output format: text|json"),
+    threshold: Optional[str] = typer.Option(None, "--threshold", help="Severity threshold: NON_BREAKING|POTENTIALLY_BREAKING|BREAKING"),
 ):
     """Compare two sets of OpenLineage artifacts for breaking changes."""
     cfg: RuntimeConfig = ctx.obj["cfg"]
@@ -105,7 +106,14 @@ def diff(
         console.print("[red]ERROR: Both --base and --head directories are required[/red]")
         raise typer.Exit(1)
-    result = engine.run_diff(base, head, format)
+    # Validate threshold if provided
+    if threshold is not None:
+        valid_thresholds = ["NON_BREAKING", "POTENTIALLY_BREAKING", "BREAKING"]
+        if threshold not in valid_thresholds:
+            console.print(f"[red]ERROR: Invalid threshold '{threshold}'. Must be one of: {', '.join(valid_thresholds)}[/red]")
+            raise typer.Exit(1)
+    result = engine.run_diff(base, head, format, threshold=threshold)
     _emit(result, format)
     raise typer.Exit(code=result.get("exit_code", 0))

infotracker/engine.py CHANGED Viewed

@@ -475,6 +475,7 @@ class Engine:
             base_dir: Directory containing base OpenLineage JSON artifacts
             head_dir: Directory containing head OpenLineage JSON artifacts
             format: Output format (text|json)
+            **kwargs: Additional options including 'threshold' to override config
         Returns:
             Dict with results including exit_code (1 if breaking changes, 0 otherwise)
@@ -495,8 +496,8 @@ class Engine:
             detector = BreakingChangeDetector()
             report = detector.compare(base_objects, head_objects)
-            # Filter changes based on severity threshold from config
-            threshold = self.config.severity_threshold.upper()
+            # Use threshold from CLI flag if provided, otherwise from config
+            threshold = (kwargs.get('threshold') or self.config.severity_threshold).upper()
             filtered_changes = []
             if threshold == "BREAKING":

infotracker/infotracker.yml CHANGED Viewed

@@ -25,7 +25,7 @@ exclude:
 # Minimum severity level for breaking change detection
 # Options: NON_BREAKING, POTENTIALLY_BREAKING, BREAKING
-severity_threshold: BREAKING
+severity_threshold: NON_BREAKING
 # Objects to ignore during analysis (glob patterns)
 ignore:

infotracker/models.py CHANGED Viewed

@@ -344,50 +344,145 @@ class ColumnGraph:
     def find_columns_wildcard(self, selector: str) -> List[ColumnNode]:
-            """
-            Find columns matching a wildcard pattern.
-            Supports:
-            - Table wildcard:   <ns>.<schema>.<table>.*     → all columns of that table
-            - Column wildcard:  <optional_ns>..<pattern>    → match by COLUMN NAME only:
-                * if pattern contains any of [*?[]] → fnmatch on the column name
-                * otherwise → default to case-insensitive "contains"
-            - Fallback:         fnmatch on the full identifier "ns.schema.table.column"
-            """
-            import fnmatch as _fn
-            sel = (selector or "").strip().lower()
-            # 1) Table wildcard: "...schema.table.*"
-            if sel.endswith(".*"):
-                table_sel = sel[:-1]  # remove trailing '*', keep final dot
-                # simple prefix match on full key
-                return [node for key, node in self._nodes.items() if key.startswith(table_sel)]
-            # 2) Column wildcard: "<optional_ns>..<pattern>"
-            if ".." in sel:
-                ns_part, col_pat = sel.split("..", 1)
-                ns_part = ns_part.strip(".")
-                col_pat = col_pat.strip()
-                # if no explicit wildcard meta, treat as "contains"
-                has_meta = any(ch in col_pat for ch in "*?[]")
-                def col_name_matches(name: str) -> bool:
-                    name = (name or "").lower()
-                    if has_meta:
-                        return _fn.fnmatch(name, col_pat)
-                    return col_pat in name  # default: contains (case-insensitive)
-                if ns_part:
-                    ns_prefix = ns_part + "."
-                    return [
-                        node
-                        for key, node in self._nodes.items()
-                        if key.startswith(ns_prefix) and col_name_matches(getattr(node, "column_name", ""))
+        """
+        Find columns matching a wildcard pattern.
+        Supports:
+        - Table wildcard:   <ns>.<schema>.<table>.*     → all columns of that table
+        - Column wildcard:  <optional_ns>..<pattern>    → match by COLUMN NAME only
+        - Fallback:         fnmatch on the full identifier "ns.schema.table.column"
+        """
+        import fnmatch as _fn
+        # 1) Normalizacja i szybkie wyjścia
+        sel = (selector or "").strip()
+        low = sel.lower()
+        # Pusty/niepełny wzorzec
+        if low in {".", ".."}:
+            return []
+        if ".." in low:
+            ns_part, col_pat = low.split("..", 1)
+            if col_pat.strip() == "":
+                return []
+        # 2) Table wildcard "….*" – obsłuż W OBU wariantach (z i bez namespace)
+        if low.endswith(".*"):
+            left = sel[:-2].strip()
+            if not left:
+                return []
+            # Lokalny helper do dopasowania tabel
+            def _tbl_match(left: str, node_tbl: str) -> bool:
+                lp = (left or "").lower().split(".")
+                tp = (node_tbl or "").lower().split(".")
+                # dopasuj po końcówce: 3, 2 albo 1 segment
+                if len(lp) >= 3:
+                    return tp[-3:] == lp[-3:] or tp[-2:] == lp[-2:]
+                elif len(lp) == 2:
+                    return tp[-2:] == lp[-2:]
+                else:
+                    return tp[-1] == lp[-1] if lp else False
+            if "://" in left:
+                # Z namespace - bardziej dokładne parsowanie
+                # Format: mssql://localhost/InfoTrackerDW.STG.dbo.Orders
+                if "." in left:
+                    # Znajdź pierwszą kropkę po namespace
+                    ns_end = left.find(".")
+                    ns = left[:ns_end]
+                    table = left[ns_end + 1:]
+                    results = [
+                        node for node in self._nodes.values()
+                        if (node.namespace and node.namespace.lower().startswith(ns.lower()) and
+                            _tbl_match(table, node.table_name))
                     ]
                 else:
-                    return [node for node in self._nodes.values() if col_name_matches(getattr(node, "column_name", ""))]
-            # 3) Fallback: fnmatch on the full identifier
-            return [node for key, node in self._nodes.items() if _fn.fnmatch(key, sel)]
+                    results = []
+            else:
+                # Bez namespace
+                results = [
+                    node for node in self._nodes.values()
+                    if _tbl_match(left, node.table_name)
+                ]
+            # Deduplikacja
+            tmp = {}
+            for n in results:
+                tmp[str(n).lower()] = n
+            return list(tmp.values())
+        # 3) Column wildcard "<opcjonalny_prefix>..<column_pattern>" – dodaj semantykę CONTAINS
+        if ".." in low:
+            ns_part, col_pat = low.split("..", 1)
+            col_pat = col_pat.strip()
+            if col_pat == "":
+                return []
+            # Sprawdź czy są wildcardy
+            has_wildcards = any(ch in col_pat for ch in "*?[]")
+            def col_match(name: str) -> bool:
+                n = (name or "").lower()
+                return _fn.fnmatch(n, col_pat) if has_wildcards else (col_pat in n)
+            if ns_part:
+                ns_part = ns_part.strip(".")
+                if "://" in ns_part:
+                    # Sprawdź czy po namespace jest kropka - wtedy reszta to prefiks tabeli
+                    if "." in ns_part:
+                        # Znajdź część po pierwszej kropce po namespace jako prefiks tabeli
+                        first_dot = ns_part.find(".")
+                        table_prefix = ns_part[first_dot + 1:].lower()
+                        results = [
+                            node for node in self._nodes.values()
+                            if (node.table_name and node.table_name.lower().startswith(table_prefix) and
+                                col_match(node.column_name))
+                        ]
+                    else:
+                        # Tylko namespace, bez prefiksu tabeli
+                        results = [
+                            node for node in self._nodes.values()
+                            if (node.namespace and node.namespace.lower().startswith(ns_part) and
+                                col_match(node.column_name))
+                        ]
+                else:
+                    # Brak namespace - traktuj jako prefiks tabeli
+                    results = [
+                        node for node in self._nodes.values()
+                        if (node.table_name and node.table_name.lower().startswith(ns_part) and
+                            col_match(node.column_name))
+                    ]
+            else:
+                results = [
+                    node for node in self._nodes.values()
+                    if col_match(node.column_name)
+                ]
+            # Deduplikacja
+            tmp = {}
+            for n in results:
+                tmp[str(n).lower()] = n
+            return list(tmp.values())
+        # 4) Fallback na pełnym kluczu
+        if not any(ch in selector for ch in "*?[]"):
+            # Potraktuj jako "contains" po pełnym kluczu
+            results = [
+                node for key, node in self._nodes.items()
+                if low in key.lower()
+            ]
+        else:
+            # Są wildcardy - użyj fnmatch
+            results = [
+                node for key, node in self._nodes.items()
+                if _fn.fnmatch(key.lower(), low)
+            ]
+        # Deduplikacja
+        tmp = {}
+        for n in results:
+            tmp[str(n).lower()] = n
+        return list(tmp.values())

infotracker/parser.py CHANGED Viewed

@@ -26,6 +26,15 @@ class SqlParser:
         self.schema_registry = SchemaRegistry()
         self.default_database: Optional[str] = None  # Will be set from config
+    def _clean_proc_name(self, s: str) -> str:
+        """Clean procedure name by removing semicolons and parameters."""
+        return s.strip().rstrip(';').split('(')[0].strip()
+    def _normalize_table_ident(self, s: str) -> str:
+        """Remove brackets and normalize table identifier."""
+        import re
+        return re.sub(r'[\[\]]', '', s)
     def set_default_database(self, default_database: Optional[str]):
         """Set the default database for qualification."""
         self.default_database = default_database
@@ -51,6 +60,10 @@ class SqlParser:
                 re.match(r'(?i)^DROP\s+TABLE\s+#\w+', stripped_line)):
                 continue
+            # Skip GO statements (SQL Server batch separator)
+            if re.match(r'(?im)^\s*GO\s*$', stripped_line):
+                continue
             processed_lines.append(line)
         # Join the lines back together
@@ -67,27 +80,34 @@ class SqlParser:
     def _try_insert_exec_fallback(self, sql_content: str, object_hint: Optional[str] = None) -> Optional[ObjectInfo]:
         """
-        Fallback parser for INSERT INTO #temp EXEC pattern when SQLGlot fails.
+        Fallback parser for INSERT INTO ... EXEC pattern when SQLGlot fails.
+        Handles both temp tables and regular tables.
         """
         import re
-        # Look for INSERT INTO #temp EXEC pattern
-        pattern = r'(?is)INSERT\s+INTO\s+(#\w+)\s+EXEC\s+([^\s(]+)'
-        match = re.search(pattern, sql_content)
+        # Get preprocessed SQL
+        sql_pre = self._preprocess_sql(sql_content)
+        # Look for INSERT INTO ... EXEC pattern (both temp and regular tables)
+        pattern = r'(?is)INSERT\s+INTO\s+([#\[\]\w.]+)\s+EXEC\s+([^\s(;]+)'
+        match = re.search(pattern, sql_pre)
         if not match:
             return None
-        temp_table = match.group(1)  # e.g., "#customer_metrics"
-        proc_name = match.group(2)   # e.g., "dbo.usp_customer_metrics_dataset"
+        raw_table = match.group(1)
+        raw_proc = match.group(2)
-        # Qualify procedure name if needed
-        if '.' not in proc_name and self.default_database:
-            qualified_proc_name = f"{self.default_database}.dbo.{proc_name}"
-        else:
-            qualified_proc_name = proc_name
+        # Clean and normalize names
+        table_name = self._normalize_table_ident(raw_table)
+        proc_name = self._clean_proc_name(raw_proc)
+        # Determine if it's a temp table
+        is_temp = table_name.startswith('#')
+        namespace = "tempdb" if is_temp else "mssql://localhost/InfoTrackerDW"
+        object_type = "temp_table" if is_temp else "table"
-        # Create placeholder columns for the temp table
+        # Create placeholder columns
         placeholder_columns = [
             ColumnSchema(
                 name="output_col_1",
@@ -103,10 +123,10 @@ class SqlParser:
             )
         ]
-        # Create schema for temp table
+        # Create schema
         schema = TableSchema(
-            namespace="tempdb",
-            name=temp_table,
+            namespace=namespace,
+            name=table_name,
             columns=placeholder_columns
         )
@@ -118,24 +138,24 @@ class SqlParser:
                 input_fields=[
                     ColumnReference(
                         namespace="mssql://localhost/InfoTrackerDW",
-                        table_name=qualified_proc_name,
+                        table_name=proc_name,  # Clean procedure name without semicolons
                         column_name="*"
                     )
                 ],
                 transformation_type=TransformationType.EXEC,
-                transformation_description=f"INSERT INTO {temp_table} EXEC {proc_name}"
+                transformation_description=f"INSERT INTO {table_name} EXEC {proc_name}"
             ))
-        # Set dependencies to the procedure
-        dependencies = {qualified_proc_name}
+        # Set dependencies to the clean procedure name
+        dependencies = {proc_name}
         # Register schema in registry
         self.schema_registry.register(schema)
-        # Create and return ObjectInfo
+        # Create and return ObjectInfo with table_name as name (not object_hint)
         return ObjectInfo(
-            name=temp_table,
-            object_type="temp_table",
+            name=table_name,
+            object_type=object_type,
             schema=schema,
             lineage=lineage,
             dependencies=dependencies
@@ -283,7 +303,7 @@ class SqlParser:
                 # Extract procedure name (first identifier after EXEC)
                 parts = exec_text.split()
                 if len(parts) > 1:
-                    procedure_name = parts[1].strip('()').split('(')[0]
+                    procedure_name = self._clean_proc_name(parts[1])
                     dependencies.add(procedure_name)
             # For EXEC temp tables, we create placeholder columns since we can't determine
@@ -615,7 +635,9 @@ class SqlParser:
         select_stmt = stmt
-        if not select_stmt.expressions:
+        # Try to get projections with fallback
+        projections = list(getattr(select_stmt, 'expressions', None) or [])
+        if not projections:
             return lineage, output_columns
         # Handle star expansion first
@@ -627,7 +649,7 @@ class SqlParser:
             return self._handle_union_lineage(select_stmt, view_name)
         # Standard column-by-column processing
-        for i, select_expr in enumerate(select_stmt.expressions):
+        for i, select_expr in enumerate(projections):
             if isinstance(select_expr, exp.Alias):
                 # Aliased column: SELECT column AS alias
                 output_name = str(select_expr.alias)
@@ -641,10 +663,15 @@ class SqlParser:
                     output_name = str(select_expr)
                 source_expr = select_expr
+            # Determine data type for ColumnSchema
+            data_type = "unknown"
+            if isinstance(source_expr, exp.Cast):
+                data_type = str(source_expr.to).upper()
             # Create output column schema
             output_columns.append(ColumnSchema(
                 name=output_name,
-                data_type="unknown",  # Would need type inference
+                data_type=data_type,
                 nullable=True,
                 ordinal=i
             ))

infotracker-0.3.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,301 @@
+Metadata-Version: 2.4
+Name: InfoTracker
+Version: 0.3.1
+Summary: Column-level SQL lineage, impact analysis, and breaking-change detection (MS SQL first)
+Project-URL: homepage, https://example.com/infotracker
+Project-URL: documentation, https://example.com/infotracker/docs
+Author: InfoTracker Authors
+License: MIT
+Keywords: data-lineage,impact-analysis,lineage,mssql,openlineage,sql
+Classifier: Environment :: Console
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Topic :: Database
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.10
+Requires-Dist: click
+Requires-Dist: networkx>=3.3
+Requires-Dist: packaging>=24.0
+Requires-Dist: pydantic>=2.8.2
+Requires-Dist: pyyaml>=6.0.1
+Requires-Dist: rich
+Requires-Dist: shellingham
+Requires-Dist: sqlglot>=23.0.0
+Requires-Dist: typer
+Provides-Extra: dev
+Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
+Requires-Dist: pytest>=7.4.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# InfoTracker
+**Column-level SQL lineage extraction and impact analysis for MS SQL Server**
+InfoTracker is a powerful command-line tool that parses T-SQL files and generates detailed column-level lineage in OpenLineage format. It supports advanced SQL Server features including table-valued functions, stored procedures, temp tables, and EXEC patterns.
+[![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://python.org)
+[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
+[![PyPI](https://img.shields.io/badge/PyPI-InfoTracker-blue.svg)](https://pypi.org/project/InfoTracker/)
+## 🚀 Features
+- **Column-level lineage** - Track data flow at the column level with precise transformations
+- **Advanced SQL support** - T-SQL dialect with temp tables, variables, CTEs, and window functions
+- **Impact analysis** - Find upstream and downstream dependencies with flexible selectors
+- **Wildcard matching** - Support for table wildcards (`schema.table.*`) and column wildcards (`..pattern`)
+- **Breaking change detection** - Detect schema changes that could break downstream processes
+- **Multiple output formats** - Text tables or JSON for integration with other tools
+- **OpenLineage compatible** - Standard format for data lineage interoperability
+- **Advanced SQL objects** - Table-valued functions (TVF) and dataset-returning procedures
+- **Temp table tracking** - Full lineage through EXEC into temp tables
+## 📦 Installation
+### From PyPI (Recommended)
+```bash
+pip install InfoTracker
+```
+### From GitHub
+```bash
+# Latest stable release
+pip install git+https://github.com/InfoMatePL/InfoTracker.git
+# Development version
+git clone https://github.com/InfoMatePL/InfoTracker.git
+cd InfoTracker
+pip install -e .
+```
+### Verify Installation
+```bash
+infotracker --help
+```
+## ⚡ Quick Start
+### 1. Extract Lineage
+```bash
+# Extract lineage from SQL files
+infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
+```
+### 2. Run Impact Analysis
+```bash
+# Find what feeds into a column (upstream)
+infotracker impact -s "+STG.dbo.Orders.OrderID"
+# Find what uses a column (downstream)
+infotracker impact -s "STG.dbo.Orders.OrderID+"
+# Both directions
+infotracker impact -s "+dbo.fct_sales.Revenue+"
+```
+### 3. Detect Breaking Changes
+```bash
+# Compare two versions of your schema
+infotracker diff --base build/lineage --head build/lineage_new
+```
+## 📖 Selector Syntax
+InfoTracker supports flexible column selectors for precise impact analysis:
+| Selector Format | Description | Example |
+|-----------------|-------------|---------|
+| `table.column` | Simple format (adds default `dbo` schema) | `Orders.OrderID` |
+| `schema.table.column` | Schema-qualified format | `dbo.Orders.OrderID` |
+| `database.schema.table.column` | Database-qualified format | `STG.dbo.Orders.OrderID` |
+| `schema.table.*` | Table wildcard (all columns) | `dbo.fct_sales.*` |
+| `..pattern` | Column wildcard (name contains pattern) | `..revenue` |
+| `..pattern*` | Column wildcard with fnmatch | `..customer*` |
+### Direction Control
+- `selector` - downstream dependencies (default)
+- `+selector` - upstream sources
+- `selector+` - downstream dependencies (explicit)
+- `+selector+` - both upstream and downstream
+## 💡 Examples
+### Basic Usage
+```bash
+# Extract lineage first (always run this before impact analysis)
+infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
+# Basic column lineage
+infotracker impact -s "+dbo.fct_sales.Revenue"        # What feeds this column?
+infotracker impact -s "STG.dbo.Orders.OrderID+"      # What uses this column?
+```
+### Wildcard Selectors
+```bash
+# All columns from a specific table
+infotracker impact -s "dbo.fct_sales.*"
+infotracker impact -s "STG.dbo.Orders.*"
+# Find all columns containing "revenue" (case-insensitive)
+infotracker impact -s "..revenue"
+# Find all columns starting with "customer"
+infotracker impact -s "..customer*"
+```
+### Advanced SQL Objects
+```bash
+# Table-valued function columns (upstream)
+infotracker impact -s "+dbo.fn_customer_orders_tvf.*"
+# Procedure dataset columns (upstream)
+infotracker impact -s "+dbo.usp_customer_metrics_dataset.*"
+# Temp table lineage from EXEC
+infotracker impact -s "+#temp_table.*"
+```
+### Output Formats
+```bash
+# Text output (default, human-readable)
+infotracker impact -s "+..revenue"
+# JSON output (machine-readable)
+infotracker --format json impact -s "..customer*" > customer_lineage.json
+# Control traversal depth
+infotracker impact -s "+dbo.Orders.OrderID" --max-depth 2
+```
+### Breaking Change Detection
+```bash
+# Extract baseline
+infotracker extract --sql-dir sql_v1 --out-dir build/baseline
+# Extract new version
+infotracker extract --sql-dir sql_v2 --out-dir build/current
+# Detect breaking changes
+infotracker diff --base build/baseline --head build/current
+# Filter by severity
+infotracker diff --base build/baseline --head build/current --threshold BREAKING
+```
+## Output Format
+Impact analysis returns these columns:
+- **from** - Source column (fully qualified)
+- **to** - Target column (fully qualified)
+- **direction** - `upstream` or `downstream`
+- **transformation** - Type of transformation (`IDENTITY`, `ARITHMETIC`, `AGGREGATION`, `CASE_AGGREGATION`, `DATE_FUNCTION`, `WINDOW`, etc.)
+- **description** - Human-readable transformation description
+Results are automatically deduplicated. Use `--format json` for machine-readable output.
+### New Transformation Types
+The enhanced transformation taxonomy includes:
+- `ARITHMETIC_AGGREGATION` - Arithmetic operations combined with aggregation functions
+- `COMPLEX_AGGREGATION` - Multi-step calculations involving multiple aggregations
+- `DATE_FUNCTION` - Date/time calculations like DATEDIFF, DATEADD
+- `DATE_FUNCTION_AGGREGATION` - Date functions applied to aggregated results
+- `CASE_AGGREGATION` - CASE statements applied to aggregated results
+### Advanced Object Support
+InfoTracker now supports advanced SQL Server objects:
+**Table-Valued Functions (TVF):**
+- Inline TVF (`RETURNS TABLE AS RETURN (SELECT ...)`) - Parsed directly from the SELECT statement
+- Multi-statement TVF (`RETURNS @table TABLE (...)`) - Extracts schema from the table variable definition
+- Function parameters are tracked as filter metadata (don't create columns)
+**Dataset-Returning Procedures:**
+- Procedures ending with a SELECT statement are treated as dataset sources
+- Output schema is extracted from the final SELECT statement
+- Parameters are tracked as filter metadata affecting lineage scope
+**EXEC into Temp Tables:**
+- `INSERT INTO #temp EXEC procedure` patterns create edges from procedure columns to temp table columns
+- Temp table lineage propagates downstream to final targets
+- Supports complex workflow patterns combining functions, procedures, and temp tables
+## Configuration Precedence
+InfoTracker follows this configuration precedence:
+1. **CLI flags** (highest priority) - override everything
+2. **infotracker.yml** config file - project defaults
+3. **Built-in defaults** (lowest priority) - fallback values
+## 🔧 Configuration
+Create an `infotracker.yml` file in your project root:
+```yaml
+sql_dirs:
+  - "sql/"
+  - "models/"
+out_dir: "build/lineage"
+exclude_dirs:
+  - "__pycache__"
+  - ".git"
+severity_threshold: "POTENTIALLY_BREAKING"
+```
+### Configuration Options
+| Setting | Description | Default | Examples |
+|---------|-------------|---------|----------|
+| `sql_dirs` | Directories to scan for SQL files | `["."]` | `["sql/", "models/"]` |
+| `out_dir` | Output directory for lineage files | `"lineage"` | `"build/artifacts"` |
+| `exclude_dirs` | Directories to skip | `[]` | `["__pycache__", "node_modules"]` |
+| `severity_threshold` | Breaking change detection level | `"NON_BREAKING"` | `"BREAKING"` |
+## 📚 Documentation
+- **[Architecture](docs/architecture.md)** - Core concepts and design
+- **[Lineage Concepts](docs/lineage_concepts.md)** - Data lineage fundamentals
+- **[CLI Usage](docs/cli_usage.md)** - Complete command reference
+- **[Configuration](docs/configuration.md)** - Advanced configuration options
+- **[DBT Integration](docs/dbt_integration.md)** - Using with DBT projects
+- **[OpenLineage Mapping](docs/openlineage_mapping.md)** - Output format specification
+- **[Breaking Changes](docs/breaking_changes.md)** - Change detection and severity levels
+- **[Advanced Use Cases](docs/advanced_use_cases.md)** - TVFs, stored procedures, and complex scenarios
+- **[Edge Cases](docs/edge_cases.md)** - SELECT *, UNION, temp tables handling
+- **[FAQ](docs/faq.md)** - Common questions and troubleshooting
+## 🧪 Testing
+```bash
+# Run all tests
+pytest
+# Run specific test categories
+pytest tests/test_parser.py     # Parser functionality
+pytest tests/test_wildcard.py   # Wildcard selectors
+pytest tests/test_adapter.py    # SQL dialect adapters
+# Run with coverage
+pytest --cov=infotracker --cov-report=html
+```
+## 📄 License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+## 🙏 Acknowledgments
+- [SQLGlot](https://github.com/tobymao/sqlglot) - SQL parsing library
+- [OpenLineage](https://openlineage.io/) - Data lineage standard
+- [Typer](https://typer.tiangolo.com/) - CLI framework
+- [Rich](https://rich.readthedocs.io/) - Terminal formatting
+---
+**InfoTracker** - Making database schema evolution safer, one column at a time. 🎯

infotracker-0.3.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+infotracker/__init__.py,sha256=TU6dd-1zoswGqK5zIl_o01msZ-pQGxHJlynPUYSYwXY,57
+infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
+infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
+infotracker/cli.py,sha256=Hvid6PuMcygUj4Uxor4iBD5OLkfz_LJ249V0UZpwk8A,6181
+infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
+infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
+infotracker/engine.py,sha256=QhBSSIE0yusHE2jHlsyTu7GG89tRy1BuJ4dG2bPS_Nw,23560
+infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
+infotracker/models.py,sha256=d7EIjOm3evI8YekQWgLE0L1cWiOcU0F34-XdqxBkcTk,18332
+infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
+infotracker/parser.py,sha256=-zz_bmc4Rkb-hT_eDIvvpWxFtdyGFMKcRun9raNX4AY,71335
+infotracker/infotracker.yml,sha256=iRrrrUkdLCvEhw4DHqPnMchDlsJWI3xIJEpwevNU9sg,998
+infotracker-0.3.1.dist-info/METADATA,sha256=dLhABRKb7FaHcmCW0HTwYZnJHlbIZHMHIqSD-sy7KM4,10487
+infotracker-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+infotracker-0.3.1.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
+infotracker-0.3.1.dist-info/RECORD,,

infotracker-0.2.6.dist-info/METADATA DELETED Viewed

@@ -1,285 +0,0 @@
-Metadata-Version: 2.4
-Name: InfoTracker
-Version: 0.2.6
-Summary: Column-level SQL lineage, impact analysis, and breaking-change detection (MS SQL first)
-Project-URL: homepage, https://example.com/infotracker
-Project-URL: documentation, https://example.com/infotracker/docs
-Author: InfoTracker Authors
-License: MIT
-Keywords: data-lineage,impact-analysis,lineage,mssql,openlineage,sql
-Classifier: Environment :: Console
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Topic :: Database
-Classifier: Topic :: Software Development :: Libraries
-Requires-Python: >=3.10
-Requires-Dist: click
-Requires-Dist: networkx>=3.3
-Requires-Dist: packaging>=24.0
-Requires-Dist: pydantic>=2.8.2
-Requires-Dist: pyyaml>=6.0.1
-Requires-Dist: rich
-Requires-Dist: shellingham
-Requires-Dist: sqlglot>=23.0.0
-Requires-Dist: typer
-Provides-Extra: dev
-Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
-Requires-Dist: pytest>=7.4.0; extra == 'dev'
-Description-Content-Type: text/markdown
-# InfoTracker
-Column-level SQL lineage extraction and impact analysis for MS SQL Server
-## Features
-- **Column-level lineage** - Track data flow at the column level
-- **Parse SQL files** and generate OpenLineage-compatible JSON
-- **Impact analysis** - Find upstream and downstream column dependencies with flexible selectors
-- **Wildcard matching** - Support for table wildcards (`schema.table.*`) and column wildcards (`..pattern`)
-- **Direction control** - Query upstream (`+selector`), downstream (`selector+`), or both (`+selector+`)
-- **Configurable depth** - Control traversal depth with `--max-depth`
-- **Multiple output formats** - Text tables or JSON for scripting
-- **MSSQL support** - T-SQL dialect with temp tables, variables, and stored procedures
-- **Advanced SQL objects** - Support for table-valued functions (TVF) and dataset-returning procedures
-- **Temp table lineage** - Track EXEC into temp tables and propagate lineage downstream
-## Requirements
-- Python 3.10+
-- Virtual environment (activated)
-- Basic SQL knowledge
-- Git and shell
-## Troubleshooting
-- **Error tracebacks on help commands**: Make sure you're running in an activated virtual environment
-- **Command not found**: Activate your virtual environment first
-- **Import errors**: Ensure all dependencies are installed with `pip install -e .`
-- **Column not found**: Use full URI format or check column_graph.json for exact names
-## Quickstart
-### Setup & Installation
-```bash
-# Activate virtual environment first (REQUIRED)
-# Install dependencies
-pip install -e .
-# Verify installation
-infotracker --help
-```
-### Basic Usage
-```bash
-# 1. Extract lineage from SQL files (builds column graph)
-infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
-# 2. Run impact analysis
-infotracker impact -s "STG.dbo.Orders.OrderID"  # downstream dependencies
-infotracker impact -s "+STG.dbo.Orders.OrderID" # upstream sources
-```
-## Selector Syntax
-InfoTracker supports flexible column selectors:
-| Selector Format | Description | Example |
-|-----------------|-------------|---------|
-| `table.column` | Simple format (adds default `dbo` schema) | `Orders.OrderID` |
-| `schema.table.column` | Schema-qualified format | `dbo.Orders.OrderID` |
-| `database.schema.table.column` | Database-qualified format | `STG.dbo.Orders.OrderID` |
-| `schema.table.*` | Table wildcard (all columns) | `dbo.fct_sales.*` |
-| `..pattern` | Column wildcard (name contains pattern) | `..revenue` |
-| `.pattern` | Alias for column wildcard | `.orderid` |
-| Full URI | Complete namespace format | `mssql://localhost/InfoTrackerDW.STG.dbo.Orders.OrderID` |
-### Direction Control
-- `selector` - downstream dependencies (default)
-- `+selector` - upstream sources
-- `selector+` - downstream dependencies (explicit)
-- `+selector+` - both upstream and downstream
-### Selector Cheat Sheet
-**Table wildcards:**
-```bash
-# All columns from a specific table
-infotracker impact -s "dbo.fct_sales.*"
-infotracker impact -s "STG.dbo.Orders.*"
-```
-**Column name matching:**
-```bash
-# Find all columns containing "revenue" (case-insensitive)
-infotracker impact -s "..revenue"
-# Find all columns containing "id"
-infotracker impact -s "..id"
-# Use wildcards for pattern matching
-infotracker impact -s "..customer*"
-```
-**Direction examples:**
-```bash
-# Upstream: what feeds into this column
-infotracker impact -s "+dbo.fct_sales.Revenue"
-# Downstream: what uses this column
-infotracker impact -s "STG.dbo.Orders.OrderID+"
-# Both directions
-infotracker impact -s "+dbo.dim_customer.CustomerID+"
-```
-**Advanced SQL objects:**
-```bash
-# Table-valued function columns (upstream)
-infotracker impact -s "+dbo.fn_customer_orders_tvf.*"
-# Procedure dataset columns (upstream)
-infotracker impact -s "+dbo.usp_customer_metrics_dataset.*"
-# Temp table lineage from EXEC
-infotracker impact -s "+#temp_table.*"
-```
-## Examples
-```bash
-# Extract lineage (run this first)
-infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
-# Basic column lineage
-infotracker impact -s "+dbo.fct_sales.Revenue"        # upstream sources
-infotracker impact -s "STG.dbo.Orders.OrderID+"      # downstream usage
-# Wildcard selectors
-infotracker impact -s "+..revenue+"                   # all revenue columns (both directions)
-infotracker impact -s "dbo.fct_sales.*"              # all columns from table
-infotracker --format json impact -s "..customer*"     # customer columns (JSON output)
-# Advanced SQL objects (NEW)
-infotracker impact -s "+dbo.fn_customer_orders_tvf.*"      # TVF columns (upstream)
-infotracker impact -s "+dbo.usp_customer_metrics_dataset.*" # procedure columns (upstream)
-# Depth control
-infotracker impact -s "+dbo.Orders.OrderID" --max-depth 1
-# Demo the new features with the included examples
-infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
-infotracker impact -s "+dbo.fn_customer_orders_inline.*"
-infotracker impact -s "+dbo.usp_customer_metrics_dataset.TotalRevenue"
-```
-### Copy-Paste Demo Commands
-Test the new TVF and procedure lineage features:
-```bash
-# 1. Extract all lineage (including new TVF/procedure support)
-infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
-# 2. Test TVF lineage
-infotracker --format text impact -s "+dbo.fn_customer_orders_tvf.*"
-# 3. Test procedure lineage
-infotracker --format text impact -s "+dbo.usp_customer_metrics_dataset.*"
-# 4. Test column name contains wildcard
-infotracker --format text impact -s "+..revenue"
-# 5. Show results in JSON format
-infotracker --format json impact -s "..total*" > tvf_lineage.json
-```
-## Output Format
-Impact analysis returns these columns:
-- **from** - Source column (fully qualified)
-- **to** - Target column (fully qualified)
-- **direction** - `upstream` or `downstream`
-- **transformation** - Type of transformation (`IDENTITY`, `ARITHMETIC`, `AGGREGATION`, `CASE_AGGREGATION`, `DATE_FUNCTION`, `WINDOW`, etc.)
-- **description** - Human-readable transformation description
-Results are automatically deduplicated. Use `--format json` for machine-readable output.
-### New Transformation Types
-The enhanced transformation taxonomy includes:
-- `ARITHMETIC_AGGREGATION` - Arithmetic operations combined with aggregation functions
-- `COMPLEX_AGGREGATION` - Multi-step calculations involving multiple aggregations
-- `DATE_FUNCTION` - Date/time calculations like DATEDIFF, DATEADD
-- `DATE_FUNCTION_AGGREGATION` - Date functions applied to aggregated results
-- `CASE_AGGREGATION` - CASE statements applied to aggregated results
-### Advanced Object Support
-InfoTracker now supports advanced SQL Server objects:
-**Table-Valued Functions (TVF):**
-- Inline TVF (`RETURNS TABLE AS RETURN (SELECT ...)`) - Parsed directly from the SELECT statement
-- Multi-statement TVF (`RETURNS @table TABLE (...)`) - Extracts the schema from the table variable definition
-- Function parameters are tracked as filter metadata (don't create columns)
-**Dataset-Returning Procedures:**
-- Procedures ending with a SELECT statement are treated as dataset sources
-- The output schema is extracted from the final SELECT statement
-- Parameters tracked as filter metadata affecting lineage scope
-**EXEC into Temp Tables:**
-- `INSERT INTO #temp EXEC procedure` patterns create edges from procedure columns to temp table columns
-- Temp table lineage propagates downstream to final targets
-- Supports complex workflow patterns combining functions, procedures, and temp tables
-## Configuration
-InfoTracker follows this configuration precedence:
-1. **CLI flags** (highest priority) - override everything
-2. **infotracker.yml** config file - project defaults
-3. **Built-in defaults** (lowest priority) - fallback values
-Create an `infotracker.yml` file in your project root:
-```yaml
-default_adapter: mssql
-sql_dir: examples/warehouse/sql
-out_dir: build/lineage
-include: ["*.sql"]
-exclude: ["*_wip.sql"]
-```
-## Documentation
-For detailed information:
-- `docs/overview.md` — what it is, goals, scope
-- `docs/algorithm.md` — how extraction works
-- `docs/lineage_concepts.md` — core concepts with visuals
-- `docs/cli_usage.md` — commands and options
-- `docs/breaking_changes.md` — definition and detection
-- `docs/edge_cases.md` — SELECT *, UNION, temp tables, etc.
-- `docs/adapters.md` — interface and MSSQL specifics
-- `docs/architecture.md` — system and sequence diagrams
-- `docs/configuration.md` — configuration reference
-- `docs/openlineage_mapping.md` — how outputs map to OpenLineage
-- `docs/faq.md` — common questions
-#### Documentation
-- `docs/overview.md` — what it is, goals, scope
-- `docs/algorithm.md` — how extraction works
-- `docs/lineage_concepts.md` — core concepts with visuals
-- `docs/cli_usage.md` — commands and options
-- `docs/breaking_changes.md` — definition and detection
-- `docs/edge_cases.md` — SELECT *, UNION, temp tables, etc.
-- `docs/advanced_use_cases.md` — tabular functions, procedures returning datasets
-- `docs/adapters.md` — interface and MSSQL specifics
-- `docs/architecture.md` — system and sequence diagrams
-- `docs/configuration.md` — configuration reference
-- `docs/openlineage_mapping.md` — how outputs map to OpenLineage
-- `docs/faq.md` — common questions
-- `docs/dbt_integration.md` — how to use with dbt projects
-## License
-This project is licensed under the MIT License.

infotracker-0.2.6.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-infotracker/__init__.py,sha256=XkoK2R_QULA1UDQqgaLbmKQ2bdsi-lO3mo_wi7dy9Gg,57
-infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
-infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
-infotracker/cli.py,sha256=PQQoxqSmu8fSFTeGCdLKIKiY7WTcCzddiANYGc1qqe8,5666
-infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
-infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
-infotracker/engine.py,sha256=JlsrzPoB4Xe4qnTrEZ7emYP0K-zkqTqYOGzZiEZesks,23441
-infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
-infotracker/models.py,sha256=aQwU_4V69CnnHdgsybd99uvE3fzoQoW-nwn5aMhxdbU,14796
-infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
-infotracker/parser.py,sha256=8NVtCMvyt7l_dIfAydR_VJGB7A_NBLb2T827ac8uMXc,70255
-infotracker/infotracker.yml,sha256=iTVS246TS4DWLwN-vMiLHPbgDegjGIEpYF5UaL_lTd0,994
-infotracker-0.2.6.dist-info/METADATA,sha256=Ukx6UAXLMs8kAEiRzWNagDVRP2LRMTfeuNN7byn3nqM,10449
-infotracker-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-infotracker-0.2.6.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
-infotracker-0.2.6.dist-info/RECORD,,

{infotracker-0.2.6.dist-info → infotracker-0.3.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{infotracker-0.2.6.dist-info → infotracker-0.3.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

InfoTracker 0.2.6__py3-none-any.whl → 0.3.1__py3-none-any.whl

InfoTracker 0.2.6-py3-none-any.whl → 0.3.1-py3-none-any.whl