PyPI - InfoTracker - Versions diffs - 0.2.6__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

InfoTracker 0.2.6-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

infotracker/cli.py +9 -1
infotracker/engine.py +3 -2
infotracker/infotracker.yml +1 -1
infotracker/models.py +140 -45
infotracker/parser.py +53 -26
{infotracker-0.2.6.dist-info → infotracker-0.3.0.dist-info}/METADATA +1 -1
infotracker-0.3.0.dist-info/RECORD +16 -0
infotracker-0.2.6.dist-info/RECORD +0 -16
{infotracker-0.2.6.dist-info → infotracker-0.3.0.dist-info}/WHEEL +0 -0
{infotracker-0.2.6.dist-info → infotracker-0.3.0.dist-info}/entry_points.txt +0 -0

infotracker/cli.py CHANGED Viewed

@@ -96,6 +96,7 @@ def diff(
     base: Optional[Path] = typer.Option(None, "--base", help="Directory containing base OpenLineage artifacts"),
     head: Optional[Path] = typer.Option(None, "--head", help="Directory containing head OpenLineage artifacts"),
     format: str = typer.Option("text", "--format", help="Output format: text|json"),
+    threshold: Optional[str] = typer.Option(None, "--threshold", help="Severity threshold: NON_BREAKING|POTENTIALLY_BREAKING|BREAKING"),
 ):
     """Compare two sets of OpenLineage artifacts for breaking changes."""
     cfg: RuntimeConfig = ctx.obj["cfg"]
@@ -105,7 +106,14 @@ def diff(
         console.print("[red]ERROR: Both --base and --head directories are required[/red]")
         raise typer.Exit(1)
-    result = engine.run_diff(base, head, format)
+    # Validate threshold if provided
+    if threshold is not None:
+        valid_thresholds = ["NON_BREAKING", "POTENTIALLY_BREAKING", "BREAKING"]
+        if threshold not in valid_thresholds:
+            console.print(f"[red]ERROR: Invalid threshold '{threshold}'. Must be one of: {', '.join(valid_thresholds)}[/red]")
+            raise typer.Exit(1)
+    result = engine.run_diff(base, head, format, threshold=threshold)
     _emit(result, format)
     raise typer.Exit(code=result.get("exit_code", 0))

infotracker/engine.py CHANGED Viewed

@@ -475,6 +475,7 @@ class Engine:
             base_dir: Directory containing base OpenLineage JSON artifacts
             head_dir: Directory containing head OpenLineage JSON artifacts
             format: Output format (text|json)
+            **kwargs: Additional options including 'threshold' to override config
         Returns:
             Dict with results including exit_code (1 if breaking changes, 0 otherwise)
@@ -495,8 +496,8 @@ class Engine:
             detector = BreakingChangeDetector()
             report = detector.compare(base_objects, head_objects)
-            # Filter changes based on severity threshold from config
-            threshold = self.config.severity_threshold.upper()
+            # Use threshold from CLI flag if provided, otherwise from config
+            threshold = (kwargs.get('threshold') or self.config.severity_threshold).upper()
             filtered_changes = []
             if threshold == "BREAKING":

infotracker/infotracker.yml CHANGED Viewed

@@ -25,7 +25,7 @@ exclude:
 # Minimum severity level for breaking change detection
 # Options: NON_BREAKING, POTENTIALLY_BREAKING, BREAKING
-severity_threshold: BREAKING
+severity_threshold: NON_BREAKING
 # Objects to ignore during analysis (glob patterns)
 ignore:

infotracker/models.py CHANGED Viewed

@@ -344,50 +344,145 @@ class ColumnGraph:
     def find_columns_wildcard(self, selector: str) -> List[ColumnNode]:
-            """
-            Find columns matching a wildcard pattern.
-            Supports:
-            - Table wildcard:   <ns>.<schema>.<table>.*     → all columns of that table
-            - Column wildcard:  <optional_ns>..<pattern>    → match by COLUMN NAME only:
-                * if pattern contains any of [*?[]] → fnmatch on the column name
-                * otherwise → default to case-insensitive "contains"
-            - Fallback:         fnmatch on the full identifier "ns.schema.table.column"
-            """
-            import fnmatch as _fn
-            sel = (selector or "").strip().lower()
-            # 1) Table wildcard: "...schema.table.*"
-            if sel.endswith(".*"):
-                table_sel = sel[:-1]  # remove trailing '*', keep final dot
-                # simple prefix match on full key
-                return [node for key, node in self._nodes.items() if key.startswith(table_sel)]
-            # 2) Column wildcard: "<optional_ns>..<pattern>"
-            if ".." in sel:
-                ns_part, col_pat = sel.split("..", 1)
-                ns_part = ns_part.strip(".")
-                col_pat = col_pat.strip()
-                # if no explicit wildcard meta, treat as "contains"
-                has_meta = any(ch in col_pat for ch in "*?[]")
-                def col_name_matches(name: str) -> bool:
-                    name = (name or "").lower()
-                    if has_meta:
-                        return _fn.fnmatch(name, col_pat)
-                    return col_pat in name  # default: contains (case-insensitive)
-                if ns_part:
-                    ns_prefix = ns_part + "."
-                    return [
-                        node
-                        for key, node in self._nodes.items()
-                        if key.startswith(ns_prefix) and col_name_matches(getattr(node, "column_name", ""))
+        """
+        Find columns matching a wildcard pattern.
+        Supports:
+        - Table wildcard:   <ns>.<schema>.<table>.*     → all columns of that table
+        - Column wildcard:  <optional_ns>..<pattern>    → match by COLUMN NAME only
+        - Fallback:         fnmatch on the full identifier "ns.schema.table.column"
+        """
+        import fnmatch as _fn
+        # 1) Normalizacja i szybkie wyjścia
+        sel = (selector or "").strip()
+        low = sel.lower()
+        # Pusty/niepełny wzorzec
+        if low in {".", ".."}:
+            return []
+        if ".." in low:
+            ns_part, col_pat = low.split("..", 1)
+            if col_pat.strip() == "":
+                return []
+        # 2) Table wildcard "….*" – obsłuż W OBU wariantach (z i bez namespace)
+        if low.endswith(".*"):
+            left = sel[:-2].strip()
+            if not left:
+                return []
+            # Lokalny helper do dopasowania tabel
+            def _tbl_match(left: str, node_tbl: str) -> bool:
+                lp = (left or "").lower().split(".")
+                tp = (node_tbl or "").lower().split(".")
+                # dopasuj po końcówce: 3, 2 albo 1 segment
+                if len(lp) >= 3:
+                    return tp[-3:] == lp[-3:] or tp[-2:] == lp[-2:]
+                elif len(lp) == 2:
+                    return tp[-2:] == lp[-2:]
+                else:
+                    return tp[-1] == lp[-1] if lp else False
+            if "://" in left:
+                # Z namespace - bardziej dokładne parsowanie
+                # Format: mssql://localhost/InfoTrackerDW.STG.dbo.Orders
+                if "." in left:
+                    # Znajdź pierwszą kropkę po namespace
+                    ns_end = left.find(".")
+                    ns = left[:ns_end]
+                    table = left[ns_end + 1:]
+                    results = [
+                        node for node in self._nodes.values()
+                        if (node.namespace and node.namespace.lower().startswith(ns.lower()) and
+                            _tbl_match(table, node.table_name))
                     ]
                 else:
-                    return [node for node in self._nodes.values() if col_name_matches(getattr(node, "column_name", ""))]
-            # 3) Fallback: fnmatch on the full identifier
-            return [node for key, node in self._nodes.items() if _fn.fnmatch(key, sel)]
+                    results = []
+            else:
+                # Bez namespace
+                results = [
+                    node for node in self._nodes.values()
+                    if _tbl_match(left, node.table_name)
+                ]
+            # Deduplikacja
+            tmp = {}
+            for n in results:
+                tmp[str(n).lower()] = n
+            return list(tmp.values())
+        # 3) Column wildcard "<opcjonalny_prefix>..<column_pattern>" – dodaj semantykę CONTAINS
+        if ".." in low:
+            ns_part, col_pat = low.split("..", 1)
+            col_pat = col_pat.strip()
+            if col_pat == "":
+                return []
+            # Sprawdź czy są wildcardy
+            has_wildcards = any(ch in col_pat for ch in "*?[]")
+            def col_match(name: str) -> bool:
+                n = (name or "").lower()
+                return _fn.fnmatch(n, col_pat) if has_wildcards else (col_pat in n)
+            if ns_part:
+                ns_part = ns_part.strip(".")
+                if "://" in ns_part:
+                    # Sprawdź czy po namespace jest kropka - wtedy reszta to prefiks tabeli
+                    if "." in ns_part:
+                        # Znajdź część po pierwszej kropce po namespace jako prefiks tabeli
+                        first_dot = ns_part.find(".")
+                        table_prefix = ns_part[first_dot + 1:].lower()
+                        results = [
+                            node for node in self._nodes.values()
+                            if (node.table_name and node.table_name.lower().startswith(table_prefix) and
+                                col_match(node.column_name))
+                        ]
+                    else:
+                        # Tylko namespace, bez prefiksu tabeli
+                        results = [
+                            node for node in self._nodes.values()
+                            if (node.namespace and node.namespace.lower().startswith(ns_part) and
+                                col_match(node.column_name))
+                        ]
+                else:
+                    # Brak namespace - traktuj jako prefiks tabeli
+                    results = [
+                        node for node in self._nodes.values()
+                        if (node.table_name and node.table_name.lower().startswith(ns_part) and
+                            col_match(node.column_name))
+                    ]
+            else:
+                results = [
+                    node for node in self._nodes.values()
+                    if col_match(node.column_name)
+                ]
+            # Deduplikacja
+            tmp = {}
+            for n in results:
+                tmp[str(n).lower()] = n
+            return list(tmp.values())
+        # 4) Fallback na pełnym kluczu
+        if not any(ch in selector for ch in "*?[]"):
+            # Potraktuj jako "contains" po pełnym kluczu
+            results = [
+                node for key, node in self._nodes.items()
+                if low in key.lower()
+            ]
+        else:
+            # Są wildcardy - użyj fnmatch
+            results = [
+                node for key, node in self._nodes.items()
+                if _fn.fnmatch(key.lower(), low)
+            ]
+        # Deduplikacja
+        tmp = {}
+        for n in results:
+            tmp[str(n).lower()] = n
+        return list(tmp.values())

infotracker/parser.py CHANGED Viewed

@@ -26,6 +26,15 @@ class SqlParser:
         self.schema_registry = SchemaRegistry()
         self.default_database: Optional[str] = None  # Will be set from config
+    def _clean_proc_name(self, s: str) -> str:
+        """Clean procedure name by removing semicolons and parameters."""
+        return s.strip().rstrip(';').split('(')[0].strip()
+    def _normalize_table_ident(self, s: str) -> str:
+        """Remove brackets and normalize table identifier."""
+        import re
+        return re.sub(r'[\[\]]', '', s)
     def set_default_database(self, default_database: Optional[str]):
         """Set the default database for qualification."""
         self.default_database = default_database
@@ -51,6 +60,10 @@ class SqlParser:
                 re.match(r'(?i)^DROP\s+TABLE\s+#\w+', stripped_line)):
                 continue
+            # Skip GO statements (SQL Server batch separator)
+            if re.match(r'(?im)^\s*GO\s*$', stripped_line):
+                continue
             processed_lines.append(line)
         # Join the lines back together
@@ -67,27 +80,34 @@ class SqlParser:
     def _try_insert_exec_fallback(self, sql_content: str, object_hint: Optional[str] = None) -> Optional[ObjectInfo]:
         """
-        Fallback parser for INSERT INTO #temp EXEC pattern when SQLGlot fails.
+        Fallback parser for INSERT INTO ... EXEC pattern when SQLGlot fails.
+        Handles both temp tables and regular tables.
         """
         import re
-        # Look for INSERT INTO #temp EXEC pattern
-        pattern = r'(?is)INSERT\s+INTO\s+(#\w+)\s+EXEC\s+([^\s(]+)'
-        match = re.search(pattern, sql_content)
+        # Get preprocessed SQL
+        sql_pre = self._preprocess_sql(sql_content)
+        # Look for INSERT INTO ... EXEC pattern (both temp and regular tables)
+        pattern = r'(?is)INSERT\s+INTO\s+([#\[\]\w.]+)\s+EXEC\s+([^\s(;]+)'
+        match = re.search(pattern, sql_pre)
         if not match:
             return None
-        temp_table = match.group(1)  # e.g., "#customer_metrics"
-        proc_name = match.group(2)   # e.g., "dbo.usp_customer_metrics_dataset"
+        raw_table = match.group(1)
+        raw_proc = match.group(2)
-        # Qualify procedure name if needed
-        if '.' not in proc_name and self.default_database:
-            qualified_proc_name = f"{self.default_database}.dbo.{proc_name}"
-        else:
-            qualified_proc_name = proc_name
+        # Clean and normalize names
+        table_name = self._normalize_table_ident(raw_table)
+        proc_name = self._clean_proc_name(raw_proc)
+        # Determine if it's a temp table
+        is_temp = table_name.startswith('#')
+        namespace = "tempdb" if is_temp else "mssql://localhost/InfoTrackerDW"
+        object_type = "temp_table" if is_temp else "table"
-        # Create placeholder columns for the temp table
+        # Create placeholder columns
         placeholder_columns = [
             ColumnSchema(
                 name="output_col_1",
@@ -103,10 +123,10 @@ class SqlParser:
             )
         ]
-        # Create schema for temp table
+        # Create schema
         schema = TableSchema(
-            namespace="tempdb",
-            name=temp_table,
+            namespace=namespace,
+            name=table_name,
             columns=placeholder_columns
         )
@@ -118,24 +138,24 @@ class SqlParser:
                 input_fields=[
                     ColumnReference(
                         namespace="mssql://localhost/InfoTrackerDW",
-                        table_name=qualified_proc_name,
+                        table_name=proc_name,  # Clean procedure name without semicolons
                         column_name="*"
                     )
                 ],
                 transformation_type=TransformationType.EXEC,
-                transformation_description=f"INSERT INTO {temp_table} EXEC {proc_name}"
+                transformation_description=f"INSERT INTO {table_name} EXEC {proc_name}"
             ))
-        # Set dependencies to the procedure
-        dependencies = {qualified_proc_name}
+        # Set dependencies to the clean procedure name
+        dependencies = {proc_name}
         # Register schema in registry
         self.schema_registry.register(schema)
-        # Create and return ObjectInfo
+        # Create and return ObjectInfo with table_name as name (not object_hint)
         return ObjectInfo(
-            name=temp_table,
-            object_type="temp_table",
+            name=table_name,
+            object_type=object_type,
             schema=schema,
             lineage=lineage,
             dependencies=dependencies
@@ -283,7 +303,7 @@ class SqlParser:
                 # Extract procedure name (first identifier after EXEC)
                 parts = exec_text.split()
                 if len(parts) > 1:
-                    procedure_name = parts[1].strip('()').split('(')[0]
+                    procedure_name = self._clean_proc_name(parts[1])
                     dependencies.add(procedure_name)
             # For EXEC temp tables, we create placeholder columns since we can't determine
@@ -615,7 +635,9 @@ class SqlParser:
         select_stmt = stmt
-        if not select_stmt.expressions:
+        # Try to get projections with fallback
+        projections = list(getattr(select_stmt, 'expressions', None) or [])
+        if not projections:
             return lineage, output_columns
         # Handle star expansion first
@@ -627,7 +649,7 @@ class SqlParser:
             return self._handle_union_lineage(select_stmt, view_name)
         # Standard column-by-column processing
-        for i, select_expr in enumerate(select_stmt.expressions):
+        for i, select_expr in enumerate(projections):
             if isinstance(select_expr, exp.Alias):
                 # Aliased column: SELECT column AS alias
                 output_name = str(select_expr.alias)
@@ -641,10 +663,15 @@ class SqlParser:
                     output_name = str(select_expr)
                 source_expr = select_expr
+            # Determine data type for ColumnSchema
+            data_type = "unknown"
+            if isinstance(source_expr, exp.Cast):
+                data_type = str(source_expr.to).upper()
             # Create output column schema
             output_columns.append(ColumnSchema(
                 name=output_name,
-                data_type="unknown",  # Would need type inference
+                data_type=data_type,
                 nullable=True,
                 ordinal=i
             ))

{infotracker-0.2.6.dist-info → infotracker-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: InfoTracker
-Version: 0.2.6
+Version: 0.3.0
 Summary: Column-level SQL lineage, impact analysis, and breaking-change detection (MS SQL first)
 Project-URL: homepage, https://example.com/infotracker
 Project-URL: documentation, https://example.com/infotracker/docs

infotracker-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+infotracker/__init__.py,sha256=XkoK2R_QULA1UDQqgaLbmKQ2bdsi-lO3mo_wi7dy9Gg,57
+infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
+infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
+infotracker/cli.py,sha256=Hvid6PuMcygUj4Uxor4iBD5OLkfz_LJ249V0UZpwk8A,6181
+infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
+infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
+infotracker/engine.py,sha256=QhBSSIE0yusHE2jHlsyTu7GG89tRy1BuJ4dG2bPS_Nw,23560
+infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
+infotracker/models.py,sha256=d7EIjOm3evI8YekQWgLE0L1cWiOcU0F34-XdqxBkcTk,18332
+infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
+infotracker/parser.py,sha256=-zz_bmc4Rkb-hT_eDIvvpWxFtdyGFMKcRun9raNX4AY,71335
+infotracker/infotracker.yml,sha256=iRrrrUkdLCvEhw4DHqPnMchDlsJWI3xIJEpwevNU9sg,998
+infotracker-0.3.0.dist-info/METADATA,sha256=1QeaLFLL2redY2HD1Xn977cvSUBRQ6izbfZh6Vwmw3w,10449
+infotracker-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+infotracker-0.3.0.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
+infotracker-0.3.0.dist-info/RECORD,,

infotracker-0.2.6.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-infotracker/__init__.py,sha256=XkoK2R_QULA1UDQqgaLbmKQ2bdsi-lO3mo_wi7dy9Gg,57
-infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
-infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
-infotracker/cli.py,sha256=PQQoxqSmu8fSFTeGCdLKIKiY7WTcCzddiANYGc1qqe8,5666
-infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
-infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
-infotracker/engine.py,sha256=JlsrzPoB4Xe4qnTrEZ7emYP0K-zkqTqYOGzZiEZesks,23441
-infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
-infotracker/models.py,sha256=aQwU_4V69CnnHdgsybd99uvE3fzoQoW-nwn5aMhxdbU,14796
-infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
-infotracker/parser.py,sha256=8NVtCMvyt7l_dIfAydR_VJGB7A_NBLb2T827ac8uMXc,70255
-infotracker/infotracker.yml,sha256=iTVS246TS4DWLwN-vMiLHPbgDegjGIEpYF5UaL_lTd0,994
-infotracker-0.2.6.dist-info/METADATA,sha256=Ukx6UAXLMs8kAEiRzWNagDVRP2LRMTfeuNN7byn3nqM,10449
-infotracker-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-infotracker-0.2.6.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
-infotracker-0.2.6.dist-info/RECORD,,

{infotracker-0.2.6.dist-info → infotracker-0.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{infotracker-0.2.6.dist-info → infotracker-0.3.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

InfoTracker 0.2.6__py3-none-any.whl → 0.3.0__py3-none-any.whl

InfoTracker 0.2.6-py3-none-any.whl → 0.3.0-py3-none-any.whl