InfoTracker 0.2.6__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
infotracker/cli.py CHANGED
@@ -96,6 +96,7 @@ def diff(
96
96
  base: Optional[Path] = typer.Option(None, "--base", help="Directory containing base OpenLineage artifacts"),
97
97
  head: Optional[Path] = typer.Option(None, "--head", help="Directory containing head OpenLineage artifacts"),
98
98
  format: str = typer.Option("text", "--format", help="Output format: text|json"),
99
+ threshold: Optional[str] = typer.Option(None, "--threshold", help="Severity threshold: NON_BREAKING|POTENTIALLY_BREAKING|BREAKING"),
99
100
  ):
100
101
  """Compare two sets of OpenLineage artifacts for breaking changes."""
101
102
  cfg: RuntimeConfig = ctx.obj["cfg"]
@@ -105,7 +106,14 @@ def diff(
105
106
  console.print("[red]ERROR: Both --base and --head directories are required[/red]")
106
107
  raise typer.Exit(1)
107
108
 
108
- result = engine.run_diff(base, head, format)
109
+ # Validate threshold if provided
110
+ if threshold is not None:
111
+ valid_thresholds = ["NON_BREAKING", "POTENTIALLY_BREAKING", "BREAKING"]
112
+ if threshold not in valid_thresholds:
113
+ console.print(f"[red]ERROR: Invalid threshold '{threshold}'. Must be one of: {', '.join(valid_thresholds)}[/red]")
114
+ raise typer.Exit(1)
115
+
116
+ result = engine.run_diff(base, head, format, threshold=threshold)
109
117
  _emit(result, format)
110
118
  raise typer.Exit(code=result.get("exit_code", 0))
111
119
 
infotracker/engine.py CHANGED
@@ -475,6 +475,7 @@ class Engine:
475
475
  base_dir: Directory containing base OpenLineage JSON artifacts
476
476
  head_dir: Directory containing head OpenLineage JSON artifacts
477
477
  format: Output format (text|json)
478
+ **kwargs: Additional options including 'threshold' to override config
478
479
 
479
480
  Returns:
480
481
  Dict with results including exit_code (1 if breaking changes, 0 otherwise)
@@ -495,8 +496,8 @@ class Engine:
495
496
  detector = BreakingChangeDetector()
496
497
  report = detector.compare(base_objects, head_objects)
497
498
 
498
- # Filter changes based on severity threshold from config
499
- threshold = self.config.severity_threshold.upper()
499
+ # Use threshold from CLI flag if provided, otherwise from config
500
+ threshold = (kwargs.get('threshold') or self.config.severity_threshold).upper()
500
501
  filtered_changes = []
501
502
 
502
503
  if threshold == "BREAKING":
@@ -25,7 +25,7 @@ exclude:
25
25
 
26
26
  # Minimum severity level for breaking change detection
27
27
  # Options: NON_BREAKING, POTENTIALLY_BREAKING, BREAKING
28
- severity_threshold: BREAKING
28
+ severity_threshold: NON_BREAKING
29
29
 
30
30
  # Objects to ignore during analysis (glob patterns)
31
31
  ignore:
infotracker/models.py CHANGED
@@ -344,50 +344,145 @@ class ColumnGraph:
344
344
 
345
345
 
346
346
  def find_columns_wildcard(self, selector: str) -> List[ColumnNode]:
347
- """
348
- Find columns matching a wildcard pattern.
349
-
350
- Supports:
351
- - Table wildcard: <ns>.<schema>.<table>.* → all columns of that table
352
- - Column wildcard: <optional_ns>..<pattern> → match by COLUMN NAME only:
353
- * if pattern contains any of [*?[]] → fnmatch on the column name
354
- * otherwise → default to case-insensitive "contains"
355
- - Fallback: fnmatch on the full identifier "ns.schema.table.column"
356
- """
357
- import fnmatch as _fn
358
-
359
- sel = (selector or "").strip().lower()
360
-
361
- # 1) Table wildcard: "...schema.table.*"
362
- if sel.endswith(".*"):
363
- table_sel = sel[:-1] # remove trailing '*', keep final dot
364
- # simple prefix match on full key
365
- return [node for key, node in self._nodes.items() if key.startswith(table_sel)]
366
-
367
- # 2) Column wildcard: "<optional_ns>..<pattern>"
368
- if ".." in sel:
369
- ns_part, col_pat = sel.split("..", 1)
370
- ns_part = ns_part.strip(".")
371
- col_pat = col_pat.strip()
372
-
373
- # if no explicit wildcard meta, treat as "contains"
374
- has_meta = any(ch in col_pat for ch in "*?[]")
375
-
376
- def col_name_matches(name: str) -> bool:
377
- name = (name or "").lower()
378
- if has_meta:
379
- return _fn.fnmatch(name, col_pat)
380
- return col_pat in name # default: contains (case-insensitive)
381
-
382
- if ns_part:
383
- ns_prefix = ns_part + "."
384
- return [
385
- node
386
- for key, node in self._nodes.items()
387
- if key.startswith(ns_prefix) and col_name_matches(getattr(node, "column_name", ""))
347
+ """
348
+ Find columns matching a wildcard pattern.
349
+
350
+ Supports:
351
+ - Table wildcard: <ns>.<schema>.<table>.* → all columns of that table
352
+ - Column wildcard: <optional_ns>..<pattern> → match by COLUMN NAME only
353
+ - Fallback: fnmatch on the full identifier "ns.schema.table.column"
354
+ """
355
+ import fnmatch as _fn
356
+
357
+ # 1) Normalizacja i szybkie wyjścia
358
+ sel = (selector or "").strip()
359
+ low = sel.lower()
360
+
361
+ # Pusty/niepełny wzorzec
362
+ if low in {".", ".."}:
363
+ return []
364
+
365
+ if ".." in low:
366
+ ns_part, col_pat = low.split("..", 1)
367
+ if col_pat.strip() == "":
368
+ return []
369
+
370
+ # 2) Table wildcard "….*" – obsłuż W OBU wariantach (z i bez namespace)
371
+ if low.endswith(".*"):
372
+ left = sel[:-2].strip()
373
+ if not left:
374
+ return []
375
+
376
+ # Lokalny helper do dopasowania tabel
377
+ def _tbl_match(left: str, node_tbl: str) -> bool:
378
+ lp = (left or "").lower().split(".")
379
+ tp = (node_tbl or "").lower().split(".")
380
+ # dopasuj po końcówce: 3, 2 albo 1 segment
381
+ if len(lp) >= 3:
382
+ return tp[-3:] == lp[-3:] or tp[-2:] == lp[-2:]
383
+ elif len(lp) == 2:
384
+ return tp[-2:] == lp[-2:]
385
+ else:
386
+ return tp[-1] == lp[-1] if lp else False
387
+
388
+ if "://" in left:
389
+ # Z namespace - bardziej dokładne parsowanie
390
+ # Format: mssql://localhost/InfoTrackerDW.STG.dbo.Orders
391
+ if "." in left:
392
+ # Znajdź pierwszą kropkę po namespace
393
+ ns_end = left.find(".")
394
+ ns = left[:ns_end]
395
+ table = left[ns_end + 1:]
396
+
397
+ results = [
398
+ node for node in self._nodes.values()
399
+ if (node.namespace and node.namespace.lower().startswith(ns.lower()) and
400
+ _tbl_match(table, node.table_name))
388
401
  ]
389
402
  else:
390
- return [node for node in self._nodes.values() if col_name_matches(getattr(node, "column_name", ""))]
391
-
392
- # 3) Fallback: fnmatch on the full identifier
393
- return [node for key, node in self._nodes.items() if _fn.fnmatch(key, sel)]
403
+ results = []
404
+ else:
405
+ # Bez namespace
406
+ results = [
407
+ node for node in self._nodes.values()
408
+ if _tbl_match(left, node.table_name)
409
+ ]
410
+
411
+ # Deduplikacja
412
+ tmp = {}
413
+ for n in results:
414
+ tmp[str(n).lower()] = n
415
+ return list(tmp.values())
416
+
417
+ # 3) Column wildcard "<opcjonalny_prefix>..<column_pattern>" – dodaj semantykę CONTAINS
418
+ if ".." in low:
419
+ ns_part, col_pat = low.split("..", 1)
420
+ col_pat = col_pat.strip()
421
+ if col_pat == "":
422
+ return []
423
+
424
+ # Sprawdź czy są wildcardy
425
+ has_wildcards = any(ch in col_pat for ch in "*?[]")
426
+
427
+ def col_match(name: str) -> bool:
428
+ n = (name or "").lower()
429
+ return _fn.fnmatch(n, col_pat) if has_wildcards else (col_pat in n)
430
+
431
+ if ns_part:
432
+ ns_part = ns_part.strip(".")
433
+ if "://" in ns_part:
434
+ # Sprawdź czy po namespace jest kropka - wtedy reszta to prefiks tabeli
435
+ if "." in ns_part:
436
+ # Znajdź część po pierwszej kropce po namespace jako prefiks tabeli
437
+ first_dot = ns_part.find(".")
438
+ table_prefix = ns_part[first_dot + 1:].lower()
439
+ results = [
440
+ node for node in self._nodes.values()
441
+ if (node.table_name and node.table_name.lower().startswith(table_prefix) and
442
+ col_match(node.column_name))
443
+ ]
444
+ else:
445
+ # Tylko namespace, bez prefiksu tabeli
446
+ results = [
447
+ node for node in self._nodes.values()
448
+ if (node.namespace and node.namespace.lower().startswith(ns_part) and
449
+ col_match(node.column_name))
450
+ ]
451
+ else:
452
+ # Brak namespace - traktuj jako prefiks tabeli
453
+ results = [
454
+ node for node in self._nodes.values()
455
+ if (node.table_name and node.table_name.lower().startswith(ns_part) and
456
+ col_match(node.column_name))
457
+ ]
458
+ else:
459
+ results = [
460
+ node for node in self._nodes.values()
461
+ if col_match(node.column_name)
462
+ ]
463
+
464
+ # Deduplikacja
465
+ tmp = {}
466
+ for n in results:
467
+ tmp[str(n).lower()] = n
468
+ return list(tmp.values())
469
+
470
+ # 4) Fallback na pełnym kluczu
471
+ if not any(ch in selector for ch in "*?[]"):
472
+ # Potraktuj jako "contains" po pełnym kluczu
473
+ results = [
474
+ node for key, node in self._nodes.items()
475
+ if low in key.lower()
476
+ ]
477
+ else:
478
+ # Są wildcardy - użyj fnmatch
479
+ results = [
480
+ node for key, node in self._nodes.items()
481
+ if _fn.fnmatch(key.lower(), low)
482
+ ]
483
+
484
+ # Deduplikacja
485
+ tmp = {}
486
+ for n in results:
487
+ tmp[str(n).lower()] = n
488
+ return list(tmp.values())
infotracker/parser.py CHANGED
@@ -26,6 +26,15 @@ class SqlParser:
26
26
  self.schema_registry = SchemaRegistry()
27
27
  self.default_database: Optional[str] = None # Will be set from config
28
28
 
29
+ def _clean_proc_name(self, s: str) -> str:
30
+ """Clean procedure name by removing semicolons and parameters."""
31
+ return s.strip().rstrip(';').split('(')[0].strip()
32
+
33
+ def _normalize_table_ident(self, s: str) -> str:
34
+ """Remove brackets and normalize table identifier."""
35
+ import re
36
+ return re.sub(r'[\[\]]', '', s)
37
+
29
38
  def set_default_database(self, default_database: Optional[str]):
30
39
  """Set the default database for qualification."""
31
40
  self.default_database = default_database
@@ -51,6 +60,10 @@ class SqlParser:
51
60
  re.match(r'(?i)^DROP\s+TABLE\s+#\w+', stripped_line)):
52
61
  continue
53
62
 
63
+ # Skip GO statements (SQL Server batch separator)
64
+ if re.match(r'(?im)^\s*GO\s*$', stripped_line):
65
+ continue
66
+
54
67
  processed_lines.append(line)
55
68
 
56
69
  # Join the lines back together
@@ -67,27 +80,34 @@ class SqlParser:
67
80
 
68
81
  def _try_insert_exec_fallback(self, sql_content: str, object_hint: Optional[str] = None) -> Optional[ObjectInfo]:
69
82
  """
70
- Fallback parser for INSERT INTO #temp EXEC pattern when SQLGlot fails.
83
+ Fallback parser for INSERT INTO ... EXEC pattern when SQLGlot fails.
84
+ Handles both temp tables and regular tables.
71
85
  """
72
86
  import re
73
87
 
74
- # Look for INSERT INTO #temp EXEC pattern
75
- pattern = r'(?is)INSERT\s+INTO\s+(#\w+)\s+EXEC\s+([^\s(]+)'
76
- match = re.search(pattern, sql_content)
88
+ # Get preprocessed SQL
89
+ sql_pre = self._preprocess_sql(sql_content)
90
+
91
+ # Look for INSERT INTO ... EXEC pattern (both temp and regular tables)
92
+ pattern = r'(?is)INSERT\s+INTO\s+([#\[\]\w.]+)\s+EXEC\s+([^\s(;]+)'
93
+ match = re.search(pattern, sql_pre)
77
94
 
78
95
  if not match:
79
96
  return None
80
97
 
81
- temp_table = match.group(1) # e.g., "#customer_metrics"
82
- proc_name = match.group(2) # e.g., "dbo.usp_customer_metrics_dataset"
98
+ raw_table = match.group(1)
99
+ raw_proc = match.group(2)
83
100
 
84
- # Qualify procedure name if needed
85
- if '.' not in proc_name and self.default_database:
86
- qualified_proc_name = f"{self.default_database}.dbo.{proc_name}"
87
- else:
88
- qualified_proc_name = proc_name
101
+ # Clean and normalize names
102
+ table_name = self._normalize_table_ident(raw_table)
103
+ proc_name = self._clean_proc_name(raw_proc)
104
+
105
+ # Determine if it's a temp table
106
+ is_temp = table_name.startswith('#')
107
+ namespace = "tempdb" if is_temp else "mssql://localhost/InfoTrackerDW"
108
+ object_type = "temp_table" if is_temp else "table"
89
109
 
90
- # Create placeholder columns for the temp table
110
+ # Create placeholder columns
91
111
  placeholder_columns = [
92
112
  ColumnSchema(
93
113
  name="output_col_1",
@@ -103,10 +123,10 @@ class SqlParser:
103
123
  )
104
124
  ]
105
125
 
106
- # Create schema for temp table
126
+ # Create schema
107
127
  schema = TableSchema(
108
- namespace="tempdb",
109
- name=temp_table,
128
+ namespace=namespace,
129
+ name=table_name,
110
130
  columns=placeholder_columns
111
131
  )
112
132
 
@@ -118,24 +138,24 @@ class SqlParser:
118
138
  input_fields=[
119
139
  ColumnReference(
120
140
  namespace="mssql://localhost/InfoTrackerDW",
121
- table_name=qualified_proc_name,
141
+ table_name=proc_name, # Clean procedure name without semicolons
122
142
  column_name="*"
123
143
  )
124
144
  ],
125
145
  transformation_type=TransformationType.EXEC,
126
- transformation_description=f"INSERT INTO {temp_table} EXEC {proc_name}"
146
+ transformation_description=f"INSERT INTO {table_name} EXEC {proc_name}"
127
147
  ))
128
148
 
129
- # Set dependencies to the procedure
130
- dependencies = {qualified_proc_name}
149
+ # Set dependencies to the clean procedure name
150
+ dependencies = {proc_name}
131
151
 
132
152
  # Register schema in registry
133
153
  self.schema_registry.register(schema)
134
154
 
135
- # Create and return ObjectInfo
155
+ # Create and return ObjectInfo with table_name as name (not object_hint)
136
156
  return ObjectInfo(
137
- name=temp_table,
138
- object_type="temp_table",
157
+ name=table_name,
158
+ object_type=object_type,
139
159
  schema=schema,
140
160
  lineage=lineage,
141
161
  dependencies=dependencies
@@ -283,7 +303,7 @@ class SqlParser:
283
303
  # Extract procedure name (first identifier after EXEC)
284
304
  parts = exec_text.split()
285
305
  if len(parts) > 1:
286
- procedure_name = parts[1].strip('()').split('(')[0]
306
+ procedure_name = self._clean_proc_name(parts[1])
287
307
  dependencies.add(procedure_name)
288
308
 
289
309
  # For EXEC temp tables, we create placeholder columns since we can't determine
@@ -615,7 +635,9 @@ class SqlParser:
615
635
 
616
636
  select_stmt = stmt
617
637
 
618
- if not select_stmt.expressions:
638
+ # Try to get projections with fallback
639
+ projections = list(getattr(select_stmt, 'expressions', None) or [])
640
+ if not projections:
619
641
  return lineage, output_columns
620
642
 
621
643
  # Handle star expansion first
@@ -627,7 +649,7 @@ class SqlParser:
627
649
  return self._handle_union_lineage(select_stmt, view_name)
628
650
 
629
651
  # Standard column-by-column processing
630
- for i, select_expr in enumerate(select_stmt.expressions):
652
+ for i, select_expr in enumerate(projections):
631
653
  if isinstance(select_expr, exp.Alias):
632
654
  # Aliased column: SELECT column AS alias
633
655
  output_name = str(select_expr.alias)
@@ -641,10 +663,15 @@ class SqlParser:
641
663
  output_name = str(select_expr)
642
664
  source_expr = select_expr
643
665
 
666
+ # Determine data type for ColumnSchema
667
+ data_type = "unknown"
668
+ if isinstance(source_expr, exp.Cast):
669
+ data_type = str(source_expr.to).upper()
670
+
644
671
  # Create output column schema
645
672
  output_columns.append(ColumnSchema(
646
673
  name=output_name,
647
- data_type="unknown", # Would need type inference
674
+ data_type=data_type,
648
675
  nullable=True,
649
676
  ordinal=i
650
677
  ))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: InfoTracker
3
- Version: 0.2.6
3
+ Version: 0.3.0
4
4
  Summary: Column-level SQL lineage, impact analysis, and breaking-change detection (MS SQL first)
5
5
  Project-URL: homepage, https://example.com/infotracker
6
6
  Project-URL: documentation, https://example.com/infotracker/docs
@@ -0,0 +1,16 @@
1
+ infotracker/__init__.py,sha256=XkoK2R_QULA1UDQqgaLbmKQ2bdsi-lO3mo_wi7dy9Gg,57
2
+ infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
3
+ infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
4
+ infotracker/cli.py,sha256=Hvid6PuMcygUj4Uxor4iBD5OLkfz_LJ249V0UZpwk8A,6181
5
+ infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
6
+ infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
7
+ infotracker/engine.py,sha256=QhBSSIE0yusHE2jHlsyTu7GG89tRy1BuJ4dG2bPS_Nw,23560
8
+ infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
9
+ infotracker/models.py,sha256=d7EIjOm3evI8YekQWgLE0L1cWiOcU0F34-XdqxBkcTk,18332
10
+ infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
11
+ infotracker/parser.py,sha256=-zz_bmc4Rkb-hT_eDIvvpWxFtdyGFMKcRun9raNX4AY,71335
12
+ infotracker/infotracker.yml,sha256=iRrrrUkdLCvEhw4DHqPnMchDlsJWI3xIJEpwevNU9sg,998
13
+ infotracker-0.3.0.dist-info/METADATA,sha256=1QeaLFLL2redY2HD1Xn977cvSUBRQ6izbfZh6Vwmw3w,10449
14
+ infotracker-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
15
+ infotracker-0.3.0.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
16
+ infotracker-0.3.0.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- infotracker/__init__.py,sha256=XkoK2R_QULA1UDQqgaLbmKQ2bdsi-lO3mo_wi7dy9Gg,57
2
- infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
3
- infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
4
- infotracker/cli.py,sha256=PQQoxqSmu8fSFTeGCdLKIKiY7WTcCzddiANYGc1qqe8,5666
5
- infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
6
- infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
7
- infotracker/engine.py,sha256=JlsrzPoB4Xe4qnTrEZ7emYP0K-zkqTqYOGzZiEZesks,23441
8
- infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
9
- infotracker/models.py,sha256=aQwU_4V69CnnHdgsybd99uvE3fzoQoW-nwn5aMhxdbU,14796
10
- infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
11
- infotracker/parser.py,sha256=8NVtCMvyt7l_dIfAydR_VJGB7A_NBLb2T827ac8uMXc,70255
12
- infotracker/infotracker.yml,sha256=iTVS246TS4DWLwN-vMiLHPbgDegjGIEpYF5UaL_lTd0,994
13
- infotracker-0.2.6.dist-info/METADATA,sha256=Ukx6UAXLMs8kAEiRzWNagDVRP2LRMTfeuNN7byn3nqM,10449
14
- infotracker-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
15
- infotracker-0.2.6.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
16
- infotracker-0.2.6.dist-info/RECORD,,