InfoTracker 0.2.6__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- infotracker/__init__.py +1 -1
- infotracker/cli.py +9 -1
- infotracker/engine.py +3 -2
- infotracker/infotracker.yml +1 -1
- infotracker/models.py +140 -45
- infotracker/parser.py +53 -26
- infotracker-0.3.1.dist-info/METADATA +301 -0
- infotracker-0.3.1.dist-info/RECORD +16 -0
- infotracker-0.2.6.dist-info/METADATA +0 -285
- infotracker-0.2.6.dist-info/RECORD +0 -16
- {infotracker-0.2.6.dist-info → infotracker-0.3.1.dist-info}/WHEEL +0 -0
- {infotracker-0.2.6.dist-info → infotracker-0.3.1.dist-info}/entry_points.txt +0 -0
infotracker/__init__.py
CHANGED
infotracker/cli.py
CHANGED
@@ -96,6 +96,7 @@ def diff(
|
|
96
96
|
base: Optional[Path] = typer.Option(None, "--base", help="Directory containing base OpenLineage artifacts"),
|
97
97
|
head: Optional[Path] = typer.Option(None, "--head", help="Directory containing head OpenLineage artifacts"),
|
98
98
|
format: str = typer.Option("text", "--format", help="Output format: text|json"),
|
99
|
+
threshold: Optional[str] = typer.Option(None, "--threshold", help="Severity threshold: NON_BREAKING|POTENTIALLY_BREAKING|BREAKING"),
|
99
100
|
):
|
100
101
|
"""Compare two sets of OpenLineage artifacts for breaking changes."""
|
101
102
|
cfg: RuntimeConfig = ctx.obj["cfg"]
|
@@ -105,7 +106,14 @@ def diff(
|
|
105
106
|
console.print("[red]ERROR: Both --base and --head directories are required[/red]")
|
106
107
|
raise typer.Exit(1)
|
107
108
|
|
108
|
-
|
109
|
+
# Validate threshold if provided
|
110
|
+
if threshold is not None:
|
111
|
+
valid_thresholds = ["NON_BREAKING", "POTENTIALLY_BREAKING", "BREAKING"]
|
112
|
+
if threshold not in valid_thresholds:
|
113
|
+
console.print(f"[red]ERROR: Invalid threshold '{threshold}'. Must be one of: {', '.join(valid_thresholds)}[/red]")
|
114
|
+
raise typer.Exit(1)
|
115
|
+
|
116
|
+
result = engine.run_diff(base, head, format, threshold=threshold)
|
109
117
|
_emit(result, format)
|
110
118
|
raise typer.Exit(code=result.get("exit_code", 0))
|
111
119
|
|
infotracker/engine.py
CHANGED
@@ -475,6 +475,7 @@ class Engine:
|
|
475
475
|
base_dir: Directory containing base OpenLineage JSON artifacts
|
476
476
|
head_dir: Directory containing head OpenLineage JSON artifacts
|
477
477
|
format: Output format (text|json)
|
478
|
+
**kwargs: Additional options including 'threshold' to override config
|
478
479
|
|
479
480
|
Returns:
|
480
481
|
Dict with results including exit_code (1 if breaking changes, 0 otherwise)
|
@@ -495,8 +496,8 @@ class Engine:
|
|
495
496
|
detector = BreakingChangeDetector()
|
496
497
|
report = detector.compare(base_objects, head_objects)
|
497
498
|
|
498
|
-
#
|
499
|
-
threshold = self.config.severity_threshold.upper()
|
499
|
+
# Use threshold from CLI flag if provided, otherwise from config
|
500
|
+
threshold = (kwargs.get('threshold') or self.config.severity_threshold).upper()
|
500
501
|
filtered_changes = []
|
501
502
|
|
502
503
|
if threshold == "BREAKING":
|
infotracker/infotracker.yml
CHANGED
@@ -25,7 +25,7 @@ exclude:
|
|
25
25
|
|
26
26
|
# Minimum severity level for breaking change detection
|
27
27
|
# Options: NON_BREAKING, POTENTIALLY_BREAKING, BREAKING
|
28
|
-
severity_threshold:
|
28
|
+
severity_threshold: NON_BREAKING
|
29
29
|
|
30
30
|
# Objects to ignore during analysis (glob patterns)
|
31
31
|
ignore:
|
infotracker/models.py
CHANGED
@@ -344,50 +344,145 @@ class ColumnGraph:
|
|
344
344
|
|
345
345
|
|
346
346
|
def find_columns_wildcard(self, selector: str) -> List[ColumnNode]:
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
return [
|
385
|
-
|
386
|
-
|
387
|
-
|
347
|
+
"""
|
348
|
+
Find columns matching a wildcard pattern.
|
349
|
+
|
350
|
+
Supports:
|
351
|
+
- Table wildcard: <ns>.<schema>.<table>.* → all columns of that table
|
352
|
+
- Column wildcard: <optional_ns>..<pattern> → match by COLUMN NAME only
|
353
|
+
- Fallback: fnmatch on the full identifier "ns.schema.table.column"
|
354
|
+
"""
|
355
|
+
import fnmatch as _fn
|
356
|
+
|
357
|
+
# 1) Normalization and early exits
|
358
|
+
sel = (selector or "").strip()
|
359
|
+
low = sel.lower()
|
360
|
+
|
361
|
+
# Empty/incomplete pattern
|
362
|
+
if low in {".", ".."}:
|
363
|
+
return []
|
364
|
+
|
365
|
+
if ".." in low:
|
366
|
+
ns_part, col_pat = low.split("..", 1)
|
367
|
+
if col_pat.strip() == "":
|
368
|
+
return []
|
369
|
+
|
370
|
+
# 2) Table wildcard "….*" – handle BOTH variants (with and without namespace)
|
371
|
+
if low.endswith(".*"):
|
372
|
+
left = sel[:-2].strip()
|
373
|
+
if not left:
|
374
|
+
return []
|
375
|
+
|
376
|
+
# Local helper for matching tables
|
377
|
+
def _tbl_match(left: str, node_tbl: str) -> bool:
|
378
|
+
lp = (left or "").lower().split(".")
|
379
|
+
tp = (node_tbl or "").lower().split(".")
|
380
|
+
# match on the trailing segments: 3, 2 or 1 segment(s)
|
381
|
+
if len(lp) >= 3:
|
382
|
+
return tp[-3:] == lp[-3:] or tp[-2:] == lp[-2:]
|
383
|
+
elif len(lp) == 2:
|
384
|
+
return tp[-2:] == lp[-2:]
|
385
|
+
else:
|
386
|
+
return tp[-1] == lp[-1] if lp else False
|
387
|
+
|
388
|
+
if "://" in left:
|
389
|
+
# With namespace - more precise parsing
|
390
|
+
# Format: mssql://localhost/InfoTrackerDW.STG.dbo.Orders
|
391
|
+
if "." in left:
|
392
|
+
# Find the first dot after the namespace
|
393
|
+
ns_end = left.find(".")
|
394
|
+
ns = left[:ns_end]
|
395
|
+
table = left[ns_end + 1:]
|
396
|
+
|
397
|
+
results = [
|
398
|
+
node for node in self._nodes.values()
|
399
|
+
if (node.namespace and node.namespace.lower().startswith(ns.lower()) and
|
400
|
+
_tbl_match(table, node.table_name))
|
388
401
|
]
|
389
402
|
else:
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
403
|
+
results = []
|
404
|
+
else:
|
405
|
+
# Without namespace
|
406
|
+
results = [
|
407
|
+
node for node in self._nodes.values()
|
408
|
+
if _tbl_match(left, node.table_name)
|
409
|
+
]
|
410
|
+
|
411
|
+
# Deduplication
|
412
|
+
tmp = {}
|
413
|
+
for n in results:
|
414
|
+
tmp[str(n).lower()] = n
|
415
|
+
return list(tmp.values())
|
416
|
+
|
417
|
+
# 3) Column wildcard "<optional_prefix>..<column_pattern>" – add CONTAINS semantics
|
418
|
+
if ".." in low:
|
419
|
+
ns_part, col_pat = low.split("..", 1)
|
420
|
+
col_pat = col_pat.strip()
|
421
|
+
if col_pat == "":
|
422
|
+
return []
|
423
|
+
|
424
|
+
# Check whether the pattern contains wildcards
|
425
|
+
has_wildcards = any(ch in col_pat for ch in "*?[]")
|
426
|
+
|
427
|
+
def col_match(name: str) -> bool:
|
428
|
+
n = (name or "").lower()
|
429
|
+
return _fn.fnmatch(n, col_pat) if has_wildcards else (col_pat in n)
|
430
|
+
|
431
|
+
if ns_part:
|
432
|
+
ns_part = ns_part.strip(".")
|
433
|
+
if "://" in ns_part:
|
434
|
+
# Check whether a dot follows the namespace - if so, the rest is the table prefix
|
435
|
+
if "." in ns_part:
|
436
|
+
# Take the part after the first dot following the namespace as the table prefix
|
437
|
+
first_dot = ns_part.find(".")
|
438
|
+
table_prefix = ns_part[first_dot + 1:].lower()
|
439
|
+
results = [
|
440
|
+
node for node in self._nodes.values()
|
441
|
+
if (node.table_name and node.table_name.lower().startswith(table_prefix) and
|
442
|
+
col_match(node.column_name))
|
443
|
+
]
|
444
|
+
else:
|
445
|
+
# Namespace only, no table prefix
|
446
|
+
results = [
|
447
|
+
node for node in self._nodes.values()
|
448
|
+
if (node.namespace and node.namespace.lower().startswith(ns_part) and
|
449
|
+
col_match(node.column_name))
|
450
|
+
]
|
451
|
+
else:
|
452
|
+
# No namespace - treat it as a table prefix
|
453
|
+
results = [
|
454
|
+
node for node in self._nodes.values()
|
455
|
+
if (node.table_name and node.table_name.lower().startswith(ns_part) and
|
456
|
+
col_match(node.column_name))
|
457
|
+
]
|
458
|
+
else:
|
459
|
+
results = [
|
460
|
+
node for node in self._nodes.values()
|
461
|
+
if col_match(node.column_name)
|
462
|
+
]
|
463
|
+
|
464
|
+
# Deduplication
|
465
|
+
tmp = {}
|
466
|
+
for n in results:
|
467
|
+
tmp[str(n).lower()] = n
|
468
|
+
return list(tmp.values())
|
469
|
+
|
470
|
+
# 4) Fallback on the full key
|
471
|
+
if not any(ch in selector for ch in "*?[]"):
|
472
|
+
# Treat as "contains" on the full key
|
473
|
+
results = [
|
474
|
+
node for key, node in self._nodes.items()
|
475
|
+
if low in key.lower()
|
476
|
+
]
|
477
|
+
else:
|
478
|
+
# Wildcards present - use fnmatch
|
479
|
+
results = [
|
480
|
+
node for key, node in self._nodes.items()
|
481
|
+
if _fn.fnmatch(key.lower(), low)
|
482
|
+
]
|
483
|
+
|
484
|
+
# Deduplication
|
485
|
+
tmp = {}
|
486
|
+
for n in results:
|
487
|
+
tmp[str(n).lower()] = n
|
488
|
+
return list(tmp.values())
|
infotracker/parser.py
CHANGED
@@ -26,6 +26,15 @@ class SqlParser:
|
|
26
26
|
self.schema_registry = SchemaRegistry()
|
27
27
|
self.default_database: Optional[str] = None # Will be set from config
|
28
28
|
|
29
|
+
def _clean_proc_name(self, s: str) -> str:
|
30
|
+
"""Clean procedure name by removing semicolons and parameters."""
|
31
|
+
return s.strip().rstrip(';').split('(')[0].strip()
|
32
|
+
|
33
|
+
def _normalize_table_ident(self, s: str) -> str:
|
34
|
+
"""Remove brackets and normalize table identifier."""
|
35
|
+
import re
|
36
|
+
return re.sub(r'[\[\]]', '', s)
|
37
|
+
|
29
38
|
def set_default_database(self, default_database: Optional[str]):
|
30
39
|
"""Set the default database for qualification."""
|
31
40
|
self.default_database = default_database
|
@@ -51,6 +60,10 @@ class SqlParser:
|
|
51
60
|
re.match(r'(?i)^DROP\s+TABLE\s+#\w+', stripped_line)):
|
52
61
|
continue
|
53
62
|
|
63
|
+
# Skip GO statements (SQL Server batch separator)
|
64
|
+
if re.match(r'(?im)^\s*GO\s*$', stripped_line):
|
65
|
+
continue
|
66
|
+
|
54
67
|
processed_lines.append(line)
|
55
68
|
|
56
69
|
# Join the lines back together
|
@@ -67,27 +80,34 @@ class SqlParser:
|
|
67
80
|
|
68
81
|
def _try_insert_exec_fallback(self, sql_content: str, object_hint: Optional[str] = None) -> Optional[ObjectInfo]:
|
69
82
|
"""
|
70
|
-
Fallback parser for INSERT INTO
|
83
|
+
Fallback parser for INSERT INTO ... EXEC pattern when SQLGlot fails.
|
84
|
+
Handles both temp tables and regular tables.
|
71
85
|
"""
|
72
86
|
import re
|
73
87
|
|
74
|
-
#
|
75
|
-
|
76
|
-
|
88
|
+
# Get preprocessed SQL
|
89
|
+
sql_pre = self._preprocess_sql(sql_content)
|
90
|
+
|
91
|
+
# Look for INSERT INTO ... EXEC pattern (both temp and regular tables)
|
92
|
+
pattern = r'(?is)INSERT\s+INTO\s+([#\[\]\w.]+)\s+EXEC\s+([^\s(;]+)'
|
93
|
+
match = re.search(pattern, sql_pre)
|
77
94
|
|
78
95
|
if not match:
|
79
96
|
return None
|
80
97
|
|
81
|
-
|
82
|
-
|
98
|
+
raw_table = match.group(1)
|
99
|
+
raw_proc = match.group(2)
|
83
100
|
|
84
|
-
#
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
101
|
+
# Clean and normalize names
|
102
|
+
table_name = self._normalize_table_ident(raw_table)
|
103
|
+
proc_name = self._clean_proc_name(raw_proc)
|
104
|
+
|
105
|
+
# Determine if it's a temp table
|
106
|
+
is_temp = table_name.startswith('#')
|
107
|
+
namespace = "tempdb" if is_temp else "mssql://localhost/InfoTrackerDW"
|
108
|
+
object_type = "temp_table" if is_temp else "table"
|
89
109
|
|
90
|
-
# Create placeholder columns
|
110
|
+
# Create placeholder columns
|
91
111
|
placeholder_columns = [
|
92
112
|
ColumnSchema(
|
93
113
|
name="output_col_1",
|
@@ -103,10 +123,10 @@ class SqlParser:
|
|
103
123
|
)
|
104
124
|
]
|
105
125
|
|
106
|
-
# Create schema
|
126
|
+
# Create schema
|
107
127
|
schema = TableSchema(
|
108
|
-
namespace=
|
109
|
-
name=
|
128
|
+
namespace=namespace,
|
129
|
+
name=table_name,
|
110
130
|
columns=placeholder_columns
|
111
131
|
)
|
112
132
|
|
@@ -118,24 +138,24 @@ class SqlParser:
|
|
118
138
|
input_fields=[
|
119
139
|
ColumnReference(
|
120
140
|
namespace="mssql://localhost/InfoTrackerDW",
|
121
|
-
table_name=
|
141
|
+
table_name=proc_name, # Clean procedure name without semicolons
|
122
142
|
column_name="*"
|
123
143
|
)
|
124
144
|
],
|
125
145
|
transformation_type=TransformationType.EXEC,
|
126
|
-
transformation_description=f"INSERT INTO {
|
146
|
+
transformation_description=f"INSERT INTO {table_name} EXEC {proc_name}"
|
127
147
|
))
|
128
148
|
|
129
|
-
# Set dependencies to the procedure
|
130
|
-
dependencies = {
|
149
|
+
# Set dependencies to the clean procedure name
|
150
|
+
dependencies = {proc_name}
|
131
151
|
|
132
152
|
# Register schema in registry
|
133
153
|
self.schema_registry.register(schema)
|
134
154
|
|
135
|
-
# Create and return ObjectInfo
|
155
|
+
# Create and return ObjectInfo with table_name as name (not object_hint)
|
136
156
|
return ObjectInfo(
|
137
|
-
name=
|
138
|
-
object_type=
|
157
|
+
name=table_name,
|
158
|
+
object_type=object_type,
|
139
159
|
schema=schema,
|
140
160
|
lineage=lineage,
|
141
161
|
dependencies=dependencies
|
@@ -283,7 +303,7 @@ class SqlParser:
|
|
283
303
|
# Extract procedure name (first identifier after EXEC)
|
284
304
|
parts = exec_text.split()
|
285
305
|
if len(parts) > 1:
|
286
|
-
procedure_name = parts[1]
|
306
|
+
procedure_name = self._clean_proc_name(parts[1])
|
287
307
|
dependencies.add(procedure_name)
|
288
308
|
|
289
309
|
# For EXEC temp tables, we create placeholder columns since we can't determine
|
@@ -615,7 +635,9 @@ class SqlParser:
|
|
615
635
|
|
616
636
|
select_stmt = stmt
|
617
637
|
|
618
|
-
|
638
|
+
# Try to get projections with fallback
|
639
|
+
projections = list(getattr(select_stmt, 'expressions', None) or [])
|
640
|
+
if not projections:
|
619
641
|
return lineage, output_columns
|
620
642
|
|
621
643
|
# Handle star expansion first
|
@@ -627,7 +649,7 @@ class SqlParser:
|
|
627
649
|
return self._handle_union_lineage(select_stmt, view_name)
|
628
650
|
|
629
651
|
# Standard column-by-column processing
|
630
|
-
for i, select_expr in enumerate(
|
652
|
+
for i, select_expr in enumerate(projections):
|
631
653
|
if isinstance(select_expr, exp.Alias):
|
632
654
|
# Aliased column: SELECT column AS alias
|
633
655
|
output_name = str(select_expr.alias)
|
@@ -641,10 +663,15 @@ class SqlParser:
|
|
641
663
|
output_name = str(select_expr)
|
642
664
|
source_expr = select_expr
|
643
665
|
|
666
|
+
# Determine data type for ColumnSchema
|
667
|
+
data_type = "unknown"
|
668
|
+
if isinstance(source_expr, exp.Cast):
|
669
|
+
data_type = str(source_expr.to).upper()
|
670
|
+
|
644
671
|
# Create output column schema
|
645
672
|
output_columns.append(ColumnSchema(
|
646
673
|
name=output_name,
|
647
|
-
data_type=
|
674
|
+
data_type=data_type,
|
648
675
|
nullable=True,
|
649
676
|
ordinal=i
|
650
677
|
))
|
@@ -0,0 +1,301 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: InfoTracker
|
3
|
+
Version: 0.3.1
|
4
|
+
Summary: Column-level SQL lineage, impact analysis, and breaking-change detection (MS SQL first)
|
5
|
+
Project-URL: homepage, https://example.com/infotracker
|
6
|
+
Project-URL: documentation, https://example.com/infotracker/docs
|
7
|
+
Author: InfoTracker Authors
|
8
|
+
License: MIT
|
9
|
+
Keywords: data-lineage,impact-analysis,lineage,mssql,openlineage,sql
|
10
|
+
Classifier: Environment :: Console
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
12
|
+
Classifier: Operating System :: OS Independent
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
15
|
+
Classifier: Topic :: Database
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
17
|
+
Requires-Python: >=3.10
|
18
|
+
Requires-Dist: click
|
19
|
+
Requires-Dist: networkx>=3.3
|
20
|
+
Requires-Dist: packaging>=24.0
|
21
|
+
Requires-Dist: pydantic>=2.8.2
|
22
|
+
Requires-Dist: pyyaml>=6.0.1
|
23
|
+
Requires-Dist: rich
|
24
|
+
Requires-Dist: shellingham
|
25
|
+
Requires-Dist: sqlglot>=23.0.0
|
26
|
+
Requires-Dist: typer
|
27
|
+
Provides-Extra: dev
|
28
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
29
|
+
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
30
|
+
Description-Content-Type: text/markdown
|
31
|
+
|
32
|
+
# InfoTracker
|
33
|
+
|
34
|
+
**Column-level SQL lineage extraction and impact analysis for MS SQL Server**
|
35
|
+
|
36
|
+
InfoTracker is a powerful command-line tool that parses T-SQL files and generates detailed column-level lineage in OpenLineage format. It supports advanced SQL Server features including table-valued functions, stored procedures, temp tables, and EXEC patterns.
|
37
|
+
|
38
|
+
[Python](https://python.org)
|
39
|
+
[License: MIT](LICENSE)
|
40
|
+
[PyPI](https://pypi.org/project/InfoTracker/)
|
41
|
+
|
42
|
+
## 🚀 Features
|
43
|
+
|
44
|
+
- **Column-level lineage** - Track data flow at the column level with precise transformations
|
45
|
+
- **Advanced SQL support** - T-SQL dialect with temp tables, variables, CTEs, and window functions
|
46
|
+
- **Impact analysis** - Find upstream and downstream dependencies with flexible selectors
|
47
|
+
- **Wildcard matching** - Support for table wildcards (`schema.table.*`) and column wildcards (`..pattern`)
|
48
|
+
- **Breaking change detection** - Detect schema changes that could break downstream processes
|
49
|
+
- **Multiple output formats** - Text tables or JSON for integration with other tools
|
50
|
+
- **OpenLineage compatible** - Standard format for data lineage interoperability
|
51
|
+
- **Advanced SQL objects** - Table-valued functions (TVF) and dataset-returning procedures
|
52
|
+
- **Temp table tracking** - Full lineage through EXEC into temp tables
|
53
|
+
|
54
|
+
## 📦 Installation
|
55
|
+
|
56
|
+
### From PyPI (Recommended)
|
57
|
+
```bash
|
58
|
+
pip install InfoTracker
|
59
|
+
```
|
60
|
+
|
61
|
+
### From GitHub
|
62
|
+
```bash
|
63
|
+
# Latest stable release
|
64
|
+
pip install git+https://github.com/InfoMatePL/InfoTracker.git
|
65
|
+
|
66
|
+
# Development version
|
67
|
+
git clone https://github.com/InfoMatePL/InfoTracker.git
|
68
|
+
cd InfoTracker
|
69
|
+
pip install -e .
|
70
|
+
```
|
71
|
+
|
72
|
+
### Verify Installation
|
73
|
+
```bash
|
74
|
+
infotracker --help
|
75
|
+
```
|
76
|
+
|
77
|
+
## ⚡ Quick Start
|
78
|
+
|
79
|
+
### 1. Extract Lineage
|
80
|
+
```bash
|
81
|
+
# Extract lineage from SQL files
|
82
|
+
infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
|
83
|
+
```
|
84
|
+
|
85
|
+
### 2. Run Impact Analysis
|
86
|
+
```bash
|
87
|
+
# Find what feeds into a column (upstream)
|
88
|
+
infotracker impact -s "+STG.dbo.Orders.OrderID"
|
89
|
+
|
90
|
+
# Find what uses a column (downstream)
|
91
|
+
infotracker impact -s "STG.dbo.Orders.OrderID+"
|
92
|
+
|
93
|
+
# Both directions
|
94
|
+
infotracker impact -s "+dbo.fct_sales.Revenue+"
|
95
|
+
```
|
96
|
+
|
97
|
+
### 3. Detect Breaking Changes
|
98
|
+
```bash
|
99
|
+
# Compare two versions of your schema
|
100
|
+
infotracker diff --base build/lineage --head build/lineage_new
|
101
|
+
```
|
102
|
+
## 📖 Selector Syntax
|
103
|
+
|
104
|
+
InfoTracker supports flexible column selectors for precise impact analysis:
|
105
|
+
|
106
|
+
| Selector Format | Description | Example |
|
107
|
+
|-----------------|-------------|---------|
|
108
|
+
| `table.column` | Simple format (adds default `dbo` schema) | `Orders.OrderID` |
|
109
|
+
| `schema.table.column` | Schema-qualified format | `dbo.Orders.OrderID` |
|
110
|
+
| `database.schema.table.column` | Database-qualified format | `STG.dbo.Orders.OrderID` |
|
111
|
+
| `schema.table.*` | Table wildcard (all columns) | `dbo.fct_sales.*` |
|
112
|
+
| `..pattern` | Column wildcard (name contains pattern) | `..revenue` |
|
113
|
+
| `..pattern*` | Column wildcard with fnmatch | `..customer*` |
|
114
|
+
|
115
|
+
### Direction Control
|
116
|
+
- `selector` - downstream dependencies (default)
|
117
|
+
- `+selector` - upstream sources
|
118
|
+
- `selector+` - downstream dependencies (explicit)
|
119
|
+
- `+selector+` - both upstream and downstream
|
120
|
+
|
121
|
+
## 💡 Examples
|
122
|
+
|
123
|
+
### Basic Usage
|
124
|
+
```bash
|
125
|
+
# Extract lineage first (always run this before impact analysis)
|
126
|
+
infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
|
127
|
+
|
128
|
+
# Basic column lineage
|
129
|
+
infotracker impact -s "+dbo.fct_sales.Revenue" # What feeds this column?
|
130
|
+
infotracker impact -s "STG.dbo.Orders.OrderID+" # What uses this column?
|
131
|
+
```
|
132
|
+
|
133
|
+
### Wildcard Selectors
|
134
|
+
```bash
|
135
|
+
# All columns from a specific table
|
136
|
+
infotracker impact -s "dbo.fct_sales.*"
|
137
|
+
infotracker impact -s "STG.dbo.Orders.*"
|
138
|
+
|
139
|
+
# Find all columns containing "revenue" (case-insensitive)
|
140
|
+
infotracker impact -s "..revenue"
|
141
|
+
|
142
|
+
# Find all columns starting with "customer"
|
143
|
+
infotracker impact -s "..customer*"
|
144
|
+
```
|
145
|
+
|
146
|
+
### Advanced SQL Objects
|
147
|
+
```bash
|
148
|
+
# Table-valued function columns (upstream)
|
149
|
+
infotracker impact -s "+dbo.fn_customer_orders_tvf.*"
|
150
|
+
|
151
|
+
# Procedure dataset columns (upstream)
|
152
|
+
infotracker impact -s "+dbo.usp_customer_metrics_dataset.*"
|
153
|
+
|
154
|
+
# Temp table lineage from EXEC
|
155
|
+
infotracker impact -s "+#temp_table.*"
|
156
|
+
```
|
157
|
+
|
158
|
+
### Output Formats
|
159
|
+
```bash
|
160
|
+
# Text output (default, human-readable)
|
161
|
+
infotracker impact -s "+..revenue"
|
162
|
+
|
163
|
+
# JSON output (machine-readable)
|
164
|
+
infotracker --format json impact -s "..customer*" > customer_lineage.json
|
165
|
+
|
166
|
+
# Control traversal depth
|
167
|
+
infotracker impact -s "+dbo.Orders.OrderID" --max-depth 2
|
168
|
+
```
|
169
|
+
|
170
|
+
### Breaking Change Detection
|
171
|
+
```bash
|
172
|
+
# Extract baseline
|
173
|
+
infotracker extract --sql-dir sql_v1 --out-dir build/baseline
|
174
|
+
|
175
|
+
# Extract new version
|
176
|
+
infotracker extract --sql-dir sql_v2 --out-dir build/current
|
177
|
+
|
178
|
+
# Detect breaking changes
|
179
|
+
infotracker diff --base build/baseline --head build/current
|
180
|
+
|
181
|
+
# Filter by severity
|
182
|
+
infotracker diff --base build/baseline --head build/current --threshold BREAKING
|
183
|
+
```
|
184
|
+
|
185
|
+
|
186
|
+
## Output Format
|
187
|
+
|
188
|
+
Impact analysis returns these columns:
|
189
|
+
- **from** - Source column (fully qualified)
|
190
|
+
- **to** - Target column (fully qualified)
|
191
|
+
- **direction** - `upstream` or `downstream`
|
192
|
+
- **transformation** - Type of transformation (`IDENTITY`, `ARITHMETIC`, `AGGREGATION`, `CASE_AGGREGATION`, `DATE_FUNCTION`, `WINDOW`, etc.)
|
193
|
+
- **description** - Human-readable transformation description
|
194
|
+
|
195
|
+
Results are automatically deduplicated. Use `--format json` for machine-readable output.
|
196
|
+
|
197
|
+
### New Transformation Types
|
198
|
+
|
199
|
+
The enhanced transformation taxonomy includes:
|
200
|
+
- `ARITHMETIC_AGGREGATION` - Arithmetic operations combined with aggregation functions
|
201
|
+
- `COMPLEX_AGGREGATION` - Multi-step calculations involving multiple aggregations
|
202
|
+
- `DATE_FUNCTION` - Date/time calculations like DATEDIFF, DATEADD
|
203
|
+
- `DATE_FUNCTION_AGGREGATION` - Date functions applied to aggregated results
|
204
|
+
- `CASE_AGGREGATION` - CASE statements applied to aggregated results
|
205
|
+
|
206
|
+
### Advanced Object Support
|
207
|
+
|
208
|
+
InfoTracker now supports advanced SQL Server objects:
|
209
|
+
|
210
|
+
**Table-Valued Functions (TVF):**
|
211
|
+
- Inline TVF (`RETURNS TABLE AS RETURN (SELECT ...)`) - Parsed directly from the SELECT statement
|
212
|
+
- Multi-statement TVF (`RETURNS @table TABLE (...)`) - Extracts schema from the table variable definition
|
213
|
+
- Function parameters are tracked as filter metadata (don't create columns)
|
214
|
+
|
215
|
+
**Dataset-Returning Procedures:**
|
216
|
+
- Procedures ending with a SELECT statement are treated as dataset sources
|
217
|
+
- Output schema extracted from the final SELECT statement
|
218
|
+
- Parameters tracked as filter metadata affecting lineage scope
|
219
|
+
|
220
|
+
**EXEC into Temp Tables:**
|
221
|
+
- `INSERT INTO #temp EXEC procedure` patterns create edges from procedure columns to temp table columns
|
222
|
+
- Temp table lineage propagates downstream to final targets
|
223
|
+
- Supports complex workflow patterns combining functions, procedures, and temp tables
|
224
|
+
|
225
|
+
## Configuration
|
226
|
+
|
227
|
+
InfoTracker follows this configuration precedence:
|
228
|
+
1. **CLI flags** (highest priority) - override everything
|
229
|
+
2. **infotracker.yml** config file - project defaults
|
230
|
+
3. **Built-in defaults** (lowest priority) - fallback values
|
231
|
+
|
232
|
+
## 🔧 Configuration
|
233
|
+
|
234
|
+
Create an `infotracker.yml` file in your project root:
|
235
|
+
|
236
|
+
```yaml
|
237
|
+
sql_dirs:
|
238
|
+
- "sql/"
|
239
|
+
- "models/"
|
240
|
+
out_dir: "build/lineage"
|
241
|
+
exclude_dirs:
|
242
|
+
- "__pycache__"
|
243
|
+
- ".git"
|
244
|
+
severity_threshold: "POTENTIALLY_BREAKING"
|
245
|
+
```
|
246
|
+
|
247
|
+
### Configuration Options
|
248
|
+
|
249
|
+
| Setting | Description | Default | Examples |
|
250
|
+
|---------|-------------|---------|----------|
|
251
|
+
| `sql_dirs` | Directories to scan for SQL files | `["."]` | `["sql/", "models/"]` |
|
252
|
+
| `out_dir` | Output directory for lineage files | `"lineage"` | `"build/artifacts"` |
|
253
|
+
| `exclude_dirs` | Directories to skip | `[]` | `["__pycache__", "node_modules"]` |
|
254
|
+
| `severity_threshold` | Breaking change detection level | `"NON_BREAKING"` | `"BREAKING"` |
|
255
|
+
|
256
|
+
## 📚 Documentation
|
257
|
+
|
258
|
+
- **[Architecture](docs/architecture.md)** - Core concepts and design
|
259
|
+
- **[Lineage Concepts](docs/lineage_concepts.md)** - Data lineage fundamentals
|
260
|
+
- **[CLI Usage](docs/cli_usage.md)** - Complete command reference
|
261
|
+
- **[Configuration](docs/configuration.md)** - Advanced configuration options
|
262
|
+
- **[DBT Integration](docs/dbt_integration.md)** - Using with DBT projects
|
263
|
+
- **[OpenLineage Mapping](docs/openlineage_mapping.md)** - Output format specification
|
264
|
+
- **[Breaking Changes](docs/breaking_changes.md)** - Change detection and severity levels
|
265
|
+
- **[Advanced Use Cases](docs/advanced_use_cases.md)** - TVFs, stored procedures, and complex scenarios
|
266
|
+
- **[Edge Cases](docs/edge_cases.md)** - SELECT *, UNION, temp tables handling
|
267
|
+
- **[FAQ](docs/faq.md)** - Common questions and troubleshooting
|
268
|
+
|
269
|
+
## 🧪 Testing
|
270
|
+
|
271
|
+
```bash
|
272
|
+
# Run all tests
|
273
|
+
pytest
|
274
|
+
|
275
|
+
# Run specific test categories
|
276
|
+
pytest tests/test_parser.py # Parser functionality
|
277
|
+
pytest tests/test_wildcard.py # Wildcard selectors
|
278
|
+
pytest tests/test_adapter.py # SQL dialect adapters
|
279
|
+
|
280
|
+
# Run with coverage
|
281
|
+
pytest --cov=infotracker --cov-report=html
|
282
|
+
```
|
283
|
+
|
284
|
+
|
285
|
+
|
286
|
+
|
287
|
+
|
288
|
+
## 📄 License
|
289
|
+
|
290
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
291
|
+
|
292
|
+
## 🙏 Acknowledgments
|
293
|
+
|
294
|
+
- [SQLGlot](https://github.com/tobymao/sqlglot) - SQL parsing library
|
295
|
+
- [OpenLineage](https://openlineage.io/) - Data lineage standard
|
296
|
+
- [Typer](https://typer.tiangolo.com/) - CLI framework
|
297
|
+
- [Rich](https://rich.readthedocs.io/) - Terminal formatting
|
298
|
+
|
299
|
+
---
|
300
|
+
|
301
|
+
**InfoTracker** - Making database schema evolution safer, one column at a time. 🎯
|
@@ -0,0 +1,16 @@
|
|
1
|
+
infotracker/__init__.py,sha256=TU6dd-1zoswGqK5zIl_o01msZ-pQGxHJlynPUYSYwXY,57
|
2
|
+
infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
|
3
|
+
infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
|
4
|
+
infotracker/cli.py,sha256=Hvid6PuMcygUj4Uxor4iBD5OLkfz_LJ249V0UZpwk8A,6181
|
5
|
+
infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
|
6
|
+
infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
|
7
|
+
infotracker/engine.py,sha256=QhBSSIE0yusHE2jHlsyTu7GG89tRy1BuJ4dG2bPS_Nw,23560
|
8
|
+
infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
|
9
|
+
infotracker/models.py,sha256=d7EIjOm3evI8YekQWgLE0L1cWiOcU0F34-XdqxBkcTk,18332
|
10
|
+
infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
|
11
|
+
infotracker/parser.py,sha256=-zz_bmc4Rkb-hT_eDIvvpWxFtdyGFMKcRun9raNX4AY,71335
|
12
|
+
infotracker/infotracker.yml,sha256=iRrrrUkdLCvEhw4DHqPnMchDlsJWI3xIJEpwevNU9sg,998
|
13
|
+
infotracker-0.3.1.dist-info/METADATA,sha256=dLhABRKb7FaHcmCW0HTwYZnJHlbIZHMHIqSD-sy7KM4,10487
|
14
|
+
infotracker-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
15
|
+
infotracker-0.3.1.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
|
16
|
+
infotracker-0.3.1.dist-info/RECORD,,
|
@@ -1,285 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: InfoTracker
|
3
|
-
Version: 0.2.6
|
4
|
-
Summary: Column-level SQL lineage, impact analysis, and breaking-change detection (MS SQL first)
|
5
|
-
Project-URL: homepage, https://example.com/infotracker
|
6
|
-
Project-URL: documentation, https://example.com/infotracker/docs
|
7
|
-
Author: InfoTracker Authors
|
8
|
-
License: MIT
|
9
|
-
Keywords: data-lineage,impact-analysis,lineage,mssql,openlineage,sql
|
10
|
-
Classifier: Environment :: Console
|
11
|
-
Classifier: License :: OSI Approved :: MIT License
|
12
|
-
Classifier: Operating System :: OS Independent
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
15
|
-
Classifier: Topic :: Database
|
16
|
-
Classifier: Topic :: Software Development :: Libraries
|
17
|
-
Requires-Python: >=3.10
|
18
|
-
Requires-Dist: click
|
19
|
-
Requires-Dist: networkx>=3.3
|
20
|
-
Requires-Dist: packaging>=24.0
|
21
|
-
Requires-Dist: pydantic>=2.8.2
|
22
|
-
Requires-Dist: pyyaml>=6.0.1
|
23
|
-
Requires-Dist: rich
|
24
|
-
Requires-Dist: shellingham
|
25
|
-
Requires-Dist: sqlglot>=23.0.0
|
26
|
-
Requires-Dist: typer
|
27
|
-
Provides-Extra: dev
|
28
|
-
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
29
|
-
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
30
|
-
Description-Content-Type: text/markdown
|
31
|
-
|
32
|
-
# InfoTracker
|
33
|
-
|
34
|
-
Column-level SQL lineage extraction and impact analysis for MS SQL Server
|
35
|
-
|
36
|
-
## Features
|
37
|
-
|
38
|
-
- **Column-level lineage** - Track data flow at the column level
|
39
|
-
- **Parse SQL files** and generate OpenLineage-compatible JSON
|
40
|
-
- **Impact analysis** - Find upstream and downstream column dependencies with flexible selectors
|
41
|
-
- **Wildcard matching** - Support for table wildcards (`schema.table.*`) and column wildcards (`..pattern`)
|
42
|
-
- **Direction control** - Query upstream (`+selector`), downstream (`selector+`), or both (`+selector+`)
|
43
|
-
- **Configurable depth** - Control traversal depth with `--max-depth`
|
44
|
-
- **Multiple output formats** - Text tables or JSON for scripting
|
45
|
-
- **MSSQL support** - T-SQL dialect with temp tables, variables, and stored procedures
|
46
|
-
- **Advanced SQL objects** - Support for table-valued functions (TVF) and dataset-returning procedures
|
47
|
-
- **Temp table lineage** - Track EXEC into temp tables and propagate lineage downstream
|
48
|
-
|
49
|
-
## Requirements
|
50
|
-
- Python 3.10+
|
51
|
-
- Virtual environment (activated)
|
52
|
-
- Basic SQL knowledge
|
53
|
-
- Git and shell
|
54
|
-
|
55
|
-
## Troubleshooting
|
56
|
-
- **Error tracebacks on help commands**: Make sure you're running in an activated virtual environment
|
57
|
-
- **Command not found**: Activate your virtual environment first
|
58
|
-
- **Import errors**: Ensure all dependencies are installed with `pip install -e .`
|
59
|
-
- **Column not found**: Use full URI format or check column_graph.json for exact names
|
60
|
-
|
61
|
-
## Quickstart
|
62
|
-
|
63
|
-
### Setup & Installation
|
64
|
-
```bash
|
65
|
-
# Activate virtual environment first (REQUIRED)
|
66
|
-
|
67
|
-
# Install dependencies
|
68
|
-
pip install -e .
|
69
|
-
|
70
|
-
# Verify installation
|
71
|
-
infotracker --help
|
72
|
-
```
|
73
|
-
|
74
|
-
### Basic Usage
|
75
|
-
```bash
|
76
|
-
# 1. Extract lineage from SQL files (builds column graph)
|
77
|
-
infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
|
78
|
-
|
79
|
-
# 2. Run impact analysis
|
80
|
-
infotracker impact -s "STG.dbo.Orders.OrderID" # downstream dependencies
|
81
|
-
infotracker impact -s "+STG.dbo.Orders.OrderID" # upstream sources
|
82
|
-
```
|
83
|
-
|
84
|
-
## Selector Syntax
|
85
|
-
|
86
|
-
InfoTracker supports flexible column selectors:
|
87
|
-
|
88
|
-
| Selector Format | Description | Example |
|
89
|
-
|-----------------|-------------|---------|
|
90
|
-
| `table.column` | Simple format (adds default `dbo` schema) | `Orders.OrderID` |
|
91
|
-
| `schema.table.column` | Schema-qualified format | `dbo.Orders.OrderID` |
|
92
|
-
| `database.schema.table.column` | Database-qualified format | `STG.dbo.Orders.OrderID` |
|
93
|
-
| `schema.table.*` | Table wildcard (all columns) | `dbo.fct_sales.*` |
|
94
|
-
| `..pattern` | Column wildcard (name contains pattern) | `..revenue` |
|
95
|
-
| `.pattern` | Alias for column wildcard | `.orderid` |
|
96
|
-
| Full URI | Complete namespace format | `mssql://localhost/InfoTrackerDW.STG.dbo.Orders.OrderID` |
|
97
|
-
|
98
|
-
### Direction Control
|
99
|
-
- `selector` - downstream dependencies (default)
|
100
|
-
- `+selector` - upstream sources
|
101
|
-
- `selector+` - downstream dependencies (explicit)
|
102
|
-
- `+selector+` - both upstream and downstream
|
103
|
-
|
104
|
-
### Selector Cheat Sheet
|
105
|
-
|
106
|
-
**Table wildcards:**
|
107
|
-
```bash
|
108
|
-
# All columns from a specific table
|
109
|
-
infotracker impact -s "dbo.fct_sales.*"
|
110
|
-
infotracker impact -s "STG.dbo.Orders.*"
|
111
|
-
```
|
112
|
-
|
113
|
-
**Column name matching:**
|
114
|
-
```bash
|
115
|
-
# Find all columns containing "revenue" (case-insensitive)
|
116
|
-
infotracker impact -s "..revenue"
|
117
|
-
|
118
|
-
# Find all columns containing "id"
|
119
|
-
infotracker impact -s "..id"
|
120
|
-
|
121
|
-
# Use wildcards for pattern matching
|
122
|
-
infotracker impact -s "..customer*"
|
123
|
-
```
|
124
|
-
|
125
|
-
**Direction examples:**
|
126
|
-
```bash
|
127
|
-
# Upstream: what feeds into this column
|
128
|
-
infotracker impact -s "+dbo.fct_sales.Revenue"
|
129
|
-
|
130
|
-
# Downstream: what uses this column
|
131
|
-
infotracker impact -s "STG.dbo.Orders.OrderID+"
|
132
|
-
|
133
|
-
# Both directions
|
134
|
-
infotracker impact -s "+dbo.dim_customer.CustomerID+"
|
135
|
-
```
|
136
|
-
|
137
|
-
**Advanced SQL objects:**
|
138
|
-
```bash
|
139
|
-
# Table-valued function columns (upstream)
|
140
|
-
infotracker impact -s "+dbo.fn_customer_orders_tvf.*"
|
141
|
-
|
142
|
-
# Procedure dataset columns (upstream)
|
143
|
-
infotracker impact -s "+dbo.usp_customer_metrics_dataset.*"
|
144
|
-
|
145
|
-
# Temp table lineage from EXEC
|
146
|
-
infotracker impact -s "+#temp_table.*"
|
147
|
-
```
|
148
|
-
|
149
|
-
## Examples
|
150
|
-
|
151
|
-
```bash
|
152
|
-
# Extract lineage (run this first)
|
153
|
-
infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
|
154
|
-
|
155
|
-
# Basic column lineage
|
156
|
-
infotracker impact -s "+dbo.fct_sales.Revenue" # upstream sources
|
157
|
-
infotracker impact -s "STG.dbo.Orders.OrderID+" # downstream usage
|
158
|
-
|
159
|
-
# Wildcard selectors
|
160
|
-
infotracker impact -s "+..revenue+" # all revenue columns (both directions)
|
161
|
-
infotracker impact -s "dbo.fct_sales.*" # all columns from table
|
162
|
-
infotracker --format json impact -s "..customer*" # customer columns (JSON output)
|
163
|
-
|
164
|
-
# Advanced SQL objects (NEW)
|
165
|
-
infotracker impact -s "+dbo.fn_customer_orders_tvf.*" # TVF columns (upstream)
|
166
|
-
infotracker impact -s "+dbo.usp_customer_metrics_dataset.*" # procedure columns (upstream)
|
167
|
-
|
168
|
-
# Depth control
|
169
|
-
infotracker impact -s "+dbo.Orders.OrderID" --max-depth 1
|
170
|
-
|
171
|
-
# Demo the new features with the included examples
|
172
|
-
infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
|
173
|
-
infotracker impact -s "+dbo.fn_customer_orders_inline.*"
|
174
|
-
infotracker impact -s "+dbo.usp_customer_metrics_dataset.TotalRevenue"
|
175
|
-
```
|
176
|
-
|
177
|
-
### Copy-Paste Demo Commands
|
178
|
-
|
179
|
-
Test the new TVF and procedure lineage features:
|
180
|
-
|
181
|
-
```bash
|
182
|
-
# 1. Extract all lineage (including new TVF/procedure support)
|
183
|
-
infotracker extract --sql-dir examples/warehouse/sql --out-dir build/lineage
|
184
|
-
|
185
|
-
# 2. Test TVF lineage
|
186
|
-
infotracker --format text impact -s "+dbo.fn_customer_orders_tvf.*"
|
187
|
-
|
188
|
-
# 3. Test procedure lineage
|
189
|
-
infotracker --format text impact -s "+dbo.usp_customer_metrics_dataset.*"
|
190
|
-
|
191
|
-
# 4. Test column name contains wildcard
|
192
|
-
infotracker --format text impact -s "+..revenue"
|
193
|
-
|
194
|
-
# 5. Show results in JSON format
|
195
|
-
infotracker --format json impact -s "..total*" > tvf_lineage.json
|
196
|
-
```
|
197
|
-
|
198
|
-
## Output Format
|
199
|
-
|
200
|
-
Impact analysis returns these columns:
|
201
|
-
- **from** - Source column (fully qualified)
|
202
|
-
- **to** - Target column (fully qualified)
|
203
|
-
- **direction** - `upstream` or `downstream`
|
204
|
-
- **transformation** - Type of transformation (`IDENTITY`, `ARITHMETIC`, `AGGREGATION`, `CASE_AGGREGATION`, `DATE_FUNCTION`, `WINDOW`, etc.)
|
205
|
-
- **description** - Human-readable transformation description
|
206
|
-
|
207
|
-
Results are automatically deduplicated. Use `--format json` for machine-readable output.
|
208
|
-
|
209
|
-
### New Transformation Types
|
210
|
-
|
211
|
-
The enhanced transformation taxonomy includes:
|
212
|
-
- `ARITHMETIC_AGGREGATION` - Arithmetic operations combined with aggregation functions
|
213
|
-
- `COMPLEX_AGGREGATION` - Multi-step calculations involving multiple aggregations
|
214
|
-
- `DATE_FUNCTION` - Date/time calculations like DATEDIFF, DATEADD
|
215
|
-
- `DATE_FUNCTION_AGGREGATION` - Date functions applied to aggregated results
|
216
|
-
- `CASE_AGGREGATION` - CASE statements applied to aggregated results
|
217
|
-
|
218
|
-
### Advanced Object Support
|
219
|
-
|
220
|
-
InfoTracker now supports advanced SQL Server objects:
|
221
|
-
|
222
|
-
**Table-Valued Functions (TVF):**
|
223
|
-
- Inline TVF (`RETURNS TABLE AS RETURN (SELECT ...)`) - Parsed directly from the SELECT statement
|
224
|
-
- Multi-statement TVF (`RETURNS @table TABLE (...)`) - Extracts the schema from the table variable definition
|
225
|
-
- Function parameters are tracked as filter metadata (don't create columns)
|
226
|
-
|
227
|
-
**Dataset-Returning Procedures:**
|
228
|
-
- Procedures ending with a SELECT statement are treated as dataset sources
|
229
|
-
- Output schema extracted from the final SELECT statement
|
230
|
-
- Parameters tracked as filter metadata affecting lineage scope
|
231
|
-
|
232
|
-
**EXEC into Temp Tables:**
|
233
|
-
- `INSERT INTO #temp EXEC procedure` patterns create edges from procedure columns to temp table columns
|
234
|
-
- Temp table lineage propagates downstream to final targets
|
235
|
-
- Supports complex workflow patterns combining functions, procedures, and temp tables
|
236
|
-
|
237
|
-
## Configuration
|
238
|
-
|
239
|
-
InfoTracker follows this configuration precedence:
|
240
|
-
1. **CLI flags** (highest priority) - override everything
|
241
|
-
2. **infotracker.yml** config file - project defaults
|
242
|
-
3. **Built-in defaults** (lowest priority) - fallback values
|
243
|
-
|
244
|
-
Create an `infotracker.yml` file in your project root:
|
245
|
-
```yaml
|
246
|
-
default_adapter: mssql
|
247
|
-
sql_dir: examples/warehouse/sql
|
248
|
-
out_dir: build/lineage
|
249
|
-
include: ["*.sql"]
|
250
|
-
exclude: ["*_wip.sql"]
|
251
|
-
```
|
252
|
-
|
253
|
-
## Documentation
|
254
|
-
|
255
|
-
For detailed information:
|
256
|
-
- `docs/overview.md` — what it is, goals, scope
|
257
|
-
- `docs/algorithm.md` — how extraction works
|
258
|
-
- `docs/lineage_concepts.md` — core concepts with visuals
|
259
|
-
- `docs/cli_usage.md` — commands and options
|
260
|
-
- `docs/breaking_changes.md` — definition and detection
|
261
|
-
- `docs/edge_cases.md` — SELECT *, UNION, temp tables, etc.
|
262
|
-
- `docs/adapters.md` — interface and MSSQL specifics
|
263
|
-
- `docs/architecture.md` — system and sequence diagrams
|
264
|
-
- `docs/configuration.md` — configuration reference
|
265
|
-
- `docs/openlineage_mapping.md` — how outputs map to OpenLineage
|
266
|
-
- `docs/faq.md` — common questions
|
267
|
-
|
268
|
-
#### Documentation
|
269
|
-
- `docs/overview.md` — what it is, goals, scope
|
270
|
-
- `docs/algorithm.md` — how extraction works
|
271
|
-
- `docs/lineage_concepts.md` — core concepts with visuals
|
272
|
-
- `docs/cli_usage.md` — commands and options
|
273
|
-
- `docs/breaking_changes.md` — definition and detection
|
274
|
-
- `docs/edge_cases.md` — SELECT *, UNION, temp tables, etc.
|
275
|
-
- `docs/advanced_use_cases.md` — tabular functions, procedures returning datasets
|
276
|
-
- `docs/adapters.md` — interface and MSSQL specifics
|
277
|
-
- `docs/architecture.md` — system and sequence diagrams
|
278
|
-
- `docs/configuration.md` — configuration reference
|
279
|
-
- `docs/openlineage_mapping.md` — how outputs map to OpenLineage
|
280
|
-
- `docs/faq.md` — common questions
|
281
|
-
- `docs/dbt_integration.md` — how to use with dbt projects
|
282
|
-
|
283
|
-
|
284
|
-
## License
|
285
|
-
MIT License
|
@@ -1,16 +0,0 @@
|
|
1
|
-
infotracker/__init__.py,sha256=XkoK2R_QULA1UDQqgaLbmKQ2bdsi-lO3mo_wi7dy9Gg,57
|
2
|
-
infotracker/__main__.py,sha256=_iCom0ddZ1myy6ly3ID1dBlLzzjf7iV7Kq9uUfkat74,121
|
3
|
-
infotracker/adapters.py,sha256=UEQeGSS3_fMOc5_Jsrw5aTtmIXlOdqqbHWL2uSgqkGM,3011
|
4
|
-
infotracker/cli.py,sha256=PQQoxqSmu8fSFTeGCdLKIKiY7WTcCzddiANYGc1qqe8,5666
|
5
|
-
infotracker/config.py,sha256=AG3go2kmaN_yTZ-zwVCV0ib7IF7xvLWVnNSEritwqPE,2628
|
6
|
-
infotracker/diff.py,sha256=LmIl3FL5NVxil6AFefrqQBkCCRonueg6BEXrnleVpw8,19796
|
7
|
-
infotracker/engine.py,sha256=JlsrzPoB4Xe4qnTrEZ7emYP0K-zkqTqYOGzZiEZesks,23441
|
8
|
-
infotracker/lineage.py,sha256=GcNflXSO5QhqJj9eJewlWwfL_86N4aHdEgoY3ESD6_U,4863
|
9
|
-
infotracker/models.py,sha256=aQwU_4V69CnnHdgsybd99uvE3fzoQoW-nwn5aMhxdbU,14796
|
10
|
-
infotracker/openlineage_utils.py,sha256=-g9Pkl5hOMQP2Rtu47ItHBC13z6Y0K3gEG6x9GrTJH8,5845
|
11
|
-
infotracker/parser.py,sha256=8NVtCMvyt7l_dIfAydR_VJGB7A_NBLb2T827ac8uMXc,70255
|
12
|
-
infotracker/infotracker.yml,sha256=iTVS246TS4DWLwN-vMiLHPbgDegjGIEpYF5UaL_lTd0,994
|
13
|
-
infotracker-0.2.6.dist-info/METADATA,sha256=Ukx6UAXLMs8kAEiRzWNagDVRP2LRMTfeuNN7byn3nqM,10449
|
14
|
-
infotracker-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
15
|
-
infotracker-0.2.6.dist-info/entry_points.txt,sha256=5ulAYRSvW3SohjeMwlYRX6LoWIHkEtc1qnwxWJQgN2Y,59
|
16
|
-
infotracker-0.2.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|