infotracker-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
infotracker/__init__.py ADDED
@@ -0,0 +1,6 @@
+ __all__ = [
+     "__version__",
+ ]
+
+ __version__ = "0.1.0"
+
infotracker/__main__.py ADDED
@@ -0,0 +1,6 @@
+ """Entry point for the InfoTracker package."""
+
+ from .cli import entrypoint
+
+ if __name__ == "__main__":
+     entrypoint()
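For reference, a minimal sketch of what this entry module enables: `python -m infotracker` executes it, which delegates to `entrypoint()` from `infotracker.cli` (shown further below). The snippet assumes the wheel is installed:

```python
# Run the package as a module, equivalent to `python -m infotracker`.
# With no arguments the CLI prints help and exits (no_args_is_help=True),
# so a SystemExit here is expected.
import runpy

runpy.run_module("infotracker", run_name="__main__")
```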
@@ -0,0 +1,65 @@
+ from __future__ import annotations
+ import json
+ import logging
+ from typing import Protocol, Dict, Any, Optional
+ from .parser import SqlParser
+ from .lineage import OpenLineageGenerator
+
+ logger = logging.getLogger(__name__)
+
+ class Adapter(Protocol):
+     name: str
+     dialect: str
+     def extract_lineage(self, sql: str, object_hint: Optional[str] = None) -> str: ...
+
+ class MssqlAdapter:
+     name = "mssql"
+     dialect = "tsql"
+
+     def __init__(self):
+         self.parser = SqlParser(dialect=self.dialect)
+         self.lineage_generator = OpenLineageGenerator()
+
+     def extract_lineage(self, sql: str, object_hint: Optional[str] = None) -> str:
+         """Extract lineage from SQL and return OpenLineage JSON as a string."""
+         try:
+             obj_info = self.parser.parse_sql_file(sql, object_hint)
+             job_name = f"warehouse/sql/{object_hint}.sql" if object_hint else None
+             json_str = self.lineage_generator.generate(
+                 obj_info, job_name=job_name, object_hint=object_hint
+             )
+             return json_str
+         except Exception as exc:
+             logger.error(f"Failed to extract lineage from SQL: {exc}")
+             error_payload = {
+                 "eventType": "COMPLETE",
+                 "eventTime": "2025-01-01T00:00:00Z",
+                 "run": {"runId": "00000000-0000-0000-0000-000000000000"},
+                 "job": {"namespace": "infotracker/examples",
+                         "name": f"warehouse/sql/{(object_hint or 'unknown')}.sql"},
+                 "inputs": [],
+                 "outputs": [{
+                     "namespace": "mssql://localhost/InfoTrackerDW",
+                     "name": object_hint or "unknown",
+                     "facets": {
+                         "schema": {
+                             "_producer": "https://github.com/OpenLineage/OpenLineage",
+                             "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json",
+                             "fields": [
+                                 {"name": "error", "type": "string", "description": f"Error: {exc}"}
+                             ],
+                         }
+                     },
+                 }],
+             }
+             return json.dumps(error_payload, indent=2, ensure_ascii=False)
+
+ _ADAPTERS: Dict[str, Adapter] = {
+     "mssql": MssqlAdapter(),
+ }
+
+
+ def get_adapter(name: str) -> Adapter:
+     if name not in _ADAPTERS:
+         raise KeyError(f"Unknown adapter '{name}'. Available: {', '.join(_ADAPTERS)}")
+     return _ADAPTERS[name]
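A minimal usage sketch for this adapter registry. The import path `infotracker.adapters` is an assumption (the file header for this hunk is missing from the diff view), and the SQL text is illustrative only:

```python
from infotracker.adapters import get_adapter  # module path assumed from this diff

adapter = get_adapter("mssql")                # registered in _ADAPTERS above
event_json = adapter.extract_lineage(
    "CREATE VIEW dbo.v_orders AS SELECT order_id FROM dbo.orders;",
    object_hint="dbo.v_orders",
)
print(event_json)  # OpenLineage event as JSON, or the error payload on parse failure

try:
    get_adapter("postgres")                   # only "mssql" is registered
except KeyError as err:
    print(err)                                # Unknown adapter 'postgres'. Available: mssql
```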
infotracker/cli.py ADDED
@@ -0,0 +1,150 @@
+ from __future__ import annotations
+
+ import logging
+ import json
+ import sys
+ from pathlib import Path
+ from typing import Optional
+
+ import typer
+ from rich.console import Console
+ from rich.table import Table
+
+ from .config import load_config, RuntimeConfig
+ from .engine import ExtractRequest, ImpactRequest, DiffRequest, Engine
+
+
+ app = typer.Typer(add_completion=False, no_args_is_help=True, help="InfoTracker CLI")
+ console = Console()
+
+
+ def version_callback(value: bool):
+     from . import __version__
+
+     if value:
+         console.print(f"infotracker {__version__}")
+         raise typer.Exit()
+
+
+ @app.callback()
+ def main(
+     ctx: typer.Context,
+     config: Optional[Path] = typer.Option(None, exists=True, dir_okay=False, help="Path to infotracker.yml"),
+     log_level: str = typer.Option("info", help="log level: debug|info|warn|error"),
+     format: str = typer.Option("text", "--format", help="Output format: text|json", show_choices=True),
+     version: bool = typer.Option(False, "--version", callback=version_callback, is_eager=True, help="Show version and exit"),
+ ):
+     ctx.ensure_object(dict)
+     cfg = load_config(config)
+     # CLI flags take precedence over config-file values
+     cfg.log_level = log_level
+     cfg.output_format = format
+     ctx.obj["cfg"] = cfg
+     level = getattr(logging, cfg.log_level.upper(), logging.INFO)
+     logging.basicConfig(level=level)
+
+
+ @app.command()
+ def extract(
+     ctx: typer.Context,
+     sql_dir: Optional[Path] = typer.Option(None, exists=True, file_okay=False),
+     out_dir: Optional[Path] = typer.Option(None, file_okay=False),
+     adapter: Optional[str] = typer.Option(None),
+     catalog: Optional[Path] = typer.Option(None, exists=True),
+     fail_on_warn: bool = typer.Option(False),
+     include: list[str] = typer.Option([], "--include", help="Glob include pattern"),
+     exclude: list[str] = typer.Option([], "--exclude", help="Glob exclude pattern"),
+ ):
+     cfg: RuntimeConfig = ctx.obj["cfg"]
+     engine = Engine(cfg)
+     req = ExtractRequest(
+         sql_dir=sql_dir or Path(cfg.sql_dir),
+         out_dir=out_dir or Path(cfg.out_dir),
+         adapter=adapter or cfg.default_adapter,
+         catalog=catalog,
+         include=include or cfg.include,
+         exclude=exclude or cfg.exclude,
+         fail_on_warn=fail_on_warn,
+     )
+     result = engine.run_extract(req)
+     _emit(result, cfg.output_format)
+
+     # Handle fail_on_warn
+     if fail_on_warn and result.get("warnings", 0) > 0:
+         console.print(f"[red]ERROR: {result['warnings']} warnings detected with --fail-on-warn enabled[/red]")
+         raise typer.Exit(1)
+
+
+ @app.command()
+ def impact(
+     ctx: typer.Context,
+     selector: str = typer.Option(..., "-s", "--selector", help="[+]db.schema.object.column[+] - use + to indicate direction"),
+     max_depth: Optional[int] = typer.Option(None),
+     out: Optional[Path] = typer.Option(None),
+     graph_dir: Optional[Path] = typer.Option(None, "--graph-dir", help="Directory containing column_graph.json"),
+ ):
+     cfg: RuntimeConfig = ctx.obj["cfg"]
+     engine = Engine(cfg)
+     req = ImpactRequest(selector=selector, max_depth=max_depth or 2, graph_dir=graph_dir)
+     result = engine.run_impact(req)
+     _emit(result, cfg.output_format, out)
+
+
+ @app.command()
+ def diff(
+     ctx: typer.Context,
+     base: str = typer.Option(..., help="git ref name for base"),
+     head: str = typer.Option(..., help="git ref name for head"),
+     sql_dir: Optional[Path] = typer.Option(None, exists=True, file_okay=False),
+     adapter: Optional[str] = typer.Option(None),
+     severity_threshold: str = typer.Option("BREAKING"),
+ ):
+     cfg: RuntimeConfig = ctx.obj["cfg"]
+     engine = Engine(cfg)
+     req = DiffRequest(
+         base=base,
+         head=head,
+         sql_dir=sql_dir or Path(cfg.sql_dir),
+         adapter=adapter or cfg.default_adapter,
+         severity_threshold=severity_threshold,
+     )
+     result = engine.run_diff(req)
+     _emit(result, cfg.output_format)
+     raise typer.Exit(code=result.get("exit_code", 0))
+
+
+ def _emit(payload: dict, fmt: str, out_path: Optional[Path] = None) -> None:
+     from rich.table import Table
+     from rich.console import Console
+     import json
+
+     console = Console()
+
+     if fmt == "json":
+         console.print_json(json.dumps(payload, ensure_ascii=False))
+         return
+
+     # fmt == "text"
+     table = Table(show_header=True, header_style="bold")
+     cols = payload.get("columns", [])
+     for k in cols:
+         table.add_column(str(k))
+
+     for r in payload.get("rows", []):
+         if isinstance(r, dict):
+             table.add_row(*[str(r.get(c, "")) for c in cols])
+         else:
+             # list/tuple: match by position, padding or trimming to the column count
+             table.add_row(*[str(x) for x in (list(r) + [""] * max(0, len(cols) - len(r)))][:len(cols)])
+
+     console.print(table)
+
+
+
+ def entrypoint() -> None:
+     app()
+
+
+ if __name__ == "__main__":
+     entrypoint()
+
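A short sketch of exercising this CLI in-process with typer's test runner. It assumes the wheel and its `Engine` dependencies are importable; the selector value is illustrative:

```python
from typer.testing import CliRunner

from infotracker.cli import app

runner = CliRunner()

# Global options belong to the callback, so they precede the subcommand.
result = runner.invoke(app, ["--version"])
print(result.output)        # e.g. "infotracker 0.1.0"

# Equivalent to: infotracker --format json impact -s "InfoTrackerDW.dbo.orders.order_id+"
result = runner.invoke(
    app, ["--format", "json", "impact", "-s", "InfoTrackerDW.dbo.orders.order_id+"]
)
print(result.exit_code)     # nonzero if the engine raised or the graph is missing
```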
infotracker/config.py ADDED
@@ -0,0 +1,57 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import List, Optional
+
+ import yaml
+
+
+ @dataclass
+ class RuntimeConfig:
+     default_adapter: str = "mssql"
+     default_database: Optional[str] = None
+     sql_dir: str = "examples/warehouse/sql"
+     out_dir: str = "build/lineage"
+     include: List[str] = field(default_factory=lambda: ["*.sql"])
+     exclude: List[str] = field(default_factory=list)
+     severity_threshold: str = "BREAKING"
+     ignore: List[str] = field(default_factory=list)
+     catalog: Optional[str] = None
+     log_level: str = "info"
+     output_format: str = "text"
+
+
+ def load_config(path: Optional[Path]) -> RuntimeConfig:
+     cfg = RuntimeConfig()
+     if path is None:
+         # Try repo root default
+         default = Path("infotracker.yml")
+         if default.exists():
+             path = default
+     if path and path.exists():
+         data = yaml.safe_load(path.read_text()) or {}
+         for k, v in data.items():
+             if hasattr(cfg, k):
+                 setattr(cfg, k, v)
+
+     # Load .infotrackerignore if it exists
+     ignore_file = Path(".infotrackerignore")
+     patterns: list[str] = []
+
+     if ignore_file.exists():
+         try:
+             for line in ignore_file.read_text(encoding="utf-8").splitlines():
+                 # strip inline comments and surrounding whitespace
+                 line = line.split("#", 1)[0].strip()
+                 if line:
+                     patterns.append(line)
+         except Exception as e:
+             print(f"Warning: failed to load .infotrackerignore: {e}")
+
+     # merge with the ignore patterns from the config file
+     base = list(getattr(cfg, "ignore", []) or [])
+     cfg.ignore = sorted(set(base + patterns))
+
+     return cfg
+
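A brief sketch of `load_config` resolution, assuming an `infotracker.yml` in the current working directory. Note that unknown keys are silently ignored, since only attributes already present on `RuntimeConfig` are set:

```python
from pathlib import Path

from infotracker.config import load_config

# Write a sample config in the working directory for the demonstration.
Path("infotracker.yml").write_text(
    "default_adapter: mssql\n"
    "sql_dir: warehouse/sql\n"
    "ignore:\n"
    "  - scratch/*.sql\n",
    encoding="utf-8",
)

cfg = load_config(None)   # no explicit path: falls back to ./infotracker.yml
print(cfg.sql_dir)        # "warehouse/sql"
print(cfg.ignore)         # config patterns merged and sorted with .infotrackerignore
```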
infotracker/diff.py ADDED
@@ -0,0 +1,291 @@
+ """
+ Breaking change detection for InfoTracker.
+ """
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from enum import Enum
+ from typing import Dict, List, Optional, Set, Any
+
+ from .models import ObjectInfo, ColumnSchema, ColumnLineage, TransformationType
+
+
+ class ChangeType(Enum):
+     """Types of changes that can be detected."""
+     COLUMN_ADDED = "COLUMN_ADDED"
+     COLUMN_REMOVED = "COLUMN_REMOVED"
+     COLUMN_RENAMED = "COLUMN_RENAMED"
+     COLUMN_TYPE_CHANGED = "COLUMN_TYPE_CHANGED"
+     COLUMN_NULLABILITY_CHANGED = "COLUMN_NULLABILITY_CHANGED"
+     COLUMN_ORDER_CHANGED = "COLUMN_ORDER_CHANGED"
+     LINEAGE_CHANGED = "LINEAGE_CHANGED"
+     OBJECT_ADDED = "OBJECT_ADDED"
+     OBJECT_REMOVED = "OBJECT_REMOVED"
+     OBJECT_TYPE_CHANGED = "OBJECT_TYPE_CHANGED"
+
+
+ class Severity(Enum):
+     """Severity levels for changes."""
+     BREAKING = "BREAKING"
+     POTENTIALLY_BREAKING = "POTENTIALLY_BREAKING"
+     NON_BREAKING = "NON_BREAKING"
+
+
+ @dataclass
+ class Change:
+     """Represents a single change between two versions."""
+     change_type: ChangeType
+     severity: Severity
+     object_name: str
+     column_name: Optional[str] = None
+     old_value: Any = None
+     new_value: Any = None
+     description: str = ""
+     impact_count: int = 0  # Number of downstream columns affected
+
+
+ class BreakingChangeDetector:
+     """Detects breaking changes between two sets of object information."""
+
+     def __init__(self):
+         self.changes: List[Change] = []
+
+     def detect_changes(self, base_objects: List[ObjectInfo], head_objects: List[ObjectInfo]) -> List[Change]:
+         """Detect changes between base and head object lists."""
+         self.changes = []
+
+         # Create lookup dictionaries
+         base_map = {obj.name.lower(): obj for obj in base_objects}
+         head_map = {obj.name.lower(): obj for obj in head_objects}
+
+         # Find object-level changes
+         self._detect_object_changes(base_map, head_map)
+
+         # Find schema changes for existing objects
+         common_objects = set(base_map.keys()) & set(head_map.keys())
+         for obj_name in common_objects:
+             self._detect_schema_changes(base_map[obj_name], head_map[obj_name])
+             self._detect_lineage_changes(base_map[obj_name], head_map[obj_name])
+
+         return self.changes
+
+     def _detect_object_changes(self, base_map: Dict[str, ObjectInfo], head_map: Dict[str, ObjectInfo]) -> None:
+         """Detect object additions, removals, and type changes."""
+         base_names = set(base_map.keys())
+         head_names = set(head_map.keys())
+
+         # Object additions
+         for added_name in head_names - base_names:
+             obj = head_map[added_name]
+             self.changes.append(Change(
+                 change_type=ChangeType.OBJECT_ADDED,
+                 severity=Severity.NON_BREAKING,
+                 object_name=obj.name,
+                 description=f"Added {obj.object_type} '{obj.name}'"
+             ))
+
+         # Object removals
+         for removed_name in base_names - head_names:
+             obj = base_map[removed_name]
+             self.changes.append(Change(
+                 change_type=ChangeType.OBJECT_REMOVED,
+                 severity=Severity.BREAKING,
+                 object_name=obj.name,
+                 description=f"Removed {obj.object_type} '{obj.name}'"
+             ))
+
+         # Object type changes
+         for common_name in base_names & head_names:
+             base_obj = base_map[common_name]
+             head_obj = head_map[common_name]
+
+             if base_obj.object_type != head_obj.object_type:
+                 self.changes.append(Change(
+                     change_type=ChangeType.OBJECT_TYPE_CHANGED,
+                     severity=Severity.BREAKING,
+                     object_name=base_obj.name,
+                     old_value=base_obj.object_type,
+                     new_value=head_obj.object_type,
+                     description=f"Changed object type from {base_obj.object_type} to {head_obj.object_type}"
+                 ))
+
+     def _detect_schema_changes(self, base_obj: ObjectInfo, head_obj: ObjectInfo) -> None:
+         """Detect schema changes within an object."""
+         base_columns = {col.name.lower(): col for col in base_obj.schema.columns}
+         head_columns = {col.name.lower(): col for col in head_obj.schema.columns}
+
+         base_names = set(base_columns.keys())
+         head_names = set(head_columns.keys())
+
+         # Column additions
+         for added_name in head_names - base_names:
+             col = head_columns[added_name]
+             severity = Severity.POTENTIALLY_BREAKING  # Could affect SELECT *
+             self.changes.append(Change(
+                 change_type=ChangeType.COLUMN_ADDED,
+                 severity=severity,
+                 object_name=base_obj.name,
+                 column_name=col.name,
+                 new_value=f"{col.data_type} {'NULL' if col.nullable else 'NOT NULL'}",
+                 description=f"Added column '{col.name}' ({col.data_type})"
+             ))
+
+         # Column removals
+         for removed_name in base_names - head_names:
+             col = base_columns[removed_name]
+             self.changes.append(Change(
+                 change_type=ChangeType.COLUMN_REMOVED,
+                 severity=Severity.BREAKING,
+                 object_name=base_obj.name,
+                 column_name=col.name,
+                 old_value=f"{col.data_type} {'NULL' if col.nullable else 'NOT NULL'}",
+                 description=f"Removed column '{col.name}'"
+             ))
+
+         # Column changes for existing columns
+         for common_name in base_names & head_names:
+             base_col = base_columns[common_name]
+             head_col = head_columns[common_name]
+
+             # Type changes
+             if base_col.data_type != head_col.data_type:
+                 severity = self._classify_type_change_severity(base_col.data_type, head_col.data_type)
+                 self.changes.append(Change(
+                     change_type=ChangeType.COLUMN_TYPE_CHANGED,
+                     severity=severity,
+                     object_name=base_obj.name,
+                     column_name=base_col.name,
+                     old_value=base_col.data_type,
+                     new_value=head_col.data_type,
+                     description=f"Changed column '{base_col.name}' type from {base_col.data_type} to {head_col.data_type}"
+                 ))
+
+             # Nullability changes
+             if base_col.nullable != head_col.nullable:
+                 severity = Severity.BREAKING if not head_col.nullable else Severity.POTENTIALLY_BREAKING
+                 self.changes.append(Change(
+                     change_type=ChangeType.COLUMN_NULLABILITY_CHANGED,
+                     severity=severity,
+                     object_name=base_obj.name,
+                     column_name=base_col.name,
+                     old_value="NULL" if base_col.nullable else "NOT NULL",
+                     new_value="NULL" if head_col.nullable else "NOT NULL",
+                     description=f"Changed column '{base_col.name}' nullability"
+                 ))
+
+             # Ordinal changes (column order)
+             if base_col.ordinal != head_col.ordinal:
+                 self.changes.append(Change(
+                     change_type=ChangeType.COLUMN_ORDER_CHANGED,
+                     severity=Severity.POTENTIALLY_BREAKING,
+                     object_name=base_obj.name,
+                     column_name=base_col.name,
+                     old_value=base_col.ordinal,
+                     new_value=head_col.ordinal,
+                     description=f"Changed column '{base_col.name}' position from {base_col.ordinal} to {head_col.ordinal}"
+                 ))
+
+     def _detect_lineage_changes(self, base_obj: ObjectInfo, head_obj: ObjectInfo) -> None:
+         """Detect lineage changes for columns."""
+         base_lineage = {lin.output_column.lower(): lin for lin in base_obj.lineage}
+         head_lineage = {lin.output_column.lower(): lin for lin in head_obj.lineage}
+
+         # Check for lineage changes in common columns
+         for column_name in set(base_lineage.keys()) & set(head_lineage.keys()):
+             base_lin = base_lineage[column_name]
+             head_lin = head_lineage[column_name]
+
+             # Compare transformation type
+             if base_lin.transformation_type != head_lin.transformation_type:
+                 self.changes.append(Change(
+                     change_type=ChangeType.LINEAGE_CHANGED,
+                     severity=Severity.POTENTIALLY_BREAKING,
+                     object_name=base_obj.name,
+                     column_name=base_lin.output_column,
+                     old_value=base_lin.transformation_type.value,
+                     new_value=head_lin.transformation_type.value,
+                     description=f"Changed transformation type for '{base_lin.output_column}'"
+                 ))
+
+             # Compare input fields
+             base_inputs = {(ref.table_name, ref.column_name) for ref in base_lin.input_fields}
+             head_inputs = {(ref.table_name, ref.column_name) for ref in head_lin.input_fields}
+
+             if base_inputs != head_inputs:
+                 self.changes.append(Change(
+                     change_type=ChangeType.LINEAGE_CHANGED,
+                     severity=Severity.POTENTIALLY_BREAKING,
+                     object_name=base_obj.name,
+                     column_name=base_lin.output_column,
+                     old_value=len(base_inputs),
+                     new_value=len(head_inputs),
+                     description=f"Changed input dependencies for '{base_lin.output_column}'"
+                 ))
+
+     def _classify_type_change_severity(self, old_type: str, new_type: str) -> Severity:
+         """Classify the severity of a type change."""
+         old_type = old_type.upper()
+         new_type = new_type.upper()
+
+         # Common safe widenings
+         safe_widenings = [
+             ("INT", "BIGINT"),
+             ("DECIMAL(10,2)", "DECIMAL(18,2)"),
+             ("VARCHAR(50)", "VARCHAR(100)"),
+             ("NVARCHAR(50)", "NVARCHAR(100)"),
+         ]
+
+         if (old_type, new_type) in safe_widenings:
+             return Severity.NON_BREAKING
+
+         # Same-family changes (VARCHAR -> VARCHAR, DECIMAL -> DECIMAL) may narrow
+         if ("VARCHAR" in old_type and "VARCHAR" in new_type or
+                 "DECIMAL" in old_type and "DECIMAL" in new_type):
+             return Severity.POTENTIALLY_BREAKING
+
+         # Default to breaking for type changes
+         return Severity.BREAKING
+
+     def classify_by_severity(self) -> Dict[Severity, List[Change]]:
+         """Group changes by severity level."""
+         result = {severity: [] for severity in Severity}
+         for change in self.changes:
+             result[change.severity].append(change)
+         return result
+
+     def get_breaking_count(self) -> int:
+         """Get count of breaking changes."""
+         return len([c for c in self.changes if c.severity == Severity.BREAKING])
+
+     def get_summary(self) -> Dict[str, Any]:
+         """Get summary of changes."""
+         by_severity = self.classify_by_severity()
+         return {
+             "total_changes": len(self.changes),
+             "breaking": len(by_severity[Severity.BREAKING]),
+             "potentially_breaking": len(by_severity[Severity.POTENTIALLY_BREAKING]),
+             "non_breaking": len(by_severity[Severity.NON_BREAKING]),
+             "changes_by_type": self._count_by_type(),
+             "changes": [self._change_to_dict(c) for c in self.changes]
+         }
+
+     def _count_by_type(self) -> Dict[str, int]:
+         """Count changes by type."""
+         counts = {}
+         for change in self.changes:
+             change_type = change.change_type.value
+             counts[change_type] = counts.get(change_type, 0) + 1
+         return counts
+
+     def _change_to_dict(self, change: Change) -> Dict[str, Any]:
+         """Convert a change to a dictionary for JSON serialization."""
+         return {
+             "change_type": change.change_type.value,
+             "severity": change.severity.value,
+             "object_name": change.object_name,
+             "column_name": change.column_name,
+             "old_value": change.old_value,
+             "new_value": change.new_value,
+             "description": change.description,
+             "impact_count": change.impact_count
+         }
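Finally, a sketch of driving `BreakingChangeDetector` directly. The real `ObjectInfo`/`ColumnSchema` types live in `infotracker.models`, which this diff does not show, so the stand-in dataclasses below are hypothetical and only mimic the attributes the detector actually reads:

```python
from dataclasses import dataclass, field
from typing import List

from infotracker.diff import BreakingChangeDetector

@dataclass
class StubColumn:            # hypothetical stand-in for ColumnSchema
    name: str
    data_type: str
    nullable: bool = True
    ordinal: int = 0

@dataclass
class StubSchema:
    columns: List[StubColumn] = field(default_factory=list)

@dataclass
class StubObject:            # hypothetical stand-in for ObjectInfo
    name: str
    object_type: str
    schema: StubSchema
    lineage: list = field(default_factory=list)

base = [StubObject("dbo.orders", "table",
                   StubSchema([StubColumn("order_id", "INT", False, 0)]))]
head = [StubObject("dbo.orders", "table",
                   StubSchema([StubColumn("order_id", "BIGINT", False, 0)]))]

detector = BreakingChangeDetector()
detector.detect_changes(base, head)
summary = detector.get_summary()
print(summary["non_breaking"])   # 1: INT -> BIGINT is listed as a safe widening
print(detector.get_breaking_count())  # 0
```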