InfoTracker 0.1.0__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
infotracker/adapters.py CHANGED
@@ -16,9 +16,15 @@ class MssqlAdapter:
16
16
  name = "mssql"
17
17
  dialect = "tsql"
18
18
 
19
- def __init__(self):
19
+ def __init__(self, config=None):
20
20
  self.parser = SqlParser(dialect=self.dialect)
21
- self.lineage_generator = OpenLineageGenerator()
21
+ # Use namespace from config if available
22
+ namespace = "mssql://localhost/InfoTrackerDW" # default
23
+ if config and hasattr(config, 'openlineage'):
24
+ namespace = f"{config.openlineage.namespace}://localhost/InfoTrackerDW"
25
+ if config and hasattr(config, 'default_database'):
26
+ self.parser.set_default_database(config.default_database)
27
+ self.lineage_generator = OpenLineageGenerator(namespace=namespace)
22
28
 
23
29
  def extract_lineage(self, sql: str, object_hint: Optional[str] = None) -> str:
24
30
  """Extract lineage from SQL and return OpenLineage JSON as string."""
@@ -54,12 +60,13 @@ class MssqlAdapter:
54
60
  }
55
61
  return json.dumps(error_payload, indent=2, ensure_ascii=False)
56
62
 
57
- _ADAPTERS: Dict[str, Adapter] = {
58
- "mssql": MssqlAdapter(),
59
- }
63
+ _ADAPTERS: Dict[str, Adapter] = {}
60
64
 
61
65
 
62
- def get_adapter(name: str) -> Adapter:
66
+ def get_adapter(name: str, config=None) -> Adapter:
63
67
  if name not in _ADAPTERS:
64
- raise KeyError(f"Unknown adapter '{name}'. Available: {', '.join(_ADAPTERS)}")
68
+ if name == "mssql":
69
+ _ADAPTERS[name] = MssqlAdapter(config)
70
+ else:
71
+ raise KeyError(f"Unknown adapter '{name}'. Available: mssql")
65
72
  return _ADAPTERS[name]
infotracker/cli.py CHANGED
@@ -93,23 +93,20 @@ def impact(
93
93
  @app.command()
94
94
  def diff(
95
95
  ctx: typer.Context,
96
- base: str = typer.Option(..., help="git ref name for base"),
97
- head: str = typer.Option(..., help="git ref name for head"),
98
- sql_dir: Optional[Path] = typer.Option(None, exists=True, file_okay=False),
99
- adapter: Optional[str] = typer.Option(None),
100
- severity_threshold: str = typer.Option("BREAKING"),
96
+ base: Optional[Path] = typer.Option(None, "--base", help="Directory containing base OpenLineage artifacts"),
97
+ head: Optional[Path] = typer.Option(None, "--head", help="Directory containing head OpenLineage artifacts"),
98
+ format: str = typer.Option("text", "--format", help="Output format: text|json"),
101
99
  ):
100
+ """Compare two sets of OpenLineage artifacts for breaking changes."""
102
101
  cfg: RuntimeConfig = ctx.obj["cfg"]
103
102
  engine = Engine(cfg)
104
- req = DiffRequest(
105
- base=base,
106
- head=head,
107
- sql_dir=sql_dir or Path(cfg.sql_dir),
108
- adapter=adapter or cfg.default_adapter,
109
- severity_threshold=severity_threshold,
110
- )
111
- result = engine.run_diff(req)
112
- _emit(result, cfg.output_format)
103
+
104
+ if not base or not head:
105
+ console.print("[red]ERROR: Both --base and --head directories are required[/red]")
106
+ raise typer.Exit(1)
107
+
108
+ result = engine.run_diff(base, head, format)
109
+ _emit(result, format)
113
110
  raise typer.Exit(code=result.get("exit_code", 0))
114
111
 
115
112
 
@@ -121,23 +118,42 @@ def _emit(payload: dict, fmt: str, out_path: Optional[Path] = None) -> None:
121
118
  console = Console()
122
119
 
123
120
  if fmt == "json":
124
- console.print_json(json.dumps(payload, ensure_ascii=False))
125
- return
126
-
127
- # fmt == "text"
128
- table = Table(show_header=True, header_style="bold")
129
- cols = payload.get("columns", [])
130
- for k in cols:
131
- table.add_column(str(k))
132
-
133
- for r in payload.get("rows", []):
134
- if isinstance(r, dict):
135
- table.add_row(*[str(r.get(c, "")) for c in cols])
121
+ content = json.dumps(payload, ensure_ascii=False, indent=2)
122
+ else:
123
+ # fmt == "text" - we'll capture the table as a string
124
+ table = Table(show_header=True, header_style="bold")
125
+ cols = payload.get("columns", [])
126
+ for k in cols:
127
+ table.add_column(str(k))
128
+
129
+ for r in payload.get("rows", []):
130
+ if isinstance(r, dict):
131
+ table.add_row(*[str(r.get(c, "")) for c in cols])
132
+ else:
133
+ # lista / krotka — dopasuj po pozycji
134
+ table.add_row(*[str(x) for x in (list(r) + [""] * max(0, len(cols) - len(r)))][:len(cols)])
135
+
136
+ if out_path:
137
+ # Capture table as string for file output
138
+ from io import StringIO
139
+ string_io = StringIO()
140
+ temp_console = Console(file=string_io, width=120)
141
+ temp_console.print(table)
142
+ content = string_io.getvalue()
136
143
  else:
137
- # lista / krotka — dopasuj po pozycji
138
- table.add_row(*[str(x) for x in (list(r) + [""] * max(0, len(cols) - len(r)))][:len(cols)])
139
-
140
- console.print(table)
144
+ # Print to stdout
145
+ console.print(table)
146
+ return
147
+
148
+ # Write to file if out_path is specified
149
+ if out_path:
150
+ out_path.parent.mkdir(parents=True, exist_ok=True)
151
+ out_path.write_text(content, encoding='utf-8')
152
+ console.print(f"[green]Output written to {out_path}[/green]")
153
+ else:
154
+ # Print to stdout for JSON format
155
+ if fmt == "json":
156
+ console.print_json(content)
141
157
 
142
158
 
143
159
 
infotracker/config.py CHANGED
@@ -7,6 +7,11 @@ from typing import List, Optional
7
7
  import yaml
8
8
 
9
9
 
10
+ @dataclass
11
+ class OpenLineageCfg:
12
+ namespace: str = "mssql"
13
+
14
+
10
15
  @dataclass
11
16
  class RuntimeConfig:
12
17
  default_adapter: str = "mssql"
@@ -20,6 +25,7 @@ class RuntimeConfig:
20
25
  catalog: Optional[str] = None
21
26
  log_level: str = "info"
22
27
  output_format: str = "text"
28
+ openlineage: OpenLineageCfg = field(default_factory=OpenLineageCfg)
23
29
 
24
30
 
25
31
  def load_config(path: Optional[Path]) -> RuntimeConfig:
infotracker/diff.py CHANGED
@@ -3,7 +3,7 @@ Breaking change detection for InfoTracker.
3
3
  """
4
4
  from __future__ import annotations
5
5
 
6
- from dataclasses import dataclass
6
+ from dataclasses import dataclass, field
7
7
  from enum import Enum
8
8
  from typing import Dict, List, Optional, Set, Any
9
9
 
@@ -44,12 +44,57 @@ class Change:
44
44
  impact_count: int = 0 # Number of downstream columns affected
45
45
 
46
46
 
47
+ @dataclass
48
+ class DiffReport:
49
+ """Report containing detected changes and metadata."""
50
+ changes: List[Change] = field(default_factory=list)
51
+
52
+ @property
53
+ def has_breaking(self) -> bool:
54
+ """Check if there are any breaking changes."""
55
+ return any(c.severity == Severity.BREAKING for c in self.changes)
56
+
57
+ @property
58
+ def rows(self) -> List[List[str]]:
59
+ """Get rows for table display."""
60
+ return [
61
+ [c.object_name, c.change_type.value, c.severity.value, c.description]
62
+ for c in self.changes
63
+ ]
64
+
65
+ @property
66
+ def columns(self) -> List[str]:
67
+ """Get column headers for table display."""
68
+ return ["object", "change_type", "severity", "description"]
69
+
70
+
71
+ @dataclass
72
+ class DiffResult:
73
+ """Result of diff operation."""
74
+ report: DiffReport
75
+ exit_code: int
76
+
77
+ @property
78
+ def rows(self) -> List[List[str]]:
79
+ return self.report.rows
80
+
81
+ @property
82
+ def columns(self) -> List[str]:
83
+ return self.report.columns
84
+
85
+
47
86
  class BreakingChangeDetector:
48
87
  """Detects breaking changes between two sets of object information."""
49
88
 
50
89
  def __init__(self):
51
90
  self.changes: List[Change] = []
52
91
 
92
+ def compare(self, base_objects: List[ObjectInfo], head_objects: List[ObjectInfo]) -> DiffReport:
93
+ """Compare base and head objects and return a diff report."""
94
+ self.changes = []
95
+ self.detect_changes(base_objects, head_objects)
96
+ return DiffReport(changes=self.changes.copy())
97
+
53
98
  def detect_changes(self, base_objects: List[ObjectInfo], head_objects: List[ObjectInfo]) -> List[Change]:
54
99
  """Detect changes between base and head object lists."""
55
100
  self.changes = []
@@ -117,8 +162,18 @@ class BreakingChangeDetector:
117
162
  base_names = set(base_columns.keys())
118
163
  head_names = set(head_columns.keys())
119
164
 
120
- # Column additions
121
- for added_name in head_names - base_names:
165
+ # Detect column renames before processing additions/removals
166
+ removed = base_names - head_names
167
+ added = head_names - base_names
168
+ renamed_pairs = self._detect_column_renames(base_columns, head_columns, removed, added, base_obj, head_obj)
169
+
170
+ # Remove renamed columns from added/removed sets
171
+ for old_name, new_name in renamed_pairs:
172
+ removed.discard(old_name)
173
+ added.discard(new_name)
174
+
175
+ # Column additions (after removing renames)
176
+ for added_name in added:
122
177
  col = head_columns[added_name]
123
178
  severity = Severity.POTENTIALLY_BREAKING # Could affect SELECT *
124
179
  self.changes.append(Change(
@@ -130,8 +185,8 @@ class BreakingChangeDetector:
130
185
  description=f"Added column '{col.name}' ({col.data_type})"
131
186
  ))
132
187
 
133
- # Column removals
134
- for removed_name in base_names - head_names:
188
+ # Column removals (after removing renames)
189
+ for removed_name in removed:
135
190
  col = base_columns[removed_name]
136
191
  self.changes.append(Change(
137
192
  change_type=ChangeType.COLUMN_REMOVED,
@@ -142,48 +197,58 @@ class BreakingChangeDetector:
142
197
  description=f"Removed column '{col.name}'"
143
198
  ))
144
199
 
145
- # Column changes for existing columns
146
- for common_name in base_names & head_names:
147
- base_col = base_columns[common_name]
148
- head_col = head_columns[common_name]
149
-
150
- # Type changes
151
- if base_col.data_type != head_col.data_type:
152
- severity = self._classify_type_change_severity(base_col.data_type, head_col.data_type)
153
- self.changes.append(Change(
154
- change_type=ChangeType.COLUMN_TYPE_CHANGED,
155
- severity=severity,
156
- object_name=base_obj.name,
157
- column_name=base_col.name,
158
- old_value=base_col.data_type,
159
- new_value=head_col.data_type,
160
- description=f"Changed column '{base_col.name}' type from {base_col.data_type} to {head_col.data_type}"
161
- ))
162
-
163
- # Nullability changes
164
- if base_col.nullable != head_col.nullable:
165
- severity = Severity.BREAKING if not head_col.nullable else Severity.POTENTIALLY_BREAKING
166
- self.changes.append(Change(
167
- change_type=ChangeType.COLUMN_NULLABILITY_CHANGED,
168
- severity=severity,
169
- object_name=base_obj.name,
170
- column_name=base_col.name,
171
- old_value="NULL" if base_col.nullable else "NOT NULL",
172
- new_value="NULL" if head_col.nullable else "NOT NULL",
173
- description=f"Changed column '{base_col.name}' nullability"
174
- ))
175
-
176
- # Ordinal changes (column order)
177
- if base_col.ordinal != head_col.ordinal:
178
- self.changes.append(Change(
179
- change_type=ChangeType.COLUMN_ORDER_CHANGED,
180
- severity=Severity.POTENTIALLY_BREAKING,
181
- object_name=base_obj.name,
182
- column_name=base_col.name,
183
- old_value=base_col.ordinal,
184
- new_value=head_col.ordinal,
185
- description=f"Changed column '{base_col.name}' position from {base_col.ordinal} to {head_col.ordinal}"
186
- ))
200
+ # Column changes for existing columns (including renamed ones)
201
+ for common_name in (base_names & head_names) | {new_name for _, new_name in renamed_pairs}:
202
+ # For renamed columns, use the new name to find the head column
203
+ if common_name in head_columns:
204
+ head_col = head_columns[common_name]
205
+ # Find corresponding base column (could be renamed)
206
+ base_col = None
207
+ for old_name, new_name in renamed_pairs:
208
+ if new_name == common_name:
209
+ base_col = base_columns[old_name]
210
+ break
211
+ if not base_col and common_name in base_columns:
212
+ base_col = base_columns[common_name]
213
+
214
+ if base_col:
215
+ # Type changes
216
+ if base_col.data_type != head_col.data_type:
217
+ severity = self._classify_type_change_severity(base_col.data_type, head_col.data_type)
218
+ self.changes.append(Change(
219
+ change_type=ChangeType.COLUMN_TYPE_CHANGED,
220
+ severity=severity,
221
+ object_name=base_obj.name,
222
+ column_name=head_col.name, # Use new name for renamed columns
223
+ old_value=base_col.data_type,
224
+ new_value=head_col.data_type,
225
+ description=f"Changed column '{head_col.name}' type from {base_col.data_type} to {head_col.data_type}"
226
+ ))
227
+
228
+ # Nullability changes
229
+ if base_col.nullable != head_col.nullable:
230
+ severity = Severity.BREAKING if not head_col.nullable else Severity.POTENTIALLY_BREAKING
231
+ self.changes.append(Change(
232
+ change_type=ChangeType.COLUMN_NULLABILITY_CHANGED,
233
+ severity=severity,
234
+ object_name=base_obj.name,
235
+ column_name=head_col.name, # Use new name for renamed columns
236
+ old_value="NULL" if base_col.nullable else "NOT NULL",
237
+ new_value="NULL" if head_col.nullable else "NOT NULL",
238
+ description=f"Changed column '{head_col.name}' nullability"
239
+ ))
240
+
241
+ # Ordinal changes (column order)
242
+ if base_col.ordinal != head_col.ordinal:
243
+ self.changes.append(Change(
244
+ change_type=ChangeType.COLUMN_ORDER_CHANGED,
245
+ severity=Severity.POTENTIALLY_BREAKING,
246
+ object_name=base_obj.name,
247
+ column_name=head_col.name, # Use new name for renamed columns
248
+ old_value=base_col.ordinal,
249
+ new_value=head_col.ordinal,
250
+ description=f"Changed column '{head_col.name}' position from {base_col.ordinal} to {head_col.ordinal}"
251
+ ))
187
252
 
188
253
  def _detect_lineage_changes(self, base_obj: ObjectInfo, head_obj: ObjectInfo) -> None:
189
254
  """Detect lineage changes for columns."""
@@ -222,6 +287,102 @@ class BreakingChangeDetector:
222
287
  description=f"Changed input dependencies for '{base_lin.output_column}'"
223
288
  ))
224
289
 
290
+ def _detect_column_renames(self, base_columns: Dict[str, ColumnSchema], head_columns: Dict[str, ColumnSchema],
291
+ removed: Set[str], added: Set[str], base_obj: ObjectInfo, head_obj: ObjectInfo) -> List[tuple[str, str]]:
292
+ """
293
+ Detect column renames using scoring algorithm.
294
+ Returns list of (old_name, new_name) tuples.
295
+ """
296
+ if not removed or not added:
297
+ return []
298
+
299
+ # Build lineage lookup for both objects
300
+ base_lineage = {lin.output_column.lower(): lin for lin in base_obj.lineage}
301
+ head_lineage = {lin.output_column.lower(): lin for lin in head_obj.lineage}
302
+
303
+ renamed_pairs = []
304
+
305
+ for old_name in list(removed):
306
+ best_score = 0
307
+ best_candidate = None
308
+ candidates_with_score = []
309
+
310
+ old_col = base_columns[old_name]
311
+ old_lineage = base_lineage.get(old_name)
312
+
313
+ for new_name in added:
314
+ new_col = head_columns[new_name]
315
+ score = 0
316
+
317
+ # +2 for matching data type (case-insensitive)
318
+ if self._normalize_data_type(old_col.data_type) == self._normalize_data_type(new_col.data_type):
319
+ score += 2
320
+
321
+ # +2 for matching nullability
322
+ if old_col.nullable == new_col.nullable:
323
+ score += 2
324
+
325
+ # +3 for identical lineage input_fields or +1 for similar ordinal if no lineage
326
+ if old_lineage:
327
+ new_lineage = head_lineage.get(new_name)
328
+ if new_lineage and self._compare_lineage_input_fields(old_lineage, new_lineage):
329
+ score += 3
330
+ else:
331
+ # If no lineage, use ordinal proximity
332
+ if abs(old_col.ordinal - new_col.ordinal) <= 1:
333
+ score += 1
334
+
335
+ # +1 for matching length/precision (if extractable from type)
336
+ if self._compare_type_precision(old_col.data_type, new_col.data_type):
337
+ score += 1
338
+
339
+ candidates_with_score.append((new_name, score))
340
+
341
+ if score > best_score:
342
+ best_score = score
343
+ best_candidate = new_name
344
+
345
+ # Check if best candidate has score >= 4 and is unambiguous
346
+ if best_score >= 4:
347
+ # Check for ties
348
+ tied_candidates = [name for name, score in candidates_with_score if score == best_score]
349
+ if len(tied_candidates) == 1:
350
+ renamed_pairs.append((old_name, best_candidate))
351
+
352
+ # Register the rename change
353
+ self.changes.append(Change(
354
+ change_type=ChangeType.COLUMN_RENAMED,
355
+ severity=Severity.POTENTIALLY_BREAKING,
356
+ object_name=base_obj.name,
357
+ column_name=f"{old_col.name}→{head_columns[best_candidate].name}",
358
+ description=f"Column renamed from '{old_col.name}' to '{head_columns[best_candidate].name}' with matching type/nullability/lineage"
359
+ ))
360
+
361
+ return renamed_pairs
362
+
363
+ def _normalize_data_type(self, data_type: str) -> str:
364
+ """Normalize data type for comparison (case-insensitive)."""
365
+ return data_type.upper().strip()
366
+
367
+ def _compare_lineage_input_fields(self, lineage1: ColumnLineage, lineage2: ColumnLineage) -> bool:
368
+ """Compare if two lineages have identical input fields."""
369
+ fields1 = {(ref.table_name, ref.column_name) for ref in lineage1.input_fields}
370
+ fields2 = {(ref.table_name, ref.column_name) for ref in lineage2.input_fields}
371
+ return fields1 == fields2
372
+
373
+ def _compare_type_precision(self, type1: str, type2: str) -> bool:
374
+ """Compare if types have matching length/precision."""
375
+ import re
376
+
377
+ # Extract precision info from types like VARCHAR(100), DECIMAL(10,2)
378
+ def extract_precision(type_str: str) -> tuple:
379
+ match = re.search(r'\(([^)]+)\)', type_str)
380
+ if match:
381
+ return tuple(match.group(1).split(','))
382
+ return ()
383
+
384
+ return extract_precision(type1) == extract_precision(type2)
385
+
225
386
  def _classify_type_change_severity(self, old_type: str, new_type: str) -> Severity:
226
387
  """Classify the severity of a type change."""
227
388
  old_type = old_type.upper()