InfoTracker 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- infotracker/adapters.py +14 -7
- infotracker/cli.py +46 -30
- infotracker/config.py +6 -0
- infotracker/diff.py +208 -47
- infotracker/engine.py +267 -52
- infotracker/lineage.py +6 -3
- infotracker/models.py +106 -15
- infotracker/openlineage_utils.py +165 -0
- infotracker/parser.py +847 -75
- infotracker-0.2.0.dist-info/METADATA +285 -0
- infotracker-0.2.0.dist-info/RECORD +15 -0
- infotracker-0.1.0.dist-info/METADATA +0 -108
- infotracker-0.1.0.dist-info/RECORD +0 -14
- {infotracker-0.1.0.dist-info → infotracker-0.2.0.dist-info}/WHEEL +0 -0
- {infotracker-0.1.0.dist-info → infotracker-0.2.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
"""
|
2
|
+
Utilities for working with OpenLineage JSON artifacts.
|
3
|
+
"""
|
4
|
+
from __future__ import annotations
|
5
|
+
|
6
|
+
import json
|
7
|
+
from pathlib import Path
|
8
|
+
from typing import Dict, List, Any, Optional
|
9
|
+
|
10
|
+
from .models import ObjectInfo, ColumnSchema, TableSchema, ColumnLineage, ColumnReference, TransformationType
|
11
|
+
|
12
|
+
|
13
|
+
class OpenLineageLoader:
    """Loads OpenLineage JSON artifacts from directories."""

    @classmethod
    def load_dir(cls, directory: Path) -> List[Dict[str, Any]]:
        """Load all OpenLineage JSON files found directly in a directory.

        Files are read in sorted path order so the result is deterministic
        across platforms (``Path.glob`` makes no ordering guarantee).
        ``column_graph.json`` is always skipped. Loading is best-effort: a
        file that cannot be read, is not valid UTF-8/JSON, or whose top-level
        value is not a JSON object is skipped with a logged warning.

        Args:
            directory: Directory to scan (non-recursive) for ``*.json`` files.

        Returns:
            The parsed artifacts, one dict per successfully loaded file.
            Empty when ``directory`` does not exist or is not a directory.
        """
        # Function-local import: this module has no top-level logging import.
        import logging

        artifacts: List[Dict[str, Any]] = []

        if not directory.is_dir():
            return artifacts

        for json_file in sorted(directory.glob("*.json")):
            if json_file.name == "column_graph.json":
                continue  # Skip column graph file

            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    artifact = json.load(f)
            except (OSError, ValueError, RecursionError) as e:
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                # Log warning but continue with the remaining files.
                logging.warning("Failed to load %s: %s", json_file, e)
                continue

            if not isinstance(artifact, dict):
                # Honour the declared return type: callers use dict access.
                logging.warning(
                    "Failed to load %s: top-level JSON value is not an object",
                    json_file,
                )
                continue

            artifacts.append(artifact)

        return artifacts
|
38
|
+
|
39
|
+
|
40
|
+
class OLMapper:
    """Maps OpenLineage artifacts to ObjectInfo instances."""

    @classmethod
    def to_object_infos(cls, artifacts: List[Dict[str, Any]]) -> List[ObjectInfo]:
        """Convert OpenLineage artifacts to ObjectInfo instances.

        Conversion is best-effort: an artifact that raises during conversion
        is logged as a warning and skipped, and artifacts without outputs are
        silently omitted.

        Args:
            artifacts: Parsed OpenLineage event dictionaries.

        Returns:
            One ObjectInfo per artifact that could be converted, in input order.
        """
        # Function-local import: this module has no top-level logging import.
        import logging

        objects: List[ObjectInfo] = []

        for artifact in artifacts:
            try:
                obj_info = cls._artifact_to_object_info(artifact)
                if obj_info:
                    objects.append(obj_info)
            except Exception as e:
                # Deliberately broad: one malformed artifact must not abort
                # the whole batch. Log warning but continue.
                logging.warning(f"Failed to convert artifact to ObjectInfo: {e}")

        return objects

    @classmethod
    def _artifact_to_object_info(cls, artifact: Dict[str, Any]) -> Optional[ObjectInfo]:
        """Convert a single OpenLineage artifact to ObjectInfo.

        Only the first entry of ``outputs`` is mapped. Facets that are absent
        or JSON ``null`` are treated as empty rather than failing the artifact.

        Args:
            artifact: A parsed OpenLineage event dictionary.

        Returns:
            The ObjectInfo for the first output dataset, or ``None`` when the
            artifact has no outputs. ``object_type`` is ``"view"`` when any
            column lineage is present and ``"table"`` otherwise.
        """
        outputs = artifact.get("outputs", [])
        if not outputs:
            return None

        output = outputs[0]  # Take first output
        name = output.get("name", "unknown")
        namespace = output.get("namespace", "mssql://localhost/InfoTrackerDW")

        # ``or {}`` also covers an explicit JSON null, not just a missing key.
        facets = output.get("facets") or {}

        schema = TableSchema(
            namespace=namespace,
            name=name,
            columns=cls._build_columns(facets.get("schema") or {}),
        )
        lineage = cls._build_lineage(facets.get("columnLineage") or {}, namespace)
        dependencies = cls._collect_dependencies(artifact)

        # Determine object type: lineage implies the object is derived.
        object_type = "view" if lineage else "table"

        return ObjectInfo(
            name=name,
            object_type=object_type,
            schema=schema,
            lineage=lineage,
            dependencies=dependencies,
        )

    @staticmethod
    def _build_columns(schema_facet: Dict[str, Any]) -> List[ColumnSchema]:
        """Build ColumnSchema entries from a ``schema`` facet's ``fields`` list."""
        return [
            ColumnSchema(
                name=field.get("name", "unknown"),
                data_type=field.get("type", "unknown"),
                nullable=True,  # Default assumption: nullability is not read from the facet
                ordinal=ordinal,
            )
            for ordinal, field in enumerate(schema_facet.get("fields") or [])
        ]

    @staticmethod
    def _build_lineage(lineage_facet: Dict[str, Any], default_namespace: str) -> List[ColumnLineage]:
        """Build ColumnLineage entries from a ``columnLineage`` facet.

        Input fields without their own namespace inherit ``default_namespace``;
        an unrecognised ``transformationType`` falls back to IDENTITY.
        """
        lineage: List[ColumnLineage] = []

        for output_col, lineage_info in (lineage_facet.get("fields") or {}).items():
            input_fields = [
                ColumnReference(
                    namespace=input_field.get("namespace", default_namespace),
                    table_name=input_field.get("name", "unknown"),
                    column_name=input_field.get("field", "unknown"),
                )
                for input_field in lineage_info.get("inputFields") or []
            ]

            try:
                transformation_type = TransformationType(
                    lineage_info.get("transformationType", "IDENTITY")
                )
            except ValueError:
                transformation_type = TransformationType.IDENTITY

            lineage.append(ColumnLineage(
                output_column=output_col,
                input_fields=input_fields,
                transformation_type=transformation_type,
                transformation_description=lineage_info.get("transformationDescription", ""),
            ))

        return lineage

    @staticmethod
    def _collect_dependencies(artifact: Dict[str, Any]) -> set:
        """Return the set of non-empty ``name`` values of the artifact's inputs."""
        input_names = (
            input_obj.get("name", "") for input_obj in artifact.get("inputs") or []
        )
        return {input_name for input_name in input_names if input_name}
|
134
|
+
|
135
|
+
|
136
|
+
def _split_identifier_parts(identifier: str) -> List[str]:
    """Split a dotted SQL identifier on dots that are outside delimiters.

    A part that starts with ``[`` or ``"`` is read up to its matching closing
    delimiter (a doubled closer is an escaped literal), so dots inside such a
    part do not separate parts. If a delimiter is never closed the identifier
    is malformed and the plain ``str.split('.')`` result is returned instead.
    """
    parts: List[str] = []
    current: List[str] = []
    closer: Optional[str] = None  # closing delimiter of the part being read
    i = 0
    length = len(identifier)

    while i < length:
        ch = identifier[i]
        if closer is not None:
            if ch == closer:
                if i + 1 < length and identifier[i + 1] == closer:
                    # Doubled closer is an escaped literal; stay inside.
                    current.append(ch + ch)
                    i += 2
                    continue
                closer = None
            current.append(ch)
        elif ch in '["' and not current:
            # A delimiter only opens at the very start of a part.
            closer = ']' if ch == '[' else '"'
            current.append(ch)
        elif ch == '.':
            parts.append(''.join(current))
            current = []
        else:
            current.append(ch)
        i += 1

    if closer is not None:
        # Unterminated delimiter: fall back to the naive split.
        return identifier.split('.')

    parts.append(''.join(current))
    return parts


def qualify_identifier(identifier: str, default_database: Optional[str] = None) -> str:
    """Qualify a SQL identifier with default database when needed.

    Parts are counted on dots outside ``[...]`` / ``"..."`` delimiters, so a
    delimited name that itself contains a dot (e.g. ``[dbo].[my.table]``)
    is counted as two parts rather than three.

    Args:
        identifier: The identifier to qualify (can be 1, 2, or 3+ parts).
            A falsy value (empty string or ``None``) is returned unchanged.
        default_database: Default database to use when not specified.

    Returns:
        ``[database.]dbo.<name>`` for a one-part identifier,
        ``[database.]<schema>.<name>`` for a two-part identifier, and the
        identifier unchanged when it already has three or more parts.
    """
    if not identifier:
        return identifier

    part_count = len(_split_identifier_parts(identifier))

    if part_count == 1:
        # Just table name - add the default "dbo" schema (and database).
        if default_database:
            return f"{default_database}.dbo.{identifier}"
        return f"dbo.{identifier}"

    if part_count == 2 and default_database:
        # schema.table - add database
        return f"{default_database}.{identifier}"

    # Already fully qualified, or no database available to add.
    return identifier
|