InfoTracker 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ """
2
+ Utilities for working with OpenLineage JSON artifacts.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Dict, List, Any, Optional
9
+
10
+ from .models import ObjectInfo, ColumnSchema, TableSchema, ColumnLineage, ColumnReference, TransformationType
11
+
12
+
13
class OpenLineageLoader:
    """Loads OpenLineage JSON artifacts from directories."""

    @classmethod
    def load_dir(cls, directory: Path) -> List[Dict[str, Any]]:
        """Load all OpenLineage JSON files found directly in a directory.

        Files are read in sorted path order so the result is reproducible
        across filesystems. ``column_graph.json`` is skipped. Loading is
        best-effort: a file that cannot be read or parsed, or whose top-level
        JSON value is not an object, is logged as a warning and skipped.

        Args:
            directory: Directory to scan (non-recursive) for ``*.json`` files.
                A plain string path is accepted as well.

        Returns:
            The parsed JSON objects, one per successfully loaded file. An
            empty list when ``directory`` is missing or is not a directory.
        """
        import logging

        artifacts: List[Dict[str, Any]] = []

        directory = Path(directory)
        if not directory.is_dir():
            return artifacts

        # glob() yields entries in arbitrary, filesystem-dependent order.
        for json_file in sorted(directory.glob("*.json")):
            if json_file.name == "column_graph.json":
                continue  # Skip column graph file

            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    artifact = json.load(f)
            except (OSError, ValueError, RecursionError) as e:
                # ValueError covers both JSON syntax errors and invalid UTF-8.
                logging.warning("Failed to load %s: %s", json_file, e)
                continue

            if not isinstance(artifact, dict):
                # Keep the declared return type honest: only JSON objects
                # are returned to the caller.
                logging.warning(
                    "Skipping %s: expected a JSON object, got %s",
                    json_file,
                    type(artifact).__name__,
                )
                continue

            artifacts.append(artifact)

        return artifacts
38
+
39
+
40
class OLMapper:
    """Maps OpenLineage artifacts to ObjectInfo instances."""

    @classmethod
    def to_object_infos(cls, artifacts: List[Dict[str, Any]]) -> List[ObjectInfo]:
        """Convert OpenLineage artifacts to ObjectInfo instances.

        Conversion is best-effort: an artifact that raises during conversion
        is logged as a warning and skipped, and artifacts without outputs
        produce no entry.

        Args:
            artifacts: Parsed OpenLineage JSON objects.

        Returns:
            One ObjectInfo per artifact that could be converted, in input order.
        """
        import logging

        objects = []

        for artifact in artifacts:
            try:
                obj_info = cls._artifact_to_object_info(artifact)
            except Exception as e:
                # Boundary of the best-effort batch: one malformed artifact
                # must not abort the conversion of the others.
                logging.warning(f"Failed to convert artifact to ObjectInfo: {e}")
                continue

            if obj_info is not None:
                objects.append(obj_info)

        return objects

    @classmethod
    def _artifact_to_object_info(cls, artifact: Dict[str, Any]) -> Optional[ObjectInfo]:
        """Convert a single OpenLineage artifact to ObjectInfo.

        Only the first entry of ``outputs`` is used. Keys that are missing or
        explicitly ``null`` in the JSON are treated as empty.

        Args:
            artifact: One parsed OpenLineage JSON object.

        Returns:
            The ObjectInfo for the first output, or None when the artifact
            has no outputs.
        """
        outputs = artifact.get("outputs") or []
        if not outputs:
            return None

        output = outputs[0]  # Take first output
        name = output.get("name", "unknown")
        namespace = output.get("namespace", "mssql://localhost/InfoTrackerDW")
        facets = output.get("facets") or {}

        schema = TableSchema(
            namespace=namespace,
            name=name,
            columns=cls._columns_from_schema_facet(facets.get("schema") or {}),
        )
        lineage = cls._lineage_from_facet(facets.get("columnLineage") or {}, namespace)
        dependencies = cls._dependencies_from_inputs(artifact.get("inputs") or [])

        # An object that carries column lineage is classified as a view,
        # anything else as a table.
        object_type = "view" if lineage else "table"

        return ObjectInfo(
            name=name,
            object_type=object_type,
            schema=schema,
            lineage=lineage,
            dependencies=dependencies,
        )

    @staticmethod
    def _columns_from_schema_facet(schema_facet: Dict[str, Any]) -> List[ColumnSchema]:
        """Build the ordered column list from the output's ``schema`` facet."""
        return [
            ColumnSchema(
                name=field.get("name", "unknown"),
                data_type=field.get("type", "unknown"),
                nullable=True,  # Default assumption; only name and type are read
                ordinal=position,
            )
            for position, field in enumerate(schema_facet.get("fields") or [])
        ]

    @staticmethod
    def _lineage_from_facet(lineage_facet: Dict[str, Any], namespace: str) -> List[ColumnLineage]:
        """Build per-column lineage from the output's ``columnLineage`` facet.

        Input fields that do not name a namespace inherit the output's
        ``namespace``.
        """
        lineage = []

        for output_col, lineage_info in (lineage_facet.get("fields") or {}).items():
            input_fields = [
                ColumnReference(
                    namespace=input_field.get("namespace", namespace),
                    table_name=input_field.get("name", "unknown"),
                    column_name=input_field.get("field", "unknown"),
                )
                for input_field in lineage_info.get("inputFields") or []
            ]

            transformation_type_str = lineage_info.get("transformationType", "IDENTITY")
            try:
                transformation_type = TransformationType(transformation_type_str)
            except ValueError:
                # Unrecognised transformation names fall back to IDENTITY.
                transformation_type = TransformationType.IDENTITY

            lineage.append(ColumnLineage(
                output_column=output_col,
                input_fields=input_fields,
                transformation_type=transformation_type,
                transformation_description=lineage_info.get("transformationDescription", ""),
            ))

        return lineage

    @staticmethod
    def _dependencies_from_inputs(inputs: List[Dict[str, Any]]) -> set:
        """Collect the non-empty ``name`` of every input dataset."""
        names = (input_obj.get("name", "") for input_obj in inputs)
        return {input_name for input_name in names if input_name}
134
+
135
+
136
def _count_identifier_parts(identifier: str) -> int:
    """Count the dot-separated parts of a SQL identifier.

    Dots inside a ``[...]`` or ``"..."`` delimited name belong to that name
    and are not counted as separators. A doubled closing delimiter (``]]`` or
    ``""``) inside a delimited name is an escaped literal character. When a
    delimiter is left unclosed the identifier is treated as plain text and
    every dot counts as a separator.
    """
    closer = None  # closing delimiter of the quoted name we are inside, if any
    separators = 0
    length = len(identifier)
    i = 0

    while i < length:
        ch = identifier[i]
        if closer is not None:
            if ch == closer:
                if i + 1 < length and identifier[i + 1] == closer:
                    i += 1  # escaped delimiter: stay inside the quoted name
                else:
                    closer = None
        elif ch == "[":
            closer = "]"
        elif ch == '"':
            closer = '"'
        elif ch == ".":
            separators += 1
        i += 1

    if closer is not None:
        # Unbalanced delimiter: fall back to the plain dot count.
        return identifier.count(".") + 1
    return separators + 1


def qualify_identifier(identifier: str, default_database: Optional[str] = None) -> str:
    """Qualify a SQL identifier with default database when needed.

    A one-part name gets the ``dbo`` schema, plus ``default_database`` when
    one is given. A two-part name gets ``default_database`` prepended when
    one is given. Names with three or more parts, and empty identifiers, are
    returned unchanged. Dots inside ``[...]`` or ``"..."`` delimited names do
    not count as part separators.

    Args:
        identifier: The identifier to qualify (can be 1, 2, or 3 parts)
        default_database: Default database to use when not specified

    Returns:
        Fully qualified identifier
    """
    if not identifier:
        return identifier

    part_count = _count_identifier_parts(identifier)

    if part_count == 1:
        # Just table name - add schema and database
        if default_database:
            return f"{default_database}.dbo.{identifier}"
        return f"dbo.{identifier}"

    if part_count == 2 and default_database:
        # schema.table - add database
        return f"{default_database}.{identifier}"

    # Already fully qualified, or no database available to add
    return identifier