awx-zipline-ai 0.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- agent/__init__.py +1 -0
- agent/constants.py +15 -0
- agent/ttypes.py +1684 -0
- ai/__init__.py +0 -0
- ai/chronon/__init__.py +0 -0
- ai/chronon/airflow_helpers.py +248 -0
- ai/chronon/cli/__init__.py +0 -0
- ai/chronon/cli/compile/__init__.py +0 -0
- ai/chronon/cli/compile/column_hashing.py +336 -0
- ai/chronon/cli/compile/compile_context.py +173 -0
- ai/chronon/cli/compile/compiler.py +183 -0
- ai/chronon/cli/compile/conf_validator.py +742 -0
- ai/chronon/cli/compile/display/__init__.py +0 -0
- ai/chronon/cli/compile/display/class_tracker.py +102 -0
- ai/chronon/cli/compile/display/compile_status.py +95 -0
- ai/chronon/cli/compile/display/compiled_obj.py +12 -0
- ai/chronon/cli/compile/display/console.py +3 -0
- ai/chronon/cli/compile/display/diff_result.py +111 -0
- ai/chronon/cli/compile/fill_templates.py +35 -0
- ai/chronon/cli/compile/parse_configs.py +134 -0
- ai/chronon/cli/compile/parse_teams.py +242 -0
- ai/chronon/cli/compile/serializer.py +109 -0
- ai/chronon/cli/compile/version_utils.py +42 -0
- ai/chronon/cli/git_utils.py +145 -0
- ai/chronon/cli/logger.py +59 -0
- ai/chronon/constants.py +3 -0
- ai/chronon/group_by.py +692 -0
- ai/chronon/join.py +580 -0
- ai/chronon/logger.py +23 -0
- ai/chronon/model.py +40 -0
- ai/chronon/query.py +126 -0
- ai/chronon/repo/__init__.py +39 -0
- ai/chronon/repo/aws.py +284 -0
- ai/chronon/repo/cluster.py +136 -0
- ai/chronon/repo/compile.py +62 -0
- ai/chronon/repo/constants.py +164 -0
- ai/chronon/repo/default_runner.py +269 -0
- ai/chronon/repo/explore.py +418 -0
- ai/chronon/repo/extract_objects.py +134 -0
- ai/chronon/repo/gcp.py +586 -0
- ai/chronon/repo/gitpython_utils.py +15 -0
- ai/chronon/repo/hub_runner.py +261 -0
- ai/chronon/repo/hub_uploader.py +109 -0
- ai/chronon/repo/init.py +60 -0
- ai/chronon/repo/join_backfill.py +119 -0
- ai/chronon/repo/run.py +296 -0
- ai/chronon/repo/serializer.py +133 -0
- ai/chronon/repo/team_json_utils.py +46 -0
- ai/chronon/repo/utils.py +481 -0
- ai/chronon/repo/zipline.py +35 -0
- ai/chronon/repo/zipline_hub.py +277 -0
- ai/chronon/resources/__init__.py +0 -0
- ai/chronon/resources/gcp/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
- ai/chronon/resources/gcp/joins/__init__.py +0 -0
- ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
- ai/chronon/resources/gcp/joins/test/data.py +26 -0
- ai/chronon/resources/gcp/sources/__init__.py +0 -0
- ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
- ai/chronon/resources/gcp/sources/test/data.py +26 -0
- ai/chronon/resources/gcp/teams.py +58 -0
- ai/chronon/source.py +86 -0
- ai/chronon/staging_query.py +226 -0
- ai/chronon/types.py +58 -0
- ai/chronon/utils.py +510 -0
- ai/chronon/windows.py +48 -0
- awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
- awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
- awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
- awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
- awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
- gen_thrift/__init__.py +0 -0
- gen_thrift/api/__init__.py +1 -0
- gen_thrift/api/constants.py +15 -0
- gen_thrift/api/ttypes.py +3754 -0
- gen_thrift/common/__init__.py +1 -0
- gen_thrift/common/constants.py +15 -0
- gen_thrift/common/ttypes.py +1814 -0
- gen_thrift/eval/__init__.py +1 -0
- gen_thrift/eval/constants.py +15 -0
- gen_thrift/eval/ttypes.py +660 -0
- gen_thrift/fetcher/__init__.py +1 -0
- gen_thrift/fetcher/constants.py +15 -0
- gen_thrift/fetcher/ttypes.py +127 -0
- gen_thrift/hub/__init__.py +1 -0
- gen_thrift/hub/constants.py +15 -0
- gen_thrift/hub/ttypes.py +1109 -0
- gen_thrift/observability/__init__.py +1 -0
- gen_thrift/observability/constants.py +15 -0
- gen_thrift/observability/ttypes.py +2355 -0
- gen_thrift/planner/__init__.py +1 -0
- gen_thrift/planner/constants.py +15 -0
- gen_thrift/planner/ttypes.py +1967 -0
ai/chronon/cli/compile/display/class_tracker.py
@@ -0,0 +1,102 @@

import difflib
from typing import Any, Dict, List

from rich.text import Text

from ai.chronon.cli.compile.display.compiled_obj import CompiledObj
from ai.chronon.cli.compile.display.diff_result import DiffResult


class ClassTracker:
    """
    Tracker object per class - Join, StagingQuery, GroupBy etc.
    """

    def __init__(self):
        self.existing_objs: Dict[str, CompiledObj] = {}  # name to obj
        self.files_to_obj: Dict[str, List[Any]] = {}
        self.files_to_errors: Dict[str, List[Exception]] = {}
        self.new_objs: Dict[str, CompiledObj] = {}  # name to obj
        self.diff_result = DiffResult()
        self.deleted_names: List[str] = []

    def add_existing(self, obj: CompiledObj) -> None:
        self.existing_objs[obj.name] = obj

    def add(self, compiled: CompiledObj) -> None:
        if compiled.errors:
            if compiled.file not in self.files_to_errors:
                self.files_to_errors[compiled.file] = []

            self.files_to_errors[compiled.file].extend(compiled.errors)

        else:
            if compiled.file not in self.files_to_obj:
                self.files_to_obj[compiled.file] = []

            self.files_to_obj[compiled.file].append(compiled.obj)

        self.new_objs[compiled.name] = compiled
        self._update_diff(compiled)

    def _update_diff(self, compiled: CompiledObj) -> None:
        if compiled.name in self.existing_objs:
            existing_json = self.existing_objs[compiled.name].tjson
            new_json = compiled.tjson

            if existing_json != new_json:
                diff = difflib.unified_diff(
                    existing_json.splitlines(keepends=True),
                    new_json.splitlines(keepends=True),
                    n=2,
                )

                print(f"Updated object: {compiled.name} in file {compiled.file}")
                print("".join(diff))
                print("\n")

                self.diff_result.updated.append(compiled.name)

        else:
            if not compiled.errors:
                self.diff_result.added.append(compiled.name)

    def close(self) -> None:
        self.closed = True
        self.recent_file = None
        self.deleted_names = list(self.existing_objs.keys() - self.new_objs.keys())

    def to_status(self) -> Text:
        text = Text(overflow="fold", no_wrap=False)

        if self.existing_objs:
            text.append(f" Parsed {len(self.existing_objs)} previously compiled objects.\n")

        if self.files_to_errors:
            text.append(" Failed to compile ")
            text.append(f"{len(self.files_to_errors)} ", style="red")
            text.append("files.\n")

        return text

    def to_errors(self) -> Text:
        text = Text(overflow="fold", no_wrap=False)

        if self.files_to_errors:
            for file, errors in self.files_to_errors.items():
                text.append(" ERROR ", style="bold red")
                text.append(f"- {file}:\n")

                for error in errors:
                    # Format each error properly, handling newlines
                    error_msg = str(error)
                    text.append(f" {error_msg}\n", style="red")

        return text

    # doesn't make sense to show deletes until the very end of compilation
    def diff(self, ignore_python_errors: bool = False) -> Text:
        # Don't show diff if there are compile errors - it's confusing
        if self.files_to_errors and not ignore_python_errors:
            return Text("\n❗Please fix python errors then retry compilation.\n", style="dim cyan")
        return self.diff_result.render(deleted_names=self.deleted_names)
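For orientation, a minimal sketch of how a ClassTracker is driven during one compile pass. It assumes `CompiledObj` accepts the keyword fields used by `parse_configs.py` below (`name`, `obj`, `file`, `errors`, `obj_type`, `tjson`); all names and JSON payloads here are hypothetical:

from ai.chronon.cli.compile.display.class_tracker import ClassTracker
from ai.chronon.cli.compile.display.compiled_obj import CompiledObj

tracker = ClassTracker()

# an object parsed from the previous compile output (hypothetical name and json)
tracker.add_existing(CompiledObj(name="payments.fraud.txn_gb__0", obj=None,
                                 file="compiled/txn_gb", errors=None,
                                 obj_type="GroupBy", tjson='{"agg": "sum"}'))

# the same name re-compiled with different json -> recorded as "updated", and
# _update_diff prints a unified diff of the two json payloads
tracker.add(CompiledObj(name="payments.fraud.txn_gb__0", obj=object(),
                        file="group_bys/payments/fraud.py", errors=None,
                        obj_type="GroupBy", tjson='{"agg": "avg"}'))

tracker.close()        # existing names that were not re-compiled become deleted_names
print(tracker.diff())  # rich Text summary: added / updated / deleted / version changes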
ai/chronon/cli/compile/display/compile_status.py
@@ -0,0 +1,95 @@

from collections import OrderedDict
from typing import Dict

from rich.live import Live
from rich.text import Text

from ai.chronon.cli.compile.display.class_tracker import ClassTracker
from ai.chronon.cli.compile.display.compiled_obj import CompiledObj


class CompileStatus:
    """
    Uses rich ui - to consolidate and sink the overview of the compile process to the bottom.
    """

    def __init__(self, use_live: bool = False):
        self.cls_to_tracker: Dict[str, ClassTracker] = OrderedDict()
        self.use_live = use_live
        # we need vertical_overflow to be visible as the output gets cut off when our output goes past the terminal window
        # but then we start seeing duplicates: https://github.com/Textualize/rich/issues/3263
        if self.use_live:
            self.live = Live(refresh_per_second=50, vertical_overflow="visible")
            self.live.start()

    def print_live_console(self, msg: str):
        if self.use_live:
            self.live.console.print(msg)

    def add_object_update_display(self, compiled: CompiledObj, obj_type: str = None) -> None:
        if compiled.obj_type is not None and obj_type is not None:
            assert compiled.obj_type == obj_type, (
                f"obj_type mismatch: {compiled.obj_type} != {obj_type}"
            )

        if obj_type not in self.cls_to_tracker:
            self.cls_to_tracker[obj_type] = ClassTracker()

        self.cls_to_tracker[obj_type].add(compiled)

        self._update_display()

    def add_existing_object_update_display(self, existing_obj: CompiledObj) -> None:
        obj_type = existing_obj.obj_type

        if obj_type not in self.cls_to_tracker:
            self.cls_to_tracker[obj_type] = ClassTracker()

        self.cls_to_tracker[obj_type].add_existing(existing_obj)

        self._update_display()

    def close_cls(self, obj_type: str) -> None:
        if obj_type in self.cls_to_tracker:
            self.cls_to_tracker[obj_type].close()
            self._update_display()

    def close(self) -> None:
        self._update_display()
        if self.use_live:
            self.live.stop()

    def render(self, ignore_python_errors: bool = False) -> Text:
        text = Text(overflow="fold", no_wrap=False)

        for obj_type, tracker in self.cls_to_tracker.items():
            # Skip MetaData section
            if obj_type == "MetaData":
                continue

            text.append(f"\n{obj_type}-s:\n", style="cyan")

            status = tracker.to_status()
            if status:
                text.append(status)

            errors = tracker.to_errors()
            if errors:
                text.append(errors)

            diff = tracker.diff(ignore_python_errors)
            if diff:
                text.append(diff)

        text.append("\n")
        return text

    def _update_display(self):
        # self.live.clear()

        # TODO: add this after live_crop is implemented
        # text = self.display_text()
        # if self.use_live:
        #     self.live.update(text, refresh=True)
        # return text
        pass
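And a corresponding sketch of the CompileStatus facade, which owns one ClassTracker per config class. The two `CompiledObj` instances mirror the success and failure paths taken by `from_folder` in `parse_configs.py` below; names are hypothetical:

from ai.chronon.cli.compile.display.compile_status import CompileStatus
from ai.chronon.cli.compile.display.compiled_obj import CompiledObj

status = CompileStatus(use_live=False)  # True pins a rich Live view to the terminal

# success path: a fully compiled object
ok = CompiledObj(name="payments.fraud.txn_gb__0", obj=object(),
                 file="group_bys/payments/fraud.py", errors=None,
                 obj_type="GroupBy", tjson="{}")
status.add_object_update_display(ok, obj_type="GroupBy")

# failure path: the file raised, so name/obj/tjson are None and errors is populated
bad = CompiledObj(name=None, obj=None, file="group_bys/payments/broken.py",
                  errors=[ValueError("boom")], obj_type="GroupBy", tjson=None)
status.add_object_update_display(bad, obj_type="GroupBy")

status.close_cls("GroupBy")  # lets the tracker compute deletions for this class
status.close()
print(status.render())       # one "GroupBy-s:" section with status, errors, and diff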
ai/chronon/cli/compile/display/diff_result.py
@@ -0,0 +1,111 @@

from typing import List, Tuple

from rich.text import Text

from ai.chronon.cli.compile.version_utils import parse_name_and_version


class DiffResult:
    def __init__(self):
        self.added: List[str] = []
        self.updated: List[str] = []
        self.version_bumped: List[
            Tuple[str, int, int]
        ] = []  # (base_name, old_version, new_version)

    def detect_version_changes(self, deleted_names: List[str]) -> Tuple[List[str], List[str]]:
        """Detect version changes between deleted and added names.

        Returns:
            Tuple of (remaining_deleted, remaining_added) after removing version changes
        """
        # Create mappings of base names to versions
        deleted_base_to_name = {}
        added_base_to_name = {}

        for name in deleted_names:
            base_name, version = parse_name_and_version(name)
            if version is not None:
                deleted_base_to_name[base_name] = (name, version)

        for name in self.added:
            base_name, version = parse_name_and_version(name)
            if version is not None:
                added_base_to_name[base_name] = (name, version)

        # Find version changes
        remaining_deleted = []
        remaining_added = []

        for name in deleted_names:
            base_name, old_version = parse_name_and_version(name)
            if (
                base_name in added_base_to_name
                and old_version is not None
                and base_name in deleted_base_to_name
            ):
                # This is a version change
                _, new_version = added_base_to_name[base_name]
                self.version_bumped.append((base_name, old_version, new_version))
            else:
                remaining_deleted.append(name)

        for name in self.added:
            base_name, version = parse_name_and_version(name)
            if not (base_name in deleted_base_to_name and version is not None):
                # This is not part of a version change
                remaining_added.append(name)

        return remaining_deleted, remaining_added

    def render(self, deleted_names: List[str], indent=" ") -> Text:
        # Detect version changes first
        remaining_deleted, remaining_added = self.detect_version_changes(deleted_names)

        def added_signage():
            return Text("➕ Added", style="dim green")

        def updated_signage():
            return Text("❗ Changed in place (no version change)", style="dim yellow")

        def deleted_signage():
            return Text("🗑️ Deleted", style="red")

        def version_bumped_signage():
            return Text("⬆️ Version changed", style="dim blue")

        added = [(added_signage(), name) for name in remaining_added]
        updated = [(updated_signage(), name) for name in self.updated]
        version_bumped = [
            (version_bumped_signage(), f"{base_name} (v{old_ver} -> v{new_ver})")
            for base_name, old_ver, new_ver in self.version_bumped
        ]

        # Put version changes and additions first, changed items at the bottom
        # Sort each group separately to maintain grouping
        version_bumped_sorted = sorted(version_bumped, key=lambda t: t[1])
        added_sorted = sorted(added, key=lambda t: t[1])
        updated_sorted = sorted(updated, key=lambda t: t[1])

        result_order = version_bumped_sorted + added_sorted

        if remaining_deleted:
            deleted = [(deleted_signage(), name) for name in remaining_deleted]
            deleted_sorted = sorted(deleted, key=lambda t: t[1])
            result_order += deleted_sorted

        # Add updated (changed in place) at the very end
        result_order += updated_sorted

        text = Text(overflow="fold", no_wrap=False)
        for signage, name in result_order:
            text.append(indent)
            text.append(signage)
            text.append(" ")
            text.append(name)
            text.append("\n")

        if not text:
            return Text(indent + "No new changes detected\n", style="dim")

        return text
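A worked example of `detect_version_changes`, assuming the `__<version>` naming convention applied by `from_file` in `parse_configs.py` below, and assuming `parse_name_and_version` splits e.g. `"payments.fraud.txn_gb__1"` into `("payments.fraud.txn_gb", 1)` (an assumption about `version_utils.py`, whose body is not shown in this section):

from ai.chronon.cli.compile.display.diff_result import DiffResult

result = DiffResult()
result.added = ["payments.fraud.txn_gb__1", "payments.fraud.new_gb__0"]

remaining_deleted, remaining_added = result.detect_version_changes(
    deleted_names=["payments.fraud.txn_gb__0", "payments.fraud.old_gb__0"]
)

# txn_gb disappeared at v0 and reappeared at v1, so it is reported as a version
# bump rather than as an unrelated delete plus add
print(result.version_bumped)  # [("payments.fraud.txn_gb", 0, 1)]
print(remaining_deleted)      # ["payments.fraud.old_gb__0"]
print(remaining_added)        # ["payments.fraud.new_gb__0"]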
ai/chronon/cli/compile/fill_templates.py
@@ -0,0 +1,35 @@

from gen_thrift.api.ttypes import Join, Team

from ai.chronon import utils
from ai.chronon.cli.compile.compile_context import CompileContext


def _fill_template(table, obj, namespace):
    if table:
        table = table.replace("{{ logged_table }}", utils.log_table_name(obj, full_name=True))
        table = table.replace("{{ db }}", namespace)

    return table


def set_templated_values(obj, cls, compile_context: CompileContext):
    team_obj: Team = compile_context.teams_dict[obj.team]
    namespace = team_obj.outputNamespace

    if cls == Join and obj.bootstrapParts:
        for bootstrap in obj.bootstrapParts:
            bootstrap.table = _fill_template(bootstrap.table, obj, namespace)

    if obj.metaData.dependencies:
        obj.metaData.dependencies = [
            _fill_template(dep, obj, namespace) for dep in obj.metaData.dependencies
        ]

    if cls == Join and obj.labelParts:
        obj.labelParts.metaData.dependencies = [
            label_dep.replace(
                "{{ join_backfill_table }}",
                utils.output_table_name(obj, full_name=True),
            )
            for label_dep in obj.labelParts.metaData.dependencies
        ]
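The substitution itself is plain string replacement; a standalone sketch with hypothetical values standing in for `utils.log_table_name(obj, full_name=True)` and `Team.outputNamespace`, which need real compiled objects to resolve:

# hypothetical stand-ins for the values _fill_template receives
namespace = "team_namespace"                    # Team.outputNamespace
logged_table = "team_namespace.my_join_logged"  # utils.log_table_name(obj, full_name=True)

dep = "{{ db }}.upstream_table"
filled = dep.replace("{{ logged_table }}", logged_table).replace("{{ db }}", namespace)
print(filled)  # team_namespace.upstream_table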
ai/chronon/cli/compile/parse_configs.py
@@ -0,0 +1,134 @@

import copy
import glob
import importlib
import os
from typing import Any, List

from gen_thrift.api.ttypes import GroupBy, Join

from ai.chronon import airflow_helpers
from ai.chronon.cli.compile import parse_teams, serializer
from ai.chronon.cli.compile.compile_context import CompileContext
from ai.chronon.cli.compile.display.compiled_obj import CompiledObj
from ai.chronon.cli.logger import get_logger

logger = get_logger()


def from_folder(cls: type, input_dir: str, compile_context: CompileContext) -> List[CompiledObj]:
    """
    Recursively consumes a folder, and constructs a map of
    object qualifier to StagingQuery, GroupBy, or Join
    """

    python_files = glob.glob(os.path.join(input_dir, "**/*.py"), recursive=True)

    results = []

    for f in python_files:
        try:
            results_dict = from_file(f, cls, input_dir)

            for name, obj in results_dict.items():
                parse_teams.update_metadata(obj, compile_context.teams_dict)
                # Populate columnHashes field with semantic hashes
                populate_column_hashes(obj)

                # Airflow deps must be set AFTER updating metadata
                airflow_helpers.set_airflow_deps(obj)

                obj.metaData.sourceFile = os.path.relpath(f, compile_context.chronon_root)

                tjson = serializer.thrift_simple_json(obj)

                # Perform validation
                errors = compile_context.validator.validate_obj(obj)

                result = CompiledObj(
                    name=name,
                    obj=obj,
                    file=f,
                    errors=errors if len(errors) > 0 else None,
                    obj_type=cls.__name__,
                    tjson=tjson,
                )
                results.append(result)

                compile_context.compile_status.add_object_update_display(result, cls.__name__)

        except Exception as e:
            result = CompiledObj(
                name=None,
                obj=None,
                file=f,
                errors=[e],
                obj_type=cls.__name__,
                tjson=None,
            )

            results.append(result)

            compile_context.compile_status.add_object_update_display(result, cls.__name__)

    return results


def from_file(file_path: str, cls: type, input_dir: str):
    # this is where the python path should have been set to
    chronon_root = os.path.dirname(input_dir)
    rel_path = os.path.relpath(file_path, chronon_root)

    rel_path_without_extension = os.path.splitext(rel_path)[0]

    module_name = rel_path_without_extension.replace("/", ".")

    conf_type, team_name_with_path = module_name.split(".", 1)
    mod_path = team_name_with_path.replace("/", ".")

    module = importlib.import_module(module_name)

    result = {}

    for var_name, obj in list(module.__dict__.items()):
        if isinstance(obj, cls):
            copied_obj = copy.deepcopy(obj)

            name = f"{mod_path}.{var_name}"

            # Add version suffix if version is set
            name = name + "__" + str(copied_obj.metaData.version)

            copied_obj.metaData.name = name
            copied_obj.metaData.team = mod_path.split(".")[0]

            result[name] = copied_obj

    return result


def populate_column_hashes(obj: Any):
    """
    Populate the columnHashes field in the object's metadata with semantic hashes
    for each output column.
    """
    # Import here to avoid circular imports
    from ai.chronon.cli.compile.column_hashing import (
        compute_group_by_columns_hashes,
        compute_join_column_hashes,
    )

    if isinstance(obj, GroupBy):
        # For GroupBy objects, get column hashes
        column_hashes = compute_group_by_columns_hashes(obj, exclude_keys=False)
        obj.metaData.columnHashes = column_hashes

    elif isinstance(obj, Join):
        # For Join objects, get column hashes
        column_hashes = compute_join_column_hashes(obj)
        obj.metaData.columnHashes = column_hashes

        if obj.joinParts:
            for jp in obj.joinParts or []:
                group_by = jp.groupBy
                group_by_hashes = compute_group_by_columns_hashes(group_by)
                group_by.metaData.columnHashes = group_by_hashes