awx-zipline-ai 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. __init__.py +0 -0
  2. agent/__init__.py +1 -0
  3. agent/constants.py +15 -0
  4. agent/ttypes.py +1684 -0
  5. ai/__init__.py +0 -0
  6. ai/chronon/__init__.py +0 -0
  7. ai/chronon/airflow_helpers.py +248 -0
  8. ai/chronon/cli/__init__.py +0 -0
  9. ai/chronon/cli/compile/__init__.py +0 -0
  10. ai/chronon/cli/compile/column_hashing.py +336 -0
  11. ai/chronon/cli/compile/compile_context.py +173 -0
  12. ai/chronon/cli/compile/compiler.py +183 -0
  13. ai/chronon/cli/compile/conf_validator.py +742 -0
  14. ai/chronon/cli/compile/display/__init__.py +0 -0
  15. ai/chronon/cli/compile/display/class_tracker.py +102 -0
  16. ai/chronon/cli/compile/display/compile_status.py +95 -0
  17. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  18. ai/chronon/cli/compile/display/console.py +3 -0
  19. ai/chronon/cli/compile/display/diff_result.py +111 -0
  20. ai/chronon/cli/compile/fill_templates.py +35 -0
  21. ai/chronon/cli/compile/parse_configs.py +134 -0
  22. ai/chronon/cli/compile/parse_teams.py +242 -0
  23. ai/chronon/cli/compile/serializer.py +109 -0
  24. ai/chronon/cli/compile/version_utils.py +42 -0
  25. ai/chronon/cli/git_utils.py +145 -0
  26. ai/chronon/cli/logger.py +59 -0
  27. ai/chronon/constants.py +3 -0
  28. ai/chronon/group_by.py +692 -0
  29. ai/chronon/join.py +580 -0
  30. ai/chronon/logger.py +23 -0
  31. ai/chronon/model.py +40 -0
  32. ai/chronon/query.py +126 -0
  33. ai/chronon/repo/__init__.py +39 -0
  34. ai/chronon/repo/aws.py +284 -0
  35. ai/chronon/repo/cluster.py +136 -0
  36. ai/chronon/repo/compile.py +62 -0
  37. ai/chronon/repo/constants.py +164 -0
  38. ai/chronon/repo/default_runner.py +269 -0
  39. ai/chronon/repo/explore.py +418 -0
  40. ai/chronon/repo/extract_objects.py +134 -0
  41. ai/chronon/repo/gcp.py +586 -0
  42. ai/chronon/repo/gitpython_utils.py +15 -0
  43. ai/chronon/repo/hub_runner.py +261 -0
  44. ai/chronon/repo/hub_uploader.py +109 -0
  45. ai/chronon/repo/init.py +60 -0
  46. ai/chronon/repo/join_backfill.py +119 -0
  47. ai/chronon/repo/run.py +296 -0
  48. ai/chronon/repo/serializer.py +133 -0
  49. ai/chronon/repo/team_json_utils.py +46 -0
  50. ai/chronon/repo/utils.py +481 -0
  51. ai/chronon/repo/zipline.py +35 -0
  52. ai/chronon/repo/zipline_hub.py +277 -0
  53. ai/chronon/resources/__init__.py +0 -0
  54. ai/chronon/resources/gcp/__init__.py +0 -0
  55. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  56. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  57. ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
  58. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  59. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  60. ai/chronon/resources/gcp/joins/test/data.py +26 -0
  61. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  62. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  63. ai/chronon/resources/gcp/sources/test/data.py +26 -0
  64. ai/chronon/resources/gcp/teams.py +58 -0
  65. ai/chronon/source.py +86 -0
  66. ai/chronon/staging_query.py +226 -0
  67. ai/chronon/types.py +58 -0
  68. ai/chronon/utils.py +510 -0
  69. ai/chronon/windows.py +48 -0
  70. awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
  71. awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
  72. awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
  73. awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
  74. awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
  75. gen_thrift/__init__.py +0 -0
  76. gen_thrift/api/__init__.py +1 -0
  77. gen_thrift/api/constants.py +15 -0
  78. gen_thrift/api/ttypes.py +3754 -0
  79. gen_thrift/common/__init__.py +1 -0
  80. gen_thrift/common/constants.py +15 -0
  81. gen_thrift/common/ttypes.py +1814 -0
  82. gen_thrift/eval/__init__.py +1 -0
  83. gen_thrift/eval/constants.py +15 -0
  84. gen_thrift/eval/ttypes.py +660 -0
  85. gen_thrift/fetcher/__init__.py +1 -0
  86. gen_thrift/fetcher/constants.py +15 -0
  87. gen_thrift/fetcher/ttypes.py +127 -0
  88. gen_thrift/hub/__init__.py +1 -0
  89. gen_thrift/hub/constants.py +15 -0
  90. gen_thrift/hub/ttypes.py +1109 -0
  91. gen_thrift/observability/__init__.py +1 -0
  92. gen_thrift/observability/constants.py +15 -0
  93. gen_thrift/observability/ttypes.py +2355 -0
  94. gen_thrift/planner/__init__.py +1 -0
  95. gen_thrift/planner/constants.py +15 -0
  96. gen_thrift/planner/ttypes.py +1967 -0
ai/chronon/cli/compile/display/class_tracker.py
@@ -0,0 +1,102 @@
+ import difflib
+ from typing import Any, Dict, List
+
+ from rich.text import Text
+
+ from ai.chronon.cli.compile.display.compiled_obj import CompiledObj
+ from ai.chronon.cli.compile.display.diff_result import DiffResult
+
+
+ class ClassTracker:
+     """
+     Tracker object per class - Join, StagingQuery, GroupBy etc
+     """
+
+     def __init__(self):
+         self.existing_objs: Dict[str, CompiledObj] = {}  # name to obj
+         self.files_to_obj: Dict[str, List[Any]] = {}
+         self.files_to_errors: Dict[str, List[Exception]] = {}
+         self.new_objs: Dict[str, CompiledObj] = {}  # name to obj
+         self.diff_result = DiffResult()
+         self.deleted_names: List[str] = []
+
+     def add_existing(self, obj: CompiledObj) -> None:
+         self.existing_objs[obj.name] = obj
+
+     def add(self, compiled: CompiledObj) -> None:
+         if compiled.errors:
+             if compiled.file not in self.files_to_errors:
+                 self.files_to_errors[compiled.file] = []
+
+             self.files_to_errors[compiled.file].extend(compiled.errors)
+
+         else:
+             if compiled.file not in self.files_to_obj:
+                 self.files_to_obj[compiled.file] = []
+
+             self.files_to_obj[compiled.file].append(compiled.obj)
+
+         self.new_objs[compiled.name] = compiled
+         self._update_diff(compiled)
+
+     def _update_diff(self, compiled: CompiledObj) -> None:
+         if compiled.name in self.existing_objs:
+             existing_json = self.existing_objs[compiled.name].tjson
+             new_json = compiled.tjson
+
+             if existing_json != new_json:
+                 diff = difflib.unified_diff(
+                     existing_json.splitlines(keepends=True),
+                     new_json.splitlines(keepends=True),
+                     n=2,
+                 )
+
+                 print(f"Updated object: {compiled.name} in file {compiled.file}")
+                 print("".join(diff))
+                 print("\n")
+
+                 self.diff_result.updated.append(compiled.name)
+
+         else:
+             if not compiled.errors:
+                 self.diff_result.added.append(compiled.name)
+
+     def close(self) -> None:
+         self.closed = True
+         self.recent_file = None
+         self.deleted_names = list(self.existing_objs.keys() - self.new_objs.keys())
+
+     def to_status(self) -> Text:
+         text = Text(overflow="fold", no_wrap=False)
+
+         if self.existing_objs:
+             text.append(f" Parsed {len(self.existing_objs)} previously compiled objects.\n")
+
+         if self.files_to_errors:
+             text.append(" Failed to compile ")
+             text.append(f"{len(self.files_to_errors)} ", style="red")
+             text.append("files.\n")
+
+         return text
+
+     def to_errors(self) -> Text:
+         text = Text(overflow="fold", no_wrap=False)
+
+         if self.files_to_errors:
+             for file, errors in self.files_to_errors.items():
+                 text.append(" ERROR ", style="bold red")
+                 text.append(f"- {file}:\n")
+
+                 for error in errors:
+                     # Format each error properly, handling newlines
+                     error_msg = str(error)
+                     text.append(f" {error_msg}\n", style="red")
+
+         return text
+
+     # doesn't make sense to show deletes until the very end of compilation
+     def diff(self, ignore_python_errors: bool = False) -> Text:
+         # Don't show diff if there are compile errors - it's confusing
+         if self.files_to_errors and not ignore_python_errors:
+             return Text("\n❗Please fix python errors then retry compilation.\n", style="dim cyan")
+         return self.diff_result.render(deleted_names=self.deleted_names)
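A minimal sketch of how this tracker can be driven, with hypothetical object
names and tjson payloads (CompiledObj is the dataclass shown later in this diff):

    from ai.chronon.cli.compile.display.class_tracker import ClassTracker
    from ai.chronon.cli.compile.display.compiled_obj import CompiledObj

    tracker = ClassTracker()

    # An object parsed back from the previous compile output.
    tracker.add_existing(CompiledObj(
        name="my_team.data.user_features__0", obj=None, file="group_bys/my_team/data.py",
        errors=None, obj_type="GroupBy", tjson='{"agg": "sum"}',
    ))

    # Recompiled with different serialized content -> prints a unified diff
    # and records the name under diff_result.updated.
    tracker.add(CompiledObj(
        name="my_team.data.user_features__0", obj=object(), file="group_bys/my_team/data.py",
        errors=None, obj_type="GroupBy", tjson='{"agg": "avg"}',
    ))

    tracker.close()        # names present before but not re-added become deleted_names
    print(tracker.diff())  # rich Text summary of added / updated / deleted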
ai/chronon/cli/compile/display/compile_status.py
@@ -0,0 +1,95 @@
+ from collections import OrderedDict
+ from typing import Dict
+
+ from rich.live import Live
+ from rich.text import Text
+
+ from ai.chronon.cli.compile.display.class_tracker import ClassTracker
+ from ai.chronon.cli.compile.display.compiled_obj import CompiledObj
+
+
+ class CompileStatus:
+     """
+     Uses rich UI to consolidate the overview of the compile process and sink it to the bottom.
+     """
+
+     def __init__(self, use_live: bool = False):
+         self.cls_to_tracker: Dict[str, ClassTracker] = OrderedDict()
+         self.use_live = use_live
+         # we need vertical_overflow to be visible, as the output gets cut off when it goes past the terminal window,
+         # but then we start seeing duplicates: https://github.com/Textualize/rich/issues/3263
+         if self.use_live:
+             self.live = Live(refresh_per_second=50, vertical_overflow="visible")
+             self.live.start()
+
+     def print_live_console(self, msg: str):
+         if self.use_live:
+             self.live.console.print(msg)
+
+     def add_object_update_display(self, compiled: CompiledObj, obj_type: str = None) -> None:
+         if compiled.obj_type is not None and obj_type is not None:
+             assert compiled.obj_type == obj_type, (
+                 f"obj_type mismatch: {compiled.obj_type} != {obj_type}"
+             )
+
+         if obj_type not in self.cls_to_tracker:
+             self.cls_to_tracker[obj_type] = ClassTracker()
+
+         self.cls_to_tracker[obj_type].add(compiled)
+
+         self._update_display()
+
+     def add_existing_object_update_display(self, existing_obj: CompiledObj) -> None:
+         obj_type = existing_obj.obj_type
+
+         if obj_type not in self.cls_to_tracker:
+             self.cls_to_tracker[obj_type] = ClassTracker()
+
+         self.cls_to_tracker[obj_type].add_existing(existing_obj)
+
+         self._update_display()
+
+     def close_cls(self, obj_type: str) -> None:
+         if obj_type in self.cls_to_tracker:
+             self.cls_to_tracker[obj_type].close()
+             self._update_display()
+
+     def close(self) -> None:
+         self._update_display()
+         if self.use_live:
+             self.live.stop()
+
+     def render(self, ignore_python_errors: bool = False) -> Text:
+         text = Text(overflow="fold", no_wrap=False)
+
+         for obj_type, tracker in self.cls_to_tracker.items():
+             # Skip MetaData section
+             if obj_type == "MetaData":
+                 continue
+
+             text.append(f"\n{obj_type}-s:\n", style="cyan")
+
+             status = tracker.to_status()
+             if status:
+                 text.append(status)
+
+             errors = tracker.to_errors()
+             if errors:
+                 text.append(errors)
+
+             diff = tracker.diff(ignore_python_errors)
+             if diff:
+                 text.append(diff)
+
+         text.append("\n")
+         return text
+
+     def _update_display(self):
+         # self.live.clear()
+
+         # TODO: add this after live_crop is implemented
+         # text = self.display_text()
+         # if self.use_live:
+         #     self.live.update(text, refresh=True)
+         # return text
+         pass
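A sketch of how the compiler plausibly wires CompileStatus to the per-class
trackers above (the CompiledObj values are hypothetical):

    from ai.chronon.cli.compile.display.compile_status import CompileStatus
    from ai.chronon.cli.compile.display.compiled_obj import CompiledObj

    status = CompileStatus(use_live=False)  # use_live=True routes output through rich Live

    compiled = CompiledObj(
        name="my_team.data.user_features__0", obj=object(), file="group_bys/my_team/data.py",
        errors=None, obj_type="GroupBy", tjson="{}",
    )
    status.add_object_update_display(compiled, "GroupBy")

    status.close_cls("GroupBy")  # finalizes the GroupBy tracker, computing deletions
    print(status.render())       # per-class status, error, and diff sections
    status.close()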
ai/chronon/cli/compile/display/compiled_obj.py
@@ -0,0 +1,12 @@
+ from dataclasses import dataclass
+ from typing import Any, List, Optional
+
+
+ @dataclass
+ class CompiledObj:
+     name: str
+     obj: Any
+     file: str
+     errors: Optional[List[Exception]]
+     obj_type: str
+     tjson: str
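The dataclass takes one of two shapes in practice: a parsed object plus its
serialized Thrift JSON, or the errors hit while compiling its file. Hypothetical
instances of each:

    ok = CompiledObj(name="my_team.data.v1__0", obj=object(), file="joins/my_team/data.py",
                     errors=None, obj_type="Join", tjson="{}")
    failed = CompiledObj(name=None, obj=None, file="joins/my_team/broken.py",
                         errors=[ImportError("no module named foo")],
                         obj_type="Join", tjson=None)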
ai/chronon/cli/compile/display/console.py
@@ -0,0 +1,3 @@
+ from rich.console import Console
+
+ console = Console()
ai/chronon/cli/compile/display/diff_result.py
@@ -0,0 +1,111 @@
+ from typing import List, Tuple
+
+ from rich.text import Text
+
+ from ai.chronon.cli.compile.version_utils import parse_name_and_version
+
+
+ class DiffResult:
+     def __init__(self):
+         self.added: List[str] = []
+         self.updated: List[str] = []
+         self.version_bumped: List[
+             Tuple[str, int, int]
+         ] = []  # (base_name, old_version, new_version)
+
+     def detect_version_changes(self, deleted_names: List[str]) -> Tuple[List[str], List[str]]:
+         """Detect version changes between deleted and added names.
+
+         Returns:
+             Tuple of (remaining_deleted, remaining_added) after removing version changes
+         """
+         # Create mappings of base names to versions
+         deleted_base_to_name = {}
+         added_base_to_name = {}
+
+         for name in deleted_names:
+             base_name, version = parse_name_and_version(name)
+             if version is not None:
+                 deleted_base_to_name[base_name] = (name, version)
+
+         for name in self.added:
+             base_name, version = parse_name_and_version(name)
+             if version is not None:
+                 added_base_to_name[base_name] = (name, version)
+
+         # Find version changes
+         remaining_deleted = []
+         remaining_added = []
+
+         for name in deleted_names:
+             base_name, old_version = parse_name_and_version(name)
+             if (
+                 base_name in added_base_to_name
+                 and old_version is not None
+                 and base_name in deleted_base_to_name
+             ):
+                 # This is a version change
+                 _, new_version = added_base_to_name[base_name]
+                 self.version_bumped.append((base_name, old_version, new_version))
+             else:
+                 remaining_deleted.append(name)
+
+         for name in self.added:
+             base_name, version = parse_name_and_version(name)
+             if not (base_name in deleted_base_to_name and version is not None):
+                 # This is not part of a version change
+                 remaining_added.append(name)
+
+         return remaining_deleted, remaining_added
+
+     def render(self, deleted_names: List[str], indent=" ") -> Text:
+         # Detect version changes first
+         remaining_deleted, remaining_added = self.detect_version_changes(deleted_names)
+
+         def added_signage():
+             return Text("➕ Added", style="dim green")
+
+         def updated_signage():
+             return Text("❗ Changed in place (no version change)", style="dim yellow")
+
+         def deleted_signage():
+             return Text("🗑️ Deleted", style="red")
+
+         def version_bumped_signage():
+             return Text("⬆️ Version changed", style="dim blue")
+
+         added = [(added_signage(), name) for name in remaining_added]
+         updated = [(updated_signage(), name) for name in self.updated]
+         version_bumped = [
+             (version_bumped_signage(), f"{base_name} (v{old_ver} -> v{new_ver})")
+             for base_name, old_ver, new_ver in self.version_bumped
+         ]
+
+         # Put version changes and additions first, changed items at the bottom
+         # Sort each group separately to maintain grouping
+         version_bumped_sorted = sorted(version_bumped, key=lambda t: t[1])
+         added_sorted = sorted(added, key=lambda t: t[1])
+         updated_sorted = sorted(updated, key=lambda t: t[1])
+
+         result_order = version_bumped_sorted + added_sorted
+
+         if remaining_deleted:
+             deleted = [(deleted_signage(), name) for name in remaining_deleted]
+             deleted_sorted = sorted(deleted, key=lambda t: t[1])
+             result_order += deleted_sorted
+
+         # Add updated (changed in place) at the very end
+         result_order += updated_sorted
+
+         text = Text(overflow="fold", no_wrap=False)
+         for signage, name in result_order:
+             text.append(indent)
+             text.append(signage)
+             text.append(" ")
+             text.append(name)
+             text.append("\n")
+
+         if not text:
+             return Text(indent + "No new changes detected\n", style="dim")
+
+         return text
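The version-bump detection pairs a deleted name with an added name that shares
its base name. A sketch, assuming parse_name_and_version (defined in
version_utils.py, not shown in this section) splits a "__N" suffix, e.g.
"my_team.gb__1" -> ("my_team.gb", 1):

    from ai.chronon.cli.compile.display.diff_result import DiffResult

    dr = DiffResult()
    dr.added = ["my_team.gb__2", "my_team.brand_new__0"]
    dr.updated = ["my_team.tweaked__0"]

    print(dr.render(deleted_names=["my_team.gb__1", "my_team.gone__3"]))
    # ⬆️ Version changed my_team.gb (v1 -> v2)
    # ➕ Added my_team.brand_new__0
    # 🗑️ Deleted my_team.gone__3
    # ❗ Changed in place (no version change) my_team.tweaked__0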
ai/chronon/cli/compile/fill_templates.py
@@ -0,0 +1,35 @@
+ from gen_thrift.api.ttypes import Join, Team
+
+ from ai.chronon import utils
+ from ai.chronon.cli.compile.compile_context import CompileContext
+
+
+ def _fill_template(table, obj, namespace):
+     if table:
+         table = table.replace("{{ logged_table }}", utils.log_table_name(obj, full_name=True))
+         table = table.replace("{{ db }}", namespace)
+
+     return table
+
+
+ def set_templated_values(obj, cls, compile_context: CompileContext):
+     team_obj: Team = compile_context.teams_dict[obj.team]
+     namespace = team_obj.outputNamespace
+
+     if cls == Join and obj.bootstrapParts:
+         for bootstrap in obj.bootstrapParts:
+             bootstrap.table = _fill_template(bootstrap.table, obj, namespace)
+
+     if obj.metaData.dependencies:
+         obj.metaData.dependencies = [
+             _fill_template(dep, obj, namespace) for dep in obj.metaData.dependencies
+         ]
+
+     if cls == Join and obj.labelParts:
+         obj.labelParts.metaData.dependencies = [
+             label_dep.replace(
+                 "{{ join_backfill_table }}",
+                 utils.output_table_name(obj, full_name=True),
+             )
+             for label_dep in obj.labelParts.metaData.dependencies
+         ]
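What the substitution does to a dependency string, assuming a team whose
outputNamespace is "prod_db" (hypothetical values):

    dep = "{{ db }}.user_events/ds={{ ds }}"
    filled = dep.replace("{{ db }}", "prod_db")
    assert filled == "prod_db.user_events/ds={{ ds }}"
    # "{{ logged_table }}" is replaced the same way, using
    # utils.log_table_name(obj, full_name=True); label dependencies swap
    # "{{ join_backfill_table }}" for the join's backfill output table.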
ai/chronon/cli/compile/parse_configs.py
@@ -0,0 +1,134 @@
+ import copy
+ import glob
+ import importlib
+ import os
+ from typing import Any, List
+
+ from gen_thrift.api.ttypes import GroupBy, Join
+
+ from ai.chronon import airflow_helpers
+ from ai.chronon.cli.compile import parse_teams, serializer
+ from ai.chronon.cli.compile.compile_context import CompileContext
+ from ai.chronon.cli.compile.display.compiled_obj import CompiledObj
+ from ai.chronon.cli.logger import get_logger
+
+ logger = get_logger()
+
+
+ def from_folder(cls: type, input_dir: str, compile_context: CompileContext) -> List[CompiledObj]:
+     """
+     Recursively consumes a folder, and constructs a map of
+     object qualifier to StagingQuery, GroupBy, or Join
+     """
+
+     python_files = glob.glob(os.path.join(input_dir, "**/*.py"), recursive=True)
+
+     results = []
+
+     for f in python_files:
+         try:
+             results_dict = from_file(f, cls, input_dir)
+
+             for name, obj in results_dict.items():
+                 parse_teams.update_metadata(obj, compile_context.teams_dict)
+                 # Populate columnHashes field with semantic hashes
+                 populate_column_hashes(obj)
+
+                 # Airflow deps must be set AFTER updating metadata
+                 airflow_helpers.set_airflow_deps(obj)
+
+                 obj.metaData.sourceFile = os.path.relpath(f, compile_context.chronon_root)
+
+                 tjson = serializer.thrift_simple_json(obj)
+
+                 # Perform validation
+                 errors = compile_context.validator.validate_obj(obj)
+
+                 result = CompiledObj(
+                     name=name,
+                     obj=obj,
+                     file=f,
+                     errors=errors if len(errors) > 0 else None,
+                     obj_type=cls.__name__,
+                     tjson=tjson,
+                 )
+                 results.append(result)
+
+                 compile_context.compile_status.add_object_update_display(result, cls.__name__)
+
+         except Exception as e:
+             result = CompiledObj(
+                 name=None,
+                 obj=None,
+                 file=f,
+                 errors=[e],
+                 obj_type=cls.__name__,
+                 tjson=None,
+             )
+
+             results.append(result)
+
+             compile_context.compile_status.add_object_update_display(result, cls.__name__)
+
+     return results
+
+
+ def from_file(file_path: str, cls: type, input_dir: str):
+     # this is where the python path should have been set to
+     chronon_root = os.path.dirname(input_dir)
+     rel_path = os.path.relpath(file_path, chronon_root)
+
+     rel_path_without_extension = os.path.splitext(rel_path)[0]
+
+     module_name = rel_path_without_extension.replace("/", ".")
+
+     conf_type, team_name_with_path = module_name.split(".", 1)
+     mod_path = team_name_with_path.replace("/", ".")
+
+     module = importlib.import_module(module_name)
+
+     result = {}
+
+     for var_name, obj in list(module.__dict__.items()):
+         if isinstance(obj, cls):
+             copied_obj = copy.deepcopy(obj)
+
+             name = f"{mod_path}.{var_name}"
+
+             # Append the version suffix
+             name = name + "__" + str(copied_obj.metaData.version)
+
+             copied_obj.metaData.name = name
+             copied_obj.metaData.team = mod_path.split(".")[0]
+
+             result[name] = copied_obj
+
+     return result
+
+
+ def populate_column_hashes(obj: Any):
+     """
+     Populate the columnHashes field in the object's metadata with semantic hashes
+     for each output column.
+     """
+     # Import here to avoid circular imports
+     from ai.chronon.cli.compile.column_hashing import (
+         compute_group_by_columns_hashes,
+         compute_join_column_hashes,
+     )
+
+     if isinstance(obj, GroupBy):
+         # For GroupBy objects, get column hashes
+         column_hashes = compute_group_by_columns_hashes(obj, exclude_keys=False)
+         obj.metaData.columnHashes = column_hashes
+
+     elif isinstance(obj, Join):
+         # For Join objects, get column hashes
+         column_hashes = compute_join_column_hashes(obj)
+         obj.metaData.columnHashes = column_hashes
+
+         if obj.joinParts:
+             for jp in obj.joinParts or []:
+                 group_by = jp.groupBy
+                 group_by_hashes = compute_group_by_columns_hashes(group_by)
+                 group_by.metaData.columnHashes = group_by_hashes
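How from_file derives object names, traced against a hypothetical repo layout
(the repo root, which modules are imported relative to, is the parent of input_dir):

    # chronon_root/
    #   group_bys/            <- input_dir passed to from_folder
    #     my_team/
    #       purchases.py      <- defines v1 = GroupBy(...) with metaData.version = 0
    #
    # module_name -> "group_bys.my_team.purchases"
    # conf_type   -> "group_bys"
    # mod_path    -> "my_team.purchases"
    # final name  -> "my_team.purchases.v1__0"
    # team        -> "my_team"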