awx_zipline_ai-0.0.32-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. __init__.py +0 -0
  2. agent/__init__.py +1 -0
  3. agent/constants.py +15 -0
  4. agent/ttypes.py +1684 -0
  5. ai/__init__.py +0 -0
  6. ai/chronon/__init__.py +0 -0
  7. ai/chronon/airflow_helpers.py +248 -0
  8. ai/chronon/cli/__init__.py +0 -0
  9. ai/chronon/cli/compile/__init__.py +0 -0
  10. ai/chronon/cli/compile/column_hashing.py +336 -0
  11. ai/chronon/cli/compile/compile_context.py +173 -0
  12. ai/chronon/cli/compile/compiler.py +183 -0
  13. ai/chronon/cli/compile/conf_validator.py +742 -0
  14. ai/chronon/cli/compile/display/__init__.py +0 -0
  15. ai/chronon/cli/compile/display/class_tracker.py +102 -0
  16. ai/chronon/cli/compile/display/compile_status.py +95 -0
  17. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  18. ai/chronon/cli/compile/display/console.py +3 -0
  19. ai/chronon/cli/compile/display/diff_result.py +111 -0
  20. ai/chronon/cli/compile/fill_templates.py +35 -0
  21. ai/chronon/cli/compile/parse_configs.py +134 -0
  22. ai/chronon/cli/compile/parse_teams.py +242 -0
  23. ai/chronon/cli/compile/serializer.py +109 -0
  24. ai/chronon/cli/compile/version_utils.py +42 -0
  25. ai/chronon/cli/git_utils.py +145 -0
  26. ai/chronon/cli/logger.py +59 -0
  27. ai/chronon/constants.py +3 -0
  28. ai/chronon/group_by.py +692 -0
  29. ai/chronon/join.py +580 -0
  30. ai/chronon/logger.py +23 -0
  31. ai/chronon/model.py +40 -0
  32. ai/chronon/query.py +126 -0
  33. ai/chronon/repo/__init__.py +39 -0
  34. ai/chronon/repo/aws.py +284 -0
  35. ai/chronon/repo/cluster.py +136 -0
  36. ai/chronon/repo/compile.py +62 -0
  37. ai/chronon/repo/constants.py +164 -0
  38. ai/chronon/repo/default_runner.py +269 -0
  39. ai/chronon/repo/explore.py +418 -0
  40. ai/chronon/repo/extract_objects.py +134 -0
  41. ai/chronon/repo/gcp.py +586 -0
  42. ai/chronon/repo/gitpython_utils.py +15 -0
  43. ai/chronon/repo/hub_runner.py +261 -0
  44. ai/chronon/repo/hub_uploader.py +109 -0
  45. ai/chronon/repo/init.py +60 -0
  46. ai/chronon/repo/join_backfill.py +119 -0
  47. ai/chronon/repo/run.py +296 -0
  48. ai/chronon/repo/serializer.py +133 -0
  49. ai/chronon/repo/team_json_utils.py +46 -0
  50. ai/chronon/repo/utils.py +481 -0
  51. ai/chronon/repo/zipline.py +35 -0
  52. ai/chronon/repo/zipline_hub.py +277 -0
  53. ai/chronon/resources/__init__.py +0 -0
  54. ai/chronon/resources/gcp/__init__.py +0 -0
  55. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  56. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  57. ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
  58. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  59. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  60. ai/chronon/resources/gcp/joins/test/data.py +26 -0
  61. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  62. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  63. ai/chronon/resources/gcp/sources/test/data.py +26 -0
  64. ai/chronon/resources/gcp/teams.py +58 -0
  65. ai/chronon/source.py +86 -0
  66. ai/chronon/staging_query.py +226 -0
  67. ai/chronon/types.py +58 -0
  68. ai/chronon/utils.py +510 -0
  69. ai/chronon/windows.py +48 -0
  70. awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
  71. awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
  72. awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
  73. awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
  74. awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
  75. gen_thrift/__init__.py +0 -0
  76. gen_thrift/api/__init__.py +1 -0
  77. gen_thrift/api/constants.py +15 -0
  78. gen_thrift/api/ttypes.py +3754 -0
  79. gen_thrift/common/__init__.py +1 -0
  80. gen_thrift/common/constants.py +15 -0
  81. gen_thrift/common/ttypes.py +1814 -0
  82. gen_thrift/eval/__init__.py +1 -0
  83. gen_thrift/eval/constants.py +15 -0
  84. gen_thrift/eval/ttypes.py +660 -0
  85. gen_thrift/fetcher/__init__.py +1 -0
  86. gen_thrift/fetcher/constants.py +15 -0
  87. gen_thrift/fetcher/ttypes.py +127 -0
  88. gen_thrift/hub/__init__.py +1 -0
  89. gen_thrift/hub/constants.py +15 -0
  90. gen_thrift/hub/ttypes.py +1109 -0
  91. gen_thrift/observability/__init__.py +1 -0
  92. gen_thrift/observability/constants.py +15 -0
  93. gen_thrift/observability/ttypes.py +2355 -0
  94. gen_thrift/planner/__init__.py +1 -0
  95. gen_thrift/planner/constants.py +15 -0
  96. gen_thrift/planner/ttypes.py +1967 -0
ai/chronon/cli/compile/parse_teams.py
@@ -0,0 +1,242 @@
+import importlib
+import importlib.util
+import os
+import sys
+from copy import deepcopy
+from enum import Enum
+from typing import Any, Dict, Optional, Union
+
+from gen_thrift.api.ttypes import Join, MetaData, Team
+from gen_thrift.common.ttypes import (
+    ClusterConfigProperties,
+    ConfigProperties,
+    EnvironmentVariables,
+    ExecutionInfo,
+)
+
+from ai.chronon.cli.compile.display.console import console
+from ai.chronon.cli.logger import get_logger
+
+logger = get_logger()
+
+_DEFAULT_CONF_TEAM = "default"
+
+
+def import_module_from_file(file_path):
+    # Get the module name from the file path (without the .py extension)
+    module_name = file_path.split("/")[-1].replace(".py", "")
+
+    # Create the module spec
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+
+    # Create the module based on the spec
+    module = importlib.util.module_from_spec(spec)
+
+    # Add the module to sys.modules
+    sys.modules[module_name] = module
+
+    # Execute the module
+    spec.loader.exec_module(module)
+
+    return module
+
+
+def load_teams(conf_root: str, print: bool = True) -> Dict[str, Team]:
+    teams_file = os.path.join(conf_root, "teams.py")
+
+    assert os.path.exists(teams_file), (
+        f"Team config file: {teams_file} not found. You might be running this from the wrong directory."
+    )
+
+    team_module = import_module_from_file(teams_file)
+
+    assert team_module is not None, (
+        f"Team config file {teams_file} is not on the PYTHONPATH. You might need to add your config "
+        f"directory to the PYTHONPATH."
+    )
+
+    team_dict = {}
+
+    if print:
+        console.print(f"Pulling configuration from [cyan italic]{teams_file}[/cyan italic]")
+
+    for name, obj in team_module.__dict__.items():
+        if isinstance(obj, Team):
+            obj.name = name
+            team_dict[name] = obj
+
+    return team_dict
+
+
+def update_metadata(obj: Any, team_dict: Dict[str, Team]):
+    assert obj is not None, "Cannot update metadata on a None object"
+
+    metadata = obj.metaData
+
+    assert obj.metaData is not None, "Cannot update empty metadata"
+
+    name = obj.metaData.name
+    team = obj.metaData.team
+
+    assert team is not None, (
+        f"Team name is required in metadata for {name}. This is usually set by the compiler. Internal error."
+    )
+
+    assert team in team_dict, f"Team '{team}' not found in teams.py. Please add an entry 🙏"
+
+    assert _DEFAULT_CONF_TEAM in team_dict, (
+        f"'{_DEFAULT_CONF_TEAM}' team not found in teams.py, please add an entry 🙏."
+    )
+
+    # Only set the outputNamespace if it hasn't been set already
+    if not metadata.outputNamespace:
+        metadata.outputNamespace = team_dict[team].outputNamespace
+
+    if isinstance(obj, Join):
+        join_namespace = obj.metaData.outputNamespace
+
+        # set the metadata for each join part and labelPart
+        def set_group_by_metadata(join_part_gb, output_namespace):
+            if join_part_gb is not None:
+                if join_part_gb.metaData:
+                    # Only set the outputNamespace if it hasn't been set already
+                    if not join_part_gb.metaData.outputNamespace:
+                        join_part_gb.metaData.outputNamespace = output_namespace
+                else:
+                    # If there's no metaData at all, create it and set outputNamespace
+                    join_part_gb.metaData = MetaData()
+                    join_part_gb.metaData.outputNamespace = output_namespace
+
+        if obj.joinParts:
+            for jp in obj.joinParts or []:
+                jp.useLongNames = obj.useLongNames
+                set_group_by_metadata(jp.groupBy, join_namespace)
+
+        if obj.labelParts:
+            for lb in obj.labelParts.labels or []:
+                lb.useLongNames = obj.useLongNames
+                set_group_by_metadata(lb.groupBy, join_namespace)
+
+    if metadata.executionInfo is None:
+        metadata.executionInfo = ExecutionInfo()
+
+    merge_team_execution_info(metadata, team_dict, team)
+
+
+def merge_team_execution_info(metadata: MetaData, team_dict: Dict[str, Team], team_name: str):
+    default_team = team_dict.get(_DEFAULT_CONF_TEAM)
+    if not metadata.executionInfo:
+        metadata.executionInfo = ExecutionInfo()
+
+    metadata.executionInfo.env = _merge_mode_maps(
+        default_team.env if default_team else {},
+        team_dict[team_name].env,
+        metadata.executionInfo.env,
+        env_or_config_attribute=EnvOrConfigAttribute.ENV,
+    )
+
+    metadata.executionInfo.conf = _merge_mode_maps(
+        default_team.conf if default_team else {},
+        team_dict[team_name].conf,
+        metadata.executionInfo.conf,
+        env_or_config_attribute=EnvOrConfigAttribute.CONFIG,
+    )
+
+    metadata.executionInfo.clusterConf = _merge_mode_maps(
+        default_team.clusterConf if default_team else {},
+        team_dict[team_name].clusterConf,
+        metadata.executionInfo.clusterConf,
+        env_or_config_attribute=EnvOrConfigAttribute.CLUSTER_CONFIG,
+    )
+
+
+def _merge_maps(*maps: Optional[Dict[str, str]]):
+    """
+    Merges multiple maps into one - with the later maps overriding the earlier ones.
+    """
+
+    result = {}
+
+    for m in maps:
+        if m is None:
+            continue
+
+        for key, value in m.items():
+            result[key] = value
+
+    return result
+
+
+class EnvOrConfigAttribute(str, Enum):
+    ENV = "modeEnvironments"
+    CONFIG = "modeConfigs"
+    CLUSTER_CONFIG = "modeClusterConfigs"
+
+
+def _merge_mode_maps(
+    *mode_maps: Optional[Union[EnvironmentVariables, ConfigProperties, ClusterConfigProperties]],
+    env_or_config_attribute: EnvOrConfigAttribute,
+):
+    """
+    Merges multiple mode maps (environment variables, configs, or cluster configs) into one -
+    with the later maps overriding the earlier ones.
+    """
+
+    # Merge `common` into each individual mode map. Creates a new map.
+    def push_common_to_modes(
+        mode_map: Union[EnvironmentVariables, ConfigProperties], mode_key: EnvOrConfigAttribute
+    ):
+        final_mode_map = deepcopy(mode_map)
+        common = final_mode_map.common
+        modes = getattr(final_mode_map, mode_key)
+
+        if modes:
+            for mode in modes:
+                modes[mode] = _merge_maps(common, modes[mode])
+
+        return final_mode_map
+
+    filtered_mode_maps = [m for m in mode_maps if m]
+
+    # Initialize the result with the first mode map
+    result = None
+
+    if len(filtered_mode_maps) >= 1:
+        result = push_common_to_modes(filtered_mode_maps[0], env_or_config_attribute)
+
+    # Merge each new mode map into the result
+    for m in filtered_mode_maps[1:]:
+        # We want to prepare the individual modes with `common` in incoming_mode_map
+        incoming_mode_map = push_common_to_modes(m, env_or_config_attribute)
+
+        # create new common
+        incoming_common = incoming_mode_map.common
+        new_common = _merge_maps(result.common, incoming_common)
+        result.common = new_common
+
+        current_modes = getattr(result, env_or_config_attribute)
+        incoming_modes = getattr(incoming_mode_map, env_or_config_attribute)
+
+        current_modes_keys = list(current_modes.keys()) if current_modes else []
+        incoming_modes_keys = list(incoming_modes.keys()) if incoming_modes else []
+
+        all_modes_keys = list(set(current_modes_keys + incoming_modes_keys))
+
+        for mode in all_modes_keys:
+            current_mode = current_modes.get(mode, {}) if current_modes else {}
+
+            # if the incoming_mode is not found, we NEED to default to incoming_common
+            incoming_mode = (
+                incoming_modes.get(mode, incoming_common) if incoming_modes else incoming_common
+            )
+
+            # first to last with later ones overriding the earlier ones:
+            # common -> current mode level -> incoming mode level
+            new_mode = _merge_maps(new_common, current_mode, incoming_mode)
+
+            if current_modes is None:
+                # attach a fresh mode dict to the result so the update below isn't lost
+                current_modes = {}
+                setattr(result, env_or_config_attribute, current_modes)
+
+            current_modes[mode] = new_mode
+
+    return result
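
For intuition, `_merge_maps` applies plain dict-style overrides, so the layering above resolves as: the `default` team's settings first, then the object's team settings, then whatever is set directly on the object's `executionInfo`, with later layers winning. A minimal sketch of that precedence, using hypothetical keys rather than values shipped in the package:

    # Later maps override earlier ones, mirroring _merge_maps above.
    # All keys and values here are hypothetical, for illustration only.
    default_team_env = {"SPARK_VERSION": "3.5", "EXECUTOR_MEMORY": "4G"}
    team_env = {"EXECUTOR_MEMORY": "8G"}
    object_env = {"EXECUTOR_CORES": "4"}

    merged = {}
    for layer in (default_team_env, team_env, object_env):
        merged.update(layer or {})

    assert merged == {"SPARK_VERSION": "3.5", "EXECUTOR_MEMORY": "8G", "EXECUTOR_CORES": "4"}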
ai/chronon/cli/compile/serializer.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2023 The Chronon Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+from thrift import TSerialization
+from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
+from thrift.protocol.TJSONProtocol import TSimpleJSONProtocolFactory
+from thrift.Thrift import TType
+from thrift.transport.TTransport import TMemoryBuffer
+
+
+class ThriftJSONDecoder(json.JSONDecoder):
+    def __init__(self, *args, **kwargs):
+        self._thrift_class = kwargs.pop("thrift_class")
+        super(ThriftJSONDecoder, self).__init__(*args, **kwargs)
+
+    def decode(self, json_str):
+        if isinstance(json_str, dict):
+            dct = json_str
+        else:
+            dct = super(ThriftJSONDecoder, self).decode(json_str)
+        return self._convert(
+            dct, TType.STRUCT, (self._thrift_class, self._thrift_class.thrift_spec)
+        )
+
+    def _convert(self, val, ttype, ttype_info):
+        if ttype == TType.STRUCT:
+            (thrift_class, thrift_spec) = ttype_info
+            ret = thrift_class()
+            for field in thrift_spec:
+                if field is None:
+                    continue
+                (_, field_ttype, field_name, field_ttype_info, dummy) = field
+                if field_name not in val:
+                    continue
+                converted_val = self._convert(val[field_name], field_ttype, field_ttype_info)
+                setattr(ret, field_name, converted_val)
+        elif ttype == TType.LIST:
+            (element_ttype, element_ttype_info, _) = ttype_info
+            ret = [self._convert(x, element_ttype, element_ttype_info) for x in val]
+        elif ttype == TType.SET:
+            (element_ttype, element_ttype_info) = ttype_info
+            ret = set([self._convert(x, element_ttype, element_ttype_info) for x in val])
+        elif ttype == TType.MAP:
+            (key_ttype, key_ttype_info, val_ttype, val_ttype_info, _) = ttype_info
+            ret = dict(
+                [
+                    (
+                        self._convert(k, key_ttype, key_ttype_info),
+                        self._convert(v, val_ttype, val_ttype_info),
+                    )
+                    for (k, v) in val.items()
+                ]
+            )
+        elif ttype == TType.STRING:
+            ret = str(val)
+        elif ttype == TType.DOUBLE:
+            ret = float(val)
+        elif ttype == TType.I64:
+            ret = int(val)
+        elif ttype == TType.I32 or ttype == TType.I16 or ttype == TType.BYTE:
+            ret = int(val)
+        elif ttype == TType.BOOL:
+            ret = bool(val)
+        else:
+            raise TypeError("Unrecognized thrift field type: %d" % ttype)
+        return ret
+
+
+def json2thrift(json_str, thrift_class):
+    return json.loads(json_str, cls=ThriftJSONDecoder, thrift_class=thrift_class)
+
+
+def json2binary(json_str, thrift_class):
+    thrift = json2thrift(json_str, thrift_class)
+    transport = TMemoryBuffer()
+    protocol = TBinaryProtocolAccelerated(transport)
+    thrift.write(protocol)
+    # Get the raw bytes representing the object in Thrift binary format
+    return transport.getvalue()
+
+
+def file2thrift(path, thrift_class):
+    try:
+        with open(path, "r") as file:
+            return json2thrift(file.read(), thrift_class)
+    except json.decoder.JSONDecodeError as e:
+        raise Exception(
+            f"Error decoding file into a {thrift_class.__name__}: {path}. "
+            + f"Please double check that {path} represents a valid {thrift_class.__name__}."
+        ) from e
+
+
+def thrift_simple_json(obj):
+    simple = TSerialization.serialize(obj, protocol_factory=TSimpleJSONProtocolFactory())
+    parsed = json.loads(simple)
+    return json.dumps(parsed, indent=2, sort_keys=True)
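
These helpers compose: `json2thrift` hydrates a struct from JSON, `json2binary` re-serializes it with the binary protocol, and `thrift_simple_json` pretty-prints a struct with sorted keys. A usage sketch, assuming the `MetaData` struct (which has a `name` field) from `gen_thrift.api.ttypes`; the config name is illustrative:

    from gen_thrift.api.ttypes import MetaData

    from ai.chronon.cli.compile.serializer import json2binary, json2thrift, thrift_simple_json

    # JSON -> thrift struct; keys must match field names in MetaData.thrift_spec.
    md = json2thrift('{"name": "demo_config"}', MetaData)

    # Thrift struct -> stable, pretty-printed JSON.
    print(thrift_simple_json(md))

    # JSON -> compact thrift binary bytes.
    raw_bytes = json2binary('{"name": "demo_config"}', MetaData)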
ai/chronon/cli/compile/version_utils.py
@@ -0,0 +1,42 @@
+"""
+Utilities for handling config versioning in Chronon.
+"""
+
+from typing import Optional, Tuple
+
+
+def parse_name_and_version(name: str) -> Tuple[str, Optional[int]]:
+    """Parse config name to extract base name and version.
+
+    Args:
+        name: Config name (e.g., 'config_name__1' or 'config_name')
+
+    Returns:
+        Tuple of (base_name, version) where version is None if no version suffix
+    """
+    if "__" in name:
+        parts = name.rsplit("__", 1)
+        if len(parts) == 2 and parts[1].isdigit():
+            return parts[0], int(parts[1])
+    return name, None
+
+
+def is_version_change(deleted_name: str, added_name: str) -> bool:
+    """Check if a deleted/added pair represents a version change.
+
+    Args:
+        deleted_name: Name of deleted config
+        added_name: Name of added config
+
+    Returns:
+        True if this represents a version bump of the same config
+    """
+    deleted_base, deleted_version = parse_name_and_version(deleted_name)
+    added_base, added_version = parse_name_and_version(added_name)
+
+    return (
+        deleted_base == added_base
+        and deleted_version is not None
+        and added_version is not None
+        and deleted_version != added_version
+    )
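
In short, a trailing `__<digits>` is treated as a version suffix. A few illustrative cases, with hypothetical config names:

    from ai.chronon.cli.compile.version_utils import is_version_change, parse_name_and_version

    assert parse_name_and_version("txn_features__2") == ("txn_features", 2)
    assert parse_name_and_version("txn_features") == ("txn_features", None)

    # Same base name, different numeric versions -> a version bump.
    assert is_version_change("txn_features__1", "txn_features__2")

    # Different base names are never a version change.
    assert not is_version_change("txn_features__1", "user_features__2")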
ai/chronon/cli/git_utils.py
@@ -0,0 +1,145 @@
+import subprocess
+import sys
+from pathlib import Path
+from typing import List, Optional
+
+from ai.chronon.cli.logger import get_logger
+
+logger = get_logger()
+
+
+def get_current_branch() -> str:
+    try:
+        subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL)
+
+        return (
+            subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"])
+            .decode("utf-8")
+            .strip()
+        )
+
+    except subprocess.CalledProcessError as e:
+        try:
+            head_file = Path(".git/HEAD").resolve()
+
+            if head_file.exists():
+                content = head_file.read_text().strip()
+
+                if content.startswith("ref: refs/heads/"):
+                    return content.split("/")[-1]
+
+        except Exception:
+            pass
+
+        print(
+            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else 'Not a git repository or no commits'}",
+            file=sys.stderr,
+        )
+
+        raise
+
+
+def get_fork_point(base_branch: str = "main") -> str:
+    try:
+        return (
+            subprocess.check_output(["git", "merge-base", base_branch, "HEAD"])
+            .decode("utf-8")
+            .strip()
+        )
+
+    except subprocess.CalledProcessError as e:
+        print(
+            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else f'Could not determine fork point from {base_branch}'}",
+            file=sys.stderr,
+        )
+        raise
+
+
+def get_file_content_at_commit(file_path: str, commit: str) -> Optional[str]:
+    try:
+        return subprocess.check_output(["git", "show", f"{commit}:{file_path}"]).decode("utf-8")
+    except subprocess.CalledProcessError:
+        return None
+
+
+def get_current_file_content(file_path: str) -> Optional[str]:
+    try:
+        return Path(file_path).read_text()
+    except Exception:
+        return None
+
+
+def get_changes_since_commit(path: str, commit: Optional[str] = None) -> List[str]:
+    path = Path(path).resolve()
+    if not path.exists():
+        print(f"⛔ Error: Path does not exist: {path}", file=sys.stderr)
+        raise ValueError(f"Path does not exist: {path}")
+
+    try:
+        subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL)
+        commit_range = f"{commit}..HEAD" if commit else "HEAD"
+
+        changes = (
+            subprocess.check_output(["git", "diff", "--name-only", commit_range, "--", str(path)])
+            .decode("utf-8")
+            .splitlines()
+        )
+
+    except subprocess.CalledProcessError:
+        changes = (
+            subprocess.check_output(["git", "diff", "--name-only", "--", str(path)])
+            .decode("utf-8")
+            .splitlines()
+        )
+
+    try:
+        untracked = (
+            subprocess.check_output(
+                ["git", "ls-files", "--others", "--exclude-standard", str(path)]
+            )
+            .decode("utf-8")
+            .splitlines()
+        )
+
+        changes.extend(untracked)
+
+    except subprocess.CalledProcessError as e:
+        print(
+            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else 'Failed to get untracked files'}",
+            file=sys.stderr,
+        )
+
+        raise
+
+    logger.info(f"Changes since commit: {changes}")
+
+    return [change for change in changes if change.strip()]
+
+
+def get_changes_since_fork(path: str, base_branch: str = "main") -> List[str]:
+    try:
+        fork_point = get_fork_point(base_branch)
+        path = Path(path).resolve()
+
+        # Get all potential changes
+        changed_files = set(get_changes_since_commit(str(path), fork_point))
+
+        # Filter out files that are identical to fork point
+        real_changes = []
+        for file in changed_files:
+            fork_content = get_file_content_at_commit(file, fork_point)
+            current_content = get_current_file_content(file)
+
+            if fork_content != current_content:
+                real_changes.append(file)
+
+        logger.info(f"Changes since fork: {real_changes}")
+
+        return real_changes
+
+    except subprocess.CalledProcessError as e:
+        print(
+            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else f'Failed to get changes since fork from {base_branch}'}",
+            file=sys.stderr,
+        )
+        raise
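
A usage sketch, run from inside a git checkout; the `configs` directory and the `main` base branch are assumptions for illustration:

    from ai.chronon.cli.git_utils import get_changes_since_fork, get_current_branch

    branch = get_current_branch()
    changed = get_changes_since_fork("configs", base_branch="main")
    print(f"{branch}: {len(changed)} file(s) under configs/ differ from the fork point")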
ai/chronon/cli/logger.py
@@ -0,0 +1,59 @@
+import logging
+import sys
+from datetime import datetime
+
+TIME_COLOR = "\033[36m"  # Cyan
+LEVEL_COLORS = {
+    logging.DEBUG: "\033[36m",  # Cyan
+    logging.INFO: "\033[32m",  # Green
+    logging.WARNING: "\033[33m",  # Yellow
+    logging.ERROR: "\033[31m",  # Red
+    logging.CRITICAL: "\033[41m",  # White on Red
+}
+FILE_COLOR = "\033[35m"  # Purple
+RESET = "\033[0m"
+
+
+class ColorFormatter(logging.Formatter):
+    def format(self, record):
+        time_str = datetime.fromtimestamp(record.created).strftime("%H:%M:%S")
+        level_color = LEVEL_COLORS.get(record.levelno)
+
+        return (
+            f"{TIME_COLOR}{time_str}{RESET} "
+            f"{level_color}{record.levelname}{RESET} "
+            f"{FILE_COLOR}{record.filename}:{record.lineno}{RESET} - "
+            f"{record.getMessage()}"
+        )
+
+
+def get_logger(log_level=logging.INFO):
+    logger = logging.getLogger(__name__)
+
+    # no need to reset if a handler already exists
+    if not logger.hasHandlers():
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(ColorFormatter())
+
+        logger.addHandler(handler)
+        logger.setLevel(log_level)
+
+    return logger
+
+
+def red(text):
+    return f"\033[1;91m{text}\033[0m"
+
+
+def green(text):
+    return f"\033[1;92m{text}\033[0m"
+
+
+def require(cond, message):
+    if not cond:
+        print(f"X: {message}")
+        sys.exit(1)
+
+
+def done(cond, message):
+    # note: `cond` is accepted for signature parity with require() but is not checked
+    print(f"DONE: {message}")
ai/chronon/constants.py
@@ -0,0 +1,3 @@
+AIRFLOW_DEPENDENCIES_KEY = "airflowDependencies"
+AIRFLOW_LABEL_DEPENDENCIES_KEY = "airflowLabelDependencies"
+PARTITION_COLUMN_KEY = "spark.chronon.partition.column"