awx-zipline-ai 0.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- agent/__init__.py +1 -0
- agent/constants.py +15 -0
- agent/ttypes.py +1684 -0
- ai/__init__.py +0 -0
- ai/chronon/__init__.py +0 -0
- ai/chronon/airflow_helpers.py +248 -0
- ai/chronon/cli/__init__.py +0 -0
- ai/chronon/cli/compile/__init__.py +0 -0
- ai/chronon/cli/compile/column_hashing.py +336 -0
- ai/chronon/cli/compile/compile_context.py +173 -0
- ai/chronon/cli/compile/compiler.py +183 -0
- ai/chronon/cli/compile/conf_validator.py +742 -0
- ai/chronon/cli/compile/display/__init__.py +0 -0
- ai/chronon/cli/compile/display/class_tracker.py +102 -0
- ai/chronon/cli/compile/display/compile_status.py +95 -0
- ai/chronon/cli/compile/display/compiled_obj.py +12 -0
- ai/chronon/cli/compile/display/console.py +3 -0
- ai/chronon/cli/compile/display/diff_result.py +111 -0
- ai/chronon/cli/compile/fill_templates.py +35 -0
- ai/chronon/cli/compile/parse_configs.py +134 -0
- ai/chronon/cli/compile/parse_teams.py +242 -0
- ai/chronon/cli/compile/serializer.py +109 -0
- ai/chronon/cli/compile/version_utils.py +42 -0
- ai/chronon/cli/git_utils.py +145 -0
- ai/chronon/cli/logger.py +59 -0
- ai/chronon/constants.py +3 -0
- ai/chronon/group_by.py +692 -0
- ai/chronon/join.py +580 -0
- ai/chronon/logger.py +23 -0
- ai/chronon/model.py +40 -0
- ai/chronon/query.py +126 -0
- ai/chronon/repo/__init__.py +39 -0
- ai/chronon/repo/aws.py +284 -0
- ai/chronon/repo/cluster.py +136 -0
- ai/chronon/repo/compile.py +62 -0
- ai/chronon/repo/constants.py +164 -0
- ai/chronon/repo/default_runner.py +269 -0
- ai/chronon/repo/explore.py +418 -0
- ai/chronon/repo/extract_objects.py +134 -0
- ai/chronon/repo/gcp.py +586 -0
- ai/chronon/repo/gitpython_utils.py +15 -0
- ai/chronon/repo/hub_runner.py +261 -0
- ai/chronon/repo/hub_uploader.py +109 -0
- ai/chronon/repo/init.py +60 -0
- ai/chronon/repo/join_backfill.py +119 -0
- ai/chronon/repo/run.py +296 -0
- ai/chronon/repo/serializer.py +133 -0
- ai/chronon/repo/team_json_utils.py +46 -0
- ai/chronon/repo/utils.py +481 -0
- ai/chronon/repo/zipline.py +35 -0
- ai/chronon/repo/zipline_hub.py +277 -0
- ai/chronon/resources/__init__.py +0 -0
- ai/chronon/resources/gcp/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
- ai/chronon/resources/gcp/joins/__init__.py +0 -0
- ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
- ai/chronon/resources/gcp/joins/test/data.py +26 -0
- ai/chronon/resources/gcp/sources/__init__.py +0 -0
- ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
- ai/chronon/resources/gcp/sources/test/data.py +26 -0
- ai/chronon/resources/gcp/teams.py +58 -0
- ai/chronon/source.py +86 -0
- ai/chronon/staging_query.py +226 -0
- ai/chronon/types.py +58 -0
- ai/chronon/utils.py +510 -0
- ai/chronon/windows.py +48 -0
- awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
- awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
- awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
- awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
- awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
- gen_thrift/__init__.py +0 -0
- gen_thrift/api/__init__.py +1 -0
- gen_thrift/api/constants.py +15 -0
- gen_thrift/api/ttypes.py +3754 -0
- gen_thrift/common/__init__.py +1 -0
- gen_thrift/common/constants.py +15 -0
- gen_thrift/common/ttypes.py +1814 -0
- gen_thrift/eval/__init__.py +1 -0
- gen_thrift/eval/constants.py +15 -0
- gen_thrift/eval/ttypes.py +660 -0
- gen_thrift/fetcher/__init__.py +1 -0
- gen_thrift/fetcher/constants.py +15 -0
- gen_thrift/fetcher/ttypes.py +127 -0
- gen_thrift/hub/__init__.py +1 -0
- gen_thrift/hub/constants.py +15 -0
- gen_thrift/hub/ttypes.py +1109 -0
- gen_thrift/observability/__init__.py +1 -0
- gen_thrift/observability/constants.py +15 -0
- gen_thrift/observability/ttypes.py +2355 -0
- gen_thrift/planner/__init__.py +1 -0
- gen_thrift/planner/constants.py +15 -0
- gen_thrift/planner/ttypes.py +1967 -0
ai/chronon/cli/compile/parse_teams.py
ADDED

@@ -0,0 +1,242 @@

import importlib
import importlib.util
import os
import sys
from copy import deepcopy
from enum import Enum
from typing import Any, Dict, Optional, Union

from gen_thrift.api.ttypes import Join, MetaData, Team
from gen_thrift.common.ttypes import (
    ClusterConfigProperties,
    ConfigProperties,
    EnvironmentVariables,
    ExecutionInfo,
)

from ai.chronon.cli.compile.display.console import console
from ai.chronon.cli.logger import get_logger

logger = get_logger()

_DEFAULT_CONF_TEAM = "default"


def import_module_from_file(file_path):
    # Get the module name from the file path (without .py extension)
    module_name = file_path.split("/")[-1].replace(".py", "")

    # Create the module spec
    spec = importlib.util.spec_from_file_location(module_name, file_path)

    # Create the module based on the spec
    module = importlib.util.module_from_spec(spec)

    # Add the module to sys.modules
    sys.modules[module_name] = module

    # Execute the module
    spec.loader.exec_module(module)

    return module


def load_teams(conf_root: str, print: bool = True) -> Dict[str, Team]:
    teams_file = os.path.join(conf_root, "teams.py")

    assert os.path.exists(teams_file), (
        f"Team config file: {teams_file} not found. You might be running this from the wrong directory."
    )

    team_module = import_module_from_file(teams_file)

    assert team_module is not None, (
        f"Team config file {teams_file} is not on the PYTHONPATH. You might need to add your config "
        f"directory to the PYTHONPATH."
    )

    team_dict = {}

    if print:
        console.print(f"Pulling configuration from [cyan italic]{teams_file}[/cyan italic]")

    for name, obj in team_module.__dict__.items():
        if isinstance(obj, Team):
            obj.name = name
            team_dict[name] = obj

    return team_dict


def update_metadata(obj: Any, team_dict: Dict[str, Team]):
    assert obj is not None, "Cannot update metadata on a None object"

    metadata = obj.metaData

    assert obj.metaData is not None, "Cannot update empty metadata"

    name = obj.metaData.name
    team = obj.metaData.team

    assert team is not None, (
        f"Team name is required in metadata for {name}. This is usually set by the compiler. Internal error."
    )

    assert team in team_dict, f"Team '{team}' not found in teams.py. Please add an entry 🙏"

    assert _DEFAULT_CONF_TEAM in team_dict, (
        f"'{_DEFAULT_CONF_TEAM}' team not found in teams.py, please add an entry 🙏."
    )

    # Only set the outputNamespace if it hasn't been set already
    if not metadata.outputNamespace:
        metadata.outputNamespace = team_dict[team].outputNamespace

    if isinstance(obj, Join):
        join_namespace = obj.metaData.outputNamespace

        # set the metadata for each join part and labelParts
        def set_group_by_metadata(join_part_gb, output_namespace):
            if join_part_gb is not None:
                if join_part_gb.metaData:
                    # Only set the outputNamespace if it hasn't been set already
                    if not join_part_gb.metaData.outputNamespace:
                        join_part_gb.metaData.outputNamespace = output_namespace
                else:
                    # If there's no metaData at all, create it and set outputNamespace
                    join_part_gb.metaData = MetaData()
                    join_part_gb.metaData.outputNamespace = output_namespace

        if obj.joinParts:
            for jp in obj.joinParts or []:
                jp.useLongNames = obj.useLongNames
                set_group_by_metadata(jp.groupBy, join_namespace)

        if obj.labelParts:
            for lb in obj.labelParts.labels or []:
                lb.useLongNames = obj.useLongNames
                set_group_by_metadata(lb.groupBy, join_namespace)

    if metadata.executionInfo is None:
        metadata.executionInfo = ExecutionInfo()

    merge_team_execution_info(metadata, team_dict, team)


def merge_team_execution_info(metadata: MetaData, team_dict: Dict[str, Team], team_name: str):
    default_team = team_dict.get(_DEFAULT_CONF_TEAM)
    if not metadata.executionInfo:
        metadata.executionInfo = ExecutionInfo()

    metadata.executionInfo.env = _merge_mode_maps(
        default_team.env if default_team else {},
        team_dict[team_name].env,
        metadata.executionInfo.env,
        env_or_config_attribute=EnvOrConfigAttribute.ENV,
    )

    metadata.executionInfo.conf = _merge_mode_maps(
        default_team.conf if default_team else {},
        team_dict[team_name].conf,
        metadata.executionInfo.conf,
        env_or_config_attribute=EnvOrConfigAttribute.CONFIG,
    )

    metadata.executionInfo.clusterConf = _merge_mode_maps(
        default_team.clusterConf if default_team else {},
        team_dict[team_name].clusterConf,
        metadata.executionInfo.clusterConf,
        env_or_config_attribute=EnvOrConfigAttribute.CLUSTER_CONFIG,
    )


def _merge_maps(*maps: Optional[Dict[str, str]]):
    """
    Merges multiple maps into one - with the later maps overriding the earlier ones.
    """

    result = {}

    for m in maps:
        if m is None:
            continue

        for key, value in m.items():
            result[key] = value

    return result


class EnvOrConfigAttribute(str, Enum):
    ENV = "modeEnvironments"
    CONFIG = "modeConfigs"
    CLUSTER_CONFIG = "modeClusterConfigs"


def _merge_mode_maps(
    *mode_maps: Optional[Union[EnvironmentVariables, ConfigProperties, ClusterConfigProperties]],
    env_or_config_attribute: EnvOrConfigAttribute,
):
    """
    Merges multiple mode maps into one - with the later maps overriding the earlier ones.
    """

    # Merge `common` into each individual mode map. Creates a new map.
    def push_common_to_modes(
        mode_map: Union[EnvironmentVariables, ConfigProperties], mode_key: EnvOrConfigAttribute
    ):
        final_mode_map = deepcopy(mode_map)
        common = final_mode_map.common
        modes = getattr(final_mode_map, mode_key)

        if modes:
            for mode in modes:
                modes[mode] = _merge_maps(common, modes[mode])

        return final_mode_map

    filtered_mode_maps = [m for m in mode_maps if m]

    # Initialize the result with the first mode map
    result = None

    if len(filtered_mode_maps) >= 1:
        result = push_common_to_modes(filtered_mode_maps[0], env_or_config_attribute)

    # Merge each new mode map into the result
    for m in filtered_mode_maps[1:]:
        # We want to prepare the individual modes with `common` in incoming_mode_map
        incoming_mode_map = push_common_to_modes(m, env_or_config_attribute)

        # create new common
        incoming_common = incoming_mode_map.common
        new_common = _merge_maps(result.common, incoming_common)
        result.common = new_common

        current_modes = getattr(result, env_or_config_attribute)
        incoming_modes = getattr(incoming_mode_map, env_or_config_attribute)

        current_modes_keys = list(current_modes.keys()) if current_modes else []
        incoming_modes_keys = list(incoming_modes.keys()) if incoming_modes else []

        all_modes_keys = list(set(current_modes_keys + incoming_modes_keys))

        for mode in all_modes_keys:
            current_mode = current_modes.get(mode, {}) if current_modes else {}

            # if the incoming_mode is not found, we NEED to default to incoming_common
            incoming_mode = (
                incoming_modes.get(mode, incoming_common) if incoming_modes else incoming_common
            )

            # first to last with later ones overriding the earlier ones
            # common -> current mode level -> incoming mode level

            new_mode = _merge_maps(new_common, current_mode, incoming_mode)

            if current_modes is None:
                current_modes = {}
                # Attach the fresh dict back onto the result so the merged modes are retained
                setattr(result, env_or_config_attribute, current_modes)

            current_modes[mode] = new_mode

    return result
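
To make the layering concrete, here is a minimal sketch of the precedence that _merge_mode_maps implements: default team first, then the object's team, with `common` pushed into each mode and later maps winning. The FakeEnvironmentVariables dataclass is a hypothetical stand-in for the thrift-generated EnvironmentVariables struct, used only for illustration:

from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class FakeEnvironmentVariables:  # hypothetical stand-in for gen_thrift's EnvironmentVariables
    common: Optional[Dict[str, str]] = None
    modeEnvironments: Optional[Dict[str, Dict[str, str]]] = None

default_env = FakeEnvironmentVariables(
    common={"SPARK_VERSION": "3.5"},
    modeEnvironments={"backfill": {"EXECUTOR_MEMORY": "4G"}},
)
team_env = FakeEnvironmentVariables(
    common={"TEAM": "search"},
    modeEnvironments={"backfill": {"EXECUTOR_MEMORY": "8G"}},
)

merged = _merge_mode_maps(default_env, team_env, env_or_config_attribute=EnvOrConfigAttribute.ENV)
# Later maps override earlier ones: common becomes
# {"SPARK_VERSION": "3.5", "TEAM": "search"} and the backfill mode resolves to
# {"SPARK_VERSION": "3.5", "TEAM": "search", "EXECUTOR_MEMORY": "8G"}
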
ai/chronon/cli/compile/serializer.py
ADDED

@@ -0,0 +1,109 @@

# Copyright (C) 2023 The Chronon Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json

from thrift import TSerialization
from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
from thrift.protocol.TJSONProtocol import TSimpleJSONProtocolFactory
from thrift.Thrift import TType
from thrift.transport.TTransport import TMemoryBuffer


class ThriftJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, **kwargs):
        self._thrift_class = kwargs.pop("thrift_class")
        super(ThriftJSONDecoder, self).__init__(*args, **kwargs)

    def decode(self, json_str):
        if isinstance(json_str, dict):
            dct = json_str
        else:
            dct = super(ThriftJSONDecoder, self).decode(json_str)
        return self._convert(
            dct, TType.STRUCT, (self._thrift_class, self._thrift_class.thrift_spec)
        )

    def _convert(self, val, ttype, ttype_info):
        if ttype == TType.STRUCT:
            (thrift_class, thrift_spec) = ttype_info
            ret = thrift_class()
            for field in thrift_spec:
                if field is None:
                    continue
                (_, field_ttype, field_name, field_ttype_info, dummy) = field
                if field_name not in val:
                    continue
                converted_val = self._convert(val[field_name], field_ttype, field_ttype_info)
                setattr(ret, field_name, converted_val)
        elif ttype == TType.LIST:
            (element_ttype, element_ttype_info, _) = ttype_info
            ret = [self._convert(x, element_ttype, element_ttype_info) for x in val]
        elif ttype == TType.SET:
            (element_ttype, element_ttype_info) = ttype_info
            ret = set([self._convert(x, element_ttype, element_ttype_info) for x in val])
        elif ttype == TType.MAP:
            (key_ttype, key_ttype_info, val_ttype, val_ttype_info, _) = ttype_info
            ret = dict(
                [
                    (
                        self._convert(k, key_ttype, key_ttype_info),
                        self._convert(v, val_ttype, val_ttype_info),
                    )
                    for (k, v) in val.items()
                ]
            )
        elif ttype == TType.STRING:
            ret = str(val)
        elif ttype == TType.DOUBLE:
            ret = float(val)
        elif ttype == TType.I64:
            ret = int(val)
        elif ttype == TType.I32 or ttype == TType.I16 or ttype == TType.BYTE:
            ret = int(val)
        elif ttype == TType.BOOL:
            ret = bool(val)
        else:
            raise TypeError("Unrecognized thrift field type: %d" % ttype)
        return ret


def json2thrift(json_str, thrift_class):
    return json.loads(json_str, cls=ThriftJSONDecoder, thrift_class=thrift_class)


def json2binary(json_str, thrift_class):
    thrift = json2thrift(json_str, thrift_class)
    transport = TMemoryBuffer()
    protocol = TBinaryProtocolAccelerated(transport)
    thrift.write(protocol)
    # Get the raw bytes representing the object in Thrift binary format
    return transport.getvalue()


def file2thrift(path, thrift_class):
    try:
        with open(path, "r") as file:
            return json2thrift(file.read(), thrift_class)
    except json.decoder.JSONDecodeError as e:
        raise Exception(
            f"Error decoding file into a {thrift_class.__name__}: {path}. "
            + f"Please double check that {path} represents a valid {thrift_class.__name__}."
        ) from e


def thrift_simple_json(obj):
    simple = TSerialization.serialize(obj, protocol_factory=TSimpleJSONProtocolFactory())
    parsed = json.loads(simple)
    return json.dumps(parsed, indent=2, sort_keys=True)
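
A hedged round-trip sketch of how these helpers compose: serialize a thrift struct to SimpleJSON and decode it back. The MetaData fields used here (`name`, `team`) mirror those referenced in parse_teams.py above; the exact constructor signature is an assumption about the generated code:

from gen_thrift.api.ttypes import MetaData

md = MetaData(name="test.example_join", team="test")  # field names assumed from parse_teams.py
json_str = thrift_simple_json(md)           # pretty-printed, key-sorted SimpleJSON
restored = json2thrift(json_str, MetaData)  # decoded back via ThriftJSONDecoder
assert restored.name == md.name and restored.team == md.team
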
ai/chronon/cli/compile/version_utils.py
ADDED

@@ -0,0 +1,42 @@

"""
Utilities for handling config versioning in Chronon.
"""

from typing import Optional, Tuple


def parse_name_and_version(name: str) -> Tuple[str, Optional[int]]:
    """Parse config name to extract base name and version.

    Args:
        name: Config name (e.g., 'config_name__1' or 'config_name')

    Returns:
        Tuple of (base_name, version) where version is None if no version suffix
    """
    if "__" in name:
        parts = name.rsplit("__", 1)
        if len(parts) == 2 and parts[1].isdigit():
            return parts[0], int(parts[1])
    return name, None


def is_version_change(deleted_name: str, added_name: str) -> bool:
    """Check if a deleted/added pair represents a version change.

    Args:
        deleted_name: Name of deleted config
        added_name: Name of added config

    Returns:
        True if this represents a version bump of the same config
    """
    deleted_base, deleted_version = parse_name_and_version(deleted_name)
    added_base, added_version = parse_name_and_version(added_name)

    return (
        deleted_base == added_base
        and deleted_version is not None
        and added_version is not None
        and deleted_version != added_version
    )
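
For illustration, the expected behavior on a few hypothetical config names:

assert parse_name_and_version("risk.txn_features__2") == ("risk.txn_features", 2)
assert parse_name_and_version("risk.txn_features") == ("risk.txn_features", None)

# A delete/add pair counts as a version bump only when both sides carry a numeric suffix
assert is_version_change("risk.txn_features__1", "risk.txn_features__2")
assert not is_version_change("risk.txn_features", "risk.txn_features__2")
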
ai/chronon/cli/git_utils.py
ADDED

@@ -0,0 +1,145 @@

import subprocess
import sys
from pathlib import Path
from typing import List, Optional

from ai.chronon.cli.logger import get_logger

logger = get_logger()


def get_current_branch() -> str:
    try:
        subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL)

        return (
            subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"])
            .decode("utf-8")
            .strip()
        )

    except subprocess.CalledProcessError as e:
        try:
            head_file = Path(".git/HEAD").resolve()

            if head_file.exists():
                content = head_file.read_text().strip()

                if content.startswith("ref: refs/heads/"):
                    # Strip the full prefix so branch names containing "/" stay intact
                    return content[len("ref: refs/heads/"):]

        except Exception:
            pass

        print(
            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else 'Not a git repository or no commits'}",
            file=sys.stderr,
        )

        raise


def get_fork_point(base_branch: str = "main") -> str:
    try:
        return (
            subprocess.check_output(["git", "merge-base", base_branch, "HEAD"])
            .decode("utf-8")
            .strip()
        )

    except subprocess.CalledProcessError as e:
        print(
            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else f'Could not determine fork point from {base_branch}'}",
            file=sys.stderr,
        )
        raise


def get_file_content_at_commit(file_path: str, commit: str) -> Optional[str]:
    try:
        return subprocess.check_output(["git", "show", f"{commit}:{file_path}"]).decode("utf-8")
    except subprocess.CalledProcessError:
        return None


def get_current_file_content(file_path: str) -> Optional[str]:
    try:
        return Path(file_path).read_text()
    except Exception:
        return None


def get_changes_since_commit(path: str, commit: Optional[str] = None) -> List[str]:
    path = Path(path).resolve()
    if not path.exists():
        print(f"⛔ Error: Path does not exist: {path}", file=sys.stderr)
        raise ValueError(f"Path does not exist: {path}")

    try:
        subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL)
        commit_range = f"{commit}..HEAD" if commit else "HEAD"

        changes = (
            subprocess.check_output(["git", "diff", "--name-only", commit_range, "--", str(path)])
            .decode("utf-8")
            .splitlines()
        )

    except subprocess.CalledProcessError:
        changes = (
            subprocess.check_output(["git", "diff", "--name-only", "--", str(path)])
            .decode("utf-8")
            .splitlines()
        )

    try:
        untracked = (
            subprocess.check_output(
                ["git", "ls-files", "--others", "--exclude-standard", str(path)]
            )
            .decode("utf-8")
            .splitlines()
        )

        changes.extend(untracked)

    except subprocess.CalledProcessError as e:
        print(
            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else 'Failed to get untracked files'}",
            file=sys.stderr,
        )

        raise

    logger.info(f"Changes since commit: {changes}")

    return [change for change in changes if change.strip()]


def get_changes_since_fork(path: str, base_branch: str = "main") -> List[str]:
    try:
        fork_point = get_fork_point(base_branch)
        path = Path(path).resolve()

        # Get all potential changes
        changed_files = set(get_changes_since_commit(str(path), fork_point))

        # Filter out files that are identical to fork point
        real_changes = []
        for file in changed_files:
            fork_content = get_file_content_at_commit(file, fork_point)
            current_content = get_current_file_content(file)

            if fork_content != current_content:
                real_changes.append(file)

        logger.info(f"Changes since fork: {real_changes}")

        return real_changes

    except subprocess.CalledProcessError as e:
        print(
            f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else f'Failed to get changes since fork from {base_branch}'}",
            file=sys.stderr,
        )
        raise
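
A brief usage sketch, assuming it runs from inside a git checkout that has a main branch; the "group_bys/" path is hypothetical:

branch = get_current_branch()
fork_point = get_fork_point("main")
# Only files whose content actually differs from the fork point are reported
changed = get_changes_since_fork("group_bys/", base_branch="main")
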
ai/chronon/cli/logger.py
ADDED
@@ -0,0 +1,59 @@

import logging
import sys
from datetime import datetime

TIME_COLOR = "\033[36m"  # Cyan
LEVEL_COLORS = {
    logging.DEBUG: "\033[36m",     # Cyan
    logging.INFO: "\033[32m",      # Green
    logging.WARNING: "\033[33m",   # Yellow
    logging.ERROR: "\033[31m",     # Red
    logging.CRITICAL: "\033[41m",  # White on Red
}
FILE_COLOR = "\033[35m"  # Purple
RESET = "\033[0m"


class ColorFormatter(logging.Formatter):
    def format(self, record):
        time_str = datetime.fromtimestamp(record.created).strftime("%H:%M:%S")
        level_color = LEVEL_COLORS.get(record.levelno)

        return (
            f"{TIME_COLOR}{time_str}{RESET} "
            f"{level_color}{record.levelname}{RESET} "
            f"{FILE_COLOR}{record.filename}:{record.lineno}{RESET} - "
            f"{record.getMessage()}"
        )


def get_logger(log_level=logging.INFO):
    logger = logging.getLogger(__name__)

    # no need to reset if a handler already exists
    if not logger.hasHandlers():
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(ColorFormatter())

        logger.addHandler(handler)
        logger.setLevel(log_level)

    return logger


def red(text):
    return f"\033[1;91m{text}\033[0m"


def green(text):
    return f"\033[1;92m{text}\033[0m"


def require(cond, message):
    if not cond:
        print(f"X: {message}")
        sys.exit(1)


def done(cond, message):
    print(f"DONE: {message}")
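
A short usage sketch of the logger; each record renders as "HH:MM:SS LEVEL filename:lineno - message" with ANSI colors:

import logging

log = get_logger(logging.DEBUG)
log.info("compiled 3 objects")
print(green("Success") + " " + red("2 failures"))
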
ai/chronon/constants.py
ADDED