awx-zipline-ai 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of awx-zipline-ai might be problematic. Click here for more details.

Files changed (96)
  1. agent/ttypes.py +6 -6
  2. ai/chronon/airflow_helpers.py +20 -23
  3. ai/chronon/cli/__init__.py +0 -0
  4. ai/chronon/cli/compile/__init__.py +0 -0
  5. ai/chronon/cli/compile/column_hashing.py +40 -17
  6. ai/chronon/cli/compile/compile_context.py +13 -17
  7. ai/chronon/cli/compile/compiler.py +59 -36
  8. ai/chronon/cli/compile/conf_validator.py +251 -99
  9. ai/chronon/cli/compile/display/__init__.py +0 -0
  10. ai/chronon/cli/compile/display/class_tracker.py +6 -16
  11. ai/chronon/cli/compile/display/compile_status.py +10 -10
  12. ai/chronon/cli/compile/display/diff_result.py +79 -14
  13. ai/chronon/cli/compile/fill_templates.py +3 -8
  14. ai/chronon/cli/compile/parse_configs.py +10 -17
  15. ai/chronon/cli/compile/parse_teams.py +38 -34
  16. ai/chronon/cli/compile/serializer.py +3 -9
  17. ai/chronon/cli/compile/version_utils.py +42 -0
  18. ai/chronon/cli/git_utils.py +2 -13
  19. ai/chronon/cli/logger.py +0 -2
  20. ai/chronon/constants.py +1 -1
  21. ai/chronon/group_by.py +47 -47
  22. ai/chronon/join.py +46 -32
  23. ai/chronon/logger.py +1 -2
  24. ai/chronon/model.py +9 -4
  25. ai/chronon/query.py +2 -2
  26. ai/chronon/repo/__init__.py +1 -2
  27. ai/chronon/repo/aws.py +17 -31
  28. ai/chronon/repo/cluster.py +121 -50
  29. ai/chronon/repo/compile.py +14 -8
  30. ai/chronon/repo/constants.py +1 -1
  31. ai/chronon/repo/default_runner.py +32 -54
  32. ai/chronon/repo/explore.py +70 -73
  33. ai/chronon/repo/extract_objects.py +6 -9
  34. ai/chronon/repo/gcp.py +89 -88
  35. ai/chronon/repo/gitpython_utils.py +3 -2
  36. ai/chronon/repo/hub_runner.py +145 -55
  37. ai/chronon/repo/hub_uploader.py +2 -1
  38. ai/chronon/repo/init.py +12 -5
  39. ai/chronon/repo/join_backfill.py +19 -5
  40. ai/chronon/repo/run.py +42 -39
  41. ai/chronon/repo/serializer.py +4 -12
  42. ai/chronon/repo/utils.py +72 -63
  43. ai/chronon/repo/zipline.py +3 -19
  44. ai/chronon/repo/zipline_hub.py +211 -39
  45. ai/chronon/resources/__init__.py +0 -0
  46. ai/chronon/resources/gcp/__init__.py +0 -0
  47. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  48. ai/chronon/resources/gcp/group_bys/test/data.py +13 -17
  49. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  50. ai/chronon/resources/gcp/joins/test/data.py +4 -8
  51. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  52. ai/chronon/resources/gcp/sources/test/data.py +9 -6
  53. ai/chronon/resources/gcp/teams.py +9 -21
  54. ai/chronon/source.py +2 -4
  55. ai/chronon/staging_query.py +60 -19
  56. ai/chronon/types.py +3 -2
  57. ai/chronon/utils.py +21 -68
  58. ai/chronon/windows.py +2 -4
  59. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/METADATA +47 -24
  60. awx_zipline_ai-0.3.0.dist-info/RECORD +96 -0
  61. awx_zipline_ai-0.3.0.dist-info/top_level.txt +4 -0
  62. gen_thrift/__init__.py +0 -0
  63. {ai/chronon → gen_thrift}/api/ttypes.py +327 -197
  64. {ai/chronon/api → gen_thrift}/common/ttypes.py +9 -39
  65. gen_thrift/eval/ttypes.py +660 -0
  66. {ai/chronon → gen_thrift}/hub/ttypes.py +12 -131
  67. {ai/chronon → gen_thrift}/observability/ttypes.py +343 -180
  68. {ai/chronon → gen_thrift}/planner/ttypes.py +326 -45
  69. ai/chronon/eval/__init__.py +0 -122
  70. ai/chronon/eval/query_parsing.py +0 -19
  71. ai/chronon/eval/sample_tables.py +0 -100
  72. ai/chronon/eval/table_scan.py +0 -186
  73. ai/chronon/orchestration/ttypes.py +0 -4406
  74. ai/chronon/resources/gcp/README.md +0 -174
  75. ai/chronon/resources/gcp/zipline-cli-install.sh +0 -54
  76. awx_zipline_ai-0.2.1.dist-info/RECORD +0 -93
  77. awx_zipline_ai-0.2.1.dist-info/licenses/LICENSE +0 -202
  78. awx_zipline_ai-0.2.1.dist-info/top_level.txt +0 -3
  79. /jars/__init__.py → /__init__.py +0 -0
  80. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/WHEEL +0 -0
  81. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/entry_points.txt +0 -0
  82. {ai/chronon → gen_thrift}/api/__init__.py +0 -0
  83. {ai/chronon/api/common → gen_thrift/api}/constants.py +0 -0
  84. {ai/chronon/api → gen_thrift}/common/__init__.py +0 -0
  85. {ai/chronon/api → gen_thrift/common}/constants.py +0 -0
  86. {ai/chronon/fetcher → gen_thrift/eval}/__init__.py +0 -0
  87. {ai/chronon/fetcher → gen_thrift/eval}/constants.py +0 -0
  88. {ai/chronon/hub → gen_thrift/fetcher}/__init__.py +0 -0
  89. {ai/chronon/hub → gen_thrift/fetcher}/constants.py +0 -0
  90. {ai/chronon → gen_thrift}/fetcher/ttypes.py +0 -0
  91. {ai/chronon/observability → gen_thrift/hub}/__init__.py +0 -0
  92. {ai/chronon/observability → gen_thrift/hub}/constants.py +0 -0
  93. {ai/chronon/orchestration → gen_thrift/observability}/__init__.py +0 -0
  94. {ai/chronon/orchestration → gen_thrift/observability}/constants.py +0 -0
  95. {ai/chronon → gen_thrift}/planner/__init__.py +0 -0
  96. {ai/chronon → gen_thrift}/planner/constants.py +0 -0
ai/chronon/cli/compile/display/compile_status.py CHANGED
@@ -26,14 +26,11 @@ class CompileStatus:
26
26
  if self.use_live:
27
27
  self.live.console.print(msg)
28
28
 
29
- def add_object_update_display(
30
- self, compiled: CompiledObj, obj_type: str = None
31
- ) -> None:
32
-
29
+ def add_object_update_display(self, compiled: CompiledObj, obj_type: str = None) -> None:
33
30
  if compiled.obj_type is not None and obj_type is not None:
34
- assert (
35
- compiled.obj_type == obj_type
36
- ), f"obj_type mismatch: {compiled.obj_type} != {obj_type}"
31
+ assert compiled.obj_type == obj_type, (
32
+ f"obj_type mismatch: {compiled.obj_type} != {obj_type}"
33
+ )
37
34
 
38
35
  if obj_type not in self.cls_to_tracker:
39
36
  self.cls_to_tracker[obj_type] = ClassTracker()
@@ -43,7 +40,6 @@ class CompileStatus:
43
40
  self._update_display()
44
41
 
45
42
  def add_existing_object_update_display(self, existing_obj: CompiledObj) -> None:
46
-
47
43
  obj_type = existing_obj.obj_type
48
44
 
49
45
  if obj_type not in self.cls_to_tracker:
@@ -63,10 +59,14 @@ class CompileStatus:
63
59
  if self.use_live:
64
60
  self.live.stop()
65
61
 
66
- def render(self) -> Text:
62
+ def render(self, ignore_python_errors: bool = False) -> Text:
67
63
  text = Text(overflow="fold", no_wrap=False)
68
64
 
69
65
  for obj_type, tracker in self.cls_to_tracker.items():
66
+ # Skip MetaData section
67
+ if obj_type == "MetaData":
68
+ continue
69
+
70
70
  text.append(f"\n{obj_type}-s:\n", style="cyan")
71
71
 
72
72
  status = tracker.to_status()
@@ -77,7 +77,7 @@ class CompileStatus:
77
77
  if errors:
78
78
  text.append(errors)
79
79
 
80
- diff = tracker.diff()
80
+ diff = tracker.diff(ignore_python_errors)
81
81
  if diff:
82
82
  text.append(diff)
83
83
 
ai/chronon/cli/compile/display/diff_result.py CHANGED
@@ -1,36 +1,101 @@
1
- from typing import List
1
+ from typing import List, Tuple
2
2
 
3
3
  from rich.text import Text
4
4
 
5
+ from ai.chronon.cli.compile.version_utils import parse_name_and_version
5
6
 
6
- class DiffResult:
7
7
 
8
+ class DiffResult:
8
9
  def __init__(self):
9
10
  self.added: List[str] = []
10
11
  self.updated: List[str] = []
12
+ self.version_bumped: List[
13
+ Tuple[str, int, int]
14
+ ] = [] # (base_name, old_version, new_version)
15
+
16
+ def detect_version_changes(self, deleted_names: List[str]) -> Tuple[List[str], List[str]]:
17
+ """Detect version changes between deleted and added names.
18
+
19
+ Returns:
20
+ Tuple of (remaining_deleted, remaining_added) after removing version changes
21
+ """
22
+ # Create mappings of base names to versions
23
+ deleted_base_to_name = {}
24
+ added_base_to_name = {}
25
+
26
+ for name in deleted_names:
27
+ base_name, version = parse_name_and_version(name)
28
+ if version is not None:
29
+ deleted_base_to_name[base_name] = (name, version)
30
+
31
+ for name in self.added:
32
+ base_name, version = parse_name_and_version(name)
33
+ if version is not None:
34
+ added_base_to_name[base_name] = (name, version)
35
+
36
+ # Find version changes
37
+ remaining_deleted = []
38
+ remaining_added = []
39
+
40
+ for name in deleted_names:
41
+ base_name, old_version = parse_name_and_version(name)
42
+ if (
43
+ base_name in added_base_to_name
44
+ and old_version is not None
45
+ and base_name in deleted_base_to_name
46
+ ):
47
+ # This is a version change
48
+ _, new_version = added_base_to_name[base_name]
49
+ self.version_bumped.append((base_name, old_version, new_version))
50
+ else:
51
+ remaining_deleted.append(name)
52
+
53
+ for name in self.added:
54
+ base_name, version = parse_name_and_version(name)
55
+ if not (base_name in deleted_base_to_name and version is not None):
56
+ # This is not part of a version change
57
+ remaining_added.append(name)
58
+
59
+ return remaining_deleted, remaining_added
11
60
 
12
61
  def render(self, deleted_names: List[str], indent=" ") -> Text:
62
+ # Detect version changes first
63
+ remaining_deleted, remaining_added = self.detect_version_changes(deleted_names)
13
64
 
14
65
  def added_signage():
15
- return Text("Added", style="dim green")
66
+ return Text("Added", style="dim green")
16
67
 
17
68
  def updated_signage():
18
- return Text("Updated", style="dim yellow")
69
+ return Text("❗ Changed in place (no version change)", style="dim yellow")
19
70
 
20
71
  def deleted_signage():
21
- return Text("Deleted", style="red")
72
+ return Text("🗑️ Deleted", style="red")
22
73
 
23
- added = [(added_signage(), name) for name in self.added]
74
+ def version_bumped_signage():
75
+ return Text("⬆️ Version changed", style="dim blue")
24
76
 
77
+ added = [(added_signage(), name) for name in remaining_added]
25
78
  updated = [(updated_signage(), name) for name in self.updated]
26
-
27
- result_order = added + updated
28
-
29
- if deleted_names:
30
- deleted = [(deleted_signage(), name) for name in deleted_names]
31
- result_order += deleted
32
-
33
- result_order = sorted(result_order, key=lambda t: t[1])
79
+ version_bumped = [
80
+ (version_bumped_signage(), f"{base_name} (v{old_ver} -> v{new_ver})")
81
+ for base_name, old_ver, new_ver in self.version_bumped
82
+ ]
83
+
84
+ # Put version changes and additions first, changed items at the bottom
85
+ # Sort each group separately to maintain grouping
86
+ version_bumped_sorted = sorted(version_bumped, key=lambda t: t[1])
87
+ added_sorted = sorted(added, key=lambda t: t[1])
88
+ updated_sorted = sorted(updated, key=lambda t: t[1])
89
+
90
+ result_order = version_bumped_sorted + added_sorted
91
+
92
+ if remaining_deleted:
93
+ deleted = [(deleted_signage(), name) for name in remaining_deleted]
94
+ deleted_sorted = sorted(deleted, key=lambda t: t[1])
95
+ result_order += deleted_sorted
96
+
97
+ # Add updated (changed in place) at the very end
98
+ result_order += updated_sorted
34
99
 
35
100
  text = Text(overflow="fold", no_wrap=False)
36
101
  for signage, name in result_order:
ai/chronon/cli/compile/fill_templates.py CHANGED
@@ -1,26 +1,22 @@
1
+ from gen_thrift.api.ttypes import Join, Team
2
+
1
3
  from ai.chronon import utils
2
- from ai.chronon.api.ttypes import Join, Team
3
4
  from ai.chronon.cli.compile.compile_context import CompileContext
4
5
 
5
6
 
6
7
  def _fill_template(table, obj, namespace):
7
-
8
8
  if table:
9
- table = table.replace(
10
- "{{ logged_table }}", utils.log_table_name(obj, full_name=True)
11
- )
9
+ table = table.replace("{{ logged_table }}", utils.log_table_name(obj, full_name=True))
12
10
  table = table.replace("{{ db }}", namespace)
13
11
 
14
12
  return table
15
13
 
16
14
 
17
15
  def set_templated_values(obj, cls, compile_context: CompileContext):
18
-
19
16
  team_obj: Team = compile_context.teams_dict[obj.team]
20
17
  namespace = team_obj.outputNamespace
21
18
 
22
19
  if cls == Join and obj.bootstrapParts:
23
-
24
20
  for bootstrap in obj.bootstrapParts:
25
21
  bootstrap.table = _fill_template(bootstrap.table, obj, namespace)
26
22
 
@@ -30,7 +26,6 @@ def set_templated_values(obj, cls, compile_context: CompileContext):
30
26
  ]
31
27
 
32
28
  if cls == Join and obj.labelParts:
33
-
34
29
  obj.labelParts.metaData.dependencies = [
35
30
  label_dep.replace(
36
31
  "{{ join_backfill_table }}",
ai/chronon/cli/compile/parse_configs.py CHANGED
@@ -4,8 +4,9 @@ import importlib
4
4
  import os
5
5
  from typing import Any, List
6
6
 
7
+ from gen_thrift.api.ttypes import GroupBy, Join
8
+
7
9
  from ai.chronon import airflow_helpers
8
- from ai.chronon.api.ttypes import GroupBy, Join
9
10
  from ai.chronon.cli.compile import parse_teams, serializer
10
11
  from ai.chronon.cli.compile.compile_context import CompileContext
11
12
  from ai.chronon.cli.compile.display.compiled_obj import CompiledObj
@@ -13,9 +14,8 @@ from ai.chronon.cli.logger import get_logger
13
14
 
14
15
  logger = get_logger()
15
16
 
16
- def from_folder(
17
- cls: type, input_dir: str, compile_context: CompileContext
18
- ) -> List[CompiledObj]:
17
+
18
+ def from_folder(cls: type, input_dir: str, compile_context: CompileContext) -> List[CompiledObj]:
19
19
  """
20
20
  Recursively consumes a folder, and constructs a map of
21
21
  object qualifier to StagingQuery, GroupBy, or Join
@@ -26,7 +26,6 @@ def from_folder(
26
26
  results = []
27
27
 
28
28
  for f in python_files:
29
-
30
29
  try:
31
30
  results_dict = from_file(f, cls, input_dir)
32
31
 
@@ -55,9 +54,7 @@ def from_folder(
55
54
  )
56
55
  results.append(result)
57
56
 
58
- compile_context.compile_status.add_object_update_display(
59
- result, cls.__name__
60
- )
57
+ compile_context.compile_status.add_object_update_display(result, cls.__name__)
61
58
 
62
59
  except Exception as e:
63
60
  result = CompiledObj(
@@ -71,15 +68,12 @@ def from_folder(
71
68
 
72
69
  results.append(result)
73
70
 
74
- compile_context.compile_status.add_object_update_display(
75
- result, cls.__name__
76
- )
71
+ compile_context.compile_status.add_object_update_display(result, cls.__name__)
77
72
 
78
73
  return results
79
74
 
80
75
 
81
76
  def from_file(file_path: str, cls: type, input_dir: str):
82
-
83
77
  # this is where the python path should have been set to
84
78
  chronon_root = os.path.dirname(input_dir)
85
79
  rel_path = os.path.relpath(file_path, chronon_root)
@@ -96,16 +90,14 @@ def from_file(file_path: str, cls: type, input_dir: str):
96
90
  result = {}
97
91
 
98
92
  for var_name, obj in list(module.__dict__.items()):
99
-
100
93
  if isinstance(obj, cls):
101
-
102
94
  copied_obj = copy.deepcopy(obj)
103
95
 
104
96
  name = f"{mod_path}.{var_name}"
105
-
97
+
106
98
  # Add version suffix if version is set
107
99
  name = name + "__" + str(copied_obj.metaData.version)
108
-
100
+
109
101
  copied_obj.metaData.name = name
110
102
  copied_obj.metaData.team = mod_path.split(".")[0]
111
103
 
@@ -113,6 +105,7 @@ def from_file(file_path: str, cls: type, input_dir: str):
113
105
 
114
106
  return result
115
107
 
108
+
116
109
  def populate_column_hashes(obj: Any):
117
110
  """
118
111
  Populate the columnHashes field in the object's metadata with semantic hashes
@@ -135,7 +128,7 @@ def populate_column_hashes(obj: Any):
135
128
  obj.metaData.columnHashes = column_hashes
136
129
 
137
130
  if obj.joinParts:
138
- for jp in (obj.joinParts or []):
131
+ for jp in obj.joinParts or []:
139
132
  group_by = jp.groupBy
140
133
  group_by_hashes = compute_group_by_columns_hashes(group_by)
141
134
  group_by.metaData.columnHashes = group_by_hashes
ai/chronon/cli/compile/parse_teams.py CHANGED
@@ -6,13 +6,14 @@ from copy import deepcopy
6
6
  from enum import Enum
7
7
  from typing import Any, Dict, Optional, Union
8
8
 
9
- from ai.chronon.api.common.ttypes import (
9
+ from gen_thrift.api.ttypes import Join, MetaData, Team
10
+ from gen_thrift.common.ttypes import (
10
11
  ClusterConfigProperties,
11
12
  ConfigProperties,
12
13
  EnvironmentVariables,
13
14
  ExecutionInfo,
14
15
  )
15
- from ai.chronon.api.ttypes import Join, MetaData, Team
16
+
16
17
  from ai.chronon.cli.compile.display.console import console
17
18
  from ai.chronon.cli.logger import get_logger
18
19
 
@@ -43,9 +44,9 @@ def import_module_from_file(file_path):
43
44
  def load_teams(conf_root: str, print: bool = True) -> Dict[str, Team]:
44
45
  teams_file = os.path.join(conf_root, "teams.py")
45
46
 
46
- assert os.path.exists(
47
- teams_file
48
- ), f"Team config file: {teams_file} not found. You might be running this from the wrong directory."
47
+ assert os.path.exists(teams_file), (
48
+ f"Team config file: {teams_file} not found. You might be running this from the wrong directory."
49
+ )
49
50
 
50
51
  team_module = import_module_from_file(teams_file)
51
52
 
@@ -57,9 +58,7 @@ def load_teams(conf_root: str, print: bool = True) -> Dict[str, Team]:
57
58
  team_dict = {}
58
59
 
59
60
  if print:
60
- console.print(
61
- f"Pulling configuration from [cyan italic]{teams_file}[/cyan italic]"
62
- )
61
+ console.print(f"Pulling configuration from [cyan italic]{teams_file}[/cyan italic]")
63
62
 
64
63
  for name, obj in team_module.__dict__.items():
65
64
  if isinstance(obj, Team):
@@ -79,17 +78,15 @@ def update_metadata(obj: Any, team_dict: Dict[str, Team]):
79
78
  name = obj.metaData.name
80
79
  team = obj.metaData.team
81
80
 
82
- assert (
83
- team is not None
84
- ), f"Team name is required in metadata for {name}. This usually set by compiler. Internal error."
81
+ assert team is not None, (
82
+ f"Team name is required in metadata for {name}. This usually set by compiler. Internal error."
83
+ )
85
84
 
86
- assert (
87
- team in team_dict
88
- ), f"Team '{team}' not found in teams.py. Please add an entry 🙏"
85
+ assert team in team_dict, f"Team '{team}' not found in teams.py. Please add an entry 🙏"
89
86
 
90
- assert (
91
- _DEFAULT_CONF_TEAM in team_dict
92
- ), f"'{_DEFAULT_CONF_TEAM}' team not found in teams.py, please add an entry 🙏."
87
+ assert _DEFAULT_CONF_TEAM in team_dict, (
88
+ f"'{_DEFAULT_CONF_TEAM}' team not found in teams.py, please add an entry 🙏."
89
+ )
93
90
 
94
91
  # Only set the outputNamespace if it hasn't been set already
95
92
  if not metadata.outputNamespace:
@@ -111,12 +108,12 @@ def update_metadata(obj: Any, team_dict: Dict[str, Team]):
111
108
  join_part_gb.metaData.outputNamespace = output_namespace
112
109
 
113
110
  if obj.joinParts:
114
- for jp in (obj.joinParts or []):
111
+ for jp in obj.joinParts or []:
115
112
  jp.useLongNames = obj.useLongNames
116
113
  set_group_by_metadata(jp.groupBy, join_namespace)
117
114
 
118
115
  if obj.labelParts:
119
- for lb in (obj.labelParts.labels or []):
116
+ for lb in obj.labelParts.labels or []:
120
117
  lb.useLongNames = obj.useLongNames
121
118
  set_group_by_metadata(lb.groupBy, join_namespace)
122
119
 
@@ -161,7 +158,6 @@ def _merge_maps(*maps: Optional[Dict[str, str]]):
161
158
  result = {}
162
159
 
163
160
  for m in maps:
164
-
165
161
  if m is None:
166
162
  continue
167
163
 
@@ -178,22 +174,25 @@ class EnvOrConfigAttribute(str, Enum):
178
174
 
179
175
 
180
176
  def _merge_mode_maps(
181
- *mode_maps: Optional[Union[EnvironmentVariables, ConfigProperties, ClusterConfigProperties]],
182
- env_or_config_attribute: EnvOrConfigAttribute,
177
+ *mode_maps: Optional[Union[EnvironmentVariables, ConfigProperties, ClusterConfigProperties]],
178
+ env_or_config_attribute: EnvOrConfigAttribute,
183
179
  ):
184
180
  """
185
181
  Merges multiple environment variables into one - with the later maps overriding the earlier ones.
186
182
  """
187
183
 
188
184
  # Merge `common` to each individual mode map. Creates a new map
189
- def push_common_to_modes(mode_map: Union[EnvironmentVariables, ConfigProperties], mode_key: EnvOrConfigAttribute):
185
+ def push_common_to_modes(
186
+ mode_map: Union[EnvironmentVariables, ConfigProperties], mode_key: EnvOrConfigAttribute
187
+ ):
190
188
  final_mode_map = deepcopy(mode_map)
191
189
  common = final_mode_map.common
192
190
  modes = getattr(final_mode_map, mode_key)
193
- for _ in modes:
194
- modes[_] = _merge_maps(
195
- common, modes[_]
196
- )
191
+
192
+ if modes:
193
+ for _ in modes:
194
+ modes[_] = _merge_maps(common, modes[_])
195
+
197
196
  return final_mode_map
198
197
 
199
198
  filtered_mode_maps = [m for m in mode_maps if m]
@@ -217,22 +216,27 @@ def _merge_mode_maps(
217
216
  current_modes = getattr(result, env_or_config_attribute)
218
217
  incoming_modes = getattr(incoming_mode_map, env_or_config_attribute)
219
218
 
220
- current_modes_keys = list(current_modes.keys())
221
- incoming_modes_keys = list(incoming_modes.keys())
219
+ current_modes_keys = list(current_modes.keys()) if current_modes else []
220
+ incoming_modes_keys = list(incoming_modes.keys()) if incoming_modes else []
222
221
 
223
222
  all_modes_keys = list(set(current_modes_keys + incoming_modes_keys))
223
+
224
224
  for mode in all_modes_keys:
225
- current_mode = current_modes.get(mode, {})
225
+ current_mode = current_modes.get(mode, {}) if current_modes else {}
226
226
 
227
227
  # if the incoming_mode is not found, we NEED to default to incoming_common
228
- incoming_mode = incoming_modes.get(mode, incoming_common)
228
+ incoming_mode = (
229
+ incoming_modes.get(mode, incoming_common) if incoming_modes else incoming_common
230
+ )
229
231
 
230
232
  # first to last with later ones overriding the earlier ones
231
233
  # common -> current mode level -> incoming mode level
232
234
 
233
- new_mode = _merge_maps(
234
- new_common, current_mode, incoming_mode
235
- )
235
+ new_mode = _merge_maps(new_common, current_mode, incoming_mode)
236
+
237
+ if current_modes is None:
238
+ current_modes = {}
239
+
236
240
  current_modes[mode] = new_mode
237
241
 
238
242
  return result
ai/chronon/cli/compile/serializer.py CHANGED
@@ -45,18 +45,14 @@ class ThriftJSONDecoder(json.JSONDecoder):
45
45
  (_, field_ttype, field_name, field_ttype_info, dummy) = field
46
46
  if field_name not in val:
47
47
  continue
48
- converted_val = self._convert(
49
- val[field_name], field_ttype, field_ttype_info
50
- )
48
+ converted_val = self._convert(val[field_name], field_ttype, field_ttype_info)
51
49
  setattr(ret, field_name, converted_val)
52
50
  elif ttype == TType.LIST:
53
51
  (element_ttype, element_ttype_info, _) = ttype_info
54
52
  ret = [self._convert(x, element_ttype, element_ttype_info) for x in val]
55
53
  elif ttype == TType.SET:
56
54
  (element_ttype, element_ttype_info) = ttype_info
57
- ret = set(
58
- [self._convert(x, element_ttype, element_ttype_info) for x in val]
59
- )
55
+ ret = set([self._convert(x, element_ttype, element_ttype_info) for x in val])
60
56
  elif ttype == TType.MAP:
61
57
  (key_ttype, key_ttype_info, val_ttype, val_ttype_info, _) = ttype_info
62
58
  ret = dict(
@@ -108,8 +104,6 @@ def file2thrift(path, thrift_class):
108
104
 
109
105
 
110
106
  def thrift_simple_json(obj):
111
- simple = TSerialization.serialize(
112
- obj, protocol_factory=TSimpleJSONProtocolFactory()
113
- )
107
+ simple = TSerialization.serialize(obj, protocol_factory=TSimpleJSONProtocolFactory())
114
108
  parsed = json.loads(simple)
115
109
  return json.dumps(parsed, indent=2, sort_keys=True)
ai/chronon/cli/compile/version_utils.py ADDED
@@ -0,0 +1,42 @@
1
+ """
2
+ Utilities for handling config versioning in Chronon.
3
+ """
4
+
5
+ from typing import Optional, Tuple
6
+
7
+
8
+ def parse_name_and_version(name: str) -> Tuple[str, Optional[int]]:
9
+ """Parse config name to extract base name and version.
10
+
11
+ Args:
12
+ name: Config name (e.g., 'config_name__1' or 'config_name')
13
+
14
+ Returns:
15
+ Tuple of (base_name, version) where version is None if no version suffix
16
+ """
17
+ if "__" in name:
18
+ parts = name.rsplit("__", 1)
19
+ if len(parts) == 2 and parts[1].isdigit():
20
+ return parts[0], int(parts[1])
21
+ return name, None
22
+
23
+
24
+ def is_version_change(deleted_name: str, added_name: str) -> bool:
25
+ """Check if a deleted/added pair represents a version change.
26
+
27
+ Args:
28
+ deleted_name: Name of deleted config
29
+ added_name: Name of added config
30
+
31
+ Returns:
32
+ True if this represents a version bump of the same config
33
+ """
34
+ deleted_base, deleted_version = parse_name_and_version(deleted_name)
35
+ added_base, added_version = parse_name_and_version(added_name)
36
+
37
+ return (
38
+ deleted_base == added_base
39
+ and deleted_version is not None
40
+ and added_version is not None
41
+ and deleted_version != added_version
42
+ )
ai/chronon/cli/git_utils.py CHANGED
@@ -9,7 +9,6 @@ logger = get_logger()
9
9
 
10
10
 
11
11
  def get_current_branch() -> str:
12
-
13
12
  try:
14
13
  subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL)
15
14
 
@@ -20,7 +19,6 @@ def get_current_branch() -> str:
20
19
  )
21
20
 
22
21
  except subprocess.CalledProcessError as e:
23
-
24
22
  try:
25
23
  head_file = Path(".git/HEAD").resolve()
26
24
 
@@ -43,7 +41,6 @@ def get_current_branch() -> str:
43
41
 
44
42
  def get_fork_point(base_branch: str = "main") -> str:
45
43
  try:
46
-
47
44
  return (
48
45
  subprocess.check_output(["git", "merge-base", base_branch, "HEAD"])
49
46
  .decode("utf-8")
@@ -60,9 +57,7 @@ def get_fork_point(base_branch: str = "main") -> str:
60
57
 
61
58
  def get_file_content_at_commit(file_path: str, commit: str) -> Optional[str]:
62
59
  try:
63
- return subprocess.check_output(["git", "show", f"{commit}:{file_path}"]).decode(
64
- "utf-8"
65
- )
60
+ return subprocess.check_output(["git", "show", f"{commit}:{file_path}"]).decode("utf-8")
66
61
  except subprocess.CalledProcessError:
67
62
  return None
68
63
 
@@ -75,7 +70,6 @@ def get_current_file_content(file_path: str) -> Optional[str]:
75
70
 
76
71
 
77
72
  def get_changes_since_commit(path: str, commit: Optional[str] = None) -> List[str]:
78
-
79
73
  path = Path(path).resolve()
80
74
  if not path.exists():
81
75
  print(f"⛔ Error: Path does not exist: {path}", file=sys.stderr)
@@ -86,15 +80,12 @@ def get_changes_since_commit(path: str, commit: Optional[str] = None) -> List[st
86
80
  commit_range = f"{commit}..HEAD" if commit else "HEAD"
87
81
 
88
82
  changes = (
89
- subprocess.check_output(
90
- ["git", "diff", "--name-only", commit_range, "--", str(path)]
91
- )
83
+ subprocess.check_output(["git", "diff", "--name-only", commit_range, "--", str(path)])
92
84
  .decode("utf-8")
93
85
  .splitlines()
94
86
  )
95
87
 
96
88
  except subprocess.CalledProcessError:
97
-
98
89
  changes = (
99
90
  subprocess.check_output(["git", "diff", "--name-only", "--", str(path)])
100
91
  .decode("utf-8")
@@ -102,7 +93,6 @@ def get_changes_since_commit(path: str, commit: Optional[str] = None) -> List[st
102
93
  )
103
94
 
104
95
  try:
105
-
106
96
  untracked = (
107
97
  subprocess.check_output(
108
98
  ["git", "ls-files", "--others", "--exclude-standard", str(path)]
@@ -114,7 +104,6 @@ def get_changes_since_commit(path: str, commit: Optional[str] = None) -> List[st
114
104
  changes.extend(untracked)
115
105
 
116
106
  except subprocess.CalledProcessError as e:
117
-
118
107
  print(
119
108
  f"⛔ Error: {e.stderr.decode('utf-8') if e.stderr else 'Failed to get untracked files'}",
120
109
  file=sys.stderr,
ai/chronon/cli/logger.py CHANGED
@@ -15,9 +15,7 @@ RESET = "\033[0m"
15
15
 
16
16
 
17
17
  class ColorFormatter(logging.Formatter):
18
-
19
18
  def format(self, record):
20
-
21
19
  time_str = datetime.fromtimestamp(record.created).strftime("%H:%M:%S")
22
20
  level_color = LEVEL_COLORS.get(record.levelno)
23
21
 
ai/chronon/constants.py CHANGED
@@ -1,3 +1,3 @@
1
1
  AIRFLOW_DEPENDENCIES_KEY = "airflowDependencies"
2
2
  AIRFLOW_LABEL_DEPENDENCIES_KEY = "airflowLabelDependencies"
3
- PARTITION_COLUMN_KEY = "spark.chronon.partition.column"
3
+ PARTITION_COLUMN_KEY = "spark.chronon.partition.column"