awx-zipline-ai 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. agent/ttypes.py +6 -6
  2. ai/chronon/airflow_helpers.py +20 -23
  3. ai/chronon/cli/__init__.py +0 -0
  4. ai/chronon/cli/compile/__init__.py +0 -0
  5. ai/chronon/cli/compile/column_hashing.py +40 -17
  6. ai/chronon/cli/compile/compile_context.py +13 -17
  7. ai/chronon/cli/compile/compiler.py +59 -36
  8. ai/chronon/cli/compile/conf_validator.py +251 -99
  9. ai/chronon/cli/compile/display/__init__.py +0 -0
  10. ai/chronon/cli/compile/display/class_tracker.py +6 -16
  11. ai/chronon/cli/compile/display/compile_status.py +10 -10
  12. ai/chronon/cli/compile/display/diff_result.py +79 -14
  13. ai/chronon/cli/compile/fill_templates.py +3 -8
  14. ai/chronon/cli/compile/parse_configs.py +10 -17
  15. ai/chronon/cli/compile/parse_teams.py +38 -34
  16. ai/chronon/cli/compile/serializer.py +3 -9
  17. ai/chronon/cli/compile/version_utils.py +42 -0
  18. ai/chronon/cli/git_utils.py +2 -13
  19. ai/chronon/cli/logger.py +0 -2
  20. ai/chronon/constants.py +1 -1
  21. ai/chronon/group_by.py +47 -47
  22. ai/chronon/join.py +46 -32
  23. ai/chronon/logger.py +1 -2
  24. ai/chronon/model.py +9 -4
  25. ai/chronon/query.py +2 -2
  26. ai/chronon/repo/__init__.py +1 -2
  27. ai/chronon/repo/aws.py +17 -31
  28. ai/chronon/repo/cluster.py +121 -50
  29. ai/chronon/repo/compile.py +14 -8
  30. ai/chronon/repo/constants.py +1 -1
  31. ai/chronon/repo/default_runner.py +32 -54
  32. ai/chronon/repo/explore.py +70 -73
  33. ai/chronon/repo/extract_objects.py +6 -9
  34. ai/chronon/repo/gcp.py +89 -88
  35. ai/chronon/repo/gitpython_utils.py +3 -2
  36. ai/chronon/repo/hub_runner.py +145 -55
  37. ai/chronon/repo/hub_uploader.py +2 -1
  38. ai/chronon/repo/init.py +12 -5
  39. ai/chronon/repo/join_backfill.py +19 -5
  40. ai/chronon/repo/run.py +42 -39
  41. ai/chronon/repo/serializer.py +4 -12
  42. ai/chronon/repo/utils.py +72 -63
  43. ai/chronon/repo/zipline.py +3 -19
  44. ai/chronon/repo/zipline_hub.py +211 -39
  45. ai/chronon/resources/__init__.py +0 -0
  46. ai/chronon/resources/gcp/__init__.py +0 -0
  47. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  48. ai/chronon/resources/gcp/group_bys/test/data.py +13 -17
  49. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  50. ai/chronon/resources/gcp/joins/test/data.py +4 -8
  51. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  52. ai/chronon/resources/gcp/sources/test/data.py +9 -6
  53. ai/chronon/resources/gcp/teams.py +9 -21
  54. ai/chronon/source.py +2 -4
  55. ai/chronon/staging_query.py +60 -19
  56. ai/chronon/types.py +3 -2
  57. ai/chronon/utils.py +21 -68
  58. ai/chronon/windows.py +2 -4
  59. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/METADATA +48 -24
  60. awx_zipline_ai-0.3.1.dist-info/RECORD +96 -0
  61. awx_zipline_ai-0.3.1.dist-info/top_level.txt +4 -0
  62. gen_thrift/__init__.py +0 -0
  63. {ai/chronon → gen_thrift}/api/ttypes.py +327 -197
  64. {ai/chronon/api → gen_thrift}/common/ttypes.py +9 -39
  65. gen_thrift/eval/ttypes.py +660 -0
  66. {ai/chronon → gen_thrift}/hub/ttypes.py +12 -131
  67. {ai/chronon → gen_thrift}/observability/ttypes.py +343 -180
  68. {ai/chronon → gen_thrift}/planner/ttypes.py +326 -45
  69. ai/chronon/eval/__init__.py +0 -122
  70. ai/chronon/eval/query_parsing.py +0 -19
  71. ai/chronon/eval/sample_tables.py +0 -100
  72. ai/chronon/eval/table_scan.py +0 -186
  73. ai/chronon/orchestration/ttypes.py +0 -4406
  74. ai/chronon/resources/gcp/README.md +0 -174
  75. ai/chronon/resources/gcp/zipline-cli-install.sh +0 -54
  76. awx_zipline_ai-0.2.1.dist-info/RECORD +0 -93
  77. awx_zipline_ai-0.2.1.dist-info/licenses/LICENSE +0 -202
  78. awx_zipline_ai-0.2.1.dist-info/top_level.txt +0 -3
  79. /jars/__init__.py → /__init__.py +0 -0
  80. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/WHEEL +0 -0
  81. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/entry_points.txt +0 -0
  82. {ai/chronon → gen_thrift}/api/__init__.py +0 -0
  83. {ai/chronon/api/common → gen_thrift/api}/constants.py +0 -0
  84. {ai/chronon/api → gen_thrift}/common/__init__.py +0 -0
  85. {ai/chronon/api → gen_thrift/common}/constants.py +0 -0
  86. {ai/chronon/fetcher → gen_thrift/eval}/__init__.py +0 -0
  87. {ai/chronon/fetcher → gen_thrift/eval}/constants.py +0 -0
  88. {ai/chronon/hub → gen_thrift/fetcher}/__init__.py +0 -0
  89. {ai/chronon/hub → gen_thrift/fetcher}/constants.py +0 -0
  90. {ai/chronon → gen_thrift}/fetcher/ttypes.py +0 -0
  91. {ai/chronon/observability → gen_thrift/hub}/__init__.py +0 -0
  92. {ai/chronon/observability → gen_thrift/hub}/constants.py +0 -0
  93. {ai/chronon/orchestration → gen_thrift/observability}/__init__.py +0 -0
  94. {ai/chronon/orchestration → gen_thrift/observability}/constants.py +0 -0
  95. {ai/chronon → gen_thrift}/planner/__init__.py +0 -0
  96. {ai/chronon → gen_thrift}/planner/constants.py +0 -0
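Most of the moves above relocate the generated Thrift modules out of `ai.chronon` into a new top-level `gen_thrift` package. As a rough sketch of what that means for downstream definition files (the import targets below are taken from the diffs that follow; any other module paths would be assumptions):

    # 0.2.1: thrift-generated types were imported from ai.chronon.api
    # from ai.chronon.api.ttypes import EventSource, Source
    # import ai.chronon.api.common.ttypes as common

    # 0.3.1: the same generated types now live under the gen_thrift top-level package
    from gen_thrift.api.ttypes import EventSource, Source
    import gen_thrift.common.ttypes as common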
ai/chronon/resources/gcp/group_bys/test/data.py CHANGED
@@ -1,34 +1,30 @@
-
 from sources.test.data import source_v1
 
 from ai.chronon.group_by import Aggregation, GroupBy, Operation, TimeUnit, Window
 
-window_sizes = [Window(length=day, time_unit=TimeUnit.DAYS) for day in [3, 14, 30]] # Define some window sizes to use below
+window_sizes = [
+    Window(length=day, time_unit=TimeUnit.DAYS) for day in [3, 14, 30]
+]  # Define some window sizes to use below
 
 group_by_v1 = GroupBy(
     backfill_start_date="2023-11-01",
     sources=[source_v1],
-    keys=["user_id"], # We are aggregating by user
+    keys=["user_id"],  # We are aggregating by user
     online=True,
-    aggregations=[Aggregation(
-        input_column="purchase_price",
-        operation=Operation.SUM,
-        windows=window_sizes
-    ), # The sum of purchases prices in various windows
+    aggregations=[
         Aggregation(
-            input_column="purchase_price",
-            operation=Operation.COUNT,
-            windows=window_sizes
-        ), # The count of purchases in various windows
+            input_column="purchase_price", operation=Operation.SUM, windows=window_sizes
+        ),  # The sum of purchases prices in various windows
         Aggregation(
-            input_column="purchase_price",
-            operation=Operation.AVERAGE,
-            windows=window_sizes
-        ), # The average purchases by user in various windows
+            input_column="purchase_price", operation=Operation.COUNT, windows=window_sizes
+        ),  # The count of purchases in various windows
+        Aggregation(
+            input_column="purchase_price", operation=Operation.AVERAGE, windows=window_sizes
+        ),  # The average purchases by user in various windows
         Aggregation(
             input_column="purchase_price",
             operation=Operation.LAST_K(10),
         ),
     ],
     version=0,
-)
+)
ai/chronon/resources/gcp/joins/test/data.py CHANGED
@@ -1,6 +1,6 @@
+from gen_thrift.api.ttypes import EventSource, Source
 from group_bys.test.data import group_by_v1
 
-from ai.chronon.api.ttypes import EventSource, Source
 from ai.chronon.join import Join, JoinPart
 from ai.chronon.query import Query, selects
 
@@ -12,9 +12,7 @@ source = Source(
     events=EventSource(
         table="data.checkouts",
         query=Query(
-            selects=selects(
-                "user_id"
-            ), # The primary key used to join various GroupBys together
+            selects=selects("user_id"),  # The primary key used to join various GroupBys together
             time_column="ts",
         ), # The event time used to compute feature values as-of
     )
@@ -22,9 +20,7 @@ source = Source(
 
 v1 = Join(
     left=source,
-    right_parts=[
-        JoinPart(group_by=group_by_v1)
-    ],
+    right_parts=[JoinPart(group_by=group_by_v1)],
     row_ids="user_id",
     version=0,
-)
+)
ai/chronon/resources/gcp/sources/test/data.py CHANGED
@@ -1,4 +1,5 @@
-from ai.chronon.api.ttypes import EventSource, Source
+from gen_thrift.api.ttypes import EventSource, Source
+
 from ai.chronon.query import Query, selects
 
 """
@@ -13,11 +14,13 @@ with a clear event time column and selected fields for downstream feature comput
 
 source_v1 = Source(
     events=EventSource(
-        table="data.purchases", # This points to the log table in the warehouse with historical purchase events, updated in batch daily
-        topic=None, # See the 'returns' GroupBy for an example that has a streaming source configured. In this case, this would be the streaming source topic that can be listened to for realtime events
+        table="data.purchases",  # This points to the log table in the warehouse with historical purchase events, updated in batch daily
+        topic=None,  # See the 'returns' GroupBy for an example that has a streaming source configured. In this case, this would be the streaming source topic that can be listened to for realtime events
         query=Query(
-            selects=selects("user_id","purchase_price"), # Select the fields we care about
-            time_column="ts") # The event time
-        ))
+            selects=selects("user_id", "purchase_price"),  # Select the fields we care about
+            time_column="ts",
+        ),  # The event time
+    )
+)
 
 # The `source_v1` object can now be used in a Chronon join or pipeline definition
ai/chronon/resources/gcp/teams.py CHANGED
@@ -1,4 +1,5 @@
-from ai.chronon.api.ttypes import Team
+from gen_thrift.api.ttypes import Team
+
 from ai.chronon.repo.constants import RunMode
 from ai.chronon.types import ConfigProperties, EnvironmentVariables
 
@@ -10,34 +11,25 @@ default = Team(
         common={
             "spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
             "spark.chronon.table_write.format": "iceberg",
-
             "spark.sql.defaultCatalog": "bigquery_catalog",
-
             "spark.sql.catalog.bigquery_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
             "spark.sql.catalog.bigquery_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
             "spark.sql.catalog.bigquery_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
-
             "spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
             "spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
-
             "spark.chronon.coalesce.factor": "10",
             "spark.default.parallelism": "10",
             "spark.sql.shuffle.partitions": "10",
-
             # TODO: Please fill in the following values
             "spark.sql.catalog.bigquery_catalog.warehouse": "gs://zipline-warehouse-<customer_id>/data/tables/",
-            "spark.sql.catalog.bigquery_catalog.gcp_location": "<region>",
-            "spark.sql.catalog.bigquery_catalog.gcp_project": "<project-id>",
-            "spark.chronon.partition.format": "<date-format>", # ex: "yyyy-MM-dd",
-            "spark.chronon.partition.column": "<partition-column-name>", # ex: "ds",
+            "spark.sql.catalog.bigquery_catalog.gcp.bigquery.location": "<region>",
+            "spark.sql.catalog.bigquery_catalog.gcp.bigquery.project-id": "<project-id>",
+            "spark.chronon.partition.format": "<date-format>",  # ex: "yyyy-MM-dd",
+            "spark.chronon.partition.column": "<partition-column-name>",  # ex: "ds",
         },
     ),
     env=EnvironmentVariables(
         common={
-            "JOB_MODE": "local[*]",
-            "CHRONON_ONLINE_CLASS": "[ONLINE-TODO]your.online.class",
-            "CHRONON_ONLINE_ARGS": "[ONLINE-TODO]args prefixed with -Z become constructor map for your implementation of ai.chronon.online.Api, -Zkv-host=<YOUR_HOST> -Zkv-port=<YOUR_PORT>",
-
             # TODO: Please fill in the following values
             "CUSTOMER_ID": "<customer_id>",
             "GCP_PROJECT_ID": "<project-id>",
@@ -45,7 +37,7 @@ default = Team(
             "GCP_DATAPROC_CLUSTER_NAME": "<dataproc-cluster-name>",
             "GCP_BIGTABLE_INSTANCE_ID": "<bigtable-instance-id>",
             "ARTIFACT_PREFIX": "<customer-artifact-bucket>",
-            "CLOUD_PROVIDER": "<gcp | aws>"
+            "CLOUD_PROVIDER": "<gcp | aws>",
         },
     ),
 )
@@ -54,11 +46,7 @@ default = Team(
 test = Team(
     outputNamespace="data",
     env=EnvironmentVariables(
-        common={},
-        modeEnvironments={
-            RunMode.BACKFILL: {},
-            RunMode.UPLOAD: {}
-        }
+        common={}, modeEnvironments={RunMode.BACKFILL: {}, RunMode.UPLOAD: {}}
     ),
 )
 
@@ -67,4 +55,4 @@ team_conf = Team(
     env=EnvironmentVariables(
         common={},
     ),
-)
+)
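Note that the two BigQuery catalog options above are renamed, not just reformatted. A minimal sketch of the updated entries (the dict name is illustrative and the values are the same placeholders as in the template):

    # 0.3.1 option names; the old names are shown in comments, values are placeholders to fill in
    bigquery_catalog_conf = {
        "spark.sql.catalog.bigquery_catalog.gcp.bigquery.location": "<region>",        # was ...gcp_location
        "spark.sql.catalog.bigquery_catalog.gcp.bigquery.project-id": "<project-id>",  # was ...gcp_project
    }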
ai/chronon/source.py CHANGED
@@ -2,7 +2,7 @@
 Wrappers to directly create Source objects.
 """
 
-import ai.chronon.api.ttypes as ttypes
+import gen_thrift.api.ttypes as ttypes
 
 
 def EventSource(
@@ -29,9 +29,7 @@ def EventSource(
 
     """
    return ttypes.Source(
-        events=ttypes.EventSource(
-            table=table, topic=topic, query=query, isCumulative=is_cumulative
-        )
+        events=ttypes.EventSource(table=table, topic=topic, query=query, isCumulative=is_cumulative)
    )
 
 
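For reference, a minimal usage sketch of the `EventSource` wrapper shown above; the table, selected columns, and time column are illustrative and mirror the bundled GCP examples:

    import gen_thrift.api.ttypes as ttypes

    from ai.chronon.query import Query, selects
    from ai.chronon.source import EventSource

    # Returns a ttypes.Source wrapping a ttypes.EventSource; topic=None means batch-only
    purchases: ttypes.Source = EventSource(
        table="data.purchases",
        topic=None,
        query=Query(selects=selects("user_id", "purchase_price"), time_column="ts"),
        is_cumulative=False,
    )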
ai/chronon/staging_query.py CHANGED
@@ -1,20 +1,28 @@
-
 import inspect
 import json
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Union
 
+import gen_thrift.api.ttypes as ttypes
+import gen_thrift.common.ttypes as common
+
 import ai.chronon.airflow_helpers as airflow_helpers
-import ai.chronon.api.common.ttypes as common
-import ai.chronon.api.ttypes as ttypes
+from ai.chronon import utils
 from ai.chronon.constants import AIRFLOW_DEPENDENCIES_KEY
 
 
+def _get_output_table_name(staging_query: ttypes.StagingQuery, full_name: bool = False):
+    """generate output table name for staging query job"""
+    utils.__set_name(staging_query, ttypes.StagingQuery, "staging_queries")
+    return utils.output_table_name(staging_query, full_name=full_name)
+
+
 # Wrapper for EngineType
 class EngineType:
     SPARK = ttypes.EngineType.SPARK
     BIGQUERY = ttypes.EngineType.BIGQUERY
 
+
 @dataclass
 class TableDependency:
     table: str
@@ -26,26 +34,54 @@ class TableDependency:
     def to_thrift(self):
         if self.offset is None:
             raise ValueError(f"Dependency offset for table {self.table} must be specified.")
-        offset_window = common.Window(length = self.offset, timeUnit= common.TimeUnit.DAYS)
+        offset_window = common.Window(length=self.offset, timeUnit=common.TimeUnit.DAYS)
         return common.TableDependency(
             tableInfo=common.TableInfo(
-                table=self.table, 
+                table=self.table,
                 partitionColumn=self.partition_column,
                 partitionFormat=self.partition_format,
-                partitionInterval=common.Window(1, common.TimeUnit.DAYS)
+                partitionInterval=common.Window(1, common.TimeUnit.DAYS),
             ),
             startOffset=offset_window,
             endOffset=offset_window,
             startCutOff=None,
-            endCutOff=None
+            endCutOff=None,
         )
 
+
+def Import(
+    query: str,
+    version: int,
+    output_namespace: Optional[str] = None,
+    engine_type: Optional[EngineType] = None,
+    dependencies: Optional[List[Union[TableDependency, Dict]]] = None,
+    conf: Optional[common.ConfigProperties] = None,
+    env_vars: Optional[common.EnvironmentVariables] = None,
+    offline_schedule: str = "@daily",
+):
+    assert dependencies is not None and len(dependencies) == 1, (
+        f"Import must specify exactly one table dependency. Got: {dependencies}"
+    )
+    assert dependencies[0].partition_column is not None, (
+        f"Import must specify a partition column for the table dependency. Got: {dependencies[0].partition_column}"
+    )
+
+    return StagingQuery(
+        query=query,
+        version=version,
+        output_namespace=output_namespace,
+        dependencies=dependencies,
+        conf=conf,
+        env_vars=env_vars,
+        engine_type=engine_type,
+        offline_schedule=offline_schedule,
+    )
+
+
 def StagingQuery(
-    name: str,
     query: str,
     version: int,
     output_namespace: Optional[str] = None,
-    start_partition: Optional[str] = None,
     table_properties: Optional[Dict[str, str]] = None,
     setups: Optional[List[str]] = None,
     engine_type: Optional[EngineType] = None,
@@ -58,23 +94,20 @@ def StagingQuery(
     cluster_conf: common.ClusterConfigProperties = None,
     step_days: Optional[int] = None,
     recompute_days: Optional[int] = None,
+    additional_partitions: List[str] = None,
 ) -> ttypes.StagingQuery:
     """
     Creates a StagingQuery object for executing arbitrary SQL queries with templated date parameters.
 
     :param query:
         Arbitrary spark query that should be written with template parameters:
-        - `{{ start_date }}`: Initial run uses start_partition, future runs use latest partition + 1 day
+        - `{{ start_date }}`: Initial run uses start_date, future runs use latest partition + 1 day
         - `{{ end_date }}`: The end partition of the computing range
         - `{{ latest_date }}`: End partition independent of the computing range (for cumulative sources)
         - `{{ max_date(table=namespace.my_table) }}`: Max partition available for a given table
         These parameters can be modified with offset and bounds:
         - `{{ start_date(offset=-10, lower_bound='2023-01-01', upper_bound='2024-01-01') }}`
     :type query: str
-    :param start_partition:
-        On the first run, `{{ start_date }}` will be set to this user provided start date,
-        future incremental runs will set it to the latest existing partition + 1 day.
-    :type start_partition: str
     :param setups:
         Spark SQL setup statements. Used typically to register UDFs.
     :type setups: List[str]
@@ -121,13 +154,17 @@
     # Get caller's filename to assign team
     team = inspect.stack()[1].filename.split("/")[-2]
 
+    assert isinstance(version, int), (
+        f"Version must be an integer, but found {type(version).__name__}"
+    )
+
     # Create execution info
     exec_info = common.ExecutionInfo(
         scheduleCron=offline_schedule,
         conf=conf,
         env=env_vars,
         stepDays=step_days,
-        clusterConf=cluster_conf
+        clusterConf=cluster_conf,
     )
 
     airflow_dependencies = []
@@ -155,14 +192,14 @@
 
     # Create metadata
     meta_data = ttypes.MetaData(
-        name=name,
         outputNamespace=output_namespace,
         team=team,
         executionInfo=exec_info,
         tags=tags,
         customJson=custom_json,
         tableProperties=table_properties,
-        version=str(version)
+        version=str(version),
+        additionalOutputPartitionColumns=additional_partitions,
     )
 
     thrift_deps = []
@@ -175,11 +212,15 @@
     staging_query = ttypes.StagingQuery(
         metaData=meta_data,
         query=query,
-        startPartition=start_partition,
         setups=setups,
         engineType=engine_type,
         tableDependencies=thrift_deps,
         recomputeDays=recompute_days,
     )
 
-    return staging_query
+    # Add the table property that calls the private function
+    staging_query.__class__.table = property(
+        lambda self: _get_output_table_name(self, full_name=True)
+    )
+
+    return staging_query
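To illustrate the API change above (no `name` or `start_partition` arguments, an integer `version`, and the new `Import` wrapper that requires exactly one partitioned table dependency), here is a hedged usage sketch; the query, table, and column names are made up:

    from ai.chronon.staging_query import Import, StagingQuery, TableDependency

    # Staging query: the output table name is now derived from the defining module and team,
    # and exposed through the new `table` property on the returned thrift object
    checkouts_enriched = StagingQuery(
        query="SELECT * FROM data.checkouts WHERE ds BETWEEN '{{ start_date }}' AND '{{ end_date }}'",
        version=0,  # must be an int; stored as MetaData.version
        output_namespace="data",
        dependencies=[TableDependency(table="data.checkouts", partition_column="ds", offset=0)],
    )

    # Import: thin wrapper over StagingQuery that asserts exactly one dependency with a partition column
    checkouts_import = Import(
        query="SELECT * FROM data.checkouts",
        version=0,
        dependencies=[TableDependency(table="data.checkouts", partition_column="ds", offset=0)],
    )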
ai/chronon/types.py CHANGED
@@ -2,8 +2,9 @@
 importing ai.chronon.types will bring in all the api's needed to create any chronon object
 """
 
-import ai.chronon.api.common.ttypes as common
-import ai.chronon.api.ttypes as ttypes
+import gen_thrift.api.ttypes as ttypes
+import gen_thrift.common.ttypes as common
+
 import ai.chronon.group_by as group_by
 import ai.chronon.join as join
 import ai.chronon.query as query
ai/chronon/utils.py CHANGED
@@ -23,9 +23,9 @@ import tempfile
 from collections.abc import Iterable
 from typing import List, Optional, Union, cast
 
-import ai.chronon.api.ttypes as api
+import gen_thrift.api.ttypes as api
+
 import ai.chronon.repo.extract_objects as eo
-from ai.chronon.cli.compile import parse_teams
 from ai.chronon.repo import FOLDER_NAME_TO_CLASS
 
 ChrononJobTypes = Union[api.GroupBy, api.Join, api.StagingQuery]
@@ -56,21 +56,16 @@ class JsonDiffer:
         self.new_name = "new.json"
         self.old_name = "old.json"
 
-    def diff(
-        self, new_json_str: object, old_json_str: object, skipped_keys=None
-    ) -> str:
+    def diff(self, new_json_str: object, old_json_str: object, skipped_keys=None) -> str:
         if skipped_keys is None:
             skipped_keys = []
-        new_json = {
-            k: v for k, v in json.loads(new_json_str).items() if k not in skipped_keys
-        }
-        old_json = {
-            k: v for k, v in json.loads(old_json_str).items() if k not in skipped_keys
-        }
-
-        with open(os.path.join(self.temp_dir, self.old_name), mode="w") as old, open(
-            os.path.join(self.temp_dir, self.new_name), mode="w"
-        ) as new:
+        new_json = {k: v for k, v in json.loads(new_json_str).items() if k not in skipped_keys}
+        old_json = {k: v for k, v in json.loads(old_json_str).items() if k not in skipped_keys}
+
+        with (
+            open(os.path.join(self.temp_dir, self.old_name), mode="w") as old,
+            open(os.path.join(self.temp_dir, self.new_name), mode="w") as new,
+        ):
             old.write(json.dumps(old_json, sort_keys=True, indent=2))
             new.write(json.dumps(new_json, sort_keys=True, indent=2))
         diff_str = subprocess.run(
@@ -131,6 +126,7 @@ def _get_underlying_source(
     else:
         return source.joinSource
 
+
 def get_root_source(
     source: api.Source,
 ) -> Union[api.EventSource, api.EntitySource]:
@@ -141,6 +137,7 @@
     else:
         return get_root_source(source.joinSource.join.left)
 
+
 def get_query(source: api.Source) -> api.Query:
     return _get_underlying_source(source).query
 
@@ -151,7 +148,9 @@ def get_table(source: api.Source) -> str:
     elif source.events:
         table = source.events.table
     else:
-        table = get_join_output_table_name(source.joinSource.join, True)
+        from ai.chronon.join import _get_output_table_name
+
+        table = _get_output_table_name(source.joinSource.join, True)
     return table.split("/")[0]
 
 
@@ -229,11 +228,7 @@ def dict_to_bash_commands(d):
         return ""
     bash_commands = []
     for key, value in d.items():
-        cmd = (
-            f"--{key.replace('_', '-')}={value}"
-            if value
-            else f"--{key.replace('_', '-')}"
-        )
+        cmd = f"--{key.replace('_', '-')}={value}" if value else f"--{key.replace('_', '-')}"
         bash_commands.append(cmd)
     return " ".join(bash_commands)
 
@@ -259,9 +254,7 @@ def output_table_name(obj, full_name: bool):
 
     def join_part_name(jp):
         if jp.groupBy is None:
-            raise NotImplementedError(
-                "Join Part names for non group bys is not implemented."
-            )
+            raise NotImplementedError("Join Part names for non group bys is not implemented.")
         if not jp.groupBy.metaData.name and isinstance(jp.groupBy, api.GroupBy):
             __set_name(jp.groupBy, api.GroupBy, "group_bys")
         return "_".join(
@@ -296,51 +289,15 @@ def join_part_output_table_name(join, jp, full_name: bool = False):
     )
 
 
-def group_by_output_table_name(obj, full_name: bool = False):
-    """
-    Group by backfill output table name
-    To be synced with api.Extensions.scala
-    """
-    if not obj.metaData.name:
-        __set_name(obj, api.GroupBy, "group_bys")
-    return output_table_name(obj, full_name)
-
-
 def log_table_name(obj, full_name: bool = False):
     return output_table_name(obj, full_name=full_name) + "_logged"
 
 
-def get_staging_query_output_table_name(
-    staging_query: api.StagingQuery, full_name: bool = False
-):
-    """generate output table name for staging query job"""
-    __set_name(staging_query, api.StagingQuery, "staging_queries")
-    return output_table_name(staging_query, full_name=full_name)
-
-
 def get_team_conf_from_py(team, key):
     team_module = importlib.import_module(f"teams.{team}")
     return getattr(team_module, key)
 
 
-def get_join_output_table_name(join: api.Join, full_name: bool = False):
-    """generate output table name for join backfill job"""
-    # join sources could also be created inline alongside groupBy file
-    # so we specify fallback module as group_bys
-    if isinstance(join, api.Join):
-        __set_name(join, api.Join, "joins")
-    # set output namespace
-    if not join.metaData.outputNamespace:
-        team_name = join.metaData.name.split(".")[0]
-        namespace = (
-            parse_teams.load_teams(chronon_root_path, print=False)
-            .get(team_name)
-            .outputNamespace
-        )
-        join.metaData.outputNamespace = namespace
-    return output_table_name(join, full_name=full_name)
-
-
 def wait_for_simple_schema(table, lag, start, end):
     if not table:
         return None
@@ -348,9 +305,7 @@ def wait_for_simple_schema(table, lag, start, end):
     clean_name = table_tokens[0]
     subpartition_spec = "/".join(table_tokens[1:]) if len(table_tokens) > 1 else ""
     return {
-        "name": "wait_for_{}_ds{}".format(
-            clean_name, "" if lag == 0 else f"_minus_{lag}"
-        ),
+        "name": "wait_for_{}_ds{}".format(clean_name, "" if lag == 0 else f"_minus_{lag}"),
         "spec": "{}/ds={}{}".format(
             clean_name,
             "{{ ds }}" if lag == 0 else "{{{{ macros.ds_add(ds, -{}) }}}}".format(lag),
@@ -413,7 +368,6 @@ def get_applicable_modes(conf: ChrononJobTypes) -> List[str]:
         modes.append("streaming")
 
     elif isinstance(conf, api.Join):
-
         join = cast(api.Join, conf)
 
         if get_offline_schedule(conf) is not None:
@@ -483,9 +437,9 @@ def chronon_path(file_path: str) -> str:
     conf_types = FOLDER_NAME_TO_CLASS.keys()
     splits = file_path.split("/")
     conf_occurences = [splits.index(typ) for typ in conf_types if typ in splits]
-    assert (
-        len(conf_occurences) > 0
-    ), f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    assert len(conf_occurences) > 0, (
+        f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    )
 
     index = min([splits.index(typ) for typ in conf_types if typ in splits])
     rel_path = "/".join(splits[index:])
@@ -535,7 +489,6 @@ def compose(arg, *methods):
     result = [indent + arg]
 
     for method in methods:
-
         method_parts = method.split(" ", 1)
         method = method_parts[0]
ai/chronon/windows.py CHANGED
@@ -1,4 +1,4 @@
-import ai.chronon.api.common.ttypes as common
+import gen_thrift.common.ttypes as common
 
 
 def _days(length: int) -> common.Window:
@@ -40,9 +40,7 @@ def _from_str(s: str) -> common.Window:
         elif unit == "h":
             return _hours(length)
         else:
-            raise ValueError(
-                f"Invalid time unit '{unit}'. Must be 'd' for days or 'h' for hours"
-            )
+            raise ValueError(f"Invalid time unit '{unit}'. Must be 'd' for days or 'h' for hours")
 
     except ValueError as e:
         if "invalid literal for int()" in str(e):
{awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/METADATA CHANGED
@@ -1,33 +1,57 @@
 Metadata-Version: 2.4
 Name: awx-zipline-ai
-Version: 0.2.1
-Summary: Zipline python API library
-Classifier: Programming Language :: Python :: 3.11
+Version: 0.3.1
+Summary: CLI tool for the Zipline AI platform
+Author-email: Zipline AI <hello@zipline.ai>
+License: Apache License 2.0
+Project-URL: homepage, https://zipline.ai
+Project-URL: documentation, https://docs.zipline.ai
+Project-URL: github, https://github.com/zipline-ai/chronon/
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: click
-Requires-Dist: thrift==0.20.0
-Requires-Dist: pyspark==3.5.4
-Requires-Dist: sqlglot
+Requires-Dist: boto3==1.40.26
+Requires-Dist: botocore==1.40.26
+Requires-Dist: cachetools==5.5.2
+Requires-Dist: certifi==2025.8.3
+Requires-Dist: charset-normalizer==3.4.3
+Requires-Dist: click==8.2.1
 Requires-Dist: crcmod==1.7
-Requires-Dist: glom
-Requires-Dist: boto3
-Requires-Dist: importlib-resources==6.5.2
-Requires-Dist: rich
+Requires-Dist: gitdb==4.0.12
+Requires-Dist: gitpython==3.1.45
+Requires-Dist: google-api-core[grpc]==2.25.1
+Requires-Dist: google-auth==2.40.3
+Requires-Dist: google-cloud-bigquery-storage==2.33.0
+Requires-Dist: google-cloud-core==2.4.3
+Requires-Dist: google-cloud-iam==2.19.1
 Requires-Dist: google-cloud-storage==2.19.0
-Requires-Dist: google-cloud-bigquery-storage
-Requires-Dist: GitPython
-Provides-Extra: pip2compat
-Requires-Dist: click<8; extra == "pip2compat"
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: license-file
-Dynamic: provides-extra
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+Requires-Dist: google-crc32c==1.7.1
+Requires-Dist: google-resumable-media==2.7.2
+Requires-Dist: googleapis-common-protos[grpc]==1.70.0
+Requires-Dist: grpc-google-iam-v1==0.14.2
+Requires-Dist: grpcio<=1.74.0,>=1.66.2
+Requires-Dist: grpcio-status<=1.74.0,>=1.62.3
+Requires-Dist: idna==3.10
+Requires-Dist: importlib-resources==6.5.2
+Requires-Dist: jmespath==1.0.1
+Requires-Dist: markdown-it-py==4.0.0
+Requires-Dist: mdurl==0.1.2
+Requires-Dist: proto-plus==1.26.1
+Requires-Dist: protobuf<=6.32.0,>=4.25.5
+Requires-Dist: py4j==0.10.9.7
+Requires-Dist: pyasn1==0.6.1
+Requires-Dist: pyasn1-modules==0.4.2
+Requires-Dist: pygments==2.19.2
+Requires-Dist: pyspark==3.5.4
+Requires-Dist: python-dateutil==2.9.0.post0
+Requires-Dist: requests==2.32.5
+Requires-Dist: rich==14.1.0
+Requires-Dist: rsa==4.9.1
+Requires-Dist: s3transfer==0.13.1
+Requires-Dist: six==1.17.0
+Requires-Dist: smmap==5.0.2
+Requires-Dist: thrift==0.20.0
+Requires-Dist: urllib3==2.5.0
+Requires-Dist: python-dotenv>=1.0.1
 
 ### Chronon Python API