awx-zipline-ai 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. __init__.py +0 -0
  2. agent/__init__.py +1 -0
  3. agent/constants.py +15 -0
  4. agent/ttypes.py +1684 -0
  5. ai/__init__.py +0 -0
  6. ai/chronon/__init__.py +0 -0
  7. ai/chronon/airflow_helpers.py +248 -0
  8. ai/chronon/cli/__init__.py +0 -0
  9. ai/chronon/cli/compile/__init__.py +0 -0
  10. ai/chronon/cli/compile/column_hashing.py +336 -0
  11. ai/chronon/cli/compile/compile_context.py +173 -0
  12. ai/chronon/cli/compile/compiler.py +183 -0
  13. ai/chronon/cli/compile/conf_validator.py +742 -0
  14. ai/chronon/cli/compile/display/__init__.py +0 -0
  15. ai/chronon/cli/compile/display/class_tracker.py +102 -0
  16. ai/chronon/cli/compile/display/compile_status.py +95 -0
  17. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  18. ai/chronon/cli/compile/display/console.py +3 -0
  19. ai/chronon/cli/compile/display/diff_result.py +111 -0
  20. ai/chronon/cli/compile/fill_templates.py +35 -0
  21. ai/chronon/cli/compile/parse_configs.py +134 -0
  22. ai/chronon/cli/compile/parse_teams.py +242 -0
  23. ai/chronon/cli/compile/serializer.py +109 -0
  24. ai/chronon/cli/compile/version_utils.py +42 -0
  25. ai/chronon/cli/git_utils.py +145 -0
  26. ai/chronon/cli/logger.py +59 -0
  27. ai/chronon/constants.py +3 -0
  28. ai/chronon/group_by.py +692 -0
  29. ai/chronon/join.py +580 -0
  30. ai/chronon/logger.py +23 -0
  31. ai/chronon/model.py +40 -0
  32. ai/chronon/query.py +126 -0
  33. ai/chronon/repo/__init__.py +39 -0
  34. ai/chronon/repo/aws.py +284 -0
  35. ai/chronon/repo/cluster.py +136 -0
  36. ai/chronon/repo/compile.py +62 -0
  37. ai/chronon/repo/constants.py +164 -0
  38. ai/chronon/repo/default_runner.py +269 -0
  39. ai/chronon/repo/explore.py +418 -0
  40. ai/chronon/repo/extract_objects.py +134 -0
  41. ai/chronon/repo/gcp.py +586 -0
  42. ai/chronon/repo/gitpython_utils.py +15 -0
  43. ai/chronon/repo/hub_runner.py +261 -0
  44. ai/chronon/repo/hub_uploader.py +109 -0
  45. ai/chronon/repo/init.py +60 -0
  46. ai/chronon/repo/join_backfill.py +119 -0
  47. ai/chronon/repo/run.py +296 -0
  48. ai/chronon/repo/serializer.py +133 -0
  49. ai/chronon/repo/team_json_utils.py +46 -0
  50. ai/chronon/repo/utils.py +481 -0
  51. ai/chronon/repo/zipline.py +35 -0
  52. ai/chronon/repo/zipline_hub.py +277 -0
  53. ai/chronon/resources/__init__.py +0 -0
  54. ai/chronon/resources/gcp/__init__.py +0 -0
  55. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  56. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  57. ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
  58. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  59. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  60. ai/chronon/resources/gcp/joins/test/data.py +26 -0
  61. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  62. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  63. ai/chronon/resources/gcp/sources/test/data.py +26 -0
  64. ai/chronon/resources/gcp/teams.py +58 -0
  65. ai/chronon/source.py +86 -0
  66. ai/chronon/staging_query.py +226 -0
  67. ai/chronon/types.py +58 -0
  68. ai/chronon/utils.py +510 -0
  69. ai/chronon/windows.py +48 -0
  70. awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
  71. awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
  72. awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
  73. awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
  74. awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
  75. gen_thrift/__init__.py +0 -0
  76. gen_thrift/api/__init__.py +1 -0
  77. gen_thrift/api/constants.py +15 -0
  78. gen_thrift/api/ttypes.py +3754 -0
  79. gen_thrift/common/__init__.py +1 -0
  80. gen_thrift/common/constants.py +15 -0
  81. gen_thrift/common/ttypes.py +1814 -0
  82. gen_thrift/eval/__init__.py +1 -0
  83. gen_thrift/eval/constants.py +15 -0
  84. gen_thrift/eval/ttypes.py +660 -0
  85. gen_thrift/fetcher/__init__.py +1 -0
  86. gen_thrift/fetcher/constants.py +15 -0
  87. gen_thrift/fetcher/ttypes.py +127 -0
  88. gen_thrift/hub/__init__.py +1 -0
  89. gen_thrift/hub/constants.py +15 -0
  90. gen_thrift/hub/ttypes.py +1109 -0
  91. gen_thrift/observability/__init__.py +1 -0
  92. gen_thrift/observability/constants.py +15 -0
  93. gen_thrift/observability/ttypes.py +2355 -0
  94. gen_thrift/planner/__init__.py +1 -0
  95. gen_thrift/planner/constants.py +15 -0
  96. gen_thrift/planner/ttypes.py +1967 -0
@@ -0,0 +1,226 @@
1
+ import inspect
2
+ import json
3
+ from dataclasses import dataclass
4
+ from typing import Dict, List, Optional, Union
5
+
6
+ import gen_thrift.api.ttypes as ttypes
7
+ import gen_thrift.common.ttypes as common
8
+
9
+ import ai.chronon.airflow_helpers as airflow_helpers
10
+ from ai.chronon import utils
11
+ from ai.chronon.constants import AIRFLOW_DEPENDENCIES_KEY
12
+
13
+
14
def _get_output_table_name(staging_query: ttypes.StagingQuery, full_name: bool = False):
    """generate output table name for staging query job

    :param staging_query: thrift StagingQuery whose output table name to derive.
    :param full_name: forwarded to utils.output_table_name — presumably controls
        whether the namespace is included; verify against utils (TODO confirm).
    """
    # Populate metaData.name (under the "staging_queries" grouping) before
    # deriving the table name. NOTE: this mutates staging_query in place.
    utils.__set_name(staging_query, ttypes.StagingQuery, "staging_queries")
    return utils.output_table_name(staging_query, full_name=full_name)
18
+
19
+
20
class EngineType:
    """Convenience aliases for the generated thrift ``EngineType`` values.

    Lets user configs write ``EngineType.SPARK`` / ``EngineType.BIGQUERY``
    without importing the generated thrift module directly.
    """

    SPARK = ttypes.EngineType.SPARK
    BIGQUERY = ttypes.EngineType.BIGQUERY
24
+
25
+
26
@dataclass
class TableDependency:
    """User-facing description of an upstream table a StagingQuery depends on.

    Only ``table`` is required at construction time; ``offset`` must be set
    before :meth:`to_thrift` is called.
    """

    table: str
    partition_column: Optional[str] = None
    partition_format: Optional[str] = None
    additional_partitions: Optional[List[str]] = None
    offset: Optional[int] = None

    def to_thrift(self):
        """Convert this dependency into a thrift ``common.TableDependency``.

        :raises ValueError: when ``offset`` was never specified.
        """
        if self.offset is None:
            raise ValueError(f"Dependency offset for table {self.table} must be specified.")

        # The same day-based window is used for both the start and end offsets.
        window = common.Window(length=self.offset, timeUnit=common.TimeUnit.DAYS)
        info = common.TableInfo(
            table=self.table,
            partitionColumn=self.partition_column,
            partitionFormat=self.partition_format,
            partitionInterval=common.Window(1, common.TimeUnit.DAYS),
        )
        return common.TableDependency(
            tableInfo=info,
            startOffset=window,
            endOffset=window,
            startCutOff=None,
            endCutOff=None,
        )
50
+
51
+
52
def Import(
    query: str,
    version: int,
    output_namespace: Optional[str] = None,
    engine_type: Optional[EngineType] = None,
    dependencies: Optional[List[Union[TableDependency, Dict]]] = None,
    conf: Optional[common.ConfigProperties] = None,
    env_vars: Optional[common.EnvironmentVariables] = None,
    offline_schedule: str = "@daily",
):
    """
    Creates a StagingQuery that imports exactly one upstream table.

    Thin wrapper over :func:`StagingQuery` that enforces a single table
    dependency with an explicit partition column.

    :param query: Spark SQL selecting from the imported table; supports the
        same template parameters as :func:`StagingQuery`.
    :param version: Integer version, surfaced on the metadata.
    :param output_namespace: Namespace (e.g. database) for the output table.
    :param engine_type: Compute engine override (EngineType.SPARK, EngineType.BIGQUERY).
    :param dependencies: Exactly one TableDependency (or raw dict spec).
    :param conf: Configuration properties for the job.
    :param env_vars: Environment variables for the job.
    :param offline_schedule: Schedule interval for the batch job (e.g. '@daily').
    :return: A StagingQuery thrift object.
    """
    assert dependencies is not None and len(dependencies) == 1, (
        f"Import must specify exactly one table dependency. Got: {dependencies}"
    )

    dep = dependencies[0]
    # Dependencies may be TableDependency objects or raw dict specs (both are
    # declared-legal inputs); the previous attribute access raised
    # AttributeError when a dict was supplied.
    partition_column = (
        dep.partition_column if isinstance(dep, TableDependency) else dep.get("partition_column")
    )
    assert partition_column is not None, (
        f"Import must specify a partition column for the table dependency. Got: {partition_column}"
    )

    return StagingQuery(
        query=query,
        version=version,
        output_namespace=output_namespace,
        dependencies=dependencies,
        conf=conf,
        env_vars=env_vars,
        engine_type=engine_type,
        offline_schedule=offline_schedule,
    )
79
+
80
+
81
def StagingQuery(
    query: str,
    version: int,
    output_namespace: Optional[str] = None,
    table_properties: Optional[Dict[str, str]] = None,
    setups: Optional[List[str]] = None,
    engine_type: Optional[EngineType] = None,
    dependencies: Optional[List[Union[TableDependency, Dict]]] = None,
    tags: Optional[Dict[str, str]] = None,
    # execution params
    offline_schedule: str = "@daily",
    conf: Optional[common.ConfigProperties] = None,
    env_vars: Optional[common.EnvironmentVariables] = None,
    cluster_conf: Optional[common.ClusterConfigProperties] = None,
    step_days: Optional[int] = None,
    recompute_days: Optional[int] = None,
    additional_partitions: Optional[List[str]] = None,
) -> ttypes.StagingQuery:
    """
    Creates a StagingQuery object for executing arbitrary SQL queries with templated date parameters.

    :param query:
        Arbitrary spark query that should be written with template parameters:
        - `{{ start_date }}`: Initial run uses start_date, future runs use latest partition + 1 day
        - `{{ end_date }}`: The end partition of the computing range
        - `{{ latest_date }}`: End partition independent of the computing range (for cumulative sources)
        - `{{ max_date(table=namespace.my_table) }}`: Max partition available for a given table
        These parameters can be modified with offset and bounds:
        - `{{ start_date(offset=-10, lower_bound='2023-01-01', upper_bound='2024-01-01') }}`
    :type query: str
    :param version:
        Integer version; stored stringified on the metadata.
    :type version: int
    :param output_namespace:
        Namespace (e.g. database) for the output table.
    :type output_namespace: str
    :param table_properties:
        Table properties recorded on the metadata.
    :type table_properties: Dict[str, str]
    :param setups:
        Spark SQL setup statements. Used typically to register UDFs.
    :type setups: List[str]
    :param engine_type:
        By default, spark is the compute engine. You can specify an override (eg. bigquery, etc.)
        Use the EngineType class constants: EngineType.SPARK, EngineType.BIGQUERY, etc.
    :type engine_type: int
    :param dependencies:
        List of dependencies for the StagingQuery. Each dependency can be either a TableDependency object
        or a dictionary with 'name' and 'spec' keys.
    :type dependencies: List[Union[TableDependency, Dict]]
    :param tags:
        Additional metadata that does not directly affect computation, but is useful for management.
    :type tags: Dict[str, str]
    :param offline_schedule:
        The offline schedule interval for batch jobs. Format examples:
        '@hourly': '0 * * * *',
        '@daily': '0 0 * * *',
        '@weekly': '0 0 * * 0',
        '@monthly': '0 0 1 * *',
        '@yearly': '0 0 1 1 *'
    :type offline_schedule: str
    :param conf:
        Configuration properties for the StagingQuery.
    :type conf: common.ConfigProperties
    :param env_vars:
        Environment variables for the StagingQuery.
    :type env_vars: common.EnvironmentVariables
    :param cluster_conf:
        Cluster configuration properties for the job.
    :type cluster_conf: common.ClusterConfigProperties
    :param step_days:
        The maximum number of days to process at once
    :type step_days: int
    :param recompute_days:
        Used by orchestrator to determine how many days are recomputed on each incremental scheduled run. Should be
        set when the source data is changed in-place (i.e. existing partitions overwritten with new data each day up to
        X days later) or when you want partially mature aggregations (i.e. a 7 day window, but start computing it from
        day 1, and refresh it for the next 6 days)
    :type recompute_days: int
    :param additional_partitions:
        Additional output partition columns recorded on the metadata.
    :type additional_partitions: List[str]
    :return:
        A StagingQuery object
    """
    # Derive the team from the caller's parent directory (configs live under
    # `<team>/<file>.py`). NOTE: sensitive to call depth — must be invoked
    # directly from the config module, not through an extra wrapper.
    team = inspect.stack()[1].filename.split("/")[-2]

    assert isinstance(version, int), (
        f"Version must be an integer, but found {type(version).__name__}"
    )

    # Create execution info
    exec_info = common.ExecutionInfo(
        scheduleCron=offline_schedule,
        conf=conf,
        env=env_vars,
        stepDays=step_days,
        clusterConf=cluster_conf,
    )

    # Mirror dependencies into the Airflow-oriented JSON blob stored in
    # metadata.customJson.
    airflow_dependencies = []
    for d in dependencies or []:
        if isinstance(d, TableDependency):
            airflow_dependencies.append(
                airflow_helpers.create_airflow_dependency(
                    d.table,
                    d.partition_column,
                    d.additional_partitions,
                    d.offset,
                )
            )
        elif isinstance(d, dict):
            # Already a raw spec dict — pass through unchanged.
            airflow_dependencies.append(d)
        else:
            raise ValueError(
                "Dependencies must be either TableDependency instances or dictionaries."
            )

    custom_json = json.dumps({AIRFLOW_DEPENDENCIES_KEY: airflow_dependencies})

    # Create metadata
    meta_data = ttypes.MetaData(
        outputNamespace=output_namespace,
        team=team,
        executionInfo=exec_info,
        tags=tags,
        customJson=custom_json,
        tableProperties=table_properties,
        version=str(version),
        additionalOutputPartitionColumns=additional_partitions,
    )

    # Thrift-level dependencies: only TableDependency objects convert; raw
    # dict specs are surfaced via customJson only.
    thrift_deps = [d.to_thrift() for d in dependencies or [] if isinstance(d, TableDependency)]

    # Create and return the StagingQuery object with camelCase parameter names
    staging_query = ttypes.StagingQuery(
        metaData=meta_data,
        query=query,
        setups=setups,
        engineType=engine_type,
        tableDependencies=thrift_deps,
        recomputeDays=recompute_days,
    )

    # NOTE(review): this monkey-patches a `table` property onto the shared
    # generated thrift class — it affects every ttypes.StagingQuery instance,
    # not just this one. Kept as-is for backward compatibility.
    staging_query.__class__.table = property(
        lambda self: _get_output_table_name(self, full_name=True)
    )

    return staging_query
ai/chronon/types.py ADDED
@@ -0,0 +1,58 @@
1
+ """
2
+ importing ai.chronon.types will bring in all the api's needed to create any chronon object
3
+ """
4
+
5
+ import gen_thrift.api.ttypes as ttypes
6
+ import gen_thrift.common.ttypes as common
7
+
8
+ import ai.chronon.group_by as group_by
9
+ import ai.chronon.join as join
10
+ import ai.chronon.query as query
11
+ import ai.chronon.source as source
12
+
13
# Source related concepts: queries and the source wrappers around them.
Query = query.Query
selects = query.selects

Source = ttypes.Source
EventSource = source.EventSource
EntitySource = source.EntitySource
JoinSource = source.JoinSource

# Aggregation / GroupBy related concepts
GroupBy = group_by.GroupBy
Aggregation = group_by.Aggregation
Operation = group_by.Operation
Window = group_by.Window
TimeUnit = group_by.TimeUnit
DefaultAggregation = group_by.DefaultAggregation

# Accuracy enum plus its two members re-exported at module level for brevity.
Accuracy = ttypes.Accuracy
TEMPORAL = ttypes.Accuracy.TEMPORAL
SNAPSHOT = ttypes.Accuracy.SNAPSHOT

Derivation = group_by.Derivation

# Join related concepts
Join = join.Join
JoinPart = join.JoinPart
BootstrapPart = join.BootstrapPart
LabelParts = join.LabelParts
ContextualSource = join.ContextualSource
ExternalPart = join.ExternalPart
ExternalSource = join.ExternalSource
DataType = join.DataType


# Staging Query related concepts
StagingQuery = ttypes.StagingQuery
MetaData = ttypes.MetaData


# Execution / environment configuration (thrift `common` types).
EnvironmentVariables = common.EnvironmentVariables
ConfigProperties = common.ConfigProperties
ClusterConfigProperties = common.ClusterConfigProperties
ExecutionInfo = common.ExecutionInfo
TableDependency = common.TableDependency

Team = ttypes.Team