awx-zipline-ai 0.0.32 (py3-none-any.whl)

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (96)
  1. __init__.py +0 -0
  2. agent/__init__.py +1 -0
  3. agent/constants.py +15 -0
  4. agent/ttypes.py +1684 -0
  5. ai/__init__.py +0 -0
  6. ai/chronon/__init__.py +0 -0
  7. ai/chronon/airflow_helpers.py +248 -0
  8. ai/chronon/cli/__init__.py +0 -0
  9. ai/chronon/cli/compile/__init__.py +0 -0
  10. ai/chronon/cli/compile/column_hashing.py +336 -0
  11. ai/chronon/cli/compile/compile_context.py +173 -0
  12. ai/chronon/cli/compile/compiler.py +183 -0
  13. ai/chronon/cli/compile/conf_validator.py +742 -0
  14. ai/chronon/cli/compile/display/__init__.py +0 -0
  15. ai/chronon/cli/compile/display/class_tracker.py +102 -0
  16. ai/chronon/cli/compile/display/compile_status.py +95 -0
  17. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  18. ai/chronon/cli/compile/display/console.py +3 -0
  19. ai/chronon/cli/compile/display/diff_result.py +111 -0
  20. ai/chronon/cli/compile/fill_templates.py +35 -0
  21. ai/chronon/cli/compile/parse_configs.py +134 -0
  22. ai/chronon/cli/compile/parse_teams.py +242 -0
  23. ai/chronon/cli/compile/serializer.py +109 -0
  24. ai/chronon/cli/compile/version_utils.py +42 -0
  25. ai/chronon/cli/git_utils.py +145 -0
  26. ai/chronon/cli/logger.py +59 -0
  27. ai/chronon/constants.py +3 -0
  28. ai/chronon/group_by.py +692 -0
  29. ai/chronon/join.py +580 -0
  30. ai/chronon/logger.py +23 -0
  31. ai/chronon/model.py +40 -0
  32. ai/chronon/query.py +126 -0
  33. ai/chronon/repo/__init__.py +39 -0
  34. ai/chronon/repo/aws.py +284 -0
  35. ai/chronon/repo/cluster.py +136 -0
  36. ai/chronon/repo/compile.py +62 -0
  37. ai/chronon/repo/constants.py +164 -0
  38. ai/chronon/repo/default_runner.py +269 -0
  39. ai/chronon/repo/explore.py +418 -0
  40. ai/chronon/repo/extract_objects.py +134 -0
  41. ai/chronon/repo/gcp.py +586 -0
  42. ai/chronon/repo/gitpython_utils.py +15 -0
  43. ai/chronon/repo/hub_runner.py +261 -0
  44. ai/chronon/repo/hub_uploader.py +109 -0
  45. ai/chronon/repo/init.py +60 -0
  46. ai/chronon/repo/join_backfill.py +119 -0
  47. ai/chronon/repo/run.py +296 -0
  48. ai/chronon/repo/serializer.py +133 -0
  49. ai/chronon/repo/team_json_utils.py +46 -0
  50. ai/chronon/repo/utils.py +481 -0
  51. ai/chronon/repo/zipline.py +35 -0
  52. ai/chronon/repo/zipline_hub.py +277 -0
  53. ai/chronon/resources/__init__.py +0 -0
  54. ai/chronon/resources/gcp/__init__.py +0 -0
  55. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  56. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  57. ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
  58. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  59. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  60. ai/chronon/resources/gcp/joins/test/data.py +26 -0
  61. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  62. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  63. ai/chronon/resources/gcp/sources/test/data.py +26 -0
  64. ai/chronon/resources/gcp/teams.py +58 -0
  65. ai/chronon/source.py +86 -0
  66. ai/chronon/staging_query.py +226 -0
  67. ai/chronon/types.py +58 -0
  68. ai/chronon/utils.py +510 -0
  69. ai/chronon/windows.py +48 -0
  70. awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
  71. awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
  72. awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
  73. awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
  74. awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
  75. gen_thrift/__init__.py +0 -0
  76. gen_thrift/api/__init__.py +1 -0
  77. gen_thrift/api/constants.py +15 -0
  78. gen_thrift/api/ttypes.py +3754 -0
  79. gen_thrift/common/__init__.py +1 -0
  80. gen_thrift/common/constants.py +15 -0
  81. gen_thrift/common/ttypes.py +1814 -0
  82. gen_thrift/eval/__init__.py +1 -0
  83. gen_thrift/eval/constants.py +15 -0
  84. gen_thrift/eval/ttypes.py +660 -0
  85. gen_thrift/fetcher/__init__.py +1 -0
  86. gen_thrift/fetcher/constants.py +15 -0
  87. gen_thrift/fetcher/ttypes.py +127 -0
  88. gen_thrift/hub/__init__.py +1 -0
  89. gen_thrift/hub/constants.py +15 -0
  90. gen_thrift/hub/ttypes.py +1109 -0
  91. gen_thrift/observability/__init__.py +1 -0
  92. gen_thrift/observability/constants.py +15 -0
  93. gen_thrift/observability/ttypes.py +2355 -0
  94. gen_thrift/planner/__init__.py +1 -0
  95. gen_thrift/planner/constants.py +15 -0
  96. gen_thrift/planner/ttypes.py +1967 -0
ai/chronon/repo/run.py ADDED
@@ -0,0 +1,296 @@
+ #!/usr/bin/env python3
+ """
+ run.py needs to depend only on the Python standard library to simplify execution requirements.
+ """
+
+ # Copyright (C) 2023 The Chronon Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+ from datetime import datetime
+
+ import click
+
+ from ai.chronon.repo.aws import (
+     ZIPLINE_AWS_JAR_DEFAULT,
+     ZIPLINE_AWS_ONLINE_CLASS_DEFAULT,
+     AwsRunner,
+ )
+ from ai.chronon.repo.constants import (
+     APP_NAME_TEMPLATE,
+     AWS,
+     CLOUD_PROVIDER_KEYWORD,
+     GCP,
+     MODE_ARGS,
+     ONLINE_CLASS_ARG,
+     ONLINE_JAR_ARG,
+     ONLINE_MODES,
+     RENDER_INFO_DEFAULT_SCRIPT,
+     ZIPLINE_DIRECTORY,
+     RunMode,
+ )
+ from ai.chronon.repo.default_runner import Runner
+ from ai.chronon.repo.gcp import (
+     ZIPLINE_GCP_JAR_DEFAULT,
+     ZIPLINE_GCP_ONLINE_CLASS_DEFAULT,
+     GcpRunner,
+ )
+ from ai.chronon.repo.utils import get_environ_arg, set_runtime_env_v3
+
+
+ # TODO: @davidhan - we should move these to all be in the defaults of the choice args
+ def set_defaults(ctx):
+     """Set default values based on environment."""
+     chronon_repo_path = os.environ.get("CHRONON_REPO_PATH", ".")
+     today = datetime.today().strftime("%Y-%m-%d")
+
+     obj = ctx.obj if ctx.obj is not None else dict()
+
+     defaults = {
+         "ds": today,  # TODO: this breaks if the partition column is not the same as yyyy-MM-dd.
+         "app_name": os.environ.get("APP_NAME"),
+         "online_jar": os.environ.get("CHRONON_ONLINE_JAR"),
+         "repo": chronon_repo_path,
+         "online_class": os.environ.get("CHRONON_ONLINE_CLASS"),
+         "version": os.environ.get("VERSION") or obj.get("version"),
+         "spark_version": os.environ.get("SPARK_VERSION", "2.4.0"),
+         "spark_submit_path": os.path.join(chronon_repo_path, "scripts/spark_submit.sh"),
+         "spark_streaming_submit_path": os.path.join(
+             chronon_repo_path, "scripts/spark_streaming.sh"
+         ),
+         # NOTE: We don't want to ever call the fetch_online_jar.py script since we're working
+         # on our internal zipline fork of the chronon repo
+         # "online_jar_fetch": os.path.join(chronon_repo_path, "scripts/fetch_online_jar.py"),
+         "online_args": os.environ.get("CHRONON_ONLINE_ARGS"),
+         "chronon_jar": os.environ.get("CHRONON_DRIVER_JAR"),
+         "list_apps": "python3 " + os.path.join(chronon_repo_path, "scripts/yarn_list.py"),
+         "render_info": os.path.join(chronon_repo_path, RENDER_INFO_DEFAULT_SCRIPT),
+         "project_conf": obj.get("project_conf"),
+         "artifact_prefix": os.environ.get("ARTIFACT_PREFIX"),
+         "flink_state_uri": os.environ.get("FLINK_STATE_URI"),
+     }
+     for key, value in defaults.items():
+         if ctx.params.get(key) is None and value is not None:
+             ctx.params[key] = value
+
+
+ def validate_flink_state(ctx, param, value):
+     uri_schemes = ["gs://", "s3://"]
+     if value and not any(value.startswith(scheme) for scheme in uri_schemes):
+         raise click.BadParameter(f"Flink state URI must start with one of {uri_schemes}")
+     return value
+
+
+ def validate_additional_jars(ctx, param, value):
+     if value:
+         jars = value.split(",")
+         for jar in jars:
+             if not jar.startswith(("gs://", "s3://")):
+                 raise click.BadParameter(f"Additional jars must start with gs:// or s3://: {jar}")
+     return value
+
+
+ @click.command(
+     name="run",
+     context_settings=dict(allow_extra_args=True, ignore_unknown_options=True),
+ )
+ @click.option(
+     "--conf", required=True, help="Conf param - required for every mode"
+ )  # TODO: @davidhan - we should be able to infer this in the future
+ @click.option(
+     "--env",
+     required=False,
+     default="dev",
+     help="Running environment - defaults to dev",
+ )
+ @click.option(
+     "--mode", type=click.Choice([str(k) for k in MODE_ARGS.keys()]), default=str(RunMode.BACKFILL)
+ )
+ @click.option("--ds", help="the end partition to backfill the data")
+ @click.option("--app-name", help="app name. Default to {}".format(APP_NAME_TEMPLATE))
+ @click.option(
+     "--start-ds",
+     help="override the original start partition for a range backfill. "
+     "It only supports staging query, group by backfill and join jobs. "
+     "It could leave holes in your final output table due to the override date range.",
+ )
+ @click.option("--end-ds", help="the end ds for a range backfill")
+ @click.option(
+     "--parallelism",
+     help="break down the backfill range into this number of tasks in parallel. "
+     "Please use it along with --start-ds and --end-ds and only in manual mode",
+ )
+ @click.option("--repo", help="Path to chronon repo", default=".")
+ @click.option(
+     "--online-jar",
+     help="Jar containing Online KvStore & Deserializer Impl. "
+     "Used for streaming and metadata-upload mode.",
+ )
+ @click.option(
+     "--online-class",
+     help="Class name of Online Impl. Used for streaming and metadata-upload mode.",
+ )
+ @click.option("--version", required=False, help="Chronon version to use.")
+ @click.option("--spark-version", default="2.4.0", help="Spark version to use for downloading jar.")
+ @click.option("--spark-submit-path", help="Path to spark-submit")
+ @click.option("--spark-streaming-submit-path", help="Path to spark-submit for streaming")
+ @click.option(
+     "--online-jar-fetch",
+     help="Path to script that can pull online jar. This will run only "
+     "when a file doesn't exist at location specified by online_jar",
+ )
+ @click.option("--sub-help", is_flag=True, help="print help command of the underlying jar and exit")
+ @click.option(
+     "--conf-type",
+     help="related to sub-help - no need to set unless you are not working with a conf",
+ )
+ @click.option("--online-args", help="Basic arguments that need to be supplied to all online modes")
+ @click.option("--chronon-jar", help="Path to chronon OS jar")
+ @click.option("--release-tag", help="Use the latest jar for a particular tag.")
+ @click.option("--list-apps", help="command/script to list running jobs on the scheduler")
+ @click.option(
+     "--render-info",
+     help="Path to script rendering additional information of the given config. "
+     "Only applicable when mode is set to info",
+ )
+ @click.option("--kafka-bootstrap", help="Kafka bootstrap server in host:port format")
+ @click.option(
+     "--latest-savepoint",
+     is_flag=True,
+     default=False,
+     help="Deploys streaming job with latest savepoint",
+ )
+ @click.option("--custom-savepoint", help="Savepoint to deploy streaming job with.")
+ @click.option(
+     "--no-savepoint", is_flag=True, default=False, help="Deploys streaming job without a savepoint"
+ )
+ @click.option(
+     "--version-check",
+     is_flag=True,
+     default=False,
+     help="Checks if the Zipline version of the running streaming job differs from the local version and redeploys the job if so",
+ )
+ @click.option(
+     "--flink-state-uri",
+     help="Bucket for storing flink state checkpoints/savepoints and other internal pieces for orchestration.",
+     callback=validate_flink_state,
+ )
+ @click.option(
+     "--additional-jars",
+     help="Comma-separated list of additional jar URIs to be included in the Flink job classpath (e.g. gs://bucket/jar1.jar,gs://bucket/jar2.jar).",
+     callback=validate_additional_jars,
+ )
+ @click.option(
+     "--validate",
+     is_flag=True,
+     help="Validate the catalyst util Spark expression evaluation logic",
+ )
+ @click.option("--validate-rows", default="10000", help="Number of rows to run the validation on")
+ @click.option("--join-part-name", help="Name of the join part to use for join-part-job")
+ @click.option(
+     "--artifact-prefix",
+     help="Remote artifact URI to install zipline client artifacts necessary for interacting with Zipline infrastructure.",
+ )
+ @click.option("--disable-cloud-logging", is_flag=True, default=False, help="Disables cloud logging")
+ @click.option(
+     "--enable-debug",
+     is_flag=True,
+     default=False,
+     help="Enables verbose debug logging in run modes that support it",
+ )
+ @click.option(
+     "--uploader",
+     type=click.Choice(["spark", "bigquery"], case_sensitive=False),
+     help="Bulk put uploader to use when loading data to the kv store; applies to upload-to-kv mode",
+ )
+ @click.pass_context
+ def main(
+     ctx,
+     conf,
+     env,
+     mode,
+     ds,
+     app_name,
+     start_ds,
+     end_ds,
+     parallelism,
+     repo,
+     online_jar,
+     online_class,
+     version,
+     spark_version,
+     spark_submit_path,
+     spark_streaming_submit_path,
+     online_jar_fetch,
+     sub_help,
+     conf_type,
+     online_args,
+     chronon_jar,
+     release_tag,
+     list_apps,
+     render_info,
+     kafka_bootstrap,
+     latest_savepoint,
+     custom_savepoint,
+     no_savepoint,
+     version_check,
+     flink_state_uri,
+     validate,
+     validate_rows,
+     join_part_name,
+     artifact_prefix,
+     disable_cloud_logging,
+     additional_jars,
+     enable_debug,
+     uploader,
+ ):
+     unknown_args = ctx.args
+     click.echo("Running with args: {}".format(ctx.params))
+
+     conf_path = os.path.join(repo, conf)
+     if not os.path.isfile(conf_path):
+         raise ValueError(f"Conf file {conf_path} does not exist.")
+
+     set_runtime_env_v3(ctx.params, conf)
+     set_defaults(ctx)
+     extra_args = (" " + online_args) if mode in ONLINE_MODES and online_args else ""
+     ctx.params["args"] = " ".join(unknown_args) + extra_args
+     os.makedirs(ZIPLINE_DIRECTORY, exist_ok=True)
+
+     cloud_provider = get_environ_arg(CLOUD_PROVIDER_KEYWORD, ignoreError=True)
+
+     print(f"Cloud provider: {cloud_provider}")
+
+     if not cloud_provider:
+         # Support open source chronon runs
+         if chronon_jar:
+             Runner(ctx.params, os.path.expanduser(chronon_jar)).run()
+         else:
+             raise ValueError("Jar path is not set.")
+     elif cloud_provider.upper() == GCP:
+         ctx.params[ONLINE_JAR_ARG] = ZIPLINE_GCP_JAR_DEFAULT
+         ctx.params[ONLINE_CLASS_ARG] = ZIPLINE_GCP_ONLINE_CLASS_DEFAULT
+         ctx.params[CLOUD_PROVIDER_KEYWORD] = cloud_provider
+         GcpRunner(ctx.params).run()
+     elif cloud_provider.upper() == AWS:
+         ctx.params[ONLINE_JAR_ARG] = ZIPLINE_AWS_JAR_DEFAULT
+         ctx.params[ONLINE_CLASS_ARG] = ZIPLINE_AWS_ONLINE_CLASS_DEFAULT
+         ctx.params[CLOUD_PROVIDER_KEYWORD] = cloud_provider
+         AwsRunner(ctx.params).run()
+     else:
+         raise ValueError(f"Unsupported cloud provider: {cloud_provider}")
+
+
+ if __name__ == "__main__":
+     main()
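
For reference, the command above can be exercised in-process with Click's testing utilities. The sketch below is illustrative only: the conf and jar paths are hypothetical, the mode string assumes RunMode.BACKFILL renders as "backfill", and CLOUD_PROVIDER is assumed unset so execution falls through to the open-source Runner.

from click.testing import CliRunner

from ai.chronon.repo.run import main

runner = CliRunner()
result = runner.invoke(
    main,
    [
        "--conf", "compiled/joins/test/data.v1",     # hypothetical compiled conf path (must exist under --repo)
        "--mode", "backfill",
        "--ds", "2023-11-01",
        "--chronon-jar", "/tmp/chronon-driver.jar",  # hypothetical local driver jar
    ],
)
print(result.exit_code, result.output)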
ai/chronon/repo/serializer.py ADDED
@@ -0,0 +1,133 @@
+ # Copyright (C) 2023 The Chronon Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import json
+
+ from thrift import TSerialization
+ from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
+ from thrift.protocol.TJSONProtocol import (
+     TJSONProtocolFactory,
+     TSimpleJSONProtocolFactory,
+ )
+ from thrift.Thrift import TType
+ from thrift.transport.TTransport import TMemoryBuffer
+
+ from ai.chronon.utils import JsonDiffer
+
+
+ class ThriftJSONDecoder(json.JSONDecoder):
+     def __init__(self, *args, **kwargs):
+         self._thrift_class = kwargs.pop("thrift_class")
+         super(ThriftJSONDecoder, self).__init__(*args, **kwargs)
+
+     def decode(self, json_str):
+         if isinstance(json_str, dict):
+             dct = json_str
+         else:
+             dct = super(ThriftJSONDecoder, self).decode(json_str)
+         return self._convert(
+             dct, TType.STRUCT, (self._thrift_class, self._thrift_class.thrift_spec)
+         )
+
+     def _convert(self, val, ttype, ttype_info):
+         if ttype == TType.STRUCT:
+             (thrift_class, thrift_spec) = ttype_info
+             ret = thrift_class()
+             for field in thrift_spec:
+                 if field is None:
+                     continue
+                 (_, field_ttype, field_name, field_ttype_info, dummy) = field
+                 if field_name not in val:
+                     continue
+                 converted_val = self._convert(val[field_name], field_ttype, field_ttype_info)
+                 setattr(ret, field_name, converted_val)
+         elif ttype == TType.LIST:
+             (element_ttype, element_ttype_info, _) = ttype_info
+             ret = [self._convert(x, element_ttype, element_ttype_info) for x in val]
+         elif ttype == TType.SET:
+             (element_ttype, element_ttype_info) = ttype_info
+             ret = set([self._convert(x, element_ttype, element_ttype_info) for x in val])
+         elif ttype == TType.MAP:
+             (key_ttype, key_ttype_info, val_ttype, val_ttype_info, _) = ttype_info
+             ret = dict(
+                 [
+                     (
+                         self._convert(k, key_ttype, key_ttype_info),
+                         self._convert(v, val_ttype, val_ttype_info),
+                     )
+                     for (k, v) in val.items()
+                 ]
+             )
+         elif ttype == TType.STRING:
+             ret = str(val)
+         elif ttype == TType.DOUBLE:
+             ret = float(val)
+         elif ttype == TType.I64:
+             ret = int(val)
+         elif ttype == TType.I32 or ttype == TType.I16 or ttype == TType.BYTE:
+             ret = int(val)
+         elif ttype == TType.BOOL:
+             ret = bool(val)
+         else:
+             raise TypeError("Unrecognized thrift field type: %d" % ttype)
+         return ret
+
+
+ def json2thrift(json_str, thrift_class):
+     return json.loads(json_str, cls=ThriftJSONDecoder, thrift_class=thrift_class)
+
+
+ def json2binary(json_str, thrift_class):
+     thrift = json2thrift(json_str, thrift_class)
+     transport = TMemoryBuffer()
+     protocol = TBinaryProtocolAccelerated(transport)
+     thrift.write(protocol)
+     # Get the raw bytes representing the object in Thrift binary format
+     return transport.getvalue()
+
+
+ def file2thrift(path, thrift_class):
+     try:
+         with open(path, "r") as file:
+             return json2thrift(file.read(), thrift_class)
+     except json.decoder.JSONDecodeError as e:
+         raise Exception(
+             f"Error decoding file into a {thrift_class.__name__}: {path}. "
+             + f"Please double check that {path} represents a valid {thrift_class.__name__}."
+         ) from e
+
+
+ def thrift_json(obj):
+     return TSerialization.serialize(obj, protocol_factory=TJSONProtocolFactory())
+
+
+ def thrift_simple_json(obj):
+     simple = TSerialization.serialize(obj, protocol_factory=TSimpleJSONProtocolFactory())
+     parsed = json.loads(simple)
+     return json.dumps(parsed, indent=2, sort_keys=True)
+
+
+ def thrift_simple_json_protected(obj, obj_type) -> str:
+     serialized = thrift_simple_json(obj)
+     # ensure that reversal works - we will use this reversal during deployment
+     thrift_obj = json.loads(serialized, cls=ThriftJSONDecoder, thrift_class=obj_type)
+     actual = thrift_simple_json(thrift_obj)
+     differ = JsonDiffer()
+     diff = differ.diff(serialized, actual)
+     assert len(diff) == 0, f"""Serialization can't be reversed
+     diff: \n{diff}
+     original: \n{serialized}
+     """
+     differ.clean()
+     return serialized
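
For orientation, a minimal sketch of the round trip these helpers provide. The GroupBy import is an assumption: it stands in for any generated Thrift struct shipped in this wheel's gen_thrift packages.

from ai.chronon.repo.serializer import (
    json2binary,
    json2thrift,
    thrift_simple_json,
    thrift_simple_json_protected,
)
from gen_thrift.api.ttypes import GroupBy  # assumed generated Thrift struct

gb = GroupBy()                                    # fields left at defaults for the sketch
json_str = thrift_simple_json(gb)                 # struct -> pretty, sorted JSON
restored = json2thrift(json_str, GroupBy)         # JSON -> struct via ThriftJSONDecoder
blob = json2binary(json_str, GroupBy)             # JSON -> compact Thrift binary bytes
safe = thrift_simple_json_protected(gb, GroupBy)  # serialize, then assert the reversal is lossless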
ai/chronon/repo/team_json_utils.py ADDED
@@ -0,0 +1,46 @@
+ """A module used for reading the teams.json file."""
+
+ # Copyright (C) 2023 The Chronon Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import json
+
+ # The `default` team in teams.json contains default values.
+ DEFAULT_CONF_TEAM = "default"
+
+ loaded_jsons = {}
+
+
+ def read_conf_json(json_path):
+     if json_path not in loaded_jsons:
+         with open(json_path) as f:
+             team_json = json.load(f)
+         loaded_jsons[json_path] = team_json
+     return loaded_jsons[json_path]
+
+
+ def team_exists(json_path, team):
+     team_json = read_conf_json(json_path)
+     return team in team_json
+
+
+ def get_team_conf(json_path, team, key):
+     team_json = read_conf_json(json_path)
+     if team not in team_json:
+         raise ValueError("team {} does not exist in {}".format(team, json_path))
+     team_dict = team_json[team]
+     if key in team_dict:
+         return team_dict[key]
+     else:
+         return team_json[DEFAULT_CONF_TEAM][key]
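
A short usage sketch; the teams.json content shown in the comment is hypothetical.

from ai.chronon.repo.team_json_utils import get_team_conf, team_exists

# Suppose teams.json contains:
#   {
#     "default": {"namespace": "chronon_db", "spark_version": "2.4.0"},
#     "search": {"namespace": "search_db"}
#   }
assert team_exists("teams.json", "search")
get_team_conf("teams.json", "search", "namespace")      # -> "search_db" (team-level value)
get_team_conf("teams.json", "search", "spark_version")  # -> "2.4.0" (falls back to the "default" team)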