awx-zipline-ai 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. agent/__init__.py +1 -0
  2. agent/constants.py +15 -0
  3. agent/ttypes.py +1684 -0
  4. ai/__init__.py +0 -0
  5. ai/chronon/__init__.py +0 -0
  6. ai/chronon/airflow_helpers.py +251 -0
  7. ai/chronon/api/__init__.py +1 -0
  8. ai/chronon/api/common/__init__.py +1 -0
  9. ai/chronon/api/common/constants.py +15 -0
  10. ai/chronon/api/common/ttypes.py +1844 -0
  11. ai/chronon/api/constants.py +15 -0
  12. ai/chronon/api/ttypes.py +3624 -0
  13. ai/chronon/cli/compile/column_hashing.py +313 -0
  14. ai/chronon/cli/compile/compile_context.py +177 -0
  15. ai/chronon/cli/compile/compiler.py +160 -0
  16. ai/chronon/cli/compile/conf_validator.py +590 -0
  17. ai/chronon/cli/compile/display/class_tracker.py +112 -0
  18. ai/chronon/cli/compile/display/compile_status.py +95 -0
  19. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  20. ai/chronon/cli/compile/display/console.py +3 -0
  21. ai/chronon/cli/compile/display/diff_result.py +46 -0
  22. ai/chronon/cli/compile/fill_templates.py +40 -0
  23. ai/chronon/cli/compile/parse_configs.py +141 -0
  24. ai/chronon/cli/compile/parse_teams.py +238 -0
  25. ai/chronon/cli/compile/serializer.py +115 -0
  26. ai/chronon/cli/git_utils.py +156 -0
  27. ai/chronon/cli/logger.py +61 -0
  28. ai/chronon/constants.py +3 -0
  29. ai/chronon/eval/__init__.py +122 -0
  30. ai/chronon/eval/query_parsing.py +19 -0
  31. ai/chronon/eval/sample_tables.py +100 -0
  32. ai/chronon/eval/table_scan.py +186 -0
  33. ai/chronon/fetcher/__init__.py +1 -0
  34. ai/chronon/fetcher/constants.py +15 -0
  35. ai/chronon/fetcher/ttypes.py +127 -0
  36. ai/chronon/group_by.py +692 -0
  37. ai/chronon/hub/__init__.py +1 -0
  38. ai/chronon/hub/constants.py +15 -0
  39. ai/chronon/hub/ttypes.py +1228 -0
  40. ai/chronon/join.py +566 -0
  41. ai/chronon/logger.py +24 -0
  42. ai/chronon/model.py +35 -0
  43. ai/chronon/observability/__init__.py +1 -0
  44. ai/chronon/observability/constants.py +15 -0
  45. ai/chronon/observability/ttypes.py +2192 -0
  46. ai/chronon/orchestration/__init__.py +1 -0
  47. ai/chronon/orchestration/constants.py +15 -0
  48. ai/chronon/orchestration/ttypes.py +4406 -0
  49. ai/chronon/planner/__init__.py +1 -0
  50. ai/chronon/planner/constants.py +15 -0
  51. ai/chronon/planner/ttypes.py +1686 -0
  52. ai/chronon/query.py +126 -0
  53. ai/chronon/repo/__init__.py +40 -0
  54. ai/chronon/repo/aws.py +298 -0
  55. ai/chronon/repo/cluster.py +65 -0
  56. ai/chronon/repo/compile.py +56 -0
  57. ai/chronon/repo/constants.py +164 -0
  58. ai/chronon/repo/default_runner.py +291 -0
  59. ai/chronon/repo/explore.py +421 -0
  60. ai/chronon/repo/extract_objects.py +137 -0
  61. ai/chronon/repo/gcp.py +585 -0
  62. ai/chronon/repo/gitpython_utils.py +14 -0
  63. ai/chronon/repo/hub_runner.py +171 -0
  64. ai/chronon/repo/hub_uploader.py +108 -0
  65. ai/chronon/repo/init.py +53 -0
  66. ai/chronon/repo/join_backfill.py +105 -0
  67. ai/chronon/repo/run.py +293 -0
  68. ai/chronon/repo/serializer.py +141 -0
  69. ai/chronon/repo/team_json_utils.py +46 -0
  70. ai/chronon/repo/utils.py +472 -0
  71. ai/chronon/repo/zipline.py +51 -0
  72. ai/chronon/repo/zipline_hub.py +105 -0
  73. ai/chronon/resources/gcp/README.md +174 -0
  74. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  75. ai/chronon/resources/gcp/group_bys/test/data.py +34 -0
  76. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  77. ai/chronon/resources/gcp/joins/test/data.py +30 -0
  78. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  79. ai/chronon/resources/gcp/sources/test/data.py +23 -0
  80. ai/chronon/resources/gcp/teams.py +70 -0
  81. ai/chronon/resources/gcp/zipline-cli-install.sh +54 -0
  82. ai/chronon/source.py +88 -0
  83. ai/chronon/staging_query.py +185 -0
  84. ai/chronon/types.py +57 -0
  85. ai/chronon/utils.py +557 -0
  86. ai/chronon/windows.py +50 -0
  87. awx_zipline_ai-0.2.0.dist-info/METADATA +173 -0
  88. awx_zipline_ai-0.2.0.dist-info/RECORD +93 -0
  89. awx_zipline_ai-0.2.0.dist-info/WHEEL +5 -0
  90. awx_zipline_ai-0.2.0.dist-info/entry_points.txt +2 -0
  91. awx_zipline_ai-0.2.0.dist-info/licenses/LICENSE +202 -0
  92. awx_zipline_ai-0.2.0.dist-info/top_level.txt +3 -0
  93. jars/__init__.py +0 -0
@@ -0,0 +1,472 @@
1
import functools
import json
import os
import re
import subprocess
import time
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from enum import Enum

from ai.chronon.cli.compile.parse_teams import EnvOrConfigAttribute
from ai.chronon.logger import get_logger
from ai.chronon.repo.constants import (
    APP_NAME_TEMPLATE,
    SCALA_VERSION_FOR_SPARK,
    SUPPORTED_SPARK,
)
17
+
18
+ LOG = get_logger()
19
+
20
class JobType(Enum):
    """The execution engines a job can be submitted to."""

    SPARK = "spark"
    FLINK = "flink"
23
+
24
+
25
def retry_decorator(retries=3, backoff=20):
    """Decorator factory: retry the wrapped function on any exception.

    The call is attempted up to ``retries + 1`` times inside the loop with a
    linear backoff of ``attempt * backoff`` seconds between attempts, then one
    final unguarded call is made so the last exception propagates to the caller.

    Fix: apply ``functools.wraps`` so the wrapped function keeps its
    ``__name__``/``__doc__`` (the log line below relies on ``func.__name__``).
    """
    def wrapper(func):
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            attempt = 0
            while attempt <= retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    attempt += 1
                    LOG.exception(e)
                    sleep_time = attempt * backoff
                    LOG.info(
                        "[{}] Retry: {} out of {}/ Sleeping for {}".format(
                            func.__name__, attempt, retries, sleep_time
                        )
                    )
                    time.sleep(sleep_time)
            # Final attempt outside the try: let the last failure raise.
            return func(*args, **kwargs)

        return wrapped

    return wrapper
47
+
48
+
49
def get_environ_arg(env_name, ignoreError=False) -> str:
    """Read *env_name* from the environment.

    Raises ValueError when the variable is unset/empty, unless *ignoreError*
    is true, in which case the (possibly None) value is returned as-is.
    """
    value = os.environ.get(env_name)
    if value or ignoreError:
        return value
    raise ValueError(f"Please set {env_name} environment variable")
54
+
55
def get_customer_warehouse_bucket() -> str:
    """Bucket name for this customer's Zipline warehouse."""
    return "zipline-warehouse-{}".format(get_customer_id())
57
+
58
def get_customer_id() -> str:
    """Customer identifier, sourced from the required CUSTOMER_ID env var."""
    return get_environ_arg("CUSTOMER_ID")
60
+
61
+
62
def extract_filename_from_path(path):
    """Return the last '/'-separated component of *path* (the whole string if no '/')."""
    _, _, tail = path.rpartition("/")
    return tail
64
+
65
+
66
def check_call(cmd):
    """Log *cmd*, whitespace-split it, and run it; raises on non-zero exit."""
    LOG.info("Running command: " + cmd)
    argv = cmd.split()
    return subprocess.check_call(argv, bufsize=0)
69
+
70
+
71
def check_output(cmd):
    """Log *cmd*, whitespace-split it, run it, and return its stripped stdout bytes."""
    LOG.info("Running command: " + cmd)
    argv = cmd.split()
    out = subprocess.check_output(argv, bufsize=0)
    return out.strip()
74
+
75
+
76
def custom_json(conf):
    """Extract the json stored in customJson for a conf; {} when absent/empty."""
    raw = conf.get("metaData", {}).get("customJson")
    if not raw:
        return {}
    return json.loads(raw)
81
+
82
+
83
def download_only_once(url, path, skip_download=False):
    """Download *url* to *path*, skipping when a same-sized local copy exists.

    Size comparison uses the Content-Length from a ``curl -sI`` HEAD request
    against ``wc -c`` on the local file.

    Fix: when the HEAD response carries no Content-Length header the original
    called ``.group()`` on a None match and crashed with AttributeError; we now
    fall through and re-download instead.
    """
    if skip_download:
        LOG.info("Skipping download of " + path)
        return
    should_download = True
    path = path.strip()
    if os.path.exists(path):
        content_output = check_output("curl -sI " + url).decode("utf-8")
        content_length = re.search("(content-length:\\s)(\\d+)", content_output.lower())
        if content_length is not None:
            remote_size = int(content_length.group().split()[-1])
            local_size = int(check_output("wc -c " + path).split()[0])
            LOG.info(
                """Files sizes of {url} vs. {path}
                Remote size: {remote_size}
                Local size : {local_size}""".format(
                    **locals()
                )
            )
            if local_size == remote_size:
                LOG.info("Sizes match. Assuming it's already downloaded.")
                should_download = False
        if should_download:
            LOG.info(
                "Different file from remote at local: " + path + ". Re-downloading.."
            )
    else:
        LOG.info("No file at: " + path + ". Downloading..")
    if should_download:
        check_call("curl {} -o {} --connect-timeout 10".format(url, path))
112
+
113
+
114
# NOTE: this is only for the open source chronon. For the internal zipline version, we have a different jar to download.
@retry_decorator(retries=3, backoff=50)
def download_jar(
    version,
    jar_type="uber",
    release_tag=None,
    spark_version="2.4.0",
    skip_download=False,
):
    """Resolve and download the Chronon spark jar, returning its local path.

    Resolution order:
      1. If CHRONON_DRIVER_JAR is set, that path wins and nothing is downloaded.
      2. Otherwise the jar is fetched from the maven repo (CHRONON_MAVEN_MIRROR_PREFIX
         overrides the default Sonatype URL) into /tmp, via download_only_once.

    version "latest"/None means: pick the newest version listed in maven-metadata.xml
    (optionally filtered by *release_tag*). Raises on unsupported *spark_version*.
    """
    assert spark_version in SUPPORTED_SPARK, (
        f"Received unsupported spark version {spark_version}. "
        f"Supported spark versions are {SUPPORTED_SPARK}"
    )
    scala_version = SCALA_VERSION_FOR_SPARK[spark_version]
    maven_url_prefix = os.environ.get("CHRONON_MAVEN_MIRROR_PREFIX", None)
    default_url_prefix = (
        "https://s01.oss.sonatype.org/service/local/repositories/public/content"
    )
    url_prefix = maven_url_prefix if maven_url_prefix else default_url_prefix
    base_url = "{}/ai/chronon/spark_{}_{}".format(url_prefix, jar_type, scala_version)
    LOG.info("Downloading jar from url: " + base_url)
    jar_path = os.environ.get("CHRONON_DRIVER_JAR", None)
    if jar_path is None:
        if version == "latest":
            version = None
        if version is None:
            # Resolve the newest matching version from the repo's maven metadata.
            metadata_content = check_output(
                "curl -s {}/maven-metadata.xml".format(base_url)
            )
            meta_tree = ET.fromstring(metadata_content)
            versions = [
                node.text
                for node in meta_tree.findall("./versioning/versions/")
                if re.search(
                    # Bare semver, or semver suffixed with _<release_tag><digits>.
                    r"^\d+\.\d+\.\d+{}$".format(
                        r"\_{}\d*".format(release_tag) if release_tag else ""
                    ),
                    node.text,
                )
            ]
            # NOTE(review): assumes metadata lists versions oldest-to-newest;
            # versions[-1] would also IndexError if nothing matched — confirm.
            version = versions[-1]
        jar_url = "{base_url}/{version}/spark_{jar_type}_{scala_version}-{version}-assembly.jar".format(
            base_url=base_url,
            version=version,
            scala_version=scala_version,
            jar_type=jar_type,
        )
        jar_path = os.path.join("/tmp", extract_filename_from_path(jar_url))
        download_only_once(jar_url, jar_path, skip_download)
    return jar_path
164
+
165
+
166
def get_teams_json_file_path(repo_path):
    """Location of the legacy teams.json config inside *repo_path*."""
    return os.path.join(repo_path, "teams.json")
168
+
169
+
170
def get_teams_py_file_path(repo_path):
    """Location of the teams.py config inside *repo_path*."""
    return os.path.join(repo_path, "teams.py")
172
+
173
def set_runtime_env_v3(params, conf):
    """Populate os.environ from a compiled conf's executionInfo env block.

    Precedence per variable: existing os.environ wins; otherwise the mode-specific
    env (falling back to the "common" env) from the conf, plus a derived APP_NAME.
    Values already present in os.environ are never overwritten.

    Fixes vs. original:
      - LOG.warn (deprecated alias) -> LOG.warning.
      - the conf-less fallback crashed with AttributeError when mode was None;
        now guarded the same way the legacy set_runtime_env guards it.
    """
    effective_mode = params.get("mode")

    runtime_env = {"APP_NAME": params.get("app_name")}

    if params.get("repo") and conf and effective_mode:
        # get the conf file
        conf_path = os.path.join(params["repo"], conf)
        if os.path.isfile(conf_path):
            with open(conf_path, "r") as infile:
                conf_json = json.load(infile)
            metadata = conf_json.get("metaData", {}) or conf_json  # user may just pass metadata as the entire json
            env = metadata.get("executionInfo", {}).get("env", {})
            runtime_env.update(env.get(EnvOrConfigAttribute.ENV, {}).get(effective_mode, {}) or env.get("common", {}))
            # Also set APP_NAME
            try:
                _, conf_type, team, _ = conf.split("/")[-4:]
                if not team:
                    team = "default"
                # context is the environment in which the job is running, which is provided from the args,
                # default to be dev.
                if params["env"]:
                    context = params["env"]
                else:
                    context = "dev"
                LOG.info(f"Context: {context} -- conf_type: {conf_type} -- team: {team}")

                runtime_env["APP_NAME"] = APP_NAME_TEMPLATE.format(
                    mode=effective_mode,
                    conf_type=conf_type,
                    context=context,
                    name=conf_json["metaData"]["name"],
                )
            except Exception:
                LOG.warning(
                    "Failed to set APP_NAME due to invalid conf path: {}, please ensure to supply the "
                    "relative path to zipline/ folder".format(
                        conf
                    )
                )
    else:
        if not params.get("app_name") and not os.environ.get("APP_NAME"):
            # Provide basic app_name when no conf is defined.
            # Modes like metadata-upload and metadata-export can rely on conf-type or folder rather than a conf.
            runtime_env["APP_NAME"] = "_".join(
                [
                    k
                    for k in [
                        "chronon",
                        # mode may be None for conf-less invocations.
                        effective_mode.replace("-", "_") if effective_mode else None,
                    ]
                    if k is not None
                ]
            )
    for key, value in runtime_env.items():
        # Pre-existing environment variables always take precedence.
        if key not in os.environ and value is not None:
            LOG.info(f"Setting to environment: {key}={value}")
            print(f"Setting to environment: {key}={value}")
            os.environ[key] = value
232
+
233
# TODO: delete this when we cutover
def set_runtime_env(params):
    """
    Setting the runtime environment variables.
    These are extracted from the common env, the team env and the common env.
    In order to use the environment variables defined in the configs as overrides for the args in the cli this method
    needs to be run before the runner and jar downloads.

    The order of priority is:
    - Environment variables existing already.
    - Environment variables derived from args (like app_name)
    - conf.metaData.modeToEnvMap for the mode (set on config)
    - team's dev environment for each mode set on teams.json
    - team's prod environment for each mode set on teams.json
    - default team environment per context and mode set on teams.json
    - Common Environment set in teams.json
    """

    # Layered env sources; filled in by set_runtime_env_teams_json and below,
    # then applied in the `order` defined further down.
    environment = {
        "common_env": {},
        "conf_env": {},
        "default_env": {},
        "team_env": {},
        "production_team_env": {},
        "cli_args": {},
    }

    conf_type = None
    # Normalize modes that are effectively replacement of each other (streaming/local-streaming/streaming-client)
    effective_mode = params["mode"]
    if effective_mode and "streaming" in effective_mode:
        effective_mode = "streaming"
    if params["repo"]:

        # Break if teams.json and teams.py exists
        teams_json_file = get_teams_json_file_path(params["repo"])
        teams_py_file = get_teams_py_file_path(params["repo"])

        if os.path.exists(teams_json_file) and os.path.exists(teams_py_file):
            raise ValueError(
                "Both teams.json and teams.py exist. Please only use teams.py."
            )

        if os.path.exists(teams_json_file):
            # Legacy path: populate `environment` from teams.json + the conf.
            set_runtime_env_teams_json(
                environment, params, effective_mode, teams_json_file
            )
    if params["app_name"]:
        # Explicit CLI app name wins over anything derived from the conf.
        environment["cli_args"]["APP_NAME"] = params["app_name"]
    else:
        if not params["app_name"] and not environment["cli_args"].get("APP_NAME"):
            # Provide basic app_name when no conf is defined.
            # Modes like metadata-upload and metadata-export can rely on conf-type or folder rather than a conf.
            environment["cli_args"]["APP_NAME"] = "_".join(
                [
                    k
                    for k in [
                        "chronon",
                        conf_type,
                        (
                            params["mode"].replace("-", "_")
                            if params["mode"]
                            else None
                        ),
                    ]
                    if k is not None
                ]
            )

    # Adding these to make sure they are printed if provided by the environment.
    environment["cli_args"]["CHRONON_DRIVER_JAR"] = params["chronon_jar"]
    environment["cli_args"]["CHRONON_ONLINE_JAR"] = params["online_jar"]
    environment["cli_args"]["CHRONON_ONLINE_CLASS"] = params["online_class"]
    # Application order: earlier sources win (first writer to os.environ sticks).
    order = [
        "conf_env",
        "team_env",  # todo: team_env maybe should be below default/common_env
        "production_team_env",
        "default_env",
        "common_env",
        "cli_args",
    ]
    LOG.info("Setting env variables:")
    for key in os.environ:
        if any([key in (environment.get(set_key, {}) or {}) for set_key in order]):
            LOG.info(f"From <environment> found {key}={os.environ[key]}")
    for set_key in order:
        for key, value in (environment.get(set_key, {}) or {}).items():
            # Never overwrite variables already present in the process env.
            if key not in os.environ and value is not None:
                LOG.info(f"From <{set_key}> setting {key}={value}")
                os.environ[key] = value
323
+
324
# TODO: delete this when we cutover
def set_runtime_env_teams_json(environment, params, effective_mode, teams_json_file):
    """Fill the layered `environment` dict from teams.json and the conf, then apply it.

    Mutates *environment* in place (common_env/conf_env/team_env/... layers) and
    finally writes the merged result into os.environ, never overwriting variables
    that are already set in the process environment.

    Fixes vs. original:
      - conf_type is now initialized to None; the app-name fallback below
        referenced it even when no conf was supplied (NameError).
      - conf_json.get("metaData") now defaults to {} (matching custom_json and
        set_runtime_env_v3) instead of raising AttributeError when absent.
    """
    conf_type = None
    if os.path.exists(teams_json_file):
        with open(teams_json_file, "r") as infile:
            teams_json = json.load(infile)
        # we should have a fallback if user wants to set to something else `default`
        environment["common_env"] = teams_json.get("default", {}).get("common_env", {})
        if params["conf"] and effective_mode:
            try:
                _, conf_type, team, _ = params["conf"].split("/")[-4:]
            except Exception as e:
                LOG.error(
                    "Invalid conf path: {}, please ensure to supply the relative path to zipline/ folder".format(
                        params["conf"]
                    )
                )
                raise e
            if not team:
                team = "default"
            # context is the environment in which the job is running, which is provided from the args,
            # default to be dev.
            if params["env"]:
                context = params["env"]
            else:
                context = "dev"
            LOG.info(
                f"Context: {context} -- conf_type: {conf_type} -- team: {team}"
            )
            conf_path = os.path.join(params["repo"], params["conf"])
            if os.path.isfile(conf_path):
                with open(conf_path, "r") as conf_file:
                    conf_json = json.load(conf_file)

                # Prefer the new executionInfo.env layout, fall back to modeToEnvMap.
                new_env = (
                    conf_json.get("metaData", {})
                    .get("executionInfo", {})
                    .get("env", {})
                    .get(effective_mode, {})
                )

                old_env = (
                    conf_json.get("metaData", {})
                    .get("modeToEnvMap", {})
                    .get(effective_mode, {})
                )

                environment["conf_env"] = new_env if new_env else old_env

                # Load additional args used on backfill.
                if custom_json(conf_json) and effective_mode in [
                    "backfill",
                    "backfill-left",
                    "backfill-final",
                ]:
                    environment["conf_env"]["CHRONON_CONFIG_ADDITIONAL_ARGS"] = (
                        " ".join(custom_json(conf_json).get("additional_args", []))
                    )
                environment["cli_args"]["APP_NAME"] = APP_NAME_TEMPLATE.format(
                    mode=effective_mode,
                    conf_type=conf_type,
                    context=context,
                    name=conf_json["metaData"]["name"],
                )
            environment["team_env"] = (
                teams_json[team].get(context, {}).get(effective_mode, {})
            )
            # fall-back to prod env even in dev mode when dev env is undefined.
            environment["production_team_env"] = (
                teams_json[team].get("production", {}).get(effective_mode, {})
            )
            # By default use production env.
            environment["default_env"] = (
                teams_json.get("default", {})
                .get("production", {})
                .get(effective_mode, {})
            )
            environment["cli_args"]["CHRONON_CONF_PATH"] = conf_path
    if params["app_name"]:
        environment["cli_args"]["APP_NAME"] = params["app_name"]
    else:
        if not params["app_name"] and not environment["cli_args"].get("APP_NAME"):
            # Provide basic app_name when no conf is defined.
            # Modes like metadata-upload and metadata-export can rely on conf-type or folder rather than a conf.
            environment["cli_args"]["APP_NAME"] = "_".join(
                [
                    k
                    for k in [
                        "chronon",
                        conf_type,
                        params["mode"].replace("-", "_") if params["mode"] else None,
                    ]
                    if k is not None
                ]
            )

    # Adding these to make sure they are printed if provided by the environment.
    environment["cli_args"]["CHRONON_DRIVER_JAR"] = params["chronon_jar"]
    environment["cli_args"]["CHRONON_ONLINE_JAR"] = params["online_jar"]
    environment["cli_args"]["CHRONON_ONLINE_CLASS"] = params["online_class"]
    order = [
        "conf_env",
        "team_env",
        "production_team_env",
        "default_env",
        "common_env",
        "cli_args",
    ]
    LOG.info("Setting env variables:")
    for key in os.environ:
        if any([key in environment[set_key] for set_key in order]):
            LOG.info(f"From <environment> found {key}={os.environ[key]}")
    for set_key in order:
        for key, value in environment[set_key].items():
            if key not in os.environ and value is not None:
                LOG.info(f"From <{set_key}> setting {key}={value}")
                os.environ[key] = value
440
+
441
+
442
def split_date_range(start_date, end_date, parallelism):
    """Split the inclusive range [start_date, end_date] into *parallelism* chunks.

    Dates are "YYYY-MM-DD" strings. Each chunk spans total_days // parallelism
    days except the last, which absorbs the remainder so the union covers the
    whole range. Returns a list of (start, end) string tuples.

    Raises ValueError if start is after end, or parallelism exceeds the day count.
    """
    begin = datetime.strptime(start_date, "%Y-%m-%d")
    finish = datetime.strptime(end_date, "%Y-%m-%d")
    if begin > finish:
        raise ValueError("Start date should be earlier than end date")
    # +1 makes the range inclusive of end_date.
    total_days = (finish - begin).days + 1

    if parallelism > total_days:
        raise ValueError("Parallelism should be less than or equal to total days")

    chunk = total_days // parallelism
    date_ranges = []
    for idx in range(parallelism):
        lo = begin + timedelta(days=idx * chunk)
        hi = finish if idx == parallelism - 1 else lo + timedelta(days=chunk - 1)
        date_ranges.append((lo.strftime("%Y-%m-%d"), hi.strftime("%Y-%m-%d")))
    return date_ranges
468
+
469
def get_metadata_name_from_conf(repo_path, conf_path):
    """Load the compiled conf at repo_path/conf_path and return metaData.name (None if absent)."""
    full_path = os.path.join(repo_path, conf_path)
    with open(full_path, "r") as conf_file:
        payload = json.load(conf_file)
    return payload.get("metaData", {}).get("name", None)
@@ -0,0 +1,51 @@
1
+ from importlib.metadata import PackageNotFoundError
2
+ from importlib.metadata import version as ver
3
+
4
+ import click
5
+
6
+ from ai.chronon.cli.compile.display.console import console
7
+ from ai.chronon.repo.compile import compile
8
+ from ai.chronon.repo.hub_runner import hub
9
+ from ai.chronon.repo.init import main as init_main
10
+ from ai.chronon.repo.run import main as run_main
11
+
12
+ LOGO = """
13
+ =%%%@:-%%%@=:%%%@+ .%@%@@@@@@%%%%%%: .+#%*. -%%%= -#%#-
14
+ :@@@@#.@@@@%.%@@@@. .@@@@@@@@@@@@@@- -@@@@= =@@@+ @@@@@
15
+ :@@@@*.%@@@#.#@@@%. .#@@@@: :==: =@@@+ -=- :
16
+ =@@@@=-@@@@+:%@@@#. #@@@%. :--: .%%=:+#%@@@%#+- =@@@+ .-:-. *%= #%%* :=#%@@@@#*-
17
+ .#@@@#-+@@@%-=@@@@- .%@@@%. @@@@ .@@@@@@@@%%@@@@%= =@@@+ +@@@= *@@@+. %@@% :#@@@@%%%@@@@@=
18
+ +**+=-%@@@+-#@@@*----=. :@@@@# %@@@ .@@@@%=. .-#@@@* =@@@+ +@@@= *@@@@@*: %@@% -@@@%- .+@@@*
19
+ +@@@%-+@@@%-=@@@@+ :@@@@* @@@@ .@@@@. #@@@: =@@@+ +@@@= *@@@%@@@*: %@@% %@@@#++****+*@@@@-
20
+ -@@@@+:#@@@*:#@@@#. -@@@@* @@@@ .@@@@ *@@@- =@@@+ +@@@= *@@@.-%@@@#-%@@% @@@@****#****++++:
21
+ =@@@@--@@@@=:@@@@* =@@@@+ @@@@ .@@@@#. .+@@@% =@@@+ +@@@= *@@@ -#@@@@@@% =@@@*.
22
+ +@@@@--@@@@=:@@@@* +@@@@@#########+ @@@@ .@@@@@@%*+*#@@@@* =@@@+ +@@@= *@@@. :#@@@@% =@@@@% -==+-
23
+ :@@@@* @@@@# @@@@% *@@@@@@@@@@@@@@@% @@@@ .@@@@#@@@@@@@%+: =@@@+ +@@@= *@@@. :*@@% .=#@@@@@@@%*:
24
+ .@@@%
25
+ .@@@%
26
+ .@@@@
27
+ ---:
28
+ """
29
+
30
+
31
def _set_package_version():
    """Installed version of the zipline-ai distribution, or "latest" when absent."""
    # NOTE(review): the distribution is looked up as "zipline-ai", but this wheel
    # is published as "awx-zipline-ai" — confirm the lookup name is intentional.
    try:
        return ver("zipline-ai")
    except PackageNotFoundError:
        console.print("No package found. Continuing with the latest version.")
        return "latest"
38
+
39
+
40
@click.group(help="The Zipline CLI. A tool for authoring and running Zipline pipelines in the cloud. For more information, see: https://chronon.ai/")
@click.version_option(version=_set_package_version())
@click.pass_context
def zipline(ctx):
    """Root command group: shares the resolved package version with subcommands via ctx.obj."""
    ctx.ensure_object(dict)
    ctx.obj["version"] = _set_package_version()
46
+
47
+
48
+ zipline.add_command(compile)
49
+ zipline.add_command(run_main)
50
+ zipline.add_command(init_main)
51
+ zipline.add_command(hub)
@@ -0,0 +1,105 @@
1
+ import os
2
+ from typing import Optional
3
+
4
+ import google.auth
5
+ import requests
6
+ from google.auth.transport.requests import Request
7
+
8
+
9
class ZiplineHub:
    """Thin HTTP client for the ZiplineHub conf-upload and workflow service.

    When *base_url* looks like an authenticated cloud endpoint
    (``https``-prefixed and ending in ``.app``), a Google ID token is attached
    as a Bearer header — read from the GCP_ID_TOKEN env var when present
    (e.g. GitHub Actions), otherwise minted from application-default credentials.

    Fixes vs. original: the sync-API error message said "diff API" (copy-paste);
    the triplicated header/POST/error boilerplate is factored into helpers.
    """

    def __init__(self, base_url):
        if not base_url:
            raise ValueError("Base URL for ZiplineHub cannot be empty.")
        self.base_url = base_url
        if self.base_url.startswith("https") and self.base_url.endswith(".app"):
            print("\n 🔐 Using Google Cloud authentication for ZiplineHub.")

            # First try to get ID token from environment (GitHub Actions)
            self.id_token = os.getenv('GCP_ID_TOKEN')
            if self.id_token:
                print(" 🔑 Using ID token from environment")
            else:
                # Fallback to Google Cloud authentication
                print(" 🔑 Generating ID token from default credentials")
                credentials, project_id = google.auth.default()
                credentials.refresh(Request())
                self.id_token = credentials.id_token

    def _headers(self):
        """Common request headers; id_token only exists for authenticated endpoints."""
        headers = {'Content-Type': 'application/json'}
        if hasattr(self, 'id_token'):
            headers['Authorization'] = f'Bearer {self.id_token}'
        return headers

    def _post(self, url, payload, api_label):
        """POST *payload* as JSON to *url*; log and re-raise request failures."""
        try:
            response = requests.post(url, json=payload, headers=self._headers())
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            print(f" ❌ Error calling {api_label} API: {e}")
            raise

    def call_diff_api(self, names_to_hashes: dict[str, str]) -> Optional[list[str]]:
        """Return the names whose hashes differ from what the server has."""
        url = f"{self.base_url}/upload/v1/diff"
        diff_response = self._post(url, {'namesToHashes': names_to_hashes}, "diff")
        return diff_response['diff']

    def call_upload_api(self, diff_confs, branch: str):
        """Upload the changed confs for *branch*; returns the response JSON."""
        url = f"{self.base_url}/upload/v1/confs"
        upload_request = {
            'diffConfs': diff_confs,
            'branch': branch,
        }
        return self._post(url, upload_request, "upload")

    def call_sync_api(self, branch: str, names_to_hashes: dict[str, str]) -> Optional[list[str]]:
        """Sync *branch* conf hashes with the server; returns the response JSON."""
        url = f"{self.base_url}/upload/v1/sync"
        sync_request = {
            "namesToHashes": names_to_hashes,
            "branch": branch,
        }
        # Error label was "diff" in the original (copy-paste); corrected to "sync".
        return self._post(url, sync_request, "sync")

    def call_workflow_start_api(self, conf_name, mode, branch, user, start, end, conf_hash):
        """Start a workflow run for a conf; returns the response JSON."""
        url = f"{self.base_url}/workflow/start"
        workflow_request = {
            'confName': conf_name,
            'confHash': conf_hash,
            'mode': mode,
            'branch': branch,
            'user': user,
            'start': start,
            'end': end,
        }
        return self._post(url, workflow_request, "workflow start")