awx-zipline-ai 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of awx-zipline-ai might be problematic.

Files changed (96)
  1. agent/ttypes.py +6 -6
  2. ai/chronon/airflow_helpers.py +20 -23
  3. ai/chronon/cli/__init__.py +0 -0
  4. ai/chronon/cli/compile/__init__.py +0 -0
  5. ai/chronon/cli/compile/column_hashing.py +40 -17
  6. ai/chronon/cli/compile/compile_context.py +13 -17
  7. ai/chronon/cli/compile/compiler.py +59 -36
  8. ai/chronon/cli/compile/conf_validator.py +251 -99
  9. ai/chronon/cli/compile/display/__init__.py +0 -0
  10. ai/chronon/cli/compile/display/class_tracker.py +6 -16
  11. ai/chronon/cli/compile/display/compile_status.py +10 -10
  12. ai/chronon/cli/compile/display/diff_result.py +79 -14
  13. ai/chronon/cli/compile/fill_templates.py +3 -8
  14. ai/chronon/cli/compile/parse_configs.py +10 -17
  15. ai/chronon/cli/compile/parse_teams.py +38 -34
  16. ai/chronon/cli/compile/serializer.py +3 -9
  17. ai/chronon/cli/compile/version_utils.py +42 -0
  18. ai/chronon/cli/git_utils.py +2 -13
  19. ai/chronon/cli/logger.py +0 -2
  20. ai/chronon/constants.py +1 -1
  21. ai/chronon/group_by.py +47 -47
  22. ai/chronon/join.py +46 -32
  23. ai/chronon/logger.py +1 -2
  24. ai/chronon/model.py +9 -4
  25. ai/chronon/query.py +2 -2
  26. ai/chronon/repo/__init__.py +1 -2
  27. ai/chronon/repo/aws.py +17 -31
  28. ai/chronon/repo/cluster.py +121 -50
  29. ai/chronon/repo/compile.py +14 -8
  30. ai/chronon/repo/constants.py +1 -1
  31. ai/chronon/repo/default_runner.py +32 -54
  32. ai/chronon/repo/explore.py +70 -73
  33. ai/chronon/repo/extract_objects.py +6 -9
  34. ai/chronon/repo/gcp.py +89 -88
  35. ai/chronon/repo/gitpython_utils.py +3 -2
  36. ai/chronon/repo/hub_runner.py +145 -55
  37. ai/chronon/repo/hub_uploader.py +2 -1
  38. ai/chronon/repo/init.py +12 -5
  39. ai/chronon/repo/join_backfill.py +19 -5
  40. ai/chronon/repo/run.py +42 -39
  41. ai/chronon/repo/serializer.py +4 -12
  42. ai/chronon/repo/utils.py +72 -63
  43. ai/chronon/repo/zipline.py +3 -19
  44. ai/chronon/repo/zipline_hub.py +211 -39
  45. ai/chronon/resources/__init__.py +0 -0
  46. ai/chronon/resources/gcp/__init__.py +0 -0
  47. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  48. ai/chronon/resources/gcp/group_bys/test/data.py +13 -17
  49. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  50. ai/chronon/resources/gcp/joins/test/data.py +4 -8
  51. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  52. ai/chronon/resources/gcp/sources/test/data.py +9 -6
  53. ai/chronon/resources/gcp/teams.py +9 -21
  54. ai/chronon/source.py +2 -4
  55. ai/chronon/staging_query.py +60 -19
  56. ai/chronon/types.py +3 -2
  57. ai/chronon/utils.py +21 -68
  58. ai/chronon/windows.py +2 -4
  59. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/METADATA +47 -24
  60. awx_zipline_ai-0.3.0.dist-info/RECORD +96 -0
  61. awx_zipline_ai-0.3.0.dist-info/top_level.txt +4 -0
  62. gen_thrift/__init__.py +0 -0
  63. {ai/chronon → gen_thrift}/api/ttypes.py +327 -197
  64. {ai/chronon/api → gen_thrift}/common/ttypes.py +9 -39
  65. gen_thrift/eval/ttypes.py +660 -0
  66. {ai/chronon → gen_thrift}/hub/ttypes.py +12 -131
  67. {ai/chronon → gen_thrift}/observability/ttypes.py +343 -180
  68. {ai/chronon → gen_thrift}/planner/ttypes.py +326 -45
  69. ai/chronon/eval/__init__.py +0 -122
  70. ai/chronon/eval/query_parsing.py +0 -19
  71. ai/chronon/eval/sample_tables.py +0 -100
  72. ai/chronon/eval/table_scan.py +0 -186
  73. ai/chronon/orchestration/ttypes.py +0 -4406
  74. ai/chronon/resources/gcp/README.md +0 -174
  75. ai/chronon/resources/gcp/zipline-cli-install.sh +0 -54
  76. awx_zipline_ai-0.2.1.dist-info/RECORD +0 -93
  77. awx_zipline_ai-0.2.1.dist-info/licenses/LICENSE +0 -202
  78. awx_zipline_ai-0.2.1.dist-info/top_level.txt +0 -3
  79. /jars/__init__.py → /__init__.py +0 -0
  80. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/WHEEL +0 -0
  81. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/entry_points.txt +0 -0
  82. {ai/chronon → gen_thrift}/api/__init__.py +0 -0
  83. {ai/chronon/api/common → gen_thrift/api}/constants.py +0 -0
  84. {ai/chronon/api → gen_thrift}/common/__init__.py +0 -0
  85. {ai/chronon/api → gen_thrift/common}/constants.py +0 -0
  86. {ai/chronon/fetcher → gen_thrift/eval}/__init__.py +0 -0
  87. {ai/chronon/fetcher → gen_thrift/eval}/constants.py +0 -0
  88. {ai/chronon/hub → gen_thrift/fetcher}/__init__.py +0 -0
  89. {ai/chronon/hub → gen_thrift/fetcher}/constants.py +0 -0
  90. {ai/chronon → gen_thrift}/fetcher/ttypes.py +0 -0
  91. {ai/chronon/observability → gen_thrift/hub}/__init__.py +0 -0
  92. {ai/chronon/observability → gen_thrift/hub}/constants.py +0 -0
  93. {ai/chronon/orchestration → gen_thrift/observability}/__init__.py +0 -0
  94. {ai/chronon/orchestration → gen_thrift/observability}/constants.py +0 -0
  95. {ai/chronon → gen_thrift}/planner/__init__.py +0 -0
  96. {ai/chronon → gen_thrift}/planner/constants.py +0 -0
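
Items 62-68 and 82-96 above move the generated thrift modules out of the ai.chronon namespace into a new top-level gen_thrift package, so downstream code that imports these generated types needs new import paths. A minimal sketch, assuming class names such as GroupBy and Join exist unchanged in the moved modules:

    # Before (0.2.1) - generated thrift types lived under ai.chronon:
    # from ai.chronon.api.ttypes import GroupBy, Join

    # After (0.3.0) - the same generated modules are published under gen_thrift:
    from gen_thrift.api.ttypes import GroupBy, Join  # class names assumed for illustration
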
@@ -33,21 +33,11 @@ GB_INDEX_SPEC = {
     ],
     "_event_tables": ["sources[].events.table"],
     "_event_topics": ["sources[].events.topic"],
-    "aggregation": [
-        "aggregations[].inputColumn"
-    ],
-    "keys": [
-        "keyColumns"
-    ],
-    "name": [
-        "metaData.name"
-    ],
-    "online": [
-        "metaData.online"
-    ],
-    "output_namespace": [
-        "metaData.outputNamespace"
-    ],
+    "aggregation": ["aggregations[].inputColumn"],
+    "keys": ["keyColumns"],
+    "name": ["metaData.name"],
+    "online": ["metaData.online"],
+    "output_namespace": ["metaData.outputNamespace"],
 }
 
 JOIN_INDEX_SPEC = {
@@ -60,38 +50,29 @@ JOIN_INDEX_SPEC = {
         "joinParts[].groupBy.metaData.name",
         "rightParts[].groupBy.name",
     ],
-    "name": [
-        "metaData.name"
-    ],
-    "output_namespace": [
-        "metaData.outputNamespace"
-    ],
-    "_group_bys": [
-        "joinParts[].groupBy",
-        "rightParts[].groupBy"
-    ]
+    "name": ["metaData.name"],
+    "output_namespace": ["metaData.outputNamespace"],
+    "_group_bys": ["joinParts[].groupBy", "rightParts[].groupBy"],
 }
 
-DEFAULTS_SPEC = {
-    'outputNamespace': "namespace"
-}
+DEFAULTS_SPEC = {"outputNamespace": "namespace"}
 
 GB_REL_PATH = "production/group_bys"
 JOIN_REL_PATH = "production/joins"
 FILTER_COLUMNS = ["aggregation", "keys", "name", "sources", "joins"]
-PATH_FIELDS = ['file', 'json_file']
+PATH_FIELDS = ["file", "json_file"]
 # colors chosen to be visible clearly on BOTH black and white terminals
 # change with caution
-NORMAL = '\033[0m'
-BOLD = '\033[1m'
-ITALIC = '\033[3m'
-UNDERLINE = '\033[4m'
-RED = '\033[38;5;160m'
-GREEN = '\033[38;5;28m'
-ORANGE = '\033[38;5;130m'
-BLUE = '\033[38;5;27m'
-GREY = '\033[38;5;246m'
-HIGHLIGHT = BOLD+ITALIC+RED
+NORMAL = "\033[0m"
+BOLD = "\033[1m"
+ITALIC = "\033[3m"
+UNDERLINE = "\033[4m"
+RED = "\033[38;5;160m"
+GREEN = "\033[38;5;28m"
+ORANGE = "\033[38;5;130m"
+BLUE = "\033[38;5;27m"
+GREY = "\033[38;5;246m"
+HIGHLIGHT = BOLD + ITALIC + RED
 
 
 # walks the json nodes recursively collecting all values that match the path
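
The index specs above address nested config fields with dotted paths, where a trailing [] fans out over a list (for example aggregations[].inputColumn). The comment closing this hunk says the script "walks the json nodes recursively collecting all values that match the path"; the walker itself is not part of this diff, so the following is only a sketch of that idea against a pared-down GroupBy-like dict (collect_values is a hypothetical name, not the function in explore.py):

    def collect_values(node, path):
        """Collect every value reachable from `node` via a dotted path; '[]' fans out over lists."""
        if not path:
            return [node]
        head, _, rest = path.partition(".")
        if head.endswith("[]"):
            items = node.get(head[:-2], []) if isinstance(node, dict) else []
            return [v for item in items for v in collect_values(item, rest)]
        if isinstance(node, dict) and head in node:
            return collect_values(node[head], rest)
        return []

    # A pared-down GroupBy-like config using only fields named in GB_INDEX_SPEC:
    conf = {
        "metaData": {"name": "user.purchases.v1", "online": True},
        "keyColumns": ["user_id"],
        "sources": [{"events": {"table": "data.purchases", "topic": None}}],
        "aggregations": [{"inputColumn": "price"}],
    }
    assert collect_values(conf, "aggregations[].inputColumn") == ["price"]
    assert collect_values(conf, "sources[].events.table") == ["data.purchases"]
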
@@ -176,7 +157,7 @@ git_info_cache = {}
 
 # git_info is the most expensive part of the entire script - so we will have to parallelize
 def git_info(file_paths, exclude=None, root=CWD):
-    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ''
+    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ""
     procs = []
     with chdir(root):
         for file_path in file_paths:
@@ -185,8 +166,11 @@ def git_info(file_paths, exclude=None, root=CWD):
             else:
                 args = (
                     f"echo $(git log -n 2 --pretty='format:{BLUE} %as/%an/%ae' {exclude_args} -- "
-                    f"{file_path.replace(root, '')})")
-                procs.append((file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True)))
+                    f"{file_path.replace(root, '')})"
+                )
+                procs.append(
+                    (file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True))
+                )
 
     result = {}
     for file_path, proc in procs:
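
The hunk ends just as the spawned git log processes are about to be drained; the body of that loop is outside this diff. A hedged sketch of how the pipes might be read back into result, purely as an assumption about the omitted code:

    result = {}
    for file_path, proc in procs:
        out, _ = proc.communicate()               # wait for the parallel `git log` to finish
        result[file_path] = out.decode().strip()  # e.g. "{date}/{author}/{email}" per commit
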
@@ -229,7 +213,7 @@ def highlight(text, word):
     for idx in find_string(text, word):
         result = result + text[prev_idx:idx] + HIGHLIGHT + word + NORMAL
         prev_idx = idx + len(word)
-    result += text[prev_idx:len(text)]
+    result += text[prev_idx : len(text)]
     return result
 
 
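For reference, highlight wraps every occurrence of word in the HIGHLIGHT/NORMAL ANSI escapes defined near the top of the file; find_string, which yields the match indices, is outside this diff. A small usage sketch under that assumption:

    line = highlight("events without topics", "topics")
    # line == "events without " + HIGHLIGHT + "topics" + NORMAL
    print(line)  # renders "topics" in bold italic red on a color terminal
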
@@ -237,13 +221,13 @@ def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=Fa
     lines = []
     if trim_paths:
         for field in filter(lambda x: x in entry, PATH_FIELDS):
-            entry[field] = entry[field].replace(root, '')
+            entry[field] = entry[field].replace(root, "")
     for column, values in entry.items():
-        name = " "*(15 - len(column)) + column
+        name = " " * (15 - len(column)) + column
         if column in FILTER_COLUMNS and len(values) > show:
             values = [value for value in set(values) if target in value]
-            if (len(values) > show):
-                truncated = ', '.join(values[:show])
+            if len(values) > show:
+                truncated = ", ".join(values[:show])
                 remaining = len(values) - show
                 values = f"[{truncated} ... {GREY}{UNDERLINE}{remaining} more{NORMAL}]"
         if column == "file":
@@ -257,12 +241,15 @@ def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=Fa
 
 def find_in_index(index_table, target):
     def valid_entry(entry):
-        return any([
-            target in value
-            for column, values in entry.items()
-            if column in FILTER_COLUMNS
-            for value in values
-        ])
+        return any(
+            [
+                target in value
+                for column, values in entry.items()
+                if column in FILTER_COLUMNS
+                for value in values
+            ]
+        )
+
     return find_in_index_pred(index_table, valid_entry)
 
 
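valid_entry keeps an index entry whenever the target substring appears in any value under the FILTER_COLUMNS defined near the top of the file (aggregation, keys, name, sources, joins). A worked example of the same comprehension, assuming index entries are plain dicts of string lists:

    entry = {
        "name": ["search.user_features.v1"],
        "keys": ["user_id"],
        "file": ["group_bys/search/user_features.py"],  # not in FILTER_COLUMNS, never matched
    }
    target = "user_id"
    matches = any(
        target in value
        for column, values in entry.items()
        if column in FILTER_COLUMNS
        for value in values
    )
    assert matches is True
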
@@ -278,7 +265,7 @@ def display_entries(entries, target, root=CWD, trim_paths=False):
         pretty = prettify_entry(entry, target, info, root=root, trim_paths=trim_paths)
         display.append((info, pretty))
 
-    for (_, pretty_entry) in sorted(display):
+    for _, pretty_entry in sorted(display):
         print(pretty_entry)
 
 
@@ -340,7 +327,9 @@ def events_without_topics(output_file=None, exclude_commit_message=None):
             consumers = set()
             for join in entry["joins"]:
                 conf_file_path = conf_file("joins", join)
-                consumer_name, consumer_email = author_name_email(conf_file_path, exclude_commit_message)
+                consumer_name, consumer_email = author_name_email(
+                    conf_file_path, exclude_commit_message
+                )
                 consumers.add(consumer_name)
                 emails.add(consumer_email)
             row = [
@@ -349,58 +338,64 @@ def events_without_topics(output_file=None, exclude_commit_message=None):
                 is_online,
                 entry["_event_tables"][0],
                 joins,
-                ", ".join(consumers)
+                ", ".join(consumers),
             ]
             result.append(row)
         return found
 
     find_in_index_pred(gb_index, is_events_without_topics)
     if output_file:
-        with open(os.path.expanduser(output_file), 'w') as tsv_file:
+        with open(os.path.expanduser(output_file), "w") as tsv_file:
             for row in result:
-                tsv_file.write('\t'.join(map(str, row))+'\n')
-        print("wrote information about cases where events us used " +
-              f"without topics set into file {os.path.expanduser(output_file)}")
+                tsv_file.write("\t".join(map(str, row)) + "\n")
+        print(
+            "wrote information about cases where events us used "
+            + f"without topics set into file {os.path.expanduser(output_file)}"
+        )
     else:
         for row in result:
-            print('\t'.join(map(str, row))+'\n')
+            print("\t".join(map(str, row)) + "\n")
         print(",".join(list(emails)))
 
 
-def load_team_data(path='', teams_root=None):
+def load_team_data(path="", teams_root=None):
     # Check if path is teams.json or teams.py
-    if 'teams.json' in path:
-        with open(path, 'r') as infile:
+    if "teams.json" in path:
+        with open(path, "r") as infile:
             teams = json.load(infile)
-        base_defaults = teams.get('default', {})
+        base_defaults = teams.get("default", {})
         full_info = teams.copy()
         for team, values in teams.items():
            full_info[team] = dict(base_defaults, **values)
         return full_info
     else:
         from ai.chronon.cli.compile import parse_teams
+
         assert teams_root is not None, "Need root to load teams.py"
         teams_py = parse_teams.load_teams(teams_root)
         return teams_py
 
 
 # register all handlers here
-handlers = {
-    "_events_without_topics": events_without_topics
-}
+handlers = {"_events_without_topics": events_without_topics}
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Explore tool for chronon")
     parser.add_argument("keyword", help="Keyword to look up keys")
     parser.add_argument("--conf-root", help="Conf root for the configs", default=CWD)
     parser.add_argument(
-        "--handler-args", nargs="*", help="Special arguments for handler keywords of the form param=value")
+        "--handler-args",
+        nargs="*",
+        help="Special arguments for handler keywords of the form param=value",
+    )
     args = parser.parse_args()
     root = args.conf_root
     if not (root.endswith("chronon") or root.endswith("zipline")):
-        print("This script needs to be run from chronon conf root - with folder named 'chronon' or 'zipline', found: "
-              + root)
-    teams = load_team_data(os.path.join(root, 'teams.json'), teams_root=root)
+        print(
+            "This script needs to be run from chronon conf root - with folder named 'chronon' or 'zipline', found: "
+            + root
+        )
+    teams = load_team_data(os.path.join(root, "teams.json"), teams_root=root)
     gb_index = build_index("group_bys", GB_INDEX_SPEC, root=root, teams=teams)
     join_index = build_index("joins", JOIN_INDEX_SPEC, root=root, teams=teams)
     enrich_with_joins(gb_index, join_index, root=root, teams=teams)
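
load_team_data folds the "default" block of teams.json into every team via dict(base_defaults, **values), so per-team keys override the defaults. A small worked example; the key names below are illustrative, not taken from a real teams.json:

    teams = {
        "default": {"namespace": "chronon_db", "spark_version": "3.1.1"},
        "search": {"namespace": "search_db"},
    }
    base_defaults = teams.get("default", {})
    full_info = {team: dict(base_defaults, **values) for team, values in teams.items()}
    assert full_info["search"] == {"namespace": "search_db", "spark_version": "3.1.1"}
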
@@ -412,7 +407,9 @@ if __name__ == "__main__":
     handler_args = {}
     for arg in args.handler_args:
         splits = arg.split("=", 1)
-        assert len(splits) == 2, f"need args to handler for the form, param=value. Found and invalid arg:{arg}"
+        assert len(splits) == 2, (
+            f"need args to handler for the form, param=value. Found and invalid arg:{arg}"
+        )
         key, value = splits
         handler_args[key] = value
     handler(**handler_args)
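
--handler-args feeds keyword arguments to the selected handler as param=value pairs, and the loop above splits each pair on the first "=". For the _events_without_topics handler registered earlier, the parameters visible in this diff are output_file and exclude_commit_message; a small sketch of what such an invocation produces (the exact command line is assumed):

    # e.g. explore.py _events_without_topics --handler-args output_file=~/out.tsv exclude_commit_message=backfill
    raw_args = ["output_file=~/out.tsv", "exclude_commit_message=backfill"]
    handler_args = dict(arg.split("=", 1) for arg in raw_args)
    # -> {"output_file": "~/out.tsv", "exclude_commit_message": "backfill"}
    # which is then passed along as handler(**handler_args)
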
@@ -76,18 +76,17 @@ def import_module_set_name(module, cls):
     # obj.metaData.name=user.avg_session_length.v1__1
     # obj.metaData.team=user
     base_name = module.__name__.partition(".")[2] + "." + name
-
+
     # Add version suffix if version is set
-    if hasattr(obj.metaData, 'version') and obj.metaData.version is not None:
+    if hasattr(obj.metaData, "version") and obj.metaData.version is not None:
         base_name = base_name + "__" + str(obj.metaData.version)
-
+
     obj.metaData.name = base_name
     obj.metaData.team = module.__name__.split(".")[1]
     return module
 
 
 def from_file(file_path: str, cls: type, log_level=logging.INFO):
-
     logger = get_logger(log_level)
     logger.debug("Loading objects of type {cls} from {file_path}".format(**locals()))
@@ -110,15 +109,14 @@ def from_file(file_path: str, cls: type, log_level=logging.INFO):
 
 
 def chronon_path(file_path: str) -> str:
-
     conf_types = FOLDER_NAME_TO_CLASS.keys()
 
     splits = file_path.split("/")
     conf_occurences = [splits.index(typ) for typ in conf_types if typ in splits]
 
-    assert (
-        len(conf_occurences) > 0
-    ), f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    assert len(conf_occurences) > 0, (
+        f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    )
 
     index = min([splits.index(typ) for typ in conf_types if typ in splits])
     rel_path = "/".join(splits[index:])
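
chronon_path trims an absolute file path down to the part that starts at the first recognized conf folder. The folder names come from FOLDER_NAME_TO_CLASS, which is not part of this diff, so the conf_types below are an assumption based on folder names used elsewhere in the package:

    conf_types = ["group_bys", "joins"]  # assumed subset of FOLDER_NAME_TO_CLASS.keys()
    file_path = "/home/me/chronon/production/group_bys/search/clicks.py"

    splits = file_path.split("/")
    conf_occurences = [splits.index(typ) for typ in conf_types if typ in splits]
    assert len(conf_occurences) > 0, (
        f"Path: {file_path} doesn't contain folder with name among {conf_types}"
    )

    index = min(conf_occurences)
    rel_path = "/".join(splits[index:])
    assert rel_path == "group_bys/search/clicks.py"
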
@@ -127,7 +125,6 @@ def chronon_path(file_path: str) -> str:
 
 
 def module_path(file_path: str) -> str:
-
     adjusted_path = chronon_path(file_path)
     assert adjusted_path.endswith(".py"), f"Path: {file_path} doesn't end with '.py'"