awx-zipline-ai 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- agent/ttypes.py +6 -6
- ai/chronon/airflow_helpers.py +20 -23
- ai/chronon/cli/__init__.py +0 -0
- ai/chronon/cli/compile/__init__.py +0 -0
- ai/chronon/cli/compile/column_hashing.py +40 -17
- ai/chronon/cli/compile/compile_context.py +13 -17
- ai/chronon/cli/compile/compiler.py +59 -36
- ai/chronon/cli/compile/conf_validator.py +251 -99
- ai/chronon/cli/compile/display/__init__.py +0 -0
- ai/chronon/cli/compile/display/class_tracker.py +6 -16
- ai/chronon/cli/compile/display/compile_status.py +10 -10
- ai/chronon/cli/compile/display/diff_result.py +79 -14
- ai/chronon/cli/compile/fill_templates.py +3 -8
- ai/chronon/cli/compile/parse_configs.py +10 -17
- ai/chronon/cli/compile/parse_teams.py +38 -34
- ai/chronon/cli/compile/serializer.py +3 -9
- ai/chronon/cli/compile/version_utils.py +42 -0
- ai/chronon/cli/git_utils.py +2 -13
- ai/chronon/cli/logger.py +0 -2
- ai/chronon/constants.py +1 -1
- ai/chronon/group_by.py +47 -47
- ai/chronon/join.py +46 -32
- ai/chronon/logger.py +1 -2
- ai/chronon/model.py +9 -4
- ai/chronon/query.py +2 -2
- ai/chronon/repo/__init__.py +1 -2
- ai/chronon/repo/aws.py +17 -31
- ai/chronon/repo/cluster.py +121 -50
- ai/chronon/repo/compile.py +14 -8
- ai/chronon/repo/constants.py +1 -1
- ai/chronon/repo/default_runner.py +32 -54
- ai/chronon/repo/explore.py +70 -73
- ai/chronon/repo/extract_objects.py +6 -9
- ai/chronon/repo/gcp.py +89 -88
- ai/chronon/repo/gitpython_utils.py +3 -2
- ai/chronon/repo/hub_runner.py +145 -55
- ai/chronon/repo/hub_uploader.py +2 -1
- ai/chronon/repo/init.py +12 -5
- ai/chronon/repo/join_backfill.py +19 -5
- ai/chronon/repo/run.py +42 -39
- ai/chronon/repo/serializer.py +4 -12
- ai/chronon/repo/utils.py +72 -63
- ai/chronon/repo/zipline.py +3 -19
- ai/chronon/repo/zipline_hub.py +211 -39
- ai/chronon/resources/__init__.py +0 -0
- ai/chronon/resources/gcp/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/test/data.py +13 -17
- ai/chronon/resources/gcp/joins/__init__.py +0 -0
- ai/chronon/resources/gcp/joins/test/data.py +4 -8
- ai/chronon/resources/gcp/sources/__init__.py +0 -0
- ai/chronon/resources/gcp/sources/test/data.py +9 -6
- ai/chronon/resources/gcp/teams.py +9 -21
- ai/chronon/source.py +2 -4
- ai/chronon/staging_query.py +60 -19
- ai/chronon/types.py +3 -2
- ai/chronon/utils.py +21 -68
- ai/chronon/windows.py +2 -4
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/METADATA +48 -24
- awx_zipline_ai-0.3.1.dist-info/RECORD +96 -0
- awx_zipline_ai-0.3.1.dist-info/top_level.txt +4 -0
- gen_thrift/__init__.py +0 -0
- {ai/chronon → gen_thrift}/api/ttypes.py +327 -197
- {ai/chronon/api → gen_thrift}/common/ttypes.py +9 -39
- gen_thrift/eval/ttypes.py +660 -0
- {ai/chronon → gen_thrift}/hub/ttypes.py +12 -131
- {ai/chronon → gen_thrift}/observability/ttypes.py +343 -180
- {ai/chronon → gen_thrift}/planner/ttypes.py +326 -45
- ai/chronon/eval/__init__.py +0 -122
- ai/chronon/eval/query_parsing.py +0 -19
- ai/chronon/eval/sample_tables.py +0 -100
- ai/chronon/eval/table_scan.py +0 -186
- ai/chronon/orchestration/ttypes.py +0 -4406
- ai/chronon/resources/gcp/README.md +0 -174
- ai/chronon/resources/gcp/zipline-cli-install.sh +0 -54
- awx_zipline_ai-0.2.1.dist-info/RECORD +0 -93
- awx_zipline_ai-0.2.1.dist-info/licenses/LICENSE +0 -202
- awx_zipline_ai-0.2.1.dist-info/top_level.txt +0 -3
- /jars/__init__.py → /__init__.py +0 -0
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/WHEEL +0 -0
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/entry_points.txt +0 -0
- {ai/chronon → gen_thrift}/api/__init__.py +0 -0
- {ai/chronon/api/common → gen_thrift/api}/constants.py +0 -0
- {ai/chronon/api → gen_thrift}/common/__init__.py +0 -0
- {ai/chronon/api → gen_thrift/common}/constants.py +0 -0
- {ai/chronon/fetcher → gen_thrift/eval}/__init__.py +0 -0
- {ai/chronon/fetcher → gen_thrift/eval}/constants.py +0 -0
- {ai/chronon/hub → gen_thrift/fetcher}/__init__.py +0 -0
- {ai/chronon/hub → gen_thrift/fetcher}/constants.py +0 -0
- {ai/chronon → gen_thrift}/fetcher/ttypes.py +0 -0
- {ai/chronon/observability → gen_thrift/hub}/__init__.py +0 -0
- {ai/chronon/observability → gen_thrift/hub}/constants.py +0 -0
- {ai/chronon/orchestration → gen_thrift/observability}/__init__.py +0 -0
- {ai/chronon/orchestration → gen_thrift/observability}/constants.py +0 -0
- {ai/chronon → gen_thrift}/planner/__init__.py +0 -0
- {ai/chronon → gen_thrift}/planner/constants.py +0 -0
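Most of the churn in the `ai/chronon` tree is formatting and refactoring; the rename block above additionally shows the generated Thrift modules moving out of `ai.chronon.*` into a new top-level `gen_thrift` package. A minimal sketch of what that move means for downstream imports, assuming the generated type names (e.g. `GroupBy`) are unchanged between versions:

# Illustrative only: the renames above imply this import-path change for
# the Thrift-generated types; GroupBy is assumed to exist under both paths.
try:
    from gen_thrift.api.ttypes import GroupBy  # 0.3.1 layout
except ImportError:
    from ai.chronon.api.ttypes import GroupBy  # 0.2.1 layout

print(GroupBy.__module__)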
ai/chronon/repo/explore.py
CHANGED
@@ -33,21 +33,11 @@ GB_INDEX_SPEC = {
     ],
     "_event_tables": ["sources[].events.table"],
     "_event_topics": ["sources[].events.topic"],
-    "aggregation": [
-        "aggregations[].inputColumn"
-    ],
-    "keys": [
-        "keyColumns"
-    ],
-    "name": [
-        "metaData.name"
-    ],
-    "online": [
-        "metaData.online"
-    ],
-    "output_namespace": [
-        "metaData.outputNamespace"
-    ],
+    "aggregation": ["aggregations[].inputColumn"],
+    "keys": ["keyColumns"],
+    "name": ["metaData.name"],
+    "online": ["metaData.online"],
+    "output_namespace": ["metaData.outputNamespace"],
 }
 
 JOIN_INDEX_SPEC = {
@@ -60,38 +50,29 @@ JOIN_INDEX_SPEC = {
         "joinParts[].groupBy.metaData.name",
         "rightParts[].groupBy.name",
     ],
-    "name": [
-        "metaData.name"
-    ],
-    "output_namespace": [
-        "metaData.outputNamespace"
-    ],
-    "_group_bys": [
-        "joinParts[].groupBy",
-        "rightParts[].groupBy"
-    ]
+    "name": ["metaData.name"],
+    "output_namespace": ["metaData.outputNamespace"],
+    "_group_bys": ["joinParts[].groupBy", "rightParts[].groupBy"],
 }
 
-DEFAULTS_SPEC = {
-    'outputNamespace': "namespace"
-}
+DEFAULTS_SPEC = {"outputNamespace": "namespace"}
 
 GB_REL_PATH = "production/group_bys"
 JOIN_REL_PATH = "production/joins"
 FILTER_COLUMNS = ["aggregation", "keys", "name", "sources", "joins"]
-PATH_FIELDS = ['file', 'json_file']
+PATH_FIELDS = ["file", "json_file"]
 # colors chosen to be visible clearly on BOTH black and white terminals
 # change with caution
-NORMAL = '\033[0m'
-BOLD = '\033[1m'
-ITALIC = '\033[3m'
-UNDERLINE = '\033[4m'
-RED = '\033[38;5;160m'
-GREEN = '\033[38;5;28m'
-ORANGE = '\033[38;5;130m'
-BLUE = '\033[38;5;27m'
-GREY = '\033[38;5;246m'
-HIGHLIGHT = BOLD+ITALIC+RED
+NORMAL = "\033[0m"
+BOLD = "\033[1m"
+ITALIC = "\033[3m"
+UNDERLINE = "\033[4m"
+RED = "\033[38;5;160m"
+GREEN = "\033[38;5;28m"
+ORANGE = "\033[38;5;130m"
+BLUE = "\033[38;5;27m"
+GREY = "\033[38;5;246m"
+HIGHLIGHT = BOLD + ITALIC + RED
 
 
 # walks the json nodes recursively collecting all values that match the path
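The spec dicts compacted above feed the recursive collector noted in the trailing comment: each string is a dot-separated path into the config JSON, and a trailing `[]` on a segment means fan out over a list. An illustrative stand-alone walker for that convention (not the package's actual implementation):

# Stand-alone sketch of the GB_INDEX_SPEC path convention: dots descend
# into dicts, a "[]" suffix fans out over list elements.
def collect(node, path):
    head, _, rest = path.partition(".")
    if head.endswith("[]"):
        items = node.get(head[:-2], []) if isinstance(node, dict) else []
    else:
        items = [node[head]] if isinstance(node, dict) and head in node else []
    for item in items:
        yield from collect(item, rest) if rest else [item]

group_by = {"sources": [{"events": {"table": "db.user_events"}}]}
print(list(collect(group_by, "sources[].events.table")))  # ['db.user_events']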
@@ -176,7 +157,7 @@ git_info_cache = {}
 
 # git_info is the most expensive part of the entire script - so we will have to parallelize
 def git_info(file_paths, exclude=None, root=CWD):
-    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ''
+    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ""
     procs = []
     with chdir(root):
         for file_path in file_paths:
@@ -185,8 +166,11 @@ def git_info(file_paths, exclude=None, root=CWD):
             else:
                 args = (
                     f"echo $(git log -n 2 --pretty='format:{BLUE} %as/%an/%ae' {exclude_args} -- "
-                    f"{file_path.replace(root, '')})")
-            procs.append((file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True)))
+                    f"{file_path.replace(root, '')})"
+                )
+            procs.append(
+                (file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True))
+            )
 
     result = {}
     for file_path, proc in procs:
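This hunk is the fan-out half of the pattern the earlier comment announces: `git_info` launches one non-blocking `git log` per file, then drains the pipes in a second pass. The same start-then-collect idiom in isolation, with placeholder echo commands standing in for the real git invocations:

# Start-then-collect subprocess fan-out, mirroring git_info above.
# The echo commands are placeholders for per-file `git log` calls.
import subprocess

commands = [f"echo processed file_{i}" for i in range(3)]

# Popen returns immediately, so all children run concurrently.
procs = [(c, subprocess.Popen(c, stdout=subprocess.PIPE, shell=True)) for c in commands]

# communicate() blocks until each child exits and its pipe is drained.
results = {c: p.communicate()[0].decode().strip() for c, p in procs}
print(results)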
@@ -229,7 +213,7 @@ def highlight(text, word):
     for idx in find_string(text, word):
         result = result + text[prev_idx:idx] + HIGHLIGHT + word + NORMAL
         prev_idx = idx + len(word)
-    result += text[prev_idx:len(text)]
+    result += text[prev_idx : len(text)]
     return result
 
 
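`highlight` splices the `HIGHLIGHT` and `NORMAL` escape pairs defined at the top of the file around each match. The wrap-and-reset mechanic in miniature, using the same escape codes:

# HIGHLIGHT turns bold-italic-red on; NORMAL resets terminal state.
BOLD, ITALIC, RED = "\033[1m", "\033[3m", "\033[38;5;160m"
NORMAL = "\033[0m"
HIGHLIGHT = BOLD + ITALIC + RED

text, word = "user.avg_session_length.v1", "session"
idx = text.find(word)
print(text[:idx] + HIGHLIGHT + word + NORMAL + text[idx + len(word):])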
@@ -237,13 +221,13 @@ def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=False):
     lines = []
     if trim_paths:
         for field in filter(lambda x: x in entry, PATH_FIELDS):
-            entry[field] = entry[field].replace(root, '')
+            entry[field] = entry[field].replace(root, "")
     for column, values in entry.items():
-        name = " "*(15 - len(column)) + column
+        name = " " * (15 - len(column)) + column
         if column in FILTER_COLUMNS and len(values) > show:
             values = [value for value in set(values) if target in value]
-            if len(values) > show:
-                truncated = ', '.join(values[:show])
+            if len(values) > show:
+                truncated = ", ".join(values[:show])
             remaining = len(values) - show
             values = f"[{truncated} ... {GREY}{UNDERLINE}{remaining} more{NORMAL}]"
         if column == "file":
@@ -257,12 +241,15 @@ def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=False):
 
 def find_in_index(index_table, target):
     def valid_entry(entry):
-        return any(
-            [target in value
-             for column, values in entry.items()
-             if column in FILTER_COLUMNS
-             for value in values]
-        )
+        return any(
+            [
+                target in value
+                for column, values in entry.items()
+                if column in FILTER_COLUMNS
+                for value in values
+            ]
+        )
+
     return find_in_index_pred(index_table, valid_entry)
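The reflowed `valid_entry` is a doubly-nested comprehension: flatten every value under the filterable columns and test whether any contains the target. The equivalent explicit loops, as a readability aid only:

# Loop form of the valid_entry comprehension above.
FILTER_COLUMNS = ["aggregation", "keys", "name", "sources", "joins"]

def valid_entry(entry, target):
    for column, values in entry.items():
        if column in FILTER_COLUMNS:
            for value in values:
                if target in value:
                    return True
    return False

entry = {"name": ["search.clicks.v1"], "file": ["/tmp/x.py"]}
print(valid_entry(entry, "clicks"))  # True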
@@ -278,7 +265,7 @@ def display_entries(entries, target, root=CWD, trim_paths=False):
         pretty = prettify_entry(entry, target, info, root=root, trim_paths=trim_paths)
         display.append((info, pretty))
 
-    for (_, pretty_entry) in sorted(display):
+    for _, pretty_entry in sorted(display):
         print(pretty_entry)
 
 
@@ -340,7 +327,9 @@ def events_without_topics(output_file=None, exclude_commit_message=None):
             consumers = set()
             for join in entry["joins"]:
                 conf_file_path = conf_file("joins", join)
-                consumer_name, consumer_email = author_name_email(conf_file_path, exclude_commit_message)
+                consumer_name, consumer_email = author_name_email(
+                    conf_file_path, exclude_commit_message
+                )
                 consumers.add(consumer_name)
                 emails.add(consumer_email)
             row = [
@@ -349,58 +338,64 @@ def events_without_topics(output_file=None, exclude_commit_message=None):
                 is_online,
                 entry["_event_tables"][0],
                 joins,
-                ", ".join(consumers)
+                ", ".join(consumers),
             ]
             result.append(row)
         return found
 
     find_in_index_pred(gb_index, is_events_without_topics)
     if output_file:
-        with open(os.path.expanduser(output_file), 'w') as tsv_file:
+        with open(os.path.expanduser(output_file), "w") as tsv_file:
             for row in result:
-                tsv_file.write('\t'.join(map(str, row)) + '\n')
-        print("wrote information about cases where events us used " +
-              f"without topics set into file {os.path.expanduser(output_file)}")
+                tsv_file.write("\t".join(map(str, row)) + "\n")
+        print(
+            "wrote information about cases where events us used "
+            + f"without topics set into file {os.path.expanduser(output_file)}"
+        )
     else:
         for row in result:
-            print('\t'.join(map(str, row)) + '\n')
+            print("\t".join(map(str, row)) + "\n")
     print(",".join(list(emails)))
 
 
-def load_team_data(path='', teams_root=None):
+def load_team_data(path="", teams_root=None):
     # Check if path is teams.json or teams.py
-    if 'teams.json' in path:
-        with open(path, 'r') as infile:
+    if "teams.json" in path:
+        with open(path, "r") as infile:
             teams = json.load(infile)
-        base_defaults = teams.get('default', {})
+        base_defaults = teams.get("default", {})
         full_info = teams.copy()
         for team, values in teams.items():
            full_info[team] = dict(base_defaults, **values)
         return full_info
     else:
         from ai.chronon.cli.compile import parse_teams
+
         assert teams_root is not None, "Need root to load teams.py"
         teams_py = parse_teams.load_teams(teams_root)
         return teams_py
 
 
 # register all handlers here
-handlers = {
-    "_events_without_topics": events_without_topics
-}
+handlers = {"_events_without_topics": events_without_topics}
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Explore tool for chronon")
     parser.add_argument("keyword", help="Keyword to look up keys")
     parser.add_argument("--conf-root", help="Conf root for the configs", default=CWD)
     parser.add_argument(
-        "--handler-args", nargs='*', help="Special arguments for handler keywords of the form param=value")
+        "--handler-args",
+        nargs="*",
+        help="Special arguments for handler keywords of the form param=value",
+    )
     args = parser.parse_args()
     root = args.conf_root
     if not (root.endswith("chronon") or root.endswith("zipline")):
-        print("This script needs to be run from chronon conf root - with folder named "
-              "'chronon' or 'zipline', found: " + root)
-    teams = load_team_data(os.path.join(root, 'teams.json'), teams_root=root)
+        print(
+            "This script needs to be run from chronon conf root - with folder named 'chronon' or 'zipline', found: "
+            + root
+        )
+    teams = load_team_data(os.path.join(root, "teams.json"), teams_root=root)
     gb_index = build_index("group_bys", GB_INDEX_SPEC, root=root, teams=teams)
     join_index = build_index("joins", JOIN_INDEX_SPEC, root=root, teams=teams)
     enrich_with_joins(gb_index, join_index, root=root, teams=teams)
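`load_team_data` above layers each team's entry over the `default` block via `dict(base_defaults, **values)`, so team-specific keys shadow shared defaults. A small sketch of that merge rule with made-up team data:

# dict(base, **overrides): per-team keys win over shared defaults --
# the merge rule load_team_data applies above. Data is made up.
teams = {
    "default": {"namespace": "chronon_poc", "spark_version": "3.5"},
    "search": {"namespace": "search_prod"},
}
base_defaults = teams.get("default", {})
full_info = {team: dict(base_defaults, **values) for team, values in teams.items()}
print(full_info["search"])  # {'namespace': 'search_prod', 'spark_version': '3.5'}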
@@ -412,7 +407,9 @@ if __name__ == "__main__":
         handler_args = {}
         for arg in args.handler_args:
             splits = arg.split("=", 1)
-            assert len(splits) == 2, f"need args to handler for the form, param=value. Found and invalid arg:{arg}"
+            assert len(splits) == 2, (
+                f"need args to handler for the form, param=value. Found and invalid arg:{arg}"
+            )
             key, value = splits
             handler_args[key] = value
     handler(**handler_args)
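The reflowed assert guards `--handler-args` parsing, which uses `split("=", 1)` so that only the first `=` separates key from value. A short sketch of why `maxsplit=1` matters; the argument names come from the `events_without_topics` handler above:

# split("=", 1) keeps any later "=" inside the value.
handler_args = {}
for arg in ["output_file=~/report.tsv", "exclude_commit_message=revert=auto"]:
    splits = arg.split("=", 1)
    assert len(splits) == 2, f"expected param=value, got: {arg}"
    key, value = splits
    handler_args[key] = value

print(handler_args)
# {'output_file': '~/report.tsv', 'exclude_commit_message': 'revert=auto'}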
ai/chronon/repo/extract_objects.py
CHANGED

@@ -76,18 +76,17 @@ def import_module_set_name(module, cls):
     # obj.metaData.name=user.avg_session_length.v1__1
     # obj.metaData.team=user
     base_name = module.__name__.partition(".")[2] + "." + name
-
+
     # Add version suffix if version is set
-    if hasattr(obj.metaData, 'version') and obj.metaData.version is not None:
+    if hasattr(obj.metaData, "version") and obj.metaData.version is not None:
         base_name = base_name + "__" + str(obj.metaData.version)
-
+
     obj.metaData.name = base_name
     obj.metaData.team = module.__name__.split(".")[1]
     return module
 
 
 def from_file(file_path: str, cls: type, log_level=logging.INFO):
-
     logger = get_logger(log_level)
     logger.debug("Loading objects of type {cls} from {file_path}".format(**locals()))
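`import_module_set_name` derives an object's logical name from its module path, then appends a `__<version>` suffix when `metaData.version` is set, producing names like the `user.avg_session_length.v1__1` shown in the inline comment. The naming rule in isolation, with a hypothetical module path:

# Naming rule sketched from import_module_set_name above; the module
# path and version are hypothetical.
module_name = "group_bys.user.avg_session_length"
name, version = "v1", 1

base_name = module_name.partition(".")[2] + "." + name  # user.avg_session_length.v1
if version is not None:
    base_name += "__" + str(version)                    # user.avg_session_length.v1__1
team = module_name.split(".")[1]                        # user

print(base_name, team)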
@@ -110,15 +109,14 @@ def from_file(file_path: str, cls: type, log_level=logging.INFO):
 
 
 def chronon_path(file_path: str) -> str:
-
     conf_types = FOLDER_NAME_TO_CLASS.keys()
 
     splits = file_path.split("/")
     conf_occurences = [splits.index(typ) for typ in conf_types if typ in splits]
 
-    assert (
-        len(conf_occurences) > 0
-    ), f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    assert len(conf_occurences) > 0, (
+        f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    )
 
     index = min([splits.index(typ) for typ in conf_types if typ in splits])
     rel_path = "/".join(splits[index:])
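`chronon_path` trims an absolute path down to the portion starting at the first conf-type folder. A sketch of that trimming with a hypothetical layout; the folder list below is a stand-in for `FOLDER_NAME_TO_CLASS.keys()`:

# Stand-in for FOLDER_NAME_TO_CLASS.keys(); actual keys may differ.
conf_types = ["group_bys", "joins", "staging_queries", "models"]

file_path = "/home/dev/zipline/group_bys/user/avg_session_length.py"
splits = file_path.split("/")
index = min(splits.index(typ) for typ in conf_types if typ in splits)
print("/".join(splits[index:]))  # group_bys/user/avg_session_length.py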
@@ -127,7 +125,6 @@ def chronon_path(file_path: str) -> str:
 
 
 def module_path(file_path: str) -> str:
-
     adjusted_path = chronon_path(file_path)
     assert adjusted_path.endswith(".py"), f"Path: {file_path} doesn't end with '.py'"
 