awx-zipline-ai 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. __init__.py +0 -0
  2. agent/__init__.py +1 -0
  3. agent/constants.py +15 -0
  4. agent/ttypes.py +1684 -0
  5. ai/__init__.py +0 -0
  6. ai/chronon/__init__.py +0 -0
  7. ai/chronon/airflow_helpers.py +248 -0
  8. ai/chronon/cli/__init__.py +0 -0
  9. ai/chronon/cli/compile/__init__.py +0 -0
  10. ai/chronon/cli/compile/column_hashing.py +336 -0
  11. ai/chronon/cli/compile/compile_context.py +173 -0
  12. ai/chronon/cli/compile/compiler.py +183 -0
  13. ai/chronon/cli/compile/conf_validator.py +742 -0
  14. ai/chronon/cli/compile/display/__init__.py +0 -0
  15. ai/chronon/cli/compile/display/class_tracker.py +102 -0
  16. ai/chronon/cli/compile/display/compile_status.py +95 -0
  17. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  18. ai/chronon/cli/compile/display/console.py +3 -0
  19. ai/chronon/cli/compile/display/diff_result.py +111 -0
  20. ai/chronon/cli/compile/fill_templates.py +35 -0
  21. ai/chronon/cli/compile/parse_configs.py +134 -0
  22. ai/chronon/cli/compile/parse_teams.py +242 -0
  23. ai/chronon/cli/compile/serializer.py +109 -0
  24. ai/chronon/cli/compile/version_utils.py +42 -0
  25. ai/chronon/cli/git_utils.py +145 -0
  26. ai/chronon/cli/logger.py +59 -0
  27. ai/chronon/constants.py +3 -0
  28. ai/chronon/group_by.py +692 -0
  29. ai/chronon/join.py +580 -0
  30. ai/chronon/logger.py +23 -0
  31. ai/chronon/model.py +40 -0
  32. ai/chronon/query.py +126 -0
  33. ai/chronon/repo/__init__.py +39 -0
  34. ai/chronon/repo/aws.py +284 -0
  35. ai/chronon/repo/cluster.py +136 -0
  36. ai/chronon/repo/compile.py +62 -0
  37. ai/chronon/repo/constants.py +164 -0
  38. ai/chronon/repo/default_runner.py +269 -0
  39. ai/chronon/repo/explore.py +418 -0
  40. ai/chronon/repo/extract_objects.py +134 -0
  41. ai/chronon/repo/gcp.py +586 -0
  42. ai/chronon/repo/gitpython_utils.py +15 -0
  43. ai/chronon/repo/hub_runner.py +261 -0
  44. ai/chronon/repo/hub_uploader.py +109 -0
  45. ai/chronon/repo/init.py +60 -0
  46. ai/chronon/repo/join_backfill.py +119 -0
  47. ai/chronon/repo/run.py +296 -0
  48. ai/chronon/repo/serializer.py +133 -0
  49. ai/chronon/repo/team_json_utils.py +46 -0
  50. ai/chronon/repo/utils.py +481 -0
  51. ai/chronon/repo/zipline.py +35 -0
  52. ai/chronon/repo/zipline_hub.py +277 -0
  53. ai/chronon/resources/__init__.py +0 -0
  54. ai/chronon/resources/gcp/__init__.py +0 -0
  55. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  56. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  57. ai/chronon/resources/gcp/group_bys/test/data.py +30 -0
  58. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  59. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  60. ai/chronon/resources/gcp/joins/test/data.py +26 -0
  61. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  62. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  63. ai/chronon/resources/gcp/sources/test/data.py +26 -0
  64. ai/chronon/resources/gcp/teams.py +58 -0
  65. ai/chronon/source.py +86 -0
  66. ai/chronon/staging_query.py +226 -0
  67. ai/chronon/types.py +58 -0
  68. ai/chronon/utils.py +510 -0
  69. ai/chronon/windows.py +48 -0
  70. awx_zipline_ai-0.0.32.dist-info/METADATA +197 -0
  71. awx_zipline_ai-0.0.32.dist-info/RECORD +96 -0
  72. awx_zipline_ai-0.0.32.dist-info/WHEEL +5 -0
  73. awx_zipline_ai-0.0.32.dist-info/entry_points.txt +2 -0
  74. awx_zipline_ai-0.0.32.dist-info/top_level.txt +4 -0
  75. gen_thrift/__init__.py +0 -0
  76. gen_thrift/api/__init__.py +1 -0
  77. gen_thrift/api/constants.py +15 -0
  78. gen_thrift/api/ttypes.py +3754 -0
  79. gen_thrift/common/__init__.py +1 -0
  80. gen_thrift/common/constants.py +15 -0
  81. gen_thrift/common/ttypes.py +1814 -0
  82. gen_thrift/eval/__init__.py +1 -0
  83. gen_thrift/eval/constants.py +15 -0
  84. gen_thrift/eval/ttypes.py +660 -0
  85. gen_thrift/fetcher/__init__.py +1 -0
  86. gen_thrift/fetcher/constants.py +15 -0
  87. gen_thrift/fetcher/ttypes.py +127 -0
  88. gen_thrift/hub/__init__.py +1 -0
  89. gen_thrift/hub/constants.py +15 -0
  90. gen_thrift/hub/ttypes.py +1109 -0
  91. gen_thrift/observability/__init__.py +1 -0
  92. gen_thrift/observability/constants.py +15 -0
  93. gen_thrift/observability/ttypes.py +2355 -0
  94. gen_thrift/planner/__init__.py +1 -0
  95. gen_thrift/planner/constants.py +15 -0
  96. gen_thrift/planner/ttypes.py +1967 -0
@@ -0,0 +1,418 @@
1
+ #!/usr/bin/env python3
2
+
3
+
4
+ # Copyright (C) 2023 The Chronon Authors.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ import argparse
19
+ import json
20
+ import os
21
+ import subprocess
22
+ from contextlib import contextmanager
23
+ from pathlib import Path
24
+
25
# Working directory at import time; used as the default conf root throughout.
CWD = os.getcwd()
# Maps an index column name to the JSON paths (see extract_json) whose values
# populate that column for a GroupBy conf. Keys starting with "_" are internal
# helper columns used for lineage/reporting, not keyword search.
GB_INDEX_SPEC = {
    "sources": [
        "sources[].events.table",
        "sources[].entities.snapshotTable",
        "sources[].entities.mutationTable",
        "sources[].entities.topic",
        "sources[].events.topic",
    ],
    "_event_tables": ["sources[].events.table"],
    "_event_topics": ["sources[].events.topic"],
    "aggregation": ["aggregations[].inputColumn"],
    "keys": ["keyColumns"],
    "name": ["metaData.name"],
    "online": ["metaData.online"],
    "output_namespace": ["metaData.outputNamespace"],
}

# Same idea as GB_INDEX_SPEC, but for Join confs.
JOIN_INDEX_SPEC = {
    "input_table": [
        "left.entities.snapshotTable",
        "left.events.table",
    ],
    "_events_driver": ["left.events.table"],
    "group_bys": [
        "joinParts[].groupBy.metaData.name",
        "rightParts[].groupBy.name",
    ],
    "name": ["metaData.name"],
    "output_namespace": ["metaData.outputNamespace"],
    "_group_bys": ["joinParts[].groupBy", "rightParts[].groupBy"],
}

# Entry fields that, when empty, are back-filled from the team defaults:
# maps entry field name -> attribute name on the team object.
DEFAULTS_SPEC = {"outputNamespace": "namespace"}

# Locations of compiled confs relative to the conf root.
GB_REL_PATH = "production/group_bys"
JOIN_REL_PATH = "production/joins"
# Columns searched by keyword lookups (see find_in_index).
FILTER_COLUMNS = ["aggregation", "keys", "name", "sources", "joins"]
# Entry fields that hold filesystem paths (trimmed for display).
PATH_FIELDS = ["file", "json_file"]
# ANSI escape sequences for terminal output.
# colors chosen to be visible clearly on BOTH black and white terminals
# change with caution
NORMAL = "\033[0m"
BOLD = "\033[1m"
ITALIC = "\033[3m"
UNDERLINE = "\033[4m"
RED = "\033[38;5;160m"
GREEN = "\033[38;5;28m"
ORANGE = "\033[38;5;130m"
BLUE = "\033[38;5;27m"
GREY = "\033[38;5;246m"
HIGHLIGHT = BOLD + ITALIC + RED
76
+
77
+
78
# walks the json nodes recursively collecting all values that match the path
# a trailing `[]` in a field in the path indicates that there is an array of
# objects in the corresponding node value.
def extract_json(json_path, conf_json):
    """Collect every value in ``conf_json`` addressed by dotted ``json_path``.

    A path segment ending in ``[]`` means the node value is a list and each
    element is descended into, with matches from all elements concatenated.

    :param json_path: dotted path such as ``"sources[].events.table"``, or
                      None to return ``conf_json`` itself (recursion base case).
    :param conf_json: parsed JSON node (dict / list / scalar) to search.
    :return: list of matched values ([] when nothing matches); when
             ``json_path`` is None the node itself is returned unchanged.
    """
    if json_path is None:
        return conf_json
    steps = json_path.split(".", 1)
    key = steps[0]
    # remainder of the path, or None when `key` is the final segment
    # (renamed from `next` to avoid shadowing the builtin)
    rest = steps[1] if len(steps) > 1 else None
    if key.endswith("[]"):
        key = key[:-2]
        if key in conf_json:
            result = []
            for value in conf_json[key]:
                result.extend(extract_json(rest, value))
            return result
    else:
        if key in conf_json:
            final = extract_json(rest, conf_json[key])
            # normalize scalar leaves into a single-element list
            if isinstance(final, list):
                return final
            else:
                return [final]
    return []
102
+
103
+
104
def build_entry(conf, index_spec, conf_type, root=CWD, teams=None):
    """Build a single index entry for one compiled conf.

    :param conf: either an already-parsed conf dict, or a path to a compiled
                 JSON conf file to load.
    :param index_spec: column -> list of JSON paths (GB_INDEX_SPEC / JOIN_INDEX_SPEC).
    :param conf_type: conf folder name, e.g. "group_bys" or "joins".
    :param root: conf repository root used to derive source-file paths.
    :param teams: team-name -> team object; used to fill defaulted fields
                  per DEFAULTS_SPEC.
    :return: dict of column -> extracted values plus "file"/"json_file" paths,
             or None when the conf has no name (or the JSON fails to parse).
    """
    conf_dict = conf
    if isinstance(conf, str):
        with open(conf) as conf_file:
            try:
                conf_dict = json.load(conf_file)
            # narrowed from BaseException so KeyboardInterrupt/SystemExit are
            # no longer swallowed; parse failures stay best-effort (skip file)
            except Exception as ex:
                print(f"Failed to parse {conf} due to :: {ex}")
                return
    entry = {"file": None}
    for column, paths in index_spec.items():
        result = []
        for path in paths:
            result.extend(extract_json(path, conf_dict))
        entry[column] = result

    # a conf without a name cannot be indexed
    if len(entry["name"]) == 0:
        return None

    # derive python file path from the name & conf_type
    (team, conf_module) = entry["name"][0].split(".", 1)
    # Update missing values with teams defaults.
    for field, mapped_field in DEFAULTS_SPEC.items():
        if field in entry and not entry[field]:
            # NOTE(review): assumes team objects expose attributes via __dict__;
            # confirm against load_team_data's return type.
            team_dict = teams[team].__dict__
            entry[field] = [team_dict[mapped_field]]

    file_base = "/".join(conf_module.split(".")[:-1])
    py_file = file_base + ".py"
    init_file = file_base + "/__init__.py"
    py_path = os.path.join(root, conf_type, team, py_file)
    init_path = os.path.join(root, conf_type, team, init_file)
    # the conf may be defined either in <module>.py or in a package __init__.py
    conf_path = py_path if os.path.exists(py_path) else init_path
    entry["json_file"] = os.path.join(root, "production", conf_type, team, conf_module)
    entry["file"] = conf_path
    return entry
140
+
141
+
142
@contextmanager
def chdir(path):
    """Temporarily switch the process working directory to `path`.

    Used so subprocesses (e.g. git) run in the right folder; the original
    directory is always restored, even on error.
    """
    previous = Path().absolute()
    try:
        os.chdir(path)
        yield
    finally:
        os.chdir(previous)
153
+
154
+
155
# Cache of file path -> formatted git author/date line, shared across calls.
git_info_cache = {}


# git_info is the most expensive part of the entire script - so we will have to parallelize
def git_info(file_paths, exclude=None, root=CWD):
    """Fetch the latest-commit info line for each path in `file_paths`.

    Spawns one `git log` subprocess per uncached path (all processes are
    started before any output is read, so they effectively run in parallel)
    and returns {path: info_line}.

    :param exclude: commit-message pattern; matching commits are skipped
                    via git's --invert-grep/--grep flags.
    :param root: repository root; subprocesses run with cwd set to it.
    """
    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ""
    procs = []
    with chdir(root):
        for file_path in file_paths:
            if file_path in git_info_cache:
                # cache hit: carry the cached string instead of a process handle
                procs.append((file_path, git_info_cache[file_path]))
            else:
                # `echo $(...)` collapses git's output onto a single line.
                # NOTE(review): shell=True with an interpolated path is fine for
                # a trusted repo tree but unsafe for untrusted file names.
                args = (
                    f"echo $(git log -n 2 --pretty='format:{BLUE} %as/%an/%ae' {exclude_args} -- "
                    f"{file_path.replace(root, '')})"
                )
                procs.append(
                    (file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True))
                )

    result = {}
    for file_path, proc in procs:
        if isinstance(proc, subprocess.Popen):
            lines = []
            for line in proc.stdout.readlines():
                lines.append(line.decode("utf-8").strip())
            # first line carries the most recent commit's info
            git_info_cache[file_path] = lines[0]
        result[file_path] = git_info_cache[file_path]
    return result
184
+
185
+
186
def walk_files(path):
    """Yield the full path of every file under `path`, recursively."""
    for dirpath, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            yield os.path.join(dirpath, filename)
190
+
191
+
192
def build_index(conf_type, index_spec, root=CWD, teams=None):
    """Index every compiled conf under <root>/production/<conf_type>.

    Returns a dict keyed by conf name ("team.module.version") whose values
    are the entries produced by build_entry; unparseable/nameless confs are
    skipped.
    """
    teams = teams or {}
    conf_dir = os.path.join(root, "production", conf_type)
    index_table = {}
    for conf_path in walk_files(conf_dir):
        entry = build_entry(conf_path, index_spec, conf_type, root=root, teams=teams)
        if entry is not None:
            index_table[entry["name"][0]] = entry
    return index_table
201
+
202
+
203
def find_string(text, word):
    """Yield every index where `word` occurs in `text` (overlaps included)."""
    idx = text.find(word)
    while idx != -1:
        yield idx
        idx = text.find(word, idx + 1)
208
+
209
+
210
def highlight(text, word):
    """Return `text` with every occurrence of `word` wrapped in ANSI highlight codes."""
    pieces = []
    cursor = 0
    for idx in find_string(text, word):
        pieces.append(text[cursor:idx])
        pieces.append(HIGHLIGHT + word + NORMAL)
        cursor = idx + len(word)
    pieces.append(text[cursor:])
    return "".join(pieces)
218
+
219
+
220
def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=False):
    """Render one index entry as a colorized multi-line string.

    :param entry: index entry dict (column -> values / path strings).
    :param target: keyword being searched; occurrences are highlighted.
    :param modification: git info line appended next to the file path.
    :param show: max values printed per filterable column before truncating.
    :param trim_paths: strip `root` from path fields — NOTE: mutates `entry`
                       in place.
    """
    lines = []
    if trim_paths:
        for field in filter(lambda x: x in entry, PATH_FIELDS):
            entry[field] = entry[field].replace(root, "")
    for column, values in entry.items():
        # right-align column names into a 15-char gutter
        name = " " * (15 - len(column)) + column
        if column in FILTER_COLUMNS and len(values) > show:
            # keep only values containing the keyword before truncating
            values = [value for value in set(values) if target in value]
            if len(values) > show:
                truncated = ", ".join(values[:show])
                remaining = len(values) - show
                values = f"[{truncated} ... {GREY}{UNDERLINE}{remaining} more{NORMAL}]"
        if column == "file":
            # the file row shows the git info instead of keyword highlighting
            values = f"{BOLD}{values} {modification}{NORMAL}"
        else:
            values = highlight(str(values), target)
        lines.append(f"{BOLD}{ORANGE}{name}{NORMAL} - {values}")
    content = "\n" + "\n".join(lines)
    return content
240
+
241
+
242
def find_in_index(index_table, target):
    """Return entries whose FILTER_COLUMNS contain `target` as a substring."""

    def matches(entry):
        return any(
            target in value
            for column, values in entry.items()
            if column in FILTER_COLUMNS
            for value in values
        )

    return find_in_index_pred(index_table, matches)
254
+
255
+
256
def find_in_index_pred(index_table, valid_entry):
    """Return every entry in `index_table` for which `valid_entry(entry)` is truthy."""
    matches = []
    for entry in index_table.values():
        if valid_entry(entry):
            matches.append(entry)
    return matches
258
+
259
+
260
def display_entries(entries, target, root=CWD, trim_paths=False):
    """Pretty-print `entries`, ordered by their git info line (date first)."""
    infos = git_info([entry["file"] for entry in entries], root=root)
    rendered = []
    for entry in entries:
        info_line = infos[entry["file"]]
        rendered.append(
            (info_line, prettify_entry(entry, target, info_line, root=root, trim_paths=trim_paths))
        )

    for _, text in sorted(rendered):
        print(text)
270
+
271
+
272
def enrich_with_joins(gb_index, join_index, root=CWD, teams=None):
    """Mutate `gb_index` in place with join lineage.

    Adds GroupBys nested inline inside joins, then builds a reverse index
    (GroupBy -> consuming joins, and the event-driver table of those joins).
    """
    # nested gb entries
    for _, join_entry in join_index.items():
        for gb in join_entry["_group_bys"]:
            entry = build_entry(gb, GB_INDEX_SPEC, "group_bys", root=root, teams=teams)
            # NOTE(review): build_entry can return None (nameless conf); that
            # would raise here — confirm nested GroupBys always carry a name.
            gb_index[entry["name"][0]] = entry
    # lineage -> reverse index from gb -> join
    for _, group_by in gb_index.items():
        group_by["joins"] = []
        group_by["join_event_driver"] = []
    for _, join in join_index.items():
        for gb_name in join["group_bys"]:
            if gb_name in gb_index:
                gb_index[gb_name]["joins"].append(join["name"][0])
                if len(join["_events_driver"]) > 0:
                    gb_index[gb_name]["join_event_driver"].append(join["_events_driver"][0])
288
+
289
+
290
# Cache of file path -> (author name, email), so `git log` is not re-run.
file_to_author = {}
# Extracted per GB_INDEX_SPEC; NOTE(review): initialized as a list but rebound
# to a dict in the __main__ block, which handlers like events_without_topics
# rely on — confirm this placeholder is intentional.
gb_index = []
# Extracted per JOIN_INDEX_SPEC (same rebinding note as gb_index).
join_index = []
296
+
297
+
298
def author_name_email(file, exclude=None):
    """Return (author_name, author_email) of the last relevant commit of `file`.

    Results are memoized in the module-level `file_to_author` cache; missing
    files yield a pair of empty strings.
    """
    if not os.path.exists(file):
        return ("", "")
    if file not in file_to_author:
        for cached_path, info_line in git_info([file], exclude).items():
            # git info line ends with ".../<name>/<email>"
            file_to_author[cached_path] = info_line.split("/")[-2:]
    return file_to_author[file]
305
+
306
+
307
def conf_file(conf_type, conf_name):
    """Map a conf name like "team.module.v1" to its compiled path under production/."""
    team_and_module = conf_name.split(".", 1)
    return os.path.join("production", conf_type, *team_and_module)
311
+
312
+
313
def events_without_topics(output_file=None, exclude_commit_message=None):
    """Report GroupBys whose event sources have a table but no streaming topic.

    Scans the module-level `gb_index` (populated in __main__) and emits one
    TSV row per offending GroupBy: name, producer, online flag, event table,
    consuming joins, and join authors.

    :param output_file: optional TSV output path (~ is expanded); when omitted
                        the rows are printed to stdout followed by a
                        comma-separated list of involved emails.
    :param exclude_commit_message: commit-message pattern to skip when
                                   attributing authorship.
    """
    result = []
    emails = set()

    def is_events_without_topics(entry):
        found = len(entry["_event_topics"]) == 0 and len(entry["_event_tables"]) > 0
        # NOTE(review): this reports whether the `online` field is *present*,
        # not whether it is true — `online: false` still yields True; confirm.
        is_online = len(entry["online"]) > 0
        joins = ", ".join(entry["joins"]) if len(entry["joins"]) > 0 else "STANDALONE"
        if found:
            # prefer compiled JSON for blame; fall back to the python source
            file = entry["json_file"] if os.path.exists(entry["json_file"]) else entry["file"]
            producer_name, producer_email = author_name_email(file, exclude_commit_message)
            emails.add(producer_email)
            consumers = set()
            for join in entry["joins"]:
                conf_file_path = conf_file("joins", join)
                consumer_name, consumer_email = author_name_email(
                    conf_file_path, exclude_commit_message
                )
                consumers.add(consumer_name)
                emails.add(consumer_email)
            row = [
                entry["name"][0],
                producer_name,
                is_online,
                entry["_event_tables"][0],
                joins,
                ", ".join(consumers),
            ]
            result.append(row)
        return found

    # used for its side effect of populating `result`/`emails` via the closure
    find_in_index_pred(gb_index, is_events_without_topics)
    if output_file:
        with open(os.path.expanduser(output_file), "w") as tsv_file:
            for row in result:
                tsv_file.write("\t".join(map(str, row)) + "\n")
        print(
            # fixed message typo: "events us used" -> "events is used"
            "wrote information about cases where events is used "
            + f"without topics set into file {os.path.expanduser(output_file)}"
        )
    else:
        for row in result:
            print("\t".join(map(str, row)) + "\n")
        print(",".join(list(emails)))
359
+
360
+
361
def load_team_data(path="", teams_root=None):
    """Load team configuration from either a teams.json file or teams.py.

    For teams.json, every team's dict is merged over the "default" team's
    values. Otherwise the chronon compile machinery loads teams.py from
    `teams_root` (which must be provided).
    """
    # Check if path is teams.json or teams.py
    if "teams.json" in path:
        with open(path, "r") as infile:
            raw = json.load(infile)
        defaults = raw.get("default", {})
        # layer each team's overrides on top of the shared defaults
        return {team: dict(defaults, **overrides) for team, overrides in raw.items()}

    from ai.chronon.cli.compile import parse_teams

    assert teams_root is not None, "Need root to load teams.py"
    return parse_teams.load_teams(teams_root)
377
+
378
+
379
# register all handlers here
handlers = {"_events_without_topics": events_without_topics}

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Explore tool for chronon")
    parser.add_argument("keyword", help="Keyword to look up keys")
    parser.add_argument("--conf-root", help="Conf root for the configs", default=CWD)
    parser.add_argument(
        "--handler-args",
        nargs="*",
        # fix: without a default, an absent flag yields None and the
        # `for arg in args.handler_args` loop below raises TypeError
        default=[],
        help="Special arguments for handler keywords of the form param=value",
    )
    args = parser.parse_args()
    root = args.conf_root
    if not (root.endswith("chronon") or root.endswith("zipline")):
        print(
            "This script needs to be run from chronon conf root - with folder named 'chronon' or 'zipline', found: "
            + root
        )
    teams = load_team_data(os.path.join(root, "teams.json"), teams_root=root)
    # build indexes once up front; handlers and keyword search both use them
    gb_index = build_index("group_bys", GB_INDEX_SPEC, root=root, teams=teams)
    join_index = build_index("joins", JOIN_INDEX_SPEC, root=root, teams=teams)
    enrich_with_joins(gb_index, join_index, root=root, teams=teams)

    candidate = args.keyword
    if candidate in handlers:
        print(f"{candidate} is a registered handler")
        handler = handlers[candidate]
        handler_args = {}
        for arg in args.handler_args:
            splits = arg.split("=", 1)
            # fixed message typo: "Found and invalid arg"
            assert len(splits) == 2, (
                f"need args to handler for the form, param=value. Found an invalid arg: {arg}"
            )
            key, value = splits
            handler_args[key] = value
        handler(**handler_args)
    else:
        # plain keyword: substring-search the group_by index and display hits
        group_bys = find_in_index(gb_index, args.keyword)
        display_entries(group_bys, args.keyword, root=root, trim_paths=True)
@@ -0,0 +1,134 @@
1
+ # Copyright (C) 2023 The Chronon Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import glob
16
+ import importlib.machinery
17
+ import importlib.util
18
+ import logging
19
+ import os
20
+
21
+ from ai.chronon.logger import get_logger
22
+ from ai.chronon.repo import FOLDER_NAME_TO_CLASS
23
+
24
+
25
def from_folder(full_path: str, cls: type, log_level=logging.INFO):
    """Recursively scan `full_path` for python files and collect every object
    of type `cls` they define.

    Returns a dict of qualified object name -> object; files that fail to
    import are logged and skipped.
    """
    if full_path.endswith("/"):
        full_path = full_path[:-1]

    collected = {}
    for py_file in glob.glob(os.path.join(full_path, "**/*.py"), recursive=True):
        try:
            collected.update(from_file(py_file, cls, log_level))
        except Exception as err:
            logging.error(f"Failed to extract: {py_file}")
            logging.exception(err)
    return collected
42
+
43
+
44
def from_folderV2(full_path: str, target_file: str, cls: type):
    """Recursively scan `full_path` for python files and collect every object
    of type `cls`, tracking per-file failures.

    Returns a triple:
      - results: qualified name -> (object, defining file path)
      - errors: file path -> exception raised while importing it
      - target_file_error: the exception for `target_file` specifically, or None
    """
    if full_path.endswith("/"):
        full_path = full_path[:-1]

    results = {}
    errors = {}
    target_file_error = None
    for py_file in glob.glob(os.path.join(full_path, "**/*.py"), recursive=True):
        try:
            for qualifier, obj in from_file(py_file, cls, log_level=logging.NOTSET).items():
                results[qualifier] = (obj, py_file)
        except Exception as err:
            if py_file == target_file:
                target_file_error = err
            errors[py_file] = err
    return results, errors, target_file_error
66
+
67
+
68
def import_module_set_name(module, cls):
    """Stamp a qualified name and team onto every `cls` instance in `module`.

    For module name "group_bys.user.avg_session_length" and variable "v1",
    metaData.name becomes "user.avg_session_length.v1" (plus a "__<version>"
    suffix when metaData.version is set) and metaData.team becomes "user".
    Returns the module.
    """
    # drop the leading folder segment, e.g. "group_bys.user.script" -> "user.script"
    qualifier_prefix = module.__name__.partition(".")[2]
    for var_name, obj in list(module.__dict__.items()):
        if not isinstance(obj, cls):
            continue
        qualified = qualifier_prefix + "." + var_name
        # append version suffix when a version is set on the object
        version = getattr(obj.metaData, "version", None)
        if version is not None:
            qualified = qualified + "__" + str(version)
        obj.metaData.name = qualified
        obj.metaData.team = module.__name__.split(".")[1]
    return module
87
+
88
+
89
def from_file(file_path: str, cls: type, log_level=logging.INFO):
    """Import the python file at `file_path` and return every `cls` object it
    defines, keyed by qualified name.

    The file path is translated into an importable module qualifier
    (team name + script name, e.g. "group_bys/psx/reservation_status.py" ->
    "group_bys.psx.reservation_status"), the module is imported, and each
    matching object gets metaData.name / metaData.team assigned before being
    collected (e.g. key "psx.reservation_status.v1").
    """
    logger = get_logger(log_level)
    logger.debug("Loading objects of type {cls} from {file_path}".format(**locals()))

    module_qualifier = module_path(file_path)
    mod = importlib.import_module(module_qualifier)

    # stamp qualified names onto the objects before keying the result by them
    import_module_set_name(mod, cls)

    return {obj.metaData.name: obj for obj in mod.__dict__.values() if isinstance(obj, cls)}
109
+
110
+
111
def chronon_path(file_path: str) -> str:
    """Return `file_path` relative to the first conf-type folder it contains.

    E.g. "/repo/group_bys/team/script.py" -> "group_bys/team/script.py".

    :raises AssertionError: when no conf-type folder (a key of
            FOLDER_NAME_TO_CLASS) appears in the path.
    """
    conf_types = FOLDER_NAME_TO_CLASS.keys()

    splits = file_path.split("/")
    conf_occurences = [splits.index(typ) for typ in conf_types if typ in splits]

    assert len(conf_occurences) > 0, (
        f"Path: {file_path} doesn't contain folder with name among {conf_types}"
    )

    # reuse the indices computed above instead of re-scanning the path
    index = min(conf_occurences)
    rel_path = "/".join(splits[index:])

    return rel_path
125
+
126
+
127
def module_path(file_path: str) -> str:
    """Translate a .py file path into a dotted, importable module qualifier.

    E.g. "/repo/group_bys/team/script.py" -> "group_bys.team.script".

    :raises AssertionError: when the conf-relative path doesn't end in ".py".
    """
    rel_path = chronon_path(file_path)
    assert rel_path.endswith(".py"), f"Path: {file_path} doesn't end with '.py'"

    return rel_path[:-3].replace("/", ".")