awx-zipline-ai 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. agent/__init__.py +1 -0
  2. agent/constants.py +15 -0
  3. agent/ttypes.py +1684 -0
  4. ai/__init__.py +0 -0
  5. ai/chronon/__init__.py +0 -0
  6. ai/chronon/airflow_helpers.py +251 -0
  7. ai/chronon/api/__init__.py +1 -0
  8. ai/chronon/api/common/__init__.py +1 -0
  9. ai/chronon/api/common/constants.py +15 -0
  10. ai/chronon/api/common/ttypes.py +1844 -0
  11. ai/chronon/api/constants.py +15 -0
  12. ai/chronon/api/ttypes.py +3624 -0
  13. ai/chronon/cli/compile/column_hashing.py +313 -0
  14. ai/chronon/cli/compile/compile_context.py +177 -0
  15. ai/chronon/cli/compile/compiler.py +160 -0
  16. ai/chronon/cli/compile/conf_validator.py +590 -0
  17. ai/chronon/cli/compile/display/class_tracker.py +112 -0
  18. ai/chronon/cli/compile/display/compile_status.py +95 -0
  19. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  20. ai/chronon/cli/compile/display/console.py +3 -0
  21. ai/chronon/cli/compile/display/diff_result.py +46 -0
  22. ai/chronon/cli/compile/fill_templates.py +40 -0
  23. ai/chronon/cli/compile/parse_configs.py +141 -0
  24. ai/chronon/cli/compile/parse_teams.py +238 -0
  25. ai/chronon/cli/compile/serializer.py +115 -0
  26. ai/chronon/cli/git_utils.py +156 -0
  27. ai/chronon/cli/logger.py +61 -0
  28. ai/chronon/constants.py +3 -0
  29. ai/chronon/eval/__init__.py +122 -0
  30. ai/chronon/eval/query_parsing.py +19 -0
  31. ai/chronon/eval/sample_tables.py +100 -0
  32. ai/chronon/eval/table_scan.py +186 -0
  33. ai/chronon/fetcher/__init__.py +1 -0
  34. ai/chronon/fetcher/constants.py +15 -0
  35. ai/chronon/fetcher/ttypes.py +127 -0
  36. ai/chronon/group_by.py +692 -0
  37. ai/chronon/hub/__init__.py +1 -0
  38. ai/chronon/hub/constants.py +15 -0
  39. ai/chronon/hub/ttypes.py +1228 -0
  40. ai/chronon/join.py +566 -0
  41. ai/chronon/logger.py +24 -0
  42. ai/chronon/model.py +35 -0
  43. ai/chronon/observability/__init__.py +1 -0
  44. ai/chronon/observability/constants.py +15 -0
  45. ai/chronon/observability/ttypes.py +2192 -0
  46. ai/chronon/orchestration/__init__.py +1 -0
  47. ai/chronon/orchestration/constants.py +15 -0
  48. ai/chronon/orchestration/ttypes.py +4406 -0
  49. ai/chronon/planner/__init__.py +1 -0
  50. ai/chronon/planner/constants.py +15 -0
  51. ai/chronon/planner/ttypes.py +1686 -0
  52. ai/chronon/query.py +126 -0
  53. ai/chronon/repo/__init__.py +40 -0
  54. ai/chronon/repo/aws.py +298 -0
  55. ai/chronon/repo/cluster.py +65 -0
  56. ai/chronon/repo/compile.py +56 -0
  57. ai/chronon/repo/constants.py +164 -0
  58. ai/chronon/repo/default_runner.py +291 -0
  59. ai/chronon/repo/explore.py +421 -0
  60. ai/chronon/repo/extract_objects.py +137 -0
  61. ai/chronon/repo/gcp.py +585 -0
  62. ai/chronon/repo/gitpython_utils.py +14 -0
  63. ai/chronon/repo/hub_runner.py +171 -0
  64. ai/chronon/repo/hub_uploader.py +108 -0
  65. ai/chronon/repo/init.py +53 -0
  66. ai/chronon/repo/join_backfill.py +105 -0
  67. ai/chronon/repo/run.py +293 -0
  68. ai/chronon/repo/serializer.py +141 -0
  69. ai/chronon/repo/team_json_utils.py +46 -0
  70. ai/chronon/repo/utils.py +472 -0
  71. ai/chronon/repo/zipline.py +51 -0
  72. ai/chronon/repo/zipline_hub.py +105 -0
  73. ai/chronon/resources/gcp/README.md +174 -0
  74. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  75. ai/chronon/resources/gcp/group_bys/test/data.py +34 -0
  76. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  77. ai/chronon/resources/gcp/joins/test/data.py +30 -0
  78. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  79. ai/chronon/resources/gcp/sources/test/data.py +23 -0
  80. ai/chronon/resources/gcp/teams.py +70 -0
  81. ai/chronon/resources/gcp/zipline-cli-install.sh +54 -0
  82. ai/chronon/source.py +88 -0
  83. ai/chronon/staging_query.py +185 -0
  84. ai/chronon/types.py +57 -0
  85. ai/chronon/utils.py +557 -0
  86. ai/chronon/windows.py +50 -0
  87. awx_zipline_ai-0.2.0.dist-info/METADATA +173 -0
  88. awx_zipline_ai-0.2.0.dist-info/RECORD +93 -0
  89. awx_zipline_ai-0.2.0.dist-info/WHEEL +5 -0
  90. awx_zipline_ai-0.2.0.dist-info/entry_points.txt +2 -0
  91. awx_zipline_ai-0.2.0.dist-info/licenses/LICENSE +202 -0
  92. awx_zipline_ai-0.2.0.dist-info/top_level.txt +3 -0
  93. jars/__init__.py +0 -0
@@ -0,0 +1,421 @@
1
+ #!/usr/bin/env python3
2
+
3
+
4
+ # Copyright (C) 2023 The Chronon Authors.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ import argparse
19
+ import json
20
+ import os
21
+ import subprocess
22
+ from contextlib import contextmanager
23
+ from pathlib import Path
24
+
25
# Root from which relative conf paths are resolved; captured once at startup.
CWD = os.getcwd()
# Index spec for group_by confs: display column -> json paths to extract.
# Columns prefixed with `_` are internal (used by handlers, not keyword search).
GB_INDEX_SPEC = {
    "sources": [
        "sources[].events.table",
        "sources[].entities.snapshotTable",
        "sources[].entities.mutationTable",
        "sources[].entities.topic",
        "sources[].events.topic",
    ],
    "_event_tables": ["sources[].events.table"],
    "_event_topics": ["sources[].events.topic"],
    "aggregation": [
        "aggregations[].inputColumn"
    ],
    "keys": [
        "keyColumns"
    ],
    "name": [
        "metaData.name"
    ],
    "online": [
        "metaData.online"
    ],
    "output_namespace": [
        "metaData.outputNamespace"
    ],
}

# Index spec for join confs; `rightParts` paths presumably cover an older
# conf layout alongside `joinParts` - TODO confirm.
JOIN_INDEX_SPEC = {
    "input_table": [
        "left.entities.snapshotTable",
        "left.events.table",
    ],
    "_events_driver": ["left.events.table"],
    "group_bys": [
        "joinParts[].groupBy.metaData.name",
        "rightParts[].groupBy.name",
    ],
    "name": [
        "metaData.name"
    ],
    "output_namespace": [
        "metaData.outputNamespace"
    ],
    "_group_bys": [
        "joinParts[].groupBy",
        "rightParts[].groupBy"
    ]
}

# Entry fields that, when empty, are backfilled from the team's defaults
# (entry field name -> attribute name on the team object).
DEFAULTS_SPEC = {
    'outputNamespace': "namespace"
}

# Compiled conf locations relative to the repo root.
GB_REL_PATH = "production/group_bys"
JOIN_REL_PATH = "production/joins"
# Columns searched during keyword lookup and trimmed when displaying long lists.
FILTER_COLUMNS = ["aggregation", "keys", "name", "sources", "joins"]
# Entry fields that hold absolute paths (trimmed relative to root for display).
PATH_FIELDS = ['file', 'json_file']
# colors chosen to be visible clearly on BOTH black and white terminals
# change with caution
NORMAL = '\033[0m'
BOLD = '\033[1m'
ITALIC = '\033[3m'
UNDERLINE = '\033[4m'
RED = '\033[38;5;160m'
GREEN = '\033[38;5;28m'
ORANGE = '\033[38;5;130m'
BLUE = '\033[38;5;27m'
GREY = '\033[38;5;246m'
HIGHLIGHT = BOLD+ITALIC+RED
95
+
96
+
97
# walks the json nodes recursively collecting all values that match the path
# a trailing `[]` in a field in the path indicates that there is an array of
# objects in the corresponding node value.
def extract_json(json_path, conf_json):
    """Collect every value in ``conf_json`` matching the dotted ``json_path``.

    :param json_path: dot-separated path, e.g. ``"sources[].events.table"``;
        a segment ending in ``[]`` descends into each element of an array.
        ``None`` terminates the recursion and returns the node itself.
    :param conf_json: parsed json node to extract from.
    :return: the raw node when ``json_path`` is None; otherwise a (possibly
        empty) list of matched values.
    """
    if json_path is None:
        return conf_json
    # fix: the remainder was previously bound to a name shadowing builtin `next`
    key, _, remainder = json_path.partition(".")
    rest = remainder if remainder else None
    if key.endswith("[]"):
        key = key[:-2]
        if key in conf_json:
            result = []
            for value in conf_json[key]:
                result.extend(extract_json(rest, value))
            return result
    elif key in conf_json:
        final = extract_json(rest, conf_json[key])
        # scalar leaves are wrapped so callers always receive a list
        return final if isinstance(final, list) else [final]
    return []
121
+
122
+
123
def build_entry(conf, index_spec, conf_type, root=CWD, teams=None):
    """Build a single index entry for one compiled conf.

    :param conf: a parsed conf dict, or a path to a compiled json file.
    :param index_spec: column name -> list of json paths (see *_INDEX_SPEC).
    :param conf_type: "group_bys" or "joins"; used to derive source paths.
    :param root: repository root relative paths are anchored at.
    :param teams: team name -> team metadata for filling defaulted fields.
    :return: the entry dict, or None when the conf is unparseable or unnamed.
    """
    conf_dict = conf
    if isinstance(conf, str):
        with open(conf) as conf_file:
            try:
                conf_dict = json.load(conf_file)
            # fix: was `except BaseException`, which also swallowed
            # KeyboardInterrupt/SystemExit; also made the bare `return` explicit
            except Exception as ex:
                print(f"Failed to parse {conf} due to :: {ex}")
                return None
    entry = {"file": None}
    for column, paths in index_spec.items():
        result = []
        for path in paths:
            result.extend(extract_json(path, conf_dict))
        entry[column] = result

    # unnamed confs cannot be indexed
    if len(entry["name"]) == 0:
        return None

    # derive python file path from the name & conf_type
    (team, conf_module) = entry["name"][0].split(".", 1)
    # Update missing values with teams defaults.
    for field, mapped_field in DEFAULTS_SPEC.items():
        if field in entry and not entry[field]:
            # NOTE(review): assumes team objects expose attributes via __dict__;
            # a plain dict loaded from teams.json would raise here - confirm.
            team_dict = teams[team].__dict__
            entry[field] = [team_dict[mapped_field]]

    file_base = "/".join(conf_module.split(".")[:-1])
    py_file = file_base + ".py"
    init_file = file_base + "/__init__.py"
    py_path = os.path.join(root, conf_type, team, py_file)
    init_path = os.path.join(root, conf_type, team, init_file)
    # fall back to the package __init__.py when no standalone module exists
    conf_path = py_path if os.path.exists(py_path) else init_path
    entry["json_file"] = os.path.join(root, "production", conf_type, team, conf_module)
    entry["file"] = conf_path
    return entry
159
+
160
+
161
@contextmanager
def chdir(path):
    """Temporarily switch the working directory to *path*.

    Used so git subprocesses run from the repo root; the original
    directory is restored even when the body raises.
    """
    previous = Path().absolute()
    try:
        os.chdir(path)
        yield
    finally:
        os.chdir(previous)
172
+
173
+
174
# reuse `git log` results across calls: file path -> "date/author/email" line
git_info_cache = {}


# git_info is the most expensive part of the entire script - so we will have to parallelize
def git_info(file_paths, exclude=None, root=CWD):
    """Fetch the latest-commit info line for each path.

    Uncached lookups are fanned out as concurrent `git log` subprocesses
    before being reaped, since git is the slowest part of this script.

    :param file_paths: absolute paths (under *root*) to inspect.
    :param exclude: commit-message pattern skipped via --invert-grep.
    :param root: repo root to run git from.
    :return: dict of file path -> colored "date/author/email" string.
    """
    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ''
    procs = []
    with chdir(root):
        for file_path in file_paths:
            if file_path in git_info_cache:
                procs.append((file_path, git_info_cache[file_path]))
            else:
                args = (
                    f"echo $(git log -n 2 --pretty='format:{BLUE} %as/%an/%ae' {exclude_args} -- "
                    f"{file_path.replace(root, '')})")
                procs.append((file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True)))

    result = {}
    for file_path, proc in procs:
        if isinstance(proc, subprocess.Popen):
            lines = [line.decode("utf-8").strip() for line in proc.stdout.readlines()]
            git_info_cache[file_path] = lines[0]
            result[file_path] = git_info_cache[file_path]
        else:
            # BUG FIX: cache hits were queued as plain strings but never
            # copied into `result`, so repeat lookups lost entries and
            # callers (e.g. display_entries) hit KeyError.
            result[file_path] = proc
    return result
200
+
201
+
202
def walk_files(path):
    """Yield the full path of every file under *path*, recursively."""
    for directory, _, filenames in os.walk(path):
        for filename in filenames:
            yield os.path.join(directory, filename)
206
+
207
+
208
def build_index(conf_type, index_spec, root=CWD, teams=None):
    """Index every compiled conf of *conf_type* under production/.

    :return: dict keyed by conf name (e.g. ``team.module.v1``) -> entry dict.
    """
    conf_dir = os.path.join(root, "production", conf_type)
    team_data = teams or {}
    index_table = {}
    for conf_path in walk_files(conf_dir):
        entry = build_entry(conf_path, index_spec, conf_type, root=root, teams=team_data)
        if entry is not None:
            index_table[entry["name"][0]] = entry
    return index_table
217
+
218
+
219
def find_string(text, word):
    """Yield every index at which *word* occurs in *text* (overlaps included)."""
    position = text.find(word)
    while position != -1:
        yield position
        position = text.find(word, position + 1)
224
+
225
+
226
def highlight(text, word):
    """Return *text* with every occurrence of *word* wrapped in highlight codes."""
    pieces = []
    cursor = 0
    for start in find_string(text, word):
        pieces.append(text[cursor:start])
        pieces.append(HIGHLIGHT + word + NORMAL)
        cursor = start + len(word)
    pieces.append(text[cursor:])
    return "".join(pieces)
234
+
235
+
236
def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=False):
    """Render one index entry as a colorized, column-aligned text block.

    Oversized value lists in filterable columns are narrowed to values
    containing *target* and truncated to *show* items; the "file" column is
    annotated with the git *modification* line. Note: mutates *entry* in
    place when *trim_paths* is set.
    """
    rendered = []
    if trim_paths:
        for field in PATH_FIELDS:
            if field in entry:
                entry[field] = entry[field].replace(root, '')
    for column, values in entry.items():
        label = column.rjust(15)
        if column in FILTER_COLUMNS and len(values) > show:
            values = [value for value in set(values) if target in value]
        if len(values) > show:
            shown = ', '.join(values[:show])
            hidden = len(values) - show
            values = f"[{shown} ... {GREY}{UNDERLINE}{hidden} more{NORMAL}]"
        if column == "file":
            values = f"{BOLD}{values} {modification}{NORMAL}"
        else:
            values = highlight(str(values), target)
        rendered.append(f"{BOLD}{ORANGE}{label}{NORMAL} - {values}")
    return "\n" + "\n".join(rendered)
256
+
257
+
258
def find_in_index(index_table, target):
    """Return entries whose filterable columns contain the *target* substring."""
    def matches(entry):
        return any(
            target in value
            for column, values in entry.items()
            if column in FILTER_COLUMNS
            for value in values
        )
    return find_in_index_pred(index_table, matches)
267
+
268
+
269
def find_in_index_pred(index_table, valid_entry):
    """Return every entry of *index_table* that satisfies the predicate."""
    return [entry for entry in index_table.values() if valid_entry(entry)]
271
+
272
+
273
def display_entries(entries, target, root=CWD, trim_paths=False):
    """Pretty-print *entries*, ordered by their git modification info."""
    infos = git_info([entry["file"] for entry in entries], root=root)
    rendered = []
    for entry in entries:
        modification = infos[entry["file"]]
        text = prettify_entry(entry, target, modification, root=root, trim_paths=trim_paths)
        rendered.append((modification, text))
    for _, text in sorted(rendered):
        print(text)
283
+
284
+
285
def enrich_with_joins(gb_index, join_index, root=CWD, teams=None):
    """Cross-link group_bys and joins, mutating *gb_index* in place.

    First indexes the group_by definitions nested inside join confs, then
    builds the reverse lineage: each group_by entry gains a "joins" list
    (names of joins consuming it) and a "join_event_driver" list (left-side
    event tables of those joins).
    """
    # nested gb entries
    for join_entry in join_index.values():
        for gb in join_entry["_group_bys"]:
            entry = build_entry(gb, GB_INDEX_SPEC, "group_bys", root=root, teams=teams)
            # BUG FIX: build_entry returns None for unnamed/unparseable confs;
            # previously this crashed with TypeError on `entry["name"]`.
            if entry is not None:
                gb_index[entry["name"][0]] = entry
    # lineage -> reverse index from gb -> join
    for group_by in gb_index.values():
        group_by["joins"] = []
        group_by["join_event_driver"] = []
    for join in join_index.values():
        for gb_name in join["group_bys"]:
            if gb_name in gb_index:
                gb_index[gb_name]["joins"].append(join["name"][0])
                if len(join["_events_driver"]) > 0:
                    gb_index[gb_name]["join_event_driver"].append(join["_events_driver"][0])
301
+
302
+
303
# reuse `git log` command result: file path -> [author name, email]
file_to_author = {}
# extract information based on GB_INDEX_SPEC into this (conf name -> entry).
# FIX: initialized as a dict to match how build_index() rebinds it in
# __main__; it was previously a list, which breaks dict-style use
# (.items(), key lookup) by handlers such as events_without_topics.
gb_index = {}
# extract information based on JOIN_INDEX_SPEC into this (conf name -> entry)
join_index = {}
309
+
310
+
311
def author_name_email(file, exclude=None):
    """Return the (name, email) of *file*'s most recent author, via cache.

    Missing files yield a pair of empty strings; commits matching *exclude*
    are skipped during attribution.
    """
    if not os.path.exists(file):
        return ("", "")
    if file not in file_to_author:
        info_by_path = git_info([file], exclude)
        for filepath, auth_str in info_by_path.items():
            file_to_author[filepath] = auth_str.split("/")[-2:]
    return file_to_author[file]
318
+
319
+
320
def conf_file(conf_type, conf_name):
    """Map a conf name like ``team.module.v1`` to its production json path."""
    parts = ["production", conf_type, *conf_name.split(".", 1)]
    return os.path.join(*parts)
324
+
325
+
326
# args[0] is output tsv file
# args[1] is commit messages to exclude when extracting author and email information
def events_without_topics(output_file=None, exclude_commit_message=None):
    """Report group_bys built on event tables that have no streaming topic.

    Scans the global ``gb_index``; for each match, records the conf name,
    producing author, online flag, first event table, consuming joins and
    their authors. Writes a TSV to *output_file* when given, otherwise prints
    the rows plus the set of involved author emails.

    :param output_file: optional TSV output path (``~`` is expanded).
    :param exclude_commit_message: commit-message pattern ignored when
        attributing authorship via git.
    """
    result = []
    emails = set()

    def is_events_without_topics(entry):
        # event source present but no topic configured
        found = len(entry["_event_topics"]) == 0 and len(entry["_event_tables"]) > 0
        # NOTE(review): this only checks that metaData.online is *set*, not
        # that it is truthy - confirm intent.
        is_online = len(entry["online"]) > 0
        joins = ", ".join(entry["joins"]) if len(entry["joins"]) > 0 else "STANDALONE"
        if found:
            file = entry["json_file"] if os.path.exists(entry["json_file"]) else entry["file"]
            producer_name, producer_email = author_name_email(file, exclude_commit_message)
            emails.add(producer_email)
            consumers = set()
            for join in entry["joins"]:
                conf_file_path = conf_file("joins", join)
                consumer_name, consumer_email = author_name_email(conf_file_path, exclude_commit_message)
                consumers.add(consumer_name)
                emails.add(consumer_email)
            row = [
                entry["name"][0],
                producer_name,
                is_online,
                entry["_event_tables"][0],
                joins,
                ", ".join(consumers)
            ]
            result.append(row)
        return found

    # invoked purely for the side effect of populating `result`
    find_in_index_pred(gb_index, is_events_without_topics)
    if output_file:
        with open(os.path.expanduser(output_file), 'w') as tsv_file:
            for row in result:
                tsv_file.write('\t'.join(map(str, row))+'\n')
        # fix: message typo ("events us used" -> "events is used")
        print("wrote information about cases where events is used " +
              f"without topics set into file {os.path.expanduser(output_file)}")
    else:
        for row in result:
            print('\t'.join(map(str, row))+'\n')
        print(",".join(list(emails)))
368
+
369
+
370
def load_team_data(path='', teams_root=None):
    """Load team metadata from either a teams.json file or a teams.py module.

    For teams.json, every team's dict is overlaid on top of the 'default'
    entry. Otherwise parsing is delegated to the compile-time teams loader,
    which needs *teams_root*.
    """
    if 'teams.json' in path:
        with open(path, 'r') as infile:
            teams = json.load(infile)
        base_defaults = teams.get('default', {})
        return {team: dict(base_defaults, **values) for team, values in teams.items()}
    else:
        from ai.chronon.cli.compile import parse_teams
        assert teams_root is not None, "Need root to load teams.py"
        return parse_teams.load_teams(teams_root)
385
+
386
+
387
# register all handlers here
# keyword -> callable; a CLI keyword matching a key dispatches to its handler
# (with --handler-args) instead of running a keyword search over the index.
handlers = {
    "_events_without_topics": events_without_topics
}
391
+
392
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Explore tool for chronon")
    parser.add_argument("keyword", help="Keyword to look up keys")
    parser.add_argument("--conf-root", help="Conf root for the configs", default=CWD)
    # BUG FIX: default=[] - without it, omitting --handler-args left
    # args.handler_args as None and the loop below raised TypeError.
    parser.add_argument(
        "--handler-args", nargs="*", default=[],
        help="Special arguments for handler keywords of the form param=value")
    args = parser.parse_args()
    root = args.conf_root
    # warn (but proceed) when the root doesn't look like a chronon conf repo
    if not (root.endswith("chronon") or root.endswith("zipline")):
        print("This script needs to be run from chronon conf root - with folder named 'chronon' or 'zipline', found: "
              + root)
    teams = load_team_data(os.path.join(root, 'teams.json'), teams_root=root)
    gb_index = build_index("group_bys", GB_INDEX_SPEC, root=root, teams=teams)
    join_index = build_index("joins", JOIN_INDEX_SPEC, root=root, teams=teams)
    enrich_with_joins(gb_index, join_index, root=root, teams=teams)

    candidate = args.keyword
    if candidate in handlers:
        print(f"{candidate} is a registered handler")
        handler = handlers[candidate]
        handler_args = {}
        for arg in args.handler_args:
            splits = arg.split("=", 1)
            # fix: message typos ("for the form" / "Found and invalid")
            assert len(splits) == 2, f"need args to handler of the form, param=value. Found an invalid arg:{arg}"
            key, value = splits
            handler_args[key] = value
        handler(**handler_args)
    else:
        group_bys = find_in_index(gb_index, args.keyword)
        display_entries(group_bys, args.keyword, root=root, trim_paths=True)
@@ -0,0 +1,137 @@
1
+ # Copyright (C) 2023 The Chronon Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import glob
16
+ import importlib.machinery
17
+ import importlib.util
18
+ import logging
19
+ import os
20
+
21
+ from ai.chronon.logger import get_logger
22
+ from ai.chronon.repo import FOLDER_NAME_TO_CLASS
23
+
24
+
25
def from_folder(full_path: str, cls: type, log_level=logging.INFO):
    """Recursively scan *full_path* for python files and collect every
    object of type *cls* they define, keyed by object qualifier.

    Files that fail to import are logged and skipped.
    """
    if full_path.endswith("/"):
        full_path = full_path[:-1]

    collected = {}
    for py_file in glob.glob(os.path.join(full_path, "**/*.py"), recursive=True):
        try:
            collected.update(from_file(py_file, cls, log_level))
        except Exception as e:
            logging.error(f"Failed to extract: {py_file}")
            logging.exception(e)
    return collected
42
+
43
+
44
def from_folderV2(full_path: str, target_file: str, cls: type):
    """Recursively scan *full_path* for conf objects of type *cls*.

    :return: a triple of
        - results: object qualifier -> (object, defining file path)
        - errors: file path -> exception raised while importing it
        - target_file_error: the exception for *target_file*, or None
    """
    if full_path.endswith("/"):
        full_path = full_path[:-1]

    results, errors = {}, {}
    target_file_error = None
    for py_file in glob.glob(os.path.join(full_path, "**/*.py"), recursive=True):
        try:
            for qualifier, obj in from_file(py_file, cls, log_level=logging.NOTSET).items():
                results[qualifier] = (obj, py_file)
        except Exception as e:
            if py_file == target_file:
                target_file_error = e
            errors[py_file] = e
    return results, errors, target_file_error
66
+
67
+
68
def import_module_set_name(module, cls):
    """Assign qualified names to every *cls* instance defined in *module*.

    The name is `team_name.python_script_name.obj_name`, with a `__<version>`
    suffix when the object's metaData carries a version; the team is the
    second component of the module path.
    e.g. module.__name__=group_bys.user.avg_session_length, version=1
         -> obj.metaData.name=user.avg_session_length.v1__1, team=user
    """
    module_suffix = module.__name__.partition(".")[2]
    team_name = module.__name__.split(".")[1]
    for attr_name, obj in list(module.__dict__.items()):
        if not isinstance(obj, cls):
            continue
        qualified = module_suffix + "." + attr_name
        # append the version suffix only when a version is explicitly set
        version = getattr(obj.metaData, 'version', None)
        if version is not None:
            qualified = qualified + "__" + str(version)
        obj.metaData.name = qualified
        obj.metaData.team = team_name
    return module
87
+
88
+
89
def from_file(file_path: str, cls: type, log_level=logging.INFO):
    """Import the module at *file_path* and return its *cls* objects.

    The returned dict maps the fully qualified name
    (`team_name.python_script_name.obj_name`, e.g. psx.reservation_status.v1)
    to the object, after names are stamped by import_module_set_name.
    """
    logger = get_logger(log_level)
    logger.debug("Loading objects of type {cls} from {file_path}".format(**locals()))

    # module_path strips the repo prefix on the left and `.py` on the right,
    # then turns slashes into dots, e.g. `team_name.python_script_name`
    module_qualifier = module_path(file_path)
    mod = importlib.import_module(module_qualifier)

    # stamp metaData.name / metaData.team on every cls instance
    import_module_set_name(mod, cls)

    return {obj.metaData.name: obj
            for obj in mod.__dict__.values() if isinstance(obj, cls)}
110
+
111
+
112
def chronon_path(file_path: str) -> str:
    """Strip everything before the conf-type folder from *file_path*.

    e.g. `/repo/group_bys/team/x.py` -> `group_bys/team/x.py`.

    :raises AssertionError: when no known conf-type folder is in the path.
    """
    conf_types = FOLDER_NAME_TO_CLASS.keys()

    splits = file_path.split("/")
    conf_occurences = [splits.index(typ) for typ in conf_types if typ in splits]

    assert (
        len(conf_occurences) > 0
    ), f"Path: {file_path} doesn't contain folder with name among {conf_types}"

    # fix: reuse the occurrence list instead of scanning the path twice
    index = min(conf_occurences)
    rel_path = "/".join(splits[index:])

    return rel_path
127
+
128
+
129
def module_path(file_path: str) -> str:
    """Convert a conf file path into its importable module path.

    e.g. `group_bys/team/script.py` -> `group_bys.team.script`.

    :raises AssertionError: when the path does not end in `.py`.
    """
    rel_path = chronon_path(file_path)
    assert rel_path.endswith(".py"), f"Path: {file_path} doesn't end with '.py'"

    return rel_path[:-3].replace("/", ".")