awx-zipline-ai 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- agent/ttypes.py +6 -6
- ai/chronon/airflow_helpers.py +20 -23
- ai/chronon/cli/__init__.py +0 -0
- ai/chronon/cli/compile/__init__.py +0 -0
- ai/chronon/cli/compile/column_hashing.py +40 -17
- ai/chronon/cli/compile/compile_context.py +13 -17
- ai/chronon/cli/compile/compiler.py +59 -36
- ai/chronon/cli/compile/conf_validator.py +251 -99
- ai/chronon/cli/compile/display/__init__.py +0 -0
- ai/chronon/cli/compile/display/class_tracker.py +6 -16
- ai/chronon/cli/compile/display/compile_status.py +10 -10
- ai/chronon/cli/compile/display/diff_result.py +79 -14
- ai/chronon/cli/compile/fill_templates.py +3 -8
- ai/chronon/cli/compile/parse_configs.py +10 -17
- ai/chronon/cli/compile/parse_teams.py +38 -34
- ai/chronon/cli/compile/serializer.py +3 -9
- ai/chronon/cli/compile/version_utils.py +42 -0
- ai/chronon/cli/git_utils.py +2 -13
- ai/chronon/cli/logger.py +0 -2
- ai/chronon/constants.py +1 -1
- ai/chronon/group_by.py +47 -47
- ai/chronon/join.py +46 -32
- ai/chronon/logger.py +1 -2
- ai/chronon/model.py +9 -4
- ai/chronon/query.py +2 -2
- ai/chronon/repo/__init__.py +1 -2
- ai/chronon/repo/aws.py +17 -31
- ai/chronon/repo/cluster.py +121 -50
- ai/chronon/repo/compile.py +14 -8
- ai/chronon/repo/constants.py +1 -1
- ai/chronon/repo/default_runner.py +32 -54
- ai/chronon/repo/explore.py +70 -73
- ai/chronon/repo/extract_objects.py +6 -9
- ai/chronon/repo/gcp.py +89 -88
- ai/chronon/repo/gitpython_utils.py +3 -2
- ai/chronon/repo/hub_runner.py +145 -55
- ai/chronon/repo/hub_uploader.py +2 -1
- ai/chronon/repo/init.py +12 -5
- ai/chronon/repo/join_backfill.py +19 -5
- ai/chronon/repo/run.py +42 -39
- ai/chronon/repo/serializer.py +4 -12
- ai/chronon/repo/utils.py +72 -63
- ai/chronon/repo/zipline.py +3 -19
- ai/chronon/repo/zipline_hub.py +211 -39
- ai/chronon/resources/__init__.py +0 -0
- ai/chronon/resources/gcp/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/test/data.py +13 -17
- ai/chronon/resources/gcp/joins/__init__.py +0 -0
- ai/chronon/resources/gcp/joins/test/data.py +4 -8
- ai/chronon/resources/gcp/sources/__init__.py +0 -0
- ai/chronon/resources/gcp/sources/test/data.py +9 -6
- ai/chronon/resources/gcp/teams.py +9 -21
- ai/chronon/source.py +2 -4
- ai/chronon/staging_query.py +60 -19
- ai/chronon/types.py +3 -2
- ai/chronon/utils.py +21 -68
- ai/chronon/windows.py +2 -4
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/METADATA +48 -24
- awx_zipline_ai-0.3.1.dist-info/RECORD +96 -0
- awx_zipline_ai-0.3.1.dist-info/top_level.txt +4 -0
- gen_thrift/__init__.py +0 -0
- {ai/chronon → gen_thrift}/api/ttypes.py +327 -197
- {ai/chronon/api → gen_thrift}/common/ttypes.py +9 -39
- gen_thrift/eval/ttypes.py +660 -0
- {ai/chronon → gen_thrift}/hub/ttypes.py +12 -131
- {ai/chronon → gen_thrift}/observability/ttypes.py +343 -180
- {ai/chronon → gen_thrift}/planner/ttypes.py +326 -45
- ai/chronon/eval/__init__.py +0 -122
- ai/chronon/eval/query_parsing.py +0 -19
- ai/chronon/eval/sample_tables.py +0 -100
- ai/chronon/eval/table_scan.py +0 -186
- ai/chronon/orchestration/ttypes.py +0 -4406
- ai/chronon/resources/gcp/README.md +0 -174
- ai/chronon/resources/gcp/zipline-cli-install.sh +0 -54
- awx_zipline_ai-0.2.1.dist-info/RECORD +0 -93
- awx_zipline_ai-0.2.1.dist-info/licenses/LICENSE +0 -202
- awx_zipline_ai-0.2.1.dist-info/top_level.txt +0 -3
- /jars/__init__.py → /__init__.py +0 -0
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/WHEEL +0 -0
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.1.dist-info}/entry_points.txt +0 -0
- {ai/chronon → gen_thrift}/api/__init__.py +0 -0
- {ai/chronon/api/common → gen_thrift/api}/constants.py +0 -0
- {ai/chronon/api → gen_thrift}/common/__init__.py +0 -0
- {ai/chronon/api → gen_thrift/common}/constants.py +0 -0
- {ai/chronon/fetcher → gen_thrift/eval}/__init__.py +0 -0
- {ai/chronon/fetcher → gen_thrift/eval}/constants.py +0 -0
- {ai/chronon/hub → gen_thrift/fetcher}/__init__.py +0 -0
- {ai/chronon/hub → gen_thrift/fetcher}/constants.py +0 -0
- {ai/chronon → gen_thrift}/fetcher/ttypes.py +0 -0
- {ai/chronon/observability → gen_thrift/hub}/__init__.py +0 -0
- {ai/chronon/observability → gen_thrift/hub}/constants.py +0 -0
- {ai/chronon/orchestration → gen_thrift/observability}/__init__.py +0 -0
- {ai/chronon/orchestration → gen_thrift/observability}/constants.py +0 -0
- {ai/chronon → gen_thrift}/planner/__init__.py +0 -0
- {ai/chronon → gen_thrift}/planner/constants.py +0 -0
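Most of the churn in the `ai/chronon` tree is formatting and refactoring; the rename block above additionally shows the generated Thrift modules moving out of `ai.chronon.*` into a new top-level `gen_thrift` package. A minimal sketch of what that move means for downstream imports, assuming the generated type names (e.g. `GroupBy`) are unchanged between versions:

# Illustrative only: the renames above imply this import-path change for
# the Thrift-generated types; GroupBy is assumed to exist under both paths.
try:
    from gen_thrift.api.ttypes import GroupBy  # 0.3.1 layout
except ImportError:
    from ai.chronon.api.ttypes import GroupBy  # 0.2.1 layout

print(GroupBy.__module__)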
ai/chronon/repo/explore.py
CHANGED
@@ -33,21 +33,11 @@ GB_INDEX_SPEC = {
     ],
     "_event_tables": ["sources[].events.table"],
     "_event_topics": ["sources[].events.topic"],
-    "aggregation": [
-        "aggregations[].inputColumn"
-    ],
-    "keys": [
-        "keyColumns"
-    ],
-    "name": [
-        "metaData.name"
-    ],
-    "online": [
-        "metaData.online"
-    ],
-    "output_namespace": [
-        "metaData.outputNamespace"
-    ],
+    "aggregation": ["aggregations[].inputColumn"],
+    "keys": ["keyColumns"],
+    "name": ["metaData.name"],
+    "online": ["metaData.online"],
+    "output_namespace": ["metaData.outputNamespace"],
 }
 
 JOIN_INDEX_SPEC = {
@@ -60,38 +50,29 @@ JOIN_INDEX_SPEC = {
         "joinParts[].groupBy.metaData.name",
         "rightParts[].groupBy.name",
     ],
-    "name": [
-        "metaData.name"
-    ],
-    "output_namespace": [
-        "metaData.outputNamespace"
-    ],
-    "_group_bys": [
-        "joinParts[].groupBy",
-        "rightParts[].groupBy"
-    ]
+    "name": ["metaData.name"],
+    "output_namespace": ["metaData.outputNamespace"],
+    "_group_bys": ["joinParts[].groupBy", "rightParts[].groupBy"],
 }
 
-DEFAULTS_SPEC = {
-    'outputNamespace': "namespace"
-}
+DEFAULTS_SPEC = {"outputNamespace": "namespace"}
 
 GB_REL_PATH = "production/group_bys"
 JOIN_REL_PATH = "production/joins"
 FILTER_COLUMNS = ["aggregation", "keys", "name", "sources", "joins"]
-PATH_FIELDS = ['file', 'json_file']
+PATH_FIELDS = ["file", "json_file"]
 # colors chosen to be visible clearly on BOTH black and white terminals
 # change with caution
-NORMAL = '\033[0m'
-BOLD = '\033[1m'
-ITALIC = '\033[3m'
-UNDERLINE = '\033[4m'
-RED = '\033[38;5;160m'
-GREEN = '\033[38;5;28m'
-ORANGE = '\033[38;5;130m'
-BLUE = '\033[38;5;27m'
-GREY = '\033[38;5;246m'
-HIGHLIGHT = BOLD+ITALIC+RED
+NORMAL = "\033[0m"
+BOLD = "\033[1m"
+ITALIC = "\033[3m"
+UNDERLINE = "\033[4m"
+RED = "\033[38;5;160m"
+GREEN = "\033[38;5;28m"
+ORANGE = "\033[38;5;130m"
+BLUE = "\033[38;5;27m"
+GREY = "\033[38;5;246m"
+HIGHLIGHT = BOLD + ITALIC + RED
 
 
 # walks the json nodes recursively collecting all values that match the path
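The spec dicts compacted above feed the recursive collector noted in the trailing comment: each string is a dot-separated path into the config JSON, and a trailing `[]` on a segment means fan out over a list. An illustrative stand-alone walker for that convention (not the package's actual implementation):

# Stand-alone sketch of the GB_INDEX_SPEC path convention: dots descend
# into dicts, a "[]" suffix fans out over list elements.
def collect(node, path):
    head, _, rest = path.partition(".")
    if head.endswith("[]"):
        items = node.get(head[:-2], []) if isinstance(node, dict) else []
    else:
        items = [node[head]] if isinstance(node, dict) and head in node else []
    for item in items:
        yield from collect(item, rest) if rest else [item]

group_by = {"sources": [{"events": {"table": "db.user_events"}}]}
print(list(collect(group_by, "sources[].events.table")))  # ['db.user_events']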
@@ -176,7 +157,7 @@ git_info_cache = {}
 
 # git_info is the most expensive part of the entire script - so we will have to parallelize
 def git_info(file_paths, exclude=None, root=CWD):
-    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ''
+    exclude_args = f"--invert-grep --grep={exclude}" if exclude else ""
     procs = []
     with chdir(root):
         for file_path in file_paths:
@@ -185,8 +166,11 @@ def git_info(file_paths, exclude=None, root=CWD):
             else:
                 args = (
                     f"echo $(git log -n 2 --pretty='format:{BLUE} %as/%an/%ae' {exclude_args} -- "
-                    f"{file_path.replace(root, '')})")
-            procs.append((file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True)))
+                    f"{file_path.replace(root, '')})"
+                )
+            procs.append(
+                (file_path, subprocess.Popen(args, stdout=subprocess.PIPE, shell=True))
+            )
 
     result = {}
     for file_path, proc in procs:
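This hunk is the fan-out half of the pattern the earlier comment announces: `git_info` launches one non-blocking `git log` per file, then drains the pipes in a second pass. The same start-then-collect idiom in isolation, with placeholder echo commands standing in for the real git invocations:

# Start-then-collect subprocess fan-out, mirroring git_info above.
# The echo commands are placeholders for per-file `git log` calls.
import subprocess

commands = [f"echo processed file_{i}" for i in range(3)]

# Popen returns immediately, so all children run concurrently.
procs = [(c, subprocess.Popen(c, stdout=subprocess.PIPE, shell=True)) for c in commands]

# communicate() blocks until each child exits and its pipe is drained.
results = {c: p.communicate()[0].decode().strip() for c, p in procs}
print(results)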
@@ -229,7 +213,7 @@ def highlight(text, word):
     for idx in find_string(text, word):
         result = result + text[prev_idx:idx] + HIGHLIGHT + word + NORMAL
         prev_idx = idx + len(word)
-    result += text[prev_idx:len(text)]
+    result += text[prev_idx : len(text)]
     return result
 
 
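`highlight` splices the `HIGHLIGHT` and `NORMAL` escape pairs defined at the top of the file around each match. The wrap-and-reset mechanic in miniature, using the same escape codes:

# HIGHLIGHT turns bold-italic-red on; NORMAL resets terminal state.
BOLD, ITALIC, RED = "\033[1m", "\033[3m", "\033[38;5;160m"
NORMAL = "\033[0m"
HIGHLIGHT = BOLD + ITALIC + RED

text, word = "user.avg_session_length.v1", "session"
idx = text.find(word)
print(text[:idx] + HIGHLIGHT + word + NORMAL + text[idx + len(word):])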
@@ -237,13 +221,13 @@ def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=False):
     lines = []
     if trim_paths:
         for field in filter(lambda x: x in entry, PATH_FIELDS):
-            entry[field] = entry[field].replace(root, '')
+            entry[field] = entry[field].replace(root, "")
     for column, values in entry.items():
-        name = " "*(15 - len(column)) + column
+        name = " " * (15 - len(column)) + column
         if column in FILTER_COLUMNS and len(values) > show:
             values = [value for value in set(values) if target in value]
-            if len(values) > show:
-                truncated = ', '.join(values[:show])
+            if len(values) > show:
+                truncated = ", ".join(values[:show])
             remaining = len(values) - show
             values = f"[{truncated} ... {GREY}{UNDERLINE}{remaining} more{NORMAL}]"
         if column == "file":
@@ -257,12 +241,15 @@ def prettify_entry(entry, target, modification, show=10, root=CWD, trim_paths=False):
 
 def find_in_index(index_table, target):
     def valid_entry(entry):
-        return any(
-            [target in value
-             for column, values in entry.items()
-             if column in FILTER_COLUMNS
-             for value in values]
-        )
+        return any(
+            [
+                target in value
+                for column, values in entry.items()
+                if column in FILTER_COLUMNS
+                for value in values
+            ]
+        )
+
     return find_in_index_pred(index_table, valid_entry)
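The reflowed `valid_entry` is a doubly-nested comprehension: flatten every value under the filterable columns and test whether any contains the target. The equivalent explicit loops, as a readability aid only:

# Loop form of the valid_entry comprehension above.
FILTER_COLUMNS = ["aggregation", "keys", "name", "sources", "joins"]

def valid_entry(entry, target):
    for column, values in entry.items():
        if column in FILTER_COLUMNS:
            for value in values:
                if target in value:
                    return True
    return False

entry = {"name": ["search.clicks.v1"], "file": ["/tmp/x.py"]}
print(valid_entry(entry, "clicks"))  # True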
@@ -278,7 +265,7 @@ def display_entries(entries, target, root=CWD, trim_paths=False):
         pretty = prettify_entry(entry, target, info, root=root, trim_paths=trim_paths)
         display.append((info, pretty))
 
-    for (_, pretty_entry) in sorted(display):
+    for _, pretty_entry in sorted(display):
         print(pretty_entry)
 
 
@@ -340,7 +327,9 @@ def events_without_topics(output_file=None, exclude_commit_message=None):
             consumers = set()
             for join in entry["joins"]:
                 conf_file_path = conf_file("joins", join)
-                consumer_name, consumer_email = author_name_email(conf_file_path, exclude_commit_message)
+                consumer_name, consumer_email = author_name_email(
+                    conf_file_path, exclude_commit_message
+                )
                 consumers.add(consumer_name)
                 emails.add(consumer_email)
             row = [
@@ -349,58 +338,64 @@ def events_without_topics(output_file=None, exclude_commit_message=None):
                 is_online,
                 entry["_event_tables"][0],
                 joins,
-                ", ".join(consumers)
+                ", ".join(consumers),
             ]
             result.append(row)
         return found
 
     find_in_index_pred(gb_index, is_events_without_topics)
     if output_file:
-        with open(os.path.expanduser(output_file), 'w') as tsv_file:
+        with open(os.path.expanduser(output_file), "w") as tsv_file:
             for row in result:
-                tsv_file.write('\t'.join(map(str, row)) + '\n')
-        print("wrote information about cases where events us used " +
-              f"without topics set into file {os.path.expanduser(output_file)}")
+                tsv_file.write("\t".join(map(str, row)) + "\n")
+        print(
+            "wrote information about cases where events us used "
+            + f"without topics set into file {os.path.expanduser(output_file)}"
+        )
     else:
         for row in result:
-            print('\t'.join(map(str, row)) + '\n')
+            print("\t".join(map(str, row)) + "\n")
     print(",".join(list(emails)))
 
 
-def load_team_data(path='', teams_root=None):
+def load_team_data(path="", teams_root=None):
     # Check if path is teams.json or teams.py
-    if 'teams.json' in path:
-        with open(path, 'r') as infile:
+    if "teams.json" in path:
+        with open(path, "r") as infile:
             teams = json.load(infile)
-        base_defaults = teams.get('default', {})
+        base_defaults = teams.get("default", {})
         full_info = teams.copy()
         for team, values in teams.items():
            full_info[team] = dict(base_defaults, **values)
         return full_info
     else:
         from ai.chronon.cli.compile import parse_teams
+
         assert teams_root is not None, "Need root to load teams.py"
         teams_py = parse_teams.load_teams(teams_root)
         return teams_py
 
 
 # register all handlers here
-handlers = {
-    "_events_without_topics": events_without_topics
-}
+handlers = {"_events_without_topics": events_without_topics}
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Explore tool for chronon")
     parser.add_argument("keyword", help="Keyword to look up keys")
     parser.add_argument("--conf-root", help="Conf root for the configs", default=CWD)
     parser.add_argument(
-        "--handler-args", nargs='*', help="Special arguments for handler keywords of the form param=value")
+        "--handler-args",
+        nargs="*",
+        help="Special arguments for handler keywords of the form param=value",
+    )
     args = parser.parse_args()
     root = args.conf_root
     if not (root.endswith("chronon") or root.endswith("zipline")):
-        print("This script needs to be run from chronon conf root - with folder named "
-              "'chronon' or 'zipline', found: " + root)
-    teams = load_team_data(os.path.join(root, 'teams.json'), teams_root=root)
+        print(
+            "This script needs to be run from chronon conf root - with folder named 'chronon' or 'zipline', found: "
+            + root
+        )
+    teams = load_team_data(os.path.join(root, "teams.json"), teams_root=root)
     gb_index = build_index("group_bys", GB_INDEX_SPEC, root=root, teams=teams)
     join_index = build_index("joins", JOIN_INDEX_SPEC, root=root, teams=teams)
     enrich_with_joins(gb_index, join_index, root=root, teams=teams)
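`load_team_data` above layers each team's entry over the `default` block via `dict(base_defaults, **values)`, so team-specific keys shadow shared defaults. A small sketch of that merge rule with made-up team data:

# dict(base, **overrides): per-team keys win over shared defaults --
# the merge rule load_team_data applies above. Data is made up.
teams = {
    "default": {"namespace": "chronon_poc", "spark_version": "3.5"},
    "search": {"namespace": "search_prod"},
}
base_defaults = teams.get("default", {})
full_info = {team: dict(base_defaults, **values) for team, values in teams.items()}
print(full_info["search"])  # {'namespace': 'search_prod', 'spark_version': '3.5'}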
@@ -412,7 +407,9 @@ if __name__ == "__main__":
         handler_args = {}
         for arg in args.handler_args:
             splits = arg.split("=", 1)
-            assert len(splits) == 2, f"need args to handler for the form, param=value. Found and invalid arg:{arg}"
+            assert len(splits) == 2, (
+                f"need args to handler for the form, param=value. Found and invalid arg:{arg}"
+            )
             key, value = splits
             handler_args[key] = value
     handler(**handler_args)
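The reflowed assert guards `--handler-args` parsing, which uses `split("=", 1)` so that only the first `=` separates key from value. A short sketch of why `maxsplit=1` matters; the argument names come from the `events_without_topics` handler above:

# split("=", 1) keeps any later "=" inside the value.
handler_args = {}
for arg in ["output_file=~/report.tsv", "exclude_commit_message=revert=auto"]:
    splits = arg.split("=", 1)
    assert len(splits) == 2, f"expected param=value, got: {arg}"
    key, value = splits
    handler_args[key] = value

print(handler_args)
# {'output_file': '~/report.tsv', 'exclude_commit_message': 'revert=auto'}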
ai/chronon/repo/extract_objects.py
CHANGED

@@ -76,18 +76,17 @@ def import_module_set_name(module, cls):
     # obj.metaData.name=user.avg_session_length.v1__1
     # obj.metaData.team=user
     base_name = module.__name__.partition(".")[2] + "." + name
-
+
     # Add version suffix if version is set
-    if hasattr(obj.metaData, 'version') and obj.metaData.version is not None:
+    if hasattr(obj.metaData, "version") and obj.metaData.version is not None:
         base_name = base_name + "__" + str(obj.metaData.version)
-
+
     obj.metaData.name = base_name
     obj.metaData.team = module.__name__.split(".")[1]
     return module
 
 
 def from_file(file_path: str, cls: type, log_level=logging.INFO):
-
     logger = get_logger(log_level)
     logger.debug("Loading objects of type {cls} from {file_path}".format(**locals()))
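`import_module_set_name` derives an object's logical name from its module path, then appends a `__<version>` suffix when `metaData.version` is set, producing names like the `user.avg_session_length.v1__1` shown in the inline comment. The naming rule in isolation, with a hypothetical module path:

# Naming rule sketched from import_module_set_name above; the module
# path and version are hypothetical.
module_name = "group_bys.user.avg_session_length"
name, version = "v1", 1

base_name = module_name.partition(".")[2] + "." + name  # user.avg_session_length.v1
if version is not None:
    base_name += "__" + str(version)                    # user.avg_session_length.v1__1
team = module_name.split(".")[1]                        # user

print(base_name, team)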
@@ -110,15 +109,14 @@ def from_file(file_path: str, cls: type, log_level=logging.INFO):
 
 
 def chronon_path(file_path: str) -> str:
-
     conf_types = FOLDER_NAME_TO_CLASS.keys()
 
     splits = file_path.split("/")
     conf_occurences = [splits.index(typ) for typ in conf_types if typ in splits]
 
-    assert (
-        len(conf_occurences) > 0
-    ), f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    assert len(conf_occurences) > 0, (
+        f"Path: {file_path} doesn't contain folder with name among {conf_types}"
+    )
 
     index = min([splits.index(typ) for typ in conf_types if typ in splits])
     rel_path = "/".join(splits[index:])
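`chronon_path` trims an absolute path down to the portion starting at the first conf-type folder. A sketch of that trimming with a hypothetical layout; the folder list below is a stand-in for `FOLDER_NAME_TO_CLASS.keys()`:

# Stand-in for FOLDER_NAME_TO_CLASS.keys(); actual keys may differ.
conf_types = ["group_bys", "joins", "staging_queries", "models"]

file_path = "/home/dev/zipline/group_bys/user/avg_session_length.py"
splits = file_path.split("/")
index = min(splits.index(typ) for typ in conf_types if typ in splits)
print("/".join(splits[index:]))  # group_bys/user/avg_session_length.py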
@@ -127,7 +125,6 @@ def chronon_path(file_path: str) -> str:
 
 
 def module_path(file_path: str) -> str:
-
     adjusted_path = chronon_path(file_path)
     assert adjusted_path.endswith(".py"), f"Path: {file_path} doesn't end with '.py'"
 