awx-zipline-ai 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)
  1. agent/ttypes.py +6 -6
  2. ai/chronon/airflow_helpers.py +20 -23
  3. ai/chronon/cli/__init__.py +0 -0
  4. ai/chronon/cli/compile/__init__.py +0 -0
  5. ai/chronon/cli/compile/column_hashing.py +40 -17
  6. ai/chronon/cli/compile/compile_context.py +13 -17
  7. ai/chronon/cli/compile/compiler.py +59 -36
  8. ai/chronon/cli/compile/conf_validator.py +251 -99
  9. ai/chronon/cli/compile/display/__init__.py +0 -0
  10. ai/chronon/cli/compile/display/class_tracker.py +6 -16
  11. ai/chronon/cli/compile/display/compile_status.py +10 -10
  12. ai/chronon/cli/compile/display/diff_result.py +79 -14
  13. ai/chronon/cli/compile/fill_templates.py +3 -8
  14. ai/chronon/cli/compile/parse_configs.py +10 -17
  15. ai/chronon/cli/compile/parse_teams.py +38 -34
  16. ai/chronon/cli/compile/serializer.py +3 -9
  17. ai/chronon/cli/compile/version_utils.py +42 -0
  18. ai/chronon/cli/git_utils.py +2 -13
  19. ai/chronon/cli/logger.py +0 -2
  20. ai/chronon/constants.py +1 -1
  21. ai/chronon/group_by.py +47 -47
  22. ai/chronon/join.py +46 -32
  23. ai/chronon/logger.py +1 -2
  24. ai/chronon/model.py +9 -4
  25. ai/chronon/query.py +2 -2
  26. ai/chronon/repo/__init__.py +1 -2
  27. ai/chronon/repo/aws.py +17 -31
  28. ai/chronon/repo/cluster.py +121 -50
  29. ai/chronon/repo/compile.py +14 -8
  30. ai/chronon/repo/constants.py +1 -1
  31. ai/chronon/repo/default_runner.py +32 -54
  32. ai/chronon/repo/explore.py +70 -73
  33. ai/chronon/repo/extract_objects.py +6 -9
  34. ai/chronon/repo/gcp.py +89 -88
  35. ai/chronon/repo/gitpython_utils.py +3 -2
  36. ai/chronon/repo/hub_runner.py +145 -55
  37. ai/chronon/repo/hub_uploader.py +2 -1
  38. ai/chronon/repo/init.py +12 -5
  39. ai/chronon/repo/join_backfill.py +19 -5
  40. ai/chronon/repo/run.py +42 -39
  41. ai/chronon/repo/serializer.py +4 -12
  42. ai/chronon/repo/utils.py +72 -63
  43. ai/chronon/repo/zipline.py +3 -19
  44. ai/chronon/repo/zipline_hub.py +211 -39
  45. ai/chronon/resources/__init__.py +0 -0
  46. ai/chronon/resources/gcp/__init__.py +0 -0
  47. ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
  48. ai/chronon/resources/gcp/group_bys/test/data.py +13 -17
  49. ai/chronon/resources/gcp/joins/__init__.py +0 -0
  50. ai/chronon/resources/gcp/joins/test/data.py +4 -8
  51. ai/chronon/resources/gcp/sources/__init__.py +0 -0
  52. ai/chronon/resources/gcp/sources/test/data.py +9 -6
  53. ai/chronon/resources/gcp/teams.py +9 -21
  54. ai/chronon/source.py +2 -4
  55. ai/chronon/staging_query.py +60 -19
  56. ai/chronon/types.py +3 -2
  57. ai/chronon/utils.py +21 -68
  58. ai/chronon/windows.py +2 -4
  59. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/METADATA +47 -24
  60. awx_zipline_ai-0.3.0.dist-info/RECORD +96 -0
  61. awx_zipline_ai-0.3.0.dist-info/top_level.txt +4 -0
  62. gen_thrift/__init__.py +0 -0
  63. {ai/chronon → gen_thrift}/api/ttypes.py +327 -197
  64. {ai/chronon/api → gen_thrift}/common/ttypes.py +9 -39
  65. gen_thrift/eval/ttypes.py +660 -0
  66. {ai/chronon → gen_thrift}/hub/ttypes.py +12 -131
  67. {ai/chronon → gen_thrift}/observability/ttypes.py +343 -180
  68. {ai/chronon → gen_thrift}/planner/ttypes.py +326 -45
  69. ai/chronon/eval/__init__.py +0 -122
  70. ai/chronon/eval/query_parsing.py +0 -19
  71. ai/chronon/eval/sample_tables.py +0 -100
  72. ai/chronon/eval/table_scan.py +0 -186
  73. ai/chronon/orchestration/ttypes.py +0 -4406
  74. ai/chronon/resources/gcp/README.md +0 -174
  75. ai/chronon/resources/gcp/zipline-cli-install.sh +0 -54
  76. awx_zipline_ai-0.2.1.dist-info/RECORD +0 -93
  77. awx_zipline_ai-0.2.1.dist-info/licenses/LICENSE +0 -202
  78. awx_zipline_ai-0.2.1.dist-info/top_level.txt +0 -3
  79. /jars/__init__.py → /__init__.py +0 -0
  80. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/WHEEL +0 -0
  81. {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/entry_points.txt +0 -0
  82. {ai/chronon → gen_thrift}/api/__init__.py +0 -0
  83. {ai/chronon/api/common → gen_thrift/api}/constants.py +0 -0
  84. {ai/chronon/api → gen_thrift}/common/__init__.py +0 -0
  85. {ai/chronon/api → gen_thrift/common}/constants.py +0 -0
  86. {ai/chronon/fetcher → gen_thrift/eval}/__init__.py +0 -0
  87. {ai/chronon/fetcher → gen_thrift/eval}/constants.py +0 -0
  88. {ai/chronon/hub → gen_thrift/fetcher}/__init__.py +0 -0
  89. {ai/chronon/hub → gen_thrift/fetcher}/constants.py +0 -0
  90. {ai/chronon → gen_thrift}/fetcher/ttypes.py +0 -0
  91. {ai/chronon/observability → gen_thrift/hub}/__init__.py +0 -0
  92. {ai/chronon/observability → gen_thrift/hub}/constants.py +0 -0
  93. {ai/chronon/orchestration → gen_thrift/observability}/__init__.py +0 -0
  94. {ai/chronon/orchestration → gen_thrift/observability}/constants.py +0 -0
  95. {ai/chronon → gen_thrift}/planner/__init__.py +0 -0
  96. {ai/chronon → gen_thrift}/planner/constants.py +0 -0
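Note on the file moves above (items 62-68 and 82-96, with ai/chronon/orchestration/ttypes.py removed in item 73): the generated Thrift modules have been relocated from the ai.chronon.* namespace into a new top-level gen_thrift package. Downstream code that imported these generated types directly would need the new paths; a minimal before/after sketch, using only modules named in this diff:

# 0.2.1: generated Thrift types lived under the ai.chronon namespace
# from ai.chronon.orchestration.ttypes import Conf
# from ai.chronon.api.ttypes import ...

# 0.3.0: the same generated modules now live under the top-level gen_thrift package
from gen_thrift.api.ttypes import Conf
from gen_thrift.planner.ttypes import Mode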
ai/chronon/repo/hub_runner.py CHANGED
@@ -1,16 +1,20 @@
 import json
 import os
-from datetime import datetime
-from urllib.parse import quote_plus
+from dataclasses import dataclass
+from datetime import date, timedelta
+from typing import Optional
 
 import click
-from attr import dataclass
+from gen_thrift.planner.ttypes import Mode
 
 from ai.chronon.cli.git_utils import get_current_branch
 from ai.chronon.repo import hub_uploader, utils
 from ai.chronon.repo.constants import RunMode
+from ai.chronon.repo.utils import handle_conf_not_found, print_possible_confs
 from ai.chronon.repo.zipline_hub import ZiplineHub
 
+ALLOWED_DATE_FORMATS = ["%Y-%m-%d"]
+
 
 @click.group()
 def hub():
@@ -21,60 +25,110 @@ def hub():
 def common_options(func):
     func = click.option("--repo", help="Path to chronon repo", default=".")(func)
     func = click.option("--conf", required=True, help="Conf param - required for every mode")(func)
+    func = click.option(
+        "--hub_url", help="Zipline Hub address, e.g. http://localhost:3903", default=None
+    )(func)
     return func
 
+
 def ds_option(func):
-    return click.option("--ds", help="the end partition to backfill the data")(func)
+    return click.option(
+        "--ds",
+        help="the end partition to backfill the data",
+        type=click.DateTime(formats=ALLOWED_DATE_FORMATS),
+    )(func)
+
 
 def start_ds_option(func):
     return click.option(
-        "--start-ds",
-        help="override the original start partition for a range backfill. "
-        "It only supports staging query, group by backfill and join jobs. "
-        "It could leave holes in your final output table due to the override date range.",)(func)
+        "--start-ds",
+        type=click.DateTime(formats=ALLOWED_DATE_FORMATS),
+        help="override the original start partition for a range backfill. "
+        "It only supports staging query, group by backfill and join jobs. "
+        "It could leave holes in your final output table due to the override date range.",
+    )(func)
 
 
 def end_ds_option(func):
-    return click.option("--end-ds", help="the end ds for a range backfill")(func)
-
+    return click.option(
+        "--end-ds",
+        help="the end ds for a range backfill",
+        type=click.DateTime(formats=ALLOWED_DATE_FORMATS),
+        default=str(date.today() - timedelta(days=2)),
+    )(func)
 
-def submit_workflow(repo,
-                    conf,
-                    mode,
-                    start_ds,
-                    end_ds):
 
-    hub_conf = get_hub_conf(conf)
-    zipline_hub = ZiplineHub(base_url=hub_conf.hub_url)
-    conf_name_to_hash_dict = hub_uploader.build_local_repo_hashmap(root_dir= repo)
+def submit_workflow(repo, conf, mode, start_ds, end_ds, hub_url=None):
+    hub_conf = get_hub_conf(conf, root_dir=repo)
+    if hub_url is not None:
+        zipline_hub = ZiplineHub(base_url=hub_url, sa_name=hub_conf.sa_name)
+    else:
+        zipline_hub = ZiplineHub(base_url=hub_conf.hub_url, sa_name=hub_conf.sa_name)
+    conf_name_to_hash_dict = hub_uploader.build_local_repo_hashmap(root_dir=repo)
     branch = get_current_branch()
 
-    hub_uploader.compute_and_upload_diffs(branch, zipline_hub=zipline_hub, local_repo_confs=conf_name_to_hash_dict)
+    hub_uploader.compute_and_upload_diffs(
+        branch, zipline_hub=zipline_hub, local_repo_confs=conf_name_to_hash_dict
+    )
 
     # get conf name
     conf_name = utils.get_metadata_name_from_conf(repo, conf)
 
-
     response_json = zipline_hub.call_workflow_start_api(
         conf_name=conf_name,
         mode=mode,
         branch=branch, # Get the current branch
-        user=os.environ.get('USER'),
+        user=os.environ.get("USER"),
         start=start_ds,
         end=end_ds,
         conf_hash=conf_name_to_hash_dict[conf_name].hash,
+        skip_long_running=False,
     )
 
-    print(" 🆔 Workflow Id:", response_json.get("workflowId", "N/A"))
+    workflow_id = response_json.get("workflowId", "N/A")
+    print(" 🆔 Workflow Id:", workflow_id)
     print_wf_url(
         conf=conf,
         conf_name=conf_name,
-        mode=RunMode.BACKFILL.value,
-        start_ds=start_ds,
-        end_ds=end_ds,
-        branch=branch
+        mode=mode,
+        workflow_id=workflow_id,
+        repo=repo
+    )
+
+
+def submit_schedule(repo, conf, hub_url=None):
+    hub_conf = get_hub_conf(conf, root_dir=repo)
+    if hub_url is not None:
+        zipline_hub = ZiplineHub(base_url=hub_url, sa_name=hub_conf.sa_name)
+    else:
+        zipline_hub = ZiplineHub(base_url=hub_conf.hub_url, sa_name=hub_conf.sa_name)
+    conf_name_to_obj_dict = hub_uploader.build_local_repo_hashmap(root_dir=repo)
+    branch = get_current_branch()
+
+    hub_uploader.compute_and_upload_diffs(
+        branch, zipline_hub=zipline_hub, local_repo_confs=conf_name_to_obj_dict
+    )
+
+    # get conf name
+    conf_name = utils.get_metadata_name_from_conf(repo, conf)
+    schedule_modes = get_schedule_modes(os.path.join(repo, conf))
+    # create a dict for RunMode.BACKFILL.value and RunMode.DEPLOY.value to schedule_modes.offline_schedule and schedule_modes.online
+    modes = {
+        RunMode.BACKFILL.value.upper(): schedule_modes.offline_schedule,
+        RunMode.DEPLOY.value.upper(): schedule_modes.online,
+    }
+
+    response_json = zipline_hub.call_schedule_api(
+        modes=modes,
+        branch=branch,
+        conf_name=conf_name,
+        conf_hash=conf_name_to_obj_dict[conf_name].hash,
     )
 
+    schedules = response_json.get("schedules", "N/A")
+    readable_schedules = {Mode._VALUES_TO_NAMES[int(k)]: v for k, v in schedules.items()}
+    print(" 🗓️ Schedules Deployed:", readable_schedules)
+
 
 # zipline hub backfill --conf=compiled/joins/join
 # adhoc backfills
@@ -82,41 +136,58 @@ def submit_workflow(repo,
 @common_options
 @start_ds_option
 @end_ds_option
-def backfill(repo,
-             conf,
-             start_ds,
-             end_ds):
+@handle_conf_not_found(log_error=True, callback=print_possible_confs)
+def backfill(repo, conf, hub_url, start_ds, end_ds):
     """
     - Submit a backfill job to Zipline.
    Response should contain a list of confs that are different from what's on remote.
     - Call upload API to upload the conf contents for the list of confs that were different.
     - Call the actual run API with mode set to backfill.
     """
-    submit_workflow(repo, conf, RunMode.BACKFILL.value, start_ds, end_ds)
-
+    submit_workflow(
+        repo, conf, RunMode.BACKFILL.value, start_ds, end_ds, hub_url=hub_url
+    )
 
 
-# zipline hub deploy --conf=compiled/joins/join
+# zipline hub run-adhoc --conf=compiled/joins/join
 # currently only supports one-off deploy node submission
 @hub.command()
 @common_options
 @end_ds_option
-def deploy(repo,
-           conf,
-           end_ds):
+@handle_conf_not_found(log_error=True, callback=print_possible_confs)
+def run_adhoc(repo, conf, hub_url, end_ds):
     """
-    - Submit a one-off deploy job to Zipline.
+    - Submit a one-off deploy job to Zipline. This submits the various jobs to allow your conf to be tested online.
    Response should contain a list of confs that are different from what's on remote.
     - Call upload API to upload the conf contents for the list of confs that were different.
     - Call the actual run API with mode set to deploy
     """
-    submit_workflow(repo, conf, RunMode.DEPLOY.value, end_ds, end_ds)
+    submit_workflow(repo, conf, RunMode.DEPLOY.value, end_ds, end_ds, hub_url=hub_url)
 
 
-def get_common_env_map(file_path):
-    with open(file_path, 'r') as f:
+# zipline hub schedule --conf=compiled/joins/join
+@hub.command()
+@common_options
+@handle_conf_not_found(log_error=True, callback=print_possible_confs)
+def schedule(repo, conf, hub_url):
+    """
+    - Deploys a schedule for the specified conf to Zipline. This allows your conf to have various associated jobs run on a schedule.
    This verb will introspect your conf to determine which of its jobs need to be scheduled (or paused if turned off) based on the
    'offline_schedule' and 'online' fields.
+    """
+    submit_schedule(repo, conf, hub_url=hub_url)
+
+
+def get_metadata_map(file_path):
+    with open(file_path, "r") as f:
         data = json.load(f)
-    common_env_map = data['metaData']['executionInfo']['env']['common']
+    metadata_map = data["metaData"]
+    return metadata_map
+
+
+def get_common_env_map(file_path):
+    metadata_map = get_metadata_map(file_path)
+    common_env_map = metadata_map["executionInfo"]["env"]["common"]
     return common_env_map
 
 
@@ -124,18 +195,42 @@ def get_common_env_map(file_path):
 class HubConfig:
     hub_url: str
     frontend_url: str
+    sa_name: Optional[str] = None
+
 
+@dataclass
+class ScheduleModes:
+    online: str
+    offline_schedule: str
 
-def get_hub_conf(conf_path):
-    common_env_map = get_common_env_map(conf_path)
+
+def get_hub_conf(conf_path, root_dir="."):
+    file_path = os.path.join(root_dir, conf_path)
+    common_env_map = get_common_env_map(file_path)
     hub_url = common_env_map.get("HUB_URL", os.environ.get("HUB_URL"))
     frontend_url = common_env_map.get("FRONTEND_URL", os.environ.get("FRONTEND_URL"))
-    return HubConfig(hub_url=hub_url, frontend_url=frontend_url)
+    sa_name = common_env_map.get("SA_NAME", os.environ.get("SA_NAME"))
+    return HubConfig(hub_url=hub_url, frontend_url=frontend_url, sa_name=sa_name)
+
 
+def get_schedule_modes(conf_path):
+    metadata_map = get_metadata_map(conf_path)
+    online_value = metadata_map.get("online", False)
+    online = "true" if bool(online_value) else "false"
+    offline_schedule = metadata_map["executionInfo"].get("scheduleCron", None)
 
-def print_wf_url(conf, conf_name, mode, start_ds, end_ds, branch):
+    # check if offline_schedule is null or 'None' or '@daily' else throw an error
+    valid_schedules = {None, "None", "@daily"}
+    if offline_schedule not in valid_schedules:
+        raise ValueError(
+            f"Unsupported offline_schedule: {offline_schedule}. Only null, 'None', or '@daily' are supported."
+        )
+    offline_schedule = offline_schedule or "None"
+    return ScheduleModes(online=online, offline_schedule=offline_schedule)
 
-    hub_conf = get_hub_conf(conf)
+
+def print_wf_url(conf, conf_name, mode, workflow_id, repo="."):
+    hub_conf = get_hub_conf(conf, root_dir=repo)
     frontend_url = hub_conf.frontend_url
 
     if "compiled/joins" in conf:
@@ -143,17 +238,13 @@ def print_wf_url(conf, conf_name, mode, start_ds, end_ds, branch):
     elif "compiled/staging_queries" in conf:
         hub_conf_type = "stagingqueries"
     elif "compiled/group_by" in conf:
-        hub_conf_type = "groupby"
+        hub_conf_type = "groupbys"
     elif "compiled/models" in conf:
         hub_conf_type = "models"
     else:
         raise ValueError(f"Unsupported conf type: {conf}")
 
-    # TODO: frontend uses localtime to create the millis, we should make it use UTC and make this align
-    def _millis(date_str):
-        return int(datetime.strptime(date_str, "%Y-%m-%d").timestamp() * 1000)
-
-    def _mode_string(mode):
+    def _mode_string():
         if mode == "backfill":
             return "offline"
         elif mode == "deploy":
@@ -161,11 +252,10 @@ def print_wf_url(conf, conf_name, mode, start_ds, end_ds, branch):
         else:
             raise ValueError(f"Unsupported mode: {mode}")
 
-    workflow_url = f"{frontend_url.rstrip('/')}/{hub_conf_type}/{conf_name}/{_mode_string(mode)}?start={_millis(start_ds)}&end={_millis(end_ds)}&branch={quote_plus(branch)}"
+    workflow_url = f"{frontend_url.rstrip('/')}/{hub_conf_type}/{conf_name}/{_mode_string()}?workflowId={workflow_id}"
 
     print(" 🔗 Workflow : " + workflow_url + "\n")
 
+
 if __name__ == "__main__":
     hub()
-
-
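The new schedule verb introspects the compiled conf's metaData through the get_metadata_map / get_schedule_modes helpers added above. A minimal sketch of that behaviour, assuming the module path ai.chronon.repo.hub_runner matches the file listing; the conf dict below is a hypothetical stand-in containing only the fields these helpers actually read:

import json
import tempfile

from ai.chronon.repo.hub_runner import get_schedule_modes  # module path assumed from the file listing

# Hypothetical compiled conf: only the fields read by get_schedule_modes are present.
conf = {"metaData": {"online": True, "executionInfo": {"scheduleCron": "@daily"}}}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(conf, f)

modes = get_schedule_modes(f.name)
print(modes.online)            # "true"   -> submitted under RunMode.DEPLOY
print(modes.offline_schedule)  # "@daily" -> submitted under RunMode.BACKFILL
# Any scheduleCron other than null, 'None', or '@daily' raises ValueError.

# Equivalent CLI entry points, per the comments in the diff (dates illustrative):
#   zipline hub backfill  --conf=compiled/joins/join --start-ds=2025-01-01 --end-ds=2025-01-31
#   zipline hub run-adhoc --conf=compiled/joins/join
#   zipline hub schedule  --conf=compiled/joins/join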
ai/chronon/repo/hub_uploader.py CHANGED
@@ -3,7 +3,8 @@ import hashlib
 import json
 import os
 
-from ai.chronon.orchestration.ttypes import Conf
+from gen_thrift.api.ttypes import Conf
+
 from ai.chronon.repo import (
     FOLDER_NAME_TO_CLASS,
     FOLDER_NAME_TO_CONF_TYPE,
ai/chronon/repo/init.py CHANGED
@@ -17,7 +17,7 @@ from ai.chronon.cli.compile.display.console import console
     envvar="CLOUD_PROVIDER",
     help="Cloud provider to use.",
     required=True,
-    type=click.Choice(['aws', 'gcp'], case_sensitive=False)
+    type=click.Choice(["aws", "gcp"], case_sensitive=False),
 )
 @click.option(
     "--chronon-root",
@@ -31,9 +31,11 @@ def main(ctx, chronon_root, cloud_provider):
     target_path = os.path.abspath(chronon_root)
 
     if os.path.exists(target_path) and os.listdir(target_path):
-        choice = Prompt.ask(f"[bold yellow] Warning: [/]{target_path} is not empty. Proceed?",
-                            choices=["y", "n"],
-                            default="y")
+        choice = Prompt.ask(
+            f"[bold yellow] Warning: [/]{target_path} is not empty. Proceed?",
+            choices=["y", "n"],
+            default="y",
+        )
         if choice == "n":
             return
 
@@ -42,7 +44,12 @@ def main(ctx, chronon_root, cloud_provider):
     try:
         shutil.copytree(template_path, target_path, dirs_exist_ok=True)
         console.print("[bold green] Project scaffolding created successfully! 🎉\n")
-        export_cmd = Syntax(f"`export PYTHONPATH={target_path}:$PYTHONPATH`", "bash", theme="github-dark", line_numbers=False)
+        export_cmd = Syntax(
+            f"`export PYTHONPATH={target_path}:$PYTHONPATH`",
+            "bash",
+            theme="github-dark",
+            line_numbers=False,
+        )
         console.print("Please copy the following command to your shell config:")
         console.print(export_cmd)
     except Exception:
ai/chronon/repo/join_backfill.py CHANGED
@@ -8,7 +8,6 @@ from ai.chronon.utils import (
     convert_json_to_obj,
     dict_to_bash_commands,
     dict_to_exports,
-    get_join_output_table_name,
     join_part_name,
     sanitize,
 )
@@ -34,7 +33,13 @@ class JoinBackfill:
     ):
         self.dag_id = "_".join(
             map(
-                sanitize, ["chronon_joins_backfill", os.path.basename(config_path).split("/")[-1], start_date, end_date]
+                sanitize,
+                [
+                    "chronon_joins_backfill",
+                    os.path.basename(config_path).split("/")[-1],
+                    start_date,
+                    end_date,
+                ],
             )
         )
         self.start_date = start_date
@@ -56,7 +61,8 @@ class JoinBackfill:
         """
         flow = Flow(self.join.metaData.name)
         final_node = Node(
-            f"{TASK_PREFIX}__{sanitize(get_join_output_table_name(self.join, full_name=True))}", self.run_final_join()
+            f"{TASK_PREFIX}__{sanitize(self.join.table)}",
+            self.run_final_join(),
         )
         left_node = Node(f"{TASK_PREFIX}__left_table", self.run_left_table())
         flow.add_node(final_node)
@@ -89,11 +95,19 @@
 
     def run_left_table(self):
         settings = self.settings.get("left_table", self.settings["default"])
-        return self.export_template(settings) + " && " + self.command_template(extra_args={"mode": "backfill-left"})
+        return (
+            self.export_template(settings)
+            + " && "
+            + self.command_template(extra_args={"mode": "backfill-left"})
+        )
 
     def run_final_join(self):
         settings = self.settings.get("final_join", self.settings["default"])
-        return self.export_template(settings) + " && " + self.command_template(extra_args={"mode": "backfill-final"})
+        return (
+            self.export_template(settings)
+            + " && "
+            + self.command_template(extra_args={"mode": "backfill-final"})
+        )
 
     def run(self, orchestrator: str, overrides: Optional[dict] = None):
         from ai.chronon.constants import ADAPTERS
ai/chronon/repo/run.py CHANGED
@@ -74,8 +74,7 @@ def set_defaults(ctx):
         # "online_jar_fetch": os.path.join(chronon_repo_path, "scripts/fetch_online_jar.py"),
         "online_args": os.environ.get("CHRONON_ONLINE_ARGS"),
         "chronon_jar": os.environ.get("CHRONON_DRIVER_JAR"),
-        "list_apps": "python3 "
-        + os.path.join(chronon_repo_path, "scripts/yarn_list.py"),
+        "list_apps": "python3 " + os.path.join(chronon_repo_path, "scripts/yarn_list.py"),
         "render_info": os.path.join(chronon_repo_path, RENDER_INFO_DEFAULT_SCRIPT),
         "project_conf": obj.get("project_conf"),
         "artifact_prefix": os.environ.get("ARTIFACT_PREFIX"),
@@ -85,24 +84,23 @@ def set_defaults(ctx):
         if ctx.params.get(key) is None and value is not None:
             ctx.params[key] = value
 
+
 def validate_flink_state(ctx, param, value):
     uri_schemes = ["gs://", "s3://"]
     if value and not any(value.startswith(scheme) for scheme in uri_schemes):
-        raise click.BadParameter(
-            f"Flink state uri must start with {uri_schemes}"
-        )
+        raise click.BadParameter(f"Flink state uri must start with {uri_schemes}")
     return value
 
+
 def validate_additional_jars(ctx, param, value):
     if value:
-        jars = value.split(',')
+        jars = value.split(",")
         for jar in jars:
-            if not jar.startswith(('gs://', 's3://')):
-                raise click.BadParameter(
-                    f"Additional jars must start with gs://, s3://: {jar}"
-                )
+            if not jar.startswith(("gs://", "s3://")):
+                raise click.BadParameter(f"Additional jars must start with gs://, s3://: {jar}")
     return value
 
+
 @click.command(
     name="run",
     context_settings=dict(allow_extra_args=True, ignore_unknown_options=True),
@@ -116,7 +114,9 @@ def validate_additional_jars(ctx, param, value):
     default="dev",
     help="Running environment - default to be dev",
 )
-@click.option("--mode", type=click.Choice([str(k) for k in MODE_ARGS.keys()]), default=str(RunMode.BACKFILL))
+@click.option(
+    "--mode", type=click.Choice([str(k) for k in MODE_ARGS.keys()]), default=str(RunMode.BACKFILL)
+)
 @click.option("--ds", help="the end partition to backfill the data")
 @click.option("--app-name", help="app name. Default to {}".format(APP_NAME_TEMPLATE))
 @click.option(
@@ -142,58 +142,61 @@ def validate_additional_jars(ctx, param, value):
     help="Class name of Online Impl. Used for streaming and metadata-upload mode.",
 )
 @click.option("--version", required=False, help="Chronon version to use.")
-@click.option(
-    "--spark-version", default="2.4.0", help="Spark version to use for downloading jar."
-)
+@click.option("--spark-version", default="2.4.0", help="Spark version to use for downloading jar.")
 @click.option("--spark-submit-path", help="Path to spark-submit")
-@click.option(
-    "--spark-streaming-submit-path", help="Path to spark-submit for streaming"
-)
+@click.option("--spark-streaming-submit-path", help="Path to spark-submit for streaming")
 @click.option(
     "--online-jar-fetch",
     help="Path to script that can pull online jar. This will run only "
     "when a file doesn't exist at location specified by online_jar",
 )
-@click.option(
-    "--sub-help", is_flag=True, help="print help command of the underlying jar and exit"
-)
+@click.option("--sub-help", is_flag=True, help="print help command of the underlying jar and exit")
 @click.option(
     "--conf-type",
     help="related to sub-help - no need to set unless you are not working with a conf",
 )
-@click.option(
-    "--online-args", help="Basic arguments that need to be supplied to all online modes"
-)
+@click.option("--online-args", help="Basic arguments that need to be supplied to all online modes")
 @click.option("--chronon-jar", help="Path to chronon OS jar")
 @click.option("--release-tag", help="Use the latest jar for a particular tag.")
-@click.option(
-    "--list-apps", help="command/script to list running jobs on the scheduler"
-)
+@click.option("--list-apps", help="command/script to list running jobs on the scheduler")
 @click.option(
     "--render-info",
     help="Path to script rendering additional information of the given config. "
     "Only applicable when mode is set to info",
 )
 @click.option("--kafka-bootstrap", help="Kafka bootstrap server in host:port format")
-@click.option("--latest-savepoint", is_flag=True, default=False, help="Deploys streaming job with latest savepoint")
+@click.option(
+    "--latest-savepoint",
+    is_flag=True,
+    default=False,
+    help="Deploys streaming job with latest savepoint",
+)
 @click.option("--custom-savepoint", help="Savepoint to deploy streaming job with.")
-@click.option("--no-savepoint", is_flag=True, default=False, help="Deploys streaming job without a savepoint")
-@click.option("--version-check", is_flag=True, default=False,
-              help="Checks if Zipline version of running streaming job is different from local version and deploys the job if they are different")
-@click.option("--flink-state-uri",
-              help="Bucket for storing flink state checkpoints/savepoints and other internal pieces for orchestration.",
-              callback=validate_flink_state)
-@click.option("--additional-jars",
-              help="Comma separated list of additional jar URIs to be included in the Flink job classpath (e.g. gs://bucket/jar1.jar,gs://bucket/jar2.jar).",
-              callback=validate_additional_jars)
 @click.option(
-    "--validate",
+    "--no-savepoint", is_flag=True, default=False, help="Deploys streaming job without a savepoint"
+)
+@click.option(
+    "--version-check",
     is_flag=True,
-    help="Validate the catalyst util Spark expression evaluation logic",
+    default=False,
+    help="Checks if Zipline version of running streaming job is different from local version and deploys the job if they are different",
 )
 @click.option(
-    "--validate-rows", default="10000", help="Number of rows to run the validation on"
+    "--flink-state-uri",
+    help="Bucket for storing flink state checkpoints/savepoints and other internal pieces for orchestration.",
+    callback=validate_flink_state,
+)
+@click.option(
+    "--additional-jars",
+    help="Comma separated list of additional jar URIs to be included in the Flink job classpath (e.g. gs://bucket/jar1.jar,gs://bucket/jar2.jar).",
+    callback=validate_additional_jars,
+)
+@click.option(
+    "--validate",
+    is_flag=True,
+    help="Validate the catalyst util Spark expression evaluation logic",
 )
+@click.option("--validate-rows", default="10000", help="Number of rows to run the validation on")
 @click.option("--join-part-name", help="Name of the join part to use for join-part-job")
 @click.option(
     "--artifact-prefix",
ai/chronon/repo/serializer.py CHANGED
@@ -50,18 +50,14 @@ class ThriftJSONDecoder(json.JSONDecoder):
                 (_, field_ttype, field_name, field_ttype_info, dummy) = field
                 if field_name not in val:
                     continue
-                converted_val = self._convert(
-                    val[field_name], field_ttype, field_ttype_info
-                )
+                converted_val = self._convert(val[field_name], field_ttype, field_ttype_info)
                 setattr(ret, field_name, converted_val)
         elif ttype == TType.LIST:
             (element_ttype, element_ttype_info, _) = ttype_info
             ret = [self._convert(x, element_ttype, element_ttype_info) for x in val]
         elif ttype == TType.SET:
             (element_ttype, element_ttype_info) = ttype_info
-            ret = set(
-                [self._convert(x, element_ttype, element_ttype_info) for x in val]
-            )
+            ret = set([self._convert(x, element_ttype, element_ttype_info) for x in val])
         elif ttype == TType.MAP:
             (key_ttype, key_ttype_info, val_ttype, val_ttype_info, _) = ttype_info
             ret = dict(
@@ -117,9 +113,7 @@ def thrift_json(obj):
 
 
 def thrift_simple_json(obj):
-    simple = TSerialization.serialize(
-        obj, protocol_factory=TSimpleJSONProtocolFactory()
-    )
+    simple = TSerialization.serialize(obj, protocol_factory=TSimpleJSONProtocolFactory())
     parsed = json.loads(simple)
     return json.dumps(parsed, indent=2, sort_keys=True)
 
@@ -131,9 +125,7 @@ def thrift_simple_json_protected(obj, obj_type) -> str:
     actual = thrift_simple_json(thrift_obj)
     differ = JsonDiffer()
     diff = differ.diff(serialized, actual)
-    assert (
-        len(diff) == 0
-    ), f"""Serialization can't be reversed
+    assert len(diff) == 0, f"""Serialization can't be reversed
     diff: \n{diff}
     original: \n{serialized}
     """