awx-zipline-ai 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of awx-zipline-ai might be problematic.
- agent/ttypes.py +6 -6
- ai/chronon/airflow_helpers.py +20 -23
- ai/chronon/cli/__init__.py +0 -0
- ai/chronon/cli/compile/__init__.py +0 -0
- ai/chronon/cli/compile/column_hashing.py +40 -17
- ai/chronon/cli/compile/compile_context.py +13 -17
- ai/chronon/cli/compile/compiler.py +59 -36
- ai/chronon/cli/compile/conf_validator.py +251 -99
- ai/chronon/cli/compile/display/__init__.py +0 -0
- ai/chronon/cli/compile/display/class_tracker.py +6 -16
- ai/chronon/cli/compile/display/compile_status.py +10 -10
- ai/chronon/cli/compile/display/diff_result.py +79 -14
- ai/chronon/cli/compile/fill_templates.py +3 -8
- ai/chronon/cli/compile/parse_configs.py +10 -17
- ai/chronon/cli/compile/parse_teams.py +38 -34
- ai/chronon/cli/compile/serializer.py +3 -9
- ai/chronon/cli/compile/version_utils.py +42 -0
- ai/chronon/cli/git_utils.py +2 -13
- ai/chronon/cli/logger.py +0 -2
- ai/chronon/constants.py +1 -1
- ai/chronon/group_by.py +47 -47
- ai/chronon/join.py +46 -32
- ai/chronon/logger.py +1 -2
- ai/chronon/model.py +9 -4
- ai/chronon/query.py +2 -2
- ai/chronon/repo/__init__.py +1 -2
- ai/chronon/repo/aws.py +17 -31
- ai/chronon/repo/cluster.py +121 -50
- ai/chronon/repo/compile.py +14 -8
- ai/chronon/repo/constants.py +1 -1
- ai/chronon/repo/default_runner.py +32 -54
- ai/chronon/repo/explore.py +70 -73
- ai/chronon/repo/extract_objects.py +6 -9
- ai/chronon/repo/gcp.py +89 -88
- ai/chronon/repo/gitpython_utils.py +3 -2
- ai/chronon/repo/hub_runner.py +145 -55
- ai/chronon/repo/hub_uploader.py +2 -1
- ai/chronon/repo/init.py +12 -5
- ai/chronon/repo/join_backfill.py +19 -5
- ai/chronon/repo/run.py +42 -39
- ai/chronon/repo/serializer.py +4 -12
- ai/chronon/repo/utils.py +72 -63
- ai/chronon/repo/zipline.py +3 -19
- ai/chronon/repo/zipline_hub.py +211 -39
- ai/chronon/resources/__init__.py +0 -0
- ai/chronon/resources/gcp/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/__init__.py +0 -0
- ai/chronon/resources/gcp/group_bys/test/data.py +13 -17
- ai/chronon/resources/gcp/joins/__init__.py +0 -0
- ai/chronon/resources/gcp/joins/test/data.py +4 -8
- ai/chronon/resources/gcp/sources/__init__.py +0 -0
- ai/chronon/resources/gcp/sources/test/data.py +9 -6
- ai/chronon/resources/gcp/teams.py +9 -21
- ai/chronon/source.py +2 -4
- ai/chronon/staging_query.py +60 -19
- ai/chronon/types.py +3 -2
- ai/chronon/utils.py +21 -68
- ai/chronon/windows.py +2 -4
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/METADATA +47 -24
- awx_zipline_ai-0.3.0.dist-info/RECORD +96 -0
- awx_zipline_ai-0.3.0.dist-info/top_level.txt +4 -0
- gen_thrift/__init__.py +0 -0
- {ai/chronon → gen_thrift}/api/ttypes.py +327 -197
- {ai/chronon/api → gen_thrift}/common/ttypes.py +9 -39
- gen_thrift/eval/ttypes.py +660 -0
- {ai/chronon → gen_thrift}/hub/ttypes.py +12 -131
- {ai/chronon → gen_thrift}/observability/ttypes.py +343 -180
- {ai/chronon → gen_thrift}/planner/ttypes.py +326 -45
- ai/chronon/eval/__init__.py +0 -122
- ai/chronon/eval/query_parsing.py +0 -19
- ai/chronon/eval/sample_tables.py +0 -100
- ai/chronon/eval/table_scan.py +0 -186
- ai/chronon/orchestration/ttypes.py +0 -4406
- ai/chronon/resources/gcp/README.md +0 -174
- ai/chronon/resources/gcp/zipline-cli-install.sh +0 -54
- awx_zipline_ai-0.2.1.dist-info/RECORD +0 -93
- awx_zipline_ai-0.2.1.dist-info/licenses/LICENSE +0 -202
- awx_zipline_ai-0.2.1.dist-info/top_level.txt +0 -3
- /jars/__init__.py → /__init__.py +0 -0
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/WHEEL +0 -0
- {awx_zipline_ai-0.2.1.dist-info → awx_zipline_ai-0.3.0.dist-info}/entry_points.txt +0 -0
- {ai/chronon → gen_thrift}/api/__init__.py +0 -0
- {ai/chronon/api/common → gen_thrift/api}/constants.py +0 -0
- {ai/chronon/api → gen_thrift}/common/__init__.py +0 -0
- {ai/chronon/api → gen_thrift/common}/constants.py +0 -0
- {ai/chronon/fetcher → gen_thrift/eval}/__init__.py +0 -0
- {ai/chronon/fetcher → gen_thrift/eval}/constants.py +0 -0
- {ai/chronon/hub → gen_thrift/fetcher}/__init__.py +0 -0
- {ai/chronon/hub → gen_thrift/fetcher}/constants.py +0 -0
- {ai/chronon → gen_thrift}/fetcher/ttypes.py +0 -0
- {ai/chronon/observability → gen_thrift/hub}/__init__.py +0 -0
- {ai/chronon/observability → gen_thrift/hub}/constants.py +0 -0
- {ai/chronon/orchestration → gen_thrift/observability}/__init__.py +0 -0
- {ai/chronon/orchestration → gen_thrift/observability}/constants.py +0 -0
- {ai/chronon → gen_thrift}/planner/__init__.py +0 -0
- {ai/chronon → gen_thrift}/planner/constants.py +0 -0
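The dominant theme across these files is that the generated Thrift modules move out of the ai.chronon namespace into a new top-level gen_thrift package, while the retired ai.chronon.orchestration and ai.chronon.eval modules are dropped. The sketch below shows what that migration looks like for code that imports the generated types; the 0.2.1 paths are inferred from the renames listed above, so verify them against your installed version before relying on them.

    # Import-path migration sketch (assumed, based on the file renames above).

    # awx-zipline-ai 0.2.1: generated thrift types lived under ai.chronon
    # import ai.chronon.api.ttypes as ttypes
    # import ai.chronon.api.common.ttypes as common

    # awx-zipline-ai 0.3.0: generated thrift types live in the top-level gen_thrift package
    import gen_thrift.api.ttypes as ttypes
    import gen_thrift.common.ttypes as common

    # Hand-written authoring helpers stay under ai.chronon
    import ai.chronon.utils as utils

The per-file diffs below show the same rename applied throughout the authoring API, along with assorted formatting cleanups.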
ai/chronon/group_by.py
CHANGED
@@ -18,13 +18,25 @@ import logging
 from copy import deepcopy
 from typing import Callable, Dict, List, Optional, Tuple, Union
 
-import
-import
+import gen_thrift.api.ttypes as ttypes
+import gen_thrift.common.ttypes as common
+
 import ai.chronon.utils as utils
 import ai.chronon.windows as window_utils
 
 OperationType = int  # type(zthrift.Operation.FIRST)
 
+
+def _get_output_table_name(obj, full_name: bool = False):
+    """
+    Group by backfill output table name
+    To be synced with api.Extensions.scala
+    """
+    if not obj.metaData.name:
+        utils.__set_name(obj, ttypes.GroupBy, "group_bys")
+    return utils.output_table_name(obj, full_name)
+
+
 # The GroupBy's default online/production status is None and it will inherit
 # online/production status from the Joins it is included.
 # If it is included in multiple joins, it is considered online/production
@@ -58,7 +70,6 @@ class Accuracy(ttypes.Accuracy):
 
 
 class Operation:
-
     MIN = ttypes.Operation.MIN
     """Minimum value in the column"""
 
@@ -143,9 +154,7 @@ class Operation:
     UNIQUE_TOP_K = collector(ttypes.Operation.UNIQUE_TOP_K)
     """Returns top k unique elements ranked by their values. Automatically deduplicates inputs. For structs, requires sort_key (String) and unique_id (Long) fields."""
 
-    APPROX_PERCENTILE = generic_collector(
-        ttypes.Operation.APPROX_PERCENTILE, ["percentiles"], k=20
-    )
+    APPROX_PERCENTILE = generic_collector(ttypes.Operation.APPROX_PERCENTILE, ["percentiles"], k=20)
     """Approximate percentile calculation with configurable accuracy parameter k=20"""
 
 
@@ -169,9 +178,7 @@ def DefaultAggregation(keys, sources, operation=Operation.LAST, tags=None):
         "ds",
         query.timeColumn,
     ]
-    aggregate_columns += [
-        column for column in columns if column not in non_aggregate_columns
-    ]
+    aggregate_columns += [column for column in columns if column not in non_aggregate_columns]
     return [
         Aggregation(operation=operation, input_column=column, tags=tags)
         for column in aggregate_columns
@@ -232,9 +239,7 @@ def Aggregation(
         elif isinstance(w, common.Window):
             return w
         else:
-            raise Exception(
-                "window should be either a string like '7d', '24h', or a Window type"
-            )
+            raise Exception("window should be either a string like '7d', '24h', or a Window type")
 
     norm_windows = [normalize(w) for w in windows] if windows else None
 
@@ -279,8 +284,7 @@ def validate_group_by(group_by: ttypes.GroupBy):
     first_source_columns = set(utils.get_columns(sources[0]))
     # TODO undo this check after ml_models CI passes
    assert "ts" not in first_source_columns, (
-        "'ts' is a reserved key word for Chronon,"
-        " please specify the expression in timeColumn"
+        "'ts' is a reserved key word for Chronon, please specify the expression in timeColumn"
    )
     for src in sources:
         query = utils.get_query(src)
@@ -290,8 +294,7 @@ def validate_group_by(group_by: ttypes.GroupBy):
                 "event source as it should be the same with timeColumn"
             )
             assert query.reversalColumn is None, (
-                "reversalColumn should not be specified for event source "
-                "as it won't have mutations"
+                "reversalColumn should not be specified for event source as it won't have mutations"
             )
             if group_by.accuracy != Accuracy.SNAPSHOT:
                 assert query.timeColumn is not None, (
@@ -300,9 +303,9 @@ def validate_group_by(group_by: ttypes.GroupBy):
                 )
         else:
             if contains_windowed_aggregation(aggregations):
-                assert (
-
-                )
+                assert query.timeColumn, (
+                    "Please specify timeColumn for entity source with windowed aggregations"
+                )
 
     column_set = None
     # all sources should select the same columns
@@ -310,7 +313,7 @@ def validate_group_by(group_by: ttypes.GroupBy):
         column_set = set(utils.get_columns(source))
         column_diff = column_set ^ first_source_columns
         assert not column_diff, f"""
-Mismatched columns among sources [1, {i+2}], Difference: {column_diff}
+Mismatched columns among sources [1, {i + 2}], Difference: {column_diff}
 """
 
     # all keys should be present in the selected columns
@@ -325,10 +328,7 @@ Keys {unselected_keys}, are unselected in source
         has_mutations = (
             any(
                 [
-                    (
-                        s.entities.mutationTable is not None
-                        or s.entities.mutationTopic is not None
-                    )
+                    (s.entities.mutationTable is not None or s.entities.mutationTopic is not None)
                     for s in sources
                     if s.entities is not None
                 ]
@@ -336,9 +336,9 @@ Keys {unselected_keys}, are unselected in source
             if not is_events
             else False
         )
-        assert not (
-
-        )
+        assert not (is_events or has_mutations), (
+            "You can only set aggregations=None in an EntitySource without mutations"
+        )
     else:
         columns = set([c for src in sources for c in utils.get_columns(src)])
         for agg in aggregations:
@@ -355,9 +355,7 @@ Keys {unselected_keys}, are unselected in source
                 try:
                     percentile_array = json.loads(agg.argMap["percentiles"])
                     assert isinstance(percentile_array, list)
-                    assert all(
-                        [float(p) >= 0 and float(p) <= 1 for p in percentile_array]
-                    )
+                    assert all([float(p) >= 0 and float(p) <= 1 for p in percentile_array])
                 except Exception as e:
                     LOGGER.exception(e)
                     raise ValueError(
@@ -388,9 +386,7 @@ Keys {unselected_keys}, are unselected in source
     )
 
 
-_ANY_SOURCE_TYPE = Union[
-    ttypes.Source, ttypes.EventSource, ttypes.EntitySource, ttypes.JoinSource
-]
+_ANY_SOURCE_TYPE = Union[ttypes.Source, ttypes.EventSource, ttypes.EntitySource, ttypes.JoinSource]
 
 
 def _get_op_suffix(operation, argmap):
@@ -409,7 +405,9 @@ def _get_op_suffix(operation, argmap):
 
 
 def get_output_col_names(aggregation):
-    base_name =
+    base_name = (
+        f"{aggregation.inputColumn}_{_get_op_suffix(aggregation.operation, aggregation.argMap)}"
+    )
     windowed_names = []
     if aggregation.windows:
         for window in aggregation.windows:
@@ -456,7 +454,7 @@ def GroupBy(
     :param sources:
         can be constructed as entities or events or joinSource::
 
-            import
+            import gen_thrift.api.ttypes as chronon
             events = chronon.Source(events=chronon.Events(
                 table=YOUR_TABLE,
                 topic=YOUR_TOPIC # <- OPTIONAL for serving
@@ -478,7 +476,7 @@ def GroupBy(
 
         Multiple sources can be supplied to backfill the historical values with their respective start and end
         partitions. However, only one source is allowed to be a streaming one.
-    :type sources: List[
+    :type sources: List[gen_thrift.api.ttypes.Events|gen_thrift.api.ttypes.Entities]
     :param keys:
         List of primary keys that defines the data that needs to be collected in the result table. Similar to the
        GroupBy in the SQL context.
@@ -486,12 +484,12 @@ def GroupBy(
     :param aggregations:
         List of aggregations that needs to be computed for the data following the grouping defined by the keys::
 
-            import
+            import gen_thrift.api.ttypes as chronon
             aggregations = [
                 chronon.Aggregation(input_column="entity", operation=Operation.LAST),
                 chronon.Aggregation(input_column="entity", operation=Operation.LAST, windows=['7d'])
             ],
-    :type aggregations: List[
+    :type aggregations: List[gen_thrift.api.ttypes.Aggregation]
     :param online:
         Should we upload the result data of this conf into the KV store so that we can fetch/serve this GroupBy online.
         Once Online is set to True, you ideally should not change the conf.
@@ -533,7 +531,7 @@ def GroupBy(
         Defines the computing accuracy of the GroupBy.
         If "Snapshot" is selected, the aggregations are computed based on the partition identifier - "ds" time column.
         If "Temporal" is selected, the aggregations are computed based on the event time - "ts" time column.
-    :type accuracy:
+    :type accuracy: gen_thrift.api.ttypes.SNAPSHOT or gen_thrift.api.ttypes.TEMPORAL
     :param lag:
         Param that goes into customJson. You can pull this out of the json at path "metaData.customJson.lag"
         This is used by airflow integration to pick an older hive partition to wait on.
@@ -555,7 +553,7 @@ def GroupBy(
     :param derivations:
         Derivation allows arbitrary SQL select clauses to be computed using columns from the output of group by backfill
         output schema. It is supported for offline computations for now.
-    :type derivations: List[
+    :type derivations: List[gen_thrift.api.ttypes.Drivation]
     :param kwargs:
         Additional properties that would be passed to run.py if specified under additional_args property.
         And provides an option to pass custom values to the processing logic.
@@ -585,6 +583,10 @@ def GroupBy(
     """
     assert sources, "Sources are not specified"
 
+    assert isinstance(version, int), (
+        f"Version must be an integer, but found {type(version).__name__}"
+    )
+
     agg_inputs = []
     if aggregations is not None:
         agg_inputs = [agg.inputColumn for agg in aggregations]
@@ -596,11 +598,7 @@ def GroupBy(
         query = (
             source.entities.query
             if source.entities is not None
-            else (
-                source.events.query
-                if source.events is not None
-                else source.joinSource.query
-            )
+            else (source.events.query if source.events is not None else source.joinSource.query)
         )
 
         if query.selects is None:
@@ -665,13 +663,12 @@ def GroupBy(
         for output_col in get_output_col_names(agg):
             column_tags[output_col] = agg.tags
 
-
     metadata = ttypes.MetaData(
         online=online,
         production=production,
         outputNamespace=output_namespace,
         tableProperties=table_properties,
-        team=team,
+        team=team,
         executionInfo=exec_info,
         tags=tags if tags else None,
         columnTags=column_tags if column_tags else None,
@@ -689,4 +686,7 @@ def GroupBy(
     )
     validate_group_by(group_by)
 
+    # Add the table property that calls the private function
+    group_by.__class__.table = property(lambda self: _get_output_table_name(self, full_name=True))
+
     return group_by
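Beyond the import rewrite and line-wrapping cleanups, the functional changes in this file are the new _get_output_table_name helper, an isinstance(version, int) guard inside GroupBy(), and a table property attached to the returned object so the backfill output table name can be read directly. A rough usage sketch follows; the source, table, and column names are placeholders, and the exact GroupBy, EventSource, and Query signatures should be checked against the installed package.

    # Hypothetical GroupBy definition illustrating the new behavior; names are placeholders.
    from ai.chronon.group_by import Aggregation, GroupBy, Operation
    from ai.chronon.query import Query
    from ai.chronon.source import EventSource  # assumed helper wrapping the gen_thrift Source struct

    purchases = EventSource(
        table="my_team.purchases",  # placeholder table
        query=Query(selects={"amount": "amount"}, time_column="created_at"),
    )

    purchase_stats = GroupBy(
        sources=[purchases],
        keys=["user_id"],
        aggregations=[Aggregation(input_column="amount", operation=Operation.LAST, windows=["7d"])],
        version=0,  # 0.3.0 now asserts this is an int
    )

    # New in 0.3.0: the backfill output table name is exposed as a property.
    print(purchase_stats.table)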
ai/chronon/join.py
CHANGED
@@ -19,14 +19,34 @@ import logging
 from collections import Counter
 from typing import Dict, List, Tuple, Union
 
-import
-import
+import gen_thrift.api.ttypes as api
+import gen_thrift.common.ttypes as common
+
 import ai.chronon.repo.extract_objects as eo
 import ai.chronon.utils as utils
+from ai.chronon.cli.compile import parse_teams
 
 logging.basicConfig(level=logging.INFO)
 
 
+def _get_output_table_name(join: api.Join, full_name: bool = False):
+    """generate output table name for join backfill job"""
+    # join sources could also be created inline alongside groupBy file
+    # so we specify fallback module as group_bys
+    if isinstance(join, api.Join):
+        utils.__set_name(join, api.Join, "joins")
+    # set output namespace
+    if not join.metaData.outputNamespace:
+        team_name = join.metaData.name.split(".")[0]
+        namespace = (
+            parse_teams.load_teams(utils.chronon_root_path, print=False)
+            .get(team_name)
+            .outputNamespace
+        )
+        join.metaData.outputNamespace = namespace
+    return utils.output_table_name(join, full_name=full_name)
+
+
 def JoinPart(
     group_by: api.GroupBy,
     key_mapping: Dict[str, str] = None,
@@ -57,9 +77,9 @@ def JoinPart(
       components like GroupBys.
     """
 
-    assert isinstance(
-
-    )
+    assert isinstance(group_by, api.GroupBy), (
+        f"Expecting GroupBy. But found {type(group_by).__name__}"
+    )
 
     # used for reset for next run
     import_copy = __builtins__["__import__"]
@@ -80,14 +100,10 @@ def JoinPart(
 
     if group_by_module_name:
         logging.debug(
-            "group_by's module info from garbage collector {}".format(
-                group_by_module_name
-            )
+            "group_by's module info from garbage collector {}".format(group_by_module_name)
         )
         group_by_module = importlib.import_module(group_by_module_name)
-        __builtins__["__import__"] = eo.import_module_set_name(
-            group_by_module, api.GroupBy
-        )
+        __builtins__["__import__"] = eo.import_module_set_name(group_by_module, api.GroupBy)
     else:
         if not group_by.metaData.name:
             logging.error("No group_by file or custom group_by name found")
@@ -133,9 +149,9 @@ class DataType:
     # TIMESTAMP = api.TDataType(api.DataKind.TIMESTAMP)
 
     def MAP(key_type: api.TDataType, value_type: api.TDataType) -> api.TDataType:
-        assert key_type == api.TDataType(
-
-        )
+        assert key_type == api.TDataType(api.DataKind.STRING), (
+            "key_type has to STRING for MAP types"
+        )
 
         return api.TDataType(
             api.DataKind.MAP,
@@ -143,9 +159,7 @@ class DataType:
         )
 
     def LIST(elem_type: api.TDataType) -> api.TDataType:
-        return api.TDataType(
-            api.DataKind.LIST, params=[api.DataField("elem", elem_type)]
-        )
+        return api.TDataType(api.DataKind.LIST, params=[api.DataField("elem", elem_type)])
 
     def STRUCT(name: str, *fields: FieldsType) -> api.TDataType:
         return api.TDataType(
@@ -475,18 +489,19 @@ def Join(
     if isinstance(row_ids, str):
         row_ids = [row_ids]
 
+    assert isinstance(version, int), (
+        f"Version must be an integer, but found {type(version).__name__}"
+    )
+
     # create a deep copy for case: multiple LeftOuterJoin use the same left,
     # validation will fail after the first iteration
     updated_left = copy.deepcopy(left)
     if left.events and left.events.query.selects:
         assert "ts" not in left.events.query.selects.keys(), (
-            "'ts' is a reserved key word for Chronon,"
-            " please specify the expression in timeColumn"
+            "'ts' is a reserved key word for Chronon, please specify the expression in timeColumn"
         )
         # mapping ts to query.timeColumn to events only
-        updated_left.events.query.selects.update(
-            {"ts": updated_left.events.query.timeColumn}
-        )
+        updated_left.events.query.selects.update({"ts": updated_left.events.query.timeColumn})
 
     if label_part:
         label_metadata = api.MetaData(
@@ -499,9 +514,7 @@ def Join(
             metaData=label_metadata,
         )
 
-    consistency_sample_percent =
-        consistency_sample_percent if check_consistency else None
-    )
+    consistency_sample_percent = consistency_sample_percent if check_consistency else None
 
     # external parts need to be unique on (prefix, part.source.metaData.name)
     if online_external_parts:
@@ -513,15 +526,13 @@ def Join(
             if count > 1:
                 has_duplicates = True
                 print(f"Found {count - 1} duplicate(s) for external part {key}")
-        assert
-            has_duplicates is False
-        ), "Please address all the above mentioned duplicates."
+        assert has_duplicates is False, "Please address all the above mentioned duplicates."
 
     if bootstrap_from_log:
         has_logging = sample_percent > 0 and online
-        assert (
-
-        )
+        assert has_logging, (
+            "Join must be online with sample_percent set in order to use bootstrap_from_log option"
+        )
         bootstrap_parts = (bootstrap_parts or []) + [
             api.BootstrapPart(
                 # templated values will be replaced when metaData.name is set at the end
@@ -535,7 +546,7 @@ def Join(
         env=env_vars,
         stepDays=step_days,
         historicalBackfill=historical_backfill,
-        clusterConf=cluster_conf
+        clusterConf=cluster_conf,
     )
 
     metadata = api.MetaData(
@@ -563,4 +574,7 @@ def Join(
         useLongNames=use_long_names,
     )
 
+    # Add the table property that calls the private function
+    join.__class__.table = property(lambda self: _get_output_table_name(self, full_name=True))
+
     return join
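join.py gets the same treatment as group_by.py (gen_thrift imports, a version integer check, and a table property on the returned object), with one extra wrinkle: when a Join has no explicit outputNamespace, the new helper resolves it from the team entry in the repo's teams configuration before building the table name. A condensed sketch of that resolution, using an illustrative join name rather than anything from this package:

    # Condensed sketch of the namespace fallback in the new _get_output_table_name helper
    # (illustrative, not a verbatim copy of the function above).
    import gen_thrift.api.ttypes as api

    import ai.chronon.utils as utils
    from ai.chronon.cli.compile import parse_teams


    def resolve_output_table(join: api.Join) -> str:
        if not join.metaData.outputNamespace:
            # e.g. metaData.name == "growth.purchase_features.v1" -> team "growth"
            team_name = join.metaData.name.split(".")[0]
            teams = parse_teams.load_teams(utils.chronon_root_path, print=False)
            join.metaData.outputNamespace = teams.get(team_name).outputNamespace
        return utils.output_table_name(join, full_name=True)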
ai/chronon/logger.py
CHANGED
@@ -1,4 +1,3 @@
-
 # Copyright (C) 2023 The Chronon Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,7 +14,7 @@
 
 import logging
 
-LOG_FORMAT =
+LOG_FORMAT = "[%(asctime)-11s] %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
 
 
 def get_logger(log_level=logging.INFO):
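The diff touches only the module header and the LOG_FORMAT literal. For reference, a format string like this produces lines of the following shape when wired into the standard logging module (illustrative output, not taken from the package):

    import logging

    LOG_FORMAT = "[%(asctime)-11s] %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    logging.getLogger(__name__).info("compiled 12 confs")
    # e.g. [2025-01-01 12:00:00,123] INFO [example.py:6] compiled 12 confs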
ai/chronon/model.py
CHANGED
@@ -1,6 +1,6 @@
 from typing import Optional
 
-import
+import gen_thrift.api.ttypes as ttypes
 
 
 class ModelType:
@@ -14,7 +14,7 @@ def Model(
     outputSchema: ttypes.TDataType,
     modelType: ModelType,
     name: str = None,
-    modelParams: Optional[dict[str, str]] = None
+    modelParams: Optional[dict[str, str]] = None,
 ) -> ttypes.Model:
     if not isinstance(source, ttypes.Source):
         raise ValueError("Invalid source type")
@@ -31,5 +31,10 @@ def Model(
         name=name,
     )
 
-    return ttypes.Model(
-
+    return ttypes.Model(
+        modelType=modelType,
+        outputSchema=outputSchema,
+        source=source,
+        modelParams=modelParams,
+        metaData=metaData,
+    )
ai/chronon/query.py
CHANGED
@@ -15,7 +15,7 @@
 from collections import OrderedDict
 from typing import Dict, List
 
-import
+import gen_thrift.api.ttypes as api
 
 
 def Query(
@@ -96,7 +96,7 @@ def Query(
         reversalColumn=reversal_column,
         partitionColumn=partition_column,
         subPartitionsToWaitFor=sub_partitions_to_wait_for,
-        partitionFormat=partition_format
+        partitionFormat=partition_format,
     )
 
 
ai/chronon/repo/__init__.py
CHANGED
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from
-from ai.chronon.orchestration.ttypes import ConfType
+from gen_thrift.api.ttypes import ConfType, GroupBy, Join, Model, StagingQuery
 
 JOIN_FOLDER_NAME = "joins"
 GROUP_BY_FOLDER_NAME = "group_bys"
ai/chronon/repo/aws.py
CHANGED
@@ -38,17 +38,13 @@ class AwsRunner(Runner):
         service_jar_path = AwsRunner.download_zipline_aws_jar(
             ZIPLINE_DIRECTORY, get_customer_id(), args["version"], ZIPLINE_AWS_SERVICE_JAR
         )
-        jar_path =
-            f"{service_jar_path}:{aws_jar_path}" if args['mode'] == "fetch" else aws_jar_path
-        )
+        jar_path = f"{service_jar_path}:{aws_jar_path}" if args["mode"] == "fetch" else aws_jar_path
         self.version = args.get("version", "latest")
 
         super().__init__(args, os.path.expanduser(jar_path))
 
     @staticmethod
-    def upload_s3_file(
-        bucket_name: str, source_file_name: str, destination_blob_name: str
-    ):
+    def upload_s3_file(bucket_name: str, source_file_name: str, destination_blob_name: str):
         """Uploads a file to the bucket."""
         obj = boto3.client("s3")
         try:
@@ -61,7 +57,9 @@ class AwsRunner(Runner):
             raise RuntimeError(f"Failed to upload {source_file_name}: {str(e)}") from e
 
     @staticmethod
-    def download_zipline_aws_jar(
+    def download_zipline_aws_jar(
+        destination_dir: str, customer_id: str, version: str, jar_name: str
+    ):
         s3_client = boto3.client("s3")
         destination_path = f"{destination_dir}/{jar_name}"
         source_key_name = f"release/{version}/jars/{jar_name}"
@@ -78,9 +76,7 @@ class AwsRunner(Runner):
         if are_identical:
             print(f"{destination_path} matches S3 {bucket_name}/{source_key_name}")
         else:
-            print(
-                f"{destination_path} does NOT match S3 {bucket_name}/{source_key_name}"
-            )
+            print(f"{destination_path} does NOT match S3 {bucket_name}/{source_key_name}")
             print(f"Downloading {jar_name} from S3...")
 
             s3_client.download_file(
@@ -122,9 +118,7 @@ class AwsRunner(Runner):
             return None
 
     @staticmethod
-    def compare_s3_and_local_file_hashes(
-        bucket_name: str, s3_file_path: str, local_file_path: str
-    ):
+    def compare_s3_and_local_file_hashes(bucket_name: str, s3_file_path: str, local_file_path: str):
         try:
             s3_hash = AwsRunner.get_s3_file_hash(bucket_name, s3_file_path)
             local_hash = AwsRunner.get_local_file_hash(local_file_path)
@@ -144,9 +138,7 @@ class AwsRunner(Runner):
         s3_files = []
         for source_file in local_files_to_upload:
             # upload to `metadata` folder
-            destination_file_path = (
-                f"metadata/{extract_filename_from_path(source_file)}"
-            )
+            destination_file_path = f"metadata/{extract_filename_from_path(source_file)}"
             s3_files.append(
                 AwsRunner.upload_s3_file(
                     customer_warehouse_bucket_name, source_file, destination_file_path
@@ -169,7 +161,9 @@ class AwsRunner(Runner):
             + f"/release/{self.version}/jars/{ZIPLINE_AWS_JAR_DEFAULT}"
         )
 
-        final_args =
+        final_args = (
+            "{user_args} --jar-uri={jar_uri} --job-type={job_type} --main-class={main_class}"
+        )
 
         if job_type == JobType.FLINK:
             main_class = "ai.chronon.flink.FlinkJob"
@@ -197,7 +191,7 @@ class AwsRunner(Runner):
                     main_class=main_class,
                 )
                 + f" --additional-conf-path={EMR_MOUNT_FILE_PREFIX}additional-confs.yaml"
-
+                + f" --files={s3_file_args}"
             )
         else:
             raise ValueError(f"Invalid job type: {job_type}")
@@ -240,15 +234,12 @@ class AwsRunner(Runner):
                 end_ds=end_ds,
                 # when we download files from s3 to emr, they'll be mounted at /mnt/zipline
                 override_conf_path=(
-                    EMR_MOUNT_FILE_PREFIX
-                    + extract_filename_from_path(self.conf)
+                    EMR_MOUNT_FILE_PREFIX + extract_filename_from_path(self.conf)
                     if self.conf
                     else None
                 ),
             ),
-            additional_args=os.environ.get(
-                "CHRONON_CONFIG_ADDITIONAL_ARGS", ""
-            ),
+            additional_args=os.environ.get("CHRONON_CONFIG_ADDITIONAL_ARGS", ""),
         )
 
         emr_args = self.generate_emr_submitter_args(
@@ -265,15 +256,12 @@ class AwsRunner(Runner):
                 start_ds=self.start_ds,
                 # when we download files from s3 to emr, they'll be mounted at /mnt/zipline
                 override_conf_path=(
-                    EMR_MOUNT_FILE_PREFIX
-                    + extract_filename_from_path(self.conf)
+                    EMR_MOUNT_FILE_PREFIX + extract_filename_from_path(self.conf)
                     if self.conf
                     else None
                 ),
             ),
-            additional_args=os.environ.get(
-                "CHRONON_CONFIG_ADDITIONAL_ARGS", ""
-            ),
+            additional_args=os.environ.get("CHRONON_CONFIG_ADDITIONAL_ARGS", ""),
        )
 
         emr_args = self.generate_emr_submitter_args(
@@ -288,9 +276,7 @@ class AwsRunner(Runner):
             # parallel backfill mode
             with multiprocessing.Pool(processes=int(self.parallelism)) as pool:
                 LOG.info(
-                    "Running args list {} with pool size {}".format(
-                        command_list, self.parallelism
-                    )
+                    "Running args list {} with pool size {}".format(command_list, self.parallelism)
                 )
                 pool.map(check_call, command_list)
         elif len(command_list) == 1: