chalkruntime 3.32.2__tar.gz → 3.32.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/PKG-INFO +1 -1
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/chalk_overload.py +1 -4
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/general_bound_invoker.py +190 -2
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/one_to_one_invoker.py +7 -3
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/vectorized_hasmany_sampler.py +197 -1
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/importer.py +1 -1
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/PKG-INFO +1 -1
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/pyproject.toml +2 -1
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/README.md +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/constants.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/dataframe.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/lazyframe.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/failed_argument.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/resolver_errors.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/wrapped_resolver_exception.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/convert_chalkpy_underscore.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/feature.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/filter_conversion.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/global_graph.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_impl.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_proxy.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_state.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/jinja_parser.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/materializations.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/maybe_named_collection.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/named_query.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/nearest_neighbor.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/overlay_graph.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/prompt_service.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/protograph_deserializer.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/protograph_serializer.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/resolver.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/singletons.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/sklearn_model_parser.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/stream_resolver.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore_codec_info.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore_operation_registry.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/variables.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/heaptrack_launcher.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/group_incrementalizer.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/incrementalizer.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/batch_result_collector.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/bound_invoker.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/bound_invoker_cache.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/no_arg_scalar_invoker.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/overlay_features.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/parse_external_resolver.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/partition_batch.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/query_execution_parameters.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_args_builder.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_input.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_input_upload.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_output_metadata.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_raw_output_parsing.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_result.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_runner.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/sample.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/validator.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/converter.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/memray_launcher.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/metadata.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/py.typed +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/config.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/entrypoint.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/env_helper.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/remote_python_function_registry_client.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/service.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/composed_rewriter.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/contextual_query_rewriter.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/filter_query_rewriter.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/identity_rewriter.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/query_rewriter.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/query_rewriter_helper.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/converter_utils.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/exc.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/message_parsing.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/resolver_utils.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/types.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/window_keys.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/__init__.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/async_helpers.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/contextvars.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/datadog.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/internal_pl_utils.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/tracing.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/viztracer_profiling.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/valgrind_launcher.py +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/SOURCES.txt +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/dependency_links.txt +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/requires.txt +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/top_level.txt +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/setup.cfg +0 -0
- {chalkruntime-3.32.2 → chalkruntime-3.32.4}/setup.py +0 -0
|
@@ -2,11 +2,10 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import TypeVar
|
|
4
4
|
|
|
5
|
-
import libchalk.chalkfunction
|
|
6
|
-
import libchalk.udf
|
|
7
5
|
from chalkruntime.graph.maybe_named_collection import MaybeNamedCollection
|
|
8
6
|
from libchalk.chalkfunction import (
|
|
9
7
|
ArgumentType,
|
|
8
|
+
ChalkFunctionOverload,
|
|
10
9
|
ChalkFunctionOverloadFailed,
|
|
11
10
|
ChalkFunctionOverloadResolved,
|
|
12
11
|
default_arrow_type_promoter,
|
|
@@ -15,8 +14,6 @@ from libchalk.chalkfunction import (
|
|
|
15
14
|
TItem = TypeVar("TItem")
|
|
16
15
|
TOther = TypeVar("TOther")
|
|
17
16
|
|
|
18
|
-
ChalkFunctionOverload = libchalk.chalkfunction.ChalkFunctionOverload
|
|
19
|
-
|
|
20
17
|
|
|
21
18
|
def get_resolved_overload(
|
|
22
19
|
overload: ChalkFunctionOverload,
|
|
@@ -90,6 +90,7 @@ from chalkruntime.invoker.resolver_runner import (
|
|
|
90
90
|
)
|
|
91
91
|
from chalkruntime.invoker.vectorized_hasmany_sampler import (
|
|
92
92
|
RESOLVER_INPUT_IDX_COL_NAME,
|
|
93
|
+
NestedHasManySampler,
|
|
93
94
|
PolarsVectorizedHasManySampler,
|
|
94
95
|
PyArrowVectorizedHasManySampler,
|
|
95
96
|
VectorizedHasManySampler,
|
|
@@ -152,7 +153,8 @@ def _parse_df_feature(
|
|
|
152
153
|
continue
|
|
153
154
|
# use groups.raw instead of groups.partitioned, since we don't partition for DF resolvers.
|
|
154
155
|
hm_table = groups.raw_has_many[hm_ft].result_and_metadata
|
|
155
|
-
|
|
156
|
+
_hm_pa = hm_table.to_table()
|
|
157
|
+
hm_df = pa_table_to_pl_df(_hm_pa), None
|
|
156
158
|
sampler = PolarsVectorizedHasManySampler(
|
|
157
159
|
resolver_inputs_df=resolver_inputs_df,
|
|
158
160
|
has_many_feature=hm_ft,
|
|
@@ -868,7 +870,10 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
|
|
|
868
870
|
# we walk up the has-many join path, iteratively packing the deeper layers into lists of structs
|
|
869
871
|
hm_base_subfeatures = all_has_many_subfeatures(feature_type)
|
|
870
872
|
hm_schema_subfeatures = tuple(x for x in required_features_to_sample(feature_type) if is_underlying_has_many(x))
|
|
871
|
-
assert len(hm_base_subfeatures)
|
|
873
|
+
assert len(hm_base_subfeatures) > 0, f"Expected at least one has-many subfeature for {feature_type}"
|
|
874
|
+
assert len(hm_base_subfeatures) == len(hm_schema_subfeatures), (
|
|
875
|
+
f"hm_base_subfeatures ({hm_base_subfeatures}) and hm_schema_subfeatures ({hm_schema_subfeatures}) must have the same length"
|
|
876
|
+
)
|
|
872
877
|
|
|
873
878
|
current_hm_df: tuple[pl.DataFrame, pl.DataFrame | None] | None = None
|
|
874
879
|
hm_entry: HasManyFeatureEntry | None = None
|
|
@@ -896,8 +901,190 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
|
|
|
896
901
|
current_hm_df = (sampler.join_df_and_pack_into_struct().drop(RESOLVER_INPUT_IDX_COL_NAME), None)
|
|
897
902
|
# now we want to select only the current_hm_feature column from the resolver inputs, dropping irrelevant columns
|
|
898
903
|
current_hm_feature = has_many_subfeatures_to_projection(base_hm_feature, [relative_hm_feature])
|
|
904
|
+
# Top-down nested HM: if the outer feature's schema declares nested HM columns
|
|
905
|
+
# that are absent from the current table, retrieve their data from the mapping
|
|
906
|
+
# and use NestedHasManySampler top-down so timestamps flow from outer to inner
|
|
907
|
+
# (required for correct online temporal semantics).
|
|
908
|
+
_nested_hm_schema = [
|
|
909
|
+
col for col in current_hm_feature.underlying.df.columns if isinstance(col, HasManyFeatureType)
|
|
910
|
+
]
|
|
911
|
+
if _nested_hm_schema:
|
|
912
|
+
_current_table_cols = set(current_hm_df[0].columns)
|
|
913
|
+
_missing_nested = [col for col in _nested_hm_schema if col.root_fqn not in _current_table_cols]
|
|
914
|
+
if not _missing_nested:
|
|
915
|
+
# For "deeply nested" cases (e.g. triple-nested: University → College → Course → Section):
|
|
916
|
+
# nested HM cols (e.g. college.courses) are already packed in the table, but their packed
|
|
917
|
+
# structs lack sub-nested data (e.g. course.sections) because the resolver encoding schema
|
|
918
|
+
# is scalar-only. However, partitioned_has_many already has a separate courses table WITH
|
|
919
|
+
# course.sections. Drop these cols so the _missing_nested path below picks them up and uses
|
|
920
|
+
# NestedHasManySampler with the correctly nested data.
|
|
921
|
+
# Only drop cols where we can actually retrieve the data (same lookup as the _missing_nested
|
|
922
|
+
# handler further below), to avoid dropping cols that PolarsVectorizedHasManySampler
|
|
923
|
+
# still needs when data is unavailable through the nested path.
|
|
924
|
+
_phm = feature_to_data_mapping.partitioned_has_many or {}
|
|
925
|
+
|
|
926
|
+
def _nested_data_available(col: HasManyFeatureType) -> bool:
|
|
927
|
+
if col in _phm:
|
|
928
|
+
return True
|
|
929
|
+
if col in feature_to_data_mapping.raw_has_many:
|
|
930
|
+
return True
|
|
931
|
+
return False
|
|
932
|
+
|
|
933
|
+
_deeply_nested = [
|
|
934
|
+
col
|
|
935
|
+
for col in _nested_hm_schema
|
|
936
|
+
if col.root_fqn in _current_table_cols
|
|
937
|
+
and any(isinstance(x, HasManyFeatureType) for x in col.underlying.df.columns)
|
|
938
|
+
and _nested_data_available(col)
|
|
939
|
+
]
|
|
940
|
+
if _deeply_nested:
|
|
941
|
+
_cols_to_drop = [col.root_fqn for col in _deeply_nested]
|
|
942
|
+
current_hm_df = (current_hm_df[0].drop(_cols_to_drop), current_hm_df[1])
|
|
943
|
+
_current_table_cols = set(current_hm_df[0].columns)
|
|
944
|
+
_missing_nested = [col for col in _nested_hm_schema if col.root_fqn not in _current_table_cols]
|
|
945
|
+
if _missing_nested:
|
|
946
|
+
resolver_inputs_pl = (
|
|
947
|
+
resolver_inputs_df
|
|
948
|
+
if resolver_inputs_df is not None
|
|
949
|
+
else pa_table_to_pl_df(unwrap_optional(resolver_inputs_table).to_table())
|
|
950
|
+
)
|
|
951
|
+
nested_levels: list[
|
|
952
|
+
tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]
|
|
953
|
+
] = [(current_hm_feature, current_hm_df[0])]
|
|
954
|
+
for _nested_hm in _missing_nested:
|
|
955
|
+
_nested_data_raw: pl.DataFrame | None = None
|
|
956
|
+
if _nested_hm in feature_to_data_mapping.partitioned_has_many:
|
|
957
|
+
_nested_data_raw = feature_to_data_mapping.partitioned_has_many[_nested_hm][0]
|
|
958
|
+
elif _nested_hm in feature_to_data_mapping.raw_has_many:
|
|
959
|
+
_nested_data_raw = pa_table_to_pl_df(
|
|
960
|
+
feature_to_data_mapping.raw_has_many[_nested_hm].result_and_metadata.to_table()
|
|
961
|
+
)
|
|
962
|
+
if _nested_data_raw is not None:
|
|
963
|
+
# Sections extracted from packed resolver output (via PushHasManyToResult)
|
|
964
|
+
# inherit the parent's TS_COL_NAME but lack the foreign feature-time
|
|
965
|
+
# column required by _get_ungrouped_rows' temporal filter. Use
|
|
966
|
+
# TS_COL_NAME as a proxy so timestamps flow correctly top-down.
|
|
967
|
+
_foreign_ts_fqn = getattr(
|
|
968
|
+
self._graph.ts_feature_for_namespace(_nested_hm.underlying.foreign_namespace()),
|
|
969
|
+
"fqn",
|
|
970
|
+
None,
|
|
971
|
+
)
|
|
972
|
+
if (
|
|
973
|
+
_foreign_ts_fqn is not None
|
|
974
|
+
and _foreign_ts_fqn not in _nested_data_raw.columns
|
|
975
|
+
and TS_COL_NAME in _nested_data_raw.columns
|
|
976
|
+
):
|
|
977
|
+
_nested_data_raw = _nested_data_raw.with_columns(
|
|
978
|
+
pl.col(TS_COL_NAME).alias(_foreign_ts_fqn)
|
|
979
|
+
)
|
|
980
|
+
# Pre-pack any sub-nested HM data (bottom-up) to support triple-nested has-many.
|
|
981
|
+
# For example, when _nested_hm=college.courses, check if course.sections data
|
|
982
|
+
# is available and pack it into courses_df before NestedHasManySampler runs.
|
|
983
|
+
_sub_nested_hm_cols = [
|
|
984
|
+
col for col in _nested_hm.underlying.df.columns if isinstance(col, HasManyFeatureType)
|
|
985
|
+
]
|
|
986
|
+
for _sub_nested_hm in _sub_nested_hm_cols:
|
|
987
|
+
_sub_nested_fqn = _sub_nested_hm.root_fqn
|
|
988
|
+
_sub_data: pl.DataFrame | None = None
|
|
989
|
+
# Direct key lookup first
|
|
990
|
+
if (
|
|
991
|
+
feature_to_data_mapping.partitioned_has_many
|
|
992
|
+
and _sub_nested_hm in feature_to_data_mapping.partitioned_has_many
|
|
993
|
+
):
|
|
994
|
+
_sub_data = feature_to_data_mapping.partitioned_has_many[_sub_nested_hm][0]
|
|
995
|
+
elif _sub_nested_hm in feature_to_data_mapping.raw_has_many:
|
|
996
|
+
_sub_data = pa_table_to_pl_df(
|
|
997
|
+
feature_to_data_mapping.raw_has_many[
|
|
998
|
+
_sub_nested_hm
|
|
999
|
+
].result_and_metadata.to_table()
|
|
1000
|
+
)
|
|
1001
|
+
else:
|
|
1002
|
+
# Look for an InputFeatureType key whose underlying matches _sub_nested_hm.
|
|
1003
|
+
# After has_many_join_operator propagates nested HM features, entries like
|
|
1004
|
+
# university.colleges.courses.sections (InputFeatureType) appear in
|
|
1005
|
+
# partitioned_has_many with underlying=course.sections (HasManyFeatureType).
|
|
1006
|
+
for _k, _v in (feature_to_data_mapping.partitioned_has_many or {}).items():
|
|
1007
|
+
if isinstance(_k, InputFeatureType) and _k.underlying == _sub_nested_hm:
|
|
1008
|
+
_sub_data = _v[0]
|
|
1009
|
+
break
|
|
1010
|
+
if _sub_data is not None:
|
|
1011
|
+
# Add foreign ts alias if needed for the sub-nested data
|
|
1012
|
+
_sub_foreign_ts_fqn = getattr(
|
|
1013
|
+
self._graph.ts_feature_for_namespace(
|
|
1014
|
+
_sub_nested_hm.underlying.foreign_namespace()
|
|
1015
|
+
),
|
|
1016
|
+
"fqn",
|
|
1017
|
+
None,
|
|
1018
|
+
)
|
|
1019
|
+
if (
|
|
1020
|
+
_sub_foreign_ts_fqn is not None
|
|
1021
|
+
and _sub_foreign_ts_fqn not in _sub_data.columns
|
|
1022
|
+
and TS_COL_NAME in _sub_data.columns
|
|
1023
|
+
):
|
|
1024
|
+
_sub_data = _sub_data.with_columns(
|
|
1025
|
+
pl.col(TS_COL_NAME).alias(_sub_foreign_ts_fqn)
|
|
1026
|
+
)
|
|
1027
|
+
# Drop the null sub-nested column from _nested_data_raw to avoid
|
|
1028
|
+
# column conflict when the sub-sampler adds the packed column.
|
|
1029
|
+
if _sub_nested_fqn in _nested_data_raw.columns:
|
|
1030
|
+
_nested_data_raw = _nested_data_raw.drop(_sub_nested_fqn)
|
|
1031
|
+
# Also drop RESOLVER_INPUT_IDX_COL_NAME so the sub-sampler
|
|
1032
|
+
# assigns a fresh per-row index for the pack operation.
|
|
1033
|
+
if RESOLVER_INPUT_IDX_COL_NAME in _nested_data_raw.columns:
|
|
1034
|
+
_nested_data_raw = _nested_data_raw.drop(RESOLVER_INPUT_IDX_COL_NAME)
|
|
1035
|
+
_sub_sampler = PolarsVectorizedHasManySampler(
|
|
1036
|
+
resolver_inputs_df=_nested_data_raw,
|
|
1037
|
+
has_many_feature=_sub_nested_hm,
|
|
1038
|
+
has_many_df=(_sub_data, None),
|
|
1039
|
+
graph=self._graph,
|
|
1040
|
+
oom_slim_hm_by_dates=False,
|
|
1041
|
+
oom_slim_hm_by_join_keys=False,
|
|
1042
|
+
enable_indexed_has_many_joins=False,
|
|
1043
|
+
allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
|
|
1044
|
+
include_metadata_columns=False,
|
|
1045
|
+
)
|
|
1046
|
+
_nested_data_raw = _sub_sampler.join_df_and_pack_into_struct().drop(
|
|
1047
|
+
RESOLVER_INPUT_IDX_COL_NAME
|
|
1048
|
+
)
|
|
1049
|
+
nested_levels.append((_nested_hm, _nested_data_raw))
|
|
1050
|
+
if len(nested_levels) > 1:
|
|
1051
|
+
return NestedHasManySampler(
|
|
1052
|
+
resolver_inputs_df=resolver_inputs_pl,
|
|
1053
|
+
levels=nested_levels,
|
|
1054
|
+
graph=self._graph,
|
|
1055
|
+
allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
|
|
1056
|
+
).yield_groups_per_row()
|
|
899
1057
|
else:
|
|
900
1058
|
current_hm_feature = feature_type
|
|
1059
|
+
if len(hm_base_subfeatures) > 1 and all(
|
|
1060
|
+
k in feature_to_data_mapping.raw_has_many for k in hm_base_subfeatures
|
|
1061
|
+
):
|
|
1062
|
+
# Nested has-many in the raw (online/static) path: separate data tables
|
|
1063
|
+
# exist in raw_has_many for each level. Use NestedHasManySampler to pack
|
|
1064
|
+
# them top-down (outermost first, so outer timestamps govern inner levels) before yielding groups per resolver row.
|
|
1065
|
+
resolver_inputs_pl = (
|
|
1066
|
+
resolver_inputs_df
|
|
1067
|
+
if resolver_inputs_df is not None
|
|
1068
|
+
else pa_table_to_pl_df(unwrap_optional(resolver_inputs_table).to_table())
|
|
1069
|
+
)
|
|
1070
|
+
levels: list[tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]] = []
|
|
1071
|
+
for i, base_hm in enumerate(hm_base_subfeatures):
|
|
1072
|
+
entry = feature_to_data_mapping.raw_has_many[base_hm]
|
|
1073
|
+
data_df = pa_table_to_pl_df(entry.result_and_metadata.to_table())
|
|
1074
|
+
# The outermost feature is used as-is; inner features are expressed
|
|
1075
|
+
# relative to their immediate parent so the join keys resolve correctly.
|
|
1076
|
+
feature_for_level = (
|
|
1077
|
+
base_hm
|
|
1078
|
+
if i == 0
|
|
1079
|
+
else cast(InputFeatureType[HasManyFeatureType], base_hm).relative_to(hm_base_subfeatures[i - 1])
|
|
1080
|
+
)
|
|
1081
|
+
levels.append((feature_for_level, data_df))
|
|
1082
|
+
return NestedHasManySampler(
|
|
1083
|
+
resolver_inputs_df=resolver_inputs_pl,
|
|
1084
|
+
levels=levels,
|
|
1085
|
+
graph=self._graph,
|
|
1086
|
+
allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
|
|
1087
|
+
).yield_groups_per_row()
|
|
901
1088
|
hm_entry = feature_to_data_mapping.raw_has_many[current_hm_feature]
|
|
902
1089
|
|
|
903
1090
|
sampler: VectorizedHasManySampler | None = None
|
|
@@ -974,6 +1161,7 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
|
|
|
974
1161
|
)
|
|
975
1162
|
# If there are any has-one still left in path after the last has-many, we need to change the namespace
|
|
976
1163
|
if not is_underlying_has_many(feature_type):
|
|
1164
|
+
assert len(hm_base_subfeatures) > 0, f"Expected at least one has-many subfeature for {feature_type}"
|
|
977
1165
|
packed_lf = packed_lf.select(
|
|
978
1166
|
pl.col(column.root_fqn).alias(column.fqn)
|
|
979
1167
|
for column in hm_base_subfeatures[-1].underlying.df.columns
|
|
@@ -100,6 +100,7 @@ def _assimilate_resolvers(
|
|
|
100
100
|
):
|
|
101
101
|
root_ns = get_unique_item(res.unique_input_root_ns for res in resolvers)
|
|
102
102
|
graph = get_unique_item(res.graph for res in resolvers)
|
|
103
|
+
# Note: this is mutated below if any of the resolvers are async to prevent crashes
|
|
103
104
|
is_cpu_bound = get_unique_item(res.resource_hint == "cpu" for res in resolvers)
|
|
104
105
|
pkey_feature = graph.primary_feature_for_namespace(root_ns)
|
|
105
106
|
assert pkey_feature is not None
|
|
@@ -136,10 +137,13 @@ def _assimilate_resolvers(
|
|
|
136
137
|
default_args.append(maybe_default_arg)
|
|
137
138
|
for out in resolver.output:
|
|
138
139
|
output_refs.add(out)
|
|
140
|
+
metadata = ResolverOutputMetadata.from_resolver(resolver, pkey_feature)
|
|
139
141
|
resolver_to_input_fqns_has_default_and_defaults[resolver] = (
|
|
140
|
-
|
|
142
|
+
metadata,
|
|
141
143
|
tuple(inputs),
|
|
142
144
|
)
|
|
145
|
+
if metadata.is_async:
|
|
146
|
+
is_cpu_bound = False
|
|
143
147
|
|
|
144
148
|
if not get_chalk_fix_invalid_result_propagation():
|
|
145
149
|
default_args = [ResolverArgErrorHandlerParsed(default_value=...) for _ in input_refs]
|
|
@@ -502,7 +506,7 @@ class IOBoundParallelResolver:
|
|
|
502
506
|
else:
|
|
503
507
|
with execution_context:
|
|
504
508
|
start = time.perf_counter()
|
|
505
|
-
result = metadata.fn(*resolver_args)
|
|
509
|
+
result = await metadata.fn(*resolver_args)
|
|
506
510
|
duration = time.perf_counter() - start
|
|
507
511
|
|
|
508
512
|
except (Exception, PolarsPanicErrorCompat) as e:
|
|
@@ -614,8 +618,8 @@ class ParallelResolverInvoker(BoundInvokerProtocol):
|
|
|
614
618
|
self._query_execution_params = query_execution_params
|
|
615
619
|
self._graph = graph
|
|
616
620
|
self._resolver_executor = resolver_executor
|
|
617
|
-
self._is_cpu_bound = get_unique_item(resolver.resource_hint == "cpu" for resolver in resolvers)
|
|
618
621
|
fn = _assimilate_resolvers(resolvers, resolver_executor)
|
|
622
|
+
self._is_cpu_bound = isinstance(fn, CPUBoundParallelResolver)
|
|
619
623
|
if len(fn.input_refs) == 0:
|
|
620
624
|
self._unique_input_root_ns = get_unique_item(o.root_namespace for o in fn.output_refs)
|
|
621
625
|
else:
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/vectorized_hasmany_sampler.py
RENAMED
|
@@ -1039,7 +1039,8 @@ class PolarsVectorizedHasManySampler(VectorizedHasManySampler):
|
|
|
1039
1039
|
)
|
|
1040
1040
|
else:
|
|
1041
1041
|
ans = with_struct.groupby(RESOLVER_INPUT_IDX_COL_NAME).agg(pl.col(str(has_many_feature)))
|
|
1042
|
-
|
|
1042
|
+
result = ans.select([RESOLVER_INPUT_IDX_COL_NAME, str(has_many_feature)]).collect()
|
|
1043
|
+
return result
|
|
1043
1044
|
|
|
1044
1045
|
def join_df_and_pack_into_struct(self) -> pl.DataFrame:
|
|
1045
1046
|
"""
|
|
@@ -1404,6 +1405,201 @@ class PyArrowVectorizedHasManySampler(VectorizedHasManySampler):
|
|
|
1404
1405
|
)
|
|
1405
1406
|
|
|
1406
1407
|
|
|
1408
|
+
class NestedHasManySampler:
|
|
1409
|
+
"""
|
|
1410
|
+
Handles nested has-many relationships by processing levels top-down: the outermost
|
|
1411
|
+
join is resolved first (preserving its timestamps), then each successive inner level
|
|
1412
|
+
is sampled using the matched rows from the level above as resolver inputs.
|
|
1413
|
+
|
|
1414
|
+
For a ``College -> Courses -> Sections`` hierarchy:
|
|
1415
|
+
|
|
1416
|
+
1. **Top** (Courses): obtain the course rows that match each college, including their
|
|
1417
|
+
``__ts__`` values, via :meth:`PolarsVectorizedHasManySampler._get_ungrouped_rows`.
|
|
1418
|
+
2. **Inner** (Sections): use those course rows as resolver inputs for a
|
|
1419
|
+
:class:`PolarsVectorizedHasManySampler` that packs sections into each course row,
|
|
1420
|
+
with temporal filtering driven by the courses' ``__ts__`` values.
|
|
1421
|
+
3. **Yield**: emit one :class:`DataFrame` of (augmented) courses per college.
|
|
1422
|
+
|
|
1423
|
+
This top-down ordering is required for correct temporal semantics: the ``__ts__``
|
|
1424
|
+
stored in each course row (after the outer join) must govern which section rows are
|
|
1425
|
+
considered valid for that course.
|
|
1426
|
+
|
|
1427
|
+
Parameters
|
|
1428
|
+
----------
|
|
1429
|
+
resolver_inputs_df:
|
|
1430
|
+
The outermost resolver inputs (e.g. the College table).
|
|
1431
|
+
levels:
|
|
1432
|
+
A list of ``(feature, data_df)`` tuples ordered **outermost to innermost**.
|
|
1433
|
+
Each feature must be scoped to its own parent's namespace — i.e. ``Course.sections``
|
|
1434
|
+
is passed as a plain :class:`HasManyFeatureType`, not prefixed with
|
|
1435
|
+
``College.courses``. Example for the two-level case::
|
|
1436
|
+
|
|
1437
|
+
[
|
|
1438
|
+
(College.courses, courses_df), # outermost
|
|
1439
|
+
(Course.sections, sections_df), # innermost
|
|
1440
|
+
]
|
|
1441
|
+
|
|
1442
|
+
graph:
|
|
1443
|
+
The resolved feature graph, forwarded to each inner sampler.
|
|
1444
|
+
allow_planner_postponed_has_many_sampling_planner_option:
|
|
1445
|
+
Forwarded unchanged to each inner :class:`PolarsVectorizedHasManySampler`.
|
|
1446
|
+
"""
|
|
1447
|
+
|
|
1448
|
+
# Temporary column used to preserve the outer resolver-input index while inner
|
|
1449
|
+
# samplers add their own RESOLVER_INPUT_IDX_COL_NAME.
|
|
1450
|
+
_OUTER_IDX_COL = "___CHALK_NESTED_HM_OUTER_IDX___"
|
|
1451
|
+
|
|
1452
|
+
def __init__(
|
|
1453
|
+
self,
|
|
1454
|
+
*,
|
|
1455
|
+
resolver_inputs_df: pl.DataFrame,
|
|
1456
|
+
levels: list[tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]],
|
|
1457
|
+
graph: ResolvedGraph,
|
|
1458
|
+
allow_planner_postponed_has_many_sampling_planner_option: bool,
|
|
1459
|
+
):
|
|
1460
|
+
super().__init__()
|
|
1461
|
+
assert len(levels) >= 1, "NestedHasManySampler requires at least one level"
|
|
1462
|
+
self._resolver_inputs_df = resolver_inputs_df
|
|
1463
|
+
self._levels = levels
|
|
1464
|
+
self._graph = graph
|
|
1465
|
+
self._allow_postponed = allow_planner_postponed_has_many_sampling_planner_option
|
|
1466
|
+
|
|
1467
|
+
def yield_groups_per_row(self) -> Iterable[DataFrame]:
|
|
1468
|
+
levels = self._levels
|
|
1469
|
+
outer_feature, outer_data = levels[0]
|
|
1470
|
+
|
|
1471
|
+
if len(levels) == 1:
|
|
1472
|
+
# Single level: delegate to the standard sampler unchanged.
|
|
1473
|
+
yield from PolarsVectorizedHasManySampler(
|
|
1474
|
+
resolver_inputs_df=self._resolver_inputs_df,
|
|
1475
|
+
has_many_feature=outer_feature,
|
|
1476
|
+
has_many_df=(outer_data, None),
|
|
1477
|
+
graph=self._graph,
|
|
1478
|
+
oom_slim_hm_by_dates=False,
|
|
1479
|
+
oom_slim_hm_by_join_keys=False,
|
|
1480
|
+
enable_indexed_has_many_joins=False,
|
|
1481
|
+
allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
|
|
1482
|
+
include_metadata_columns=False,
|
|
1483
|
+
).yield_groups_per_row()
|
|
1484
|
+
return
|
|
1485
|
+
|
|
1486
|
+
# --- Step 1: get outer-level ungrouped rows (e.g. courses matched to colleges) ---
|
|
1487
|
+
# We call _get_ungrouped_rows directly instead of constructing a full
|
|
1488
|
+
# PolarsVectorizedHasManySampler, because the sampler's __init__ calls
|
|
1489
|
+
# _get_grouped_rows which tries to select the outer expected columns (including
|
|
1490
|
+
# the nested has-many column e.g. course.sections) — those don't exist in
|
|
1491
|
+
# outer_data yet. _get_ungrouped_rows only does the join + temporal filter,
|
|
1492
|
+
# which is exactly what we need here.
|
|
1493
|
+
outer_ri_df = self._resolver_inputs_df
|
|
1494
|
+
if RESOLVER_INPUT_IDX_COL_NAME not in outer_ri_df.columns:
|
|
1495
|
+
outer_ri_df = with_row_index_compat(outer_ri_df, RESOLVER_INPUT_IDX_COL_NAME)
|
|
1496
|
+
|
|
1497
|
+
outer_foreign_pkey = unwrap_optional(
|
|
1498
|
+
self._graph.primary_feature_for_namespace(outer_feature.underlying.foreign_namespace())
|
|
1499
|
+
)
|
|
1500
|
+
outer_foreign_ts = unwrap_optional(
|
|
1501
|
+
self._graph.ts_feature_for_namespace(outer_feature.underlying.foreign_namespace())
|
|
1502
|
+
)
|
|
1503
|
+
|
|
1504
|
+
outer_left_join_features = outer_feature.underlying.get_local_join_features()
|
|
1505
|
+
if isinstance(outer_feature, InputFeatureType):
|
|
1506
|
+
outer_left_join_features = [
|
|
1507
|
+
InputFeatureType.replace_suffix(outer_feature, jf) for jf in outer_left_join_features
|
|
1508
|
+
]
|
|
1509
|
+
outer_left_join_cols = [f.root_fqn for f in outer_left_join_features]
|
|
1510
|
+
outer_right_join_cols = [f.root_fqn for f in outer_feature.underlying.get_foreign_join_features()]
|
|
1511
|
+
|
|
1512
|
+
outer_inputs_rename_dict = {
|
|
1513
|
+
col: "__CHALK_RESOLVER_INPUT__" + col for col in outer_ri_df.columns if col != RESOLVER_INPUT_IDX_COL_NAME
|
|
1514
|
+
}
|
|
1515
|
+
outer_unique = all(pl_is_uniquable_on(dtype) for dtype in outer_ri_df.dtypes)
|
|
1516
|
+
|
|
1517
|
+
# outer_ungrouped: RESOLVER_INPUT_IDX_COL_NAME=outer_idx, course cols (incl. __ts__)
|
|
1518
|
+
outer_ungrouped: pl.DataFrame = PolarsVectorizedHasManySampler._get_ungrouped_rows( # pyright: ignore[reportPrivateUsage]
|
|
1519
|
+
graph=self._graph,
|
|
1520
|
+
hm_df=outer_data,
|
|
1521
|
+
mapping_table=None,
|
|
1522
|
+
resolver_inputs_df=outer_ri_df,
|
|
1523
|
+
foreign_pkey_feature=outer_foreign_pkey,
|
|
1524
|
+
foreign_ts_feature=outer_foreign_ts,
|
|
1525
|
+
has_many_feature=outer_feature,
|
|
1526
|
+
inputs_rename_dict=outer_inputs_rename_dict,
|
|
1527
|
+
left_join_cols=outer_left_join_cols,
|
|
1528
|
+
right_join_cols=outer_right_join_cols,
|
|
1529
|
+
oom_slim_hm_by_dates=False,
|
|
1530
|
+
oom_slim_hm_by_join_keys=False,
|
|
1531
|
+
unique_resolver_inputs=outer_unique,
|
|
1532
|
+
enable_indexed_has_many_joins=False,
|
|
1533
|
+
allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
|
|
1534
|
+
).collect()
|
|
1535
|
+
|
|
1536
|
+
# --- Step 2: rename outer idx to backup so inner samplers get fresh row indices ---
|
|
1537
|
+
# current_rows has _OUTER_IDX=outer_idx, course cols — no RESOLVER_INPUT_IDX_COL_NAME
|
|
1538
|
+
current_rows = outer_ungrouped.rename({RESOLVER_INPUT_IDX_COL_NAME: self._OUTER_IDX_COL})
|
|
1539
|
+
|
|
1540
|
+
# --- Step 3: process each inner level in order (top-down) ---
|
|
1541
|
+
# Each inner sampler receives the (possibly augmented) rows from the level above as
|
|
1542
|
+
# its resolver_inputs. Because RESOLVER_INPUT_IDX_COL_NAME is absent, the sampler
|
|
1543
|
+
# adds a fresh per-row index, joins the next level's data, and packs it as a
|
|
1544
|
+
# list-of-structs column. We then drop that transient index and move on.
|
|
1545
|
+
for inner_feature, inner_data in levels[1:]:
|
|
1546
|
+
inner_sampler = PolarsVectorizedHasManySampler(
|
|
1547
|
+
resolver_inputs_df=current_rows,
|
|
1548
|
+
has_many_feature=inner_feature,
|
|
1549
|
+
has_many_df=(inner_data, None),
|
|
1550
|
+
graph=self._graph,
|
|
1551
|
+
oom_slim_hm_by_dates=False,
|
|
1552
|
+
oom_slim_hm_by_join_keys=False,
|
|
1553
|
+
enable_indexed_has_many_joins=False,
|
|
1554
|
+
allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
|
|
1555
|
+
include_metadata_columns=False,
|
|
1556
|
+
)
|
|
1557
|
+
# Result: RESOLVER_INPUT_IDX_COL_NAME=inner_row_idx, _OUTER_IDX=outer_idx,
|
|
1558
|
+
# parent_cols, packed_inner_col
|
|
1559
|
+
packed = inner_sampler.join_df_and_pack_into_struct()
|
|
1560
|
+
# Drop the transient inner row index; _OUTER_IDX is preserved for the next
|
|
1561
|
+
# iteration (or for the final yield step below).
|
|
1562
|
+
current_rows = packed.drop(RESOLVER_INPUT_IDX_COL_NAME)
|
|
1563
|
+
|
|
1564
|
+
# --- Step 4: restore outer idx and yield one DataFrame per outer resolver input ---
|
|
1565
|
+
# current_rows: _OUTER_IDX=outer_idx, parent_cols (with all inner levels packed in)
|
|
1566
|
+
augmented = current_rows.rename({self._OUTER_IDX_COL: RESOLVER_INPUT_IDX_COL_NAME})
|
|
1567
|
+
|
|
1568
|
+
outer_expected_col_names: list[str] = [x.root_fqn for x in outer_feature.underlying.df.columns]
|
|
1569
|
+
n_outer = len(self._resolver_inputs_df)
|
|
1570
|
+
|
|
1571
|
+
empty_pl = augmented.filter(pl.lit(False)).select(outer_expected_col_names)
|
|
1572
|
+
empty_chalk = DataFrame(
|
|
1573
|
+
empty_pl.lazy(),
|
|
1574
|
+
missing_value_strategy="default_or_allow",
|
|
1575
|
+
verify_validity=False,
|
|
1576
|
+
convert_dtypes=False,
|
|
1577
|
+
)
|
|
1578
|
+
|
|
1579
|
+
# Build a template Chalk DataFrame from the first non-empty group so we can
|
|
1580
|
+
# copy.copy() it for subsequent groups (avoids re-running DataFrame.__init__ N times).
|
|
1581
|
+
template_chalk: DataFrame | None = None
|
|
1582
|
+
|
|
1583
|
+
for outer_idx in range(n_outer):
|
|
1584
|
+
group_pl = augmented.filter(pl.col(RESOLVER_INPUT_IDX_COL_NAME) == outer_idx).select(
|
|
1585
|
+
outer_expected_col_names
|
|
1586
|
+
)
|
|
1587
|
+
if len(group_pl) == 0:
|
|
1588
|
+
yield empty_chalk
|
|
1589
|
+
elif template_chalk is None:
|
|
1590
|
+
template_chalk = DataFrame(
|
|
1591
|
+
group_pl.lazy(),
|
|
1592
|
+
missing_value_strategy="default_or_allow",
|
|
1593
|
+
verify_validity=False,
|
|
1594
|
+
convert_dtypes=False,
|
|
1595
|
+
)
|
|
1596
|
+
yield template_chalk
|
|
1597
|
+
else:
|
|
1598
|
+
copied = copy.copy(template_chalk)
|
|
1599
|
+
copied._swap_underlying(group_pl.lazy()) # pyright: ignore[reportPrivateUsage]
|
|
1600
|
+
yield copied
|
|
1601
|
+
|
|
1602
|
+
|
|
1407
1603
|
def table_has_struct_or_list(table: pa.Table):
|
|
1408
1604
|
return any(type_contains_struct_or_list(pa_type) for pa_type in table.schema.types)
|
|
1409
1605
|
|
|
@@ -12,11 +12,11 @@ from typing import TYPE_CHECKING, Callable, Optional
|
|
|
12
12
|
from chalk.features import Feature, FeatureSetBase
|
|
13
13
|
from chalk.features.resolver import RESOLVER_REGISTRY
|
|
14
14
|
from chalk.importer import (
|
|
15
|
-
FailedImport,
|
|
16
15
|
import_all_python_files_from_dir,
|
|
17
16
|
import_sql_file_resolvers,
|
|
18
17
|
run_post_import_fixups,
|
|
19
18
|
)
|
|
19
|
+
from chalk.parsed.duplicate_input_gql import FailedImport
|
|
20
20
|
from chalk.utils.log_with_context import get_logger
|
|
21
21
|
from chalk.utils.storage_client import (
|
|
22
22
|
AzureBlobStorageClient,
|
|
@@ -45,7 +45,7 @@ name = "chalkruntime"
|
|
|
45
45
|
description = "Runtime support library for Chalk AI"
|
|
46
46
|
readme = "README.md"
|
|
47
47
|
requires-python = ">=3.10"
|
|
48
|
-
version = "3.32.2"
|
|
48
|
+
version = "3.32.4"
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
[tool.deptry]
|
|
@@ -85,6 +85,7 @@ remove-duplicate-keys = true
|
|
|
85
85
|
|
|
86
86
|
[tool.pyright]
|
|
87
87
|
include = ["chalkruntime/**", "setup.py"]
|
|
88
|
+
extraPaths = ["../shared_public"]
|
|
88
89
|
reportCallInDefaultInitializer = "error"
|
|
89
90
|
# reportUnboundVariable
|
|
90
91
|
# reportUnusedCoroutine
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/convert_chalkpy_underscore.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore_operation_registry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/group_incrementalizer.py
RENAMED
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/incrementalizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/query_execution_parameters.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_output_metadata.py
RENAMED
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_raw_output_parsing.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/contextual_query_rewriter.py
RENAMED
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/filter_query_rewriter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/query_rewriter_helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|