chalkruntime 3.32.2__tar.gz → 3.32.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/PKG-INFO +1 -1
  2. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/chalk_overload.py +1 -4
  3. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/general_bound_invoker.py +190 -2
  4. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/one_to_one_invoker.py +7 -3
  5. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/vectorized_hasmany_sampler.py +197 -1
  6. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/importer.py +1 -1
  7. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/PKG-INFO +1 -1
  8. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/pyproject.toml +2 -1
  9. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/README.md +0 -0
  10. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/__init__.py +0 -0
  11. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/constants.py +0 -0
  12. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/__init__.py +0 -0
  13. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/dataframe.py +0 -0
  14. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/lazyframe.py +0 -0
  15. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/__init__.py +0 -0
  16. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/failed_argument.py +0 -0
  17. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/resolver_errors.py +0 -0
  18. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/wrapped_resolver_exception.py +0 -0
  19. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/__init__.py +0 -0
  20. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/convert_chalkpy_underscore.py +0 -0
  21. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/feature.py +0 -0
  22. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/filter_conversion.py +0 -0
  23. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/global_graph.py +0 -0
  24. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph.py +0 -0
  25. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_impl.py +0 -0
  26. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_proxy.py +0 -0
  27. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_state.py +0 -0
  28. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/jinja_parser.py +0 -0
  29. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/materializations.py +0 -0
  30. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/maybe_named_collection.py +0 -0
  31. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/named_query.py +0 -0
  32. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/nearest_neighbor.py +0 -0
  33. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/overlay_graph.py +0 -0
  34. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/prompt_service.py +0 -0
  35. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/protograph_deserializer.py +0 -0
  36. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/protograph_serializer.py +0 -0
  37. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/resolver.py +0 -0
  38. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/singletons.py +0 -0
  39. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/sklearn_model_parser.py +0 -0
  40. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/stream_resolver.py +0 -0
  41. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore.py +0 -0
  42. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore_codec_info.py +0 -0
  43. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore_operation_registry.py +0 -0
  44. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/variables.py +0 -0
  45. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/heaptrack_launcher.py +0 -0
  46. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/__init__.py +0 -0
  47. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/group_incrementalizer.py +0 -0
  48. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/incrementalizer.py +0 -0
  49. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/__init__.py +0 -0
  50. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/batch_result_collector.py +0 -0
  51. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/bound_invoker.py +0 -0
  52. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/bound_invoker_cache.py +0 -0
  53. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/no_arg_scalar_invoker.py +0 -0
  54. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/overlay_features.py +0 -0
  55. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/parse_external_resolver.py +0 -0
  56. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/partition_batch.py +0 -0
  57. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/query_execution_parameters.py +0 -0
  58. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_args_builder.py +0 -0
  59. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_input.py +0 -0
  60. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_input_upload.py +0 -0
  61. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_output_metadata.py +0 -0
  62. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_raw_output_parsing.py +0 -0
  63. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_result.py +0 -0
  64. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_runner.py +0 -0
  65. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/sample.py +0 -0
  66. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/validator.py +0 -0
  67. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/__init__.py +0 -0
  68. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/converter.py +0 -0
  69. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/memray_launcher.py +0 -0
  70. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/metadata.py +0 -0
  71. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/py.typed +0 -0
  72. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/__init__.py +0 -0
  73. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/config.py +0 -0
  74. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/entrypoint.py +0 -0
  75. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/env_helper.py +0 -0
  76. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/remote_python_function_registry_client.py +0 -0
  77. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/service.py +0 -0
  78. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/__init__.py +0 -0
  79. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/composed_rewriter.py +0 -0
  80. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/contextual_query_rewriter.py +0 -0
  81. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/filter_query_rewriter.py +0 -0
  82. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/identity_rewriter.py +0 -0
  83. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/query_rewriter.py +0 -0
  84. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/query_rewriter_helper.py +0 -0
  85. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/__init__.py +0 -0
  86. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/converter_utils.py +0 -0
  87. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/exc.py +0 -0
  88. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/message_parsing.py +0 -0
  89. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/resolver_utils.py +0 -0
  90. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/types.py +0 -0
  91. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/window_keys.py +0 -0
  92. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/__init__.py +0 -0
  93. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/async_helpers.py +0 -0
  94. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/contextvars.py +0 -0
  95. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/datadog.py +0 -0
  96. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/internal_pl_utils.py +0 -0
  97. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/tracing.py +0 -0
  98. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/viztracer_profiling.py +0 -0
  99. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/valgrind_launcher.py +0 -0
  100. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/SOURCES.txt +0 -0
  101. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/dependency_links.txt +0 -0
  102. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/requires.txt +0 -0
  103. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/top_level.txt +0 -0
  104. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/setup.cfg +0 -0
  105. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chalkruntime
3
- Version: 3.32.2
3
+ Version: 3.32.4
4
4
  Summary: Runtime support library for Chalk AI
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -2,11 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  from typing import TypeVar
4
4
 
5
- import libchalk.chalkfunction
6
- import libchalk.udf
7
5
  from chalkruntime.graph.maybe_named_collection import MaybeNamedCollection
8
6
  from libchalk.chalkfunction import (
9
7
  ArgumentType,
8
+ ChalkFunctionOverload,
10
9
  ChalkFunctionOverloadFailed,
11
10
  ChalkFunctionOverloadResolved,
12
11
  default_arrow_type_promoter,
@@ -15,8 +14,6 @@ from libchalk.chalkfunction import (
15
14
  TItem = TypeVar("TItem")
16
15
  TOther = TypeVar("TOther")
17
16
 
18
- ChalkFunctionOverload = libchalk.chalkfunction.ChalkFunctionOverload
19
-
20
17
 
21
18
  def get_resolved_overload(
22
19
  overload: ChalkFunctionOverload,
@@ -90,6 +90,7 @@ from chalkruntime.invoker.resolver_runner import (
90
90
  )
91
91
  from chalkruntime.invoker.vectorized_hasmany_sampler import (
92
92
  RESOLVER_INPUT_IDX_COL_NAME,
93
+ NestedHasManySampler,
93
94
  PolarsVectorizedHasManySampler,
94
95
  PyArrowVectorizedHasManySampler,
95
96
  VectorizedHasManySampler,
@@ -152,7 +153,8 @@ def _parse_df_feature(
152
153
  continue
153
154
  # use groups.raw instead of groups.partitioned, since we don't partition for DF resolvers.
154
155
  hm_table = groups.raw_has_many[hm_ft].result_and_metadata
155
- hm_df = pa_table_to_pl_df(hm_table.to_table()), None
156
+ _hm_pa = hm_table.to_table()
157
+ hm_df = pa_table_to_pl_df(_hm_pa), None
156
158
  sampler = PolarsVectorizedHasManySampler(
157
159
  resolver_inputs_df=resolver_inputs_df,
158
160
  has_many_feature=hm_ft,
@@ -868,7 +870,10 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
868
870
  # we walk up the has-many join path, iteratively packing the deeper layers into lists of structs
869
871
  hm_base_subfeatures = all_has_many_subfeatures(feature_type)
870
872
  hm_schema_subfeatures = tuple(x for x in required_features_to_sample(feature_type) if is_underlying_has_many(x))
871
- assert len(hm_base_subfeatures) == len(hm_schema_subfeatures)
873
+ assert len(hm_base_subfeatures) > 0, f"Expected at least one has-many subfeature for {feature_type}"
874
+ assert len(hm_base_subfeatures) == len(hm_schema_subfeatures), (
875
+ f"hm_base_subfeatures ({hm_base_subfeatures}) and hm_schema_subfeatures ({hm_schema_subfeatures}) must have the same length"
876
+ )
872
877
 
873
878
  current_hm_df: tuple[pl.DataFrame, pl.DataFrame | None] | None = None
874
879
  hm_entry: HasManyFeatureEntry | None = None
@@ -896,8 +901,190 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
896
901
  current_hm_df = (sampler.join_df_and_pack_into_struct().drop(RESOLVER_INPUT_IDX_COL_NAME), None)
897
902
  # now we want select for only the current_hm_feature column to be in resolver inputs, dropping irrelevant columns
898
903
  current_hm_feature = has_many_subfeatures_to_projection(base_hm_feature, [relative_hm_feature])
904
+ # Top-down nested HM: if the outer feature's schema declares nested HM columns
905
+ # that are absent from the current table, retrieve their data from the mapping
906
+ # and use NestedHasManySampler top-down so timestamps flow from outer to inner
907
+ # (required for correct online temporal semantics).
908
+ _nested_hm_schema = [
909
+ col for col in current_hm_feature.underlying.df.columns if isinstance(col, HasManyFeatureType)
910
+ ]
911
+ if _nested_hm_schema:
912
+ _current_table_cols = set(current_hm_df[0].columns)
913
+ _missing_nested = [col for col in _nested_hm_schema if col.root_fqn not in _current_table_cols]
914
+ if not _missing_nested:
915
+ # For "deeply nested" cases (e.g. triple-nested: University → College → Course → Section):
916
+ # nested HM cols (e.g. college.courses) are already packed in the table, but their packed
917
+ # structs lack sub-nested data (e.g. course.sections) because the resolver encoding schema
918
+ # is scalar-only. However, partitioned_has_many already has a separate courses table WITH
919
+ # course.sections. Drop these cols so the _missing_nested path below picks them up and uses
920
+ # NestedHasManySampler with the correctly nested data.
921
+ # Only drop cols where we can actually retrieve the data (same lookup as the _missing_nested
922
+ # handler below at lines 945-950), to avoid dropping cols that PolarsVectorizedHasManySampler
923
+ # still needs when data is unavailable through the nested path.
924
+ _phm = feature_to_data_mapping.partitioned_has_many or {}
925
+
926
+ def _nested_data_available(col: HasManyFeatureType) -> bool:
927
+ if col in _phm:
928
+ return True
929
+ if col in feature_to_data_mapping.raw_has_many:
930
+ return True
931
+ return False
932
+
933
+ _deeply_nested = [
934
+ col
935
+ for col in _nested_hm_schema
936
+ if col.root_fqn in _current_table_cols
937
+ and any(isinstance(x, HasManyFeatureType) for x in col.underlying.df.columns)
938
+ and _nested_data_available(col)
939
+ ]
940
+ if _deeply_nested:
941
+ _cols_to_drop = [col.root_fqn for col in _deeply_nested]
942
+ current_hm_df = (current_hm_df[0].drop(_cols_to_drop), current_hm_df[1])
943
+ _current_table_cols = set(current_hm_df[0].columns)
944
+ _missing_nested = [col for col in _nested_hm_schema if col.root_fqn not in _current_table_cols]
945
+ if _missing_nested:
946
+ resolver_inputs_pl = (
947
+ resolver_inputs_df
948
+ if resolver_inputs_df is not None
949
+ else pa_table_to_pl_df(unwrap_optional(resolver_inputs_table).to_table())
950
+ )
951
+ nested_levels: list[
952
+ tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]
953
+ ] = [(current_hm_feature, current_hm_df[0])]
954
+ for _nested_hm in _missing_nested:
955
+ _nested_data_raw: pl.DataFrame | None = None
956
+ if _nested_hm in feature_to_data_mapping.partitioned_has_many:
957
+ _nested_data_raw = feature_to_data_mapping.partitioned_has_many[_nested_hm][0]
958
+ elif _nested_hm in feature_to_data_mapping.raw_has_many:
959
+ _nested_data_raw = pa_table_to_pl_df(
960
+ feature_to_data_mapping.raw_has_many[_nested_hm].result_and_metadata.to_table()
961
+ )
962
+ if _nested_data_raw is not None:
963
+ # Sections extracted from packed resolver output (via PushHasManyToResult)
964
+ # inherit the parent's TS_COL_NAME but lack the foreign feature-time
965
+ # column required by _get_ungrouped_rows' temporal filter. Use
966
+ # TS_COL_NAME as a proxy so timestamps flow correctly top-down.
967
+ _foreign_ts_fqn = getattr(
968
+ self._graph.ts_feature_for_namespace(_nested_hm.underlying.foreign_namespace()),
969
+ "fqn",
970
+ None,
971
+ )
972
+ if (
973
+ _foreign_ts_fqn is not None
974
+ and _foreign_ts_fqn not in _nested_data_raw.columns
975
+ and TS_COL_NAME in _nested_data_raw.columns
976
+ ):
977
+ _nested_data_raw = _nested_data_raw.with_columns(
978
+ pl.col(TS_COL_NAME).alias(_foreign_ts_fqn)
979
+ )
980
+ # Pre-pack any sub-nested HM data (bottom-up) to support triple-nested has-many.
981
+ # For example, when _nested_hm=college.courses, check if course.sections data
982
+ # is available and pack it into courses_df before NestedHasManySampler runs.
983
+ _sub_nested_hm_cols = [
984
+ col for col in _nested_hm.underlying.df.columns if isinstance(col, HasManyFeatureType)
985
+ ]
986
+ for _sub_nested_hm in _sub_nested_hm_cols:
987
+ _sub_nested_fqn = _sub_nested_hm.root_fqn
988
+ _sub_data: pl.DataFrame | None = None
989
+ # Direct key lookup first
990
+ if (
991
+ feature_to_data_mapping.partitioned_has_many
992
+ and _sub_nested_hm in feature_to_data_mapping.partitioned_has_many
993
+ ):
994
+ _sub_data = feature_to_data_mapping.partitioned_has_many[_sub_nested_hm][0]
995
+ elif _sub_nested_hm in feature_to_data_mapping.raw_has_many:
996
+ _sub_data = pa_table_to_pl_df(
997
+ feature_to_data_mapping.raw_has_many[
998
+ _sub_nested_hm
999
+ ].result_and_metadata.to_table()
1000
+ )
1001
+ else:
1002
+ # Look for an InputFeatureType key whose underlying matches _sub_nested_hm.
1003
+ # After has_many_join_operator propagates nested HM features, entries like
1004
+ # university.colleges.courses.sections (InputFeatureType) appear in
1005
+ # partitioned_has_many with underlying=course.sections (HasManyFeatureType).
1006
+ for _k, _v in (feature_to_data_mapping.partitioned_has_many or {}).items():
1007
+ if isinstance(_k, InputFeatureType) and _k.underlying == _sub_nested_hm:
1008
+ _sub_data = _v[0]
1009
+ break
1010
+ if _sub_data is not None:
1011
+ # Add foreign ts alias if needed for the sub-nested data
1012
+ _sub_foreign_ts_fqn = getattr(
1013
+ self._graph.ts_feature_for_namespace(
1014
+ _sub_nested_hm.underlying.foreign_namespace()
1015
+ ),
1016
+ "fqn",
1017
+ None,
1018
+ )
1019
+ if (
1020
+ _sub_foreign_ts_fqn is not None
1021
+ and _sub_foreign_ts_fqn not in _sub_data.columns
1022
+ and TS_COL_NAME in _sub_data.columns
1023
+ ):
1024
+ _sub_data = _sub_data.with_columns(
1025
+ pl.col(TS_COL_NAME).alias(_sub_foreign_ts_fqn)
1026
+ )
1027
+ # Drop the null sub-nested column from _nested_data_raw to avoid
1028
+ # column conflict when the sub-sampler adds the packed column.
1029
+ if _sub_nested_fqn in _nested_data_raw.columns:
1030
+ _nested_data_raw = _nested_data_raw.drop(_sub_nested_fqn)
1031
+ # Also drop RESOLVER_INPUT_IDX_COL_NAME so the sub-sampler
1032
+ # assigns a fresh per-row index for the pack operation.
1033
+ if RESOLVER_INPUT_IDX_COL_NAME in _nested_data_raw.columns:
1034
+ _nested_data_raw = _nested_data_raw.drop(RESOLVER_INPUT_IDX_COL_NAME)
1035
+ _sub_sampler = PolarsVectorizedHasManySampler(
1036
+ resolver_inputs_df=_nested_data_raw,
1037
+ has_many_feature=_sub_nested_hm,
1038
+ has_many_df=(_sub_data, None),
1039
+ graph=self._graph,
1040
+ oom_slim_hm_by_dates=False,
1041
+ oom_slim_hm_by_join_keys=False,
1042
+ enable_indexed_has_many_joins=False,
1043
+ allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
1044
+ include_metadata_columns=False,
1045
+ )
1046
+ _nested_data_raw = _sub_sampler.join_df_and_pack_into_struct().drop(
1047
+ RESOLVER_INPUT_IDX_COL_NAME
1048
+ )
1049
+ nested_levels.append((_nested_hm, _nested_data_raw))
1050
+ if len(nested_levels) > 1:
1051
+ return NestedHasManySampler(
1052
+ resolver_inputs_df=resolver_inputs_pl,
1053
+ levels=nested_levels,
1054
+ graph=self._graph,
1055
+ allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
1056
+ ).yield_groups_per_row()
899
1057
  else:
900
1058
  current_hm_feature = feature_type
1059
+ if len(hm_base_subfeatures) > 1 and all(
1060
+ k in feature_to_data_mapping.raw_has_many for k in hm_base_subfeatures
1061
+ ):
1062
+ # Nested has-many in the raw (online/static) path: separate data tables
1063
+ # exist in raw_has_many for each level. Use NestedHasManySampler to pack
1064
+ # them bottom-up (innermost first) before yielding groups per resolver row.
1065
+ resolver_inputs_pl = (
1066
+ resolver_inputs_df
1067
+ if resolver_inputs_df is not None
1068
+ else pa_table_to_pl_df(unwrap_optional(resolver_inputs_table).to_table())
1069
+ )
1070
+ levels: list[tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]] = []
1071
+ for i, base_hm in enumerate(hm_base_subfeatures):
1072
+ entry = feature_to_data_mapping.raw_has_many[base_hm]
1073
+ data_df = pa_table_to_pl_df(entry.result_and_metadata.to_table())
1074
+ # The outermost feature is used as-is; inner features are expressed
1075
+ # relative to their immediate parent so the join keys resolve correctly.
1076
+ feature_for_level = (
1077
+ base_hm
1078
+ if i == 0
1079
+ else cast(InputFeatureType[HasManyFeatureType], base_hm).relative_to(hm_base_subfeatures[i - 1])
1080
+ )
1081
+ levels.append((feature_for_level, data_df))
1082
+ return NestedHasManySampler(
1083
+ resolver_inputs_df=resolver_inputs_pl,
1084
+ levels=levels,
1085
+ graph=self._graph,
1086
+ allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
1087
+ ).yield_groups_per_row()
901
1088
  hm_entry = feature_to_data_mapping.raw_has_many[current_hm_feature]
902
1089
 
903
1090
  sampler: VectorizedHasManySampler | None = None
@@ -974,6 +1161,7 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
974
1161
  )
975
1162
  # If there are any has-one still left in path after the last has-many, we need to change the namespace
976
1163
  if not is_underlying_has_many(feature_type):
1164
+ assert len(hm_base_subfeatures) > 0, f"Expected at least one has-many subfeature for {feature_type}"
977
1165
  packed_lf = packed_lf.select(
978
1166
  pl.col(column.root_fqn).alias(column.fqn)
979
1167
  for column in hm_base_subfeatures[-1].underlying.df.columns
@@ -100,6 +100,7 @@ def _assimilate_resolvers(
100
100
  ):
101
101
  root_ns = get_unique_item(res.unique_input_root_ns for res in resolvers)
102
102
  graph = get_unique_item(res.graph for res in resolvers)
103
+ # Note: this is mutated below if any of the resolvers are async to prevent crashes
103
104
  is_cpu_bound = get_unique_item(res.resource_hint == "cpu" for res in resolvers)
104
105
  pkey_feature = graph.primary_feature_for_namespace(root_ns)
105
106
  assert pkey_feature is not None
@@ -136,10 +137,13 @@ def _assimilate_resolvers(
136
137
  default_args.append(maybe_default_arg)
137
138
  for out in resolver.output:
138
139
  output_refs.add(out)
140
+ metadata = ResolverOutputMetadata.from_resolver(resolver, pkey_feature)
139
141
  resolver_to_input_fqns_has_default_and_defaults[resolver] = (
140
- ResolverOutputMetadata.from_resolver(resolver, pkey_feature),
142
+ metadata,
141
143
  tuple(inputs),
142
144
  )
145
+ if metadata.is_async:
146
+ is_cpu_bound = False
143
147
 
144
148
  if not get_chalk_fix_invalid_result_propagation():
145
149
  default_args = [ResolverArgErrorHandlerParsed(default_value=...) for _ in input_refs]
@@ -502,7 +506,7 @@ class IOBoundParallelResolver:
502
506
  else:
503
507
  with execution_context:
504
508
  start = time.perf_counter()
505
- result = metadata.fn(*resolver_args)
509
+ result = await metadata.fn(*resolver_args)
506
510
  duration = time.perf_counter() - start
507
511
 
508
512
  except (Exception, PolarsPanicErrorCompat) as e:
@@ -614,8 +618,8 @@ class ParallelResolverInvoker(BoundInvokerProtocol):
614
618
  self._query_execution_params = query_execution_params
615
619
  self._graph = graph
616
620
  self._resolver_executor = resolver_executor
617
- self._is_cpu_bound = get_unique_item(resolver.resource_hint == "cpu" for resolver in resolvers)
618
621
  fn = _assimilate_resolvers(resolvers, resolver_executor)
622
+ self._is_cpu_bound = isinstance(fn, CPUBoundParallelResolver)
619
623
  if len(fn.input_refs) == 0:
620
624
  self._unique_input_root_ns = get_unique_item(o.root_namespace for o in fn.output_refs)
621
625
  else:
@@ -1039,7 +1039,8 @@ class PolarsVectorizedHasManySampler(VectorizedHasManySampler):
1039
1039
  )
1040
1040
  else:
1041
1041
  ans = with_struct.groupby(RESOLVER_INPUT_IDX_COL_NAME).agg(pl.col(str(has_many_feature)))
1042
- return ans.select([RESOLVER_INPUT_IDX_COL_NAME, str(has_many_feature)]).collect()
1042
+ result = ans.select([RESOLVER_INPUT_IDX_COL_NAME, str(has_many_feature)]).collect()
1043
+ return result
1043
1044
 
1044
1045
  def join_df_and_pack_into_struct(self) -> pl.DataFrame:
1045
1046
  """
@@ -1404,6 +1405,201 @@ class PyArrowVectorizedHasManySampler(VectorizedHasManySampler):
1404
1405
  )
1405
1406
 
1406
1407
 
1408
+ class NestedHasManySampler:
1409
+ """
1410
+ Handles nested has-many relationships by processing levels top-down: the outermost
1411
+ join is resolved first (preserving its timestamps), then each successive inner level
1412
+ is sampled using the matched rows from the level above as resolver inputs.
1413
+
1414
+ For a ``College -> Courses -> Sections`` hierarchy:
1415
+
1416
+ 1. **Top** (Courses): obtain the course rows that match each college, including their
1417
+ ``__ts__`` values, via :meth:`PolarsVectorizedHasManySampler._get_ungrouped_rows`.
1418
+ 2. **Inner** (Sections): use those course rows as resolver inputs for a
1419
+ :class:`PolarsVectorizedHasManySampler` that packs sections into each course row,
1420
+ with temporal filtering driven by the courses' ``__ts__`` values.
1421
+ 3. **Yield**: emit one :class:`DataFrame` of (augmented) courses per college.
1422
+
1423
+ This top-down ordering is required for correct temporal semantics: the ``__ts__``
1424
+ stored in each course row (after the outer join) must govern which section rows are
1425
+ considered valid for that course.
1426
+
1427
+ Parameters
1428
+ ----------
1429
+ resolver_inputs_df:
1430
+ The outermost resolver inputs (e.g. the College table).
1431
+ levels:
1432
+ A list of ``(feature, data_df)`` tuples ordered **outermost to innermost**.
1433
+ Each feature must be scoped to its own parent's namespace — i.e. ``Course.sections``
1434
+ is passed as a plain :class:`HasManyFeatureType`, not prefixed with
1435
+ ``College.courses``. Example for the two-level case::
1436
+
1437
+ [
1438
+ (College.courses, courses_df), # outermost
1439
+ (Course.sections, sections_df), # innermost
1440
+ ]
1441
+
1442
+ graph:
1443
+ The resolved feature graph, forwarded to each inner sampler.
1444
+ allow_planner_postponed_has_many_sampling_planner_option:
1445
+ Forwarded unchanged to each inner :class:`PolarsVectorizedHasManySampler`.
1446
+ """
1447
+
1448
+ # Temporary column used to preserve the outer resolver-input index while inner
1449
+ # samplers add their own RESOLVER_INPUT_IDX_COL_NAME.
1450
+ _OUTER_IDX_COL = "___CHALK_NESTED_HM_OUTER_IDX___"
1451
+
1452
+ def __init__(
1453
+ self,
1454
+ *,
1455
+ resolver_inputs_df: pl.DataFrame,
1456
+ levels: list[tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]],
1457
+ graph: ResolvedGraph,
1458
+ allow_planner_postponed_has_many_sampling_planner_option: bool,
1459
+ ):
1460
+ super().__init__()
1461
+ assert len(levels) >= 1, "NestedHasManySampler requires at least one level"
1462
+ self._resolver_inputs_df = resolver_inputs_df
1463
+ self._levels = levels
1464
+ self._graph = graph
1465
+ self._allow_postponed = allow_planner_postponed_has_many_sampling_planner_option
1466
+
1467
+ def yield_groups_per_row(self) -> Iterable[DataFrame]:
1468
+ levels = self._levels
1469
+ outer_feature, outer_data = levels[0]
1470
+
1471
+ if len(levels) == 1:
1472
+ # Single level: delegate to the standard sampler unchanged.
1473
+ yield from PolarsVectorizedHasManySampler(
1474
+ resolver_inputs_df=self._resolver_inputs_df,
1475
+ has_many_feature=outer_feature,
1476
+ has_many_df=(outer_data, None),
1477
+ graph=self._graph,
1478
+ oom_slim_hm_by_dates=False,
1479
+ oom_slim_hm_by_join_keys=False,
1480
+ enable_indexed_has_many_joins=False,
1481
+ allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
1482
+ include_metadata_columns=False,
1483
+ ).yield_groups_per_row()
1484
+ return
1485
+
1486
+ # --- Step 1: get outer-level ungrouped rows (e.g. courses matched to colleges) ---
1487
+ # We call _get_ungrouped_rows directly instead of constructing a full
1488
+ # PolarsVectorizedHasManySampler, because the sampler's __init__ calls
1489
+ # _get_grouped_rows which tries to select the outer expected columns (including
1490
+ # the nested has-many column e.g. course.sections) — those don't exist in
1491
+ # outer_data yet. _get_ungrouped_rows only does the join + temporal filter,
1492
+ # which is exactly what we need here.
1493
+ outer_ri_df = self._resolver_inputs_df
1494
+ if RESOLVER_INPUT_IDX_COL_NAME not in outer_ri_df.columns:
1495
+ outer_ri_df = with_row_index_compat(outer_ri_df, RESOLVER_INPUT_IDX_COL_NAME)
1496
+
1497
+ outer_foreign_pkey = unwrap_optional(
1498
+ self._graph.primary_feature_for_namespace(outer_feature.underlying.foreign_namespace())
1499
+ )
1500
+ outer_foreign_ts = unwrap_optional(
1501
+ self._graph.ts_feature_for_namespace(outer_feature.underlying.foreign_namespace())
1502
+ )
1503
+
1504
+ outer_left_join_features = outer_feature.underlying.get_local_join_features()
1505
+ if isinstance(outer_feature, InputFeatureType):
1506
+ outer_left_join_features = [
1507
+ InputFeatureType.replace_suffix(outer_feature, jf) for jf in outer_left_join_features
1508
+ ]
1509
+ outer_left_join_cols = [f.root_fqn for f in outer_left_join_features]
1510
+ outer_right_join_cols = [f.root_fqn for f in outer_feature.underlying.get_foreign_join_features()]
1511
+
1512
+ outer_inputs_rename_dict = {
1513
+ col: "__CHALK_RESOLVER_INPUT__" + col for col in outer_ri_df.columns if col != RESOLVER_INPUT_IDX_COL_NAME
1514
+ }
1515
+ outer_unique = all(pl_is_uniquable_on(dtype) for dtype in outer_ri_df.dtypes)
1516
+
1517
+ # outer_ungrouped: RESOLVER_INPUT_IDX_COL_NAME=outer_idx, course cols (incl. __ts__)
1518
+ outer_ungrouped: pl.DataFrame = PolarsVectorizedHasManySampler._get_ungrouped_rows( # pyright: ignore[reportPrivateUsage]
1519
+ graph=self._graph,
1520
+ hm_df=outer_data,
1521
+ mapping_table=None,
1522
+ resolver_inputs_df=outer_ri_df,
1523
+ foreign_pkey_feature=outer_foreign_pkey,
1524
+ foreign_ts_feature=outer_foreign_ts,
1525
+ has_many_feature=outer_feature,
1526
+ inputs_rename_dict=outer_inputs_rename_dict,
1527
+ left_join_cols=outer_left_join_cols,
1528
+ right_join_cols=outer_right_join_cols,
1529
+ oom_slim_hm_by_dates=False,
1530
+ oom_slim_hm_by_join_keys=False,
1531
+ unique_resolver_inputs=outer_unique,
1532
+ enable_indexed_has_many_joins=False,
1533
+ allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
1534
+ ).collect()
1535
+
1536
+ # --- Step 2: rename outer idx to backup so inner samplers get fresh row indices ---
1537
+ # current_rows has _OUTER_IDX=outer_idx, course cols — no RESOLVER_INPUT_IDX_COL_NAME
1538
+ current_rows = outer_ungrouped.rename({RESOLVER_INPUT_IDX_COL_NAME: self._OUTER_IDX_COL})
1539
+
1540
+ # --- Step 3: process each inner level in order (top-down) ---
1541
+ # Each inner sampler receives the (possibly augmented) rows from the level above as
1542
+ # its resolver_inputs. Because RESOLVER_INPUT_IDX_COL_NAME is absent, the sampler
1543
+ # adds a fresh per-row index, joins the next level's data, and packs it as a
1544
+ # list-of-structs column. We then drop that transient index and move on.
1545
+ for inner_feature, inner_data in levels[1:]:
1546
+ inner_sampler = PolarsVectorizedHasManySampler(
1547
+ resolver_inputs_df=current_rows,
1548
+ has_many_feature=inner_feature,
1549
+ has_many_df=(inner_data, None),
1550
+ graph=self._graph,
1551
+ oom_slim_hm_by_dates=False,
1552
+ oom_slim_hm_by_join_keys=False,
1553
+ enable_indexed_has_many_joins=False,
1554
+ allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
1555
+ include_metadata_columns=False,
1556
+ )
1557
+ # Result: RESOLVER_INPUT_IDX_COL_NAME=inner_row_idx, _OUTER_IDX=outer_idx,
1558
+ # parent_cols, packed_inner_col
1559
+ packed = inner_sampler.join_df_and_pack_into_struct()
1560
+ # Drop the transient inner row index; _OUTER_IDX is preserved for the next
1561
+ # iteration (or for the final yield step below).
1562
+ current_rows = packed.drop(RESOLVER_INPUT_IDX_COL_NAME)
1563
+
1564
+ # --- Step 4: restore outer idx and yield one DataFrame per outer resolver input ---
1565
+ # current_rows: _OUTER_IDX=outer_idx, parent_cols (with all inner levels packed in)
1566
+ augmented = current_rows.rename({self._OUTER_IDX_COL: RESOLVER_INPUT_IDX_COL_NAME})
1567
+
1568
+ outer_expected_col_names: list[str] = [x.root_fqn for x in outer_feature.underlying.df.columns]
1569
+ n_outer = len(self._resolver_inputs_df)
1570
+
1571
+ empty_pl = augmented.filter(pl.lit(False)).select(outer_expected_col_names)
1572
+ empty_chalk = DataFrame(
1573
+ empty_pl.lazy(),
1574
+ missing_value_strategy="default_or_allow",
1575
+ verify_validity=False,
1576
+ convert_dtypes=False,
1577
+ )
1578
+
1579
+ # Build a template Chalk DataFrame from the first non-empty group so we can
1580
+ # copy.copy() it for subsequent groups (avoids re-running DataFrame.__init__ N times).
1581
+ template_chalk: DataFrame | None = None
1582
+
1583
+ for outer_idx in range(n_outer):
1584
+ group_pl = augmented.filter(pl.col(RESOLVER_INPUT_IDX_COL_NAME) == outer_idx).select(
1585
+ outer_expected_col_names
1586
+ )
1587
+ if len(group_pl) == 0:
1588
+ yield empty_chalk
1589
+ elif template_chalk is None:
1590
+ template_chalk = DataFrame(
1591
+ group_pl.lazy(),
1592
+ missing_value_strategy="default_or_allow",
1593
+ verify_validity=False,
1594
+ convert_dtypes=False,
1595
+ )
1596
+ yield template_chalk
1597
+ else:
1598
+ copied = copy.copy(template_chalk)
1599
+ copied._swap_underlying(group_pl.lazy()) # pyright: ignore[reportPrivateUsage]
1600
+ yield copied
1601
+
1602
+
1407
1603
  def table_has_struct_or_list(table: pa.Table):
1408
1604
  return any(type_contains_struct_or_list(pa_type) for pa_type in table.schema.types)
1409
1605
 
@@ -12,11 +12,11 @@ from typing import TYPE_CHECKING, Callable, Optional
12
12
  from chalk.features import Feature, FeatureSetBase
13
13
  from chalk.features.resolver import RESOLVER_REGISTRY
14
14
  from chalk.importer import (
15
- FailedImport,
16
15
  import_all_python_files_from_dir,
17
16
  import_sql_file_resolvers,
18
17
  run_post_import_fixups,
19
18
  )
19
+ from chalk.parsed.duplicate_input_gql import FailedImport
20
20
  from chalk.utils.log_with_context import get_logger
21
21
  from chalk.utils.storage_client import (
22
22
  AzureBlobStorageClient,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chalkruntime
3
- Version: 3.32.2
3
+ Version: 3.32.4
4
4
  Summary: Runtime support library for Chalk AI
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -45,7 +45,7 @@ name = "chalkruntime"
45
45
  description = "Runtime support library for Chalk AI"
46
46
  readme = "README.md"
47
47
  requires-python = ">=3.10"
48
- version = "3.32.2"
48
+ version = "3.32.4"
49
49
 
50
50
 
51
51
  [tool.deptry]
@@ -85,6 +85,7 @@ remove-duplicate-keys = true
85
85
 
86
86
  [tool.pyright]
87
87
  include = ["chalkruntime/**", "setup.py"]
88
+ extraPaths = ["../shared_public"]
88
89
  reportCallInDefaultInitializer = "error"
89
90
  # reportUnboundVariable
90
91
  # reportUnusedCoroutine
File without changes
File without changes
File without changes