chalkruntime 3.32.2__tar.gz → 3.32.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/PKG-INFO +1 -1
  2. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/chalk_overload.py +1 -4
  3. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/general_bound_invoker.py +190 -2
  4. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/one_to_one_invoker.py +7 -3
  5. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/vectorized_hasmany_sampler.py +197 -1
  6. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/importer.py +1 -1
  7. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/PKG-INFO +1 -1
  8. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/pyproject.toml +2 -1
  9. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/README.md +0 -0
  10. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/__init__.py +0 -0
  11. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/constants.py +0 -0
  12. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/__init__.py +0 -0
  13. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/dataframe.py +0 -0
  14. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/dataframe/lazyframe.py +0 -0
  15. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/__init__.py +0 -0
  16. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/failed_argument.py +0 -0
  17. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/resolver_errors.py +0 -0
  18. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/exc/wrapped_resolver_exception.py +0 -0
  19. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/__init__.py +0 -0
  20. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/convert_chalkpy_underscore.py +0 -0
  21. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/feature.py +0 -0
  22. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/filter_conversion.py +0 -0
  23. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/global_graph.py +0 -0
  24. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph.py +0 -0
  25. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_impl.py +0 -0
  26. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_proxy.py +0 -0
  27. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/graph_state.py +0 -0
  28. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/jinja_parser.py +0 -0
  29. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/materializations.py +0 -0
  30. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/maybe_named_collection.py +0 -0
  31. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/named_query.py +0 -0
  32. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/nearest_neighbor.py +0 -0
  33. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/overlay_graph.py +0 -0
  34. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/prompt_service.py +0 -0
  35. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/protograph_deserializer.py +0 -0
  36. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/protograph_serializer.py +0 -0
  37. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/resolver.py +0 -0
  38. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/singletons.py +0 -0
  39. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/sklearn_model_parser.py +0 -0
  40. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/stream_resolver.py +0 -0
  41. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore.py +0 -0
  42. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore_codec_info.py +0 -0
  43. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/underscore_operation_registry.py +0 -0
  44. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/graph/variables.py +0 -0
  45. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/heaptrack_launcher.py +0 -0
  46. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/__init__.py +0 -0
  47. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/group_incrementalizer.py +0 -0
  48. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/incrementalization/incrementalizer.py +0 -0
  49. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/__init__.py +0 -0
  50. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/batch_result_collector.py +0 -0
  51. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/bound_invoker.py +0 -0
  52. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/bound_invoker_cache.py +0 -0
  53. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/no_arg_scalar_invoker.py +0 -0
  54. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/overlay_features.py +0 -0
  55. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/parse_external_resolver.py +0 -0
  56. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/partition_batch.py +0 -0
  57. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/query_execution_parameters.py +0 -0
  58. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_args_builder.py +0 -0
  59. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_input.py +0 -0
  60. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_input_upload.py +0 -0
  61. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_output_metadata.py +0 -0
  62. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_raw_output_parsing.py +0 -0
  63. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_result.py +0 -0
  64. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/resolver_runner.py +0 -0
  65. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/sample.py +0 -0
  66. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/invoker/validator.py +0 -0
  67. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/__init__.py +0 -0
  68. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/loader/converter.py +0 -0
  69. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/memray_launcher.py +0 -0
  70. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/metadata.py +0 -0
  71. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/py.typed +0 -0
  72. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/__init__.py +0 -0
  73. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/config.py +0 -0
  74. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/entrypoint.py +0 -0
  75. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/env_helper.py +0 -0
  76. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/remote_python_function_registry_client.py +0 -0
  77. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/server/service.py +0 -0
  78. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/__init__.py +0 -0
  79. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/composed_rewriter.py +0 -0
  80. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/contextual_query_rewriter.py +0 -0
  81. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/filter_query_rewriter.py +0 -0
  82. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/identity_rewriter.py +0 -0
  83. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/query_rewriter.py +0 -0
  84. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/sql_rewriter/query_rewriter_helper.py +0 -0
  85. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/__init__.py +0 -0
  86. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/converter_utils.py +0 -0
  87. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/exc.py +0 -0
  88. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/message_parsing.py +0 -0
  89. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/resolver_utils.py +0 -0
  90. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/types.py +0 -0
  91. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/streaming/window_keys.py +0 -0
  92. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/__init__.py +0 -0
  93. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/async_helpers.py +0 -0
  94. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/contextvars.py +0 -0
  95. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/datadog.py +0 -0
  96. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/internal_pl_utils.py +0 -0
  97. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/tracing.py +0 -0
  98. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/utils/viztracer_profiling.py +0 -0
  99. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime/valgrind_launcher.py +0 -0
  100. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/SOURCES.txt +0 -0
  101. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/dependency_links.txt +0 -0
  102. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/requires.txt +0 -0
  103. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/chalkruntime.egg-info/top_level.txt +0 -0
  104. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/setup.cfg +0 -0
  105. {chalkruntime-3.32.2 → chalkruntime-3.32.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chalkruntime
3
- Version: 3.32.2
3
+ Version: 3.32.4
4
4
  Summary: Runtime support library for Chalk AI
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -2,11 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  from typing import TypeVar
4
4
 
5
- import libchalk.chalkfunction
6
- import libchalk.udf
7
5
  from chalkruntime.graph.maybe_named_collection import MaybeNamedCollection
8
6
  from libchalk.chalkfunction import (
9
7
  ArgumentType,
8
+ ChalkFunctionOverload,
10
9
  ChalkFunctionOverloadFailed,
11
10
  ChalkFunctionOverloadResolved,
12
11
  default_arrow_type_promoter,
@@ -15,8 +14,6 @@ from libchalk.chalkfunction import (
15
14
  TItem = TypeVar("TItem")
16
15
  TOther = TypeVar("TOther")
17
16
 
18
- ChalkFunctionOverload = libchalk.chalkfunction.ChalkFunctionOverload
19
-
20
17
 
21
18
  def get_resolved_overload(
22
19
  overload: ChalkFunctionOverload,
@@ -90,6 +90,7 @@ from chalkruntime.invoker.resolver_runner import (
90
90
  )
91
91
  from chalkruntime.invoker.vectorized_hasmany_sampler import (
92
92
  RESOLVER_INPUT_IDX_COL_NAME,
93
+ NestedHasManySampler,
93
94
  PolarsVectorizedHasManySampler,
94
95
  PyArrowVectorizedHasManySampler,
95
96
  VectorizedHasManySampler,
@@ -152,7 +153,8 @@ def _parse_df_feature(
152
153
  continue
153
154
  # use groups.raw instead of groups.partitioned, since we don't partition for DF resolvers.
154
155
  hm_table = groups.raw_has_many[hm_ft].result_and_metadata
155
- hm_df = pa_table_to_pl_df(hm_table.to_table()), None
156
+ _hm_pa = hm_table.to_table()
157
+ hm_df = pa_table_to_pl_df(_hm_pa), None
156
158
  sampler = PolarsVectorizedHasManySampler(
157
159
  resolver_inputs_df=resolver_inputs_df,
158
160
  has_many_feature=hm_ft,
@@ -868,7 +870,10 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
868
870
  # we walk up the has-many join path, iteratively packing the deeper layers into lists of structs
869
871
  hm_base_subfeatures = all_has_many_subfeatures(feature_type)
870
872
  hm_schema_subfeatures = tuple(x for x in required_features_to_sample(feature_type) if is_underlying_has_many(x))
871
- assert len(hm_base_subfeatures) == len(hm_schema_subfeatures)
873
+ assert len(hm_base_subfeatures) > 0, f"Expected at least one has-many subfeature for {feature_type}"
874
+ assert len(hm_base_subfeatures) == len(hm_schema_subfeatures), (
875
+ f"hm_base_subfeatures ({hm_base_subfeatures}) and hm_schema_subfeatures ({hm_schema_subfeatures}) must have the same length"
876
+ )
872
877
 
873
878
  current_hm_df: tuple[pl.DataFrame, pl.DataFrame | None] | None = None
874
879
  hm_entry: HasManyFeatureEntry | None = None
@@ -896,8 +901,190 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
896
901
  current_hm_df = (sampler.join_df_and_pack_into_struct().drop(RESOLVER_INPUT_IDX_COL_NAME), None)
897
902
  # now we want select for only the current_hm_feature column to be in resolver inputs, dropping irrelevant columns
898
903
  current_hm_feature = has_many_subfeatures_to_projection(base_hm_feature, [relative_hm_feature])
904
+ # Top-down nested HM: if the outer feature's schema declares nested HM columns
905
+ # that are absent from the current table, retrieve their data from the mapping
906
+ # and use NestedHasManySampler top-down so timestamps flow from outer to inner
907
+ # (required for correct online temporal semantics).
908
+ _nested_hm_schema = [
909
+ col for col in current_hm_feature.underlying.df.columns if isinstance(col, HasManyFeatureType)
910
+ ]
911
+ if _nested_hm_schema:
912
+ _current_table_cols = set(current_hm_df[0].columns)
913
+ _missing_nested = [col for col in _nested_hm_schema if col.root_fqn not in _current_table_cols]
914
+ if not _missing_nested:
915
+ # For "deeply nested" cases (e.g. triple-nested: University → College → Course → Section):
916
+ # nested HM cols (e.g. college.courses) are already packed in the table, but their packed
917
+ # structs lack sub-nested data (e.g. course.sections) because the resolver encoding schema
918
+ # is scalar-only. However, partitioned_has_many already has a separate courses table WITH
919
+ # course.sections. Drop these cols so the _missing_nested path below picks them up and uses
920
+ # NestedHasManySampler with the correctly nested data.
921
+ # Only drop cols where we can actually retrieve the data (same lookup as the _missing_nested
922
+ # handler below at lines 945-950), to avoid dropping cols that PolarsVectorizedHasManySampler
923
+ # still needs when data is unavailable through the nested path.
924
+ _phm = feature_to_data_mapping.partitioned_has_many or {}
925
+
926
+ def _nested_data_available(col: HasManyFeatureType) -> bool:
927
+ if col in _phm:
928
+ return True
929
+ if col in feature_to_data_mapping.raw_has_many:
930
+ return True
931
+ return False
932
+
933
+ _deeply_nested = [
934
+ col
935
+ for col in _nested_hm_schema
936
+ if col.root_fqn in _current_table_cols
937
+ and any(isinstance(x, HasManyFeatureType) for x in col.underlying.df.columns)
938
+ and _nested_data_available(col)
939
+ ]
940
+ if _deeply_nested:
941
+ _cols_to_drop = [col.root_fqn for col in _deeply_nested]
942
+ current_hm_df = (current_hm_df[0].drop(_cols_to_drop), current_hm_df[1])
943
+ _current_table_cols = set(current_hm_df[0].columns)
944
+ _missing_nested = [col for col in _nested_hm_schema if col.root_fqn not in _current_table_cols]
945
+ if _missing_nested:
946
+ resolver_inputs_pl = (
947
+ resolver_inputs_df
948
+ if resolver_inputs_df is not None
949
+ else pa_table_to_pl_df(unwrap_optional(resolver_inputs_table).to_table())
950
+ )
951
+ nested_levels: list[
952
+ tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]
953
+ ] = [(current_hm_feature, current_hm_df[0])]
954
+ for _nested_hm in _missing_nested:
955
+ _nested_data_raw: pl.DataFrame | None = None
956
+ if _nested_hm in feature_to_data_mapping.partitioned_has_many:
957
+ _nested_data_raw = feature_to_data_mapping.partitioned_has_many[_nested_hm][0]
958
+ elif _nested_hm in feature_to_data_mapping.raw_has_many:
959
+ _nested_data_raw = pa_table_to_pl_df(
960
+ feature_to_data_mapping.raw_has_many[_nested_hm].result_and_metadata.to_table()
961
+ )
962
+ if _nested_data_raw is not None:
963
+ # Sections extracted from packed resolver output (via PushHasManyToResult)
964
+ # inherit the parent's TS_COL_NAME but lack the foreign feature-time
965
+ # column required by _get_ungrouped_rows' temporal filter. Use
966
+ # TS_COL_NAME as a proxy so timestamps flow correctly top-down.
967
+ _foreign_ts_fqn = getattr(
968
+ self._graph.ts_feature_for_namespace(_nested_hm.underlying.foreign_namespace()),
969
+ "fqn",
970
+ None,
971
+ )
972
+ if (
973
+ _foreign_ts_fqn is not None
974
+ and _foreign_ts_fqn not in _nested_data_raw.columns
975
+ and TS_COL_NAME in _nested_data_raw.columns
976
+ ):
977
+ _nested_data_raw = _nested_data_raw.with_columns(
978
+ pl.col(TS_COL_NAME).alias(_foreign_ts_fqn)
979
+ )
980
+ # Pre-pack any sub-nested HM data (bottom-up) to support triple-nested has-many.
981
+ # For example, when _nested_hm=college.courses, check if course.sections data
982
+ # is available and pack it into courses_df before NestedHasManySampler runs.
983
+ _sub_nested_hm_cols = [
984
+ col for col in _nested_hm.underlying.df.columns if isinstance(col, HasManyFeatureType)
985
+ ]
986
+ for _sub_nested_hm in _sub_nested_hm_cols:
987
+ _sub_nested_fqn = _sub_nested_hm.root_fqn
988
+ _sub_data: pl.DataFrame | None = None
989
+ # Direct key lookup first
990
+ if (
991
+ feature_to_data_mapping.partitioned_has_many
992
+ and _sub_nested_hm in feature_to_data_mapping.partitioned_has_many
993
+ ):
994
+ _sub_data = feature_to_data_mapping.partitioned_has_many[_sub_nested_hm][0]
995
+ elif _sub_nested_hm in feature_to_data_mapping.raw_has_many:
996
+ _sub_data = pa_table_to_pl_df(
997
+ feature_to_data_mapping.raw_has_many[
998
+ _sub_nested_hm
999
+ ].result_and_metadata.to_table()
1000
+ )
1001
+ else:
1002
+ # Look for an InputFeatureType key whose underlying matches _sub_nested_hm.
1003
+ # After has_many_join_operator propagates nested HM features, entries like
1004
+ # university.colleges.courses.sections (InputFeatureType) appear in
1005
+ # partitioned_has_many with underlying=course.sections (HasManyFeatureType).
1006
+ for _k, _v in (feature_to_data_mapping.partitioned_has_many or {}).items():
1007
+ if isinstance(_k, InputFeatureType) and _k.underlying == _sub_nested_hm:
1008
+ _sub_data = _v[0]
1009
+ break
1010
+ if _sub_data is not None:
1011
+ # Add foreign ts alias if needed for the sub-nested data
1012
+ _sub_foreign_ts_fqn = getattr(
1013
+ self._graph.ts_feature_for_namespace(
1014
+ _sub_nested_hm.underlying.foreign_namespace()
1015
+ ),
1016
+ "fqn",
1017
+ None,
1018
+ )
1019
+ if (
1020
+ _sub_foreign_ts_fqn is not None
1021
+ and _sub_foreign_ts_fqn not in _sub_data.columns
1022
+ and TS_COL_NAME in _sub_data.columns
1023
+ ):
1024
+ _sub_data = _sub_data.with_columns(
1025
+ pl.col(TS_COL_NAME).alias(_sub_foreign_ts_fqn)
1026
+ )
1027
+ # Drop the null sub-nested column from _nested_data_raw to avoid
1028
+ # column conflict when the sub-sampler adds the packed column.
1029
+ if _sub_nested_fqn in _nested_data_raw.columns:
1030
+ _nested_data_raw = _nested_data_raw.drop(_sub_nested_fqn)
1031
+ # Also drop RESOLVER_INPUT_IDX_COL_NAME so the sub-sampler
1032
+ # assigns a fresh per-row index for the pack operation.
1033
+ if RESOLVER_INPUT_IDX_COL_NAME in _nested_data_raw.columns:
1034
+ _nested_data_raw = _nested_data_raw.drop(RESOLVER_INPUT_IDX_COL_NAME)
1035
+ _sub_sampler = PolarsVectorizedHasManySampler(
1036
+ resolver_inputs_df=_nested_data_raw,
1037
+ has_many_feature=_sub_nested_hm,
1038
+ has_many_df=(_sub_data, None),
1039
+ graph=self._graph,
1040
+ oom_slim_hm_by_dates=False,
1041
+ oom_slim_hm_by_join_keys=False,
1042
+ enable_indexed_has_many_joins=False,
1043
+ allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
1044
+ include_metadata_columns=False,
1045
+ )
1046
+ _nested_data_raw = _sub_sampler.join_df_and_pack_into_struct().drop(
1047
+ RESOLVER_INPUT_IDX_COL_NAME
1048
+ )
1049
+ nested_levels.append((_nested_hm, _nested_data_raw))
1050
+ if len(nested_levels) > 1:
1051
+ return NestedHasManySampler(
1052
+ resolver_inputs_df=resolver_inputs_pl,
1053
+ levels=nested_levels,
1054
+ graph=self._graph,
1055
+ allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
1056
+ ).yield_groups_per_row()
899
1057
  else:
900
1058
  current_hm_feature = feature_type
1059
+ if len(hm_base_subfeatures) > 1 and all(
1060
+ k in feature_to_data_mapping.raw_has_many for k in hm_base_subfeatures
1061
+ ):
1062
+ # Nested has-many in the raw (online/static) path: separate data tables
1063
+ # exist in raw_has_many for each level. Use NestedHasManySampler to pack
1064
+ # them bottom-up (innermost first) before yielding groups per resolver row.
1065
+ resolver_inputs_pl = (
1066
+ resolver_inputs_df
1067
+ if resolver_inputs_df is not None
1068
+ else pa_table_to_pl_df(unwrap_optional(resolver_inputs_table).to_table())
1069
+ )
1070
+ levels: list[tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]] = []
1071
+ for i, base_hm in enumerate(hm_base_subfeatures):
1072
+ entry = feature_to_data_mapping.raw_has_many[base_hm]
1073
+ data_df = pa_table_to_pl_df(entry.result_and_metadata.to_table())
1074
+ # The outermost feature is used as-is; inner features are expressed
1075
+ # relative to their immediate parent so the join keys resolve correctly.
1076
+ feature_for_level = (
1077
+ base_hm
1078
+ if i == 0
1079
+ else cast(InputFeatureType[HasManyFeatureType], base_hm).relative_to(hm_base_subfeatures[i - 1])
1080
+ )
1081
+ levels.append((feature_for_level, data_df))
1082
+ return NestedHasManySampler(
1083
+ resolver_inputs_df=resolver_inputs_pl,
1084
+ levels=levels,
1085
+ graph=self._graph,
1086
+ allow_planner_postponed_has_many_sampling_planner_option=config.allow_planner_postponed_has_many_sampling,
1087
+ ).yield_groups_per_row()
901
1088
  hm_entry = feature_to_data_mapping.raw_has_many[current_hm_feature]
902
1089
 
903
1090
  sampler: VectorizedHasManySampler | None = None
@@ -974,6 +1161,7 @@ class GeneralBoundInvoker(BoundInvokerProtocol):
974
1161
  )
975
1162
  # If there are any has-one still left in path after the last has-many, we need to change the namespace
976
1163
  if not is_underlying_has_many(feature_type):
1164
+ assert len(hm_base_subfeatures) > 0, f"Expected at least one has-many subfeature for {feature_type}"
977
1165
  packed_lf = packed_lf.select(
978
1166
  pl.col(column.root_fqn).alias(column.fqn)
979
1167
  for column in hm_base_subfeatures[-1].underlying.df.columns
@@ -100,6 +100,7 @@ def _assimilate_resolvers(
100
100
  ):
101
101
  root_ns = get_unique_item(res.unique_input_root_ns for res in resolvers)
102
102
  graph = get_unique_item(res.graph for res in resolvers)
103
+ # Note: this is mutated below if any of the resolvers are async to prevent crashes
103
104
  is_cpu_bound = get_unique_item(res.resource_hint == "cpu" for res in resolvers)
104
105
  pkey_feature = graph.primary_feature_for_namespace(root_ns)
105
106
  assert pkey_feature is not None
@@ -136,10 +137,13 @@ def _assimilate_resolvers(
136
137
  default_args.append(maybe_default_arg)
137
138
  for out in resolver.output:
138
139
  output_refs.add(out)
140
+ metadata = ResolverOutputMetadata.from_resolver(resolver, pkey_feature)
139
141
  resolver_to_input_fqns_has_default_and_defaults[resolver] = (
140
- ResolverOutputMetadata.from_resolver(resolver, pkey_feature),
142
+ metadata,
141
143
  tuple(inputs),
142
144
  )
145
+ if metadata.is_async:
146
+ is_cpu_bound = False
143
147
 
144
148
  if not get_chalk_fix_invalid_result_propagation():
145
149
  default_args = [ResolverArgErrorHandlerParsed(default_value=...) for _ in input_refs]
@@ -502,7 +506,7 @@ class IOBoundParallelResolver:
502
506
  else:
503
507
  with execution_context:
504
508
  start = time.perf_counter()
505
- result = metadata.fn(*resolver_args)
509
+ result = await metadata.fn(*resolver_args)
506
510
  duration = time.perf_counter() - start
507
511
 
508
512
  except (Exception, PolarsPanicErrorCompat) as e:
@@ -614,8 +618,8 @@ class ParallelResolverInvoker(BoundInvokerProtocol):
614
618
  self._query_execution_params = query_execution_params
615
619
  self._graph = graph
616
620
  self._resolver_executor = resolver_executor
617
- self._is_cpu_bound = get_unique_item(resolver.resource_hint == "cpu" for resolver in resolvers)
618
621
  fn = _assimilate_resolvers(resolvers, resolver_executor)
622
+ self._is_cpu_bound = isinstance(fn, CPUBoundParallelResolver)
619
623
  if len(fn.input_refs) == 0:
620
624
  self._unique_input_root_ns = get_unique_item(o.root_namespace for o in fn.output_refs)
621
625
  else:
@@ -1039,7 +1039,8 @@ class PolarsVectorizedHasManySampler(VectorizedHasManySampler):
1039
1039
  )
1040
1040
  else:
1041
1041
  ans = with_struct.groupby(RESOLVER_INPUT_IDX_COL_NAME).agg(pl.col(str(has_many_feature)))
1042
- return ans.select([RESOLVER_INPUT_IDX_COL_NAME, str(has_many_feature)]).collect()
1042
+ result = ans.select([RESOLVER_INPUT_IDX_COL_NAME, str(has_many_feature)]).collect()
1043
+ return result
1043
1044
 
1044
1045
  def join_df_and_pack_into_struct(self) -> pl.DataFrame:
1045
1046
  """
@@ -1404,6 +1405,201 @@ class PyArrowVectorizedHasManySampler(VectorizedHasManySampler):
1404
1405
  )
1405
1406
 
1406
1407
 
1408
+ class NestedHasManySampler:
1409
+ """
1410
+ Handles nested has-many relationships by processing levels top-down: the outermost
1411
+ join is resolved first (preserving its timestamps), then each successive inner level
1412
+ is sampled using the matched rows from the level above as resolver inputs.
1413
+
1414
+ For a ``College -> Courses -> Sections`` hierarchy:
1415
+
1416
+ 1. **Top** (Courses): obtain the course rows that match each college, including their
1417
+ ``__ts__`` values, via :meth:`PolarsVectorizedHasManySampler._get_ungrouped_rows`.
1418
+ 2. **Inner** (Sections): use those course rows as resolver inputs for a
1419
+ :class:`PolarsVectorizedHasManySampler` that packs sections into each course row,
1420
+ with temporal filtering driven by the courses' ``__ts__`` values.
1421
+ 3. **Yield**: emit one :class:`DataFrame` of (augmented) courses per college.
1422
+
1423
+ This top-down ordering is required for correct temporal semantics: the ``__ts__``
1424
+ stored in each course row (after the outer join) must govern which section rows are
1425
+ considered valid for that course.
1426
+
1427
+ Parameters
1428
+ ----------
1429
+ resolver_inputs_df:
1430
+ The outermost resolver inputs (e.g. the College table).
1431
+ levels:
1432
+ A list of ``(feature, data_df)`` tuples ordered **outermost to innermost**.
1433
+ Each feature must be scoped to its own parent's namespace — i.e. ``Course.sections``
1434
+ is passed as a plain :class:`HasManyFeatureType`, not prefixed with
1435
+ ``College.courses``. Example for the two-level case::
1436
+
1437
+ [
1438
+ (College.courses, courses_df), # outermost
1439
+ (Course.sections, sections_df), # innermost
1440
+ ]
1441
+
1442
+ graph:
1443
+ The resolved feature graph, forwarded to each inner sampler.
1444
+ allow_planner_postponed_has_many_sampling_planner_option:
1445
+ Forwarded unchanged to each inner :class:`PolarsVectorizedHasManySampler`.
1446
+ """
1447
+
1448
+ # Temporary column used to preserve the outer resolver-input index while inner
1449
+ # samplers add their own RESOLVER_INPUT_IDX_COL_NAME.
1450
+ _OUTER_IDX_COL = "___CHALK_NESTED_HM_OUTER_IDX___"
1451
+
1452
+ def __init__(
1453
+ self,
1454
+ *,
1455
+ resolver_inputs_df: pl.DataFrame,
1456
+ levels: list[tuple[HasManyFeatureType | InputFeatureType[HasManyFeatureType], pl.DataFrame]],
1457
+ graph: ResolvedGraph,
1458
+ allow_planner_postponed_has_many_sampling_planner_option: bool,
1459
+ ):
1460
+ super().__init__()
1461
+ assert len(levels) >= 1, "NestedHasManySampler requires at least one level"
1462
+ self._resolver_inputs_df = resolver_inputs_df
1463
+ self._levels = levels
1464
+ self._graph = graph
1465
+ self._allow_postponed = allow_planner_postponed_has_many_sampling_planner_option
1466
+
1467
+ def yield_groups_per_row(self) -> Iterable[DataFrame]:
1468
+ levels = self._levels
1469
+ outer_feature, outer_data = levels[0]
1470
+
1471
+ if len(levels) == 1:
1472
+ # Single level: delegate to the standard sampler unchanged.
1473
+ yield from PolarsVectorizedHasManySampler(
1474
+ resolver_inputs_df=self._resolver_inputs_df,
1475
+ has_many_feature=outer_feature,
1476
+ has_many_df=(outer_data, None),
1477
+ graph=self._graph,
1478
+ oom_slim_hm_by_dates=False,
1479
+ oom_slim_hm_by_join_keys=False,
1480
+ enable_indexed_has_many_joins=False,
1481
+ allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
1482
+ include_metadata_columns=False,
1483
+ ).yield_groups_per_row()
1484
+ return
1485
+
1486
+ # --- Step 1: get outer-level ungrouped rows (e.g. courses matched to colleges) ---
1487
+ # We call _get_ungrouped_rows directly instead of constructing a full
1488
+ # PolarsVectorizedHasManySampler, because the sampler's __init__ calls
1489
+ # _get_grouped_rows which tries to select the outer expected columns (including
1490
+ # the nested has-many column e.g. course.sections) — those don't exist in
1491
+ # outer_data yet. _get_ungrouped_rows only does the join + temporal filter,
1492
+ # which is exactly what we need here.
1493
+ outer_ri_df = self._resolver_inputs_df
1494
+ if RESOLVER_INPUT_IDX_COL_NAME not in outer_ri_df.columns:
1495
+ outer_ri_df = with_row_index_compat(outer_ri_df, RESOLVER_INPUT_IDX_COL_NAME)
1496
+
1497
+ outer_foreign_pkey = unwrap_optional(
1498
+ self._graph.primary_feature_for_namespace(outer_feature.underlying.foreign_namespace())
1499
+ )
1500
+ outer_foreign_ts = unwrap_optional(
1501
+ self._graph.ts_feature_for_namespace(outer_feature.underlying.foreign_namespace())
1502
+ )
1503
+
1504
+ outer_left_join_features = outer_feature.underlying.get_local_join_features()
1505
+ if isinstance(outer_feature, InputFeatureType):
1506
+ outer_left_join_features = [
1507
+ InputFeatureType.replace_suffix(outer_feature, jf) for jf in outer_left_join_features
1508
+ ]
1509
+ outer_left_join_cols = [f.root_fqn for f in outer_left_join_features]
1510
+ outer_right_join_cols = [f.root_fqn for f in outer_feature.underlying.get_foreign_join_features()]
1511
+
1512
+ outer_inputs_rename_dict = {
1513
+ col: "__CHALK_RESOLVER_INPUT__" + col for col in outer_ri_df.columns if col != RESOLVER_INPUT_IDX_COL_NAME
1514
+ }
1515
+ outer_unique = all(pl_is_uniquable_on(dtype) for dtype in outer_ri_df.dtypes)
1516
+
1517
+ # outer_ungrouped: RESOLVER_INPUT_IDX_COL_NAME=outer_idx, course cols (incl. __ts__)
1518
+ outer_ungrouped: pl.DataFrame = PolarsVectorizedHasManySampler._get_ungrouped_rows( # pyright: ignore[reportPrivateUsage]
1519
+ graph=self._graph,
1520
+ hm_df=outer_data,
1521
+ mapping_table=None,
1522
+ resolver_inputs_df=outer_ri_df,
1523
+ foreign_pkey_feature=outer_foreign_pkey,
1524
+ foreign_ts_feature=outer_foreign_ts,
1525
+ has_many_feature=outer_feature,
1526
+ inputs_rename_dict=outer_inputs_rename_dict,
1527
+ left_join_cols=outer_left_join_cols,
1528
+ right_join_cols=outer_right_join_cols,
1529
+ oom_slim_hm_by_dates=False,
1530
+ oom_slim_hm_by_join_keys=False,
1531
+ unique_resolver_inputs=outer_unique,
1532
+ enable_indexed_has_many_joins=False,
1533
+ allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
1534
+ ).collect()
1535
+
1536
+ # --- Step 2: rename outer idx to backup so inner samplers get fresh row indices ---
1537
+ # current_rows has _OUTER_IDX=outer_idx, course cols — no RESOLVER_INPUT_IDX_COL_NAME
1538
+ current_rows = outer_ungrouped.rename({RESOLVER_INPUT_IDX_COL_NAME: self._OUTER_IDX_COL})
1539
+
1540
+ # --- Step 3: process each inner level in order (top-down) ---
1541
+ # Each inner sampler receives the (possibly augmented) rows from the level above as
1542
+ # its resolver_inputs. Because RESOLVER_INPUT_IDX_COL_NAME is absent, the sampler
1543
+ # adds a fresh per-row index, joins the next level's data, and packs it as a
1544
+ # list-of-structs column. We then drop that transient index and move on.
1545
+ for inner_feature, inner_data in levels[1:]:
1546
+ inner_sampler = PolarsVectorizedHasManySampler(
1547
+ resolver_inputs_df=current_rows,
1548
+ has_many_feature=inner_feature,
1549
+ has_many_df=(inner_data, None),
1550
+ graph=self._graph,
1551
+ oom_slim_hm_by_dates=False,
1552
+ oom_slim_hm_by_join_keys=False,
1553
+ enable_indexed_has_many_joins=False,
1554
+ allow_planner_postponed_has_many_sampling_planner_option=self._allow_postponed,
1555
+ include_metadata_columns=False,
1556
+ )
1557
+ # Result: RESOLVER_INPUT_IDX_COL_NAME=inner_row_idx, _OUTER_IDX=outer_idx,
1558
+ # parent_cols, packed_inner_col
1559
+ packed = inner_sampler.join_df_and_pack_into_struct()
1560
+ # Drop the transient inner row index; _OUTER_IDX is preserved for the next
1561
+ # iteration (or for the final yield step below).
1562
+ current_rows = packed.drop(RESOLVER_INPUT_IDX_COL_NAME)
1563
+
1564
+ # --- Step 4: restore outer idx and yield one DataFrame per outer resolver input ---
1565
+ # current_rows: _OUTER_IDX=outer_idx, parent_cols (with all inner levels packed in)
1566
+ augmented = current_rows.rename({self._OUTER_IDX_COL: RESOLVER_INPUT_IDX_COL_NAME})
1567
+
1568
+ outer_expected_col_names: list[str] = [x.root_fqn for x in outer_feature.underlying.df.columns]
1569
+ n_outer = len(self._resolver_inputs_df)
1570
+
1571
+ empty_pl = augmented.filter(pl.lit(False)).select(outer_expected_col_names)
1572
+ empty_chalk = DataFrame(
1573
+ empty_pl.lazy(),
1574
+ missing_value_strategy="default_or_allow",
1575
+ verify_validity=False,
1576
+ convert_dtypes=False,
1577
+ )
1578
+
1579
+ # Build a template Chalk DataFrame from the first non-empty group so we can
1580
+ # copy.copy() it for subsequent groups (avoids re-running DataFrame.__init__ N times).
1581
+ template_chalk: DataFrame | None = None
1582
+
1583
+ for outer_idx in range(n_outer):
1584
+ group_pl = augmented.filter(pl.col(RESOLVER_INPUT_IDX_COL_NAME) == outer_idx).select(
1585
+ outer_expected_col_names
1586
+ )
1587
+ if len(group_pl) == 0:
1588
+ yield empty_chalk
1589
+ elif template_chalk is None:
1590
+ template_chalk = DataFrame(
1591
+ group_pl.lazy(),
1592
+ missing_value_strategy="default_or_allow",
1593
+ verify_validity=False,
1594
+ convert_dtypes=False,
1595
+ )
1596
+ yield template_chalk
1597
+ else:
1598
+ copied = copy.copy(template_chalk)
1599
+ copied._swap_underlying(group_pl.lazy()) # pyright: ignore[reportPrivateUsage]
1600
+ yield copied
1601
+
1602
+
1407
1603
  def table_has_struct_or_list(table: pa.Table):
1408
1604
  return any(type_contains_struct_or_list(pa_type) for pa_type in table.schema.types)
1409
1605
 
@@ -12,11 +12,11 @@ from typing import TYPE_CHECKING, Callable, Optional
12
12
  from chalk.features import Feature, FeatureSetBase
13
13
  from chalk.features.resolver import RESOLVER_REGISTRY
14
14
  from chalk.importer import (
15
- FailedImport,
16
15
  import_all_python_files_from_dir,
17
16
  import_sql_file_resolvers,
18
17
  run_post_import_fixups,
19
18
  )
19
+ from chalk.parsed.duplicate_input_gql import FailedImport
20
20
  from chalk.utils.log_with_context import get_logger
21
21
  from chalk.utils.storage_client import (
22
22
  AzureBlobStorageClient,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chalkruntime
3
- Version: 3.32.2
3
+ Version: 3.32.4
4
4
  Summary: Runtime support library for Chalk AI
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -45,7 +45,7 @@ name = "chalkruntime"
45
45
  description = "Runtime support library for Chalk AI"
46
46
  readme = "README.md"
47
47
  requires-python = ">=3.10"
48
- version = "3.32.2"
48
+ version = "3.32.4"
49
49
 
50
50
 
51
51
  [tool.deptry]
@@ -85,6 +85,7 @@ remove-duplicate-keys = true
85
85
 
86
86
  [tool.pyright]
87
87
  include = ["chalkruntime/**", "setup.py"]
88
+ extraPaths = ["../shared_public"]
88
89
  reportCallInDefaultInitializer = "error"
89
90
  # reportUnboundVariable
90
91
  # reportUnusedCoroutine
File without changes
File without changes
File without changes