chalkruntime-3.34.2.tar.gz → chalkruntime-3.34.4.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/PKG-INFO +1 -1
  2. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/convert_chalkpy_underscore.py +38 -0
  3. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/underscore.py +25 -5
  4. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/incrementalization/incrementalizer.py +6 -1
  5. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/vectorized_hasmany_sampler.py +2 -2
  6. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime.egg-info/PKG-INFO +1 -1
  7. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/pyproject.toml +1 -1
  8. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/README.md +0 -0
  9. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/__init__.py +0 -0
  10. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/constants.py +0 -0
  11. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/dataframe/__init__.py +0 -0
  12. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/dataframe/dataframe.py +0 -0
  13. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/dataframe/lazyframe.py +0 -0
  14. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/exc/__init__.py +0 -0
  15. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/exc/failed_argument.py +0 -0
  16. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/exc/resolver_errors.py +0 -0
  17. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/exc/wrapped_resolver_exception.py +0 -0
  18. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/__init__.py +0 -0
  19. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/chalk_overload.py +0 -0
  20. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/feature.py +0 -0
  21. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/filter_conversion.py +0 -0
  22. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/global_graph.py +0 -0
  23. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/graph.py +0 -0
  24. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/graph_impl.py +0 -0
  25. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/graph_proxy.py +0 -0
  26. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/graph_state.py +0 -0
  27. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/jinja_parser.py +0 -0
  28. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/materializations.py +0 -0
  29. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/maybe_named_collection.py +0 -0
  30. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/named_query.py +0 -0
  31. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/nearest_neighbor.py +0 -0
  32. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/overlay_graph.py +0 -0
  33. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/prompt_service.py +0 -0
  34. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/protograph_deserializer.py +0 -0
  35. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/protograph_serializer.py +0 -0
  36. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/resolver.py +0 -0
  37. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/singletons.py +0 -0
  38. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/sklearn_model_parser.py +0 -0
  39. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/stream_resolver.py +0 -0
  40. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/underscore_codec_info.py +0 -0
  41. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/underscore_operation_registry.py +0 -0
  42. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/graph/variables.py +0 -0
  43. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/heaptrack_launcher.py +0 -0
  44. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/incrementalization/__init__.py +0 -0
  45. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/incrementalization/group_incrementalizer.py +0 -0
  46. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/__init__.py +0 -0
  47. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/batch_result_collector.py +0 -0
  48. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/bound_invoker.py +0 -0
  49. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/bound_invoker_cache.py +0 -0
  50. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/general_bound_invoker.py +0 -0
  51. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/no_arg_scalar_invoker.py +0 -0
  52. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/one_to_one_invoker.py +0 -0
  53. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/overlay_features.py +0 -0
  54. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/parse_external_resolver.py +0 -0
  55. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/partition_batch.py +0 -0
  56. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/query_execution_parameters.py +0 -0
  57. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/resolver_args_builder.py +0 -0
  58. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/resolver_input.py +0 -0
  59. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/resolver_input_upload.py +0 -0
  60. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/resolver_output_metadata.py +0 -0
  61. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/resolver_raw_output_parsing.py +0 -0
  62. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/resolver_result.py +0 -0
  63. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/resolver_runner.py +0 -0
  64. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/sample.py +0 -0
  65. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/invoker/validator.py +0 -0
  66. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/loader/__init__.py +0 -0
  67. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/loader/converter.py +0 -0
  68. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/loader/importer.py +0 -0
  69. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/memray_launcher.py +0 -0
  70. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/metadata.py +0 -0
  71. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/py.typed +0 -0
  72. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/server/__init__.py +0 -0
  73. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/server/config.py +0 -0
  74. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/server/entrypoint.py +0 -0
  75. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/server/env_helper.py +0 -0
  76. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/server/remote_python_function_registry_client.py +0 -0
  77. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/server/service.py +0 -0
  78. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/sql_rewriter/__init__.py +0 -0
  79. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/sql_rewriter/composed_rewriter.py +0 -0
  80. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/sql_rewriter/contextual_query_rewriter.py +0 -0
  81. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/sql_rewriter/filter_query_rewriter.py +0 -0
  82. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/sql_rewriter/identity_rewriter.py +0 -0
  83. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/sql_rewriter/query_rewriter.py +0 -0
  84. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/sql_rewriter/query_rewriter_helper.py +0 -0
  85. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/streaming/__init__.py +0 -0
  86. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/streaming/converter_utils.py +0 -0
  87. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/streaming/exc.py +0 -0
  88. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/streaming/message_parsing.py +0 -0
  89. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/streaming/resolver_utils.py +0 -0
  90. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/streaming/types.py +0 -0
  91. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/streaming/window_keys.py +0 -0
  92. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/utils/__init__.py +0 -0
  93. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/utils/async_helpers.py +0 -0
  94. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/utils/contextvars.py +0 -0
  95. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/utils/datadog.py +0 -0
  96. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/utils/internal_pl_utils.py +0 -0
  97. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/utils/tracing.py +0 -0
  98. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/utils/viztracer_profiling.py +0 -0
  99. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime/valgrind_launcher.py +0 -0
  100. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime.egg-info/SOURCES.txt +0 -0
  101. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime.egg-info/dependency_links.txt +0 -0
  102. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime.egg-info/requires.txt +0 -0
  103. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/chalkruntime.egg-info/top_level.txt +0 -0
  104. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/setup.cfg +0 -0
  105. {chalkruntime-3.34.2 → chalkruntime-3.34.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chalkruntime
3
- Version: 3.34.2
3
+ Version: 3.34.4
4
4
  Summary: Runtime support library for Chalk AI
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -1108,6 +1108,30 @@ def _convert_underscore_expression(
1108
1108
  if not isinstance(n_arg, UnderscoreConstant):
1109
1109
  raise ValueError(f"2nd argument to of '{operation_name}' must be constant")
1110
1110
  additional_args.append(n_arg)
1111
+ elif operation_name == "approx_top_k" and "by" in underscore._chalk__kwargs:
1112
+ # `approx_top_k(k=..., by=_.weight_col)` is a weighted top-k. Lift the `by=` kwarg
1113
+ # into a positional operand so it shares the same shape as `max_by`/`min_by` for
1114
+ # downstream has-many handling (e.g. `has_aggregate_input` / `_process_underscore_hm`).
1115
+ if not isinstance(parent, UnderscoreItemParsed):
1116
+ raise ValueError(f"'{operation_name}' with 'by=' must be called on a column")
1117
+ if parent.feature_keys[0].root_namespace is None:
1118
+ raise ValueError(
1119
+ f"'{operation_name}' with 'by=' must be called on a feature with a valid parent namespace"
1120
+ )
1121
+ by_arg = _convert_underscore_expression(
1122
+ underscore=underscore._chalk__kwargs["by"],
1123
+ namespace=parent.feature_keys[0].root_namespace,
1124
+ graph=graph,
1125
+ for_feature=for_feature,
1126
+ local_scope=local_scope,
1127
+ parent_op=parent_op,
1128
+ context=context,
1129
+ )
1130
+ if not isinstance(by_arg, UnderscoreValue):
1131
+ raise ValueError(
1132
+ f"Expected 'by' argument to '{operation_name}' to be an UnderscoreValue, received '{type(by_arg)}'"
1133
+ )
1134
+ additional_args: list[UnderscoreValue | UnderscoreConstant] = [by_arg]
1111
1135
  else:
1112
1136
  additional_args: list[UnderscoreValue | UnderscoreConstant] = []
1113
1137
 
@@ -1675,6 +1699,20 @@ def _convert_underscore_materialized_aggregation(
1675
1699
  first_arg = resolved_underscore.operands.positional_items[1]
1676
1700
  if not isinstance(first_arg, UnderscoreFeature):
1677
1701
  raise ValueError(f"The second argument to '{window_materialization.aggregation}' should be a feature")
1702
+ elif window_materialization.aggregation == MaterializedAggregationType.ApproxTopK:
1703
+ # `approx_top_k` is invoked as either `approx_top_k(_.col, k=...)` (1 operand) or
1704
+ # `approx_top_k(_.col, by=_.weight, k=...)` (2 operands - the parser lifts `by=` into
1705
+ # a positional operand). Either is valid.
1706
+ if len(resolved_underscore.operands.positional_items) not in (1, 2):
1707
+ raise ValueError(
1708
+ f"The windowed aggregation operation '{window_materialization.aggregation}' accepts 1 or 2 positional arguments, but received {len(resolved_underscore.operands.positional_items)} arguments"
1709
+ )
1710
+ if len(resolved_underscore.operands.positional_items) == 2:
1711
+ second_arg = resolved_underscore.operands.positional_items[1]
1712
+ if not isinstance(second_arg, UnderscoreFeature):
1713
+ raise ValueError(
1714
+ f"The second (`by=`) argument to '{window_materialization.aggregation}' should be a feature"
1715
+ )
1678
1716
  elif not resolved_underscore.operands.has_exactly_n_and_only_positional_items(1):
1679
1717
  raise ValueError(
1680
1718
  f"The windowed aggregation operation '{window_materialization.aggregation}' only accepts a single positional argument, but received {len(resolved_underscore.operands.positional_items)} arguments"
@@ -785,7 +785,11 @@ class UnderscoreValidationError(Exception):
785
785
 
786
786
  OperationSupplier = Callable[[str], "UnderscoreOperation"]
787
787
 
788
- by_aggregate_fns = ("max_by", "min_by", "max_by_n", "min_by_n")
788
+ by_aggregate_fns = ("max_by", "min_by", "max_by_n", "min_by_n", "approx_top_k")
789
+ # `approx_top_k` is in `by_aggregate_fns` because it can be invoked with a `by=` operand
790
+ # (lifted into a positional operand at parse time). Without `by=`, it has only a single
791
+ # positional operand and is treated as a regular has-many aggregation.
792
+ _always_by_aggregate_fns = frozenset(("max_by", "min_by", "max_by_n", "min_by_n"))
789
793
 
790
794
 
791
795
  @dataclasses.dataclass(frozen=True, kw_only=True)
@@ -828,10 +832,24 @@ class UnderscoreOperationExpression(UnderscoreValue):
828
832
  f"Expected kwargs to be a 'FrozenOrderedSet', but is actually a '{type(self.kwargs).__name__}'."
829
833
  )
830
834
 
835
+ def is_by_aggregate_invocation(self) -> bool:
836
+ """True iff this operation is invoked with a `by=` (sort) operand.
837
+
838
+ `max_by`/`min_by`/`max_by_n`/`min_by_n` always require a `by=` operand. `approx_top_k`
839
+ accepts an optional `by=` kwarg that the parser lifts into a 2nd positional operand;
840
+ when called without `by=`, it has only one positional operand (the has-many column)
841
+ and behaves like a normal aggregation.
842
+ """
843
+ if self.operation_name in _always_by_aggregate_fns:
844
+ return True
845
+ if self.operation_name == "approx_top_k":
846
+ return len(self.operands.positional_items) >= 2
847
+ return False
848
+
831
849
  @cached_property
832
850
  def has_aggregate_input(self):
833
851
  # Used to parse max_by and min_by aggregations
834
- if self.operation_name in by_aggregate_fns and len(self.operands.positional_items) not in (2, 3): # max/min_by
852
+ if not self.is_by_aggregate_invocation() or len(self.operands.positional_items) not in (2, 3):
835
853
  raise ValueError(
836
854
  f"Expected 2 or 3 operands while converting expression to DataFrame filter; got: {self.operands.pprint()}"
837
855
  )
@@ -886,8 +904,10 @@ class UnderscoreOperationExpression(UnderscoreValue):
886
904
 
887
905
  @cached_property
888
906
  def root_namespace(self) -> str | None:
889
- if self.operation_name in by_aggregate_fns:
890
- # These functions are special
907
+ if self.is_by_aggregate_invocation():
908
+ # `by=` aggregations span two namespaces (the has-many's parent and the has-many's
909
+ # child, where the `by` column lives). Pick the first non-pseudonamespace as the
910
+ # canonical root rather than rejecting the multi-namespace case.
891
911
  special_namespaces = [
892
912
  op.root_namespace
893
913
  for _, op in MaybeNamedCollection.enumerate(self.operands)
@@ -936,7 +956,7 @@ class UnderscoreOperationExpression(UnderscoreValue):
936
956
  @cached_property
937
957
  def inputs(self) -> tuple[UnderscoreInputFeatureType, ...]:
938
958
  all_inputs: list[UnderscoreInputFeatureType] = []
939
- if self.operation_name in by_aggregate_fns:
959
+ if self.is_by_aggregate_invocation():
940
960
  if isinstance(self.operands.positional_items[0], UnderscoreValue):
941
961
  all_inputs.append(self.has_aggregate_input)
942
962
  return tuple(all_inputs)
@@ -167,9 +167,14 @@ class Incrementalizer(QueryRewriter):
167
167
  sqlglot.column(settings.incremental_column)
168
168
  < sqlglot_expressions.Placeholder(this=END_TS_PARAM)
169
169
  )
170
+ params = {**chalk_query.params}
171
+ if apply_lower_bound:
172
+ params[START_TS_PARAM] = start_ts
173
+ if apply_upper_bound:
174
+ params[END_TS_PARAM] = end_ts
170
175
  return FinalizedChalkQuery(
171
176
  query=text(sqlglot_query.sql(dialect=chalk_query.source.get_sqlglot_dialect())),
172
- params={**chalk_query.params, START_TS_PARAM: start_ts, END_TS_PARAM: end_ts},
177
+ params=params,
173
178
  finalizer=chalk_query.finalizer,
174
179
  incremental_settings=None,
175
180
  source=chalk_query.source,
@@ -19,7 +19,7 @@ from chalk.features.dataframe._validation import (
19
19
  )
20
20
  from chalk.utils.collections import FrozenOrderedSet, OrderedSet, unwrap_optional
21
21
  from chalk.utils.df_utils import pa_cast, pa_table_to_pl_df
22
- from chalk.utils.pl_helpers import is_new_polars, pl_is_uniquable_on
22
+ from chalk.utils.pl_helpers import is_new_polars, pl_is_uniquable_on, str_json_decode_compat
23
23
  from chalk.utils.tracing import safe_trace
24
24
 
25
25
  from chalkruntime.constants import (
@@ -1660,7 +1660,7 @@ def _validate_df_schema(underlying: Union[pl.DataFrame, pl.LazyFrame], graph: Re
1660
1660
  elif (
1661
1661
  isinstance(expected_dtype, pl.List) and actual_dtype == pl.Utf8 # pyright: ignore[reportUnnecessaryComparison]
1662
1662
  ):
1663
- col = pl.col(root_fqn).str.json_extract(expected_dtype)
1663
+ col = str_json_decode_compat(pl.col(root_fqn), expected_dtype)
1664
1664
  try:
1665
1665
  underlying = underlying.with_columns(col.cast(expected_dtype))
1666
1666
  except (Exception, PolarsPanicErrorCompat) as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chalkruntime
3
- Version: 3.34.2
3
+ Version: 3.34.4
4
4
  Summary: Runtime support library for Chalk AI
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -44,7 +44,7 @@ name = "chalkruntime"
44
44
  description = "Runtime support library for Chalk AI"
45
45
  readme = "README.md"
46
46
  requires-python = ">=3.10"
47
- version = "3.34.2"
47
+ version = "3.34.4"
48
48
 
49
49
 
50
50
  [tool.deptry]
File without changes
File without changes
File without changes