chalkpy 2.90.1__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +16 -16
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +4 -0
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +17 -15
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +25 -0
  10. chalk/_gen/chalk/common/v1/script_task_pb2.py +3 -3
  11. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +2 -0
  12. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  13. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  14. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  15. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  18. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  19. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  20. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  21. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  22. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  23. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  26. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  27. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  28. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  29. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  32. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  33. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  34. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  35. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  38. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  39. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  40. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  41. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  42. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  43. chalk/_gen/chalk/server/v1/builder_pb2.py +358 -288
  44. chalk/_gen/chalk/server/v1/builder_pb2.pyi +360 -10
  45. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +225 -0
  46. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +60 -0
  47. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  48. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  49. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  50. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  53. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +141 -119
  54. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +106 -4
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +45 -0
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +12 -0
  57. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  58. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  61. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +52 -38
  62. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +62 -1
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +90 -0
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +24 -0
  65. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  66. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  69. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  70. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  71. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  72. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  73. chalk/_gen/chalk/server/v1/deployment_pb2.py +6 -6
  74. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +20 -0
  75. chalk/_gen/chalk/server/v1/environment_pb2.py +14 -12
  76. chalk/_gen/chalk/server/v1/environment_pb2.pyi +19 -0
  77. chalk/_gen/chalk/server/v1/eventbus_pb2.py +4 -2
  78. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  79. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  80. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  81. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  82. chalk/_gen/chalk/server/v1/graph_pb2.py +38 -26
  83. chalk/_gen/chalk/server/v1/graph_pb2.pyi +58 -0
  84. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +47 -0
  85. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +18 -0
  86. chalk/_gen/chalk/server/v1/incident_pb2.py +23 -21
  87. chalk/_gen/chalk/server/v1/incident_pb2.pyi +15 -1
  88. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  89. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  90. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  91. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  92. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  93. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  94. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  95. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  96. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  97. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  98. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  99. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  100. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  101. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  102. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  103. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  104. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  105. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  106. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  107. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  108. chalk/_gen/chalk/server/v1/queries_pb2.py +66 -66
  109. chalk/_gen/chalk/server/v1/queries_pb2.pyi +32 -2
  110. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -12
  111. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +16 -3
  112. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  113. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  114. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  115. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  116. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +15 -3
  117. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +22 -0
  118. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  119. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  120. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  121. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  122. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  123. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  124. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  125. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  126. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  127. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  128. chalk/_gen/chalk/server/v1/team_pb2.py +154 -141
  129. chalk/_gen/chalk/server/v1/team_pb2.pyi +30 -2
  130. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +45 -0
  131. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +12 -0
  132. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  133. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  134. chalk/_gen/chalk/server/v1/trace_pb2.py +44 -40
  135. chalk/_gen/chalk/server/v1/trace_pb2.pyi +20 -0
  136. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  137. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  138. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  139. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  140. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +16 -10
  141. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +52 -1
  142. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  143. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  144. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  145. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  146. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  147. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  148. chalk/_lsp/error_builder.py +11 -0
  149. chalk/_version.py +1 -1
  150. chalk/client/client.py +128 -43
  151. chalk/client/client_async.py +149 -0
  152. chalk/client/client_async_impl.py +22 -0
  153. chalk/client/client_grpc.py +539 -104
  154. chalk/client/client_impl.py +449 -122
  155. chalk/client/dataset.py +7 -1
  156. chalk/client/models.py +98 -0
  157. chalk/client/serialization/model_serialization.py +92 -9
  158. chalk/df/LazyFramePlaceholder.py +1154 -0
  159. chalk/features/_class_property.py +7 -0
  160. chalk/features/_embedding/embedding.py +1 -0
  161. chalk/features/_encoding/converter.py +83 -2
  162. chalk/features/feature_field.py +40 -30
  163. chalk/features/feature_set_decorator.py +1 -0
  164. chalk/features/feature_wrapper.py +42 -3
  165. chalk/features/hooks.py +81 -10
  166. chalk/features/inference.py +33 -31
  167. chalk/features/resolver.py +224 -24
  168. chalk/functions/__init__.py +65 -3
  169. chalk/gitignore/gitignore_parser.py +5 -1
  170. chalk/importer.py +142 -68
  171. chalk/ml/__init__.py +2 -0
  172. chalk/ml/model_hooks.py +194 -26
  173. chalk/ml/model_reference.py +56 -8
  174. chalk/ml/model_version.py +24 -15
  175. chalk/ml/utils.py +20 -17
  176. chalk/operators/_utils.py +10 -3
  177. chalk/parsed/_proto/export.py +22 -0
  178. chalk/parsed/duplicate_input_gql.py +3 -0
  179. chalk/parsed/json_conversions.py +20 -14
  180. chalk/parsed/to_proto.py +16 -4
  181. chalk/parsed/user_types_to_json.py +31 -10
  182. chalk/parsed/validation_from_registries.py +182 -0
  183. chalk/queries/named_query.py +16 -6
  184. chalk/queries/scheduled_query.py +9 -1
  185. chalk/serialization/parsed_annotation.py +24 -11
  186. chalk/sql/__init__.py +18 -0
  187. chalk/sql/_internal/integrations/databricks.py +55 -17
  188. chalk/sql/_internal/integrations/mssql.py +127 -62
  189. chalk/sql/_internal/integrations/redshift.py +4 -0
  190. chalk/sql/_internal/sql_file_resolver.py +53 -9
  191. chalk/sql/_internal/sql_source.py +35 -2
  192. chalk/streams/_kafka_source.py +5 -1
  193. chalk/streams/_windows.py +15 -2
  194. chalk/utils/_otel_version.py +13 -0
  195. chalk/utils/async_helpers.py +2 -2
  196. chalk/utils/missing_dependency.py +5 -4
  197. chalk/utils/tracing.py +185 -95
  198. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/METADATA +4 -6
  199. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/RECORD +202 -146
  200. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  201. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  202. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
@@ -12,12 +12,11 @@ from chalk.utils.collections import ensure_tuple
12
12
 
13
13
 
14
14
  def build_inference_function(
15
- model_version: ModelVersion, pkey: Feature, output_feature: Optional[Feature] = None, text_model: bool = False
15
+ model_version: ModelVersion, pkey: Feature, output_features: Optional[Feature | list[Feature]] = None
16
16
  ) -> Callable[[DataFrame], DataFrame]:
17
17
  """Build the core inference function that takes a DataFrame and returns predictions.
18
18
 
19
- This is the function that gets called by the inference resolver. It excludes the primary key
20
- from the input, makes predictions, and optionally adds them back to the DataFrame.
19
+ Uses ModelInference.prepare_input() and extract_output() for model-specific logic.
21
20
 
22
21
  Parameters
23
22
  ----------
@@ -25,40 +24,49 @@ def build_inference_function(
25
24
  The model version to use for prediction
26
25
  pkey
27
26
  The primary key feature to exclude from predictions
28
- output_feature
29
- Optional output feature to add predictions to the DataFrame. If None, just returns raw predictions.
27
+ output_features
28
+ Optional output feature(s) to add predictions to the DataFrame.
29
+ Can be a single Feature or a list of Features for multi-output models.
30
30
 
31
31
  Returns
32
32
  -------
33
33
  Callable[[DataFrame], DataFrame]
34
34
  Function that takes a DataFrame and returns predictions
35
35
  """
36
+ # For all other models, use the ModelInference prepare_input/extract_output methods
36
37
  pkey_string = str(pkey)
37
- if text_model:
38
- # For text models, we expect a single column of string data
39
- def fn(inp: DataFrame):
40
- texts = inp[[c for c in inp.columns if c != pkey_string]].to_pyarrow().column(0).to_numpy()
41
- result = model_version.predict(texts)
42
- if output_feature is not None:
43
- return inp[pkey_string].with_columns({output_feature: result})
44
- return result
45
-
46
- return fn
47
38
 
48
39
  def fn(inp: DataFrame):
49
- # Convert features to PyArrow table, excluding primary key
50
- arr = inp[[c for c in inp.columns if c != pkey_string]].to_pyarrow().__array__()
40
+ # Get features (excluding primary key) as PyArrow table
41
+ feature_table = inp[[c for c in inp.columns if c != pkey_string]].to_pyarrow()
42
+
43
+ # Use model-specific input preparation (default: __array__(), ONNX: struct array)
44
+ model_input = model_version.predictor.prepare_input(feature_table)
45
+
46
+ # Run prediction
47
+ result = model_version.predict(model_input)
48
+
49
+ if output_features is not None:
50
+ # Normalize to list for uniform processing
51
+ features_list = output_features if isinstance(output_features, list) else [output_features]
52
+
53
+ # Extract output for each feature and build columns dict
54
+ columns_dict = {}
55
+ for output_feature in features_list:
56
+ # Use model-specific output extraction (default: identity, ONNX: extract field)
57
+ output_feature_name = str(output_feature).split(".")[-1]
58
+ result_data = model_version.predictor.extract_output(result, output_feature_name)
59
+ columns_dict[output_feature] = result_data
60
+
61
+ return inp[pkey_string].with_columns(columns_dict)
51
62
 
52
- result = model_version.predict(arr)
53
- if output_feature is not None:
54
- return inp[pkey_string].with_columns({output_feature: result})
55
63
  return result
56
64
 
57
65
  return fn
58
66
 
59
67
 
60
68
  def generate_inference_resolver(
61
- inputs: list[Underscore], model_version: ModelVersion, resource_hint: Optional[ResourceHint] = None
69
+ inputs: list[Underscore] | Underscore, model_version: ModelVersion, resource_hint: Optional[ResourceHint] = None
62
70
  ) -> Feature:
63
71
  output_feature = Feature()
64
72
  previous_hook = output_feature.hook
@@ -73,21 +81,15 @@ def generate_inference_resolver(
73
81
 
74
82
  def resolver_factory():
75
83
  # Use the extracted build_inference_function
76
-
77
84
  cleaned_inputs = []
78
- for i in inputs:
85
+ inputs_list = inputs if isinstance(inputs, list) else [inputs]
86
+ for i in inputs_list:
79
87
  try:
80
88
  cleaned_inputs.append(Feature.from_root_fqn(output_feature.namespace + str(i)[1:]))
81
89
  except Exception as e:
82
90
  raise ValueError(f"Could not find feature for input {i}: {e}")
83
91
 
84
- try:
85
- text_model = len(cleaned_inputs) == 1 and cleaned_inputs[0].typ.parsed_annotation is str
86
- except Exception:
87
- # Fallback in case of any issues determining the type
88
- text_model = False
89
-
90
- fn = build_inference_function(model_version, pkey, output_feature, text_model=text_model)
92
+ fn = build_inference_function(model_version, pkey, output_feature)
91
93
 
92
94
  identifier = model_version.identifier or ""
93
95
  model_reference = MODEL_REFERENCE_REGISTRY.get((model_version.name, identifier), None)
@@ -111,7 +113,7 @@ def generate_inference_resolver(
111
113
  when=None,
112
114
  tags=None,
113
115
  owner=None,
114
- resource_hint=resource_hint,
116
+ resource_hint=resource_hint or model_version.resource_hint,
115
117
  data_sources=None,
116
118
  is_sql_file_resolver=False,
117
119
  source_line=None,
@@ -70,6 +70,7 @@ from pydantic import BaseModel
70
70
 
71
71
  from chalk._lsp._class_finder import get_function_caller_info
72
72
  from chalk._lsp.error_builder import FunctionCallErrorBuilder, ResolverErrorBuilder, get_resolver_error_builder
73
+ from chalk.df.LazyFramePlaceholder import LazyFramePlaceholder
73
74
  from chalk.features._encoding.protobuf import (
74
75
  convert_proto_message_type_to_pyarrow_type,
75
76
  serialize_message_file_descriptor,
@@ -114,6 +115,7 @@ if TYPE_CHECKING:
114
115
 
115
116
  from chalk.features import Underscore
116
117
  from chalk.features.underscore import UnderscoreAttr, UnderscoreCall, UnderscoreCast, UnderscoreFunction
118
+ from chalk.ml.model_version import ModelVersion
117
119
  from chalk.sql import BaseSQLSourceProtocol, SQLSourceGroup
118
120
  from chalk.sql._internal.sql_settings import SQLResolverSettings
119
121
  from chalk.sql._internal.sql_source import BaseSQLSource
@@ -555,28 +557,18 @@ class ResolverRegistry:
555
557
  short_name = resolver.name
556
558
  if short_name in self._short_name_to_resolver:
557
559
  if not override and not notebook.is_notebook():
558
- if resolver.fqn == self._short_name_to_resolver[short_name]:
559
- # Same resolver was redefined
560
- resolver.lsp_builder.add_diagnostic(
561
- message=f"Duplicate resolver '{resolver.fqn}'. Multiple resolvers cannot have the same name.",
562
- code="71",
563
- label="duplicate name",
564
- range=resolver.lsp_builder.function_name(),
565
- raise_error=ValueError,
566
- )
567
- else:
568
- # Same short name was reused
569
- resolver.lsp_builder.add_diagnostic(
570
- message=(
571
- f"Another resolver with the same function name '{resolver.name}' in module "
572
- f"'{self._short_name_to_resolver[short_name].__module__}' exists. "
573
- f"Resolver function names must be unique. Please rename this resolver in module '{resolver.__module__}'."
574
- ),
575
- label="duplicate resolver shortname",
576
- code="71",
577
- range=resolver.lsp_builder.function_name(),
578
- raise_error=None,
579
- )
560
+ # Same short name was reused
561
+ resolver.lsp_builder.add_diagnostic(
562
+ message=(
563
+ f"Another resolver with the same function name '{resolver.name}' in module "
564
+ f"'{self._short_name_to_resolver[short_name].__module__}' exists. "
565
+ f"Resolver function names must be unique. Please rename this resolver in module '{resolver.__module__}'."
566
+ ),
567
+ label="duplicate resolver shortname",
568
+ code="71",
569
+ range=resolver.lsp_builder.function_name(),
570
+ raise_error=None,
571
+ )
580
572
  return
581
573
  existing_resolver = self._short_name_to_resolver[short_name]
582
574
  # Need to remove the resolver from the typed registry
@@ -649,6 +641,7 @@ class Resolver(ResolverProtocol[P, T], abc.ABC):
649
641
  output_row_order: Literal["one-to-one"] | None = None,
650
642
  venv: str | None = None,
651
643
  name: None = None, # deprecated
644
+ postprocessing: Underscore | None = None,
652
645
  ):
653
646
  self._function_definition = ... if function_definition is None else function_definition
654
647
  self._function_captured_globals = ... if function_captured_globals is None else function_captured_globals
@@ -692,6 +685,7 @@ class Resolver(ResolverProtocol[P, T], abc.ABC):
692
685
  self._data_lineage = data_lineage
693
686
  self._sql_settings = sql_settings
694
687
  self.output_row_order = output_row_order
688
+ self.postprocessing = postprocessing
695
689
  super().__init__()
696
690
 
697
691
  @property
@@ -849,14 +843,20 @@ class Resolver(ResolverProtocol[P, T], abc.ABC):
849
843
  annotation = None
850
844
 
851
845
  if annotation is not None:
852
- if not isinstance(val, DataFrame):
846
+ if self.static and type(val).__name__ == "DataFrame" and type(val).__module__ == "chalkdf.dataframe":
847
+ # No need to wrap this class in DataFrame.
848
+ pass
849
+ elif self.static and isinstance(val, LazyFramePlaceholder):
850
+ # No need to wrap this class in DataFrame.
851
+ pass
852
+ elif not isinstance(val, DataFrame):
853
853
  val = DataFrame(val)
854
854
 
855
855
  if time_is_frozen():
856
856
  frozen_filter = Filter(lhs=CHALK_TS_FEATURE, operation="<=", rhs=TimeDelta(hours_ago=0))
857
857
  annotation.filters = (frozen_filter, *annotation.filters)
858
858
 
859
- if annotation.filters and len(annotation.filters) > 0:
859
+ if annotation.filters and len(annotation.filters) > 0 and not isinstance(val, LazyFramePlaceholder):
860
860
  try:
861
861
  val = val[annotation.filters]
862
862
  val._materialize() # pyright: ignore[reportPrivateUsage]
@@ -2829,6 +2829,8 @@ class StreamResolver(Resolver[P, T]):
2829
2829
  sql_settings: SQLResolverSettings | None,
2830
2830
  feature_expressions: dict[Feature, Underscore] | None,
2831
2831
  message_producer_parsed: StreamResolverMessageProducerParsed | None,
2832
+ skip_online: bool = False,
2833
+ skip_offline: bool = False,
2832
2834
  ):
2833
2835
  super().__init__(
2834
2836
  function_definition=function_definition,
@@ -2897,6 +2899,8 @@ class StreamResolver(Resolver[P, T]):
2897
2899
 
2898
2900
  self.feature_expressions: dict[Feature, Underscore] | None = feature_expressions
2899
2901
  self.message_producer_parsed: StreamResolverMessageProducerParsed | None = message_producer_parsed
2902
+ self.skip_online = skip_online
2903
+ self.skip_offline = skip_offline
2900
2904
 
2901
2905
  @property
2902
2906
  def output_features(self) -> Sequence[Feature]:
@@ -3808,6 +3812,8 @@ def make_stream_resolver(
3808
3812
  owner: Optional[str] = None,
3809
3813
  doc: str | None = None,
3810
3814
  sink: Sink | None = None,
3815
+ skip_online: bool = False,
3816
+ skip_offline: bool = False,
3811
3817
  ) -> StreamResolver:
3812
3818
  """Constructs a streaming resolver that, instead of a Python function,
3813
3819
  defines its output features as column projections on an input message.
@@ -3840,6 +3846,14 @@ def make_stream_resolver(
3840
3846
  sink
3841
3847
  An optional message producer configuration that specifies where to send messages.
3842
3848
  Read more at https://docs.chalk.ai/api-docs#Sink
3849
+ skip_online
3850
+ If True, skip online persistence (no writes to Redis/DynamoDB/etc).
3851
+ Results will still be processed but not stored in online stores.
3852
+ Note: Only applies to native streaming. Default: False
3853
+ skip_offline
3854
+ If True, skip offline persistence (no result bus publishing for offline storage).
3855
+ Results will still be processed but not stored in offline stores (S3/BigQuery/etc).
3856
+ Note: Only applies to native streaming. Default: False
3843
3857
 
3844
3858
  Returns
3845
3859
  -------
@@ -4017,6 +4031,8 @@ def make_stream_resolver(
4017
4031
  sql_settings=None,
4018
4032
  feature_expressions={unwrap_feature(x): u for x, u in output_features.items()},
4019
4033
  message_producer_parsed=message_producer_parsed,
4034
+ skip_online=skip_online,
4035
+ skip_offline=skip_offline,
4020
4036
  )
4021
4037
  resolver.add_to_registry(override=False)
4022
4038
  return resolver
@@ -4307,6 +4323,9 @@ def validate_field_chain(
4307
4323
 
4308
4324
  # Base case: if parent is UnderscoreRoot (_), validate field against current_type
4309
4325
  if isinstance(underscore_attr._chalk__parent, UnderscoreRoot):
4326
+ if underscore_attr._chalk__attr == "chalk_now":
4327
+ return datetime
4328
+
4310
4329
  # Check if current_type allows field access
4311
4330
  if current_type in (str, bytes):
4312
4331
  error_builder.add_diagnostic(
@@ -4476,3 +4495,184 @@ def validate_message_attributes(
4476
4495
  message_type = message_type.__args__[0] # pyright: ignore[reportAttributeAccessIssue]
4477
4496
  for expression in expressions:
4478
4497
  validate_underscore_expression(expression, message_type, error_builder, name)
4498
+
4499
+
4500
+ def make_model_resolver(
4501
+ name: str,
4502
+ model: "ModelVersion",
4503
+ inputs: Dict[Feature, str] | List[Feature],
4504
+ output: Feature | List[Feature] | Dict[Feature, str],
4505
+ feature_class: Optional[type[Features]] = None,
4506
+ resource_group: Optional[str] = None,
4507
+ resource_hint: Optional[ResourceHint] = None,
4508
+ ) -> OnlineResolver:
4509
+ """
4510
+ Create an online resolver that runs inference on a model.
4511
+
4512
+ This function provides an imperative API for creating model inference resolvers,
4513
+ as an alternative to using F.inference in feature definitions. It uses the same
4514
+ underlying implementation as F.inference but allows you to create resolvers
4515
+ programmatically.
4516
+
4517
+ Parameters
4518
+ ----------
4519
+ name
4520
+ The name of the resolver
4521
+ model
4522
+ A ModelVersion reference to a deployed model
4523
+ inputs
4524
+ Either a dict mapping Feature objects to model input names (strings), or a list of
4525
+ Feature objects. If a dict, the values represent the model's expected input names
4526
+ (for future use). If a list, the features will be passed as a single DataFrame to
4527
+ the model.
4528
+ output
4529
+ The output feature(s) that will contain the predictions.
4530
+ Can be a single Feature, a list of Features, or a dict mapping Feature objects to
4531
+ model output names (strings) for future use with multi-output models.
4532
+ feature_class
4533
+ Optional feature class to use. If not provided, will be inferred from the inputs.
4534
+ resource_group
4535
+ Optional resource group for the resolver
4536
+ resource_hint
4537
+ Optional resource hint for execution (e.g., CPU/GPU preferences)
4538
+
4539
+ Returns
4540
+ -------
4541
+ OnlineResolver
4542
+ The created resolver
4543
+
4544
+ Examples
4545
+ --------
4546
+ >>> from chalk.features import features, feature
4547
+ >>> from chalk.features.resolver import make_model_resolver
4548
+ >>> from chalk.ml import ModelVersion
4549
+ >>>
4550
+ >>> @features
4551
+ ... class User:
4552
+ ... id: str = feature(primary=True)
4553
+ ... age: float
4554
+ ... income: float
4555
+ ... risk_score: float
4556
+ ... credit_score: float
4557
+ >>>
4558
+ >>> # Create a model version reference
4559
+ >>> model = ModelVersion(
4560
+ ... name="risk_model",
4561
+ ... version=1,
4562
+ ... model_type="sklearn",
4563
+ ... model_encoding="pickle",
4564
+ ... filename="model.pkl"
4565
+ ... )
4566
+ >>>
4567
+ >>> # Create resolver with single output
4568
+ >>> resolver = make_model_resolver(
4569
+ ... name="risk_model",
4570
+ ... model=model,
4571
+ ... inputs=[User.age, User.income],
4572
+ ... output=User.risk_score,
4573
+ ... )
4574
+ >>>
4575
+ >>> # Create resolver with multiple outputs (list)
4576
+ >>> resolver = make_model_resolver(
4577
+ ... name="multi_output_model",
4578
+ ... model=model,
4579
+ ... inputs=[User.age, User.income],
4580
+ ... output=[User.risk_score, User.credit_score],
4581
+ ... )
4582
+ >>>
4583
+ >>> # Create resolver with named inputs and outputs (dict)
4584
+ >>> resolver = make_model_resolver(
4585
+ ... name="named_model",
4586
+ ... model=model,
4587
+ ... inputs={User.age: "age_input", User.income: "income_input"},
4588
+ ... output={User.risk_score: "risk_output", User.credit_score: "credit_output"},
4589
+ ... )
4590
+ """
4591
+ from chalk.features.inference import build_inference_function
4592
+
4593
+ if isinstance(inputs, dict):
4594
+ input_features_raw = list(inputs.keys())
4595
+ else:
4596
+ input_features_raw = inputs
4597
+
4598
+ input_features = [unwrap_feature(f) for f in input_features_raw]
4599
+
4600
+ if isinstance(output, dict):
4601
+ output_features = [unwrap_feature(f) for f in output.keys()]
4602
+ elif isinstance(output, list):
4603
+ output_features = [unwrap_feature(f) for f in output]
4604
+ else:
4605
+ output_features = [unwrap_feature(output)]
4606
+
4607
+ # If feature_class is not provided, try to infer it from the first input feature
4608
+ if feature_class is None:
4609
+ if not input_features:
4610
+ raise ValueError("Cannot infer feature class: no input features provided and feature_class not specified")
4611
+
4612
+ first_input = input_features[0]
4613
+
4614
+ if hasattr(first_input, "features_cls") and first_input.features_cls is not None:
4615
+ feature_class = first_input.features_cls
4616
+ else:
4617
+ raise ValueError(
4618
+ "Cannot infer feature class from inputs. Please provide feature_class parameter explicitly."
4619
+ )
4620
+
4621
+ pkey = feature_class.__chalk_primary__
4622
+ if pkey is None:
4623
+ raise ValueError(f"Feature class {feature_class} does not have a primary key defined")
4624
+
4625
+ first_output = output_features[0]
4626
+
4627
+ output_namespace = (
4628
+ first_output.namespace
4629
+ if hasattr(first_output, "namespace") and first_output.namespace
4630
+ else feature_class.__name__.lower()
4631
+ )
4632
+
4633
+ # Use the same underlying inference function as F.inference
4634
+ # Pass list of outputs if multiple, single if only one
4635
+ output_for_inference = output_features if len(output_features) > 1 else output_features[0]
4636
+ inference_fn = build_inference_function(model, pkey, output_for_inference)
4637
+
4638
+ if len(output_features) == 1:
4639
+ output_names = output_features[0].name
4640
+ else:
4641
+ output_names = "_".join(f.name for f in output_features)
4642
+
4643
+ resolver = OnlineResolver(
4644
+ function_definition="",
4645
+ filename="",
4646
+ fqn=f"{name}__{output_namespace}_{output_names}",
4647
+ doc=None,
4648
+ inputs=[DataFrame[[pkey, *ensure_tuple(input_features)]]],
4649
+ state=None,
4650
+ output=Features[DataFrame[tuple([*output_features, pkey])]], # type: ignore[misc]
4651
+ fn=inference_fn,
4652
+ environment=None,
4653
+ machine_type=None,
4654
+ default_args=[None],
4655
+ timeout=None,
4656
+ cron=None,
4657
+ when=None,
4658
+ tags=None,
4659
+ owner=None,
4660
+ resource_hint=resource_hint or model.resource_hint,
4661
+ data_sources=None,
4662
+ is_sql_file_resolver=False,
4663
+ source_line=None,
4664
+ lsp_builder=get_resolver_error_builder(inference_fn),
4665
+ parse=None,
4666
+ static=False,
4667
+ total=False,
4668
+ autogenerated=False,
4669
+ unique_on=None,
4670
+ partitioned_by=None,
4671
+ data_lineage=None,
4672
+ sql_settings=None,
4673
+ )
4674
+
4675
+ # Register the resolver
4676
+ RESOLVER_REGISTRY.add_to_registry(resolver, override=False)
4677
+
4678
+ return resolver
@@ -1635,6 +1635,69 @@ def sagemaker_predict(
1635
1635
  )
1636
1636
 
1637
1637
 
1638
+ def openai_complete(
1639
+ api_key: Underscore | str,
1640
+ prompt: Underscore | str,
1641
+ model: Underscore | str,
1642
+ max_tokens: Underscore | int,
1643
+ temperature: Underscore | float,
1644
+ ):
1645
+ """
1646
+ Makes a completion request to OpenAI's chat API and returns the response.
1647
+
1648
+ This is a blocking expression that calls OpenAI's API during feature computation.
1649
+ The response includes the completion text along with token usage statistics.
1650
+
1651
+ Parameters
1652
+ ----------
1653
+ api_key
1654
+ The OpenAI API key to use for authentication.
1655
+ prompt
1656
+ The prompt text to send to the model.
1657
+ model
1658
+ The OpenAI model to use (e.g., "gpt-4", "gpt-3.5-turbo").
1659
+ max_tokens
1660
+ The maximum number of tokens to generate in the completion.
1661
+ temperature
1662
+ The sampling temperature to use, between 0 and 2. Higher values make
1663
+ output more random, lower values make it more deterministic.
1664
+
1665
+ Returns
1666
+ -------
1667
+ A struct containing:
1668
+ - completion: The generated text response
1669
+ - prompt_tokens: Number of tokens in the prompt
1670
+ - completion_tokens: Number of tokens in the completion
1671
+ - total_tokens: Total tokens used (prompt + completion)
1672
+ - model: The model used for the completion
1673
+ - finish_reason: Why the completion stopped (e.g., "stop", "length")
1674
+
1675
+ Examples
1676
+ --------
1677
+ >>> import chalk.functions as F
1678
+ >>> from chalk.features import _, features
1679
+ >>> @features
1680
+ ... class Document:
1681
+ ... id: str
1682
+ ... content: str
1683
+ ... summary: str = F.openai_complete(
1684
+ ... api_key="sk-...",
1685
+ ... prompt=_.content,
1686
+ ... model="gpt-4",
1687
+ ... max_tokens=100,
1688
+ ... temperature=0.7,
1689
+ ... ).completion
1690
+ """
1691
+ return UnderscoreFunction(
1692
+ "openai_complete",
1693
+ api_key,
1694
+ prompt,
1695
+ model,
1696
+ max_tokens,
1697
+ temperature,
1698
+ )
1699
+
1700
+
1638
1701
  def json_value(expr: Underscore, path: Union[str, Underscore]):
1639
1702
  """
1640
1703
  Extract structured data from a JSON string feature using a JSONPath expression.
@@ -3884,7 +3947,6 @@ def array_filter(
3884
3947
  ... id: str
3885
3948
  ... recent_activities: list[float]
3886
3949
  ... average_activity: float
3887
- ...
3888
3950
  ... recent_high_value_activities: list[float] = F.array_filter(
3889
3951
  ... _.recent_activities,
3890
3952
  ... lambda amount: amount > _.average_activity,
@@ -3990,7 +4052,6 @@ def array_reduce(
3990
4052
  ... class User:
3991
4053
  ... id: str
3992
4054
  ... scores: list[int]
3993
- ...
3994
4055
  ... total_score: int = F.array_reduce(
3995
4056
  ... arr=_.scores,
3996
4057
  ... initial_value=0,
@@ -4671,7 +4732,7 @@ def nth_bucket_end(value: Underscore, bucket_duration: str, n: int, initial_buck
4671
4732
 
4672
4733
 
4673
4734
  def inference(
4674
- model: ModelVersion, inputs: list[Underscore | Any], resource_hint: ResourceHint | None = None
4735
+ model: ModelVersion, inputs: list[Underscore | Any] | Underscore, resource_hint: ResourceHint | None = None
4675
4736
  ) -> Underscore | Feature:
4676
4737
  """
4677
4738
  Run inference on a deployed ML model.
@@ -5917,6 +5978,7 @@ __all__ = (
5917
5978
  "normal_cdf",
5918
5979
  "nth_bucket_end",
5919
5980
  "nth_bucket_start",
5981
+ "openai_complete",
5920
5982
  "parse_datetime",
5921
5983
  "partial_ratio",
5922
5984
  "pi",
@@ -114,6 +114,10 @@ def _rule_from_pattern(pattern: str, base_path: Optional[Path] = None, source: O
114
114
  regex = _fnmatch_pathname_to_regex(pattern, directory_only)
115
115
  if anchored:
116
116
  regex = f"^{regex}"
117
+ else:
118
+ # For non-anchored patterns, match at path component boundaries
119
+ # (start of string or after a path separator)
120
+ regex = f"(^|/){regex}"
117
121
  regex = f"(?ms){regex}"
118
122
  return IgnoreRule(
119
123
  pattern=orig_pattern,
@@ -215,6 +219,6 @@ def _fnmatch_pathname_to_regex(pattern: str, directory_only: bool):
215
219
  if directory_only:
216
220
  res.append(r"/.*$")
217
221
  else:
218
- res.append("(/.*|[^/]*)$")
222
+ res.append("(/.*)?$")
219
223
 
220
224
  return "".join(res)