chalkpy 2.90.1__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +16 -16
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +4 -0
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +17 -15
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +25 -0
  10. chalk/_gen/chalk/common/v1/script_task_pb2.py +3 -3
  11. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +2 -0
  12. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  13. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  14. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  15. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  18. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  19. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  20. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  21. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  22. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  23. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  26. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  27. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  28. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  29. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  32. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  33. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  34. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  35. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  38. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  39. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  40. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  41. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  42. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  43. chalk/_gen/chalk/server/v1/builder_pb2.py +358 -288
  44. chalk/_gen/chalk/server/v1/builder_pb2.pyi +360 -10
  45. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +225 -0
  46. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +60 -0
  47. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  48. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  49. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  50. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  53. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +141 -119
  54. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +106 -4
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +45 -0
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +12 -0
  57. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  58. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  61. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +52 -38
  62. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +62 -1
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +90 -0
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +24 -0
  65. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  66. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  69. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  70. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  71. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  72. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  73. chalk/_gen/chalk/server/v1/deployment_pb2.py +6 -6
  74. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +20 -0
  75. chalk/_gen/chalk/server/v1/environment_pb2.py +14 -12
  76. chalk/_gen/chalk/server/v1/environment_pb2.pyi +19 -0
  77. chalk/_gen/chalk/server/v1/eventbus_pb2.py +4 -2
  78. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  79. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  80. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  81. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  82. chalk/_gen/chalk/server/v1/graph_pb2.py +38 -26
  83. chalk/_gen/chalk/server/v1/graph_pb2.pyi +58 -0
  84. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +47 -0
  85. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +18 -0
  86. chalk/_gen/chalk/server/v1/incident_pb2.py +23 -21
  87. chalk/_gen/chalk/server/v1/incident_pb2.pyi +15 -1
  88. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  89. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  90. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  91. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  92. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  93. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  94. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  95. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  96. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  97. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  98. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  99. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  100. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  101. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  102. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  103. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  104. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  105. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  106. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  107. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  108. chalk/_gen/chalk/server/v1/queries_pb2.py +66 -66
  109. chalk/_gen/chalk/server/v1/queries_pb2.pyi +32 -2
  110. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -12
  111. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +16 -3
  112. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  113. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  114. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  115. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  116. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +15 -3
  117. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +22 -0
  118. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  119. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  120. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  121. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  122. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  123. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  124. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  125. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  126. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  127. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  128. chalk/_gen/chalk/server/v1/team_pb2.py +154 -141
  129. chalk/_gen/chalk/server/v1/team_pb2.pyi +30 -2
  130. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +45 -0
  131. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +12 -0
  132. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  133. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  134. chalk/_gen/chalk/server/v1/trace_pb2.py +44 -40
  135. chalk/_gen/chalk/server/v1/trace_pb2.pyi +20 -0
  136. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  137. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  138. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  139. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  140. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +16 -10
  141. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +52 -1
  142. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  143. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  144. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  145. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  146. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  147. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  148. chalk/_lsp/error_builder.py +11 -0
  149. chalk/_version.py +1 -1
  150. chalk/client/client.py +128 -43
  151. chalk/client/client_async.py +149 -0
  152. chalk/client/client_async_impl.py +22 -0
  153. chalk/client/client_grpc.py +539 -104
  154. chalk/client/client_impl.py +449 -122
  155. chalk/client/dataset.py +7 -1
  156. chalk/client/models.py +98 -0
  157. chalk/client/serialization/model_serialization.py +92 -9
  158. chalk/df/LazyFramePlaceholder.py +1154 -0
  159. chalk/features/_class_property.py +7 -0
  160. chalk/features/_embedding/embedding.py +1 -0
  161. chalk/features/_encoding/converter.py +83 -2
  162. chalk/features/feature_field.py +40 -30
  163. chalk/features/feature_set_decorator.py +1 -0
  164. chalk/features/feature_wrapper.py +42 -3
  165. chalk/features/hooks.py +81 -10
  166. chalk/features/inference.py +33 -31
  167. chalk/features/resolver.py +224 -24
  168. chalk/functions/__init__.py +65 -3
  169. chalk/gitignore/gitignore_parser.py +5 -1
  170. chalk/importer.py +142 -68
  171. chalk/ml/__init__.py +2 -0
  172. chalk/ml/model_hooks.py +194 -26
  173. chalk/ml/model_reference.py +56 -8
  174. chalk/ml/model_version.py +24 -15
  175. chalk/ml/utils.py +20 -17
  176. chalk/operators/_utils.py +10 -3
  177. chalk/parsed/_proto/export.py +22 -0
  178. chalk/parsed/duplicate_input_gql.py +3 -0
  179. chalk/parsed/json_conversions.py +20 -14
  180. chalk/parsed/to_proto.py +16 -4
  181. chalk/parsed/user_types_to_json.py +31 -10
  182. chalk/parsed/validation_from_registries.py +182 -0
  183. chalk/queries/named_query.py +16 -6
  184. chalk/queries/scheduled_query.py +9 -1
  185. chalk/serialization/parsed_annotation.py +24 -11
  186. chalk/sql/__init__.py +18 -0
  187. chalk/sql/_internal/integrations/databricks.py +55 -17
  188. chalk/sql/_internal/integrations/mssql.py +127 -62
  189. chalk/sql/_internal/integrations/redshift.py +4 -0
  190. chalk/sql/_internal/sql_file_resolver.py +53 -9
  191. chalk/sql/_internal/sql_source.py +35 -2
  192. chalk/streams/_kafka_source.py +5 -1
  193. chalk/streams/_windows.py +15 -2
  194. chalk/utils/_otel_version.py +13 -0
  195. chalk/utils/async_helpers.py +2 -2
  196. chalk/utils/missing_dependency.py +5 -4
  197. chalk/utils/tracing.py +185 -95
  198. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/METADATA +4 -6
  199. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/RECORD +202 -146
  200. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  201. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  202. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/ml/model_reference.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import inspect
 import os
 from datetime import datetime
+from typing import TYPE_CHECKING
 
 from chalk.ml.model_version import ModelVersion
 from chalk.ml.utils import (
@@ -15,6 +16,9 @@ from chalk.ml.utils import (
 from chalk.utils.object_inspect import get_source_object_starting
 from chalk.utils.source_parsing import should_skip_source_code_parsing
 
+if TYPE_CHECKING:
+    from chalk.features.resolver import ResourceHint
+
 
 class ModelReference:
     def __init__(
@@ -24,6 +28,8 @@ class ModelReference:
         version: int | None = None,
         alias: str | None = None,
         as_of_date: datetime | None = None,
+        resource_hint: "ResourceHint | None" = None,
+        resource_group: str | None = None,
     ):
         """Specifies the model version that should be loaded into the deployment.
 
@@ -74,6 +80,8 @@ class ModelReference:
         self.as_of_date = as_of_date
         self.alias = alias
         self.identifier = identifier
+        self.resource_hint = resource_hint
+        self.resource_group = resource_group
 
         self.filename = filename
         self.source_line_start = source_line_start
@@ -110,6 +118,8 @@ class ModelReference:
                 model_class=ModelClass(model_artifact_metadata.spec.model_class)
                 if model_artifact_metadata.spec.model_class
                 else None,
+                resource_hint=resource_hint,
+                resource_group=resource_group,
             )
 
             from chalk.features.hooks import before_all
@@ -117,14 +127,22 @@ class ModelReference:
             def hook():
                 mv.load_model()
 
-            before_all(hook)
+            before_all(hook, resource_hint=resource_hint, resource_group=resource_group)
 
             self.model_version = mv
         else:
-            self.model_version = ModelVersion(name=name, identifier=identifier)
+            self.model_version = ModelVersion(
+                name=name, identifier=identifier, resource_hint=resource_hint, resource_group=resource_group
+            )
 
     @classmethod
-    def as_of(cls, name: str, when: datetime) -> ModelVersion:
+    def as_of(
+        cls,
+        name: str,
+        when: datetime,
+        resource_hint: "ResourceHint | None" = None,
+        resource_group: str | None = None,
+    ) -> ModelVersion:
         """Creates a ModelReference for a specific point in time.
 
         Parameters
@@ -133,6 +151,11 @@ class ModelReference:
             The name of the model.
         when
            The datetime to use for creating the model version identifier.
+        resource_hint
+            Whether this model loading is bound by CPU, I/O, or GPU.
+        resource_group
+            The resource group for the model: this is used to isolate execution
+            onto a separate pod (or set of nodes), such as on a GPU-enabled node.
 
         Returns
         -------
@@ -144,13 +167,20 @@ class ModelReference:
         >>> import datetime
         >>> timestamp = datetime.datetime(2023, 10, 15, 14, 30, 0)
         >>> model = ModelReference.as_of("fraud_model", timestamp)
+        >>> model = ModelReference.as_of("fraud_model", timestamp, resource_hint="gpu", resource_group="gpu-group")
         """
 
-        mr = ModelReference(name=name, as_of_date=when)
+        mr = ModelReference(name=name, as_of_date=when, resource_hint=resource_hint, resource_group=resource_group)
         return mr.model_version
 
     @classmethod
-    def from_version(cls, name: str, version: int) -> ModelVersion:
+    def from_version(
+        cls,
+        name: str,
+        version: int,
+        resource_hint: "ResourceHint | None" = None,
+        resource_group: str | None = None,
+    ) -> ModelVersion:
         """Creates a ModelReference using a numeric version identifier.
 
         Parameters
@@ -159,6 +189,11 @@ class ModelReference:
            The name of the model.
         version
            The version number. Must be a non-negative integer.
+        resource_hint
+            Whether this model loading is bound by CPU, I/O, or GPU.
+        resource_group
+            The resource group for the model: this is used to isolate execution
+            onto a separate pod (or set of nodes), such as on a GPU-enabled node.
 
         Returns
         -------
@@ -173,15 +208,22 @@ class ModelReference:
         Examples
         --------
         >>> model = ModelReference.from_version("fraud_model", 1)
+        >>> model = ModelReference.from_version("fraud_model", 1, resource_hint="gpu", resource_group="gpu-group")
         """
         if version < 0:
             raise ValueError("Version number must be a non-negative integer.")
 
-        mr = ModelReference(name=name, version=version)
+        mr = ModelReference(name=name, version=version, resource_hint=resource_hint, resource_group=resource_group)
         return mr.model_version
 
     @classmethod
-    def from_alias(cls, name: str, alias: str) -> ModelVersion:
+    def from_alias(
+        cls,
+        name: str,
+        alias: str,
+        resource_hint: "ResourceHint | None" = None,
+        resource_group: str | None = None,
+    ) -> ModelVersion:
         """Creates a ModelReference using an alias identifier.
 
         Parameters
@@ -190,6 +232,11 @@ class ModelReference:
            The name of the model.
         alias
            The alias string. Must be non-empty.
+        resource_hint
+            Whether this model loading is bound by CPU, I/O, or GPU.
+        resource_group
+            The resource group for the model: this is used to isolate execution
+            onto a separate pod (or set of nodes), such as on a GPU-enabled node.
 
         Returns
         -------
@@ -204,11 +251,12 @@ class ModelReference:
         Examples
         --------
         >>> model = ModelReference.from_alias("fraud_model", "latest")
+        >>> model = ModelReference.from_alias("fraud_model", "latest", resource_hint="gpu", resource_group="gpu-group")
         """
         if not alias:
             raise ValueError("Alias must be a non-empty string.")
 
-        mr = ModelReference(name=name, alias=alias)
+        mr = ModelReference(name=name, alias=alias, resource_hint=resource_hint, resource_group=resource_group)
         return mr.model_version
 
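Note: the new resource_hint and resource_group parameters thread through all four construction paths above. A minimal usage sketch, assuming a registered model named "fraud_model" (the "gpu"/"gpu-group" values mirror the docstring examples and are illustrative):

    from chalk.ml.model_reference import ModelReference

    # Pin version 1 and ask for model loading to run on a GPU-capable
    # resource group, isolated from the default pods (values illustrative).
    model = ModelReference.from_version(
        "fraud_model",
        1,
        resource_hint="gpu",
        resource_group="gpu-group",
    )
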
chalk/ml/model_version.py CHANGED
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
 from datetime import datetime
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from chalk.ml.model_hooks import MODEL_REGISTRY
 from chalk.ml.utils import ModelClass, ModelEncoding, ModelType
 
+if TYPE_CHECKING:
+    from chalk.features.resolver import ResourceHint
+
 
 class ModelVersion:
     def __init__(
@@ -20,6 +23,8 @@ class ModelVersion:
         model_encoding: ModelEncoding | None = None,
         model_class: ModelClass | None = None,
         filename: str | None = None,
+        resource_hint: "ResourceHint | None" = None,
+        resource_group: str | None = None,
     ):
         """Specifies the model version that should be loaded into the deployment.
 
@@ -41,6 +46,8 @@ class ModelVersion:
         self.model_encoding = model_encoding
         self.model_class = model_class
         self.filename = filename
+        self.resource_hint: "ResourceHint | None" = resource_hint
+        self.resource_group = resource_group
 
         self._model = None
         self._predictor = None
@@ -58,26 +65,15 @@ class ModelVersion:
                 model_type=self.model_type, encoding=self.model_encoding, model_class=self.model_class
             )
             if model is not None and self.filename is not None:
-                self._model = model.load_model(self.filename)
+                self._model = model.load_model(self.filename, resource_hint=self.resource_hint)
             else:
                 raise ValueError(
                     f"No load function defined for type {self.model_type}, encoding {self.model_encoding}, and class {self.model_class}"
                 )
 
     def predict(self, X: Any):
-        """Loads the model from the specified filename using the appropriate hook."""
-
-        if self._predictor is None:
-            if self.model_type is None or self.model_encoding is None:
-                raise ValueError("Model type and encoding must be specified to use predict.")
-            self._predictor = MODEL_REGISTRY.get(
-                model_type=self.model_type, encoding=self.model_encoding, model_class=self.model_class
-            )
-            if self._predictor is None:
-                raise ValueError(
-                    f"No predict function defined for type {self.model_type} and extension {self.model_encoding}"
-                )
-        return self._predictor.predict(self.model, X)
+        """Runs prediction using the loaded model."""
+        return self.predictor.predict(self.model, X)
 
     @property
     def model(self) -> Any:
@@ -86,3 +82,16 @@ class ModelVersion:
             self.load_model()
 
         return self._model
+
+    @property
+    def predictor(self) -> Any:
+        """Returns the predictor instance, initializing it if needed."""
+        if self._predictor is None:
+            if self.model_type is None or self.model_encoding is None:
+                raise ValueError("Model type and encoding must be specified to use predictor.")
+            self._predictor = MODEL_REGISTRY.get(
+                model_type=self.model_type, encoding=self.model_encoding, model_class=self.model_class
+            )
+            if self._predictor is None:
+                raise ValueError(f"No predictor defined for type {self.model_type} and encoding {self.model_encoding}")
+        return self._predictor
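Note: predict() now delegates to the new predictor property, so the registry lookup happens once and is cached on the instance. The same lazy-initialization pattern in isolation (a sketch with illustrative names, not chalk's API):

    from typing import Any, Optional

    class LazyLookup:
        def __init__(self, registry: dict, key: str) -> None:
            self._registry = registry  # stand-in for MODEL_REGISTRY
            self._key = key
            self._cached: Optional[Any] = None

        @property
        def predictor(self) -> Any:
            # Resolve on first access, cache, and fail loudly if absent.
            if self._cached is None:
                self._cached = self._registry.get(self._key)
                if self._cached is None:
                    raise ValueError(f"No predictor registered for {self._key}")
            return self._cached
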
chalk/ml/utils.py CHANGED
@@ -400,6 +400,26 @@ class ModelAttributeExtractor:
 
     @staticmethod
     def infer_model_type(model: Any) -> Tuple[Optional[ModelType], Optional[ModelClass]]:
+        # ONNX - check early since ONNX models are commonly wrapped
+        try:
+            import onnx  # pyright: ignore[reportMissingImports]
+
+            if isinstance(model, onnx.ModelProto):
+                return ModelType.ONNX, None
+            # Check if model has a wrapped ONNX ModelProto (e.g., model._model)
+            if hasattr(model, "_model") and isinstance(model._model, onnx.ModelProto):
+                return ModelType.ONNX, None
+        except ImportError:
+            pass
+
+        try:
+            import onnxruntime  # pyright: ignore[reportMissingImports]
+
+            if isinstance(model, onnxruntime.InferenceSession):
+                return ModelType.ONNX, None
+        except ImportError:
+            pass
+
         # PYTORCH
         try:
             import torch.nn as nn  # pyright: ignore[reportMissingImports]
@@ -458,23 +478,6 @@ class ModelAttributeExtractor:
         except ImportError:
             pass
 
-        # ONNX - do we need this one?
-        try:
-            import onnx  # pyright: ignore[reportMissingImports]
-
-            if isinstance(model, onnx.ModelProto):
-                return ModelType.ONNX, None
-        except ImportError:
-            pass
-
-        try:
-            import onnxruntime  # pyright: ignore[reportMissingImports]
-
-            if isinstance(model, onnxruntime.InferenceSession):
-                return ModelType.ONNX, None
-        except ImportError:
-            pass
-
         # SKLEARN
         try:
             import sklearn.base  # pyright: ignore[reportMissingImports]
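Note: the ONNX probes moved ahead of the framework-specific checks, per the new comment, because ONNX models are commonly wrapped in adapter objects (hence the added model._model unwrap). The optional-import probe idiom on its own, as a self-contained sketch:

    from typing import Any

    def is_onnx_model(model: Any) -> bool:
        """Detect ONNX models without making onnx a hard dependency."""
        try:
            import onnx  # optional dependency; absence just means "not ONNX"
        except ImportError:
            return False
        if isinstance(model, onnx.ModelProto):
            return True
        # Unwrap one level for adapters that stash the proto on `_model`.
        return isinstance(getattr(model, "_model", None), onnx.ModelProto)
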
chalk/operators/_utils.py CHANGED
@@ -9,6 +9,7 @@ import pyarrow
 from chalk import DataFrame, Features, StaticOperator
 from chalk._gen.chalk.expression.v1 import expression_pb2 as expr_pb
 from chalk.client import ChalkError, ChalkException, ErrorCode, ErrorCodeCategory
+from chalk.df.LazyFramePlaceholder import LazyFramePlaceholder
 from chalk.features.feature_field import Feature
 
 
@@ -79,7 +80,7 @@ def static_resolver_to_operator(
     fn: Callable,
     inputs: Sequence[Union[Feature, type[DataFrame]]],
     output: Optional[type[Features]],
-) -> StaticOperator | DfPlaceholder | ChalkDataFrame:
+) -> StaticOperator | DfPlaceholder | ChalkDataFrame | LazyFramePlaceholder:
     if output is None:
         raise _GetStaticOperatorError(
             resolver_fqn=fqn,
@@ -96,8 +97,14 @@ def static_resolver_to_operator(
             message="Static resolver must take no arguments and have exactly one DataFrame output",
             underlying_exception=None,
         )
+
     try:
-        placeholder_inputs = [DfPlaceholder(schema_dict=schema_for_input(input_type)) for input_type in inputs]
+        placeholder_inputs = [
+            LazyFramePlaceholder.named_table(
+                name=f"resolver_df_input_{input_index}", schema=pyarrow.schema(schema_for_input(input_type))
+            )
+            for input_index, input_type in enumerate(inputs)
+        ]
         static_operator = fn(*placeholder_inputs)
     except Exception as e:
         # Weird hacky way to return a placeholder even if the resolver fails.
@@ -108,7 +115,7 @@ def static_resolver_to_operator(
         )
     else:
         if (
-            not isinstance(static_operator, (StaticOperator, DfPlaceholder))
+            not isinstance(static_operator, (StaticOperator, DfPlaceholder, LazyFramePlaceholder))
             and not static_operator.__class__.__name__ == "ChalkDataFrame"
             and not static_operator.__class__.__name__ == "LazyFrame"
             and not (
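Note: static resolver inputs are now modeled as LazyFramePlaceholder named tables rather than DfPlaceholder objects. A sketch of the construction above for a single input (column names and types are illustrative, not from this diff):

    import pyarrow

    from chalk.df.LazyFramePlaceholder import LazyFramePlaceholder

    # One named table per resolver input; the name follows the pattern used
    # above and the schema is the input's feature schema as a pyarrow schema.
    schema = pyarrow.schema([("user.id", pyarrow.int64()), ("user.amount", pyarrow.float64())])
    placeholder = LazyFramePlaceholder.named_table(name="resolver_df_input_0", schema=schema)
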
chalk/parsed/_proto/export.py CHANGED
@@ -27,6 +27,7 @@ from chalk.parsed._proto.utils import (
     convert_failed_import_to_gql,
     convert_failed_import_to_proto,
     datetime_to_proto_timestamp,
+    timedelta_to_proto_duration,
 )
 from chalk.parsed._proto.validation import validate_artifacts
 from chalk.parsed.to_proto import ToProtoConverter
@@ -145,6 +146,24 @@ def export_from_registry() -> export_pb.Export:
     """
     failed_protos: List[export_pb.FailedImport] = []
 
+    # Validate registries BEFORE conversion to catch errors early
+    # This ensures parity with GQL validation path
+    from chalk.parsed.validation_from_registries import validate_all_from_registries
+
+    try:
+        validate_all_from_registries(
+            features_registry=FeatureSetBase.registry,
+            resolver_registry=RESOLVER_REGISTRY,
+        )
+    except Exception as e:
+        # If validation fails, add to failed_protos but continue
+        # to allow other validation to complete
+        from chalk._lsp.error_builder import LSPErrorBuilder
+
+        if not LSPErrorBuilder.promote_exception(e):
+            # Not an LSP error, so log it as a failed import
+            failed_protos.append(build_failed_import(e, "validation"))
+
     graph_res = ToProtoConverter.convert_graph(
         features_registry=FeatureSetBase.registry,
         resolver_registry=RESOLVER_REGISTRY.get_all_resolvers(),
@@ -193,6 +212,9 @@ def export_from_registry() -> export_pb.Export:
                 file_name=cron.filename,
                 resource_group=cron.resource_group,
                 planner_options=cron.planner_options,
+                completion_deadline=timedelta_to_proto_duration(cron.completion_deadline)
+                if cron.completion_deadline is not None
+                else cron.completion_deadline,
             )
         )
 
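Note: timedelta_to_proto_duration is imported above but its body is not part of this diff. A plausible minimal implementation on top of protobuf's well-known Duration type (an assumption for illustration, not the package's actual helper):

    from datetime import timedelta

    from google.protobuf import duration_pb2

    def timedelta_to_proto_duration(td: timedelta) -> duration_pb2.Duration:
        # Assumed sketch: FromTimedelta performs the exact seconds/nanos split.
        duration = duration_pb2.Duration()
        duration.FromTimedelta(td)
        return duration
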
chalk/parsed/duplicate_input_gql.py CHANGED
@@ -279,6 +279,9 @@ class UpsertCronQueryGQL:
     upperBoundStr: Optional[str] = None
     resourceGroup: Optional[str] = None
     plannerOptions: Optional[Dict[str, str]] = None
+    completionDeadline: Optional[str] = None
+    numShards: Optional[int] = None
+    numWorkers: Optional[int] = None
 
 
 @dataclasses_json.dataclass_json
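Note: all three additions follow the class's existing convention of None-defaulted optional fields, which keeps previously serialized payloads deserializable. The pattern in a self-contained sketch (an illustrative class, not the real GQL type):

    from dataclasses import dataclass
    from typing import Optional

    import dataclasses_json

    @dataclasses_json.dataclass_json
    @dataclass
    class CronQuerySketch:  # illustrative stand-in for UpsertCronQueryGQL
        name: str
        completionDeadline: Optional[str] = None  # serialized duration string
        numShards: Optional[int] = None
        numWorkers: Optional[int] = None

    # Payloads written before the fields existed still load cleanly:
    old = CronQuerySketch.from_dict({"name": "nightly"})
    assert old.numShards is None
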
chalk/parsed/json_conversions.py CHANGED
@@ -428,6 +428,9 @@ def convert_type_to_gql(
            incrementalSources=None if t.incremental_resolvers is None else list(t.incremental_resolvers),
            resourceGroup=t.resource_group,
            plannerOptions=t.planner_options,
+            completionDeadline=None if t.completion_deadline is None else timedelta_to_duration(t.completion_deadline),
+            numShards=t.num_shards,
+            numWorkers=t.num_workers,
        )
 
    if isinstance(t, NamedQuery):
@@ -539,20 +542,23 @@ def convert_type_to_gql(
        )
 
    elif t.join is not None:
-        # If a has_one/has_many has an incorrect type annotation
-        builder = t.features_cls.__chalk_error_builder__
-        builder.add_diagnostic(
-            range=builder.annotation_range(t.attribute_name),
-            message=(
-                f"The attribute '{t.features_cls.__name__}.{t.attribute_name}' "
-                f"has a join filter ({t.join}) but its type annotation is not a feature class or "
-                f"DataFrame ({t.typ})."
-            ),
-            label="Incorrect join type annotation",
-            raise_error=TypeError,
-            code="34",
-            code_href="https://docs.chalk.ai/docs/has-many",
-        )
+        # Check if user tried to use DataFrame (even if validation failed)
+        # Use is_dataframe_annotation() to detect DataFrame types without triggering validation errors
+        if not t.typ.is_dataframe_annotation():
+            # If a has_one/has_many has an incorrect type annotation
+            builder = t.features_cls.__chalk_error_builder__
+            builder.add_diagnostic(
+                range=builder.annotation_range(t.attribute_name),
+                message=(
+                    f"The attribute '{t.features_cls.__name__}.{t.attribute_name}' "
+                    f"has a join filter ({t.join}) but its type annotation is not a feature class or "
+                    f"DataFrame ({t.typ})."
+                ),
+                label="Incorrect join type annotation",
+                raise_error=TypeError,
+                code="34",
+                code_href="https://docs.chalk.ai/docs/has-many",
+            )
 
    elif t.is_feature_time:
        feature_time_kind_gql = UpsertFeatureTimeKindGQL()
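Note: error code 34 now fires only when the annotation is not DataFrame-like, so a join whose DataFrame annotation failed validation elsewhere is not double-reported. For reference, a correctly annotated has-many per https://docs.chalk.ai/docs/has-many looks roughly like this (a sketch assuming the standard chalkpy feature API):

    from chalk.features import DataFrame, features, has_many

    @features
    class Transaction:
        id: int
        user_id: str
        amount: float

    @features
    class User:
        id: str
        # DataFrame annotation plus join filter: passes the guard above.
        transactions: DataFrame[Transaction] = has_many(lambda: Transaction.user_id == User.id)
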
chalk/parsed/to_proto.py CHANGED
@@ -21,6 +21,7 @@ from chalk._gen.chalk.graph.v1 import graph_pb2 as pb
 from chalk._gen.chalk.graph.v2 import sources_pb2 as sources_pb
 from chalk._gen.chalk.lsp.v1.lsp_pb2 import Location, Position, Range
 from chalk._validation.feature_validation import FeatureValidation
+from chalk.df.LazyFramePlaceholder import LazyFramePlaceholder
 from chalk.features import (
     CacheStrategy,
     Feature,
@@ -899,7 +900,7 @@ class ToProtoConverter:
                 else None,
                 backfill_schedule=mat.backfill_schedule,
                 approx_top_k_arg_k=aggregation_kwargs.get("k")
-                if mat.aggregation in ("approx_top_k", "min_by_n", "max_by_n")
+                if mat.aggregation in ("approx_top_k", "approx_percentile", "min_by_n", "max_by_n")
                 else None,
             ),
             tags=f.tags,
@@ -995,7 +996,7 @@ class ToProtoConverter:
                 else None,
                 continuous_resolver=wmp.continuous_resolver,
                 approx_top_k_arg_k=aggregation_kwargs.get("k")
-                if wmp.aggregation in ("approx_top_k", "min_by_n", "max_by_n")
+                if wmp.aggregation in ("approx_top_k", "approx_percentile", "min_by_n", "max_by_n")
                 else None,
             )
             if wmp is not None
@@ -1025,6 +1026,9 @@ class ToProtoConverter:
             expression=ToProtoConverter.convert_underscore(f.underscore_expression)
             if f.underscore_expression is not None
             else None,
+            offline_expression=ToProtoConverter.convert_underscore(f.offline_underscore_expression)
+            if f.offline_underscore_expression is not None
+            else None,
             expression_definition_location=ToProtoConverter.convert_expression_definition_location(
                 f.underscore_expression
             )
@@ -1147,9 +1151,13 @@ class ToProtoConverter:
            raise ValueError(f"Unsupported resource hint: {r.resource_hint}")
 
        static_operation = None
+        static_operation_dataframe = None
        if r.static:
            static_operator = static_resolver_to_operator(fqn=r.fqn, fn=r.fn, inputs=r.inputs, output=r.output)
-            static_operation = static_operator._to_proto()  # pyright: ignore[reportPrivateUsage]
+            if isinstance(static_operator, LazyFramePlaceholder):
+                static_operation_dataframe = static_operator._to_proto()  # pyright: ignore[reportPrivateUsage]
+            else:
+                static_operation = static_operator._to_proto()  # pyright: ignore[reportPrivateUsage]
 
        function_reference_proto = ToProtoConverter.create_function_reference(
            r.fn,
@@ -1158,7 +1166,9 @@ class ToProtoConverter:
            filename=r.filename,
            source_line=r.source_line,
        )
-
+        postprocessing_underscore_expr: expr_pb.LogicalExprNode | None = None
+        if isinstance(r.postprocessing, Underscore):
+            postprocessing_underscore_expr = r.postprocessing._to_proto()  # pyright: ignore[reportPrivateUsage]
        return pb.Resolver(
            fqn=r.fqn,
            kind=(
@@ -1186,9 +1196,11 @@ class ToProtoConverter:
            unique_on=tuple(x.root_fqn for x in r.unique_on) if r.unique_on is not None else (),
            partitioned_by=(x.root_fqn for x in r.partitioned_by) if r.partitioned_by is not None else (),
            static_operation=static_operation,
+            static_operation_dataframe=static_operation_dataframe,
            sql_settings=ToProtoConverter.convert_sql_settings(r.sql_settings) if r.sql_settings else None,
            output_row_order=r.output_row_order,
            venv=r.venv,
+            underscore_expr=postprocessing_underscore_expr,
        )
 
    @staticmethod
chalk/parsed/user_types_to_json.py CHANGED
@@ -110,6 +110,22 @@ def project_settings_to_gql(config: ProjectSettings) -> ProjectSettingsGQL:
 
 
 def get_registered_types(scope_to: Path, failed: List[FailedImport]) -> UpsertGraphGQL:
+    # Validate registries BEFORE conversion to catch errors early
+    # This ensures parity with Proto validation path
+    from chalk.parsed.validation_from_registries import validate_all_from_registries
+
+    try:
+        validate_all_from_registries(
+            features_registry=FeatureSetBase.registry,
+            resolver_registry=RESOLVER_REGISTRY,
+        )
+    except Exception as e:
+        # If validation fails, add to failed but continue
+        # to allow other validation to complete
+        if not LSPErrorBuilder.promote_exception(e):
+            # Not an LSP error, so log it as a failed import
+            failed.append(build_failed_import(e, "validation"))
+
     features = []
     feature_classes: list[FeatureClassGQL] = []
     for x in FeatureSetBase.registry.values():
@@ -173,6 +189,14 @@ def get_registered_types(scope_to: Path, failed: List[FailedImport]) -> UpsertGr
     named_queries: list[UpsertNamedQueryGQL] = []
     for named_query in NAMED_QUERY_REGISTRY.values():
         if named_query.filename is None or _is_relative_to(Path(named_query.filename), scope_to):
+            # only try to convert if there are no errors
+            if not named_query.errors:
+                try:
+                    named_queries.append(convert_type_to_gql(named_query, path_prefix=path_prefix_to_remove))
+                except Exception as e:
+                    failed.append(build_failed_import(e, f"named query '{named_query.name}'"))
+
+            # named_query.errors can go from empty to non-empty after conversion
             if named_query.errors:
                 code_object_diagnostics[named_query.filename or ""].extend(
                     [
@@ -185,15 +209,17 @@ def get_registered_types(scope_to: Path, failed: List[FailedImport]) -> UpsertGr
                         for error in named_query.errors
                     ]
                 )
-                continue
-            try:
-                named_queries.append(convert_type_to_gql(named_query, path_prefix=path_prefix_to_remove))
-            except Exception as e:
-                failed.append(build_failed_import(e, f"named query '{named_query.name}'"))
 
     model_references: list[UpsertModelReferenceGQL] = []
     for mr in MODEL_REFERENCE_REGISTRY.values():
         if mr.filename is None or _is_relative_to(Path(mr.filename), scope_to):
+            # similar logic to named_queries above
+            if not mr.errors:
+                try:
+                    model_references.append(convert_type_to_gql(mr, path_prefix=path_prefix_to_remove))
+                except Exception as e:
+                    failed.append(build_failed_import(e, f"model reference '{mr.name}'"))
+
             if mr.errors:
                 code_object_diagnostics[mr.filename or ""].extend(
                     [
@@ -206,11 +232,6 @@ def get_registered_types(scope_to: Path, failed: List[FailedImport]) -> UpsertGr
                         for error in mr.errors
                     ]
                 )
-                continue
-            try:
-                model_references.append(convert_type_to_gql(mr, path_prefix=path_prefix_to_remove))
-            except Exception as e:
-                failed.append(build_failed_import(e, f"model reference '{mr.name}'"))
 
     # online store configs
     for osc in ONLINE_STORE_CONFIG_REGISTRY.values():
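Note: the reordering above matters because convert_type_to_gql can append to the object's errors list as a side effect, so the diagnostics pass has to run after conversion instead of being skipped by the old continue. The control flow, distilled (illustrative stand-ins for the registry objects):

    def convert_all(objects, convert, failed, diagnostics):
        converted = []
        for obj in objects:
            # Convert first: conversion may populate obj.errors.
            if not obj.errors:
                try:
                    converted.append(convert(obj))
                except Exception as e:
                    failed.append(e)
            # Then report every error, pre-existing or newly discovered.
            if obj.errors:
                diagnostics.extend(obj.errors)
        return converted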