chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (268)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
@@ -99,10 +99,12 @@ from chalk.client.models import (
     GetRegisteredModelResponse,
     GetRegisteredModelVersionResponse,
     IngestDatasetRequest,
+    ManualTriggerScheduledQueryResponse,
     MultiUploadFeaturesRequest,
     MultiUploadFeaturesResponse,
     OfflineQueryContext,
     OfflineQueryInput,
+    OfflineQueryInputSql,
     OfflineQueryInputUri,
     OfflineQueryParquetUploadURLResponse,
     OnlineQuery,
@@ -124,6 +126,7 @@ from chalk.client.models import (
     ResolverReplayResponse,
     ResolverRunResponse,
     ResourceRequests,
+    ScheduledQueryRun,
     SetDatasetRevisionMetadataRequest,
     SetDatasetRevisionMetadataResponse,
     SetIncrementalProgressRequest,
@@ -171,7 +174,9 @@ from chalk.utils.duration import parse_chalk_duration, timedelta_to_duration
 from chalk.utils.environment_parsing import env_var_bool
 from chalk.utils.log_with_context import get_logger
 from chalk.utils.missing_dependency import missing_dependency_exception
+from chalk.utils.notebook import parse_notebook_into_script
 from chalk.utils.string import s
+from chalk.utils.tracing import add_trace_headers, safe_trace

 if TYPE_CHECKING:
     import ssl
@@ -433,7 +438,7 @@ def _offline_query_inputs_should_be_uploaded(

     for single_input in inputs_as_list:
         if isinstance(single_input, collections.abc.Mapping):
-            num_rows = max(len(v) if isinstance(v, list) else 1 for v in single_input.values())
+            num_rows = max(len(v) if hasattr(v, "__len__") else 1 for v in single_input.values())
        elif isinstance(single_input, pl.DataFrame):
            num_rows = single_input.height
        elif isinstance(single_input, pd.DataFrame):
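
A note on the row-counting change above: switching from `isinstance(v, list)` to `hasattr(v, "__len__")` means any sized value (tuple, numpy array, pyarrow array) now contributes its length, rather than counting as one row. A minimal self-contained sketch of the rule — the helper name and feature names below are illustrative, not part of chalkpy:

from typing import Any, Mapping

def count_rows(single_input: Mapping[str, Any]) -> int:
    # Sized values contribute their length; scalars broadcast to one row.
    # Caveat of the duck-typed check: a bare str is also sized, so it would
    # count its characters rather than one row.
    return max(len(v) if hasattr(v, "__len__") else 1 for v in single_input.values())

assert count_rows({"user.id": [1, 2, 3], "user.age": 30}) == 3
assert count_rows({"user.id": (1, 2, 3, 4)}) == 4  # tuples now count too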
@@ -741,6 +746,8 @@ class OnlineQueryResponseImpl(OnlineQueryResult):
         self.warnings = warnings
         self.meta = meta

+        print(self.data)
+
         for d in self.data:
             if d.value is not None:
                 try:
@@ -1867,78 +1874,85 @@ https://docs.chalk.ai/cli/apply
         connect_timeout: float | ellipsis | None = ...,
         headers: Mapping[str, str] | None = None,
         query_context: Mapping[str, JsonValue] | str | None = None,
+        trace: bool = False,
         value_metrics_tag_by_features: Sequence[FeatureReference] = (),
     ) -> OnlineQueryResponseImpl:
-        encoded_inputs, all_warnings = recursive_encode_inputs(input)
-        encoded_outputs = encode_outputs(output)
-        outputs = encoded_outputs.string_outputs
-        encoded_value_metrics_tag_by_features = encode_outputs(value_metrics_tag_by_features).string_outputs
-        if branch is ...:
-            branch = self._branch
-        now_str = None
-        if now is not None:
-            if now.tzinfo is None:
-                now = now.astimezone(tz=timezone.utc)
-            now_str = now.isoformat()
-
-        staleness_encoded = {}
-        if staleness is not None:
-            for k, v in staleness.items():
-                if isinstance(k, str):
-                    # It's a feature set
-                    staleness_encoded[k] = v
-                elif is_feature_set_class(k):
-                    staleness_encoded[k.namespace] = v
-                else:
-                    staleness_encoded[ensure_feature(k).root_fqn] = v
-
-        request = OnlineQueryRequest(
-            inputs=encoded_inputs,
-            outputs=outputs,
-            expression_outputs=encoded_outputs.feature_expressions_base64,
-            now=now_str,
-            staleness=staleness_encoded,
-            context=OnlineQueryContext(
-                environment=environment,
-                tags=tags,
-                required_resolver_tags=required_resolver_tags,
-            ),
-            deployment_id=preview_deployment_id,
-            branch_id=branch,
-            correlation_id=correlation_id,
-            query_name=query_name,
-            query_name_version=query_name_version,
-            meta=meta,
-            explain=explain,
-            include_meta=bool(include_meta or explain),
-            store_plan_stages=store_plan_stages,
-            encoding_options=encoding_options or FeatureEncodingOptions(),
-            planner_options=planner_options,
-            value_metrics_tag_by_features=tuple(encoded_value_metrics_tag_by_features),
-            query_context=_validate_context_dict(query_context),
-            overlay_graph=_get_overlay_graph_b64(),
-        )
+        with safe_trace("query"):
+            if branch is ...:
+                branch = self._branch
+            extra_headers = {"X-Chalk-Deployment-Type": "branch" if branch else "engine"}
+            if query_name is not None:
+                extra_headers["X-Chalk-Query-Name"] = query_name
+            if trace:
+                extra_headers = add_trace_headers(extra_headers)
+            if headers:
+                extra_headers.update(headers)
+
+            encoded_inputs, all_warnings = recursive_encode_inputs(input)
+            encoded_outputs = encode_outputs(output)
+            outputs = encoded_outputs.string_outputs
+            encoded_value_metrics_tag_by_features = encode_outputs(value_metrics_tag_by_features).string_outputs
+
+            now_str = None
+            if now is not None:
+                if now.tzinfo is None:
+                    now = now.astimezone(tz=timezone.utc)
+                now_str = now.isoformat()
+
+            staleness_encoded = {}
+            if staleness is not None:
+                for k, v in staleness.items():
+                    if isinstance(k, str):
+                        # It's a feature set
+                        staleness_encoded[k] = v
+                    elif is_feature_set_class(k):
+                        staleness_encoded[k.namespace] = v
+                    else:
+                        staleness_encoded[ensure_feature(k).root_fqn] = v

-        extra_headers = {}
-        if query_name is not None:
-            extra_headers["X-Chalk-Query-Name"] = query_name
-        if headers:
-            extra_headers.update(headers)
+            request = OnlineQueryRequest(
+                inputs=encoded_inputs,
+                outputs=outputs,
+                expression_outputs=encoded_outputs.feature_expressions_base64,
+                now=now_str,
+                staleness=staleness_encoded,
+                context=OnlineQueryContext(
+                    environment=environment,
+                    tags=tags,
+                    required_resolver_tags=required_resolver_tags,
+                ),
+                deployment_id=preview_deployment_id,
+                branch_id=branch,
+                correlation_id=correlation_id,
+                query_name=query_name,
+                query_name_version=query_name_version,
+                meta=meta,
+                explain=explain,
+                include_meta=bool(include_meta or explain),
+                store_plan_stages=store_plan_stages,
+                encoding_options=encoding_options or FeatureEncodingOptions(),
+                planner_options=planner_options,
+                value_metrics_tag_by_features=tuple(encoded_value_metrics_tag_by_features),
+                query_context=_validate_context_dict(query_context),
+                overlay_graph=_get_overlay_graph_b64(),
+            )

-        resp = self._request(
-            method="POST",
-            uri="/v1/query/online",
-            json=request,
-            response=OnlineQueryResponse,
-            environment_override=environment,
-            preview_deployment_id=preview_deployment_id,
-            branch=branch,
-            metadata_request=False,
-            extra_headers=extra_headers,
-            timeout=request_timeout,
-            connect_timeout=connect_timeout,
-        )
-        return OnlineQueryResponseImpl(data=resp.data, errors=resp.errors or [], warnings=all_warnings, meta=resp.meta)
+            resp = self._request(
+                method="POST",
+                uri="/v1/query/online",
+                json=request,
+                response=OnlineQueryResponse,
+                environment_override=environment,
+                preview_deployment_id=preview_deployment_id,
+                branch=branch,
+                metadata_request=False,
+                extra_headers=extra_headers,
+                timeout=request_timeout,
+                connect_timeout=connect_timeout,
+            )
+            return OnlineQueryResponseImpl(
+                data=resp.data, errors=resp.errors or [], warnings=all_warnings, meta=resp.meta
+            )

     def multi_query(
         self,
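
A hedged usage sketch of the reworked path above: the whole request is now wrapped in safe_trace("query"), an X-Chalk-Deployment-Type header is always set, and passing trace=True injects trace-propagation headers via add_trace_headers before the POST. The feature names and values below are invented for illustration, and this assumes the public ChalkClient.query forwards the new trace flag:

from chalk.client import ChalkClient

client = ChalkClient()  # credentials resolved from the environment
result = client.query(
    input={"user.id": 1},            # illustrative feature names
    output=["user.fraud_score"],
    query_name="fraud-check",        # sent as the X-Chalk-Query-Name header
    trace=True,                      # assumption: adds trace headers via add_trace_headers
)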
@@ -1954,13 +1968,15 @@ https://docs.chalk.ai/cli/apply
         use_feather: Optional[bool] = True,  # deprecated
         compression: Optional[str] = "uncompressed",
     ) -> BulkOnlineQueryResponse:
-        extra_headers = {}
+        if branch is ...:
+            branch = self._branch
+        extra_headers = {"X-Chalk-Deployment-Type": "branch" if branch else "engine"}
         if query_name is not None:
             extra_headers["X-Chalk-Query-Name"] = query_name
+
         buffer = BytesIO()
         buffer.write(MULTI_QUERY_MAGIC_STR)
-        if branch is ...:
-            branch = self._branch
+
         for query in queries:
             tags = query.tags
             encoded_inputs = {str(k): v for k, v in query.input.items()}
@@ -2063,13 +2079,13 @@ https://docs.chalk.ai/cli/apply
         headers: Mapping[str, str] | None = None,
         value_metrics_tag_by_features: Sequence[FeatureReference] = (),
     ) -> BulkOnlineQueryResponse:
-        extra_headers = {}
+        if branch is ...:
+            branch = self._branch
+        extra_headers = {"X-Chalk-Deployment-Type": "branch" if branch else "engine"}
         if query_name is not None:
             extra_headers["X-Chalk-Query-Name"] = query_name
         if headers:
             extra_headers.update(headers)
-        if branch is ...:
-            branch = self._branch

         now_str = None
         if now is not None:
@@ -2225,6 +2241,8 @@ https://docs.chalk.ai/cli/apply
         override_target_image_tag: Optional[str] = None,
         feature_for_lower_upper_bound: Optional[FeatureReference] = None,
         use_job_queue: bool = False,
+        *,
+        input_sql: str | None = None,
     ) -> DatasetImpl:
         run_asynchronously = (
             use_multiple_computers
@@ -2267,48 +2285,70 @@ https://docs.chalk.ai/cli/apply

         context = OfflineQueryContext(environment=environment)

-        if input is None:
-            query_input = None
-        elif isinstance(input, OfflineQueryInputUri):
-            query_input = input
-        elif isinstance(input, str):
-            query_input = OfflineQueryInputUri(
-                parquet_uri=input,
-                start_row=None,
-                end_row=None,
-            )
-        else:
-            # by this point, should be
-            # Union[QueryInput, List[QueryInput], Tuple[QueryInput, ...]]
-            if isinstance(input, (list, tuple)):
-                input_times_tuple: Sequence[QueryInputTime] = (
-                    [None] * len(input)
-                    if input_times is None
-                    else [input_times for _ in input]
-                    if isinstance(input_times, datetime)
-                    else input_times
+        _check_exclusive_options(
+            {
+                "input": input,
+                "input_sql": input_sql,
+                "max_samples": max_samples,
+            }
+        )
+        if input_sql is not None:
+            if input_times is not None:
+                raise ValueError(
+                    f"Cannot specify `input_sql` and `input_times` together. Instead, the ChalkSQL query may output a `{TS_COL_NAME}` column"
                 )
-            run_asynchronously = True
-            multi_input = list(zip(input, input_times_tuple))
-        else:
-            # Just a QueryInput
-            multi_input = [(input, cast(None, input_times))]
+            if num_shards is not None:
+                raise ValueError("Cannot specify `input_sql` and `num_shards` together.")
+            if num_workers is not None:
+                raise ValueError("Cannot specify `input_sql` and `num_workers` together.")

-        # defaulting to uploading input as table if inputs are large
-        if upload_input_as_table or _offline_query_inputs_should_be_uploaded(input) or num_shards:
-            with ThreadPoolExecutor(thread_name_prefix="offline_query_upload_input") as upload_input_executor:
-                query_input = self._upload_offline_query_input(
-                    multi_input,
-                    context=context,
-                    branch=branch,
-                    executor=upload_input_executor,
-                    num_shards=num_shards,
-                )
-        elif run_asynchronously:
-            query_input = tuple(_to_offline_query_input(x, t) for x, t in multi_input)
-        else:
-            assert len(multi_input) == 1, "We should default to running asynchronously if inputs is partitioned"
-            query_input = _to_offline_query_input(*multi_input[0])
+        # Set query_input
+        if input is not None:
+            # Set query_input from input
+            if isinstance(input, OfflineQueryInputUri):
+                query_input = input
+            elif isinstance(input, str):
+                query_input = OfflineQueryInputUri(
+                    parquet_uri=input,
+                    start_row=None,
+                    end_row=None,
+                )
+            else:
+                # by this point, should be
+                # Union[QueryInput, List[QueryInput], Tuple[QueryInput, ...]]
+                if isinstance(input, (list, tuple)):
+                    input_times_tuple: Sequence[QueryInputTime] = (
+                        [None] * len(input)
+                        if input_times is None
+                        else [input_times for _ in input]
+                        if isinstance(input_times, datetime)
+                        else input_times
+                    )
+                    run_asynchronously = True
+                    multi_input = list(zip(input, input_times_tuple))
+                else:
+                    # Just a QueryInput
+                    multi_input = [(input, cast(None, input_times))]
+
+                # defaulting to uploading input as table if inputs are large
+                if upload_input_as_table or _offline_query_inputs_should_be_uploaded(input) or num_shards:
+                    with ThreadPoolExecutor(thread_name_prefix="offline_query_upload_input") as upload_input_executor:
+                        query_input = self._upload_offline_query_input(
+                            multi_input,
+                            context=context,
+                            branch=branch,
+                            executor=upload_input_executor,
+                            num_shards=num_shards,
+                        )
+                elif run_asynchronously:
+                    query_input = tuple(_to_offline_query_input(x, t) for x, t in multi_input)
+                else:
+                    assert len(multi_input) == 1, "We should default to running asynchronously if inputs is partitioned"
+                    query_input = _to_offline_query_input(*multi_input[0])
+        elif input_sql is not None:
+            query_input = OfflineQueryInputSql(input_sql=input_sql)
+        else:
+            query_input = None

         response = self._create_dataset_job(
             optional_output=optional_output_root_fqns,
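
A hedged sketch of the new input_sql path: it is keyword-only, mutually exclusive with input and max_samples (enforced by _check_exclusive_options, defined at the end of this diff), and rejects input_times, num_shards, and num_workers. The SQL text and output feature are illustrative, and this assumes the public offline_query method exposes the new parameter; the timestamp column name comes from the TS_COL_NAME constant referenced above:

from chalk.client import ChalkClient

client = ChalkClient()
dataset = client.offline_query(                      # assumption: offline_query exposes input_sql
    input_sql='SELECT id AS "user.id" FROM users',   # rows to query, produced by ChalkSQL
    output=["user.fraud_score"],
)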
@@ -2339,9 +2379,9 @@ https://docs.chalk.ai/cli/apply
             override_target_image_tag=override_target_image_tag,
             num_shards=num_shards,
             num_workers=num_workers,
-            feature_for_lower_upper_bound=str(feature_for_lower_upper_bound)
-            if feature_for_lower_upper_bound is not None
-            else None,
+            feature_for_lower_upper_bound=(
+                str(feature_for_lower_upper_bound) if feature_for_lower_upper_bound is not None else None
+            ),
             completion_deadline=completion_deadline,
             max_retries=max_retries,
             optional_output_expressions=optional_output_expressions,
@@ -2371,6 +2411,111 @@ https://docs.chalk.ai/cli/apply
         initialized_dataset.is_finished = True
         return initialized_dataset

+    def run_scheduled_query(
+        self,
+        name: str,
+        planner_options: Optional[Mapping[str, Any]] = None,
+        incremental_resolvers: Optional[Sequence[str]] = None,
+        max_samples: Optional[int] = None,
+        env_overrides: Optional[Mapping[str, str]] = None,
+    ) -> ManualTriggerScheduledQueryResponse:
+        """
+        Manually trigger a run of a scheduled query.
+
+        Parameters
+        ----------
+        name
+            The name of the scheduled query to be triggered.
+        incremental_resolvers
+            If set to None, Chalk will incrementalize resolvers in the query's root namespaces.
+            If set to a list of resolvers, this set will be used for incrementalization.
+            Incremental resolvers must return a feature time in their output, and must return a `DataFrame`.
+            Most commonly, this will be the name of a SQL file resolver. Chalk will ingest all new data
+            from these resolvers and propagate changes to values in the root namespace.
+        max_samples
+            The maximum number of samples to compute.
+        env_overrides
+            A dictionary of environment values to override during this specific triggered query.
+
+        Other Parameters
+        ----------------
+        planner_options
+            A dictionary of options to pass to the planner.
+            These are typically provided by Chalk Support for specific use cases.
+
+        Returns
+        -------
+        ManualTriggerScheduledQueryResponse
+            A response message containing metadata about the triggered run.
+
+        Examples
+        --------
+        >>> from chalk.client.client_grpc import ChalkGRPCClient
+        >>> ChalkGRPCClient().run_scheduled_query(
+        ...     name="my_scheduled_query",
+        ... )
+        """
+        from chalk.client.client_grpc import ChalkGRPCClient
+
+        client_grpc = ChalkGRPCClient(
+            client_id=self._client_id,
+            client_secret=self._client_secret,
+            environment=self._primary_environment,
+            api_server=self._api_server,
+        )
+
+        resp = client_grpc.run_scheduled_query(
+            name=name,
+            planner_options=planner_options,
+            incremental_resolvers=incremental_resolvers,
+            max_samples=max_samples,
+            env_overrides=env_overrides,
+        )
+
+        return resp
+
+    def get_scheduled_query_run_history(
+        self,
+        name: str,
+        limit: int = 10,
+    ) -> List[ScheduledQueryRun]:
+        """
+        Get the run history for a scheduled query.
+
+        Parameters
+        ----------
+        name
+            The name of the scheduled query.
+        limit
+            The maximum number of runs to return. Defaults to 10.
+
+        Returns
+        -------
+        list[ScheduledQueryRun]
+            A response message containing the list of scheduled query runs.
+
+        Examples
+        --------
+        >>> from chalk.client import ChalkClient
+        >>> ChalkClient().get_scheduled_query_run_history(
+        ...     name="my_scheduled_query",
+        ...     limit=20,
+        ... )
+        """
+        from chalk.client.client_grpc import ChalkGRPCClient
+
+        client_grpc = ChalkGRPCClient(
+            client_id=self._client_id,
+            client_secret=self._client_secret,
+            environment=self._primary_environment,
+            api_server=self._api_server,
+        )
+
+        return client_grpc.get_scheduled_query_run_history(
+            name=name,
+            limit=limit,
+        )
+
     def prompt_evaluation(
         self,
         prompts: list[Prompt | str],
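
Taken together, the two new client methods support a trigger-then-inspect workflow; a short hedged sketch (the scheduled query name is illustrative):

from chalk.client import ChalkClient

client = ChalkClient()
client.run_scheduled_query(name="my_scheduled_query")
for run in client.get_scheduled_query_run_history(name="my_scheduled_query", limit=5):
    print(run)  # each entry is a ScheduledQueryRun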
@@ -3432,6 +3577,7 @@ https://docs.chalk.ai/cli/apply
             Optional[OfflineQueryInput],
             UploadedParquetShardedOfflineQueryInput,
             OfflineQueryInputUri,
+            OfflineQueryInputSql,
         ],
         max_samples: Optional[int],
         dataset_name: Optional[str],
@@ -4345,6 +4491,187 @@ https://docs.chalk.ai/cli/apply
         )
         return resp

+    def _run_serialized_query(
+        self,
+        serialized_plan_bytes: bytes,
+        input: Union[Mapping[FeatureReference, Sequence[Any]], pa.Table],
+        output: Sequence[FeatureReference] = (),
+        staleness: Optional[Mapping[FeatureReference, str]] = None,
+        context: Optional[OnlineQueryContext] = None,
+        query_name: Optional[str] = None,
+        query_name_version: Optional[str] = None,
+        correlation_id: Optional[str] = None,
+        include_meta: bool = False,
+        explain: bool = False,
+        store_plan_stages: bool = False,
+        meta: Optional[Mapping[str, str]] = None,
+        headers: Mapping[str, str] | None = None,
+    ) -> BulkOnlineQueryResult:
+        """Run a query using a pre-serialized plan.
+
+        This is a protected method for internal use and testing.
+
+        Parameters
+        ----------
+        serialized_plan_bytes
+            The serialized BatchPlan protobuf bytes.
+        input
+            The input data, either as a mapping of features to values or as a PyArrow table.
+        output
+            The output features to compute.
+        staleness
+            Maximum staleness overrides for features.
+        context
+            Query context, including environment and tags.
+        query_name
+            The name of the query.
+        query_name_version
+            The version of the query.
+        correlation_id
+            Correlation ID for logging.
+        include_meta
+            Whether to include metadata in the response.
+        explain
+            Whether to include explain output.
+        store_plan_stages
+            Whether to store plan stages.
+        meta
+            Customer metadata tags.
+        headers
+            Additional headers to provide with the request.
+
+        Returns
+        -------
+        BulkOnlineQueryResult
+            The query result.
+        """
+        try:
+            import pyarrow as pa
+            import pyarrow.feather as feather
+        except ImportError:
+            raise missing_dependency_exception("chalkpy[runtime]")
+
+        # Convert input to a PyArrow table if needed
+        if isinstance(input, Mapping):
+            # Convert mapping to PyArrow table
+            table_dict = {}
+            for feat_ref, values in input.items():
+                feat_name = str(feat_ref)
+                # Ensure values is a list
+                if not isinstance(values, list):
+                    values = [values]
+                table_dict[feat_name] = values
+            input_table = pa.Table.from_pydict(table_dict)
+        else:
+            input_table = input
+
+        # Encode outputs
+        outputs_encoded = encode_outputs(output).string_outputs if output else []
+
+        # Encode staleness
+        staleness_encoded = {}
+        if staleness is not None:
+            for k, v in staleness.items():
+                if is_feature_set_class(k):
+                    for f in k.features:
+                        staleness_encoded[f.root_fqn] = v
+                else:
+                    staleness_encoded[ensure_feature(k).root_fqn] = v
+
+        # Create FeatherRequestHeader
+        from chalk.client.models import OnlineQueryContext as OQC
+
+        header_dict = {
+            "outputs": outputs_encoded,
+            "expression_outputs": [],
+            "staleness": staleness_encoded if staleness_encoded else None,
+            "context": (context or OQC()).dict(),
+            "include_meta": include_meta,
+            "explain": explain,
+            "correlation_id": correlation_id,
+            "query_name": query_name,
+            "query_name_version": query_name_version,
+            "meta": meta,
+            "store_plan_stages": store_plan_stages,
+        }
+        header_json = json.dumps(header_dict).encode("utf-8")
+
+        # Serialize the input table to feather format
+        feather_buffer = BytesIO()
+        feather.write_feather(input_table, feather_buffer)
+        feather_bytes = feather_buffer.getvalue()
+
+        # Build the request body:
+        # 1. First 8 bytes: int64 (big-endian) - length of serialized plan
+        # 2. Next N bytes: serialized BatchPlan protobuf
+        # 3. Next 8 bytes: int64 (big-endian) - length of header JSON
+        # 4. Next M bytes: UTF-8 encoded JSON header (FeatherRequestHeader)
+        # 5. Next 8 bytes: int64 (big-endian) - length of feather data
+        # 6. Remaining bytes: feather-encoded input data
+        request_body = BytesIO()
+        request_body.write(len(serialized_plan_bytes).to_bytes(8, byteorder="big"))
+        request_body.write(serialized_plan_bytes)
+        request_body.write(len(header_json).to_bytes(8, byteorder="big"))
+        request_body.write(header_json)
+        request_body.write(len(feather_bytes).to_bytes(8, byteorder="big"))
+        request_body.write(feather_bytes)
+
+        # Make the HTTP request
+        response = self._request(
+            method="POST",
+            uri="/v1/query/run",
+            response=None,  # We'll handle the response manually
+            json=None,
+            data=request_body.getvalue(),
+            environment_override=None,
+            preview_deployment_id=None,
+            branch=None,
+            metadata_request=False,
+            extra_headers=headers,
+        )
+
+        if not isinstance(response, requests.Response):  # pyright: ignore[reportUnnecessaryIsInstance]
+            raise TypeError("Expected requests.Response")
+
+        if response.status_code != 200:
+            raise RuntimeError(f"Request failed with status {response.status_code}: {response.text}")
+
+        # Deserialize the response
+        result = OnlineQueryResultFeather.deserialize(response.content)
+
+        # Convert feather bytes back to a dataframe
+        scalars_df = None
+        if result.scalar_data:
+            scalars_table = feather.read_table(BytesIO(result.scalar_data))
+            scalars_df = pa_table_to_pl_df(scalars_table)
+
+        # Parse errors from JSON strings back to ChalkError objects
+        errors = []
+        if result.errors:
+            for error_json in result.errors:
+                try:
+                    error_dict = json.loads(error_json)
+                    errors.append(ChalkError(**error_dict))
+                except Exception:
+                    # If parsing fails, create a generic error
+                    errors.append(ChalkError.create(code=ErrorCode.PARSE_FAILED, message=str(error_json)))
+
+        # Parse meta if present
+        query_meta = None
+        if result.meta:
+            try:
+                query_meta = QueryMeta(**json.loads(result.meta))
+            except Exception:
+                pass
+
+        # Return as a BulkOnlineQueryResult
+        return BulkOnlineQueryResult(
+            scalars_df=scalars_df,
+            groups_dfs=None,
+            errors=errors if errors else None,
+            meta=query_meta,
+        )
+
     def _to_value(self, x: FeatureResult):
         f: Feature = Feature.from_root_fqn(x.field)

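The request body that _run_serialized_query builds is three length-prefixed segments (plan bytes, JSON header, feather payload), each preceded by an 8-byte big-endian length. A self-contained round-trip sketch of just that framing, independent of Chalk:

from io import BytesIO

def write_frames(*segments: bytes) -> bytes:
    buf = BytesIO()
    for seg in segments:
        buf.write(len(seg).to_bytes(8, byteorder="big"))  # 8-byte big-endian length prefix
        buf.write(seg)
    return buf.getvalue()

def read_frames(payload: bytes, n: int) -> list[bytes]:
    out, offset = [], 0
    for _ in range(n):
        length = int.from_bytes(payload[offset : offset + 8], byteorder="big")
        offset += 8
        out.append(payload[offset : offset + length])
        offset += length
    return out

plan, header, feather = b"\x0a\x02hi", b'{"outputs": []}', b"ARROW1"
assert read_frames(write_frames(plan, header, feather), 3) == [plan, header, feather]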
@@ -4551,20 +4878,49 @@ https://docs.chalk.ai/cli/apply
            # This shouldn't happen, but satisfies type checker
            raise RuntimeError("All retries exhausted but no exception recorded")

-        def _canonicalize_error(x: ChalkError):
-            return x.copy(
-                update={
-                    "exception": (
-                        None
-                        if x.exception is None
-                        else x.exception.copy(update={"stacktrace": "", "internal_stacktrace": None})
-                    ),
-                    "feature": None,
-                }
-            )
-
-        query_errors = FrozenOrderedSet(_canonicalize_error(x) for x in (query_errors or []))
-        actual_errors = FrozenOrderedSet(_canonicalize_error(x) for x in (resp_errors or []))
+        def _canonicalize_error(x: ChalkError, expected: Optional[ChalkError] = None):
+            """
+            Canonicalize an error for comparison. If `expected` is provided, only compare
+            fields that are non-None in the expected error.
+            """
+            update = {}
+
+            # Always normalize exception stacktraces if an exception exists
+            if x.exception is not None:
+                update["exception"] = x.exception.copy(update={"stacktrace": "", "internal_stacktrace": None})
+
+            # If expected is provided, clear fields that are None in expected (meaning we don't care about them)
+            if expected is not None:
+                if expected.feature is None:
+                    update["feature"] = None
+                if expected.resolver is None:
+                    update["resolver"] = None
+                if expected.display_primary_key is None:
+                    update["display_primary_key"] = None
+                if expected.display_primary_key_fqn is None:
+                    update["display_primary_key_fqn"] = None
+                if expected.exception is None:
+                    update["exception"] = None
+
+            return x.copy(update=update) if update else x
+
+        # Canonicalize expected errors first (without a reference)
+        query_errors_list = [_canonicalize_error(x) for x in (query_errors or [])]
+
+        # Canonicalize actual errors with reference to the expected ones:
+        # for each actual error, find the matching expected error and canonicalize accordingly
+        actual_errors_list = []
+        for actual in resp_errors or []:
+            # Find the best matching expected error (by code and message)
+            matching_expected = None
+            for query_error in query_errors_list:
+                if actual.code == query_error.code and actual.message == query_error.message:
+                    matching_expected = query_error
+                    break
+            actual_errors_list.append(_canonicalize_error(actual, matching_expected))
+
+        query_errors = FrozenOrderedSet(query_errors_list)
+        actual_errors = FrozenOrderedSet(actual_errors_list)

        if not _do_query_errors_match(actual_errors, query_errors):
            errors_expected = len(query_errors) > 0
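
The effect of the reference-aware canonicalization above: any field the expected error leaves as None is blanked on the actual error before the set comparison, so a test asserting only code and message no longer fails over resolver or primary-key details. A simplified, self-contained illustration of that masking idea (a stand-in dataclass, not the real ChalkError, and only two of the masked fields):

from dataclasses import dataclass, replace
from typing import Optional

@dataclass(frozen=True)
class Err:  # simplified stand-in for ChalkError
    code: str
    message: str
    resolver: Optional[str] = None
    feature: Optional[str] = None

def mask_like(actual: Err, expected: Err) -> Err:
    # Blank every field the expected error does not assert on.
    updates = {f: None for f in ("resolver", "feature") if getattr(expected, f) is None}
    return replace(actual, **updates) if updates else actual

expected = Err(code="RESOLVER_FAILED", message="boom")
actual = Err(code="RESOLVER_FAILED", message="boom", resolver="get_user")
assert mask_like(actual, expected) == expected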
@@ -4641,6 +4997,9 @@ https://docs.chalk.ai/cli/apply
            _fail_test("errors differed -- see output table above")

    if resp_data is not None:
+        # set of features that were asserted on
+        expected_features = {e.fqn for e in expected}
+
        actuals = [
            Result(
                x.field,
@@ -4649,6 +5008,7 @@ https://docs.chalk.ai/cli/apply
                x.error,
            )
            for x in resp_data
+            if x.field in expected_features  # Filter to only asserted features
        ]

        feature_mismatch = not _do_resultsets_match(actuals, expected)
@@ -4899,6 +5259,7 @@ https://docs.chalk.ai/cli/apply
         name: str,
         model_artifact_id: Optional[str] = None,
         run_id: Optional[str] = None,
+        run_name: Optional[str] = None,
         criterion: Optional[ModelRunCriterion] = None,
         aliases: Optional[List[str]] = None,
     ) -> RegisterModelVersionResponse:
@@ -4915,6 +5276,7 @@ https://docs.chalk.ai/cli/apply
             name=name,
             model_artifact_id=model_artifact_id,
             run_id=run_id,
+            run_name=run_name,
             criterion=criterion,
             aliases=aliases,
         )
@@ -4923,14 +5285,40 @@ https://docs.chalk.ai/cli/apply

     def train_model(
         self,
-        train_fn: Callable[[Optional[Mapping[str, Any]]], bool],
-        model_name: str,
-        dataset_name: str,
+        experiment_name: str,
+        train_fn: Callable[[], None],
         config: Optional[Mapping[str, Any]] = None,
+        branch: Optional[Union[BranchId, ellipsis]] = ...,
         resources: Optional[ResourceRequests] = None,
+        env_overrides: Optional[Mapping[str, str]] = None,
+        enable_profiling: bool = False,
+        max_retries: int = 0,
     ) -> CreateModelTrainingJobResponse:
         from chalk.client.client_grpc import ChalkGRPCClient

+        if branch is ...:
+            branch = self._branch
+
+        if not callable(train_fn):
+            raise ValueError("train_fn must be a callable function.")
+
+        nargs = len(inspect.signature(train_fn).parameters)
+
+        if nargs == 0:
+            if config is not None:
+                raise ValueError("train_fn must accept a 'config' parameter to use the provided config.")
+            config_str = None
+
+        if nargs == 1:
+            if config is None:
+                raise ValueError("train_fn must not accept a 'config' parameter when no config is provided.")
+            try:
+                config_str = json.dumps({"kwargs": {"config": config}})
+            except TypeError as e:
+                raise ValueError("config must be JSON serializable.") from e
+
+        script = parse_notebook_into_script(train_fn, config is not None)
+
         client_grpc = ChalkGRPCClient(
             client_id=self._client_id,
             client_secret=self._client_secret,
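
A hedged usage sketch of the reworked train_model signature above: train_fn must take either no parameters (when config is omitted) or exactly one config parameter, and its body is lifted into a standalone script via parse_notebook_into_script before submission. The experiment name and training body are illustrative, and this assumes the public ChalkClient exposes the method with this signature:

from chalk.client import ChalkClient

def train(config):
    # Runs remotely; `config` arrives as the JSON-decoded mapping passed below.
    print("learning rate:", config["lr"])

ChalkClient().train_model(
    experiment_name="fraud-model-v1",
    train_fn=train,
    config={"lr": 1e-3},  # must be JSON-serializable
)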
@@ -4938,8 +5326,25 @@ https://docs.chalk.ai/cli/apply
             api_server=self._api_server,
         )

-        client_grpc.create_model_training_job(
-            train_fn=train_fn, model_name=model_name, dataset_name=dataset_name, config=config, resources=resources
+        task_response = client_grpc.create_model_training_job(
+            script=script,
+            function_name=train_fn.__name__,
+            experiment_name=experiment_name,
+            config=config_str,
+            branch=branch,
+            resources=resources,
+            env_overrides=env_overrides,
+            enable_profiling=enable_profiling,
         )

+        client_grpc.follow_model_training_job(operation_id=task_response.task_id)
+
         return CreateModelTrainingJobResponse(success=True)
+
+
+def _check_exclusive_options(options: dict[str, Any | None]):
+    filled_options = {k: v for k, v in options.items() if v is not None}
+    if len(filled_options) > 1:
+        raise ValueError(
+            f"Only one of the options: {', '.join(filled_options.keys())} can be specified (they are mutually exclusive options)."
+        )