chalkpy-2.89.22-py3-none-any.whl → chalkpy-2.95.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (268)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
@@ -18,8 +18,9 @@ from google.protobuf import empty_pb2, timestamp_pb2
  from chalk import DataFrame, EnvironmentId, chalk_logger
  from chalk._gen.chalk.auth.v1.agent_pb2 import CustomClaim
  from chalk._gen.chalk.auth.v1.permissions_pb2 import Permission
- from chalk._gen.chalk.common.v1 import offline_query_pb2, online_query_pb2, upload_features_pb2
+ from chalk._gen.chalk.common.v1 import online_query_pb2, resources_pb2, upload_features_pb2
  from chalk._gen.chalk.common.v1.online_query_pb2 import GenericSingleQuery, UploadFeaturesBulkRequest
+ from chalk._gen.chalk.common.v1.script_task_pb2 import ScriptTaskKind, ScriptTaskRequest, TrainingRunArgs
  from chalk._gen.chalk.common.v2.execute_plan_pb2 import ExecutePlanRequest, ExecutePlanResponse
  from chalk._gen.chalk.engine.v1 import query_server_pb2
  from chalk._gen.chalk.engine.v1.query_server_pb2_grpc import QueryServiceStub
@@ -36,6 +37,11 @@ from chalk._gen.chalk.protosql.v1.sql_service_pb2 import (
  )
  from chalk._gen.chalk.protosql.v1.sql_service_pb2_grpc import SqlServiceStub
  from chalk._gen.chalk.server.v1.auth_pb2_grpc import AuthServiceStub
+ from chalk._gen.chalk.server.v1.dataplanejobqueue_pb2 import (
+     GetJobQueueOperationSummaryRequest,
+     GetJobQueueOperationSummaryResponse,
+ )
+ from chalk._gen.chalk.server.v1.dataplanejobqueue_pb2_grpc import DataPlaneJobQueueServiceStub
  from chalk._gen.chalk.server.v1.deploy_pb2 import (
      CreateBranchFromSourceDeploymentRequest,
      CreateBranchFromSourceDeploymentResponse,
@@ -49,6 +55,7 @@ from chalk._gen.chalk.server.v1.graph_pb2 import (
      PythonVersion,
  )
  from chalk._gen.chalk.server.v1.graph_pb2_grpc import GraphServiceStub
+ from chalk._gen.chalk.server.v1.log_pb2_grpc import LogSearchServiceStub
  from chalk._gen.chalk.server.v1.model_registry_pb2 import (
      CreateModelArtifactRequest,
      CreateModelArtifactResponse,
@@ -66,8 +73,13 @@ from chalk._gen.chalk.server.v1.model_registry_pb2 import (
      GetModelVersionResponse,
  )
  from chalk._gen.chalk.server.v1.model_registry_pb2_grpc import ModelRegistryServiceStub
- from chalk._gen.chalk.server.v1.offline_queries_pb2 import CreateModelTrainingJobRequest, CreateModelTrainingJobResponse
  from chalk._gen.chalk.server.v1.offline_queries_pb2_grpc import OfflineQueryMetadataServiceStub
+ from chalk._gen.chalk.server.v1.scheduled_query_pb2_grpc import ScheduledQueryServiceStub
+ from chalk._gen.chalk.server.v1.scheduled_query_run_pb2 import GetScheduledQueryRunsRequest
+ from chalk._gen.chalk.server.v1.scheduler_pb2 import ManualTriggerScheduledQueryRequest
+ from chalk._gen.chalk.server.v1.scheduler_pb2_grpc import SchedulerServiceStub
+ from chalk._gen.chalk.server.v1.script_tasks_pb2 import CreateScriptTaskRequest, CreateScriptTaskResponse
+ from chalk._gen.chalk.server.v1.script_tasks_pb2_grpc import ScriptTaskServiceStub
  from chalk._gen.chalk.server.v1.team_pb2 import (
      CreateServiceTokenRequest,
      CreateServiceTokenResponse,
@@ -75,6 +87,7 @@ from chalk._gen.chalk.server.v1.team_pb2 import (
      ListServiceTokensResponse,
  )
  from chalk._gen.chalk.server.v1.team_pb2_grpc import TeamServiceStub
+ from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2_grpc import SimpleStreamingServiceStub
  from chalk.client import ChalkAuthException, FeatureReference
  from chalk.client.client_impl import _validate_context_dict  # pyright: ignore[reportPrivateUsage]
  from chalk.client.models import (
@@ -84,6 +97,9 @@ from chalk.client.models import (
      CreateBranchResponse,
      GetRegisteredModelResponse,
      GetRegisteredModelVersionResponse,
+ )
+ from chalk.client.models import ManualTriggerScheduledQueryResponse as ManualTriggerScheduledQueryResponseDataclass
+ from chalk.client.models import (
      ModelUploadUrlResponse,
      OnlineQuery,
      OnlineQueryResponse,
@@ -91,6 +107,9 @@ from chalk.client.models import (
      RegisterModelResponse,
      RegisterModelVersionResponse,
      ResourceRequests,
+     ScheduledQueryRun,
+     StreamResolverTestResponse,
+     StreamResolverTestStatus,
      UploadFeaturesResponse,
  )
  from chalk.client.serialization.model_serialization import ModelSerializer
@@ -101,20 +120,25 @@ from chalk.features._encoding.inputs import GRPC_ENCODE_OPTIONS, InputEncodeOpti
  from chalk.features._encoding.json import FeatureEncodingOptions
  from chalk.features._encoding.outputs import encode_outputs
  from chalk.features.feature_set import is_feature_set_class
+ from chalk.features.resolver import Resolver
  from chalk.features.tag import DeploymentId
  from chalk.importer import CHALK_IMPORT_FLAG
  from chalk.ml import LocalSourceConfig, ModelEncoding, ModelRunCriterion, ModelType, SourceConfig
  from chalk.ml.model_file_transfer import ModelFileUploader
+ from chalk.ml.utils import ModelClass
  from chalk.parsed._proto.utils import datetime_to_proto_timestamp, value_to_proto
  from chalk.utils import df_utils
  from chalk.utils.df_utils import record_batch_to_arrow_ipc
  from chalk.utils.grpc import AuthenticatedChalkClientInterceptor, TokenRefresher, UnauthenticatedChalkClientInterceptor
+ from chalk.utils.tracing import add_trace_headers, safe_trace

  if TYPE_CHECKING:
      from pyarrow import RecordBatch, Table
+     from pydantic import BaseModel

      from chalk._gen.chalk.server.v1.builder_pb2 import StartBranchResponse
      from chalk._gen.chalk.server.v1.builder_pb2_grpc import BuilderServiceStub
+     from chalk.client import ChalkError

  CHALK_GRPC_TRACE_ID_HEADER: str = "x-chalk-trace-id"

@@ -135,6 +159,20 @@ def get_trace_id_from_response(call: grpc.Call) -> Optional[str]:
      return None


+ def _merge_headers(
+     headers: None | Sequence[tuple[str, str | bytes]] | Mapping[str, str | bytes],
+     extra_headers: None | Sequence[tuple[str, str | bytes]] | Mapping[str, str | bytes],
+ ) -> tuple[tuple[str, str | bytes], ...]:
+     headers = _canonicalize_headers(headers)
+     extra_headers = _canonicalize_headers(extra_headers)
+     all_headers: list[tuple[str, str | bytes]] = []
+     for h in headers:
+         all_headers.append(h)
+     for h in extra_headers:
+         all_headers.append(h)
+     return tuple(all_headers)
+
+
  def _canonicalize_headers(
      headers: None | Sequence[tuple[str, str | bytes]] | Mapping[str, str | bytes],
  ) -> tuple[tuple[str, str | bytes], ...]:
@@ -188,29 +226,26 @@ def _parse_uri_for_engine(query_server_uri: str) -> ParsedUri:
      return ParsedUri(uri_without_scheme=uri_without_scheme, use_tls=use_tls)


- channel_options: List[tuple[str, str | int]] = [
-     ("grpc.max_send_message_length", 1024 * 1024 * 100),  # 100MB
-     ("grpc.max_receive_message_length", 1024 * 1024 * 100),  # 100MB
+ default_channel_options: Dict[str, str | int] = {
+     "grpc.max_send_message_length": 1024 * 1024 * 100,  # 100MB
+     "grpc.max_receive_message_length": 1024 * 1024 * 100,  # 100MB
      # https://grpc.io/docs/guides/performance/#python
-     (grpc.experimental.ChannelOptions.SingleThreadedUnaryStream, 1),
-     (
-         "grpc.service_config",
-         json.dumps(
-             {
-                 "methodConfig": [
-                     {
-                         "name": [{}],
-                         "maxAttempts": 5,
-                         "initialBackoff": "0.1s",
-                         "maxBackoff": "1s",
-                         "backoffMultiplier": 2,
-                         "retryableStatusCodes": ["UNAVAILABLE"],
-                     }
-                 ]
-             }
-         ),
+     grpc.experimental.ChannelOptions.SingleThreadedUnaryStream: 1,
+     "grpc.service_config": json.dumps(
+         {
+             "methodConfig": [
+                 {
+                     "name": [{}],
+                     "maxAttempts": 5,
+                     "initialBackoff": "0.1s",
+                     "maxBackoff": "1s",
+                     "backoffMultiplier": 2,
+                     "retryableStatusCodes": ["UNAVAILABLE"],
+                 }
+             ]
+         }
      ),
- ]
+ }


  T = TypeVar("T")
@@ -258,6 +293,22 @@ class StubProvider:
              )
          return OfflineQueryMetadataServiceStub(self._server_channel)

+     @cached_property
+     def scheduled_query_stub(self) -> SchedulerServiceStub:
+         if self._server_channel is None:
+             raise ValueError(
+                 "The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
+             )
+         return SchedulerServiceStub(self._server_channel)
+
+     @cached_property
+     def scheduled_query_run_stub(self) -> ScheduledQueryServiceStub:
+         if self._server_channel is None:
+             raise ValueError(
+                 "The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
+             )
+         return ScheduledQueryServiceStub(self._server_channel)
+
      @cached_property
      def sql_stub(self) -> SqlServiceStub:
          if self._engine_channel is None:
@@ -274,12 +325,26 @@ class StubProvider:
              )
          return DataFrameServiceStub(self._engine_channel)

+     @cached_property
+     def streaming_stub(self) -> SimpleStreamingServiceStub:
+         if self._engine_channel is None:
+             raise ValueError(
+                 "The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
+             )
+         return SimpleStreamingServiceStub(self._engine_channel)
+
      @cached_property
      def model_stub(self) -> ModelRegistryServiceStub:
          if self._server_channel is None:
              raise RuntimeError("Unable to connect to API server.")
          return ModelRegistryServiceStub(self._server_channel)

+     @cached_property
+     def task_stub(self) -> ScriptTaskServiceStub:
+         if self._server_channel is None:
+             raise RuntimeError("Unable to connect to API server.")
+         return ScriptTaskServiceStub(self._server_channel)
+
      @cached_property
      def builder_stub(self) -> "BuilderServiceStub":
          from chalk._gen.chalk.server.v1.builder_pb2_grpc import BuilderServiceStub
@@ -288,6 +353,18 @@ class StubProvider:
          if self._server_channel is None:
              raise RuntimeError("Unable to connect to API server.")
          return BuilderServiceStub(self._server_channel)

+     @cached_property
+     def log_stub(self) -> LogSearchServiceStub:
+         if self._server_channel is None:
+             raise RuntimeError("Unable to connect to API server.")
+         return LogSearchServiceStub(self._server_channel)
+
+     @cached_property
+     def job_queue_stub(self) -> DataPlaneJobQueueServiceStub:
+         if self._server_channel is None:
+             raise RuntimeError("Unable to connect to API server.")
+         return DataPlaneJobQueueServiceStub(self._server_channel)
+
      def __init__(
          self,
          token_config: TokenConfig,
@@ -295,17 +372,25 @@ class StubProvider:
          deployment_tag: str | None = None,
          skip_api_server: bool = False,
          additional_headers: List[tuple[str, str]] | None = None,
+         channel_options: List[tuple[str, str | int]] | None = None,
      ):
          super().__init__()
          additional_headers_nonempty: List[tuple[str, str]] = [] if additional_headers is None else additional_headers
          token_refresher: TokenRefresher | None = None
+         channel_options_merged: Dict[str, str | int] = default_channel_options.copy()
+         if channel_options:
+             channel_options_merged.update(dict(channel_options))
          if skip_api_server:
              # Omits the auth handshake with the API server. Primarily for internal use/testing -- if used in production,
              # this client will simply fail to connect. If True then query_server must be provided & point to
              # `localhost/127.0.0.1`.
              if query_server is None:
                  raise ValueError("If skipping API server auth, query_server URI must be provided.")
-             elif not (query_server.startswith("localhost") or query_server.startswith("127.0.0.1")):
+             parsed_uri = _parse_uri_for_engine(query_server)
+             if not (
+                 parsed_uri.uri_without_scheme.startswith("localhost")
+                 or parsed_uri.uri_without_scheme.startswith("127.0.0.1")
+             ):
                  warnings.warn(
                      "Skipping API server auth should only be enabled if query_server URI is localhost. It will fail to authenticate against a production engine."
                  )
@@ -321,13 +406,13 @@ class StubProvider:
          _unauthenticated_server_channel: grpc.Channel = (
              grpc.insecure_channel(
                  target=server_host,
-                 options=channel_options,
+                 options=list(channel_options_merged.items()),
              )
              if server_host.startswith("localhost") or server_host.startswith("127.0.0.1")
              else grpc.secure_channel(
                  target=server_host,
                  credentials=grpc.ssl_channel_credentials(),
-                 options=channel_options,
+                 options=list(channel_options_merged.items()),
              )
          )

@@ -401,12 +486,12 @@ class StubProvider:
                  grpc.secure_channel(
                      target=parsed_uri.uri_without_scheme,
                      credentials=grpc.ssl_channel_credentials(),
-                     options=channel_options,
+                     options=list(channel_options_merged.items()),
                  )
                  if parsed_uri.use_tls
                  else grpc.insecure_channel(
                      target=parsed_uri.uri_without_scheme,
-                     options=channel_options,
+                     options=list(channel_options_merged.items()),
                  )
              ),
              *interceptors,
@@ -421,6 +506,7 @@ class StubRefresher:
          deployment_tag: str | None = None,
          skip_api_server: bool = False,
          additional_headers: List[tuple[str, str]] | None = None,
+         channel_options: List[tuple[str, str | int]] | None = None,
      ):
          super().__init__()
          self._token_config = token_config
@@ -428,6 +514,7 @@ class StubRefresher:
          self._deployment_tag = deployment_tag
          self._skip_api_server = skip_api_server
          self._additional_headers = additional_headers
+         self._channel_options = channel_options
          self._stub = self._refresh_stub()

      def _refresh_stub(self) -> StubProvider:
@@ -437,6 +524,7 @@ class StubRefresher:
              deployment_tag=self._deployment_tag,
              skip_api_server=self._skip_api_server,
              additional_headers=self._additional_headers,
+             channel_options=self._channel_options,
          )
          return self._stub

@@ -473,6 +561,12 @@ class StubRefresher:
      def call_offline_query_stub(self, fn: Callable[[OfflineQueryMetadataServiceStub], T]) -> T:
          return self._retry_callable(fn, lambda: self._stub.offline_query_stub)

+     def call_scheduled_query_stub(self, fn: Callable[[SchedulerServiceStub], T]) -> T:
+         return self._retry_callable(fn, lambda: self._stub.scheduled_query_stub)
+
+     def call_scheduled_query_run_stub(self, fn: Callable[[ScheduledQueryServiceStub], T]) -> T:
+         return self._retry_callable(fn, lambda: self._stub.scheduled_query_run_stub)
+
      def call_sql_stub(self, fn: Callable[[SqlServiceStub], T]) -> T:
          return self._retry_callable(fn, lambda: self._stub.sql_stub)

@@ -482,9 +576,25 @@ class StubRefresher:
      def call_model_stub(self, fn: Callable[[ModelRegistryServiceStub], T]) -> T:
          return self._retry_callable(fn, lambda: self._stub.model_stub)

+     def call_task_stub(self, fn: Callable[[ScriptTaskServiceStub], T]) -> T:
+         return self._retry_callable(fn, lambda: self._stub.task_stub)
+
      def call_builder_stub(self, fn: Callable[["BuilderServiceStub"], T]) -> T:
          return self._retry_callable(fn, lambda: self._stub.builder_stub)

+     def call_log_stub(self, fn: Callable[[LogSearchServiceStub], T]) -> T:
+         return self._retry_callable(fn, lambda: self._stub.log_stub)
+
+     def call_job_queue_stub(self, fn: Callable[[DataPlaneJobQueueServiceStub], T]) -> T:
+         return self._retry_callable(fn, lambda: self._stub.job_queue_stub)
+
+     def call_streaming_stub(self, fn: Callable[[SimpleStreamingServiceStub], T]) -> T:
+         return self._retry_callable(fn, lambda: self._stub.streaming_stub)
+
+     @property
+     def log_stub(self) -> LogSearchServiceStub:
+         return self._stub.log_stub
+
      @property
      def environment_id(self) -> str | None:
          return self._stub.environment_id
@@ -506,6 +616,7 @@ class ChalkGRPCClient:
          additional_headers: List[tuple[str, str]] | None = None,
          query_server: str | None = None,
          input_compression: typing.Literal["lz4", "zstd", "uncompressed"] = "lz4",
+         channel_options: List[Tuple[str, str | int]] | None = None,
          **kwargs: Any,
      ):
          """Create a `ChalkGRPCClient` with the given credentials.
@@ -561,6 +672,7 @@ class ChalkGRPCClient:
              deployment_tag=deployment_tag,
              additional_headers=additional_headers,
              skip_api_server=kwargs.get("_skip_api_server", False),
+             channel_options=channel_options,
          )

      _INPUT_ENCODE_OPTIONS = GRPC_ENCODE_OPTIONS
@@ -618,6 +730,7 @@ class ChalkGRPCClient:
          request_timeout: Optional[float] = None,
          headers: Mapping[str, str] | Sequence[tuple[str, str | bytes]] | None = None,
          query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None = None,
+         trace: bool = False,
      ) -> OnlineQueryResponse:
          """Compute features values using online resolvers.

@@ -740,6 +853,7 @@ class ChalkGRPCClient:
              request_timeout=request_timeout,
              headers=headers,
              query_context=_validate_context_dict(query_context),
+             trace=trace,
          )
          return OnlineQueryConverter.online_query_bulk_response_decode_to_single(bulk_response)

@@ -765,37 +879,44 @@ class ChalkGRPCClient:
          request_timeout: Optional[float] = None,
          headers: Mapping[str, str] | Sequence[tuple[str, str | bytes]] | None = None,
          query_context: Mapping[str, Union[str, int, float, bool, None]] | None = None,
+         trace: bool = False,
      ) -> online_query_pb2.OnlineQueryBulkResponse:
-         request = self._make_query_bulk_request(
-             input={k: [v] for k, v in input.items()},
-             output=output,
-             now=[now] if now is not None else [],
-             staleness=staleness or {},
-             tags=tags or (),
-             correlation_id=correlation_id,
-             query_name=query_name,
-             query_name_version=query_name_version,
-             include_meta=include_meta,
-             meta=meta or {},
-             explain=explain,
-             store_plan_stages=store_plan_stages,
-             value_metrics_tag_by_features=value_metrics_tag_by_features,
-             encoding_options=encoding_options,
-             required_resolver_tags=required_resolver_tags or (),
-             planner_options=planner_options or {},
-             query_context=query_context,
-         )
-         return self._stub_refresher.call_query_stub(
-             lambda x: x.OnlineQueryBulk(
-                 request,
-                 timeout=request_timeout,
-                 metadata=_canonicalize_headers(headers),
+         with safe_trace("_online_query_grpc_request"):
+             request = self._make_query_bulk_request(
+                 input={k: [v] for k, v in input.items()},
+                 output=output,
+                 now=[now] if now is not None else [],
+                 staleness=staleness or {},
+                 tags=tags or (),
+                 correlation_id=correlation_id,
+                 query_name=query_name,
+                 query_name_version=query_name_version,
+                 include_meta=include_meta,
+                 meta=meta or {},
+                 explain=explain,
+                 store_plan_stages=store_plan_stages,
+                 value_metrics_tag_by_features=value_metrics_tag_by_features,
+                 encoding_options=encoding_options,
+                 required_resolver_tags=required_resolver_tags or (),
+                 planner_options=planner_options or {},
+                 query_context=query_context,
+             )
+             if trace:
+                 extra_headers: dict[str, str] = {}
+                 extra_headers = add_trace_headers(extra_headers)
+                 headers = _merge_headers(extra_headers, headers)
+             metadata = _canonicalize_headers(headers)
+             return self._stub_refresher.call_query_stub(
+                 lambda x: x.OnlineQueryBulk(
+                     request,
+                     timeout=request_timeout,
+                     metadata=metadata,
+                 )
              )
-         )

      def online_query_bulk(
          self,
-         input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame],
+         input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame, None] = None,
          output: Sequence[FeatureReference] = (),
          now: Optional[Sequence[dt.datetime]] = None,
          staleness: Optional[Mapping[FeatureReference, str]] = None,
@@ -814,9 +935,17 @@ class ChalkGRPCClient:
          request_timeout: Optional[float] = None,
          headers: Mapping[str, str | bytes] | Sequence[tuple[str, str | bytes]] | None = None,
          query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None = None,
+         *,
+         input_sql: str | None = None,
      ) -> BulkOnlineQueryResult:
+         if input is None and input_sql is None:
+             raise TypeError("One of `input` or `input_sql` is required")
+         if input is not None and input_sql is not None:
+             raise TypeError("`input` and `input_sql` are mutually exclusive")
+
          response, call = self._online_query_bulk_grpc_request(
              input=input,
+             input_sql=input_sql,
              output=output,
              now=now,
              staleness=staleness,
@@ -843,7 +972,8 @@ class ChalkGRPCClient:
      def _online_query_bulk_grpc_request(
          self,
          *,
-         input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame],
+         input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame, None] = None,
+         input_sql: str | None = None,
          output: Sequence[FeatureReference] = (),
          now: Optional[Sequence[dt.datetime]] = None,
          staleness: Optional[Mapping[FeatureReference, str]] = None,
@@ -864,8 +994,10 @@ class ChalkGRPCClient:
          query_context: Mapping[str, Union[str, int, float, bool, None]] | None = None,
      ) -> Tuple[online_query_pb2.OnlineQueryBulkResponse, grpc.Call]:
          """Returns the raw GRPC response and metadata"""
+
          request = self._make_query_bulk_request(
              input=input,
+             input_sql=input_sql,
              output=output,
              now=now or (),
              staleness=staleness or {},
@@ -1054,7 +1186,9 @@ class ChalkGRPCClient:

      def _make_query_bulk_request(
          self,
-         input: Mapping[FeatureReference, Sequence[Any]] | DataFrame,
+         *,
+         input: Mapping[FeatureReference, Sequence[Any]] | DataFrame | None = None,
+         input_sql: str | None = None,
          output: Sequence[FeatureReference],
          now: Sequence[dt.datetime],
          staleness: Mapping[FeatureReference, str],
@@ -1072,9 +1206,19 @@ class ChalkGRPCClient:
          planner_options: Mapping[str, str | int | bool],
          query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None,
      ) -> online_query_pb2.OnlineQueryBulkRequest:
-         inputs_bytes = get_features_feather_bytes(
-             input, self._INPUT_ENCODE_OPTIONS, compression=self._input_compression
-         )
+         if input is None and input_sql is None:
+             raise TypeError("One of `input` or `input_sql` is required")
+         if input is not None and input_sql is not None:
+             raise TypeError("`input` and `input_sql` are mutually exclusive")
+
+         inputs_feather: bytes | None
+         if input is None:
+             inputs_feather = None
+         else:
+             inputs_feather = get_features_feather_bytes(
+                 input, self._INPUT_ENCODE_OPTIONS, compression=self._input_compression
+             )
+
          encoded_outputs = encode_outputs(output)
          outputs = encoded_outputs.string_outputs
          # Currently assume every feature tag is just a fqn instead of a more complex expr.
@@ -1103,7 +1247,8 @@ class ChalkGRPCClient:
          query_context = _validate_context_dict(query_context)
          query_context_proto = {k: value_to_proto(v) for k, v in query_context.items()} if query_context else None
          return online_query_pb2.OnlineQueryBulkRequest(
-             inputs_feather=inputs_bytes,
+             inputs_feather=inputs_feather,
+             inputs_sql=input_sql,
              outputs=[online_query_pb2.OutputExpr(feature_fqn=o) for o in outputs]
              + [online_query_pb2.OutputExpr(feature_expression=o) for o in encoded_outputs.feature_expressions_proto],
              now=now_proto,
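Taken together, the `input_sql` hunks let a caller supply a SQL query in place of materialized inputs: exactly one of `input` or `input_sql` must be provided, and the request then carries `inputs_sql` with no feather payload. A hedged sketch (the SQL text and column-aliasing convention are assumptions, not taken from this diff):

    from chalk.client.client_grpc import ChalkGRPCClient

    client = ChalkGRPCClient()
    result = client.online_query_bulk(
        output=["user.email_domain"],
        input_sql='SELECT id AS "user.id" FROM users LIMIT 100',  # hypothetical query
    )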
@@ -1131,6 +1276,101 @@ class ChalkGRPCClient:
              body_type=online_query_pb2.FEATHER_BODY_TYPE_RECORD_BATCHES,
          )

+     def run_scheduled_query(
+         self,
+         name: str,
+         planner_options: Optional[Mapping[str, Any]],
+         incremental_resolvers: Optional[Sequence[str]],
+         max_samples: Optional[int],
+         env_overrides: Optional[Mapping[str, str]],
+     ) -> ManualTriggerScheduledQueryResponseDataclass:
+         """
+         Manually trigger a scheduled query request.
+
+         Parameters
+         ----------
+         name
+             The name of the scheduled query to be triggered.
+         incremental_resolvers
+             If set to None, Chalk will incrementalize resolvers in the query's root namespaces.
+             If set to a list of resolvers, this set will be used for incrementalization.
+             Incremental resolvers must return a feature time in its output, and must return a `DataFrame`.
+             Most commonly, this will be the name of a SQL file resolver. Chalk will ingest all new data
+             from these resolvers and propagate changes to values in the root namespace.
+         max_samples
+             The maximum number of samples to compute.
+         env_overrides:
+             A dictionary of environment values to override during this specific triggered query.
+
+         Other Parameters
+         ----------------
+         planner_options
+             A dictionary of options to pass to the planner.
+             These are typically provided by Chalk Support for specific use cases.
+
+         Returns
+         -------
+         ManualTriggerScheduledQueryResponse
+             A response message containing metadata around the triggered run.
+
+         Examples
+         --------
+         >>> from chalk.client.client_grpc import ChalkGRPCClient
+         >>> ChalkGRPCClient().run_scheduled_query(
+         ...     name="my_scheduled_query",
+         ... )
+         """
+         proto_resp = self._stub_refresher.call_scheduled_query_stub(
+             lambda x: x.ManualTriggerScheduledQuery(
+                 request=ManualTriggerScheduledQueryRequest(
+                     cron_query_name=name,
+                     planner_options=planner_options or {},
+                     incremental_resolvers=incremental_resolvers or (),
+                     max_samples=max_samples,
+                     env_overrides=env_overrides or {},
+                 ),
+             )
+         )
+         return ManualTriggerScheduledQueryResponseDataclass.from_proto(proto_resp)
+
+     def get_scheduled_query_run_history(
+         self,
+         name: str,
+         limit: int = 10,
+     ) -> List[ScheduledQueryRun]:
+         """
+         Get the run history for a scheduled query.
+
+         Parameters
+         ----------
+         name
+             The name of the scheduled query.
+         limit
+             The maximum number of runs to return. Defaults to 10.
+
+         Returns
+         -------
+         list[ScheduledQueryRun]
+             A response message containing the list of scheduled query runs.
+
+         Examples
+         --------
+         >>> from chalk.client.client_grpc import ChalkGRPCClient
+         >>> ChalkGRPCClient().get_scheduled_query_run_history(
+         ...     name="my_scheduled_query",
+         ...     limit=20,
+         ... )
+         """
+         proto_resp = self._stub_refresher.call_scheduled_query_run_stub(
+             lambda x: x.GetScheduledQueryRuns(
+                 GetScheduledQueryRunsRequest(
+                     cron_name=name,
+                     limit=limit,
+                 )
+             )
+         )
+         return [ScheduledQueryRun.from_proto(run) for run in proto_resp.runs]
+
      def get_graph(self, deployment: DeploymentId | None = None) -> Graph:
          """Get the graph for a given deployment.

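The two new methods pair naturally: trigger a run, then inspect recent runs. A sketch based on the docstring examples above (all of `run_scheduled_query`'s parameters besides `name` lack defaults, so they are passed explicitly here):

    from chalk.client.client_grpc import ChalkGRPCClient

    client = ChalkGRPCClient()
    client.run_scheduled_query(
        name="my_scheduled_query",
        planner_options=None,
        incremental_resolvers=None,
        max_samples=None,
        env_overrides=None,
    )
    runs = client.get_scheduled_query_run_history(name="my_scheduled_query", limit=5)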
@@ -1410,6 +1650,7 @@ class ChalkGRPCClient:
          self,
          name: str,
          model_type: Optional[ModelType] = None,
+         model_class: Optional[ModelClass] = None,
          model_encoding: Optional[ModelEncoding] = None,
          aliases: Optional[List[str]] = None,
          model: Optional[Any] = None,
@@ -1423,53 +1664,53 @@ class ChalkGRPCClient:
          source_config: Optional[SourceConfig] = None,
          dependencies: Optional[List[str]] = None,
      ) -> RegisterModelVersionResponse:
-         """
-         Register a model in the Chalk model registry.
+         """Register a model in the Chalk model registry.

          Parameters
          ----------
-         name : str
-             Unique name for the model
-         aliases : list of str, optional
-             List of version aliases (e.g., ["v1.0", "latest"])
-         model : object, optional
-             Python model object (for object-based registration)
-         model_paths : list of str, optional
-             Paths to model files (for file-based registration)
-         additional_files : List[str], optional
-             Additional files needed for inference (tokenizers, configs, etc.)
-         model_type : ModelType, optional
-             Type of model framework
-         model_encoding : ModelEncoding, optional
-             Serialization format
-         input_schema : dict, list, or Any
-             Definition of the input schema. Can be:
-             - dict: Dictionary mapping column names to dtypes for tabular data
-             - list: List of (shape, dtype) tuples for tensor data
-         output_schema : dict, list, or Any
-             Definition of the output schema. Can be:
-             - dict: Dictionary mapping column names to dtypes for tabular data
-             - list: List of (shape, dtype) tuples for tensor data
-         metadata : dict, optional
-             Additional metadata dictionary containing framework info,
-             training details, performance metrics, etc.
-         input_features : FeatureReference, str, optional
+         name
+             Unique name for the model.
+         aliases
+             List of version aliases (e.g., `["v1.0", "latest"]`).
+         model
+             Python model object (for object-based registration).
+         model_paths
+             Paths to model files (for file-based registration).
+         additional_files
+             Additional files needed for inference (tokenizers, configs, etc.)
+         model_type
+             Type of model framework.
+         model_encoding
+             Serialization format.
+         input_schema
+             Definition of the input schema. Can be:
+             - `dict`: Dictionary mapping column names to dtypes for tabular data
+             - `list`: List of `(shape, dtype)` tuples for tensor data
+         output_schema
+             Definition of the output schema. Can be:
+             - `dict`: Dictionary mapping column names to dtypes for tabular data
+             - `list`: List of `(shape, dtype)` tuples for tensor data
+         metadata
+             Additional metadata dictionary containing framework info,
+             training details, performance metrics, etc.
+         input_features
              The features to be used as inputs to the model.
              For example, `[User.message]`. Features can also be expressed as snakecased strings,
-             e.g. `["user.message"]`
-         output_features : FeatureReference, str, optional
+             e.g. `["user.message"]`.
+         output_features
              The features to be used as outputs to the model.
              For example, `[User.is_spam]`. Features can also be expressed as snakecased strings,
-             e.g. `["user.is_spam"]`
-         source_config : LocalSourceConfig, S3SourceConfig, HFSourceConfig, optional
+             e.g. `["user.is_spam"]`.
+         source_config
              Config to pass credentials to access files from a remote source.
-         dependencies : List[str], optional
+         dependencies
              List of package dependencies needed to run this model.
-             e.g. ["torch==2.7.1", "numpy==1.26.4"]
+             e.g. `["torch==2.7.1", "numpy==1.26.4"]`.
+
          Returns
          -------
          ModelVersion
-             The registered model version object
+             The registered model version object.

          Examples
          --------
@@ -1507,6 +1748,9 @@ class ChalkGRPCClient:
          if model_type is None:
              model_type = model_serializer.model_type

+         if model_class is None:
+             model_class = model_serializer.model_class
+
          if model is not None:
              inferred_input_schema, inferred_output_schema = model_serializer.infer_input_output_schemas(
                  model, model_type
@@ -1547,9 +1791,29 @@ class ChalkGRPCClient:
                  "Failed to register model. Please specify a model encoding if using model_paths."
              )

+         # Auto-convert ONNX list schemas to dict format if needed
+         if model_type == ModelType.ONNX:
+             input_schema = model_serializer.convert_onnx_list_schema_to_dict(input_schema, model, is_input=True)
+             output_schema = model_serializer.convert_onnx_list_schema_to_dict(
+                 output_schema, model, is_input=False
+             )
+
          input_model_schema = model_serializer.convert_schema(input_schema)
          output_model_schema = model_serializer.convert_schema(output_schema)

+         # Final validation: ONNX models must use tabular schemas
+         if model_type == ModelType.ONNX:
+             if input_model_schema is not None and not input_model_schema.HasField("tabular"):
+                 raise ValueError(
+                     "ONNX models must be registered with tabular input schema (dict format). "
+                     + "Use dict format like {'input': Tensor[...]} instead of list format."
+                 )
+             if output_model_schema is not None and not output_model_schema.HasField("tabular"):
+                 raise ValueError(
+                     "ONNX models must be registered with tabular output schema (dict format). "
+                     + "Use dict format like {'output': Vector[...]} instead of list format."
+                 )
+
          all_files_to_process, model_file_names = model_file_uploader.prepare_file_mapping(
              model_paths, additional_files
          )
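Consequence for callers: after this change, ONNX registrations must end up with dict-form (tabular) schemas; list-of-`(shape, dtype)` schemas are auto-converted where possible and rejected otherwise. An illustrative sketch (the dtype spellings are assumptions, not taken from this diff):

    # Accepted: dict form, column name -> dtype (tabular schema).
    input_schema = {"input": "float32"}    # hypothetical dtype spelling
    output_schema = {"output": "float32"}

    # Rejected for ONNX unless auto-conversion succeeds: list of (shape, dtype) tuples.
    # input_schema = [((1, 28, 28), "float32")]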
@@ -1579,6 +1843,7 @@ class ChalkGRPCClient:
                  for file in additional_files_upload_paths
              ],
              model_type=model_type,
+             model_class=model_class,
              model_encoding=model_encoding,
              model_signature=_model_artifact_pb2.ModelSignature(
                  inputs=input_model_schema,
@@ -1691,6 +1956,7 @@ class ChalkGRPCClient:
          name: str,
          model_artifact_id: Optional[str] = None,
          run_id: Optional[str] = None,
+         run_name: Optional[str] = None,
          criterion: Optional[ModelRunCriterion] = None,
          aliases: Optional[List[str]] = None,
      ) -> RegisterModelVersionResponse:
@@ -1705,6 +1971,8 @@ class ChalkGRPCClient:
              Artifact UUID to promote to a model version.
          run_id: str, optional
              run id that produce the artifact to promote.
+         run_name: str, optional
+             run name used in the checkpointer for artifact to promote.
          criterion: ModelRunCriterion, optional
              criterion on which to select the artifact from the training run.
              If none provided, the latest artifact in the run will be selected.
@@ -1722,11 +1990,15 @@ class ChalkGRPCClient:
          ... )
          """
          if model_artifact_id is not None:
-             if run_id is not None or criterion is not None:
-                 raise ValueError("Please specify only one of 'model_artifact_id' or (run_name, run criterion)")
+             if run_id is not None or criterion is not None or run_name is not None:
+                 raise ValueError(
+                     "Please specify only one of 'model_artifact_id', (run_id, run criterion), (run_name, run criterion)"
+                 )
          else:
-             if run_id is None and criterion is None:
-                 raise ValueError("Please specify at least one of 'model_artifact_id' or (run_name, run criterion)")
+             if run_name is None and run_id is None:
+                 raise ValueError(
+                     "Please specify only one of 'model_artifact_id', (run_id, run criterion), (run_name, run criterion)"
+                 )

          try:
              resp: CreateModelVersionFromArtifactResponse = self._stub_refresher.call_model_stub(
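The validation now accepts exactly one selector: a `model_artifact_id`, a `run_id` (optionally with a `criterion`), or the new `run_name` (optionally with a `criterion`). A hedged sketch, assuming the enclosing method is named `create_model_version_from_artifact` per the RPC it wraps:

    from chalk.client.client_grpc import ChalkGRPCClient

    client = ChalkGRPCClient()
    client.create_model_version_from_artifact(
        name="fraud_model",             # hypothetical model name
        run_name="nightly-2025-01-01",  # hypothetical checkpointer run name
    )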
@@ -1735,7 +2007,8 @@ class ChalkGRPCClient:
                        model_name=name,
                        model_artifact_id=model_artifact_id,
                        training_run=ModelSerializer.convert_run_criterion_to_proto(
-                             run_name=run_id,
+                             run_id=run_id,
+                             run_name=run_name,
                            criterion=criterion,
                        ),
                        aliases=aliases,
@@ -1756,19 +2029,46 @@ class ChalkGRPCClient:

      def create_model_training_job(
          self,
-         train_fn: Callable[[Optional[Mapping[str, Any]]], bool],
-         model_name: str,
-         dataset_name: str,
-         config: Optional[Mapping[str, Any]] = None,
+         script: str,
+         function_name: str,
+         experiment_name: str,
+         branch: Optional[str] = None,
+         config: str | None = None,
          resources: Optional[ResourceRequests] = None,
-     ) -> CreateModelTrainingJobResponse:
-         return self._stub_refresher.call_offline_query_stub(
-             lambda x: x.CreateModelTrainingJob(
-                 CreateModelTrainingJobRequest(
-                     training_job_request=offline_query_pb2.OfflineQueryRequest(
-                         dataset_name=dataset_name,
-                     )
-                 )
+         env_overrides: Optional[Mapping[str, str]] = None,
+         enable_profiling: bool = False,
+         max_retries: int = 0,
+     ) -> CreateScriptTaskResponse:
+         resources_request = {}
+         if resources is not None:
+             if resources.cpu is not None:
+                 resources_request["cpu"] = resources.cpu
+             if resources.memory is not None:
+                 resources_request["memory"] = resources.memory
+
+         return self._stub_refresher.call_task_stub(
+             lambda x: x.CreateScriptTask(
+                 CreateScriptTaskRequest(
+                     request=ScriptTaskRequest(
+                         function_reference_type="file",
+                         # Hardcoded script name
+                         function_reference=f"train.py::{function_name}",
+                         kind=ScriptTaskKind.SCRIPT_TASK_KIND_TRAINING_RUN,
+                         training_run=TrainingRunArgs(
+                             experiment_name=experiment_name,
+                         ),
+                         arguments_json=config,
+                         branch=branch,
+                         resource_requests=resources_pb2.ResourceRequirements(
+                             requests=resources_request,
+                         ),
+                         resource_group=resources.resource_group if resources is not None else None,
+                         env_overrides=env_overrides,
+                         enable_profiling=enable_profiling,
+                         max_retries=max_retries,
+                     ),
+                     source_file=script.encode("utf-8"),
+                 ),
              )
          )

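The training-job API changed shape entirely: instead of serializing a callable, the caller now uploads the script source itself, and the server invokes `function_name` inside a file whose name is currently hardcoded to `train.py` (see the comment in the hunk). A hedged usage sketch (file and names are hypothetical):

    from pathlib import Path

    from chalk.client.client_grpc import ChalkGRPCClient

    client = ChalkGRPCClient()
    client.create_model_training_job(
        script=Path("train.py").read_text(),  # uploaded as the task's source_file
        function_name="train",                # resolved as train.py::train
        experiment_name="fraud-v2",           # hypothetical experiment name
    )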
@@ -1820,3 +2120,329 @@ class ChalkGRPCClient:
1820
2120
  f"Branch server did not start within {timeout_seconds} seconds. Last state: {BranchScalingState.Name(response.state)}"
1821
2121
  )
1822
2122
  time.sleep(poll_interval_seconds)
2123
+
2124
+ def get_job_queue_operation_summary(
2125
+ self,
2126
+ operation_id: str,
2127
+ environment_id: str | None = None,
2128
+ limit: int | None = None,
2129
+ offset: int | None = None,
2130
+ ) -> GetJobQueueOperationSummaryResponse:
2131
+ """Get summary information for a job queue operation.
2132
+
2133
+ Parameters
2134
+ ----------
2135
+ operation_id
2136
+ The ID of the operation to get summary for
2137
+ environment_id
2138
+ The environment ID. If None, uses the client's environment.
2139
+ limit
2140
+ Maximum number of job rows to return. Defaults to 10000.
2141
+ offset
2142
+ Offset for pagination. Defaults to 0.
2143
+
2144
+ Returns
2145
+ -------
2146
+ GetJobQueueOperationSummaryResponse
2147
+ The operation summary response containing job queue information.
2148
+
2149
+ Examples
2150
+ --------
2151
+ >>> from chalk.client.client_grpc import ChalkGRPCClient
2152
+ >>> client = ChalkGRPCClient()
2153
+ >>> response = client.get_job_queue_operation_summary(operation_id="op_123")
2154
+ """
2155
+ env_id = environment_id or self._stub_refresher.environment_id
2156
+ if not env_id:
2157
+ raise ValueError("No environment specified")
2158
+
2159
+ request = GetJobQueueOperationSummaryRequest(
2160
+ operation_id=operation_id,
2161
+ environment_id=env_id,
2162
+ )
2163
+
2164
+ if limit is not None:
2165
+ request.limit = limit
2166
+ if offset is not None:
2167
+ request.offset = offset
2168
+
2169
+ return self._stub_refresher.call_job_queue_stub(lambda x: x.GetJobQueueOperationSummary(request))
2170
+
+    def follow_model_training_job(
+        self,
+        operation_id: str,
+        poll_interval: float = 2.0,
+        output_callback: Optional[Callable[[str, str], None]] = None,
+    ) -> None:
+        """Follow a model training job, displaying both status and logs.
+
+        This method polls the job queue for status updates while also following logs
+        in real time. It continues until the job reaches a terminal state (completed,
+        failed, or canceled).
+
+        Parameters
+        ----------
+        operation_id
+            The operation ID of the model training job.
+        poll_interval
+            Time in seconds between polls for status and logs. Defaults to 2.0 seconds.
+        output_callback
+            Optional callback function that receives (timestamp, message) for each log entry.
+            If None, logs are displayed using a Rich live display.
+
+        Examples
+        --------
+        >>> from chalk.client.client_grpc import ChalkGRPCClient
+        >>> client = ChalkGRPCClient()
+        >>> client.follow_model_training_job(operation_id="op_123")
+        """
+        from chalk.utils.job_log_display import JobLogDisplay
+
+        # Create display manager
+        display = JobLogDisplay(title="Model Training Jobs")
+
+        # Define callback for status polling
+        def get_status_callback():
+            return self.get_job_queue_operation_summary(operation_id=operation_id)
+
+        # Get log stub and construct log query
+        log_query = f'operation_id:"{operation_id}"'
+        log_stub = self._stub_refresher.log_stub
+
+        # Delegate to the display manager to handle all threading and coordination
+        display.follow_job(
+            get_status_callback=get_status_callback,
+            log_stub=log_stub,
+            log_query=log_query,
+            poll_interval=poll_interval,
+            output_callback=output_callback,
+        )
+
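# Minimal output_callback sketch: the (timestamp, message) signature comes from
# the docstring above; appending to a local file is an illustrative choice.
def write_log_line(timestamp: str, message: str) -> None:
    with open("training.log", "a") as f:
        f.write(f"{timestamp} {message}\n")

# client.follow_model_training_job(operation_id="op_123", output_callback=write_log_line)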
+    def test_streaming_resolver(
+        self,
+        resolver: str | Resolver,
+        message_bodies: "list[str | bytes | BaseModel] | None" = None,
+        message_keys: list[str | None] | None = None,
+        message_timestamps: list[str | dt.datetime] | None = None,
+        message_filepath: str | None = None,
+        request_timeout: Optional[float] = None,
+    ) -> "StreamResolverTestResponse":
+        """Test a streaming resolver with supplied messages.
+
+        This method tests streaming resolvers using the gRPC TestStreamingResolver endpoint.
+        It supports both deployed resolvers (referenced by FQN) and static/undeployed
+        resolvers (automatically serialized from Resolver objects).
+
+        Parameters
+        ----------
+        resolver : str | Resolver
+            The streaming resolver or its string name. If a StreamResolver object with
+            feature_expressions is provided, it is automatically serialized for testing.
+        message_bodies : list[str | bytes | BaseModel], optional
+            The message bodies to process. These can be JSON strings, raw bytes,
+            or Pydantic models (serialized to JSON).
+            Either message_bodies or message_filepath must be provided.
+        message_keys : list[str | None], optional
+            Optional keys for each message. If not provided, all keys will be None.
+            Must match the length of message_bodies if provided.
+        message_timestamps : list[str | datetime], optional
+            Optional timestamps for each message. If not provided, the current time
+            is used. Must match the length of message_bodies if provided.
+        message_filepath : str, optional
+            A filepath from which test messages will be ingested.
+            This file should be newline-delimited JSON with the format:
+            {"message_key": "my-key", "message_body": {"field1": "value1"}}
+            Each line may optionally contain a "message_timestamp" field.
+            Either message_bodies or message_filepath must be provided.
+        request_timeout : float, optional
+            Request timeout in seconds.
+
+        Returns
+        -------
+        StreamResolverTestResponse
+            Response containing:
+            - status: SUCCESS or FAILURE
+            - data_uri: Optional signed URL to a parquet file with results
+            - errors: List of ChalkError objects
+            - message: Human-readable message
+
+        Examples
+        --------
+        >>> from chalk.client.client_grpc import ChalkGRPCClient
+        >>> client = ChalkGRPCClient()
+        >>> response = client.test_streaming_resolver(
+        ...     resolver="my_module.my_stream_resolver",
+        ...     message_bodies=[
+        ...         '{"user_id": 1, "event": "login"}',
+        ...         '{"user_id": 2, "event": "logout"}',
+        ...     ],
+        ...     message_keys=["user_1", "user_2"],
+        ... )
+        >>> print(f"Status: {response.status}")
+        >>> if response.data_uri:
+        ...     print(f"Results at: {response.data_uri}")
+        """
+        import base64
+        import json
+        from uuid import uuid4
+
+        import pyarrow as pa
+
+        from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2 import TestStreamingResolverRequest
+        from chalk.utils.pydanticutil.pydantic_compat import get_pydantic_model_json, is_pydantic_basemodel_instance
+
+        # Determine if resolver is static and needs serialization
+        resolver_fqn: str | None = None
+        static_stream_resolver_b64: str | None = None
+
+        if isinstance(resolver, str):
+            resolver_fqn = resolver
+        else:
+            from chalk.features.resolver import StreamResolver
+
+            resolver_fqn = resolver.fqn
+
+            if isinstance(resolver, StreamResolver) and resolver.feature_expressions:
+                from chalk.parsed.to_proto import ToProtoConverter
+
+                proto_resolver = ToProtoConverter.convert_stream_resolver(resolver)
+                static_stream_resolver_b64 = base64.b64encode(
+                    proto_resolver.SerializeToString(deterministic=True)
+                ).decode("utf-8")
+
+        # Load from file if provided
+        if message_filepath is not None:
+            if message_bodies is not None:
+                raise ValueError("Cannot provide both message_filepath and message_bodies")
+
+            loaded_bodies: list[Any] = []
+            loaded_keys: list[str | None] = []
+            loaded_timestamps: list[str | None] = []
+
+            with open(message_filepath, "r") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    msg = json.loads(line)
+                    loaded_bodies.append(msg.get("message_body", msg))
+                    loaded_keys.append(msg.get("message_key"))
+                    loaded_timestamps.append(msg.get("message_timestamp"))
+
+            message_bodies = loaded_bodies
+            if message_keys is None and any(k is not None for k in loaded_keys):
+                message_keys = loaded_keys
+            if message_timestamps is None and any(t is not None for t in loaded_timestamps):
+                # Cast needed: loaded_timestamps is list[str | None] from JSON,
+                # but message_timestamps is list[str | datetime] - strings will be parsed later
+                message_timestamps = typing.cast(list[str | dt.datetime], loaded_timestamps)
+
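# Illustrative message_filepath contents matching the NDJSON format documented
# above; the field values are made up, and message_timestamp is optional:
#   {"message_key": "user_1", "message_body": {"user_id": 1, "event": "login"}}
#   {"message_key": "user_2", "message_body": {"user_id": 2, "event": "logout"}, "message_timestamp": "2024-01-01T00:00:00Z"}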
+        # Validate inputs
+        if message_bodies is None:
+            raise ValueError("Either message_bodies or message_filepath must be provided")
+
+        num_messages = len(message_bodies)
+        if num_messages == 0:
+            raise ValueError("message_bodies cannot be empty")
+
+        if message_keys is not None and len(message_keys) != num_messages:
+            raise ValueError(
+                f"message_keys length ({len(message_keys)}) must match message_bodies length ({num_messages})"
+            )
+
+        if message_timestamps is not None and len(message_timestamps) != num_messages:
+            raise ValueError(
+                f"message_timestamps length ({len(message_timestamps)}) must match message_bodies length ({num_messages})"
+            )
+
+        # Generate defaults
+        message_ids = [str(uuid4()) for _ in range(num_messages)]
+
+        if message_keys is None:
+            message_keys = typing.cast(list[str | None], [None] * num_messages)
+
+        if message_timestamps is None:
+            message_timestamps = typing.cast(list[str | dt.datetime], [dt.datetime.now()] * num_messages)
+
+        # Convert message bodies to bytes
+        processed_bodies: list[bytes] = []
+        for body in message_bodies:
+            if isinstance(body, bytes):
+                processed_bodies.append(body)
+            elif isinstance(body, str):
+                processed_bodies.append(body.encode("utf-8"))
+            elif is_pydantic_basemodel_instance(body):
+                # Use the compat helper, which handles both Pydantic v1 and v2
+                processed_bodies.append(get_pydantic_model_json(body).encode("utf-8"))
+            else:
+                # Fall back to JSON serialization for dict-like objects
+                processed_bodies.append(json.dumps(body).encode("utf-8"))
+
+        # Convert timestamps to unix timestamps in milliseconds (int64).
+        # message_timestamps is guaranteed non-None here by the default assignment above.
+        assert message_timestamps is not None
+        processed_timestamps: list[int] = []
+        for ts in message_timestamps:
+            if isinstance(ts, str):
+                # Parse an ISO-format string; "Z" is normalized to "+00:00" for fromisoformat
+                parsed = dt.datetime.fromisoformat(ts.replace("Z", "+00:00"))
+                processed_timestamps.append(int(parsed.timestamp() * 1000))  # milliseconds
+            else:
+                # Type narrowing: ts must be dt.datetime here
+                processed_timestamps.append(int(ts.timestamp() * 1000))  # milliseconds
+
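# Worked example of the timestamp conversion above: an ISO-8601 string with a
# trailing "Z" becomes a unix timestamp in milliseconds.
import datetime as dt

parsed = dt.datetime.fromisoformat("2024-01-01T00:00:00Z".replace("Z", "+00:00"))
assert int(parsed.timestamp() * 1000) == 1704067200000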
+        # Create Arrow table
+        table = pa.table(
+            {
+                "message_id": message_ids,
+                "message_key": message_keys,
+                "message_data": processed_bodies,
+                "publish_timestamp": processed_timestamps,
+            }
+        )
+
+        # Serialize to Arrow IPC stream format
+        sink = pa.BufferOutputStream()
+        with pa.ipc.new_stream(sink, table.schema) as writer:
+            writer.write_table(table)
+        input_data = sink.getvalue().to_pybytes()
+
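# Standalone sketch (not part of the client): an IPC stream built as above can
# be read back with pyarrow to inspect exactly what would be sent over the wire.
import pyarrow as pa

def inspect_ipc_payload(payload: bytes) -> pa.Table:
    with pa.ipc.open_stream(payload) as reader:
        return reader.read_all()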
+        # Create gRPC request
+        request = TestStreamingResolverRequest(
+            resolver_fqn=resolver_fqn or "",
+            input_data=input_data,
+            operation_id=None,
+            debug=True,
+        )
+
+        if static_stream_resolver_b64:
+            request.static_stream_resolver_b64 = static_stream_resolver_b64
+
+        # Call the TestStreamingResolver endpoint
+        proto_response = self._stub_refresher.call_streaming_stub(
+            lambda x: x.TestStreamingResolver(
+                request,
+                timeout=request_timeout,
+            )
+        )
+
+        # Convert the proto response to a StreamResolverTestResponse
+        from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2 import TEST_STREAM_RESOLVER_STATUS_SUCCESS
+
+        status = (
+            StreamResolverTestStatus.SUCCESS
+            if proto_response.status == TEST_STREAM_RESOLVER_STATUS_SUCCESS
+            else StreamResolverTestStatus.FAILURE
+        )
+
+        # Convert proto errors to ChalkError objects
+        errors_list: list[ChalkError] = []
+        if proto_response.errors:
+            errors_list = [ChalkErrorConverter.chalk_error_decode(err) for err in proto_response.errors]
+
+        return StreamResolverTestResponse(
+            status=status,
+            data_uri=proto_response.data_uri if proto_response.HasField("data_uri") else None,
+            errors=errors_list if errors_list else None,
+            message=proto_response.message if proto_response.message else None,
+        )
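# Hedged follow-up sketch: per the docstring, data_uri is a signed URL to a
# parquet file with results; downloading via urllib and reading with
# pyarrow.parquet is an assumption about how one might fetch it.
import io
import urllib.request

import pyarrow.parquet as pq

def load_test_results(data_uri: str):
    with urllib.request.urlopen(data_uri) as resp:
        return pq.read_table(io.BytesIO(resp.read()))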