chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/importer.py CHANGED
@@ -17,8 +17,9 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Ty
17
17
 
18
18
  import pyarrow as pa
19
19
 
20
+ import chalk.functions as F
20
21
  from chalk._lsp.error_builder import DiagnosticBuilder, LSPErrorBuilder
21
- from chalk.features import Feature, Features, FeatureSetBase, Filter, unwrap_feature
22
+ from chalk.features import Feature, Features, FeatureSetBase, Filter, Vector, unwrap_feature
22
23
  from chalk.features.feature_field import WindowConfigResolved
23
24
  from chalk.features.pseudofeatures import Now
24
25
 
@@ -101,6 +102,8 @@ supported_aggs = (
101
102
  "sum",
102
103
  "var",
103
104
  "var_sample",
105
+ "vector_sum",
106
+ "vector_mean",
104
107
  )
105
108
 
106
109
 
@@ -169,7 +172,16 @@ def _check_types(
169
172
  return
170
173
 
171
174
  joined_annotation = joined_feature.typ.parsed_annotation
172
- if aggregation not in {"count", "approx_count_distinct", "approx_top_k", "min_by_n", "max_by_n", "array_agg"}:
175
+ if aggregation not in {
176
+ "count",
177
+ "approx_count_distinct",
178
+ "approx_top_k",
179
+ "min_by_n",
180
+ "max_by_n",
181
+ "array_agg",
182
+ "vector_sum",
183
+ "vector_mean",
184
+ }:
173
185
  _validate_types(
174
186
  annotation=joined_annotation,
175
187
  permitted_types=(int, float),
@@ -195,7 +207,6 @@ def _check_types(
195
207
  joined=False,
196
208
  feature_name=feature_name,
197
209
  )
198
-
199
210
  elif aggregation == "min" or aggregation == "max":
200
211
  if _get_underlying_type(this_annotation, feature_name) != _get_underlying_type(
201
212
  joined_annotation, joined_feature.name
@@ -293,6 +304,19 @@ def _parse_agg_function_call(expr: Underscore | None) -> Tuple[str, Underscore,
293
304
  f"expecting 'int' type argument for 'k', but received arg of type '{type(call_expr._chalk__kwargs.get('k'))}'"
294
305
  )
295
306
  opts = FrozenOrderedSet(call_expr._chalk__kwargs.items())
307
+ elif aggregation == "approx_percentile":
308
+ if len(call_expr._chalk__args) > 0:
309
+ raise ChalkParseError("should not have any positional arguments")
310
+ elif {"quantile"} != call_expr._chalk__kwargs.keys():
311
+ raise ChalkParseError("expecting exactly one required keyword argument 'quantile'")
312
+ elif not isinstance(call_expr._chalk__kwargs.get("quantile"), float):
313
+ raise ChalkParseError(
314
+ f"expecting 'float' type argument for 'quantile', but received arg of type '{type(call_expr._chalk__kwargs.get('quantile'))}'"
315
+ )
316
+ # TODO: expand proto definition to accept kwargs that are not necessarily `k`
317
+ quantile = call_expr._chalk__kwargs["quantile"]
318
+ nano_quantile = int(round(quantile * 1_000_000_000))
319
+ opts = FrozenOrderedSet([("k", nano_quantile)])
296
320
  elif aggregation in ("min_by_n", "max_by_n"):
297
321
  if len(call_expr._chalk__kwargs) > 0:
298
322
  raise ChalkParseError("should not have any keyword arguments")
@@ -422,8 +446,6 @@ def run_post_import_fixups():
422
446
  # "1m", "2m", materialization={...},
423
447
  # expression=_.transactions[_.amount].sum(),
424
448
  # )
425
- assert f.underscore_expression is not None
426
- assert f.window_materialization is not None
427
449
 
428
450
  try:
429
451
  f.window_materialization_parsed = parse_windowed_materialization(f=f)
@@ -561,39 +583,51 @@ def parse_grouped_window(f: Feature) -> WindowConfigResolved:
561
583
  aggregation_kwargs=aggregation_kwargs,
562
584
  pyarrow_dtype=pyarrow_dtype,
563
585
  filters=parsed_filters,
564
- backfill_resolver=_try_parse_resolver_fqn(
565
- "backfill_resolver",
566
- f.window_materialization.get("backfill_resolver", None),
567
- )
568
- if isinstance(f.window_materialization, dict)
569
- else None,
570
- backfill_schedule=f.window_materialization.get("backfill_schedule", None)
571
- if isinstance(f.window_materialization, dict)
572
- else None,
573
- backfill_lookback_duration_seconds=_try_parse_duration(
574
- "backfill_lookback_duration",
575
- f.window_materialization.get("backfill_lookback_duration", None),
576
- )
577
- if isinstance(f.window_materialization, dict)
578
- else None,
579
- backfill_start_time=_try_parse_datetime(
580
- "backfill_start_time",
581
- f.window_materialization.get("backfill_start_time", None),
582
- )
583
- if isinstance(f.window_materialization, dict)
584
- else None,
585
- continuous_resolver=_try_parse_resolver_fqn(
586
- "continuous_resolver",
587
- f.window_materialization.get("continuous_resolver", None),
588
- )
589
- if isinstance(f.window_materialization, dict)
590
- else None,
591
- continuous_buffer_duration_seconds=_try_parse_duration(
592
- "continuous_buffer_duration",
593
- f.window_materialization.get("continuous_buffer_duration", None),
594
- )
595
- if isinstance(f.window_materialization, dict)
596
- else None,
586
+ backfill_resolver=(
587
+ _try_parse_resolver_fqn(
588
+ "backfill_resolver",
589
+ f.window_materialization.get("backfill_resolver", None),
590
+ )
591
+ if isinstance(f.window_materialization, dict)
592
+ else None
593
+ ),
594
+ backfill_schedule=(
595
+ f.window_materialization.get("backfill_schedule", None)
596
+ if isinstance(f.window_materialization, dict)
597
+ else None
598
+ ),
599
+ backfill_lookback_duration_seconds=(
600
+ _try_parse_duration(
601
+ "backfill_lookback_duration",
602
+ f.window_materialization.get("backfill_lookback_duration", None),
603
+ )
604
+ if isinstance(f.window_materialization, dict)
605
+ else None
606
+ ),
607
+ backfill_start_time=(
608
+ _try_parse_datetime(
609
+ "backfill_start_time",
610
+ f.window_materialization.get("backfill_start_time", None),
611
+ )
612
+ if isinstance(f.window_materialization, dict)
613
+ else None
614
+ ),
615
+ continuous_resolver=(
616
+ _try_parse_resolver_fqn(
617
+ "continuous_resolver",
618
+ f.window_materialization.get("continuous_resolver", None),
619
+ )
620
+ if isinstance(f.window_materialization, dict)
621
+ else None
622
+ ),
623
+ continuous_buffer_duration_seconds=(
624
+ _try_parse_duration(
625
+ "continuous_buffer_duration",
626
+ f.window_materialization.get("continuous_buffer_duration", None),
627
+ )
628
+ if isinstance(f.window_materialization, dict)
629
+ else None
630
+ ),
597
631
  )
598
632
 
599
633
  return cfg
@@ -710,6 +744,14 @@ def parse_windowed_materialization(f: Feature) -> WindowConfigResolved | None:
710
744
  aggregated_feature_name=aggregated_value,
711
745
  )
712
746
 
747
+ if aggregation == "sum" or aggregation == "mean":
748
+ try:
749
+ if issubclass(f.typ.parsed_annotation, Vector):
750
+ aggregation = f"vector_{aggregation}"
751
+ except TypeError:
752
+ # Not a class so not a Vector, skip
753
+ pass
754
+
713
755
  _check_types(
714
756
  feature_name=f.window_stem,
715
757
  aggregation=aggregation,
@@ -781,39 +823,51 @@ def parse_windowed_materialization(f: Feature) -> WindowConfigResolved | None:
781
823
  aggregation_kwargs=aggregation_kwargs,
782
824
  pyarrow_dtype=f.converter.pyarrow_dtype,
783
825
  filters=parsed_filters,
784
- backfill_resolver=_try_parse_resolver_fqn(
785
- "backfill_resolver",
786
- f.window_materialization.get("backfill_resolver", None),
787
- )
788
- if isinstance(f.window_materialization, dict)
789
- else None,
790
- backfill_schedule=f.window_materialization.get("backfill_schedule", None)
791
- if isinstance(f.window_materialization, dict)
792
- else None,
793
- backfill_lookback_duration_seconds=_try_parse_duration(
794
- "backfill_lookback_duration",
795
- f.window_materialization.get("backfill_lookback_duration", None),
796
- )
797
- if isinstance(f.window_materialization, dict)
798
- else None,
799
- backfill_start_time=_try_parse_datetime(
800
- "backfill_start_time",
801
- f.window_materialization.get("backfill_start_time", None),
802
- )
803
- if isinstance(f.window_materialization, dict)
804
- else None,
805
- continuous_resolver=_try_parse_resolver_fqn(
806
- "continuous_resolver",
807
- f.window_materialization.get("continuous_resolver", None),
808
- )
809
- if isinstance(f.window_materialization, dict)
810
- else None,
811
- continuous_buffer_duration_seconds=_try_parse_duration(
812
- "continuous_buffer_duration",
813
- f.window_materialization.get("continuous_buffer_duration", None),
814
- )
815
- if isinstance(f.window_materialization, dict)
816
- else None,
826
+ backfill_resolver=(
827
+ _try_parse_resolver_fqn(
828
+ "backfill_resolver",
829
+ f.window_materialization.get("backfill_resolver", None),
830
+ )
831
+ if isinstance(f.window_materialization, dict)
832
+ else None
833
+ ),
834
+ backfill_schedule=(
835
+ f.window_materialization.get("backfill_schedule", None)
836
+ if isinstance(f.window_materialization, dict)
837
+ else None
838
+ ),
839
+ backfill_lookback_duration_seconds=(
840
+ _try_parse_duration(
841
+ "backfill_lookback_duration",
842
+ f.window_materialization.get("backfill_lookback_duration", None),
843
+ )
844
+ if isinstance(f.window_materialization, dict)
845
+ else None
846
+ ),
847
+ backfill_start_time=(
848
+ _try_parse_datetime(
849
+ "backfill_start_time",
850
+ f.window_materialization.get("backfill_start_time", None),
851
+ )
852
+ if isinstance(f.window_materialization, dict)
853
+ else None
854
+ ),
855
+ continuous_resolver=(
856
+ _try_parse_resolver_fqn(
857
+ "continuous_resolver",
858
+ f.window_materialization.get("continuous_resolver", None),
859
+ )
860
+ if isinstance(f.window_materialization, dict)
861
+ else None
862
+ ),
863
+ continuous_buffer_duration_seconds=(
864
+ _try_parse_duration(
865
+ "continuous_buffer_duration",
866
+ f.window_materialization.get("continuous_buffer_duration", None),
867
+ )
868
+ if isinstance(f.window_materialization, dict)
869
+ else None
870
+ ),
817
871
  )
818
872
 
819
873
 
@@ -991,6 +1045,33 @@ class _UnderscoreValidationError(ValueError):
991
1045
  ...
992
1046
 
993
1047
 
1048
+ def _has_group_by_in_parent_chain(underscore: Underscore) -> bool:
1049
+ """
1050
+ Traverse parent chain to check if .group_by() exists before .agg().
1051
+
1052
+ For valid group_by_windowed: _.x.group_by(_.y).agg(_.z.sum())
1053
+ - Looks for: UnderscoreCall -> UnderscoreAttr("group_by")
1054
+
1055
+ Returns True if .group_by() found, False otherwise.
1056
+ """
1057
+ current: Optional[Any] = underscore
1058
+
1059
+ while current is not None:
1060
+ # Check if current is a .group_by() call
1061
+ if isinstance(current, UnderscoreCall):
1062
+ parent = current._chalk__parent
1063
+ if isinstance(parent, UnderscoreAttr) and parent._chalk__attr == "group_by":
1064
+ return True
1065
+
1066
+ # Move to parent
1067
+ if hasattr(current, "_chalk__parent"):
1068
+ current = current._chalk__parent
1069
+ else:
1070
+ break
1071
+
1072
+ return False
1073
+
1074
+
994
1075
  class ChalkImporter:
995
1076
  def __init__(self):
996
1077
  super().__init__()
@@ -1092,6 +1173,9 @@ class ChalkImporter:
1092
1173
  for feature_class in FeatureSetBase.registry.values():
1093
1174
  # Iterate through every class, to find every underscore definition.
1094
1175
  for f in feature_class.features:
1176
+ if f.is_windowed_pseudofeature is True:
1177
+ # need one LSP just for the base
1178
+ continue
1095
1179
  if f.underscore_expression is not None:
1096
1180
  # Validate that the underscore expression is well-formed.
1097
1181
  # If it is not well-formed, then an `_UnderscoreValidationError` will
@@ -1424,15 +1508,26 @@ def _supplemental_validate_underscore_expression(
1424
1508
 
1425
1509
  # TODO: Dominic - impl for UnderscoreCall args (we need some special casing for aggregate functions that take in UnderscoreItems)
1426
1510
  if isinstance(underscore, UnderscoreCall):
1511
+ if not isinstance(underscore._chalk__parent, UnderscoreAttr):
1512
+ # we only support calls on attrs, ie _.a.some_attr(*args, **kwargs)
1513
+ raise _UnderscoreValidationError(f"Cannot call non-attribute {underscore._chalk__parent}.")
1427
1514
  caller = underscore._chalk__parent._chalk__parent
1515
+ op_name = underscore._chalk__parent._chalk__attr
1516
+
1517
+ if (op := getattr(F, op_name, None)) is not None:
1518
+ if getattr(op, "_chalk__method_chaining_predicate", lambda _: True)(underscore):
1519
+ return _supplemental_validate_underscore_expression(
1520
+ state,
1521
+ class_namespace=class_namespace,
1522
+ underscore=op(caller, *underscore._chalk__args, **underscore._chalk__kwargs),
1523
+ )
1524
+
1428
1525
  maybe_parent_result = _supplemental_validate_underscore_expression(
1429
1526
  state=state,
1430
1527
  class_namespace=class_namespace,
1431
1528
  underscore=caller,
1432
1529
  )
1433
- if not isinstance(underscore._chalk__parent, UnderscoreAttr):
1434
- return None # TODO: Dominic - is this ever valid?
1435
- if underscore._chalk__parent._chalk__attr == "where":
1530
+ if op_name == "where":
1436
1531
  if maybe_parent_result is None:
1437
1532
  return None
1438
1533
  if not isinstance(maybe_parent_result, _HasManyNamespaceExpr) or not isinstance(caller, UnderscoreItem):
@@ -1457,9 +1552,26 @@ def _supplemental_validate_underscore_expression(
1457
1552
  raise _UnderscoreValidationError(
1458
1553
  f"the input '{arg!r}' is a feature namespace '{expr.namespace}' which cannot be used as a scalar value"
1459
1554
  )
1555
+ return None
1556
+
1557
+ # Validate .agg() usage (addressing TODO at line 1522)
1558
+ if op_name == "agg":
1559
+ if not _has_group_by_in_parent_chain(caller):
1560
+ raise _UnderscoreValidationError(
1561
+ "'.agg()' can only be used with '.group_by()' for group_by_windowed features. "
1562
+ + "For windowed features, use direct aggregation methods instead. "
1563
+ + "For example, instead of using '.agg(_.field.method())', use '.field.method()' directly on the filtered DataFrame"
1564
+ )
1460
1565
 
1461
1566
  return None
1462
1567
 
1568
+ # TODO: check that op_name is a supported agg or .agg/.group_by/etc
1569
+ # if op_name in supported_aggs:
1570
+ # # TODO: typechecking for agg fns
1571
+ # return None
1572
+ #
1573
+ # raise _UnderscoreValidationError(f"unrecognized function '{op_name}' in expression '{underscore}'")
1574
+
1463
1575
  if isinstance(underscore, UnderscoreItem):
1464
1576
  parent_result = _supplemental_validate_underscore_expression(
1465
1577
  state=state,
chalk/ml/__init__.py CHANGED
@@ -1,16 +1,20 @@
1
1
  from __future__ import annotations
2
2
 
3
- from chalk.ml.model_file_transfer import HFSourceConfig, LocalSourceConfig, S3SourceConfig, SourceConfig
3
+ from chalk.ml.model_file_transfer import FileInfo, HFSourceConfig, LocalSourceConfig, S3SourceConfig, SourceConfig
4
4
  from chalk.ml.model_reference import ModelReference
5
- from chalk.ml.utils import ModelEncoding, ModelRunCriterion, ModelType
5
+ from chalk.ml.model_version import ModelVersion
6
+ from chalk.ml.utils import ModelClass, ModelEncoding, ModelRunCriterion, ModelType
6
7
 
7
8
  __all__ = (
8
9
  "ModelType",
10
+ "ModelClass",
9
11
  "ModelEncoding",
10
12
  "ModelReference",
13
+ "ModelVersion",
11
14
  "SourceConfig",
12
15
  "LocalSourceConfig",
13
16
  "S3SourceConfig",
14
17
  "HFSourceConfig",
15
18
  "ModelRunCriterion",
19
+ "FileInfo",
16
20
  )