chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import datetime as dt
4
4
  import inspect
5
5
  from enum import Enum
6
- from typing import Any, Callable, Literal, Mapping, TypeVar, Union
6
+ from typing import Any, Callable, Literal, Mapping, Optional, TypeVar, Union
7
7
 
8
8
  import pyarrow as pa
9
9
 
@@ -1286,6 +1286,28 @@ def recover(*vals: Any):
1286
1286
  return UnderscoreFunction("recover", *vals)
1287
1287
 
1288
1288
 
1289
def is_not_null(expr: Any):
    """
    Check whether a value is present, i.e. not null.

    The result is the bitwise-inverted (``~``) output of ``is_null`` for the
    same expression.

    Parameters
    ----------
    expr
        The value to check for nullity.

    Examples
    --------
    >>> import chalk.functions as F
    >>> from chalk.features import _, features
    >>> @features
    ... class User:
    ...    id: str
    ...    nickname: str | None
    ...    nickname_not_missing: bool = F.is_not_null(_.nickname)
    """
    null_check = is_null(expr)
    return ~null_check
1309
+
1310
+
1289
1311
  def is_null(expr: Any):
1290
1312
  """
1291
1313
  Check if a value is null.
@@ -1293,7 +1315,7 @@ def is_null(expr: Any):
1293
1315
  Parameters
1294
1316
  ----------
1295
1317
  expr
1296
- The value to check if it is null.
1318
+ The value to check for nullity.
1297
1319
 
1298
1320
  Examples
1299
1321
  --------
@@ -1613,6 +1635,69 @@ def sagemaker_predict(
1613
1635
  )
1614
1636
 
1615
1637
 
1638
def openai_complete(
    api_key: Underscore | str,
    prompt: Underscore | str,
    model: Underscore | str,
    max_tokens: Underscore | int,
    temperature: Underscore | float,
):
    """
    Build an expression that requests a chat completion from OpenAI.

    This is a blocking expression: OpenAI's API is called while the feature
    is being computed. The result carries the completion text together with
    token usage statistics.

    Parameters
    ----------
    api_key
        The OpenAI API key to use for authentication.
    prompt
        The prompt text to send to the model.
    model
        The OpenAI model to use (e.g., "gpt-4", "gpt-3.5-turbo").
    max_tokens
        The maximum number of tokens to generate in the completion.
    temperature
        The sampling temperature to use, between 0 and 2. Higher values make
        output more random, lower values make it more deterministic.

    Returns
    -------
    A struct containing:
        - completion: The generated text response
        - prompt_tokens: Number of tokens in the prompt
        - completion_tokens: Number of tokens in the completion
        - total_tokens: Total tokens used (prompt + completion)
        - model: The model used for the completion
        - finish_reason: Why the completion stopped (e.g., "stop", "length")

    Examples
    --------
    >>> import chalk.functions as F
    >>> from chalk.features import _, features
    >>> @features
    ... class Document:
    ...    id: str
    ...    content: str
    ...    summary: str = F.openai_complete(
    ...        api_key="sk-...",
    ...        prompt=_.content,
    ...        model="gpt-4",
    ...        max_tokens=100,
    ...        temperature=0.7,
    ...    ).completion
    """
    # Positional order is part of the expression's wire format; keep it fixed.
    call_args = (api_key, prompt, model, max_tokens, temperature)
    return UnderscoreFunction("openai_complete", *call_args)
1699
+
1700
+
1616
1701
  def json_value(expr: Underscore, path: Union[str, Underscore]):
1617
1702
  """
1618
1703
  Extract structured data from a JSON string feature using a JSONPath expression.
@@ -3458,6 +3543,29 @@ def concat(first: Underscore | Any, second: Underscore | Any):
3458
3543
  return UnderscoreFunction("concat", first, second)
3459
3544
 
3460
3545
 
3546
def array(*args: Underscore | Any):
    """
    Build an array expression out of the supplied values.

    Parameters
    ----------
    args
        The values to create the array from, in order.

    Examples
    --------
    >>> import chalk.functions as F
    >>> from chalk.features import _, features
    >>> @features
    ... class Name:
    ...    id: str
    ...    first_name: str
    ...    last_name: str
    ...    name: list[str] = F.array(_.first_name, _.last_name)
    """
    elements = args
    # The underlying operation is registered under the name "array_constructor".
    return UnderscoreFunction("array_constructor", *elements)
3567
+
3568
+
3461
3569
  def array_sort(expr: Underscore | Any, descending: bool = False):
3462
3570
  """
3463
3571
  Returns an array which has the sorted order of the input
@@ -3470,6 +3578,17 @@ def array_sort(expr: Underscore | Any, descending: bool = False):
3470
3578
  The array to sort
3471
3579
  descending
3472
3580
  Whether to sort the array in descending order. Defaults to False.
3581
+
3582
+ Examples
3583
+ --------
3584
+ >>> import chalk.functions as F
3585
+ >>> from chalk.features import _, features
3586
+ >>> @features
3587
+ ... class LeaderBoard:
3588
+ ... id: str
3589
+ ... scores: list[int]
3590
+ ... sorted_scores_asc: list[int] = F.array_sort(_.scores)
3591
+ ... sorted_scores_desc: list[int] = F.array_sort(_.scores, descending=True)
3473
3592
  """
3474
3593
  if descending:
3475
3594
  return UnderscoreFunction("array_sort_desc", expr)
@@ -3484,6 +3603,16 @@ def array_stddev(expr: Underscore | Any):
3484
3603
  ----------
3485
3604
  expr
3486
3605
  The array to calculate the standard deviation
3606
+
3607
+ Examples
3608
+ --------
3609
+ >>> import chalk.functions as F
3610
+ >>> from chalk.features import _, features
3611
+ >>> @features
3612
+ ... class SensorData:
3613
+ ... id: str
3614
+ ... temperature_readings: list[float]
3615
+ ... temp_stddev: float = F.array_stddev(_.temperature_readings)
3487
3616
  """
3488
3617
  return UnderscoreFunction("array_stddev", expr, False)
3489
3618
 
@@ -3497,6 +3626,16 @@ def array_sample_stddev(expr: Underscore | Any):
3497
3626
  ----------
3498
3627
  expr
3499
3628
  The array to calculate the sample standard deviation
3629
+
3630
+ Examples
3631
+ --------
3632
+ >>> import chalk.functions as F
3633
+ >>> from chalk.features import _, features
3634
+ >>> @features
3635
+ ... class ExperimentResults:
3636
+ ... id: str
3637
+ ... sample_measurements: list[float]
3638
+ ... sample_stddev: float = F.array_sample_stddev(_.sample_measurements)
3500
3639
  """
3501
3640
  return UnderscoreFunction("array_stddev", expr, True)
3502
3641
 
@@ -3509,6 +3648,16 @@ def array_sum(expr: Underscore | Any):
3509
3648
  ----------
3510
3649
  expr
3511
3650
  The array to sum
3651
+
3652
+ Examples
3653
+ --------
3654
+ >>> import chalk.functions as F
3655
+ >>> from chalk.features import _, features
3656
+ >>> @features
3657
+ ... class Transaction:
3658
+ ... id: str
3659
+ ... line_items: list[float]
3660
+ ... total_amount: float = F.array_sum(_.line_items)
3512
3661
  """
3513
3662
  return UnderscoreFunction("array_sum", expr)
3514
3663
 
@@ -3521,6 +3670,16 @@ def array_average(expr: Underscore | Any):
3521
3670
  ----------
3522
3671
  expr
3523
3672
  The array to average
3673
+
3674
+ Examples
3675
+ --------
3676
+ >>> import chalk.functions as F
3677
+ >>> from chalk.features import _, features
3678
+ >>> @features
3679
+ ... class StudentGrades:
3680
+ ... id: str
3681
+ ... test_scores: list[float]
3682
+ ... average_score: float = F.array_average(_.test_scores)
3524
3683
  """
3525
3684
  return UnderscoreFunction("array_average", expr)
3526
3685
 
@@ -3533,6 +3692,16 @@ def array_median(expr: Underscore | Any):
3533
3692
  ----------
3534
3693
  expr
3535
3694
  The array to take the median of
3695
+
3696
+ Examples
3697
+ --------
3698
+ >>> import chalk.functions as F
3699
+ >>> from chalk.features import _, features
3700
+ >>> @features
3701
+ ... class HousingMarket:
3702
+ ... id: str
3703
+ ... property_prices: list[float]
3704
+ ... median_price: float = F.array_median(_.property_prices)
3536
3705
  """
3537
3706
  return UnderscoreFunction.with_f_dot_repr("array_median", expr)
3538
3707
 
@@ -3555,6 +3724,17 @@ def array_mode(expr: Underscore | Any, tiebreak: Literal["FIRST", "MAX", "MIN"]
3555
3724
  ``"MIN"`` will return 1, the min of the multimodes;
3556
3725
 
3557
3726
  Defaults to ``"FIRST"`` (the behavior of python's ``statistics.mode()``)
3727
+
3728
+ Examples
3729
+ --------
3730
+ >>> import chalk.functions as F
3731
+ >>> from chalk.features import _, features
3732
+ >>> @features
3733
+ ... class SurveyAnalysis:
3734
+ ... id: str
3735
+ ... responses: list[int]
3736
+ ... most_common_response: int = F.array_mode(_.responses)
3737
+ ... highest_mode: int = F.array_mode(_.responses, tiebreak="MAX")
3558
3738
  """
3559
3739
  int_mode = 0 if tiebreak == "FIRST" else 1 if tiebreak == "MAX" else 2 if tiebreak == "MIN" else -1
3560
3740
  if int_mode == -1:
@@ -3679,33 +3859,64 @@ def array_count_value(expr: Underscore, value: Union[str, Underscore]):
3679
3859
 
3680
3860
 
3681
3861
def _underscore_lambda(
    f: Callable[..., Underscore],
    *,
    parameter_type: Optional[pa.DataType] = None,
    parameter_types: Optional[list[pa.DataType]] = None,
) -> Underscore:
    """
    This is a utility function for constructing lambda expressions in underscore expressions.
    Accepts functions with one or more positional arguments.

    The caller must specify the parameter type(s) for the callback. ``f`` is
    called immediately, once, with one ``lambda_parameter`` expression per
    declared parameter; whatever it returns becomes the body of the lambda.

    Parameters
    ----------
    f
        A callable that takes one or more Underscore arguments and returns an Underscore
    parameter_type
        For backward compatibility: the type of the single parameter (if function has one parameter)
    parameter_types
        List of parameter types for each argument (if function has multiple parameters)

    Returns
    -------
    A ``"lambda"`` underscore function whose arguments are
    ``name_1, type_1, ..., name_n, type_n, body``.

    Raises
    ------
    ValueError
        If both or neither of ``parameter_type`` / ``parameter_types`` are given,
        if the number of types does not match the number of parameters of ``f``,
        or if ``f`` declares no parameters.
    """

    if parameter_type is not None and parameter_types is not None:
        raise ValueError("Cannot specify both parameter_type and parameter_types")

    if parameter_type is not None:
        param_types_list = [parameter_type]
    elif parameter_types is not None:
        # Copy so that the caller's list cannot change underneath us while `f` runs.
        param_types_list = list(parameter_types)
    else:
        raise ValueError("Must specify either parameter_type or parameter_types")

    # Parameter names are taken from the callback's own signature. They are
    # always non-empty identifiers, so no fallback name is needed.
    param_names = list(inspect.signature(f).parameters)

    if len(param_names) != len(param_types_list):
        raise ValueError(f"Function has {len(param_names)} parameter(s) but {len(param_types_list)} type(s) provided")

    if not param_names:
        raise ValueError("Function must have at least one parameter")

    # Single source of truth for (name, type) pairs: the same pairing feeds both
    # the placeholder parameters handed to `f` and the final lambda argument list.
    typed_params = list(zip(param_names, param_types_list))

    placeholders = [
        UnderscoreFunction("lambda_parameter", param_name, param_type) for param_name, param_type in typed_params
    ]
    body_expr = f(*placeholders)

    lambda_args: list = []
    for param_name, param_type in typed_params:
        lambda_args.extend((param_name, param_type))
    lambda_args.append(body_expr)

    return UnderscoreFunction("lambda", *lambda_args)
3709
3920
 
3710
3921
 
3711
3922
  def array_filter(
@@ -3736,7 +3947,6 @@ def array_filter(
3736
3947
  ... id: str
3737
3948
  ... recent_activities: list[float]
3738
3949
  ... average_activity: float
3739
- ...
3740
3950
  ... recent_high_value_activities: list[float] = F.array_filter(
3741
3951
  ... _.recent_activities,
3742
3952
  ... lambda amount: amount > _.average_activity,
@@ -3758,6 +3968,159 @@ def array_filter(
3758
3968
  )
3759
3969
 
3760
3970
 
3971
def array_transform(
    arr: Underscore,
    transform: Callable[[Underscore], Underscore],
    item_type: Union[pa.DataType, type],
) -> Underscore:
    """
    Map a custom function over every element of an array, producing a new array
    of the transformed items.

    Parameters
    ----------
    arr
        An array of values
    transform
        A Python function producing an underscore expression to be applied to each item
        in the array.
    item_type
        The type of each item in the array. This must be set explicitly. Either a
        ``pyarrow`` data type or a Python type, which is converted to its ``pyarrow``
        equivalent.

    Examples
    --------
    >>> import chalk.functions as F
    >>> from chalk.features import _, features
    >>> @features
    ... class Bookstore:
    ...    id: str
    ...    prices: list[float]
    ...    store_discount: float
    ...    final_price: list[float] = F.array_transform(
    ...        _.prices,
    ...        lambda amount: amount * _.store_discount,
    ...        item_type=float,
    ...    )
    """

    if isinstance(item_type, pa.DataType):
        element_type = item_type
    else:
        # Plain Python types are translated to their arrow counterpart first.
        element_type = rich_to_pyarrow(
            item_type,
            name="array_transform.item_type",
            respect_nullability=False,
        )

    element_lambda = _underscore_lambda(transform, parameter_type=element_type)
    return UnderscoreFunction("array_transform", arr, element_lambda)
+
4019
+
4020
def array_reduce(
    arr: Underscore,
    initial_value: Underscore | Any,
    arr_item_type: Union[pa.DataType, type],
    reduce: Callable[[Underscore, Underscore], Underscore],
    accumulator_type: Optional[Union[pa.DataType, type]] = None,
    output_func: Callable[[Underscore], Underscore] = lambda x: x,
) -> Underscore:
    """
    Reduces an array to a single value by applying a function to each element
    along with an accumulator.

    Parameters
    ----------
    arr
        An array of values
    initial_value
        The initial value for the accumulator
    reduce
        A function that takes (accumulator, item) and returns the new accumulator value
    arr_item_type
        Type of each item in the array
    accumulator_type
        The optional type of the accumulator. When omitted it is inferred from
        ``initial_value`` (a ``pyarrow`` scalar or a Python literal). It must be given
        explicitly when ``initial_value`` is an underscore expression or ``None``.
    output_func
        Optional function to transform the final accumulator value

    Raises
    ------
    ValueError
        If the accumulator type is neither provided nor inferable from
        ``initial_value``, or if ``arr_item_type`` is ``None``.

    Examples
    --------
    >>> import chalk.functions as F
    >>> from chalk.features import _, features
    >>> @features
    ... class User:
    ...    id: str
    ...    scores: list[int]
    ...    total_score: int = F.array_reduce(
    ...        arr=_.scores,
    ...        initial_value=0,
    ...        arr_item_type=int,
    ...        reduce=lambda acc, score: acc + score,
    ...    )
    """
    accumulator_type_arrow: Optional[pa.DataType] = None

    if accumulator_type is not None:
        if isinstance(accumulator_type, pa.DataType):
            accumulator_type_arrow = accumulator_type
        else:
            accumulator_type_arrow = rich_to_pyarrow(
                accumulator_type,
                name="array_reduce.accumulator_type",
                respect_nullability=False,
            )

    if isinstance(initial_value, pa.DataType):
        # Kept for backward compatibility: a DataType passed as `initial_value`
        # is taken as the accumulator type and wins over `accumulator_type`.
        accumulator_type_arrow = initial_value
    elif accumulator_type_arrow is None:
        # No explicit type: infer it from the initial value. Underscore
        # expressions and None carry no type information we can read here.
        if initial_value is None or isinstance(initial_value, Underscore):
            raise ValueError("initial_value type could not be determined; please provide it explicitly.")
        try:
            if isinstance(initial_value, pa.Scalar):
                accumulator_type_arrow = initial_value.type
            else:
                # Let pyarrow infer the type of a Python literal.
                accumulator_type_arrow = pa.scalar(initial_value).type
        except (TypeError, pa.ArrowInvalid) as exc:
            raise ValueError(
                "Could not infer type of initial_value; please provide accumulator_type explicitly."
            ) from exc

    if arr_item_type is None:
        raise ValueError("arr_item_type must be provided to array_reduce")

    if isinstance(arr_item_type, pa.DataType):
        arr_item_type_arrow = arr_item_type
    else:
        arr_item_type_arrow = rich_to_pyarrow(
            arr_item_type,
            name="array_reduce.arr_item_type",
            respect_nullability=False,
        )

    # The reducer receives (accumulator, item); the output function receives the
    # final accumulator only.
    reduce_lambda = _underscore_lambda(reduce, parameter_types=[accumulator_type_arrow, arr_item_type_arrow])
    output_lambda = _underscore_lambda(output_func, parameter_type=accumulator_type_arrow)

    return UnderscoreFunction(
        "array_reduce",
        arr,
        initial_value,
        reduce_lambda,
        output_lambda,
    )
+
4123
+
3761
4124
  def array_max(arr: Underscore):
3762
4125
  """
3763
4126
  Returns the maximum value in an array.
@@ -4139,6 +4502,11 @@ def max(*values: Any):
4139
4502
  )
4140
4503
 
4141
4504
 
4505
# Attach a predicate to `max` as a function attribute. Given an underscore call
# object, it returns True only when that call carries at least one positional
# argument in `_chalk__args`.
# NOTE(review): presumably consulted when resolving method-chained calls such
# as `_.x.max(...)` -- confirm against the code that reads this attribute.
max._chalk__method_chaining_predicate = (  # pyright: ignore[reportFunctionMemberAccess]
    lambda underscore_call: len(underscore_call._chalk__args) > 0
)
4508
+
4509
+
4142
4510
  def min(*values: Any):
4143
4511
  """
4144
4512
  Returns the minimum value in a list of values.
@@ -4170,6 +4538,11 @@ def min(*values: Any):
4170
4538
  )
4171
4539
 
4172
4540
 
4541
# Attach a predicate to `min` as a function attribute. Given an underscore call
# object, it returns True only when that call carries at least one positional
# argument in `_chalk__args`.
# NOTE(review): presumably consulted when resolving method-chained calls such
# as `_.x.min(...)` -- confirm against the code that reads this attribute.
min._chalk__method_chaining_predicate = (  # pyright: ignore[reportFunctionMemberAccess]
    lambda underscore_call: len(underscore_call._chalk__args) > 0
)
4544
+
4545
+
4173
4546
  def jinja(template: str):
4174
4547
  """
4175
4548
  Runs a Jinja template on the input columns.
@@ -4359,7 +4732,7 @@ def nth_bucket_end(value: Underscore, bucket_duration: str, n: int, initial_buck
4359
4732
 
4360
4733
 
4361
4734
  def inference(
4362
- model: ModelVersion, inputs: list[Underscore | Any], resource_hint: ResourceHint | None = None
4735
+ model: ModelVersion, inputs: list[Underscore | Any] | Underscore, resource_hint: ResourceHint | None = None
4363
4736
  ) -> Underscore | Feature:
4364
4737
  """
4365
4738
  Run inference on a deployed ML model.
@@ -5228,6 +5601,64 @@ def array_normalize(array: Underscore, p: Underscore | float | None = None):
5228
5601
  return UnderscoreFunction("array_normalize", array, 2.0 if p is None else p)
5229
5602
 
5230
5603
 
5604
def scale_vector(array: Underscore, p: Underscore | float):
    """
    Multiplies every element of a vector by the scalar factor `p`.

    Parameters
    ----------
    array
        The vector to scale.
    p
        The scaling factor, given either as a float literal or as an
        underscore expression.

    Returns
    -------
    An underscore function expression for the vector in which each element
    of `array` has been multiplied by `p`.

    Examples
    --------
    >>> import chalk.functions as F
    >>> from chalk.features import _, features
    >>> @features
    ... class ProbabilityModel:
    ...    id: str
    ...    embedding: Vector[3]
    ...    scale: float
    ...    probabilities: Vector[3] = F.scale_vector(_.embedding, _.scale)
    """
    scaled = UnderscoreFunction("scale_vector", array, p)
    return scaled
5631
+
5632
+
5633
def array_add(array1: Underscore, array2: Underscore):
    """
    Adds two vectors element by element.

    Parameters
    ----------
    array1
        The left-hand input vector.
    array2
        The right-hand input vector.

    Returns
    -------
    An underscore function expression for the vector whose elements are the
    sums of the corresponding elements of `array1` and `array2`.

    Examples
    --------
    >>> import chalk.functions as F
    >>> from chalk.features import _, features
    >>> @features
    ... class VectorModel:
    ...    id: str
    ...    vec1: Vector[3]
    ...    vec2: Vector[3]
    ...    sum_vec: Vector[3] = F.array_add(_.vec1, _.vec2)
    """
    summed = UnderscoreFunction("array_add", array1, array2)
    return summed
5660
+
5661
+
5231
5662
  def array_position(array: Underscore, element: Underscore):
5232
5663
  """
5233
5664
  Find the position of an element in the array (1-based indexing).
@@ -5431,6 +5862,7 @@ __all__ = (
5431
5862
  "abs",
5432
5863
  "acos",
5433
5864
  "array_agg",
5865
+ "array_add",
5434
5866
  "array_average",
5435
5867
  "array_count_value",
5436
5868
  "array_cum_sum",
@@ -5511,6 +5943,7 @@ __all__ = (
5511
5943
  "inference",
5512
5944
  "is_leap_year",
5513
5945
  "is_month_end",
5946
+ "is_not_null",
5514
5947
  "is_null",
5515
5948
  "is_us_federal_holiday",
5516
5949
  "jaccard_similarity",
@@ -5545,6 +5978,7 @@ __all__ = (
5545
5978
  "normal_cdf",
5546
5979
  "nth_bucket_end",
5547
5980
  "nth_bucket_start",
5981
+ "openai_complete",
5548
5982
  "parse_datetime",
5549
5983
  "partial_ratio",
5550
5984
  "pi",
@@ -5618,4 +6052,5 @@ __all__ = (
5618
6052
  "word_stem",
5619
6053
  "xgboost_regressor",
5620
6054
  "year",
6055
+ "scale_vector",
5621
6056
  )
@@ -3,9 +3,7 @@ from __future__ import annotations
3
3
  from dataclasses import dataclass
4
4
  from datetime import date
5
5
  from enum import Enum
6
- from typing import Literal, Optional, Union
7
-
8
- from typing_extensions import TypeAlias
6
+ from typing import Literal, Optional, TypeAlias, Union
9
7
 
10
8
 
11
9
  class DayOfWeek(int, Enum):
@@ -114,6 +114,10 @@ def _rule_from_pattern(pattern: str, base_path: Optional[Path] = None, source: O
114
114
  regex = _fnmatch_pathname_to_regex(pattern, directory_only)
115
115
  if anchored:
116
116
  regex = f"^{regex}"
117
+ else:
118
+ # For non-anchored patterns, match at path component boundaries
119
+ # (start of string or after a path separator)
120
+ regex = f"(^|/){regex}"
117
121
  regex = f"(?ms){regex}"
118
122
  return IgnoreRule(
119
123
  pattern=orig_pattern,
@@ -215,6 +219,6 @@ def _fnmatch_pathname_to_regex(pattern: str, directory_only: bool):
215
219
  if directory_only:
216
220
  res.append(r"/.*$")
217
221
  else:
218
- res.append("(/.*|[^/]*)$")
222
+ res.append("(/.*)?$")
219
223
 
220
224
  return "".join(res)