chalkpy-2.89.22-py3-none-any.whl → chalkpy-2.95.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/sql/__init__.py CHANGED
@@ -10,6 +10,7 @@ from chalk.sql._internal.integrations.clickhouse import ClickhouseSourceImpl
10
10
  from chalk.sql._internal.integrations.cloudsql import CloudSQLSourceImpl
11
11
  from chalk.sql._internal.integrations.databricks import DatabricksSourceImpl
12
12
  from chalk.sql._internal.integrations.dynamodb import DynamoDBSourceImpl
13
+ from chalk.sql._internal.integrations.mssql import MSSQLSourceImpl
13
14
  from chalk.sql._internal.integrations.mysql import MySQLSourceImpl
14
15
  from chalk.sql._internal.integrations.postgres import PostgreSQLSourceImpl
15
16
  from chalk.sql._internal.integrations.redshift import RedshiftSourceImpl
@@ -464,6 +465,196 @@ def MySQLSource(
464
465
  )
465
466
 
466
467
 
468
+ @overload
469
+ def MSSQLSource() -> SQLSourceWithTableIngestProtocol:
470
+ """If you have only one MSSQL connection that you'd like
471
+ to add to Chalk, you do not need to specify any arguments
472
+ to construct the source in your code.
473
+
474
+ Returns
475
+ -------
476
+ SQLSourceWithTableIngestProtocol
477
+ The SQL source for use in Chalk resolvers.
478
+
479
+ Examples
480
+ --------
481
+ >>> mssql = MSSQLSource()
482
+ """
483
+ ...
484
+
485
+
486
+ @overload
487
+ def MSSQLSource(
488
+ *,
489
+ name: str,
490
+ engine_args: Optional[Dict[str, Any]] = ...,
491
+ async_engine_args: Optional[Dict[str, Any]] = ...,
492
+ ) -> SQLSourceWithTableIngestProtocol:
493
+ """If you have only one MSSQL integration, there's no need to provide
494
+ a distinguishing name.
495
+
496
+ But what happens when you have two data sources of the same kind?
497
+ When you create a new data source from your dashboard,
498
+ you have an option to provide a name for the integration.
499
+ You can then reference this name in the code directly.
500
+
501
+ Parameters
502
+ ----------
503
+ name
504
+ Name of the integration, as configured in your dashboard.
505
+ engine_args
506
+ Additional arguments to use when constructing the SQLAlchemy engine. These arguments will be
507
+ merged with any default arguments from the named integration.
508
+ async_engine_args
509
+ Additional arguments to use when constructing an async SQLAlchemy engine.
510
+
511
+ Returns
512
+ -------
513
+ SQLSourceWithTableIngestProtocol
514
+ The SQL source for use in Chalk resolvers.
515
+
516
+ Examples
517
+ --------
518
+ >>> source = MSSQLSource(name="RISK")
519
+ """
520
+ ...
521
+
522
+
523
+ @overload
524
+ def MSSQLSource(
525
+ *,
526
+ name: str | None = ...,
527
+ host: str,
528
+ port: Union[int, str] = ...,
529
+ db: str = ...,
530
+ user: str = ...,
531
+ password: str = ...,
532
+ client_id: str = ...,
533
+ client_secret: str = ...,
534
+ tenant_id: str = ...,
535
+ engine_args: Optional[Dict[str, Any]] = ...,
536
+ async_engine_args: Optional[Dict[str, Any]] = ...,
537
+ ) -> SQLSourceWithTableIngestProtocol:
538
+ """
539
+ You can also configure the integration directly using environment
540
+ variables on your local machine or from those added through the
541
+ generic environment variable support (https://docs.chalk.ai/docs/env-vars).
542
+
543
+ Authentication Methods:
544
+ - SQL Authentication: Provide `user` and `password`
545
+ - Azure AD Managed Identity: Leave `user`, `password`, `client_id`, `client_secret`, and `tenant_id` empty
546
+ - Azure AD Service Principal: Provide `client_id`, `client_secret`, and `tenant_id`
547
+
548
+ Parameters
549
+ ----------
550
+ name
551
+ Name of the integration. Not required unless if this SQL Source is used within SQL File Resolvers.
552
+ host
553
+ Name of host to connect to.
554
+ port
555
+ The port number to connect to at the server host.
556
+ db
557
+ The database name.
558
+ user
559
+ MSSQL username to connect as (for SQL authentication).
560
+ password
561
+ The password to be used for SQL authentication.
562
+ client_id
563
+ Azure AD Client ID (for Service Principal authentication).
564
+ client_secret
565
+ Azure AD Client Secret (for Service Principal authentication).
566
+ tenant_id
567
+ Azure AD Tenant ID (for Service Principal authentication).
568
+ engine_args
569
+ Additional arguments to use when constructing the SQLAlchemy engine.
570
+ async_engine_args
571
+ Additional arguments to use when constructing an async SQLAlchemy engine.
572
+
573
+ Returns
574
+ -------
575
+ SQLSourceWithTableIngestProtocol
576
+ The SQL source for use in Chalk resolvers.
577
+
578
+ Examples
579
+ --------
580
+ SQL Authentication:
581
+ >>> import os
582
+ >>> mssql = MSSQLSource(
583
+ ... host=os.getenv("MSSQL_HOST"),
584
+ ... port=os.getenv("MSSQL_TCP_PORT"),
585
+ ... db=os.getenv("MSSQL_DATABASE"),
586
+ ... user=os.getenv("MSSQL_USER"),
587
+ ... password=os.getenv("MSSQL_PWD"),
588
+ ... )
589
+
590
+ Managed Identity (running in Azure):
591
+ >>> mssql = MSSQLSource(
592
+ ... host=os.getenv("MSSQL_HOST"),
593
+ ... port=os.getenv("MSSQL_TCP_PORT"),
594
+ ... db=os.getenv("MSSQL_DATABASE"),
595
+ ... )
596
+
597
+ Service Principal:
598
+ >>> mssql = MSSQLSource(
599
+ ... host=os.getenv("MSSQL_HOST"),
600
+ ... port=os.getenv("MSSQL_TCP_PORT"),
601
+ ... db=os.getenv("MSSQL_DATABASE"),
602
+ ... client_id=os.getenv("MSSQL_CLIENT_ID"),
603
+ ... client_secret=os.getenv("MSSQL_CLIENT_SECRET"),
604
+ ... tenant_id=os.getenv("MSSQL_TENANT_ID"),
605
+ ... )
606
+
607
+ >>> from chalk.features import online
608
+ >>> @online
609
+ ... def resolver_fn() -> User.name:
610
+ ... return mssql.query_string("select name from users where id = 4").one()
611
+ """
612
+ ...
613
+
614
+
615
+ def MSSQLSource(
616
+ *,
617
+ host: Optional[str] = None,
618
+ port: Optional[Union[int, str]] = None,
619
+ db: Optional[str] = None,
620
+ user: Optional[str] = None,
621
+ password: Optional[str] = None,
622
+ client_id: Optional[str] = None,
623
+ client_secret: Optional[str] = None,
624
+ tenant_id: Optional[str] = None,
625
+ name: Optional[str] = None,
626
+ engine_args: Optional[Dict[str, Any]] = None,
627
+ async_engine_args: Optional[Dict[str, Any]] = None,
628
+ ) -> SQLSourceWithTableIngestProtocol:
629
+ """Create a MSSQL data source. SQL-based data sources
630
+ created without arguments assume a configuration in your
631
+ Chalk Dashboard. Those created with the `name=` keyword
632
+ argument will use the configuration for the integration
633
+ with the given name. And finally, those created with
634
+ explicit arguments will use those arguments to configure
635
+ the data source. See the overloaded signatures for more
636
+ details.
637
+
638
+ Supports three authentication methods:
639
+ - SQL Authentication: user + password
640
+ - Azure AD Managed Identity: no credentials (automatic in Azure)
641
+ - Azure AD Service Principal: client_id + client_secret + tenant_id
642
+ """
643
+ return MSSQLSourceImpl(
644
+ host=host,
645
+ port=port,
646
+ db=db,
647
+ user=user,
648
+ password=password,
649
+ client_id=client_id,
650
+ client_secret=client_secret,
651
+ tenant_id=tenant_id,
652
+ name=name,
653
+ engine_args=engine_args,
654
+ async_engine_args=async_engine_args,
655
+ )
656
+
657
+
467
658
  def SQLiteInMemorySource(
468
659
  name: Optional[str] = None,
469
660
  engine_args: Optional[Dict[str, Any]] = None,
@@ -848,6 +1039,8 @@ def BigQuerySource(
848
1039
  credentials_base64: Optional[str] = ...,
849
1040
  credentials_path: Optional[str] = ...,
850
1041
  engine_args: Optional[Dict[str, Any]] = ...,
1042
+ temp_project: Optional[str] = ...,
1043
+ temp_dataset: Optional[str] = ...,
851
1044
  ) -> BaseSQLSourceProtocol:
852
1045
  """You can also configure the integration directly using environment
853
1046
  variables on your local machine or from those added through the
@@ -869,6 +1062,10 @@ def BigQuerySource(
869
1062
  The path to the credentials file to use to connect.
870
1063
  engine_args
871
1064
  Additional arguments to use when constructing the SQLAlchemy engine.
1065
+ temp_project
1066
+ The BigQuery project to use for temporary tables.
1067
+ temp_dataset
1068
+ The BigQuery dataset to use for temporary tables.
872
1069
 
873
1070
  Returns
874
1071
  -------
@@ -897,6 +1094,8 @@ def BigQuerySource(
897
1094
  credentials_base64: Optional[str] = None,
898
1095
  credentials_path: Optional[str] = None,
899
1096
  engine_args: Optional[Dict[str, Any]] = None,
1097
+ temp_project: Optional[str] = None,
1098
+ temp_dataset: Optional[str] = None,
900
1099
  ) -> BaseSQLSourceProtocol:
901
1100
  """Create a BigQuery data source. SQL-based data sources
902
1101
  created without arguments assume a configuration in your
@@ -914,6 +1113,8 @@ def BigQuerySource(
914
1113
  location=location,
915
1114
  credentials_base64=credentials_base64,
916
1115
  credentials_path=credentials_path,
1116
+ temp_project=temp_project,
1117
+ temp_dataset=temp_dataset,
917
1118
  engine_args=engine_args,
918
1119
  )
919
1120
 
@@ -1236,6 +1437,8 @@ def DatabricksSource(
1236
1437
  access_token: str = ...,
1237
1438
  db: str = ...,
1238
1439
  port: str = ...,
1440
+ client_id: str = ...,
1441
+ client_secret: str = ...,
1239
1442
  engine_args: Optional[Dict[str, Any]] = ...,
1240
1443
  ) -> BaseSQLSourceProtocol:
1241
1444
  """You can also configure the integration directly using environment
@@ -1256,6 +1459,10 @@ def DatabricksSource(
1256
1459
  Database to use.
1257
1460
  port
1258
1461
  Port number to use.
1462
+ client_id
1463
+ OAuth service principal client ID (alternative to access_token).
1464
+ client_secret
1465
+ OAuth service principal client secret (alternative to access_token).
1259
1466
  engine_args
1260
1467
  Additional arguments to use when constructing the SQLAlchemy engine.
1261
1468
 
@@ -1274,6 +1481,14 @@ def DatabricksSource(
1274
1481
  ... db=os.getenv("DATABRICKS_DATABASE"),
1275
1482
  ... port=os.getenv("DATABRICKS_PORT"),
1276
1483
  ... )
1484
+ >>> databricks_with_oauth = DatabricksSource(
1485
+ ... host=os.getenv("DATABRICKS_HOST"),
1486
+ ... http_path=os.getenv("DATABRICKS_HTTP_PATH"),
1487
+ ... client_id=os.getenv("DATABRICKS_CLIENT_ID"),
1488
+ ... client_secret=os.getenv("DATABRICKS_CLIENT_SECRET"),
1489
+ ... db=os.getenv("DATABRICKS_DATABASE"),
1490
+ ... port=os.getenv("DATABRICKS_PORT"),
1491
+ ... )
1277
1492
  """
1278
1493
  ...
1279
1494
 
@@ -1286,6 +1501,8 @@ def DatabricksSource(
1286
1501
  access_token: Optional[str] = None,
1287
1502
  db: Optional[str] = None,
1288
1503
  port: Optional[Union[str, int]] = None,
1504
+ client_id: Optional[str] = None,
1505
+ client_secret: Optional[str] = None,
1289
1506
  engine_args: Optional[Dict[str, Any]] = None,
1290
1507
  ) -> BaseSQLSourceProtocol:
1291
1508
  """Create a Databricks data source. SQL-based data sources
@@ -1304,6 +1521,8 @@ def DatabricksSource(
1304
1521
  db=db,
1305
1522
  port=port,
1306
1523
  name=name,
1524
+ client_id=client_id,
1525
+ client_secret=client_secret,
1307
1526
  engine_args=engine_args,
1308
1527
  )
1309
1528
 
@@ -1747,6 +1966,8 @@ __all__ = (
1747
1966
  "DynamoDBSource",
1748
1967
  "FinalizedChalkQuery",
1749
1968
  "IncrementalSettings",
1969
+ "MSSQLSource",
1970
+ "MSSQLSourceImpl",
1750
1971
  "MySQLSource",
1751
1972
  "PostgreSQLSource",
1752
1973
  "RedshiftSource",
chalk/sql/_internal/integrations/athena.py CHANGED
@@ -24,6 +24,7 @@ from chalk.sql.finalized_query import FinalizedChalkQuery
24
24
  from chalk.utils.df_utils import pa_array_to_pl_series
25
25
  from chalk.utils.log_with_context import get_logger
26
26
  from chalk.utils.missing_dependency import missing_dependency_exception
27
+ from chalk.utils.pl_helpers import str_json_decode_compat
27
28
  from chalk.utils.threading import DEFAULT_IO_EXECUTOR, MultiSemaphore
28
29
  from chalk.utils.tracing import safe_incr, safe_set_gauge, safe_trace
29
30
 
@@ -485,7 +486,11 @@ class AthenaSourceImpl(BaseSQLSource):
485
486
  if pa.types.is_list(expected_type) or pa.types.is_large_list(expected_type):
486
487
  if pa.types.is_string(actual_type) or pa.types.is_large_string(actual_type):
487
488
  series = pa_array_to_pl_series(tbl[col_name])
488
- column = series.str.json_extract(feature.converter.polars_dtype).to_arrow().cast(expected_type)
489
+ column = (
490
+ str_json_decode_compat(series, feature.converter.polars_dtype)
491
+ .to_arrow()
492
+ .cast(expected_type)
493
+ )
489
494
  if actual_type != expected_type:
490
495
  column = column.cast(options=pc.CastOptions(target_type=expected_type, allow_time_truncate=True))
491
496
  if isinstance(column, pa.ChunkedArray):
chalk/sql/_internal/integrations/bigquery.py CHANGED
@@ -225,6 +225,8 @@ _BQ_DATASET_NAME = "BQ_DATASET"
225
225
  _BQ_PROJECT_NAME = "BQ_PROJECT"
226
226
  _BQ_CREDENTIALS_BASE64_NAME = "BQ_CREDENTIALS_BASE64"
227
227
  _BQ_CREDENTIALS_PATH_NAME = "BQ_CREDENTIALS_PATH"
228
+ _BQ_TEMP_PROJECT_NAME = "BQ_TEMP_PROJECT"
229
+ _BQ_TEMP_DATASET_NAME = "BQ_TEMP_DATASET"
228
230
 
229
231
 
230
232
  class BigQuerySourceImpl(BaseSQLSource):
@@ -239,6 +241,8 @@ class BigQuerySourceImpl(BaseSQLSource):
239
241
  location: Optional[str] = None,
240
242
  credentials_base64: Optional[str] = None,
241
243
  credentials_path: Optional[str] = None,
244
+ temp_project: Optional[str] = None,
245
+ temp_dataset: Optional[str] = None,
242
246
  engine_args: Optional[Dict[str, Any]] = None,
243
247
  integration_variable_override: Optional[Mapping[str, str]] = None,
244
248
  ):
@@ -267,6 +271,12 @@ class BigQuerySourceImpl(BaseSQLSource):
267
271
  self.credentials_path = credentials_path or load_integration_variable(
268
272
  integration_name=name, name=_BQ_CREDENTIALS_PATH_NAME, override=integration_variable_override
269
273
  )
274
+ self.temp_project = temp_project or load_integration_variable(
275
+ integration_name=name, name=_BQ_TEMP_PROJECT_NAME, override=integration_variable_override
276
+ )
277
+ self.temp_dataset = temp_dataset or load_integration_variable(
278
+ integration_name=name, name=_BQ_TEMP_DATASET_NAME, override=integration_variable_override
279
+ )
270
280
  BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args={})
271
281
 
272
282
  @functools.cached_property
@@ -397,6 +407,10 @@ class BigQuerySourceImpl(BaseSQLSource):
397
407
  except ModuleNotFoundError:
398
408
  raise missing_dependency_exception("chalkpy[bigquery]")
399
409
 
410
+ # Use temp_project/temp_dataset if specified, otherwise fall back to main project/dataset
411
+ temp_project = self.temp_project or self.project
412
+ temp_dataset = self.temp_dataset or self.dataset
413
+
400
414
  create_table_sql = create_temp_table.compile(dialect=self.get_sqlalchemy_dialect()).string
401
415
  create_table_sql = create_table_sql.replace("TEMPORARY", "", 1)
402
416
  chalk_logger.info(f"Creating temporary table {temp_table.name} in BigQuery {session_id=}: {create_table_sql}")
@@ -412,9 +426,10 @@ class BigQuerySourceImpl(BaseSQLSource):
412
426
  job_config=job_config,
413
427
  ).result()
414
428
  try:
429
+ temp_table_fqn = f"{temp_project}.{temp_dataset}.{temp_table.name}"
415
430
  connection.load_table_from_dataframe(
416
431
  temp_value.to_pandas(),
417
- f"{self.project}.{self.dataset}.{temp_table.name}",
432
+ temp_table_fqn,
418
433
  job_config=google.cloud.bigquery.LoadJobConfig(connection_properties=connection_properties),
419
434
  ).result()
420
435
  yield
@@ -433,7 +448,10 @@ class BigQuerySourceImpl(BaseSQLSource):
433
448
  def _bigquery_output_table(self, client: google.cloud.bigquery.Client) -> Iterator[str]:
434
449
  destination_table_name = f"temp_output_{str(uuid4()).replace('-', '_')}"
435
450
 
436
- destination = f"{client.project}.{self.dataset}.{destination_table_name}"
451
+ # Use temp_project/temp_dataset if specified, otherwise fall back to main project/dataset
452
+ temp_project = self.temp_project or self.project
453
+ temp_dataset = self.temp_dataset or self.dataset
454
+ destination = f"{temp_project}.{temp_dataset}.{destination_table_name}"
437
455
 
438
456
  try:
439
457
  yield destination
@@ -639,6 +657,8 @@ class BigQuerySourceImpl(BaseSQLSource):
639
657
  create_integration_variable(_BQ_PROJECT_NAME, self.name, self.project),
640
658
  create_integration_variable(_BQ_CREDENTIALS_BASE64_NAME, self.name, self.credentials_base64),
641
659
  create_integration_variable(_BQ_CREDENTIALS_PATH_NAME, self.name, self.credentials_path),
660
+ create_integration_variable(_BQ_TEMP_PROJECT_NAME, self.name, self.temp_project),
661
+ create_integration_variable(_BQ_TEMP_DATASET_NAME, self.name, self.temp_dataset),
642
662
  ]
643
663
  if v is not None
644
664
  }
@@ -17,6 +17,7 @@ from chalk.sql.finalized_query import FinalizedChalkQuery
17
17
  from chalk.utils.df_utils import pa_array_to_pl_series
18
18
  from chalk.utils.log_with_context import get_logger
19
19
  from chalk.utils.missing_dependency import missing_dependency_exception
20
+ from chalk.utils.pl_helpers import str_json_decode_compat
20
21
  from chalk.utils.threading import DEFAULT_IO_EXECUTOR
21
22
  from chalk.utils.tracing import safe_incr, safe_trace
22
23
 
@@ -32,6 +33,8 @@ _DATABRICKS_HTTP_PATH_NAME = "DATABRICKS_HTTP_PATH"
32
33
  _DATABRICKS_TOKEN_NAME = "DATABRICKS_TOKEN"
33
34
  _DATABRICKS_DATABASE_NAME = "DATABRICKS_DATABASE"
34
35
  _DATABRICKS_PORT_NAME = "DATABRICKS_PORT"
36
+ _DATABRICKS_CLIENT_ID_NAME = "DATABRICKS_CLIENT_ID"
37
+ _DATABRICKS_CLIENT_SECRET_NAME = "DATABRICKS_CLIENT_SECRET"
35
38
 
36
39
 
37
40
  class DatabricksSourceImpl(BaseSQLSource):
@@ -45,6 +48,8 @@ class DatabricksSourceImpl(BaseSQLSource):
45
48
  db: Optional[str] = None,
46
49
  port: Optional[Union[int, str]] = None,
47
50
  name: Optional[str] = None,
51
+ client_id: Optional[str] = None,
52
+ client_secret: Optional[str] = None,
48
53
  engine_args: Optional[Dict[str, Any]] = None,
49
54
  executor: Optional[concurrent.futures.ThreadPoolExecutor] = None,
50
55
  integration_variable_override: Optional[Mapping[str, str]] = None,
@@ -73,21 +78,53 @@ class DatabricksSourceImpl(BaseSQLSource):
73
78
  name=_DATABRICKS_PORT_NAME, integration_name=name, parser=int, override=integration_variable_override
74
79
  )
75
80
  )
81
+ self.client_id = client_id or load_integration_variable(
82
+ name=_DATABRICKS_CLIENT_ID_NAME, integration_name=name, override=integration_variable_override
83
+ )
84
+ self.client_secret = client_secret or load_integration_variable(
85
+ name=_DATABRICKS_CLIENT_SECRET_NAME, integration_name=name, override=integration_variable_override
86
+ )
76
87
  self.executor = executor or DEFAULT_IO_EXECUTOR
77
88
 
89
+ has_token = self.access_token is not None
90
+ has_oauth = self.client_id is not None and self.client_secret is not None
91
+
92
+ if has_token and has_oauth:
93
+ chalk_logger.warning(
94
+ "Both OAuth credentials and a personal access token were provided. Using OAuth authentication."
95
+ )
96
+ self.access_token = None
97
+
98
+ self._credentials_provider = None
99
+ if has_oauth:
100
+ try:
101
+ from databricks.sdk.core import Config, oauth_service_principal
102
+ except ImportError:
103
+ raise missing_dependency_exception("chalkpy[databricks]")
104
+
105
+ def credentials_provider():
106
+ config = Config(host=self.host, client_id=self.client_id, client_secret=self.client_secret)
107
+ return oauth_service_principal(config)
108
+
109
+ self._credentials_provider = credentials_provider
110
+
78
111
  if engine_args is None:
79
112
  engine_args = {}
113
+
114
+ connect_args: dict[str, Any] = {
115
+ "keepalives": 1,
116
+ "keepalives_idle": 30,
117
+ "keepalives_interval": 10,
118
+ "keepalives_count": 5,
119
+ }
120
+
121
+ if self._credentials_provider:
122
+ connect_args["credentials_provider"] = self._credentials_provider
123
+
80
124
  engine_args.setdefault("pool_size", 20)
81
125
  engine_args.setdefault("max_overflow", 60)
82
- engine_args.setdefault(
83
- "connect_args",
84
- {
85
- "keepalives": 1,
86
- "keepalives_idle": 30,
87
- "keepalives_interval": 10,
88
- "keepalives_count": 5,
89
- },
90
- )
126
+ engine_args.setdefault("connect_args", connect_args)
127
+
91
128
  BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args={})
92
129
 
93
130
  def supports_inefficient_fallback(self) -> bool:
@@ -96,6 +133,12 @@ class DatabricksSourceImpl(BaseSQLSource):
96
133
  def get_sqlglot_dialect(self) -> str | None:
97
134
  return "databricks"
98
135
 
136
+ def _get_client_auth(self) -> Dict[str, str | Callable | None]:
137
+ if self._credentials_provider:
138
+ return {"credentials_provider": self._credentials_provider}
139
+ else:
140
+ return {"access_token": self.access_token}
141
+
99
142
  @contextlib.contextmanager
100
143
  def _create_temp_table(
101
144
  self,
@@ -167,10 +210,7 @@ class DatabricksSourceImpl(BaseSQLSource):
167
210
 
168
211
  # Connect using databricks-sql-python for efficient Arrow fetching
169
212
  with sql.connect(
170
- server_hostname=self.host,
171
- http_path=self.http_path,
172
- access_token=self.access_token,
173
- catalog=self.db,
213
+ server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
174
214
  ) as databricks_conn:
175
215
  chalk_logger.info("Established connection with Databricks using databricks-sql-python")
176
216
 
@@ -226,7 +266,11 @@ class DatabricksSourceImpl(BaseSQLSource):
226
266
  if pa.types.is_list(expected_type) or pa.types.is_large_list(expected_type):
227
267
  if pa.types.is_string(actual_type) or pa.types.is_large_string(actual_type):
228
268
  series = pa_array_to_pl_series(tbl[col_name])
229
- column = series.str.json_extract(feature.converter.polars_dtype).to_arrow().cast(expected_type)
269
+ column = (
270
+ str_json_decode_compat(series, feature.converter.polars_dtype)
271
+ .to_arrow()
272
+ .cast(expected_type)
273
+ )
230
274
 
231
275
  # Cast to expected type if needed
232
276
  if actual_type != expected_type:
@@ -268,10 +312,7 @@ class DatabricksSourceImpl(BaseSQLSource):
268
312
 
269
313
  # Connect using databricks-sql-python for efficient Arrow fetching
270
314
  with sql.connect(
271
- server_hostname=self.host,
272
- http_path=self.http_path,
273
- access_token=self.access_token,
274
- catalog=self.db,
315
+ server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
275
316
  ) as databricks_cnx:
276
317
  with databricks_cnx.cursor() as cursor:
277
318
  formatted_op, positional_params, named_params = self.compile_query(finalized_query)
@@ -348,6 +389,8 @@ class DatabricksSourceImpl(BaseSQLSource):
348
389
  create_integration_variable(_DATABRICKS_TOKEN_NAME, self.name, self.access_token),
349
390
  create_integration_variable(_DATABRICKS_DATABASE_NAME, self.name, self.db),
350
391
  create_integration_variable(_DATABRICKS_PORT_NAME, self.name, self.port),
392
+ create_integration_variable(_DATABRICKS_CLIENT_ID_NAME, self.name, self.client_id),
393
+ create_integration_variable(_DATABRICKS_CLIENT_SECRET_NAME, self.name, self.client_secret),
351
394
  ]
352
395
  if v is not None
353
396
  }