chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/sql/_internal/integrations/mssql.py ADDED
@@ -0,0 +1,281 @@
+ from __future__ import annotations
+
+ import os
+ import struct
+ from typing import TYPE_CHECKING, Any, Dict, Iterable, Mapping, Optional, Union
+
+ from chalk.integrations.named import create_integration_variable, load_integration_variable
+ from chalk.sql._internal.query_execution_parameters import QueryExecutionParameters
+ from chalk.sql._internal.sql_source import BaseSQLSource, SQLSourceKind, TableIngestMixIn
+ from chalk.sql.finalized_query import FinalizedChalkQuery
+ from chalk.sql.protocols import SQLSourceWithTableIngestProtocol
+ from chalk.utils.environment_parsing import env_var_bool
+ from chalk.utils.missing_dependency import missing_dependency_exception
+
+ if TYPE_CHECKING:
+     import pyarrow as pa
+     from sqlalchemy.engine import URL, Connection
+
+ _MSSQL_HOST_NAME = "MSSQL_HOST"
+ _MSSQL_TCP_PORT_NAME = "MSSQL_TCP_PORT"
+ _MSSQL_DATABASE_NAME = "MSSQL_DATABASE"
+ _MSSQL_USER_NAME = "MSSQL_USER"
+ _MSSQL_PWD_NAME = "MSSQL_PWD"
+ _MSSQL_CLIENT_ID_NAME = "MSSQL_CLIENT_ID"
+ _MSSQL_CLIENT_SECRET_NAME = "MSSQL_CLIENT_SECRET"
+ _MSSQL_TENANT_ID_NAME = "MSSQL_TENANT_ID"
+
+
+ class MSSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestProtocol):
+     kind = SQLSourceKind.mssql
+
+     def __init__(
+         self,
+         host: Optional[str] = None,
+         port: Optional[Union[int, str]] = None,
+         db: Optional[str] = None,
+         user: Optional[str] = None,
+         password: Optional[str] = None,
+         client_id: Optional[str] = None,
+         client_secret: Optional[str] = None,
+         tenant_id: Optional[str] = None,
+         name: Optional[str] = None,
+         engine_args: Optional[Dict[str, Any]] = None,
+         async_engine_args: Optional[Dict[str, Any]] = None,
+         integration_variable_override: Optional[Mapping[str, str]] = None,
+     ):
+         try:
+             import pyodbc
+         except ImportError as e:
+             raise missing_dependency_exception("chalkpy[mssql]", original_error=e)
+         del pyodbc
+
+         self.name = name
+         self.host = host or load_integration_variable(
+             integration_name=name, name=_MSSQL_HOST_NAME, override=integration_variable_override
+         )
+         self.port = (
+             int(port)
+             if port is not None
+             else load_integration_variable(
+                 integration_name=name, name=_MSSQL_TCP_PORT_NAME, parser=int, override=integration_variable_override
+             )
+         )
+         self.db = db or load_integration_variable(
+             integration_name=name, name=_MSSQL_DATABASE_NAME, override=integration_variable_override
+         )
+         self.user = user or load_integration_variable(
+             integration_name=name,
+             name=_MSSQL_USER_NAME,
+             override=integration_variable_override,
+         )
+         self.password = password or load_integration_variable(
+             integration_name=name,
+             name=_MSSQL_PWD_NAME,
+             override=integration_variable_override,
+         )
+         self.client_id = client_id or load_integration_variable(
+             integration_name=name,
+             name=_MSSQL_CLIENT_ID_NAME,
+             override=integration_variable_override,
+         )
+         self.client_secret = client_secret or load_integration_variable(
+             integration_name=name,
+             name=_MSSQL_CLIENT_SECRET_NAME,
+             override=integration_variable_override,
+         )
+         self.tenant_id = tenant_id or load_integration_variable(
+             integration_name=name,
+             name=_MSSQL_TENANT_ID_NAME,
+             override=integration_variable_override,
+         )
+         self.ingested_tables: Dict[str, Any] = {}
+
+         if engine_args is None:
+             engine_args = {}
+         if async_engine_args is None:
+             async_engine_args = {}
+
+         if name:
+             engine_args_from_ui = self._load_env_engine_args(name, override=integration_variable_override)
+             for k, v in engine_args_from_ui.items():
+                 engine_args.setdefault(k, v)
+                 async_engine_args.setdefault(k, v)
+
+         chalk_default_engine_args = {
+             "pool_size": 20,
+             "max_overflow": 60,
+             "pool_recycle": 90,
+         }
+         for k, v in chalk_default_engine_args.items():
+             engine_args.setdefault(k, v)
+             async_engine_args.setdefault(k, v)
+
+         # Set isolation level for read-only operations
+         engine_args.setdefault("isolation_level", os.environ.get("CHALK_SQL_ISOLATION_LEVEL", "AUTOCOMMIT"))
+         async_engine_args.setdefault("isolation_level", os.environ.get("CHALK_SQL_ISOLATION_LEVEL", "AUTOCOMMIT"))
+
+         BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args=async_engine_args)
+
+         # Register event listener for managed identity token injection
+         if not self.client_id and not self.user:
+             from sqlalchemy import event
+
+             event.listens_for(self.get_engine(), "do_connect")(self._inject_azure_token)
+
+     def _inject_azure_token(self, _dialect: Any, _conn_rec: Any, _cargs: Any, cparams: Dict[str, Any]) -> None:
+         """SQLAlchemy event handler to inject Azure AD token on each connection."""
+         try:
+             from azure.identity import DefaultAzureCredential
+         except ImportError:
+             raise missing_dependency_exception("chalkpy[mssql]")
+
+         try:
+             credential = DefaultAzureCredential()
+             token = credential.get_token("https://database.windows.net/.default")
+         except Exception as e:
+             raise Exception(f"Failed to acquire Azure AD token for MSSQL connection: {e}") from e
+
+         token_bytes = token.token.encode("utf-16-le")
+         token_struct = struct.pack(f"<I{len(token_bytes)}s", len(token_bytes), token_bytes)
+         cparams["attrs_before"] = {1256: token_struct}  # SQL_COPT_SS_ACCESS_TOKEN
+
+     def get_sqlglot_dialect(self) -> str | None:
+         return "tsql"
+
+     def local_engine_url(self) -> "URL":
+         from sqlalchemy.engine.url import URL
+
+         trust_server_cert = env_var_bool("CHALK_MSSQL_TRUST_SERVER_CERTIFICATE", default=False)
+
+         if self.client_id and self.client_secret and self.tenant_id:
+             # Service Principal authentication
+             # Use pyodbc driver for Azure AD support
+             query_params = {
+                 "driver": "ODBC Driver 18 for SQL Server",
+                 "Authentication": "ActiveDirectoryServicePrincipal",
+             }
+             if trust_server_cert:
+                 query_params["TrustServerCertificate"] = "yes"
+             return URL.create(
+                 drivername="mssql+pyodbc",
+                 username=self.client_id,
+                 password=self.client_secret,
+                 host=self.host,
+                 port=self.port,
+                 database=self.db,
+                 query=query_params,
+             )
+         elif self.user and self.password:
+             # SQL authentication
+             query_params = {"driver": "ODBC Driver 18 for SQL Server"}
+             if trust_server_cert:
+                 query_params["TrustServerCertificate"] = "yes"
+             return URL.create(
+                 drivername="mssql+pyodbc",
+                 username=self.user,
+                 password=self.password,
+                 host=self.host,
+                 port=self.port,
+                 database=self.db,
+                 query=query_params,
+             )
+         else:
+             # Managed Identity: token injected via event listener
+             connection_string = (
+                 f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={self.host},{self.port};DATABASE={self.db}"
+             )
+             if trust_server_cert:
+                 connection_string += ";TrustServerCertificate=yes"
+             return URL.create(
+                 drivername="mssql+pyodbc",
+                 query={"odbc_connect": connection_string},
+             )
+
+     def _recreate_integration_variables(self) -> dict[str, str]:
+         return {
+             k: v
+             for k, v in [
+                 create_integration_variable(_MSSQL_HOST_NAME, self.name, self.host),
+                 create_integration_variable(_MSSQL_TCP_PORT_NAME, self.name, self.port),
+                 create_integration_variable(_MSSQL_DATABASE_NAME, self.name, self.db),
+                 create_integration_variable(_MSSQL_USER_NAME, self.name, self.user),
+                 create_integration_variable(_MSSQL_PWD_NAME, self.name, self.password),
+                 create_integration_variable(_MSSQL_CLIENT_ID_NAME, self.name, self.client_id),
+                 create_integration_variable(_MSSQL_CLIENT_SECRET_NAME, self.name, self.client_secret),
+                 create_integration_variable(_MSSQL_TENANT_ID_NAME, self.name, self.tenant_id),
+             ]
+             if v is not None
+         }
+
+     def execute_query_efficient_raw(
+         self,
+         finalized_query: FinalizedChalkQuery,
+         expected_output_schema: "pa.Schema",
+         connection: Optional["Connection"],
+         query_execution_parameters: QueryExecutionParameters,
+     ) -> Iterable["pa.RecordBatch"]:
+         """Execute query efficiently for MSSQL and return raw PyArrow RecordBatches."""
+         import contextlib
+
+         import pyarrow as pa
+         import pyarrow.compute as pc
+
+         # Get the compiled query
+         _, _, _ = self.compile_query(finalized_query)
+
+         # Use existing connection or create new one
+         with (self.get_engine().connect() if connection is None else contextlib.nullcontext(connection)) as cnx:
+             with cnx.begin():
+                 # Handle temp tables
+                 with contextlib.ExitStack() as exit_stack:
+                     for (
+                         _,
+                         temp_value,
+                         create_temp_table,
+                         temp_table,
+                         drop_temp_table,
+                     ) in finalized_query.temp_tables.values():
+                         exit_stack.enter_context(
+                             self._create_temp_table(create_temp_table, temp_table, drop_temp_table, cnx, temp_value)
+                         )
+
+                     # Execute query
+                     result = cnx.execute(finalized_query.query, finalized_query.params)
+
+                     # Convert result to PyArrow
+                     rows = result.fetchall()
+                     column_names = result.keys()
+
+                     if not rows:
+                         # Return empty batch with expected schema
+                         arrays = [pa.nulls(0, field.type) for field in expected_output_schema]
+                         batch = pa.RecordBatch.from_arrays(arrays, schema=expected_output_schema)
+                         if query_execution_parameters.yield_empty_batches:
+                             yield batch
+                         return
+
+                     # Convert rows to column arrays
+                     data: dict[str, list[Any]] = {}
+                     for i, col_name in enumerate(column_names):
+                         col_data = [row[i] for row in rows]
+                         data[col_name] = col_data
+
+                     # Create PyArrow table
+                     table = pa.table(data)
+
+                     # Map columns to expected schema
+                     arrays: list[pa.Array] = []
+                     for field in expected_output_schema:
+                         if field.name in table.column_names:
+                             col = table.column(field.name)
+                             # Cast to expected type if needed
+                             if col.type != field.type:
+                                 col = pc.cast(col, field.type)
+                             arrays.append(col)
+                         else:
+                             # Column not found, create null array
+                             arrays.append(pa.nulls(len(table), field.type))
+
+                     batch = pa.RecordBatch.from_arrays(arrays, schema=expected_output_schema)
+                     yield batch
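
Note: the managed-identity branch above authenticates with pyodbc's access-token connection attribute rather than a username and password. A minimal standalone sketch of the same technique, mirroring _inject_azure_token (assumes azure-identity and pyodbc are installed; the server and database names are placeholders):

    import struct

    import pyodbc
    from azure.identity import DefaultAzureCredential

    SQL_COPT_SS_ACCESS_TOKEN = 1256  # pre-connect attribute defined by the MS ODBC driver

    # Acquire an Azure AD token for Azure SQL, then length-prefix it as a
    # little-endian UTF-16-LE blob, exactly as the code above does.
    token = DefaultAzureCredential().get_token("https://database.windows.net/.default").token
    raw = token.encode("utf-16-le")
    token_struct = struct.pack(f"<I{len(raw)}s", len(raw), raw)

    conn = pyodbc.connect(
        "DRIVER={ODBC Driver 18 for SQL Server};SERVER=example.database.windows.net,1433;DATABASE=exampledb",
        attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct},
    )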
chalk/sql/_internal/integrations/postgres.py CHANGED
@@ -28,6 +28,7 @@ from chalk.sql.protocols import SQLSourceWithTableIngestProtocol
  from chalk.utils.environment_parsing import env_var_bool
  from chalk.utils.log_with_context import get_logger
  from chalk.utils.missing_dependency import missing_dependency_exception
+ from chalk.utils.pl_helpers import polars_uses_schema_overrides
  from chalk.utils.tracing import safe_add_metrics, safe_add_tags, safe_trace

  if TYPE_CHECKING:
@@ -262,8 +263,11 @@ class PostgreSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIn
          # pl.read_csv(use_pyarrow=True) has the same performance degradation,
          # UNLESS a `dtypes` arg is provided.

-         # 'dtypes' deprecated for 'schema_overrides' in polars 0.20+, but parameter renamed without breaking
-         pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)  # pyright: ignore[reportCallIssue]
+         # 'dtypes' deprecated for 'schema_overrides' in polars 0.20.31+
+         if polars_uses_schema_overrides:
+             pl_table = pl.read_csv(buffer, schema_overrides=parse_dtypes)  # pyright: ignore[reportCallIssue]
+         else:
+             pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)  # pyright: ignore[reportCallIssue]
          if boolean_columns:
              # DO NOT use map_dict. Causes a segfault when multiple uvicorn workers are handling
              # requests in parallel.
@@ -498,7 +502,11 @@ class PostgreSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIn
          else:
              parse_dtypes[field.name] = pl.Utf8

-         pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)  # pyright: ignore[reportCallIssue]
+         # 'dtypes' deprecated for 'schema_overrides' in polars 0.20.31+
+         if polars_uses_schema_overrides:
+             pl_table = pl.read_csv(buffer, schema_overrides=parse_dtypes)  # pyright: ignore[reportCallIssue]
+         else:
+             pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)  # pyright: ignore[reportCallIssue]

          # Convert to arrow and map to expected schema
          arrow_table = pl_table.to_arrow()
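
Note: the new polars_uses_schema_overrides flag comes from chalk/utils/pl_helpers.py, which is not shown in this diff. A plausible sketch of how such a flag can be derived; the version parsing below is an assumption, not the actual helper:

    import polars as pl

    # polars renamed read_csv's `dtypes` parameter to `schema_overrides` in 0.20.31;
    # pre-release suffixes are ignored here for simplicity.
    def _polars_version() -> tuple[int, ...]:
        return tuple(int(p) for p in pl.__version__.split(".")[:3] if p.isdigit())

    polars_uses_schema_overrides: bool = _polars_version() >= (0, 20, 31)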
chalk/sql/_internal/integrations/redshift.py CHANGED
@@ -260,6 +260,8 @@ class RedshiftSourceImpl(BaseSQLSource):
          temp_table_name = f"query_{str(uuid.uuid4()).replace('-', '_')}"
          try:
              _logger.debug(f"Executing query & creating temp table '{temp_table_name}'")
+             _public_logger.info(f"Executing Redshift query [{temp_query_id}]: {operation}")
+             _public_logger.debug(f"Query parameters [{temp_query_id}]: {params}")
              cursor.execute(f"CREATE TEMP TABLE {temp_table_name} AS ({operation})", params)
          except Exception as e:
              _public_logger.error(f"Failed to create temp table for operation: {operation}", exc_info=e)
@@ -366,6 +368,8 @@ class RedshiftSourceImpl(BaseSQLSource):
          temp_table_name = f"query_{str(uuid.uuid4()).replace('-', '_')}"
          try:
              _logger.debug(f"Executing query & creating temp table '{temp_table_name}'")
+             _public_logger.info(f"Executing Redshift query [{temp_query_id}]: {operation}")
+             _public_logger.debug(f"Query parameters [{temp_query_id}]: {params}")
              cursor.execute(f"CREATE TEMP TABLE {temp_table_name} AS ({operation})", params)
          except Exception as e:
              _public_logger.error(f"Failed to create temp table for operation: {operation}", exc_info=e)
chalk/sql/_internal/integrations/snowflake.py CHANGED
@@ -31,6 +31,7 @@ from chalk.sql.finalized_query import FinalizedChalkQuery
  from chalk.utils.df_utils import is_list_like, pa_array_to_pl_series
  from chalk.utils.environment_parsing import env_var_bool
  from chalk.utils.missing_dependency import missing_dependency_exception
+ from chalk.utils.pl_helpers import str_json_decode_compat
  from chalk.utils.threading import DEFAULT_IO_EXECUTOR, MultiSemaphore
  from chalk.utils.tracing import safe_incr, safe_set_gauge

@@ -398,11 +399,19 @@ class SnowflakeSourceImpl(BaseSQLSource):
      if pa.types.is_list(expected_type) or pa.types.is_large_list(expected_type):
          if pa.types.is_string(actual_type) or pa.types.is_large_string(actual_type):
              series = pa_array_to_pl_series(tbl[col_name])
-             column = series.str.json_extract(feature.converter.polars_dtype).to_arrow().cast(expected_type)
+             column = (
+                 str_json_decode_compat(series, feature.converter.polars_dtype)
+                 .to_arrow()
+                 .cast(expected_type)
+             )
      if pa.types.is_struct(expected_type):
          if pa.types.is_string(actual_type):
              series = pa_array_to_pl_series(tbl[col_name])
-             column = series.str.json_extract(feature.converter.polars_dtype).to_arrow().cast(expected_type)
+             column = (
+                 str_json_decode_compat(series, feature.converter.polars_dtype)
+                 .to_arrow()
+                 .cast(expected_type)
+             )
      if actual_type != expected_type:
          column = column.cast(options=pc.CastOptions(target_type=expected_type, allow_time_truncate=True))
      if isinstance(column, pa.ChunkedArray):
chalk/sql/_internal/integrations/util.py CHANGED
@@ -4,6 +4,7 @@ import pyarrow as pa

  from chalk.features._encoding.pyarrow import pyarrow_to_polars
  from chalk.utils.df_utils import pa_cast, pa_table_to_pl_df
+ from chalk.utils.pl_helpers import str_json_decode_compat


  def convert_hex_to_binary(table: pa.Table, cols_to_convert: List[str]) -> pa.Table:
@@ -60,7 +61,7 @@ def json_parse_and_cast(tbl: pa.Table, schema: Mapping[str, pa.DataType]) -> pa.
      expr = pl.col(col_name)
      if pl_df.schema[col_name] == pl.Binary():
          expr = expr.cast(pl.Utf8())
-     expr = expr.str.json_extract(pl_dtype).alias(col_name)
+     expr = str_json_decode_compat(expr, pl_dtype).alias(col_name)
      pl_exprs.append(expr)

  pl_df = pl_df.with_columns(pl_exprs)
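
Note: both the Snowflake change and this one route JSON decoding through a new str_json_decode_compat helper in chalk/utils/pl_helpers.py, which is not shown in this diff. The motivation is polars renaming .str.json_extract to .str.json_decode. A minimal sketch of such a shim, with the signature assumed from the call sites above:

    from typing import Union

    import polars as pl

    def str_json_decode_compat(
        s: Union[pl.Series, pl.Expr], dtype: pl.DataType
    ) -> Union[pl.Series, pl.Expr]:
        # Newer polars exposes .str.json_decode; older versions only had .str.json_extract.
        if hasattr(s.str, "json_decode"):
            return s.str.json_decode(dtype)
        return s.str.json_extract(dtype)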
chalk/sql/_internal/sql_file_resolver.py CHANGED
@@ -19,20 +19,21 @@ from typing import (
      Literal,
      Mapping,
      Optional,
+     ParamSpec,
      Sequence,
      Type,
+     TypeAlias,
      TypeVar,
      Union,
      cast,
  )

  import yaml
- from typing_extensions import ParamSpec, TypeAlias
  from yaml.scanner import ScannerError

  from chalk import Environments, OfflineResolver, OnlineResolver, Tags
  from chalk._lsp.error_builder import SQLFileResolverErrorBuilder
- from chalk.features import DataFrame, Feature, FeatureNotFoundException, Features
+ from chalk.features import DataFrame, Feature, FeatureNotFoundException, Features, Underscore
  from chalk.features.feature_set import CURRENT_FEATURE_REGISTRY
  from chalk.features.namespace_context import build_namespaced_name
  from chalk.features.namespace_context import namespace as namespace_ctx
@@ -41,6 +42,8 @@ from chalk.features.resolver import Cron, ResolverArgErrorHandler, StreamResolve
  from chalk.sql._internal.incremental import IncrementalSettings
  from chalk.sql._internal.integrations.bigquery import BigQuerySourceImpl
  from chalk.sql._internal.integrations.cloudsql import CloudSQLSourceImpl
+ from chalk.sql._internal.integrations.databricks import DatabricksSourceImpl
+ from chalk.sql._internal.integrations.mssql import MSSQLSourceImpl
  from chalk.sql._internal.integrations.mysql import MySQLSourceImpl
  from chalk.sql._internal.integrations.postgres import PostgreSQLSourceImpl
  from chalk.sql._internal.integrations.redshift import RedshiftSourceImpl
@@ -84,8 +87,10 @@ _SOURCES: Mapping[str, Union[Type[BaseSQLSource], Type[StreamSource]]] = {
      "postgres": PostgreSQLSourceImpl,
      "postgresql": PostgreSQLSourceImpl,
      "mysql": MySQLSourceImpl,
+     "mssql": MSSQLSourceImpl,
      "bigquery": BigQuerySourceImpl,
      "cloudsql": CloudSQLSourceImpl,
+     "databricks": DatabricksSourceImpl,
      "redshift": RedshiftSourceImpl,
      "sqlite": SQLiteSourceImpl,
      "kafka": KafkaSource,
@@ -241,6 +246,7 @@ class SQLStringResult:
      override_comment_dict: Optional[CommentDict] = None
      override_name: Optional[str] = None
      autogenerated: bool = False
+     postprocessing_expr: Underscore | None = None

      def __post_init__(self):
          # Validation: if autogenerated is True, override_name must not be None
@@ -302,7 +308,7 @@ def get_sql_file_resolvers(
      """Iterate through all `.chalk.sql` filepaths, gather the sql strings, and get a resolver hopefully for each."""
      for dp, dn, fn in os.walk(os.path.expanduser(sql_file_resolve_location)):
          del dn  # unused
-         for f in fn:
+         for f in sorted(fn):  # Sort filenames for deterministic ordering
              filepath = os.path.join(dp, f)
              if not filepath.endswith(CHALK_SQL_FILE_RESOLVER_FILENAME_SUFFIX):
                  continue
@@ -315,7 +321,10 @@ def get_sql_file_resolvers(
                  continue
              _filepath_to_sql_string[filepath] = sql_string_result.sql_string
              yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
-     for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers():
+     # Only yield generated resolvers whose filepath is under the directory being scanned
+     for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers(
+         filter_by_directory=sql_file_resolve_location
+     ):
          yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)


@@ -338,8 +347,12 @@ def get_sql_file_resolvers_from_paths(
              sql_string_result=sql_string_result,
              has_import_errors=has_import_errors,
          )
+     # Only yield generated resolvers whose filepath is in the paths list
+     # If paths is empty, yield all generated resolvers (no filtering)
      for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers():
-         yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
+         # Check if this generated resolver's filepath is in the provided paths
+         if not paths or sql_string_result.path in paths:
+             yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)


  def get_sql_file_resolver(
@@ -692,6 +705,7 @@
              incremental_settings=incremental_settings,
              params_to_root_fqn=glot_result.args,
          ),
+         postprocessing=sql_string_result.postprocessing_expr,
      )
  except Exception as e:
      raise e
@@ -1572,6 +1586,7 @@ class GeneratedSQLFileResolverInfo:
      filepath: str
      sql_string: str
      comment_dict: CommentDict
+     postprocessing_expr: Underscore | None


  class GeneratedSQLFileResolverRegistry:
@@ -1579,17 +1594,43 @@
          super().__init__()
          self.resolver_name_to_generated_infos: Dict[str, GeneratedSQLFileResolverInfo] = {}

-     def add_sql_file_resolver(self, name: str, filepath: str, sql_string: str, comment_dict: CommentDict):
+     def add_sql_file_resolver(
+         self,
+         name: str,
+         filepath: str,
+         sql_string: str,
+         comment_dict: CommentDict,
+         postprocessing_expr: Underscore | None = None,
+     ):
          if name in self.resolver_name_to_generated_infos and filepath != "<notebook>":
              raise ValueError(f"A SQL file resolver already exists with name '{name}'. They must have unique names.")
          self.resolver_name_to_generated_infos[name] = GeneratedSQLFileResolverInfo(
-             filepath=filepath,
-             sql_string=sql_string,
-             comment_dict=comment_dict,
+             filepath=filepath, sql_string=sql_string, comment_dict=comment_dict, postprocessing_expr=postprocessing_expr
          )

-     def get_generated_sql_file_resolvers(self) -> Iterable[SQLStringResult]:
+     def get_generated_sql_file_resolvers(self, filter_by_directory: Path | None = None) -> Iterable[SQLStringResult]:
+         """
+         Yield generated SQL file resolvers, optionally filtered by directory.
+
+         Args:
+             filter_by_directory: If provided, only yield resolvers whose filepath is under this directory.
+                 If None, yield all generated resolvers (legacy behavior).
+         """
          for name, generated_info in self.resolver_name_to_generated_infos.items():
+             # If filtering by directory is requested, check if the resolver's filepath is under that directory
+             if filter_by_directory is not None:
+                 # Special case: notebook resolvers (filepath == "<notebook>") should never be auto-yielded
+                 # when scanning directories, only when explicitly requested
+                 if generated_info.filepath == "<notebook>":
+                     continue
+
+                 # Convert to absolute paths for comparison and check if resolver path is under filter directory
+                 resolver_path = Path(generated_info.filepath).resolve()
+                 filter_path = Path(filter_by_directory).resolve()
+
+                 if not resolver_path.is_relative_to(filter_path):
+                     continue
+
              yield SQLStringResult(
                  path=generated_info.filepath,
                  sql_string=generated_info.sql_string,
@@ -1597,6 +1638,7 @@
                  override_comment_dict=generated_info.comment_dict,
                  override_name=name,
                  autogenerated=True,
+                 postprocessing_expr=generated_info.postprocessing_expr,
              )

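Note: the directory filter above relies on pathlib.Path.is_relative_to, available since Python 3.9. Its behavior, for reference (the paths are illustrative):

    from pathlib import Path

    resolver_path = Path("/repo/resolvers/users.chalk.sql").resolve()

    inside = resolver_path.is_relative_to(Path("/repo").resolve())        # True: yielded
    outside = resolver_path.is_relative_to(Path("/elsewhere").resolve())  # False: skipped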
@@ -1625,6 +1667,7 @@
      partitioned_by: Collection[Any] | None = None,
      total: Optional[bool] = None,
      skip_sql_validation: Optional[bool] = None,
+     postprocessing_expression: Optional[Underscore] = None,
  ):
      """Generate a Chalk SQL file resolver from a filepath and a sql string.
      This will generate a resolver in your web dashboard that can be queried,
@@ -1808,6 +1851,7 @@
          sql_string=sql,
          comment_dict=comment_dict,
          name=name,
+         postprocessing_expr=postprocessing_expression,
      )
      if is_defined_in_notebook:
          from chalk.sql import SQLSourceGroup
@@ -1844,6 +1888,7 @@
          override_comment_dict=generated_info.comment_dict,
          override_name=name,
          autogenerated=True,
+         postprocessing_expr=postprocessing_expression,
      )
      resolver_result = get_sql_file_resolver(
          sources=current_sql_sources, sql_string_result=info, has_import_errors=False
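
Note: taken together, these hunks thread a new postprocessing_expression parameter from make_sql_file_resolver through SQLStringResult and the generated-resolver registry into the resolver itself. A hedged usage sketch; the feature class, SQL, and underscore expression are hypothetical, and only the parameter name comes from this diff:

    from chalk import _
    from chalk.features import features
    from chalk.sql._internal.sql_file_resolver import make_sql_file_resolver

    @features
    class User:
        id: int
        email: str

    make_sql_file_resolver(
        name="get_user_email",
        sql="SELECT id, email FROM users",
        # New in 2.95.x: an underscore expression applied to the resolver's
        # output after the SQL query runs (expression shown is hypothetical).
        postprocessing_expression=_.email,
    )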
chalk/sql/_internal/sql_source.py CHANGED
@@ -163,6 +163,7 @@ class SQLSourceKind(str, Enum):
      athena = "athena"
      duckdb = "duckdb"
      dynamodb = "dynamodb"
+     mssql = "mssql"
      mysql = "mysql"
      postgres = "postgres"
      redshift = "redshift"
@@ -213,11 +214,32 @@ class BaseSQLSource(BaseSQLSourceProtocol):
          if getattr(self, "kind", None) != SQLSourceKind.trino:
              engine_args.setdefault("pool_pre_ping", env_var_bool("USE_CLIENT_POOL_PRE_PING"))
              async_engine_args.setdefault("pool_pre_ping", env_var_bool("USE_CLIENT_POOL_PRE_PING"))
-         self.engine_args = engine_args
-         self.async_engine_args = async_engine_args
+         # Store raw args internally, expose filtered versions via properties
+         self._raw_engine_args = engine_args
+         self._raw_async_engine_args = async_engine_args
          self._engine = None
          self._async_engine = None

+     @property
+     def engine_args(self) -> Dict[str, Any]:
+         """Engine arguments with native_args filtered out for SQLAlchemy."""
+         return {k: v for k, v in self._raw_engine_args.items() if k != "native_args"}
+
+     @engine_args.setter
+     def engine_args(self, args: dict[str, Any]):
+         """Set raw engine args (for backward compatibility)."""
+         self._raw_engine_args = args
+
+     @property
+     def async_engine_args(self) -> Dict[str, Any]:
+         """Async engine arguments with native_args filtered out for SQLAlchemy."""
+         return {k: v for k, v in self._raw_async_engine_args.items() if k != "native_args"}
+
+     @async_engine_args.setter
+     def async_engine_args(self, args: dict[str, Any]):
+         """Set raw async engine args (for backward compatibility)."""
+         self._raw_async_engine_args = args
+
      @property
      def _engine_args(self):
          """Backcompat support for private subclassing of BaseSQLSource"""
@@ -238,6 +260,16 @@ class BaseSQLSource(BaseSQLSourceProtocol):
          """Backcompat support for private subclassing of BaseSQLSource"""
          self.async_engine_args = args

+     @property
+     def native_args(self) -> Dict[str, Any]:
+         """Native arguments to be passed to the underlying database driver.
+
+         These arguments are extracted from engine_args and async_engine_args
+         and are not passed to SQLAlchemy's create_engine or create_async_engine.
+         Instead, they should be used by subclasses to configure native driver connections.
+         """
+         return self._raw_engine_args.get("native_args", {})
+
      def get_sqlglot_dialect(self) -> Union[str, None]:
          """Returns the name of the SQL dialect (if it has one) for `sqlglot` to parse the SQL string.
          This allows for use of dialect-specific syntax while parsing and modifying queries."""
@@ -831,6 +863,7 @@ class BaseSQLSource(BaseSQLSourceProtocol):
          if self._engine is None:
              self.register_sqlalchemy_compiler_overrides()
              self._check_engine_isolation_level()
+             # engine_args property already filters out native_args
              self._engine = create_engine(url=self.local_engine_url(), **self.engine_args)
          return self._engine

@@ -840,6 +873,7 @@ class BaseSQLSource(BaseSQLSourceProtocol):
          if self._async_engine is None:
              self.register_sqlalchemy_compiler_overrides()
              self._check_engine_isolation_level()
+             # async_engine_args property already filters out native_args
              self._async_engine = create_async_engine(url=self.async_local_engine_url(), **self.async_engine_args)
          return self._async_engine

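Note: the effect of the new split is that a "native_args" key in engine_args is withheld from SQLAlchemy and surfaced separately for driver-level configuration. A sketch of the filtering itself, mirroring the two properties above (the key names inside native_args are illustrative):

    raw = {"pool_size": 10, "native_args": {"login_timeout": 5}}

    engine_args = {k: v for k, v in raw.items() if k != "native_args"}  # what create_engine sees
    native_args = raw.get("native_args", {})                            # reserved for the native driver

    assert engine_args == {"pool_size": 10}
    assert native_args == {"login_timeout": 5}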
chalk/streams/__init__.py CHANGED
@@ -1,7 +1,5 @@
  import inspect
- from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, TypeVar, Union
-
- from typing_extensions import ParamSpec
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, ParamSpec, TypeVar, Union

  from chalk._lsp.error_builder import get_resolver_error_builder
  from chalk.features.tag import Environments
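
Note: ParamSpec and TypeAlias are importable from the standard typing module on Python 3.10+, which suggests the package now targets 3.10 or newer; the same typing_extensions drop appears in sql_file_resolver.py above. On codebases that still support older interpreters, the usual pattern is a version gate:

    import sys

    if sys.version_info >= (3, 10):
        from typing import ParamSpec, TypeAlias
    else:
        from typing_extensions import ParamSpec, TypeAlias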
chalk/streams/_kafka_source.py CHANGED
@@ -27,6 +27,7 @@ _KAFKA_SASL_MECHANISM_NAME = "KAFKA_SASL_MECHANISM"
  _KAFKA_SASL_USERNAME_NAME = "KAFKA_SASL_USERNAME"
  _KAFKA_SASL_PASSWORD_NAME = "KAFKA_SASL_PASSWORD"
  _KAFKA_ADDITIONAL_KAFKA_ARGS_NAME = "KAFKA_ADDITIONAL_KAFKA_ARGS"
+ _KAFKA_DEAD_LETTER_QUEUE_TOPIC = "KAFKA_DEAD_LETTER_QUEUE_TOPIC"


  class KafkaSource(StreamSource, SinkIntegrationProtocol, BaseModel, frozen=True):
@@ -165,7 +166,10 @@ class KafkaSource(StreamSource, SinkIntegrationProtocol, BaseModel, frozen=True)
              ),
              name=name,
              late_arrival_deadline=late_arrival_deadline,
-             dead_letter_queue_topic=dead_letter_queue_topic,
+             dead_letter_queue_topic=dead_letter_queue_topic
+             or load_integration_variable(
+                 name=_KAFKA_DEAD_LETTER_QUEUE_TOPIC, integration_name=name, override=integration_variable_override
+             ),
              ssl_ca_file=ssl_ca_file
              or load_integration_variable(
                  name=_KAFKA_SSL_CA_FILE_NAME, integration_name=name, override=integration_variable_override
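
Note: with this change the dead-letter-queue topic can be supplied through the integration environment instead of code, matching the fallback pattern already used for the SSL and SASL settings. A hedged sketch; the exact variable resolution depends on load_integration_variable, which is not shown in this diff, and the constructor arguments are assumed:

    import os

    # For an unnamed source, the bare variable name is the likely lookup key.
    os.environ["KAFKA_DEAD_LETTER_QUEUE_TOPIC"] = "events-dlq"

    from chalk.streams import KafkaSource

    # dead_letter_queue_topic is omitted, so it now falls back to the
    # environment variable above instead of staying None.
    source = KafkaSource(bootstrap_server="kafka:9092", topic="events")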