chalkpy 2.90.1__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +16 -16
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +4 -0
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +17 -15
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +25 -0
  10. chalk/_gen/chalk/common/v1/script_task_pb2.py +3 -3
  11. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +2 -0
  12. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  13. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  14. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  15. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  18. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  19. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  20. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  21. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  22. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  23. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  26. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  27. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  28. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  29. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  32. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  33. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  34. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  35. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  38. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  39. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  40. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  41. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  42. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  43. chalk/_gen/chalk/server/v1/builder_pb2.py +358 -288
  44. chalk/_gen/chalk/server/v1/builder_pb2.pyi +360 -10
  45. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +225 -0
  46. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +60 -0
  47. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  48. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  49. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  50. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  53. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +141 -119
  54. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +106 -4
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +45 -0
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +12 -0
  57. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  58. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  61. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +52 -38
  62. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +62 -1
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +90 -0
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +24 -0
  65. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  66. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  69. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  70. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  71. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  72. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  73. chalk/_gen/chalk/server/v1/deployment_pb2.py +6 -6
  74. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +20 -0
  75. chalk/_gen/chalk/server/v1/environment_pb2.py +14 -12
  76. chalk/_gen/chalk/server/v1/environment_pb2.pyi +19 -0
  77. chalk/_gen/chalk/server/v1/eventbus_pb2.py +4 -2
  78. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  79. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  80. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  81. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  82. chalk/_gen/chalk/server/v1/graph_pb2.py +38 -26
  83. chalk/_gen/chalk/server/v1/graph_pb2.pyi +58 -0
  84. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +47 -0
  85. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +18 -0
  86. chalk/_gen/chalk/server/v1/incident_pb2.py +23 -21
  87. chalk/_gen/chalk/server/v1/incident_pb2.pyi +15 -1
  88. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  89. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  90. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  91. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  92. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  93. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  94. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  95. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  96. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  97. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  98. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  99. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  100. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  101. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  102. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  103. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  104. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  105. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  106. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  107. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  108. chalk/_gen/chalk/server/v1/queries_pb2.py +66 -66
  109. chalk/_gen/chalk/server/v1/queries_pb2.pyi +32 -2
  110. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -12
  111. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +16 -3
  112. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  113. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  114. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  115. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  116. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +15 -3
  117. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +22 -0
  118. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  119. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  120. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  121. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  122. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  123. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  124. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  125. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  126. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  127. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  128. chalk/_gen/chalk/server/v1/team_pb2.py +154 -141
  129. chalk/_gen/chalk/server/v1/team_pb2.pyi +30 -2
  130. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +45 -0
  131. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +12 -0
  132. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  133. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  134. chalk/_gen/chalk/server/v1/trace_pb2.py +44 -40
  135. chalk/_gen/chalk/server/v1/trace_pb2.pyi +20 -0
  136. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  137. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  138. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  139. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  140. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +16 -10
  141. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +52 -1
  142. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  143. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  144. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  145. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  146. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  147. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  148. chalk/_lsp/error_builder.py +11 -0
  149. chalk/_version.py +1 -1
  150. chalk/client/client.py +128 -43
  151. chalk/client/client_async.py +149 -0
  152. chalk/client/client_async_impl.py +22 -0
  153. chalk/client/client_grpc.py +539 -104
  154. chalk/client/client_impl.py +449 -122
  155. chalk/client/dataset.py +7 -1
  156. chalk/client/models.py +98 -0
  157. chalk/client/serialization/model_serialization.py +92 -9
  158. chalk/df/LazyFramePlaceholder.py +1154 -0
  159. chalk/features/_class_property.py +7 -0
  160. chalk/features/_embedding/embedding.py +1 -0
  161. chalk/features/_encoding/converter.py +83 -2
  162. chalk/features/feature_field.py +40 -30
  163. chalk/features/feature_set_decorator.py +1 -0
  164. chalk/features/feature_wrapper.py +42 -3
  165. chalk/features/hooks.py +81 -10
  166. chalk/features/inference.py +33 -31
  167. chalk/features/resolver.py +224 -24
  168. chalk/functions/__init__.py +65 -3
  169. chalk/gitignore/gitignore_parser.py +5 -1
  170. chalk/importer.py +142 -68
  171. chalk/ml/__init__.py +2 -0
  172. chalk/ml/model_hooks.py +194 -26
  173. chalk/ml/model_reference.py +56 -8
  174. chalk/ml/model_version.py +24 -15
  175. chalk/ml/utils.py +20 -17
  176. chalk/operators/_utils.py +10 -3
  177. chalk/parsed/_proto/export.py +22 -0
  178. chalk/parsed/duplicate_input_gql.py +3 -0
  179. chalk/parsed/json_conversions.py +20 -14
  180. chalk/parsed/to_proto.py +16 -4
  181. chalk/parsed/user_types_to_json.py +31 -10
  182. chalk/parsed/validation_from_registries.py +182 -0
  183. chalk/queries/named_query.py +16 -6
  184. chalk/queries/scheduled_query.py +9 -1
  185. chalk/serialization/parsed_annotation.py +24 -11
  186. chalk/sql/__init__.py +18 -0
  187. chalk/sql/_internal/integrations/databricks.py +55 -17
  188. chalk/sql/_internal/integrations/mssql.py +127 -62
  189. chalk/sql/_internal/integrations/redshift.py +4 -0
  190. chalk/sql/_internal/sql_file_resolver.py +53 -9
  191. chalk/sql/_internal/sql_source.py +35 -2
  192. chalk/streams/_kafka_source.py +5 -1
  193. chalk/streams/_windows.py +15 -2
  194. chalk/utils/_otel_version.py +13 -0
  195. chalk/utils/async_helpers.py +2 -2
  196. chalk/utils/missing_dependency.py +5 -4
  197. chalk/utils/tracing.py +185 -95
  198. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/METADATA +4 -6
  199. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/RECORD +202 -146
  200. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  201. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  202. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,182 @@
1
+ """
2
+ Unified validation layer that operates directly on registries.
3
+
4
+ This module provides validation that works for BOTH GQL and Proto conversion paths.
5
+ It triggers lazy validations by accessing properties on registry objects and performs
6
+ explicit validation checks.
7
+
8
+ This ensures validation parity between:
9
+ - GQL path: get_registered_types() → validate_graph()
10
+ - Proto path: ToProtoConverter.convert_graph()
11
+
12
+ By calling validate_all_from_registries() before conversion in both paths, we ensure
13
+ developers cannot add validation to one path and forget the other.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import TYPE_CHECKING
19
+
20
+ from chalk._lsp.error_builder import LSPErrorBuilder
21
+
22
+ if TYPE_CHECKING:
23
+ from chalk.features.feature_field import Feature
24
+ from chalk.features.feature_set import Features
25
+ from chalk.features.resolver import ResolverRegistry
26
+
27
+
28
def validate_all_from_registries(
    features_registry: dict[str, type["Features"]],
    resolver_registry: "ResolverRegistry",
) -> None:
    """
    Trigger lazy validations by touching properties on every registry object.

    This function is meant to be called by BOTH the GQL and the Proto conversion
    paths BEFORE they convert anything, so that both paths surface the same
    diagnostics. According to the notes that accompany this module, the property
    accesses below are what trigger these checks:

    - Error[24]: Feature names with protected prefixes
    - Error[25]: Namespace names with protected prefixes
    - Error[32]: Invalid join syntax (composite joins must use & not and)
    - Error[35]: Missing has-one join definition
    - Error[37]: Join filter with incorrect type annotation
    - Error[40]: Invalid join lambda
    - Error[42]: Bad foreign key types (type mismatch)
    - Error[43]: Multi-namespace joins
    - Error[51]: Multiple primary features (versioned primary keys)
    - Error[119]: State type validation (stream resolvers)
    - Error[135]: Unrecognized feature reference

    Parameters
    ----------
    features_registry : dict[str, type[Features]]
        The feature registry to validate (FeatureSetBase.registry). Only the
        values are inspected; the keys are ignored.
    resolver_registry : ResolverRegistry
        The resolver registry to validate (RESOLVER_REGISTRY). Only its
        ``get_all_resolvers()`` method is used.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Any exception raised by a validation step for which
        ``LSPErrorBuilder.promote_exception`` returns a falsy value is
        re-raised unchanged. Exceptions for which it returns a truthy value
        are suppressed here and validation continues with the next step.
    """
    # ------------------------------------------------------------------
    # Feature validation
    # ------------------------------------------------------------------
    for features_cls in features_registry.values():
        # Error[51]: accessing __chalk_primary__ is what triggers primary-key
        # discovery; versioned features can yield several primary keys
        # (e.g. id, id@2, id@3).
        with _PromoteLSPErrors():
            _ = features_cls.__chalk_primary__

        for feature in features_cls.features:
            # Skip autogenerated and no-display features (same rule as
            # user_types_to_json.py): internal features such as __chalk_* are
            # allowed to carry protected names.
            if feature.is_autogenerated or feature.no_display:
                continue

            # Error[32,37,40,42,43]: accessing .join triggers the join and
            # filter validation in feature_field.py. Error[35] (has-one
            # without a join) is additionally checked during GQL conversion.
            with _PromoteLSPErrors():
                _ = feature.join

            # Error[24,25]: protected feature / namespace names.
            with _PromoteLSPErrors():
                _validate_feature_names_from_registry(feature)

    # ------------------------------------------------------------------
    # Resolver validation
    # ------------------------------------------------------------------
    for resolver in resolver_registry.get_all_resolvers():
        # Error[135]: accessing .inputs parses the resolver and checks that
        # every referenced input feature is recognized.
        with _PromoteLSPErrors():
            _ = resolver.inputs

        # Error[119]: accessing state and default_args checks that default
        # state values match their annotations. Both reads share one guard on
        # purpose: if `state` fails, `default_args` is not touched.
        with _PromoteLSPErrors():
            _ = resolver.state
            _ = resolver.default_args


class _PromoteLSPErrors:
    """
    Context manager equivalent to::

        try:
            ...
        except Exception as e:
            if not LSPErrorBuilder.promote_exception(e):
                raise

    It exists so the four validation steps above share a single copy of that
    handling instead of repeating it.
    """

    def __enter__(self) -> "_PromoteLSPErrors":
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        # Mirror `except Exception`: no exception, or a BaseException such as
        # KeyboardInterrupt, passes through untouched.
        if not isinstance(exc, Exception):
            return False
        # A truthy return suppresses the exception; a falsy one lets the
        # original exception propagate with its traceback intact.
        return bool(LSPErrorBuilder.promote_exception(exc))
145
+
146
+
147
def _validate_feature_names_from_registry(feature: "Feature") -> None:
    """
    Report features whose name or namespace starts with a protected prefix.

    This is intended to perform the same validation as _validate_feature_names()
    in _graph_validation.py, but operates on Feature objects from the registry
    rather than UpsertFeatureGQL objects.

    Parameters
    ----------
    feature : Feature
        The feature to validate from FeatureSetBase.registry. The attributes
        read are ``name``, ``namespace``, ``attribute_name`` and
        ``lsp_error_builder``.

    Notes
    -----
    Problems are reported through ``feature.lsp_error_builder.add_diagnostic``
    with code "24" (feature name) and/or code "25" (namespace). This function
    contains no ``raise`` of its own; whether ``add_diagnostic`` raises is not
    visible from here.
    """
    # One shared definition of "protected" for both checks; str.startswith
    # accepts a tuple and matches if any prefix matches.
    protected_prefixes = ("__", "_chalk")

    # Error[24]: Feature names cannot begin with '_chalk' or '__'
    if feature.name.startswith(protected_prefixes):
        builder = feature.lsp_error_builder
        builder.add_diagnostic(
            message="Feature names cannot begin with '_chalk' or '__'.",
            # Prefer the Python attribute name for locating the source range;
            # fall back to the feature name when it is unset/empty.
            range=builder.property_range(feature.attribute_name or feature.name),
            label="protected name",
            code="24",
        )

    # Error[25]: Namespace names cannot begin with '_chalk' or '__'
    if feature.namespace.startswith(protected_prefixes):
        builder = feature.lsp_error_builder
        builder.add_diagnostic(
            message="Feature classes cannot have names that begin with '_chalk' or '__'.",
            label="protected namespace",
            # Point at the decorator's `name=` value when one can be located,
            # otherwise at the class definition.
            range=builder.decorator_kwarg_value_range("name") or builder.class_definition_range(),
            code="25",
        )
@@ -4,6 +4,7 @@ import inspect
4
4
  import traceback
5
5
  from typing import TYPE_CHECKING, Mapping, Sequence
6
6
 
7
+ from chalk._lsp.error_builder import LSPErrorBuilder
7
8
  from chalk.features import unwrap_feature
8
9
  from chalk.utils.object_inspect import get_source_object_starting
9
10
  from chalk.utils.source_parsing import should_skip_source_code_parsing
@@ -157,9 +158,12 @@ class NamedQuery:
157
158
  self._input = [str(f) for f in self._input_raw]
158
159
  elif self._output_raw is not None:
159
160
  self._input = [str(unwrap_feature(o).primary_feature) for o in self._output_raw]
160
- except Exception:
161
+ except Exception as e:
161
162
  self._input = None
162
- self.errors.append(traceback.format_exc())
163
+ if not LSPErrorBuilder.promote_exception(e):
164
+ self.errors.append(
165
+ f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}"
166
+ )
163
167
 
164
168
  return self._input
165
169
 
@@ -170,9 +174,12 @@ class NamedQuery:
170
174
  try:
171
175
  if self._output_raw is not None:
172
176
  self._output = [str(o) for o in self._output_raw]
173
- except Exception:
177
+ except Exception as e:
174
178
  self._output = None
175
- self.errors.append(f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}")
179
+ if not LSPErrorBuilder.promote_exception(e):
180
+ self.errors.append(
181
+ f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}"
182
+ )
176
183
 
177
184
  return self._output
178
185
 
@@ -183,9 +190,12 @@ class NamedQuery:
183
190
  try:
184
191
  if self._additional_logged_features_raw is not None:
185
192
  self._additional_logged_features = [str(alf) for alf in self._additional_logged_features_raw]
186
- except Exception:
193
+ except Exception as e:
187
194
  self._additional_logged_features = None
188
- self.errors.append(f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}")
195
+ if not LSPErrorBuilder.promote_exception(e):
196
+ self.errors.append(
197
+ f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}"
198
+ )
189
199
 
190
200
  return self._additional_logged_features
191
201
 
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- from datetime import datetime, timezone
4
+ from datetime import datetime, timedelta, timezone
5
5
  from typing import TYPE_CHECKING, Collection
6
6
 
7
7
  from chalk.utils.duration import CronTab, Duration
@@ -28,6 +28,9 @@ class ScheduledQuery:
28
28
  incremental_resolvers: Collection[str] | None = None,
29
29
  planner_options: dict[str, str] | None = None,
30
30
  resource_group: str | None = None,
31
+ completion_deadline: timedelta | None = None,
32
+ num_shards: int | None = None,
33
+ num_workers: int | None = None,
31
34
  ):
32
35
  """Create an offline query which runs on a schedule.
33
36
 
@@ -159,6 +162,11 @@ class ScheduledQuery:
159
162
  self.planner_options = {k: str(v) for k, v in planner_options.items()} if planner_options else None
160
163
  self.resource_group = resource_group
161
164
 
165
+ self.completion_deadline = completion_deadline
166
+
167
+ self.num_shards = num_shards
168
+ self.num_workers = num_workers
169
+
162
170
  CRON_QUERY_REGISTRY[name] = self
163
171
 
164
172
 
@@ -350,6 +350,7 @@ class ParsedAnnotation:
350
350
  if "__chalk_document__" in args:
351
351
  self._is_document = True
352
352
  origin = get_origin(annotation)
353
+ self._parsed_annotation = cast(type, annotation)
353
354
 
354
355
  # The only allowed collections here are Set, List, or DataFrame
355
356
  if origin in (set, Set):
@@ -372,17 +373,6 @@ class ParsedAnnotation:
372
373
  annotation = args[0]
373
374
  if origin in (tuple, Tuple):
374
375
  args = get_args(annotation)
375
- if len(args) != 2 or args[1] is not ... or args[0] is ...:
376
- self._type_error(
377
- (
378
- "Tuple should be given exactly two type parameters. "
379
- "The first should be the type of the elements, and the second should be '...', "
380
- "which indicates that the tuple is of variable length. "
381
- "For example, 'Tuple[int, ...]' is a tuple of ints of variable length."
382
- ),
383
- code="74",
384
- label="invalid tuple",
385
- )
386
376
  annotation = args[0]
387
377
  if origin in (list, List):
388
378
  args = get_args(annotation)
@@ -567,3 +557,26 @@ class ParsedAnnotation:
567
557
  if self._parsed_annotation is None:
568
558
  self._parse_annotation()
569
559
  return self._is_feature_time
560
+
561
def is_dataframe_annotation(self) -> bool:
    """
    Tell whether this annotation is a DataFrame type.

    The answer is True when the parsed annotation is itself an instance of
    ``DataFrameMeta``, or when any of its type arguments (as returned by
    ``typing.get_args``, e.g. the members of an Optional/Union) is one.

    If nothing has been parsed yet (``_parsed_annotation`` is None),
    ``_parse_annotation()`` is invoked first.
    NOTE(review): the intent appears to be that this still works after
    DataFrame validation has failed - confirm against ``_parse_annotation``.
    """
    from typing import get_args

    from chalk.features.dataframe import DataFrameMeta

    # Make sure there is something to inspect before looking at it.
    if self._parsed_annotation is None:
        self._parse_annotation()

    # Direct DataFrame first; only then look inside wrappers such as
    # Optional[...] / Union[...].
    return isinstance(self.parsed_annotation, DataFrameMeta) or any(
        isinstance(member, DataFrameMeta) for member in get_args(self.parsed_annotation)
    )
chalk/sql/__init__.py CHANGED
@@ -1437,6 +1437,8 @@ def DatabricksSource(
1437
1437
  access_token: str = ...,
1438
1438
  db: str = ...,
1439
1439
  port: str = ...,
1440
+ client_id: str = ...,
1441
+ client_secret: str = ...,
1440
1442
  engine_args: Optional[Dict[str, Any]] = ...,
1441
1443
  ) -> BaseSQLSourceProtocol:
1442
1444
  """You can also configure the integration directly using environment
@@ -1457,6 +1459,10 @@ def DatabricksSource(
1457
1459
  Database to use.
1458
1460
  port
1459
1461
  Port number to use.
1462
+ client_id
1463
+ OAuth service principal client ID (alternative to access_token).
1464
+ client_secret
1465
+ OAuth service principal client secret (alternative to access_token).
1460
1466
  engine_args
1461
1467
  Additional arguments to use when constructing the SQLAlchemy engine.
1462
1468
 
@@ -1475,6 +1481,14 @@ def DatabricksSource(
1475
1481
  ... db=os.getenv("DATABRICKS_DATABASE"),
1476
1482
  ... port=os.getenv("DATABRICKS_PORT"),
1477
1483
  ... )
1484
+ >>> databricks_with_oauth = DatabricksSource(
1485
+ ... host=os.getenv("DATABRICKS_HOST"),
1486
+ ... http_path=os.getenv("DATABRICKS_HTTP_PATH"),
1487
+ ... client_id=os.getenv("DATABRICKS_CLIENT_ID"),
1488
+ ... client_secret=os.getenv("DATABRICKS_CLIENT_SECRET"),
1489
+ ... db=os.getenv("DATABRICKS_DATABASE"),
1490
+ ... port=os.getenv("DATABRICKS_PORT"),
1491
+ ... )
1478
1492
  """
1479
1493
  ...
1480
1494
 
@@ -1487,6 +1501,8 @@ def DatabricksSource(
1487
1501
  access_token: Optional[str] = None,
1488
1502
  db: Optional[str] = None,
1489
1503
  port: Optional[Union[str, int]] = None,
1504
+ client_id: Optional[str] = None,
1505
+ client_secret: Optional[str] = None,
1490
1506
  engine_args: Optional[Dict[str, Any]] = None,
1491
1507
  ) -> BaseSQLSourceProtocol:
1492
1508
  """Create a Databricks data source. SQL-based data sources
@@ -1505,6 +1521,8 @@ def DatabricksSource(
1505
1521
  db=db,
1506
1522
  port=port,
1507
1523
  name=name,
1524
+ client_id=client_id,
1525
+ client_secret=client_secret,
1508
1526
  engine_args=engine_args,
1509
1527
  )
1510
1528
 
@@ -33,6 +33,8 @@ _DATABRICKS_HTTP_PATH_NAME = "DATABRICKS_HTTP_PATH"
33
33
  _DATABRICKS_TOKEN_NAME = "DATABRICKS_TOKEN"
34
34
  _DATABRICKS_DATABASE_NAME = "DATABRICKS_DATABASE"
35
35
  _DATABRICKS_PORT_NAME = "DATABRICKS_PORT"
36
+ _DATABRICKS_CLIENT_ID_NAME = "DATABRICKS_CLIENT_ID"
37
+ _DATABRICKS_CLIENT_SECRET_NAME = "DATABRICKS_CLIENT_SECRET"
36
38
 
37
39
 
38
40
  class DatabricksSourceImpl(BaseSQLSource):
@@ -46,6 +48,8 @@ class DatabricksSourceImpl(BaseSQLSource):
46
48
  db: Optional[str] = None,
47
49
  port: Optional[Union[int, str]] = None,
48
50
  name: Optional[str] = None,
51
+ client_id: Optional[str] = None,
52
+ client_secret: Optional[str] = None,
49
53
  engine_args: Optional[Dict[str, Any]] = None,
50
54
  executor: Optional[concurrent.futures.ThreadPoolExecutor] = None,
51
55
  integration_variable_override: Optional[Mapping[str, str]] = None,
@@ -74,21 +78,53 @@ class DatabricksSourceImpl(BaseSQLSource):
74
78
  name=_DATABRICKS_PORT_NAME, integration_name=name, parser=int, override=integration_variable_override
75
79
  )
76
80
  )
81
+ self.client_id = client_id or load_integration_variable(
82
+ name=_DATABRICKS_CLIENT_ID_NAME, integration_name=name, override=integration_variable_override
83
+ )
84
+ self.client_secret = client_secret or load_integration_variable(
85
+ name=_DATABRICKS_CLIENT_SECRET_NAME, integration_name=name, override=integration_variable_override
86
+ )
77
87
  self.executor = executor or DEFAULT_IO_EXECUTOR
78
88
 
89
+ has_token = self.access_token is not None
90
+ has_oauth = self.client_id is not None and self.client_secret is not None
91
+
92
+ if has_token and has_oauth:
93
+ chalk_logger.warning(
94
+ "Both OAuth credentials and a personal access token were provided. Using OAuth authentication."
95
+ )
96
+ self.access_token = None
97
+
98
+ self._credentials_provider = None
99
+ if has_oauth:
100
+ try:
101
+ from databricks.sdk.core import Config, oauth_service_principal
102
+ except ImportError:
103
+ raise missing_dependency_exception("chalkpy[databricks]")
104
+
105
+ def credentials_provider():
106
+ config = Config(host=self.host, client_id=self.client_id, client_secret=self.client_secret)
107
+ return oauth_service_principal(config)
108
+
109
+ self._credentials_provider = credentials_provider
110
+
79
111
  if engine_args is None:
80
112
  engine_args = {}
113
+
114
+ connect_args: dict[str, Any] = {
115
+ "keepalives": 1,
116
+ "keepalives_idle": 30,
117
+ "keepalives_interval": 10,
118
+ "keepalives_count": 5,
119
+ }
120
+
121
+ if self._credentials_provider:
122
+ connect_args["credentials_provider"] = self._credentials_provider
123
+
81
124
  engine_args.setdefault("pool_size", 20)
82
125
  engine_args.setdefault("max_overflow", 60)
83
- engine_args.setdefault(
84
- "connect_args",
85
- {
86
- "keepalives": 1,
87
- "keepalives_idle": 30,
88
- "keepalives_interval": 10,
89
- "keepalives_count": 5,
90
- },
91
- )
126
+ engine_args.setdefault("connect_args", connect_args)
127
+
92
128
  BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args={})
93
129
 
94
130
  def supports_inefficient_fallback(self) -> bool:
@@ -97,6 +133,12 @@ class DatabricksSourceImpl(BaseSQLSource):
97
133
  def get_sqlglot_dialect(self) -> str | None:
98
134
  return "databricks"
99
135
 
136
def _get_client_auth(self) -> Dict[str, str | Callable | None]:
    """
    Build the authentication keyword arguments for a Databricks connection.

    Returns a single-entry dict: ``{"credentials_provider": ...}`` when a
    credentials provider has been configured on this source, otherwise
    ``{"access_token": ...}`` carrying the (possibly None) access token.
    """
    provider = self._credentials_provider
    if not provider:
        # No credentials provider configured: fall back to the access token.
        return {"access_token": self.access_token}
    return {"credentials_provider": provider}
141
+
100
142
  @contextlib.contextmanager
101
143
  def _create_temp_table(
102
144
  self,
@@ -168,10 +210,7 @@ class DatabricksSourceImpl(BaseSQLSource):
168
210
 
169
211
  # Connect using databricks-sql-python for efficient Arrow fetching
170
212
  with sql.connect(
171
- server_hostname=self.host,
172
- http_path=self.http_path,
173
- access_token=self.access_token,
174
- catalog=self.db,
213
+ server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
175
214
  ) as databricks_conn:
176
215
  chalk_logger.info("Established connection with Databricks using databricks-sql-python")
177
216
 
@@ -273,10 +312,7 @@ class DatabricksSourceImpl(BaseSQLSource):
273
312
 
274
313
  # Connect using databricks-sql-python for efficient Arrow fetching
275
314
  with sql.connect(
276
- server_hostname=self.host,
277
- http_path=self.http_path,
278
- access_token=self.access_token,
279
- catalog=self.db,
315
+ server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
280
316
  ) as databricks_cnx:
281
317
  with databricks_cnx.cursor() as cursor:
282
318
  formatted_op, positional_params, named_params = self.compile_query(finalized_query)
@@ -353,6 +389,8 @@ class DatabricksSourceImpl(BaseSQLSource):
353
389
  create_integration_variable(_DATABRICKS_TOKEN_NAME, self.name, self.access_token),
354
390
  create_integration_variable(_DATABRICKS_DATABASE_NAME, self.name, self.db),
355
391
  create_integration_variable(_DATABRICKS_PORT_NAME, self.name, self.port),
392
+ create_integration_variable(_DATABRICKS_CLIENT_ID_NAME, self.name, self.client_id),
393
+ create_integration_variable(_DATABRICKS_CLIENT_SECRET_NAME, self.name, self.client_secret),
356
394
  ]
357
395
  if v is not None
358
396
  }