chalkpy 2.90.1__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +16 -16
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +4 -0
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +17 -15
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +25 -0
- chalk/_gen/chalk/common/v1/script_task_pb2.py +3 -3
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +2 -0
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +358 -288
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +360 -10
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +225 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +141 -119
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +106 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +52 -38
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +62 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +6 -6
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +14 -12
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +19 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +38 -26
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +58 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +47 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +18 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +23 -21
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +15 -1
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +66 -66
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +32 -2
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -12
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +16 -3
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +15 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +22 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +154 -141
- chalk/_gen/chalk/server/v1/team_pb2.pyi +30 -2
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +44 -40
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +20 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +16 -10
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +52 -1
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_version.py +1 -1
- chalk/client/client.py +128 -43
- chalk/client/client_async.py +149 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +539 -104
- chalk/client/client_impl.py +449 -122
- chalk/client/dataset.py +7 -1
- chalk/client/models.py +98 -0
- chalk/client/serialization/model_serialization.py +92 -9
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -10
- chalk/features/inference.py +33 -31
- chalk/features/resolver.py +224 -24
- chalk/functions/__init__.py +65 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +142 -68
- chalk/ml/__init__.py +2 -0
- chalk/ml/model_hooks.py +194 -26
- chalk/ml/model_reference.py +56 -8
- chalk/ml/model_version.py +24 -15
- chalk/ml/utils.py +20 -17
- chalk/operators/_utils.py +10 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +3 -0
- chalk/parsed/json_conversions.py +20 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +9 -1
- chalk/serialization/parsed_annotation.py +24 -11
- chalk/sql/__init__.py +18 -0
- chalk/sql/_internal/integrations/databricks.py +55 -17
- chalk/sql/_internal/integrations/mssql.py +127 -62
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/sql_file_resolver.py +53 -9
- chalk/sql/_internal/sql_source.py +35 -2
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +15 -2
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +2 -2
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/tracing.py +185 -95
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/METADATA +4 -6
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/RECORD +202 -146
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified validation layer that operates directly on registries.
|
|
3
|
+
|
|
4
|
+
This module provides validation that works for BOTH GQL and Proto conversion paths.
|
|
5
|
+
It triggers lazy validations by accessing properties on registry objects and performs
|
|
6
|
+
explicit validation checks.
|
|
7
|
+
|
|
8
|
+
This ensures validation parity between:
|
|
9
|
+
- GQL path: get_registered_types() → validate_graph()
|
|
10
|
+
- Proto path: ToProtoConverter.convert_graph()
|
|
11
|
+
|
|
12
|
+
By calling validate_all_from_registries() before conversion in both paths, we ensure
|
|
13
|
+
developers cannot add validation to one path and forget the other.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
from chalk._lsp.error_builder import LSPErrorBuilder
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from chalk.features.feature_field import Feature
|
|
24
|
+
from chalk.features.feature_set import Features
|
|
25
|
+
from chalk.features.resolver import ResolverRegistry
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def validate_all_from_registries(
    features_registry: dict[str, type["Features"]],
    resolver_registry: "ResolverRegistry",
) -> None:
    """
    Trigger all validations by accessing properties on registry objects.
    This can be run multiple times and not show duplicates.

    This function should be called by BOTH GQL and Proto conversion paths BEFORE
    they perform their conversions. It validates by triggering lazy validations:

    - Error[24]: Feature names with protected prefixes
    - Error[25]: Namespace names with protected prefixes
    - Error[32]: Invalid join syntax (composite joins must use & not and)
    - Error[35]: Missing has-one join definition
    - Error[37]: Join filter with incorrect type annotation
    - Error[40]: Invalid join lambda
    - Error[42]: Bad foreign key types (type mismatch)
    - Error[43]: Multi-namespace joins
    - Error[51]: Multiple primary features (versioned primary keys)
    - Error[119]: State type validation (stream resolvers)
    - Error[135]: Unrecognized feature reference

    Parameters
    ----------
    features_registry : dict[str, type[Features]]
        The feature registry to validate (FeatureSetBase.registry).
    resolver_registry : ResolverRegistry
        The resolver registry to validate (RESOLVER_REGISTRY).

    Returns
    -------
    None
        Validation errors are accumulated in LSPErrorBuilder and raised as exceptions.
    """

    def _trigger(accessor):
        # Run one lazy-validation trigger. LSP diagnostics are promoted
        # (accumulated in LSPErrorBuilder and re-raised there); any other
        # failure means something unexpected went wrong, so re-raise it.
        try:
            accessor()
        except Exception as e:
            if not LSPErrorBuilder.promote_exception(e):
                raise

    # ========================================================================
    # FEATURE VALIDATION
    # ========================================================================
    # The registry keys are unused here, so iterate values directly.
    for features_cls in features_registry.values():
        # Error[51]: accessing __chalk_primary__ runs _discover_feature(),
        # which validates that there is only one primary key. Versioned
        # features create multiple primary keys (e.g., id, id@2, id@3),
        # which triggers Error[51].
        _trigger(lambda: features_cls.__chalk_primary__)

        for feature in features_cls.features:
            # Skip autogenerated and no-display features (same filter as
            # user_types_to_json.py). Internal features like __chalk_* are
            # allowed to have protected names.
            if feature.is_autogenerated or feature.no_display:
                continue

            # Error[32,35,37,40,42,43]: accessing .join runs
            # _validate_join() and _validate_filter() in feature_field.py.
            # During GQL conversion, convert_type_to_gql() also checks
            # Error[35] (is_has_one with join is None).
            _trigger(lambda: feature.join)

            # Error[24,25]: protected feature / namespace name checks.
            _trigger(lambda: _validate_feature_names_from_registry(feature))

    # ========================================================================
    # RESOLVER VALIDATION
    # ========================================================================
    for resolver in resolver_registry.get_all_resolvers():
        # Error[135]: accessing resolver.inputs runs _do_parse(), which
        # validates that all input features are recognized and registered.
        _trigger(lambda: resolver.inputs)

        # Error[119]: accessing resolver state and default_args validates
        # that default state values match their type annotations
        # (stream resolvers).
        def _state_and_defaults():
            _ = resolver.state
            _ = resolver.default_args

        _trigger(_state_and_defaults)
|
146
|
+
|
|
147
|
+
def _validate_feature_names_from_registry(feature: "Feature") -> None:
|
|
148
|
+
"""
|
|
149
|
+
Validate that feature names and namespace names don't use protected prefixes.
|
|
150
|
+
|
|
151
|
+
This performs the same validation as _validate_feature_names() in
|
|
152
|
+
_graph_validation.py, but operates on Feature objects from the registry
|
|
153
|
+
rather than UpsertFeatureGQL objects.
|
|
154
|
+
|
|
155
|
+
Parameters
|
|
156
|
+
----------
|
|
157
|
+
feature : Feature
|
|
158
|
+
The feature to validate from FeatureSetBase.registry
|
|
159
|
+
|
|
160
|
+
Raises
|
|
161
|
+
------
|
|
162
|
+
Exception
|
|
163
|
+
If feature or namespace name starts with '_chalk' or '__'
|
|
164
|
+
"""
|
|
165
|
+
# Error[24]: Feature names cannot begin with '_chalk' or '__'
|
|
166
|
+
if feature.name.startswith("__") or feature.name.startswith("_chalk"):
|
|
167
|
+
feature.lsp_error_builder.add_diagnostic(
|
|
168
|
+
message="Feature names cannot begin with '_chalk' or '__'.",
|
|
169
|
+
range=feature.lsp_error_builder.property_range(feature.attribute_name or feature.name),
|
|
170
|
+
label="protected name",
|
|
171
|
+
code="24",
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# Error[25]: Namespace names cannot begin with '_chalk' or '__'
|
|
175
|
+
if feature.namespace.startswith("__") or feature.namespace.startswith("_chalk"):
|
|
176
|
+
feature.lsp_error_builder.add_diagnostic(
|
|
177
|
+
message="Feature classes cannot have names that begin with '_chalk' or '__'.",
|
|
178
|
+
label="protected namespace",
|
|
179
|
+
range=feature.lsp_error_builder.decorator_kwarg_value_range("name")
|
|
180
|
+
or feature.lsp_error_builder.class_definition_range(),
|
|
181
|
+
code="25",
|
|
182
|
+
)
|
chalk/queries/named_query.py
CHANGED
|
@@ -4,6 +4,7 @@ import inspect
|
|
|
4
4
|
import traceback
|
|
5
5
|
from typing import TYPE_CHECKING, Mapping, Sequence
|
|
6
6
|
|
|
7
|
+
from chalk._lsp.error_builder import LSPErrorBuilder
|
|
7
8
|
from chalk.features import unwrap_feature
|
|
8
9
|
from chalk.utils.object_inspect import get_source_object_starting
|
|
9
10
|
from chalk.utils.source_parsing import should_skip_source_code_parsing
|
|
@@ -157,9 +158,12 @@ class NamedQuery:
|
|
|
157
158
|
self._input = [str(f) for f in self._input_raw]
|
|
158
159
|
elif self._output_raw is not None:
|
|
159
160
|
self._input = [str(unwrap_feature(o).primary_feature) for o in self._output_raw]
|
|
160
|
-
except Exception:
|
|
161
|
+
except Exception as e:
|
|
161
162
|
self._input = None
|
|
162
|
-
|
|
163
|
+
if not LSPErrorBuilder.promote_exception(e):
|
|
164
|
+
self.errors.append(
|
|
165
|
+
f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}"
|
|
166
|
+
)
|
|
163
167
|
|
|
164
168
|
return self._input
|
|
165
169
|
|
|
@@ -170,9 +174,12 @@ class NamedQuery:
|
|
|
170
174
|
try:
|
|
171
175
|
if self._output_raw is not None:
|
|
172
176
|
self._output = [str(o) for o in self._output_raw]
|
|
173
|
-
except Exception:
|
|
177
|
+
except Exception as e:
|
|
174
178
|
self._output = None
|
|
175
|
-
|
|
179
|
+
if not LSPErrorBuilder.promote_exception(e):
|
|
180
|
+
self.errors.append(
|
|
181
|
+
f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}"
|
|
182
|
+
)
|
|
176
183
|
|
|
177
184
|
return self._output
|
|
178
185
|
|
|
@@ -183,9 +190,12 @@ class NamedQuery:
|
|
|
183
190
|
try:
|
|
184
191
|
if self._additional_logged_features_raw is not None:
|
|
185
192
|
self._additional_logged_features = [str(alf) for alf in self._additional_logged_features_raw]
|
|
186
|
-
except Exception:
|
|
193
|
+
except Exception as e:
|
|
187
194
|
self._additional_logged_features = None
|
|
188
|
-
|
|
195
|
+
if not LSPErrorBuilder.promote_exception(e):
|
|
196
|
+
self.errors.append(
|
|
197
|
+
f"Error creating NamedQuery '{self.name} ({self.version})': {traceback.format_exc()}"
|
|
198
|
+
)
|
|
189
199
|
|
|
190
200
|
return self._additional_logged_features
|
|
191
201
|
|
chalk/queries/scheduled_query.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from datetime import datetime, timezone
|
|
4
|
+
from datetime import datetime, timedelta, timezone
|
|
5
5
|
from typing import TYPE_CHECKING, Collection
|
|
6
6
|
|
|
7
7
|
from chalk.utils.duration import CronTab, Duration
|
|
@@ -28,6 +28,9 @@ class ScheduledQuery:
|
|
|
28
28
|
incremental_resolvers: Collection[str] | None = None,
|
|
29
29
|
planner_options: dict[str, str] | None = None,
|
|
30
30
|
resource_group: str | None = None,
|
|
31
|
+
completion_deadline: timedelta | None = None,
|
|
32
|
+
num_shards: int | None = None,
|
|
33
|
+
num_workers: int | None = None,
|
|
31
34
|
):
|
|
32
35
|
"""Create an offline query which runs on a schedule.
|
|
33
36
|
|
|
@@ -159,6 +162,11 @@ class ScheduledQuery:
|
|
|
159
162
|
self.planner_options = {k: str(v) for k, v in planner_options.items()} if planner_options else None
|
|
160
163
|
self.resource_group = resource_group
|
|
161
164
|
|
|
165
|
+
self.completion_deadline = completion_deadline
|
|
166
|
+
|
|
167
|
+
self.num_shards = num_shards
|
|
168
|
+
self.num_workers = num_workers
|
|
169
|
+
|
|
162
170
|
CRON_QUERY_REGISTRY[name] = self
|
|
163
171
|
|
|
164
172
|
|
|
@@ -350,6 +350,7 @@ class ParsedAnnotation:
|
|
|
350
350
|
if "__chalk_document__" in args:
|
|
351
351
|
self._is_document = True
|
|
352
352
|
origin = get_origin(annotation)
|
|
353
|
+
self._parsed_annotation = cast(type, annotation)
|
|
353
354
|
|
|
354
355
|
# The only allowed collections here are Set, List, or DataFrame
|
|
355
356
|
if origin in (set, Set):
|
|
@@ -372,17 +373,6 @@ class ParsedAnnotation:
|
|
|
372
373
|
annotation = args[0]
|
|
373
374
|
if origin in (tuple, Tuple):
|
|
374
375
|
args = get_args(annotation)
|
|
375
|
-
if len(args) != 2 or args[1] is not ... or args[0] is ...:
|
|
376
|
-
self._type_error(
|
|
377
|
-
(
|
|
378
|
-
"Tuple should be given exactly two type parameters. "
|
|
379
|
-
"The first should be the type of the elements, and the second should be '...', "
|
|
380
|
-
"which indicates that the tuple is of variable length. "
|
|
381
|
-
"For example, 'Tuple[int, ...]' is a tuple of ints of variable length."
|
|
382
|
-
),
|
|
383
|
-
code="74",
|
|
384
|
-
label="invalid tuple",
|
|
385
|
-
)
|
|
386
376
|
annotation = args[0]
|
|
387
377
|
if origin in (list, List):
|
|
388
378
|
args = get_args(annotation)
|
|
@@ -567,3 +557,26 @@ class ParsedAnnotation:
|
|
|
567
557
|
if self._parsed_annotation is None:
|
|
568
558
|
self._parse_annotation()
|
|
569
559
|
return self._is_feature_time
|
|
560
|
+
|
|
561
|
+
def is_dataframe_annotation(self) -> bool:
    """
    Report whether this annotation refers to a DataFrame type, even when
    validation of the annotation itself failed.

    Inspects the raw parsed annotation without triggering full validation,
    which avoids false-positive errors after a failed DataFrame validation.
    """
    from typing import get_args

    from chalk.features.dataframe import DataFrameMeta

    # Parse lazily on first access.
    if self._parsed_annotation is None:
        self._parse_annotation()

    # Directly a DataFrame?
    if isinstance(self.parsed_annotation, DataFrameMeta):
        return True

    # Otherwise: a DataFrame wrapped in Optional, Union, etc.?
    return any(isinstance(arg, DataFrameMeta) for arg in get_args(self.parsed_annotation))
chalk/sql/__init__.py
CHANGED
|
@@ -1437,6 +1437,8 @@ def DatabricksSource(
|
|
|
1437
1437
|
access_token: str = ...,
|
|
1438
1438
|
db: str = ...,
|
|
1439
1439
|
port: str = ...,
|
|
1440
|
+
client_id: str = ...,
|
|
1441
|
+
client_secret: str = ...,
|
|
1440
1442
|
engine_args: Optional[Dict[str, Any]] = ...,
|
|
1441
1443
|
) -> BaseSQLSourceProtocol:
|
|
1442
1444
|
"""You can also configure the integration directly using environment
|
|
@@ -1457,6 +1459,10 @@ def DatabricksSource(
|
|
|
1457
1459
|
Database to use.
|
|
1458
1460
|
port
|
|
1459
1461
|
Port number to use.
|
|
1462
|
+
client_id
|
|
1463
|
+
OAuth service principal client ID (alternative to access_token).
|
|
1464
|
+
client_secret
|
|
1465
|
+
OAuth service principal client secret (alternative to access_token).
|
|
1460
1466
|
engine_args
|
|
1461
1467
|
Additional arguments to use when constructing the SQLAlchemy engine.
|
|
1462
1468
|
|
|
@@ -1475,6 +1481,14 @@ def DatabricksSource(
|
|
|
1475
1481
|
... db=os.getenv("DATABRICKS_DATABASE"),
|
|
1476
1482
|
... port=os.getenv("DATABRICKS_PORT"),
|
|
1477
1483
|
... )
|
|
1484
|
+
>>> databricks_with_oauth = DatabricksSource(
|
|
1485
|
+
... host=os.getenv("DATABRICKS_HOST"),
|
|
1486
|
+
... http_path=os.getenv("DATABRICKS_HTTP_PATH"),
|
|
1487
|
+
... client_id=os.getenv("DATABRICKS_CLIENT_ID"),
|
|
1488
|
+
... client_secret=os.getenv("DATABRICKS_CLIENT_SECRET"),
|
|
1489
|
+
... db=os.getenv("DATABRICKS_DATABASE"),
|
|
1490
|
+
... port=os.getenv("DATABRICKS_PORT"),
|
|
1491
|
+
... )
|
|
1478
1492
|
"""
|
|
1479
1493
|
...
|
|
1480
1494
|
|
|
@@ -1487,6 +1501,8 @@ def DatabricksSource(
|
|
|
1487
1501
|
access_token: Optional[str] = None,
|
|
1488
1502
|
db: Optional[str] = None,
|
|
1489
1503
|
port: Optional[Union[str, int]] = None,
|
|
1504
|
+
client_id: Optional[str] = None,
|
|
1505
|
+
client_secret: Optional[str] = None,
|
|
1490
1506
|
engine_args: Optional[Dict[str, Any]] = None,
|
|
1491
1507
|
) -> BaseSQLSourceProtocol:
|
|
1492
1508
|
"""Create a Databricks data source. SQL-based data sources
|
|
@@ -1505,6 +1521,8 @@ def DatabricksSource(
|
|
|
1505
1521
|
db=db,
|
|
1506
1522
|
port=port,
|
|
1507
1523
|
name=name,
|
|
1524
|
+
client_id=client_id,
|
|
1525
|
+
client_secret=client_secret,
|
|
1508
1526
|
engine_args=engine_args,
|
|
1509
1527
|
)
|
|
1510
1528
|
|
|
@@ -33,6 +33,8 @@ _DATABRICKS_HTTP_PATH_NAME = "DATABRICKS_HTTP_PATH"
|
|
|
33
33
|
_DATABRICKS_TOKEN_NAME = "DATABRICKS_TOKEN"
|
|
34
34
|
_DATABRICKS_DATABASE_NAME = "DATABRICKS_DATABASE"
|
|
35
35
|
_DATABRICKS_PORT_NAME = "DATABRICKS_PORT"
|
|
36
|
+
_DATABRICKS_CLIENT_ID_NAME = "DATABRICKS_CLIENT_ID"
|
|
37
|
+
_DATABRICKS_CLIENT_SECRET_NAME = "DATABRICKS_CLIENT_SECRET"
|
|
36
38
|
|
|
37
39
|
|
|
38
40
|
class DatabricksSourceImpl(BaseSQLSource):
|
|
@@ -46,6 +48,8 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
46
48
|
db: Optional[str] = None,
|
|
47
49
|
port: Optional[Union[int, str]] = None,
|
|
48
50
|
name: Optional[str] = None,
|
|
51
|
+
client_id: Optional[str] = None,
|
|
52
|
+
client_secret: Optional[str] = None,
|
|
49
53
|
engine_args: Optional[Dict[str, Any]] = None,
|
|
50
54
|
executor: Optional[concurrent.futures.ThreadPoolExecutor] = None,
|
|
51
55
|
integration_variable_override: Optional[Mapping[str, str]] = None,
|
|
@@ -74,21 +78,53 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
74
78
|
name=_DATABRICKS_PORT_NAME, integration_name=name, parser=int, override=integration_variable_override
|
|
75
79
|
)
|
|
76
80
|
)
|
|
81
|
+
self.client_id = client_id or load_integration_variable(
|
|
82
|
+
name=_DATABRICKS_CLIENT_ID_NAME, integration_name=name, override=integration_variable_override
|
|
83
|
+
)
|
|
84
|
+
self.client_secret = client_secret or load_integration_variable(
|
|
85
|
+
name=_DATABRICKS_CLIENT_SECRET_NAME, integration_name=name, override=integration_variable_override
|
|
86
|
+
)
|
|
77
87
|
self.executor = executor or DEFAULT_IO_EXECUTOR
|
|
78
88
|
|
|
89
|
+
has_token = self.access_token is not None
|
|
90
|
+
has_oauth = self.client_id is not None and self.client_secret is not None
|
|
91
|
+
|
|
92
|
+
if has_token and has_oauth:
|
|
93
|
+
chalk_logger.warning(
|
|
94
|
+
"Both OAuth credentials and a personal access token were provided. Using OAuth authentication."
|
|
95
|
+
)
|
|
96
|
+
self.access_token = None
|
|
97
|
+
|
|
98
|
+
self._credentials_provider = None
|
|
99
|
+
if has_oauth:
|
|
100
|
+
try:
|
|
101
|
+
from databricks.sdk.core import Config, oauth_service_principal
|
|
102
|
+
except ImportError:
|
|
103
|
+
raise missing_dependency_exception("chalkpy[databricks]")
|
|
104
|
+
|
|
105
|
+
def credentials_provider():
|
|
106
|
+
config = Config(host=self.host, client_id=self.client_id, client_secret=self.client_secret)
|
|
107
|
+
return oauth_service_principal(config)
|
|
108
|
+
|
|
109
|
+
self._credentials_provider = credentials_provider
|
|
110
|
+
|
|
79
111
|
if engine_args is None:
|
|
80
112
|
engine_args = {}
|
|
113
|
+
|
|
114
|
+
connect_args: dict[str, Any] = {
|
|
115
|
+
"keepalives": 1,
|
|
116
|
+
"keepalives_idle": 30,
|
|
117
|
+
"keepalives_interval": 10,
|
|
118
|
+
"keepalives_count": 5,
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
if self._credentials_provider:
|
|
122
|
+
connect_args["credentials_provider"] = self._credentials_provider
|
|
123
|
+
|
|
81
124
|
engine_args.setdefault("pool_size", 20)
|
|
82
125
|
engine_args.setdefault("max_overflow", 60)
|
|
83
|
-
engine_args.setdefault(
|
|
84
|
-
|
|
85
|
-
{
|
|
86
|
-
"keepalives": 1,
|
|
87
|
-
"keepalives_idle": 30,
|
|
88
|
-
"keepalives_interval": 10,
|
|
89
|
-
"keepalives_count": 5,
|
|
90
|
-
},
|
|
91
|
-
)
|
|
126
|
+
engine_args.setdefault("connect_args", connect_args)
|
|
127
|
+
|
|
92
128
|
BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args={})
|
|
93
129
|
|
|
94
130
|
def supports_inefficient_fallback(self) -> bool:
|
|
@@ -97,6 +133,12 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
97
133
|
def get_sqlglot_dialect(self) -> str | None:
|
|
98
134
|
return "databricks"
|
|
99
135
|
|
|
136
|
+
def _get_client_auth(self) -> Dict[str, str | Callable | None]:
|
|
137
|
+
if self._credentials_provider:
|
|
138
|
+
return {"credentials_provider": self._credentials_provider}
|
|
139
|
+
else:
|
|
140
|
+
return {"access_token": self.access_token}
|
|
141
|
+
|
|
100
142
|
@contextlib.contextmanager
|
|
101
143
|
def _create_temp_table(
|
|
102
144
|
self,
|
|
@@ -168,10 +210,7 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
168
210
|
|
|
169
211
|
# Connect using databricks-sql-python for efficient Arrow fetching
|
|
170
212
|
with sql.connect(
|
|
171
|
-
server_hostname=self.host,
|
|
172
|
-
http_path=self.http_path,
|
|
173
|
-
access_token=self.access_token,
|
|
174
|
-
catalog=self.db,
|
|
213
|
+
server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
|
|
175
214
|
) as databricks_conn:
|
|
176
215
|
chalk_logger.info("Established connection with Databricks using databricks-sql-python")
|
|
177
216
|
|
|
@@ -273,10 +312,7 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
273
312
|
|
|
274
313
|
# Connect using databricks-sql-python for efficient Arrow fetching
|
|
275
314
|
with sql.connect(
|
|
276
|
-
server_hostname=self.host,
|
|
277
|
-
http_path=self.http_path,
|
|
278
|
-
access_token=self.access_token,
|
|
279
|
-
catalog=self.db,
|
|
315
|
+
server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
|
|
280
316
|
) as databricks_cnx:
|
|
281
317
|
with databricks_cnx.cursor() as cursor:
|
|
282
318
|
formatted_op, positional_params, named_params = self.compile_query(finalized_query)
|
|
@@ -353,6 +389,8 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
353
389
|
create_integration_variable(_DATABRICKS_TOKEN_NAME, self.name, self.access_token),
|
|
354
390
|
create_integration_variable(_DATABRICKS_DATABASE_NAME, self.name, self.db),
|
|
355
391
|
create_integration_variable(_DATABRICKS_PORT_NAME, self.name, self.port),
|
|
392
|
+
create_integration_variable(_DATABRICKS_CLIENT_ID_NAME, self.name, self.client_id),
|
|
393
|
+
create_integration_variable(_DATABRICKS_CLIENT_SECRET_NAME, self.name, self.client_secret),
|
|
356
394
|
]
|
|
357
395
|
if v is not None
|
|
358
396
|
}
|