chalkpy 2.90.1__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +16 -16
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +4 -0
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +17 -15
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +25 -0
- chalk/_gen/chalk/common/v1/script_task_pb2.py +3 -3
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +2 -0
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +358 -288
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +360 -10
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +225 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +141 -119
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +106 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +52 -38
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +62 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +6 -6
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +14 -12
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +19 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +38 -26
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +58 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +47 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +18 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +23 -21
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +15 -1
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +66 -66
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +32 -2
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -12
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +16 -3
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +15 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +22 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +154 -141
- chalk/_gen/chalk/server/v1/team_pb2.pyi +30 -2
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +44 -40
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +20 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +16 -10
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +52 -1
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_version.py +1 -1
- chalk/client/client.py +128 -43
- chalk/client/client_async.py +149 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +539 -104
- chalk/client/client_impl.py +449 -122
- chalk/client/dataset.py +7 -1
- chalk/client/models.py +98 -0
- chalk/client/serialization/model_serialization.py +92 -9
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -10
- chalk/features/inference.py +33 -31
- chalk/features/resolver.py +224 -24
- chalk/functions/__init__.py +65 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +142 -68
- chalk/ml/__init__.py +2 -0
- chalk/ml/model_hooks.py +194 -26
- chalk/ml/model_reference.py +56 -8
- chalk/ml/model_version.py +24 -15
- chalk/ml/utils.py +20 -17
- chalk/operators/_utils.py +10 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +3 -0
- chalk/parsed/json_conversions.py +20 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +9 -1
- chalk/serialization/parsed_annotation.py +24 -11
- chalk/sql/__init__.py +18 -0
- chalk/sql/_internal/integrations/databricks.py +55 -17
- chalk/sql/_internal/integrations/mssql.py +127 -62
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/sql_file_resolver.py +53 -9
- chalk/sql/_internal/sql_source.py +35 -2
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +15 -2
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +2 -2
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/tracing.py +185 -95
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/METADATA +4 -6
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/RECORD +202 -146
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/sql/_internal/integrations/mssql.py
CHANGED

@@ -1,15 +1,20 @@
 from __future__ import annotations
 
 import os
-
+import struct
+from typing import TYPE_CHECKING, Any, Dict, Iterable, Mapping, Optional, Union
 
 from chalk.integrations.named import create_integration_variable, load_integration_variable
+from chalk.sql._internal.query_execution_parameters import QueryExecutionParameters
 from chalk.sql._internal.sql_source import BaseSQLSource, SQLSourceKind, TableIngestMixIn
+from chalk.sql.finalized_query import FinalizedChalkQuery
 from chalk.sql.protocols import SQLSourceWithTableIngestProtocol
+from chalk.utils.environment_parsing import env_var_bool
 from chalk.utils.missing_dependency import missing_dependency_exception
 
 if TYPE_CHECKING:
-
+    import pyarrow as pa
+    from sqlalchemy.engine import URL, Connection
 
 _MSSQL_HOST_NAME = "MSSQL_HOST"
 _MSSQL_TCP_PORT_NAME = "MSSQL_TCP_PORT"
@@ -40,10 +45,10 @@ class MSSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestP
         integration_variable_override: Optional[Mapping[str, str]] = None,
     ):
         try:
-            import
-        except ImportError:
-            raise missing_dependency_exception("chalkpy[mssql]")
-        del
+            import pyodbc
+        except ImportError as e:
+            raise missing_dependency_exception("chalkpy[mssql]", original_error=e)
+        del pyodbc
 
         self.name = name
         self.host = host or load_integration_variable(
@@ -112,16 +117,46 @@ class MSSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestP
 
         BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args=async_engine_args)
 
+        # Register event listener for managed identity token injection
+        if not self.client_id and not self.user:
+            from sqlalchemy import event
+
+            event.listens_for(self.get_engine(), "do_connect")(self._inject_azure_token)
+
+    def _inject_azure_token(self, _dialect: Any, _conn_rec: Any, _cargs: Any, cparams: Dict[str, Any]) -> None:
+        """SQLAlchemy event handler to inject Azure AD token on each connection."""
+        try:
+            from azure.identity import DefaultAzureCredential
+        except ImportError:
+            raise missing_dependency_exception("chalkpy[mssql]")
+
+        try:
+            credential = DefaultAzureCredential()
+            token = credential.get_token("https://database.windows.net/.default")
+        except Exception as e:
+            raise Exception(f"Failed to acquire Azure AD token for MSSQL connection: {e}") from e
+
+        token_bytes = token.token.encode("utf-16-le")
+        token_struct = struct.pack(f"<I{len(token_bytes)}s", len(token_bytes), token_bytes)
+        cparams["attrs_before"] = {1256: token_struct}  # SQL_COPT_SS_ACCESS_TOKEN
+
     def get_sqlglot_dialect(self) -> str | None:
         return "tsql"
 
     def local_engine_url(self) -> "URL":
         from sqlalchemy.engine.url import URL
 
-
+        trust_server_cert = env_var_bool("CHALK_MSSQL_TRUST_SERVER_CERTIFICATE", default=False)
+
         if self.client_id and self.client_secret and self.tenant_id:
             # Service Principal authentication
             # Use pyodbc driver for Azure AD support
+            query_params = {
+                "driver": "ODBC Driver 18 for SQL Server",
+                "Authentication": "ActiveDirectoryServicePrincipal",
+            }
+            if trust_server_cert:
+                query_params["TrustServerCertificate"] = "yes"
             return URL.create(
                 drivername="mssql+pyodbc",
                 username=self.client_id,
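The hunk above wires managed-identity authentication into pyodbc through SQLAlchemy's "do_connect" event: an Azure AD access token is packed into the SQL_COPT_SS_ACCESS_TOKEN (1256) connection attribute. Below is a minimal standalone sketch of that token-packing pattern, independent of chalkpy, assuming azure-identity and pyodbc are installed; the server and database in the commented connect call are placeholders.

    # Minimal sketch of the token-injection pattern shown in the hunk above (not chalkpy API).
    import struct

    from azure.identity import DefaultAzureCredential

    SQL_COPT_SS_ACCESS_TOKEN = 1256  # pre-login connection attribute understood by the MS ODBC driver


    def build_access_token_attrs() -> dict[int, bytes]:
        # Acquire an Azure AD token for Azure SQL and length-prefix it as the driver expects.
        token = DefaultAzureCredential().get_token("https://database.windows.net/.default").token
        token_bytes = token.encode("utf-16-le")
        return {SQL_COPT_SS_ACCESS_TOKEN: struct.pack(f"<I{len(token_bytes)}s", len(token_bytes), token_bytes)}


    # conn = pyodbc.connect(
    #     "DRIVER={ODBC Driver 18 for SQL Server};SERVER=myserver.database.windows.net;DATABASE=mydb",
    #     attrs_before=build_access_token_attrs(),
    # )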
@@ -129,74 +164,32 @@ class MSSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestP
                 host=self.host,
                 port=self.port,
                 database=self.db,
-                query=
-                    "driver": "ODBC Driver 18 for SQL Server",
-                    "Authentication": "ActiveDirectoryServicePrincipal",
-                },
+                query=query_params,
             )
         elif self.user and self.password:
             # SQL authentication
-
-
-
-                password=self.password,
-                host=self.host,
-                port=self.port,
-                database=self.db,
-            )
-        else:
-            # Managed Identity authentication
+            query_params = {"driver": "ODBC Driver 18 for SQL Server"}
+            if trust_server_cert:
+                query_params["TrustServerCertificate"] = "yes"
             return URL.create(
                 drivername="mssql+pyodbc",
-                host=self.host,
-                port=self.port,
-                database=self.db,
-                query={
-                    "driver": "ODBC Driver 18 for SQL Server",
-                    "Authentication": "ActiveDirectoryMsi",
-                },
-            )
-
-    def async_local_engine_url(self) -> "URL":
-        from sqlalchemy.engine.url import URL
-
-        # Determine authentication method
-        if self.client_id and self.client_secret and self.tenant_id:
-            # Service Principal authentication
-            return URL.create(
-                drivername="mssql+aioodbc",
-                username=self.client_id,
-                password=self.client_secret,
-                host=self.host,
-                port=self.port,
-                database=self.db,
-                query={
-                    "driver": "ODBC Driver 18 for SQL Server",
-                    "Authentication": "ActiveDirectoryServicePrincipal",
-                },
-            )
-        elif self.user and self.password:
-            # SQL authentication
-            return URL.create(
-                drivername="mssql+aioodbc",
                 username=self.user,
                 password=self.password,
                 host=self.host,
                 port=self.port,
                 database=self.db,
-                query=
+                query=query_params,
             )
         else:
-            # Managed Identity
+            # Managed Identity: token injected via event listener
+            connection_string = (
+                f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={self.host},{self.port};DATABASE={self.db}"
+            )
+            if trust_server_cert:
+                connection_string += ";TrustServerCertificate=yes"
             return URL.create(
-                drivername="mssql+
-
-                port=self.port,
-                database=self.db,
-                query={
-                    "driver": "ODBC Driver 18 for SQL Server",
-                    "Authentication": "ActiveDirectoryMsi",
-                },
+                drivername="mssql+pyodbc",
+                query={"odbc_connect": connection_string},
             )
 
     def _recreate_integration_variables(self) -> dict[str, str]:
@@ -214,3 +207,75 @@ class MSSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestP
             ]
             if v is not None
         }
+
+    def execute_query_efficient_raw(
+        self,
+        finalized_query: FinalizedChalkQuery,
+        expected_output_schema: "pa.Schema",
+        connection: Optional["Connection"],
+        query_execution_parameters: QueryExecutionParameters,
+    ) -> Iterable["pa.RecordBatch"]:
+        """Execute query efficiently for MSSQL and return raw PyArrow RecordBatches."""
+        import contextlib
+
+        import pyarrow as pa
+        import pyarrow.compute as pc
+
+        # Get the compiled query
+        _, _, _ = self.compile_query(finalized_query)
+
+        # Use existing connection or create new one
+        with (self.get_engine().connect() if connection is None else contextlib.nullcontext(connection)) as cnx:
+            with cnx.begin():
+                # Handle temp tables
+                with contextlib.ExitStack() as exit_stack:
+                    for (
+                        _,
+                        temp_value,
+                        create_temp_table,
+                        temp_table,
+                        drop_temp_table,
+                    ) in finalized_query.temp_tables.values():
+                        exit_stack.enter_context(
+                            self._create_temp_table(create_temp_table, temp_table, drop_temp_table, cnx, temp_value)
+                        )
+
+                    # Execute query
+                    result = cnx.execute(finalized_query.query, finalized_query.params)
+
+                    # Convert result to PyArrow
+                    rows = result.fetchall()
+                    column_names = result.keys()
+
+                    if not rows:
+                        # Return empty batch with expected schema
+                        arrays = [pa.nulls(0, field.type) for field in expected_output_schema]
+                        batch = pa.RecordBatch.from_arrays(arrays, schema=expected_output_schema)
+                        if query_execution_parameters.yield_empty_batches:
+                            yield batch
+                        return
+
+                    # Convert rows to column arrays
+                    data: dict[str, list[Any]] = {}
+                    for i, col_name in enumerate(column_names):
+                        col_data = [row[i] for row in rows]
+                        data[col_name] = col_data
+
+                    # Create PyArrow table
+                    table = pa.table(data)
+
+                    # Map columns to expected schema
+                    arrays: list[pa.Array] = []
+                    for field in expected_output_schema:
+                        if field.name in table.column_names:
+                            col = table.column(field.name)
+                            # Cast to expected type if needed
+                            if col.type != field.type:
+                                col = pc.cast(col, field.type)
+                            arrays.append(col)
+                        else:
+                            # Column not found, create null array
+                            arrays.append(pa.nulls(len(table), field.type))
+
+                    batch = pa.RecordBatch.from_arrays(arrays, schema=expected_output_schema)
+                    yield batch
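The new execute_query_efficient_raw converts driver rows into a PyArrow RecordBatch that matches an expected schema, casting columns where the inferred type differs and filling missing columns with nulls. A simplified, self-contained sketch of that row-to-batch pattern follows; the schema, rows, and column names are illustrative, not chalkpy data.

    # Simplified sketch of the rows -> RecordBatch conversion pattern used above.
    import pyarrow as pa
    import pyarrow.compute as pc

    expected_schema = pa.schema([("user_id", pa.int64()), ("amount", pa.float64())])
    rows = [(1, 9.99), (2, 3.50)]               # placeholder query results
    column_names = ["user_id", "amount"]

    # Pivot row tuples into per-column Python lists.
    data = {name: [row[i] for row in rows] for i, name in enumerate(column_names)}

    arrays = []
    for field in expected_schema:
        if field.name in data:
            col = pa.array(data[field.name])     # type inferred from the Python values
            if col.type != field.type:
                col = pc.cast(col, field.type)   # cast to the expected type when they differ
            arrays.append(col)
        else:
            arrays.append(pa.nulls(len(rows), field.type))  # missing column becomes all-null

    batch = pa.RecordBatch.from_arrays(arrays, schema=expected_schema)
    print(batch.num_rows)  # 2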
chalk/sql/_internal/integrations/redshift.py
CHANGED

@@ -260,6 +260,8 @@ class RedshiftSourceImpl(BaseSQLSource):
         temp_table_name = f"query_{str(uuid.uuid4()).replace('-', '_')}"
         try:
             _logger.debug(f"Executing query & creating temp table '{temp_table_name}'")
+            _public_logger.info(f"Executing Redshift query [{temp_query_id}]: {operation}")
+            _public_logger.debug(f"Query parameters [{temp_query_id}]: {params}")
             cursor.execute(f"CREATE TEMP TABLE {temp_table_name} AS ({operation})", params)
         except Exception as e:
             _public_logger.error(f"Failed to create temp table for operation: {operation}", exc_info=e)

@@ -366,6 +368,8 @@ class RedshiftSourceImpl(BaseSQLSource):
         temp_table_name = f"query_{str(uuid.uuid4()).replace('-', '_')}"
         try:
             _logger.debug(f"Executing query & creating temp table '{temp_table_name}'")
+            _public_logger.info(f"Executing Redshift query [{temp_query_id}]: {operation}")
+            _public_logger.debug(f"Query parameters [{temp_query_id}]: {params}")
             cursor.execute(f"CREATE TEMP TABLE {temp_table_name} AS ({operation})", params)
         except Exception as e:
             _public_logger.error(f"Failed to create temp table for operation: {operation}", exc_info=e)
chalk/sql/_internal/sql_file_resolver.py
CHANGED

@@ -33,7 +33,7 @@ from yaml.scanner import ScannerError
 
 from chalk import Environments, OfflineResolver, OnlineResolver, Tags
 from chalk._lsp.error_builder import SQLFileResolverErrorBuilder
-from chalk.features import DataFrame, Feature, FeatureNotFoundException, Features
+from chalk.features import DataFrame, Feature, FeatureNotFoundException, Features, Underscore
 from chalk.features.feature_set import CURRENT_FEATURE_REGISTRY
 from chalk.features.namespace_context import build_namespaced_name
 from chalk.features.namespace_context import namespace as namespace_ctx

@@ -42,6 +42,8 @@ from chalk.features.resolver import Cron, ResolverArgErrorHandler, StreamResolve
 from chalk.sql._internal.incremental import IncrementalSettings
 from chalk.sql._internal.integrations.bigquery import BigQuerySourceImpl
 from chalk.sql._internal.integrations.cloudsql import CloudSQLSourceImpl
+from chalk.sql._internal.integrations.databricks import DatabricksSourceImpl
+from chalk.sql._internal.integrations.mssql import MSSQLSourceImpl
 from chalk.sql._internal.integrations.mysql import MySQLSourceImpl
 from chalk.sql._internal.integrations.postgres import PostgreSQLSourceImpl
 from chalk.sql._internal.integrations.redshift import RedshiftSourceImpl
@@ -85,8 +87,10 @@ _SOURCES: Mapping[str, Union[Type[BaseSQLSource], Type[StreamSource]]] = {
     "postgres": PostgreSQLSourceImpl,
     "postgresql": PostgreSQLSourceImpl,
     "mysql": MySQLSourceImpl,
+    "mssql": MSSQLSourceImpl,
     "bigquery": BigQuerySourceImpl,
     "cloudsql": CloudSQLSourceImpl,
+    "databricks": DatabricksSourceImpl,
     "redshift": RedshiftSourceImpl,
     "sqlite": SQLiteSourceImpl,
     "kafka": KafkaSource,

@@ -242,6 +246,7 @@ class SQLStringResult:
     override_comment_dict: Optional[CommentDict] = None
     override_name: Optional[str] = None
     autogenerated: bool = False
+    postprocessing_expr: Underscore | None = None
 
     def __post_init__(self):
         # Validation: if autogenerated is True, override_name must not be None
@@ -303,7 +308,7 @@ def get_sql_file_resolvers(
    """Iterate through all `.chalk.sql` filepaths, gather the sql strings, and get a resolver hopefully for each."""
    for dp, dn, fn in os.walk(os.path.expanduser(sql_file_resolve_location)):
        del dn # unused
-       for f in fn:
+       for f in sorted(fn): # Sort filenames for deterministic ordering
            filepath = os.path.join(dp, f)
            if not filepath.endswith(CHALK_SQL_FILE_RESOLVER_FILENAME_SUFFIX):
                continue

@@ -316,7 +321,10 @@ def get_sql_file_resolvers(
                continue
            _filepath_to_sql_string[filepath] = sql_string_result.sql_string
            yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
-
+   # Only yield generated resolvers whose filepath is under the directory being scanned
+   for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers(
+       filter_by_directory=sql_file_resolve_location
+   ):
        yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
 
 
@@ -339,8 +347,12 @@ def get_sql_file_resolvers_from_paths(
            sql_string_result=sql_string_result,
            has_import_errors=has_import_errors,
        )
+   # Only yield generated resolvers whose filepath is in the paths list
+   # If paths is empty, yield all generated resolvers (no filtering)
    for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers():
-
+       # Check if this generated resolver's filepath is in the provided paths
+       if not paths or sql_string_result.path in paths:
+           yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
 
 
 def get_sql_file_resolver(

@@ -693,6 +705,7 @@ def get_sql_file_resolver(
                incremental_settings=incremental_settings,
                params_to_root_fqn=glot_result.args,
            ),
+           postprocessing=sql_string_result.postprocessing_expr,
        )
    except Exception as e:
        raise e
@@ -1573,6 +1586,7 @@ class GeneratedSQLFileResolverInfo:
     filepath: str
     sql_string: str
     comment_dict: CommentDict
+    postprocessing_expr: Underscore | None
 
 
 class GeneratedSQLFileResolverRegistry:

@@ -1580,17 +1594,43 @@ class GeneratedSQLFileResolverRegistry:
         super().__init__()
         self.resolver_name_to_generated_infos: Dict[str, GeneratedSQLFileResolverInfo] = {}
 
-    def add_sql_file_resolver(
+    def add_sql_file_resolver(
+        self,
+        name: str,
+        filepath: str,
+        sql_string: str,
+        comment_dict: CommentDict,
+        postprocessing_expr: Underscore | None = None,
+    ):
         if name in self.resolver_name_to_generated_infos and filepath != "<notebook>":
             raise ValueError(f"A SQL file resolver already exists with name '{name}'. They must have unique names.")
         self.resolver_name_to_generated_infos[name] = GeneratedSQLFileResolverInfo(
-            filepath=filepath,
-            sql_string=sql_string,
-            comment_dict=comment_dict,
+            filepath=filepath, sql_string=sql_string, comment_dict=comment_dict, postprocessing_expr=postprocessing_expr
         )
 
-    def get_generated_sql_file_resolvers(self) -> Iterable[SQLStringResult]:
+    def get_generated_sql_file_resolvers(self, filter_by_directory: Path | None = None) -> Iterable[SQLStringResult]:
+        """
+        Yield generated SQL file resolvers, optionally filtered by directory.
+
+        Args:
+            filter_by_directory: If provided, only yield resolvers whose filepath is under this directory.
+                If None, yield all generated resolvers (legacy behavior).
+        """
         for name, generated_info in self.resolver_name_to_generated_infos.items():
+            # If filtering by directory is requested, check if the resolver's filepath is under that directory
+            if filter_by_directory is not None:
+                # Special case: notebook resolvers (filepath == "<notebook>") should never be auto-yielded
+                # when scanning directories, only when explicitly requested
+                if generated_info.filepath == "<notebook>":
+                    continue
+
+                # Convert to absolute paths for comparison and check if resolver path is under filter directory
+                resolver_path = Path(generated_info.filepath).resolve()
+                filter_path = Path(filter_by_directory).resolve()
+
+                if not resolver_path.is_relative_to(filter_path):
+                    continue
+
             yield SQLStringResult(
                 path=generated_info.filepath,
                 sql_string=generated_info.sql_string,

@@ -1598,6 +1638,7 @@ class GeneratedSQLFileResolverRegistry:
                 override_comment_dict=generated_info.comment_dict,
                 override_name=name,
                 autogenerated=True,
+                postprocessing_expr=generated_info.postprocessing_expr,
             )
 
 
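The directory filter added above hinges on resolving both paths and calling pathlib's is_relative_to. A quick illustration of that containment check (the paths are hypothetical):

    # Illustration of the Path-based containment check used by filter_by_directory.
    from pathlib import Path

    resolver_path = Path("/repo/resolvers/users.chalk.sql").resolve()
    filter_path = Path("/repo/resolvers").resolve()

    print(resolver_path.is_relative_to(filter_path))          # True: under the scanned directory, so it is yielded
    print(resolver_path.is_relative_to(Path("/repo/other")))  # False: outside the directory, so it is skipped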
@@ -1626,6 +1667,7 @@ def make_sql_file_resolver(
     partitioned_by: Collection[Any] | None = None,
     total: Optional[bool] = None,
     skip_sql_validation: Optional[bool] = None,
+    postprocessing_expression: Optional[Underscore] = None,
 ):
     """Generate a Chalk SQL file resolver from a filepath and a sql string.
     This will generate a resolver in your web dashboard that can be queried,

@@ -1809,6 +1851,7 @@
         sql_string=sql,
         comment_dict=comment_dict,
         name=name,
+        postprocessing_expr=postprocessing_expression,
     )
     if is_defined_in_notebook:
         from chalk.sql import SQLSourceGroup

@@ -1845,6 +1888,7 @@
         override_comment_dict=generated_info.comment_dict,
         override_name=name,
         autogenerated=True,
+        postprocessing_expr=postprocessing_expression,
     )
     resolver_result = get_sql_file_resolver(
         sources=current_sql_sources, sql_string_result=info, has_import_errors=False
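make_sql_file_resolver now threads a postprocessing_expression (an underscore expression) through to the generated resolver. A hedged usage sketch follows: only the name, sql, and postprocessing_expression arguments are taken from this diff, the import path and the expression itself are assumptions, and the query is invented for illustration.

    # Hypothetical usage sketch; import path, SQL, and the expression are illustrative only.
    from chalk import _
    from chalk.sql import make_sql_file_resolver  # import path assumed

    make_sql_file_resolver(
        name="get_user_spend",
        sql="SELECT id, spend FROM users",
        # New in this release: an underscore expression applied to the resolver's output.
        postprocessing_expression=_.spend * 100,
    )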
chalk/sql/_internal/sql_source.py
CHANGED

@@ -214,11 +214,32 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         if getattr(self, "kind", None) != SQLSourceKind.trino:
             engine_args.setdefault("pool_pre_ping", env_var_bool("USE_CLIENT_POOL_PRE_PING"))
             async_engine_args.setdefault("pool_pre_ping", env_var_bool("USE_CLIENT_POOL_PRE_PING"))
-
-        self.
+        # Store raw args internally, expose filtered versions via properties
+        self._raw_engine_args = engine_args
+        self._raw_async_engine_args = async_engine_args
         self._engine = None
         self._async_engine = None
 
+    @property
+    def engine_args(self) -> Dict[str, Any]:
+        """Engine arguments with native_args filtered out for SQLAlchemy."""
+        return {k: v for k, v in self._raw_engine_args.items() if k != "native_args"}
+
+    @engine_args.setter
+    def engine_args(self, args: dict[str, Any]):
+        """Set raw engine args (for backward compatibility)."""
+        self._raw_engine_args = args
+
+    @property
+    def async_engine_args(self) -> Dict[str, Any]:
+        """Async engine arguments with native_args filtered out for SQLAlchemy."""
+        return {k: v for k, v in self._raw_async_engine_args.items() if k != "native_args"}
+
+    @async_engine_args.setter
+    def async_engine_args(self, args: dict[str, Any]):
+        """Set raw async engine args (for backward compatibility)."""
+        self._raw_async_engine_args = args
+
     @property
     def _engine_args(self):
         """Backcompat support for private subclassing of BaseSQLSource"""

@@ -239,6 +260,16 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         """Backcompat support for private subclassing of BaseSQLSource"""
         self.async_engine_args = args
 
+    @property
+    def native_args(self) -> Dict[str, Any]:
+        """Native arguments to be passed to the underlying database driver.
+
+        These arguments are extracted from engine_args and async_engine_args
+        and are not passed to SQLAlchemy's create_engine or create_async_engine.
+        Instead, they should be used by subclasses to configure native driver connections.
+        """
+        return self._raw_engine_args.get("native_args", {})
+
     def get_sqlglot_dialect(self) -> Union[str, None]:
         """Returns the name of the SQL dialect (if it has one) for `sqlglot` to parse the SQL string.
         This allows for use of dialect-specific syntax while parsing and modifying queries."""
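BaseSQLSource now treats "native_args" as a reserved key inside engine_args: SQLAlchemy only ever sees the remaining keys, while subclasses can read native_args for driver-level settings. A tiny standalone sketch of that split (the keys shown are placeholders, not documented chalkpy options):

    # Standalone sketch of the native_args split performed by the new properties above.
    from typing import Any, Dict

    raw_engine_args: Dict[str, Any] = {
        "pool_size": 10,                               # passed through to SQLAlchemy create_engine
        "native_args": {"session_keep_alive": True},   # reserved for the native driver (placeholder key)
    }

    sqlalchemy_kwargs = {k: v for k, v in raw_engine_args.items() if k != "native_args"}
    native_args = raw_engine_args.get("native_args", {})

    print(sqlalchemy_kwargs)  # {'pool_size': 10}
    print(native_args)        # {'session_keep_alive': True}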
@@ -832,6 +863,7 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         if self._engine is None:
             self.register_sqlalchemy_compiler_overrides()
             self._check_engine_isolation_level()
+            # engine_args property already filters out native_args
             self._engine = create_engine(url=self.local_engine_url(), **self.engine_args)
         return self._engine
 

@@ -841,6 +873,7 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         if self._async_engine is None:
             self.register_sqlalchemy_compiler_overrides()
             self._check_engine_isolation_level()
+            # async_engine_args property already filters out native_args
             self._async_engine = create_async_engine(url=self.async_local_engine_url(), **self.async_engine_args)
         return self._async_engine
 
chalk/streams/_kafka_source.py
CHANGED

@@ -27,6 +27,7 @@ _KAFKA_SASL_MECHANISM_NAME = "KAFKA_SASL_MECHANISM"
 _KAFKA_SASL_USERNAME_NAME = "KAFKA_SASL_USERNAME"
 _KAFKA_SASL_PASSWORD_NAME = "KAFKA_SASL_PASSWORD"
 _KAFKA_ADDITIONAL_KAFKA_ARGS_NAME = "KAFKA_ADDITIONAL_KAFKA_ARGS"
+_KAFKA_DEAD_LETTER_QUEUE_TOPIC = "KAFKA_DEAD_LETTER_QUEUE_TOPIC"
 
 
 class KafkaSource(StreamSource, SinkIntegrationProtocol, BaseModel, frozen=True):

@@ -165,7 +166,10 @@ class KafkaSource(StreamSource, SinkIntegrationProtocol, BaseModel, frozen=True)
             ),
             name=name,
             late_arrival_deadline=late_arrival_deadline,
-            dead_letter_queue_topic=dead_letter_queue_topic
+            dead_letter_queue_topic=dead_letter_queue_topic
+            or load_integration_variable(
+                name=_KAFKA_DEAD_LETTER_QUEUE_TOPIC, integration_name=name, override=integration_variable_override
+            ),
             ssl_ca_file=ssl_ca_file
             or load_integration_variable(
                 name=_KAFKA_SSL_CA_FILE_NAME, integration_name=name, override=integration_variable_override
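With this change, KafkaSource can pick up its dead-letter-queue topic from an integration variable, like the other Kafka settings. A hedged sketch follows: the bare KAFKA_DEAD_LETTER_QUEUE_TOPIC spelling comes from the constant added above, the broker and topic names are placeholders, and the assumption that named integrations prefix the variable with the integration name is mine, not stated in this diff.

    # Hedged sketch: configuring the dead letter queue topic via environment instead of code.
    import os

    os.environ["KAFKA_DEAD_LETTER_QUEUE_TOPIC"] = "events-dlq"  # spelling from the constant above

    from chalk.streams import KafkaSource

    source = KafkaSource(
        bootstrap_server="kafka.mycompany.dev:9092",  # placeholder broker
        topic="events",                               # placeholder topic
    )
    # source.dead_letter_queue_topic should now resolve to "events-dlq" via load_integration_variable.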
chalk/streams/_windows.py
CHANGED

@@ -70,6 +70,7 @@ class WindowedMeta(type, Generic[TRich]):
         validations=None,
         offline_ttl=None,
         expression=None,
+        offline_expression=None,
         materialization=None,
     ) # noqa
 

@@ -79,9 +80,14 @@ JsonValue = Any
 
 def get_name_with_duration(name_or_fqn: str, duration: Union[str, int, timedelta]) -> str:
     duration_secs = parse_chalk_duration_s(duration)
+    name_or_fqn_components = name_or_fqn.split("@")
+    assert len(name_or_fqn_components) <= 2, f"Received invalid fqn format.\nfqn={name_or_fqn}"
+    unversioned_fqn = name_or_fqn_components[0]
+    version = None if len(name_or_fqn_components) != 2 else name_or_fqn_components[1]
+
     if duration_secs >= CHALK_MAX_TIMEDELTA.total_seconds():
-        return f"{
-    return f"{
+        return f"{unversioned_fqn}__all__" + ("" if version is None else f"@{version}")
+    return f"{unversioned_fqn}__{duration_secs}__" + ("" if version is None else f"@{version}")
 
 
 if TYPE_CHECKING:

@@ -175,6 +181,7 @@ class Windowed(Generic[TRich], metaclass=_WINDOWED_METACLASS):
             window_durations=tuple(self.buckets_seconds) if bucket is None else tuple(),
             window_duration=window_duration,
             underscore_expression=self._expression,
+            offline_underscore_expression=self._offline_expression,
             window_materialization=(
                 MaterializationWindowConfig(bucket_duration=timedelta(seconds=window_duration))
                 if self._materialization is True and window_duration is not None

@@ -208,6 +215,7 @@ class Windowed(Generic[TRich], metaclass=_WINDOWED_METACLASS):
         kind: Type[TRich] | None,
         offline_ttl: Duration | ellipsis | None,
         expression: Underscore | None,
+        offline_expression: Underscore | None,
         materialization: MaterializationWindowConfig | Literal[True] | None,
     ):
         super().__init__()

@@ -234,6 +242,7 @@ class Windowed(Generic[TRich], metaclass=_WINDOWED_METACLASS):
         self._validations = validations
         self._dtype = dtype
         self._expression = expression
+        self._offline_expression = offline_expression
         self._materialization = materialization
 
 

@@ -574,6 +583,7 @@ def windowed(
     validations: List[Validation] | None = None,
     dtype: pa.DataType | None = None,
     expression: Underscore | None = None,
+    offline_expression: Underscore | None = None,
     materialization: MaterializationWindowConfig | Literal[True] | None = None,
 ) -> Windowed[TRich]:
     """Create a windowed feature.

@@ -650,6 +660,8 @@ def windowed(
         the feature value and will treat it as failed.
     expression
         The expression to compute the feature. This is an underscore expression, like `_.transactions[_.amount].sum()`.
+    offline_expression
+        Defines an alternate expression to compute the feature during offline queries.
     validations
         A list of Validations to apply to this feature.
 

@@ -748,5 +760,6 @@ def windowed(
         validations=validations,
         offline_ttl=offline_ttl,
         expression=expression,
+        offline_expression=offline_expression,
         materialization=materialization,
     )
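windowed(...) gains an offline_expression parameter so a windowed feature can be computed with a different underscore expression during offline queries. A hedged sketch of how it might be declared follows; the feature classes are invented for illustration, and the expression shape follows the `_.transactions[_.amount].sum()` example from the docstring above.

    # Illustrative sketch only; the feature classes and join are made up.
    from chalk import _
    from chalk.features import DataFrame, features, has_many
    from chalk.streams import Windowed, windowed


    @features
    class Transaction:
        id: int
        user_id: int
        amount: float


    @features
    class User:
        id: int
        transactions: DataFrame[Transaction] = has_many(lambda: Transaction.user_id == User.id)
        # Online and offline queries can now use different expressions for the same windowed feature;
        # here both are identical, but the offline one could, e.g., read from a backfilled column.
        total_spend: Windowed[float] = windowed(
            "1d", "7d",
            expression=_.transactions[_.amount].sum(),
            offline_expression=_.transactions[_.amount].sum(),
        )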
chalk/utils/_otel_version.py
ADDED

@@ -0,0 +1,13 @@
+def _can_use_otel_trace() -> bool:
+    try:
+        from opentelemetry import context  # pyright: ignore[reportUnusedImport]
+        from opentelemetry import trace  # pyright: ignore[reportUnusedImport]
+        from opentelemetry.sdk.resources import Resource  # pyright: ignore[reportUnusedImport]
+        from opentelemetry.sdk.trace import TracerProvider  # pyright: ignore[reportUnusedImport]
+
+        return True
+    except Exception:
+        return False
+
+
+can_use_otel_trace = _can_use_otel_trace()
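The new module probes once at import time for a usable OpenTelemetry install and exposes the result as a module-level flag. A sketch of how such a flag is typically consumed; the emit_span function below is hypothetical, not chalkpy code.

    # Hypothetical consumer of the flag; emit_span is not part of chalkpy.
    from chalk.utils._otel_version import can_use_otel_trace


    def emit_span(name: str) -> None:
        if not can_use_otel_trace:
            return  # tracing silently disabled when opentelemetry is not importable

        from opentelemetry import trace

        tracer = trace.get_tracer("chalk-example")
        with tracer.start_as_current_span(name):
            pass  # traced work would go here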
chalk/utils/async_helpers.py
CHANGED

@@ -61,8 +61,8 @@ def run_coroutine_fn_threadsafe(
     @functools.wraps(coro_fn)
     async def wrapped_with_context(*args: P.args, **kwargs: P.kwargs):
         try:
-            safe_activate_trace_context(current_trace_context)
-
+            with safe_activate_trace_context(current_trace_context):
+                res = await coro_fn(*args, **kwargs)
         except BaseException as exc:
             ans.set_exception(exc)
         else:
chalk/utils/missing_dependency.py
CHANGED

@@ -5,7 +5,8 @@ class MissingDependencyException(ImportError):
     ...
 
 
-def missing_dependency_exception(name: str):
-
-
-
+def missing_dependency_exception(name: str, original_error: Exception | None = None):
+    msg = f"Missing pip dependency '{name}' for chalkpy=={chalk.__version__}. Please add this to your requirements.txt file and pip install."
+    if original_error:
+        msg += f"\n\n{original_error}"
+    return MissingDependencyException(msg)
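missing_dependency_exception now accepts the original ImportError so the underlying failure is shown alongside the "pip install" hint; the updated call sites in this diff (for example the mssql `import pyodbc` block) pass it as original_error=e. A small sketch of that call pattern outside chalkpy, using a placeholder optional dependency:

    # Sketch of the updated call pattern; some_optional_dep is a placeholder module name.
    from chalk.utils.missing_dependency import missing_dependency_exception

    try:
        import some_optional_dep  # noqa: F401  # placeholder optional dependency
    except ImportError as e:
        # The original ImportError text is appended to the "please pip install" message.
        raise missing_dependency_exception("chalkpy[extra]", original_error=e)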