chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
- chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
- chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
- chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
- chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
- chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
- chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
- chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
- chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_monitoring/Chart.py +1 -3
- chalk/_version.py +1 -1
- chalk/cli.py +5 -10
- chalk/client/client.py +178 -64
- chalk/client/client_async.py +154 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +738 -112
- chalk/client/client_impl.py +541 -136
- chalk/client/dataset.py +27 -6
- chalk/client/models.py +99 -2
- chalk/client/serialization/model_serialization.py +126 -10
- chalk/config/project_config.py +1 -1
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/df/ast_parser.py +2 -10
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_embedding/sentence_transformer.py +1 -1
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/_encoding/pyarrow.py +20 -4
- chalk/features/_encoding/rich.py +1 -3
- chalk/features/_tensor.py +1 -2
- chalk/features/dataframe/_filters.py +14 -5
- chalk/features/dataframe/_impl.py +91 -36
- chalk/features/dataframe/_validation.py +11 -7
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set.py +1 -2
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -12
- chalk/features/inference.py +65 -10
- chalk/features/resolver.py +338 -56
- chalk/features/tag.py +1 -3
- chalk/features/underscore_features.py +2 -1
- chalk/functions/__init__.py +456 -21
- chalk/functions/holidays.py +1 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +186 -74
- chalk/ml/__init__.py +6 -2
- chalk/ml/model_hooks.py +368 -51
- chalk/ml/model_reference.py +68 -10
- chalk/ml/model_version.py +34 -21
- chalk/ml/utils.py +143 -40
- chalk/operators/_utils.py +14 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +4 -0
- chalk/parsed/expressions.py +1 -3
- chalk/parsed/json_conversions.py +21 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +13 -1
- chalk/serialization/parsed_annotation.py +25 -12
- chalk/sql/__init__.py +221 -0
- chalk/sql/_internal/integrations/athena.py +6 -1
- chalk/sql/_internal/integrations/bigquery.py +22 -2
- chalk/sql/_internal/integrations/databricks.py +61 -18
- chalk/sql/_internal/integrations/mssql.py +281 -0
- chalk/sql/_internal/integrations/postgres.py +11 -3
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/integrations/snowflake.py +11 -2
- chalk/sql/_internal/integrations/util.py +2 -1
- chalk/sql/_internal/sql_file_resolver.py +55 -10
- chalk/sql/_internal/sql_source.py +36 -2
- chalk/streams/__init__.py +1 -3
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +16 -4
- chalk/streams/types.py +1 -2
- chalk/utils/__init__.py +1 -3
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +14 -5
- chalk/utils/df_utils.py +2 -2
- chalk/utils/duration.py +1 -3
- chalk/utils/job_log_display.py +538 -0
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/notebook.py +255 -2
- chalk/utils/pl_helpers.py +190 -37
- chalk/utils/pydanticutil/pydantic_compat.py +1 -2
- chalk/utils/storage_client.py +246 -0
- chalk/utils/threading.py +1 -3
- chalk/utils/tracing.py +194 -86
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/sql/_internal/integrations/mssql.py
ADDED
@@ -0,0 +1,281 @@
+from __future__ import annotations
+
+import os
+import struct
+from typing import TYPE_CHECKING, Any, Dict, Iterable, Mapping, Optional, Union
+
+from chalk.integrations.named import create_integration_variable, load_integration_variable
+from chalk.sql._internal.query_execution_parameters import QueryExecutionParameters
+from chalk.sql._internal.sql_source import BaseSQLSource, SQLSourceKind, TableIngestMixIn
+from chalk.sql.finalized_query import FinalizedChalkQuery
+from chalk.sql.protocols import SQLSourceWithTableIngestProtocol
+from chalk.utils.environment_parsing import env_var_bool
+from chalk.utils.missing_dependency import missing_dependency_exception
+
+if TYPE_CHECKING:
+    import pyarrow as pa
+    from sqlalchemy.engine import URL, Connection
+
+_MSSQL_HOST_NAME = "MSSQL_HOST"
+_MSSQL_TCP_PORT_NAME = "MSSQL_TCP_PORT"
+_MSSQL_DATABASE_NAME = "MSSQL_DATABASE"
+_MSSQL_USER_NAME = "MSSQL_USER"
+_MSSQL_PWD_NAME = "MSSQL_PWD"
+_MSSQL_CLIENT_ID_NAME = "MSSQL_CLIENT_ID"
+_MSSQL_CLIENT_SECRET_NAME = "MSSQL_CLIENT_SECRET"
+_MSSQL_TENANT_ID_NAME = "MSSQL_TENANT_ID"
+
+
+class MSSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestProtocol):
+    kind = SQLSourceKind.mssql
+
+    def __init__(
+        self,
+        host: Optional[str] = None,
+        port: Optional[Union[int, str]] = None,
+        db: Optional[str] = None,
+        user: Optional[str] = None,
+        password: Optional[str] = None,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        tenant_id: Optional[str] = None,
+        name: Optional[str] = None,
+        engine_args: Optional[Dict[str, Any]] = None,
+        async_engine_args: Optional[Dict[str, Any]] = None,
+        integration_variable_override: Optional[Mapping[str, str]] = None,
+    ):
+        try:
+            import pyodbc
+        except ImportError as e:
+            raise missing_dependency_exception("chalkpy[mssql]", original_error=e)
+        del pyodbc
+
+        self.name = name
+        self.host = host or load_integration_variable(
+            integration_name=name, name=_MSSQL_HOST_NAME, override=integration_variable_override
+        )
+        self.port = (
+            int(port)
+            if port is not None
+            else load_integration_variable(
+                integration_name=name, name=_MSSQL_TCP_PORT_NAME, parser=int, override=integration_variable_override
+            )
+        )
+        self.db = db or load_integration_variable(
+            integration_name=name, name=_MSSQL_DATABASE_NAME, override=integration_variable_override
+        )
+        self.user = user or load_integration_variable(
+            integration_name=name,
+            name=_MSSQL_USER_NAME,
+            override=integration_variable_override,
+        )
+        self.password = password or load_integration_variable(
+            integration_name=name,
+            name=_MSSQL_PWD_NAME,
+            override=integration_variable_override,
+        )
+        self.client_id = client_id or load_integration_variable(
+            integration_name=name,
+            name=_MSSQL_CLIENT_ID_NAME,
+            override=integration_variable_override,
+        )
+        self.client_secret = client_secret or load_integration_variable(
+            integration_name=name,
+            name=_MSSQL_CLIENT_SECRET_NAME,
+            override=integration_variable_override,
+        )
+        self.tenant_id = tenant_id or load_integration_variable(
+            integration_name=name,
+            name=_MSSQL_TENANT_ID_NAME,
+            override=integration_variable_override,
+        )
+        self.ingested_tables: Dict[str, Any] = {}
+
+        if engine_args is None:
+            engine_args = {}
+        if async_engine_args is None:
+            async_engine_args = {}
+
+        if name:
+            engine_args_from_ui = self._load_env_engine_args(name, override=integration_variable_override)
+            for k, v in engine_args_from_ui.items():
+                engine_args.setdefault(k, v)
+                async_engine_args.setdefault(k, v)
+
+        chalk_default_engine_args = {
+            "pool_size": 20,
+            "max_overflow": 60,
+            "pool_recycle": 90,
+        }
+        for k, v in chalk_default_engine_args.items():
+            engine_args.setdefault(k, v)
+            async_engine_args.setdefault(k, v)
+
+        # Set isolation level for read-only operations
+        engine_args.setdefault("isolation_level", os.environ.get("CHALK_SQL_ISOLATION_LEVEL", "AUTOCOMMIT"))
+        async_engine_args.setdefault("isolation_level", os.environ.get("CHALK_SQL_ISOLATION_LEVEL", "AUTOCOMMIT"))
+
+        BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args=async_engine_args)
+
+        # Register event listener for managed identity token injection
+        if not self.client_id and not self.user:
+            from sqlalchemy import event
+
+            event.listens_for(self.get_engine(), "do_connect")(self._inject_azure_token)
+
+    def _inject_azure_token(self, _dialect: Any, _conn_rec: Any, _cargs: Any, cparams: Dict[str, Any]) -> None:
+        """SQLAlchemy event handler to inject Azure AD token on each connection."""
+        try:
+            from azure.identity import DefaultAzureCredential
+        except ImportError:
+            raise missing_dependency_exception("chalkpy[mssql]")
+
+        try:
+            credential = DefaultAzureCredential()
+            token = credential.get_token("https://database.windows.net/.default")
+        except Exception as e:
+            raise Exception(f"Failed to acquire Azure AD token for MSSQL connection: {e}") from e
+
+        token_bytes = token.token.encode("utf-16-le")
+        token_struct = struct.pack(f"<I{len(token_bytes)}s", len(token_bytes), token_bytes)
+        cparams["attrs_before"] = {1256: token_struct}  # SQL_COPT_SS_ACCESS_TOKEN
+
+    def get_sqlglot_dialect(self) -> str | None:
+        return "tsql"
+
+    def local_engine_url(self) -> "URL":
+        from sqlalchemy.engine.url import URL
+
+        trust_server_cert = env_var_bool("CHALK_MSSQL_TRUST_SERVER_CERTIFICATE", default=False)
+
+        if self.client_id and self.client_secret and self.tenant_id:
+            # Service Principal authentication
+            # Use pyodbc driver for Azure AD support
+            query_params = {
+                "driver": "ODBC Driver 18 for SQL Server",
+                "Authentication": "ActiveDirectoryServicePrincipal",
+            }
+            if trust_server_cert:
+                query_params["TrustServerCertificate"] = "yes"
+            return URL.create(
+                drivername="mssql+pyodbc",
+                username=self.client_id,
+                password=self.client_secret,
+                host=self.host,
+                port=self.port,
+                database=self.db,
+                query=query_params,
+            )
+        elif self.user and self.password:
+            # SQL authentication
+            query_params = {"driver": "ODBC Driver 18 for SQL Server"}
+            if trust_server_cert:
+                query_params["TrustServerCertificate"] = "yes"
+            return URL.create(
+                drivername="mssql+pyodbc",
+                username=self.user,
+                password=self.password,
+                host=self.host,
+                port=self.port,
+                database=self.db,
+                query=query_params,
+            )
+        else:
+            # Managed Identity: token injected via event listener
+            connection_string = (
+                f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={self.host},{self.port};DATABASE={self.db}"
+            )
+            if trust_server_cert:
+                connection_string += ";TrustServerCertificate=yes"
+            return URL.create(
+                drivername="mssql+pyodbc",
+                query={"odbc_connect": connection_string},
+            )

+    def _recreate_integration_variables(self) -> dict[str, str]:
+        return {
+            k: v
+            for k, v in [
+                create_integration_variable(_MSSQL_HOST_NAME, self.name, self.host),
+                create_integration_variable(_MSSQL_TCP_PORT_NAME, self.name, self.port),
+                create_integration_variable(_MSSQL_DATABASE_NAME, self.name, self.db),
+                create_integration_variable(_MSSQL_USER_NAME, self.name, self.user),
+                create_integration_variable(_MSSQL_PWD_NAME, self.name, self.password),
+                create_integration_variable(_MSSQL_CLIENT_ID_NAME, self.name, self.client_id),
+                create_integration_variable(_MSSQL_CLIENT_SECRET_NAME, self.name, self.client_secret),
+                create_integration_variable(_MSSQL_TENANT_ID_NAME, self.name, self.tenant_id),
+            ]
+            if v is not None
+        }
+
+    def execute_query_efficient_raw(
+        self,
+        finalized_query: FinalizedChalkQuery,
+        expected_output_schema: "pa.Schema",
+        connection: Optional["Connection"],
+        query_execution_parameters: QueryExecutionParameters,
+    ) -> Iterable["pa.RecordBatch"]:
+        """Execute query efficiently for MSSQL and return raw PyArrow RecordBatches."""
+        import contextlib
+
+        import pyarrow as pa
+        import pyarrow.compute as pc
+
+        # Get the compiled query
+        _, _, _ = self.compile_query(finalized_query)
+
+        # Use existing connection or create new one
+        with (self.get_engine().connect() if connection is None else contextlib.nullcontext(connection)) as cnx:
+            with cnx.begin():
+                # Handle temp tables
+                with contextlib.ExitStack() as exit_stack:
+                    for (
+                        _,
+                        temp_value,
+                        create_temp_table,
+                        temp_table,
+                        drop_temp_table,
+                    ) in finalized_query.temp_tables.values():
+                        exit_stack.enter_context(
+                            self._create_temp_table(create_temp_table, temp_table, drop_temp_table, cnx, temp_value)
+                        )
+
+                    # Execute query
+                    result = cnx.execute(finalized_query.query, finalized_query.params)
+
+                    # Convert result to PyArrow
+                    rows = result.fetchall()
+                    column_names = result.keys()
+
+                    if not rows:
+                        # Return empty batch with expected schema
+                        arrays = [pa.nulls(0, field.type) for field in expected_output_schema]
+                        batch = pa.RecordBatch.from_arrays(arrays, schema=expected_output_schema)
+                        if query_execution_parameters.yield_empty_batches:
+                            yield batch
+                        return
+
+                    # Convert rows to column arrays
+                    data: dict[str, list[Any]] = {}
+                    for i, col_name in enumerate(column_names):
+                        col_data = [row[i] for row in rows]
+                        data[col_name] = col_data
+
+                    # Create PyArrow table
+                    table = pa.table(data)
+
+                    # Map columns to expected schema
+                    arrays: list[pa.Array] = []
+                    for field in expected_output_schema:
+                        if field.name in table.column_names:
+                            col = table.column(field.name)
+                            # Cast to expected type if needed
+                            if col.type != field.type:
+                                col = pc.cast(col, field.type)
+                            arrays.append(col)
+                        else:
+                            # Column not found, create null array
+                            arrays.append(pa.nulls(len(table), field.type))
+
+                    batch = pa.RecordBatch.from_arrays(arrays, schema=expected_output_schema)
+                    yield batch
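The three local_engine_url branches above give the new MSSQL source three authentication modes: Azure AD service principal, SQL user/password, and managed identity (with _inject_azure_token attaching a fresh access token on every connect). A minimal usage sketch with placeholder credentials, calling the internal class directly (the public wrapper added in chalk/sql/__init__.py may expose a different entry point):

    from chalk.sql._internal.integrations.mssql import MSSQLSourceImpl

    # Service-principal mode: all three Azure AD fields are set.
    mssql = MSSQLSourceImpl(
        host="example.database.windows.net",  # placeholder server
        port=1433,
        db="features",
        client_id="<client-id>",
        client_secret="<client-secret>",
        tenant_id="<tenant-id>",
        name="my_mssql",
    )

Any argument left as None falls back to the matching MSSQL_* integration variable; leaving out both user and client_id selects the managed-identity path.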
chalk/sql/_internal/integrations/postgres.py
CHANGED
@@ -28,6 +28,7 @@ from chalk.sql.protocols import SQLSourceWithTableIngestProtocol
 from chalk.utils.environment_parsing import env_var_bool
 from chalk.utils.log_with_context import get_logger
 from chalk.utils.missing_dependency import missing_dependency_exception
+from chalk.utils.pl_helpers import polars_uses_schema_overrides
 from chalk.utils.tracing import safe_add_metrics, safe_add_tags, safe_trace
 
 if TYPE_CHECKING:
@@ -262,8 +263,11 @@ class PostgreSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestProtocol):
             # pl.read_csv(use_pyarrow=True) has the same performance degradation,
             # UNLESS a `dtypes` arg is provided.
 
-            # 'dtypes' deprecated for 'schema_overrides' in polars 0.20
-            pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)
+            # 'dtypes' deprecated for 'schema_overrides' in polars 0.20.31+
+            if polars_uses_schema_overrides:
+                pl_table = pl.read_csv(buffer, schema_overrides=parse_dtypes)  # pyright: ignore[reportCallIssue]
+            else:
+                pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)  # pyright: ignore[reportCallIssue]
             if boolean_columns:
                 # DO NOT use map_dict. Causes a segfault when multiple uvicorn workers are handling
                 # requests in parallel.
@@ -498,7 +502,11 @@ class PostgreSQLSourceImpl(BaseSQLSource, TableIngestMixIn, SQLSourceWithTableIngestProtocol):
            else:
                parse_dtypes[field.name] = pl.Utf8
 
-        pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)
+        # 'dtypes' deprecated for 'schema_overrides' in polars 0.20.31+
+        if polars_uses_schema_overrides:
+            pl_table = pl.read_csv(buffer, schema_overrides=parse_dtypes)  # pyright: ignore[reportCallIssue]
+        else:
+            pl_table = pl.read_csv(buffer, dtypes=parse_dtypes)  # pyright: ignore[reportCallIssue]
 
         # Convert to arrow and map to expected schema
         arrow_table = pl_table.to_arrow()
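Both read_csv call sites now branch on polars_uses_schema_overrides, imported from chalk/utils/pl_helpers.py (also changed in this release, +190 -37). The helper's body is not part of this diff; presumably it is a version gate along these lines (a hypothetical sketch, not the actual implementation):

    import polars as pl

    # polars renamed read_csv's `dtypes` parameter to `schema_overrides`
    # in 0.20.31, per the comment in the hunks above.
    _pl_version = tuple(int(p) for p in pl.__version__.split(".")[:3])
    polars_uses_schema_overrides = _pl_version >= (0, 20, 31)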
chalk/sql/_internal/integrations/redshift.py
CHANGED
@@ -260,6 +260,8 @@ class RedshiftSourceImpl(BaseSQLSource):
         temp_table_name = f"query_{str(uuid.uuid4()).replace('-', '_')}"
         try:
             _logger.debug(f"Executing query & creating temp table '{temp_table_name}'")
+            _public_logger.info(f"Executing Redshift query [{temp_query_id}]: {operation}")
+            _public_logger.debug(f"Query parameters [{temp_query_id}]: {params}")
             cursor.execute(f"CREATE TEMP TABLE {temp_table_name} AS ({operation})", params)
         except Exception as e:
             _public_logger.error(f"Failed to create temp table for operation: {operation}", exc_info=e)
@@ -366,6 +368,8 @@ class RedshiftSourceImpl(BaseSQLSource):
         temp_table_name = f"query_{str(uuid.uuid4()).replace('-', '_')}"
         try:
             _logger.debug(f"Executing query & creating temp table '{temp_table_name}'")
+            _public_logger.info(f"Executing Redshift query [{temp_query_id}]: {operation}")
+            _public_logger.debug(f"Query parameters [{temp_query_id}]: {params}")
             cursor.execute(f"CREATE TEMP TABLE {temp_table_name} AS ({operation})", params)
         except Exception as e:
             _public_logger.error(f"Failed to create temp table for operation: {operation}", exc_info=e)
chalk/sql/_internal/integrations/snowflake.py
CHANGED
@@ -31,6 +31,7 @@ from chalk.sql.finalized_query import FinalizedChalkQuery
 from chalk.utils.df_utils import is_list_like, pa_array_to_pl_series
 from chalk.utils.environment_parsing import env_var_bool
 from chalk.utils.missing_dependency import missing_dependency_exception
+from chalk.utils.pl_helpers import str_json_decode_compat
 from chalk.utils.threading import DEFAULT_IO_EXECUTOR, MultiSemaphore
 from chalk.utils.tracing import safe_incr, safe_set_gauge
 
@@ -398,11 +399,19 @@ class SnowflakeSourceImpl(BaseSQLSource):
            if pa.types.is_list(expected_type) or pa.types.is_large_list(expected_type):
                if pa.types.is_string(actual_type) or pa.types.is_large_string(actual_type):
                    series = pa_array_to_pl_series(tbl[col_name])
-                   column =
+                   column = (
+                       str_json_decode_compat(series, feature.converter.polars_dtype)
+                       .to_arrow()
+                       .cast(expected_type)
+                   )
            if pa.types.is_struct(expected_type):
                if pa.types.is_string(actual_type):
                    series = pa_array_to_pl_series(tbl[col_name])
-                   column =
+                   column = (
+                       str_json_decode_compat(series, feature.converter.polars_dtype)
+                       .to_arrow()
+                       .cast(expected_type)
+                   )
            if actual_type != expected_type:
                column = column.cast(options=pc.CastOptions(target_type=expected_type, allow_time_truncate=True))
            if isinstance(column, pa.ChunkedArray):
chalk/sql/_internal/integrations/util.py
CHANGED
@@ -4,6 +4,7 @@ import pyarrow as pa
 
 from chalk.features._encoding.pyarrow import pyarrow_to_polars
 from chalk.utils.df_utils import pa_cast, pa_table_to_pl_df
+from chalk.utils.pl_helpers import str_json_decode_compat
 
 
 def convert_hex_to_binary(table: pa.Table, cols_to_convert: List[str]) -> pa.Table:
@@ -60,7 +61,7 @@ def json_parse_and_cast(tbl: pa.Table, schema: Mapping[str, pa.DataType]) -> pa.Table:
         expr = pl.col(col_name)
         if pl_df.schema[col_name] == pl.Binary():
             expr = expr.cast(pl.Utf8())
-        expr = expr
+        expr = str_json_decode_compat(expr, pl_dtype).alias(col_name)
         pl_exprs.append(expr)
 
     pl_df = pl_df.with_columns(pl_exprs)
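Both the Snowflake and util.py call sites now delegate JSON decoding to str_json_decode_compat, which is passed either a polars Series (Snowflake) or a polars expression (util.py) plus a target dtype. Its body is not shown in this diff; a plausible sketch is a shim over the polars rename of str.json_extract to str.json_decode:

    # Hypothetical sketch; the real helper lives in chalk/utils/pl_helpers.py.
    def str_json_decode_compat(series_or_expr, dtype):
        ns = series_or_expr.str
        if hasattr(ns, "json_decode"):  # newer polars
            return ns.json_decode(dtype)
        return ns.json_extract(dtype)  # older polars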
chalk/sql/_internal/sql_file_resolver.py
CHANGED
@@ -19,20 +19,21 @@ from typing import (
     Literal,
     Mapping,
     Optional,
+    ParamSpec,
     Sequence,
     Type,
+    TypeAlias,
     TypeVar,
     Union,
     cast,
 )
 
 import yaml
-from typing_extensions import ParamSpec, TypeAlias
 from yaml.scanner import ScannerError
 
 from chalk import Environments, OfflineResolver, OnlineResolver, Tags
 from chalk._lsp.error_builder import SQLFileResolverErrorBuilder
-from chalk.features import DataFrame, Feature, FeatureNotFoundException, Features
+from chalk.features import DataFrame, Feature, FeatureNotFoundException, Features, Underscore
 from chalk.features.feature_set import CURRENT_FEATURE_REGISTRY
 from chalk.features.namespace_context import build_namespaced_name
 from chalk.features.namespace_context import namespace as namespace_ctx
@@ -41,6 +42,8 @@ from chalk.features.resolver import Cron, ResolverArgErrorHandler, StreamResolver
 from chalk.sql._internal.incremental import IncrementalSettings
 from chalk.sql._internal.integrations.bigquery import BigQuerySourceImpl
 from chalk.sql._internal.integrations.cloudsql import CloudSQLSourceImpl
+from chalk.sql._internal.integrations.databricks import DatabricksSourceImpl
+from chalk.sql._internal.integrations.mssql import MSSQLSourceImpl
 from chalk.sql._internal.integrations.mysql import MySQLSourceImpl
 from chalk.sql._internal.integrations.postgres import PostgreSQLSourceImpl
 from chalk.sql._internal.integrations.redshift import RedshiftSourceImpl
@@ -84,8 +87,10 @@ _SOURCES: Mapping[str, Union[Type[BaseSQLSource], Type[StreamSource]]] = {
     "postgres": PostgreSQLSourceImpl,
     "postgresql": PostgreSQLSourceImpl,
     "mysql": MySQLSourceImpl,
+    "mssql": MSSQLSourceImpl,
     "bigquery": BigQuerySourceImpl,
     "cloudsql": CloudSQLSourceImpl,
+    "databricks": DatabricksSourceImpl,
     "redshift": RedshiftSourceImpl,
     "sqlite": SQLiteSourceImpl,
     "kafka": KafkaSource,
@@ -241,6 +246,7 @@ class SQLStringResult:
     override_comment_dict: Optional[CommentDict] = None
     override_name: Optional[str] = None
     autogenerated: bool = False
+    postprocessing_expr: Underscore | None = None
 
     def __post_init__(self):
         # Validation: if autogenerated is True, override_name must not be None
@@ -302,7 +308,7 @@ def get_sql_file_resolvers(
     """Iterate through all `.chalk.sql` filepaths, gather the sql strings, and get a resolver hopefully for each."""
     for dp, dn, fn in os.walk(os.path.expanduser(sql_file_resolve_location)):
         del dn  # unused
-        for f in fn:
+        for f in sorted(fn):  # Sort filenames for deterministic ordering
            filepath = os.path.join(dp, f)
            if not filepath.endswith(CHALK_SQL_FILE_RESOLVER_FILENAME_SUFFIX):
                continue
@@ -315,7 +321,10 @@
                continue
            _filepath_to_sql_string[filepath] = sql_string_result.sql_string
            yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
-    for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers():
+    # Only yield generated resolvers whose filepath is under the directory being scanned
+    for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers(
+        filter_by_directory=sql_file_resolve_location
+    ):
        yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
@@ -338,8 +347,12 @@
            sql_string_result=sql_string_result,
            has_import_errors=has_import_errors,
        )
+    # Only yield generated resolvers whose filepath is in the paths list
+    # If paths is empty, yield all generated resolvers (no filtering)
    for sql_string_result in _GENERATED_SQL_FILE_RESOLVER_REGISTRY.get_generated_sql_file_resolvers():
-        yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
+        # Check if this generated resolver's filepath is in the provided paths
+        if not paths or sql_string_result.path in paths:
+            yield get_sql_file_resolver(sources, sql_string_result, has_import_errors)
 
 
 def get_sql_file_resolver(
@@ -692,6 +705,7 @@
                incremental_settings=incremental_settings,
                params_to_root_fqn=glot_result.args,
            ),
+           postprocessing=sql_string_result.postprocessing_expr,
        )
    except Exception as e:
        raise e
@@ -1572,6 +1586,7 @@ class GeneratedSQLFileResolverInfo:
     filepath: str
     sql_string: str
     comment_dict: CommentDict
+    postprocessing_expr: Underscore | None
 
 
 class GeneratedSQLFileResolverRegistry:
@@ -1579,17 +1594,43 @@
         super().__init__()
         self.resolver_name_to_generated_infos: Dict[str, GeneratedSQLFileResolverInfo] = {}
 
-    def add_sql_file_resolver(
+    def add_sql_file_resolver(
+        self,
+        name: str,
+        filepath: str,
+        sql_string: str,
+        comment_dict: CommentDict,
+        postprocessing_expr: Underscore | None = None,
+    ):
         if name in self.resolver_name_to_generated_infos and filepath != "<notebook>":
             raise ValueError(f"A SQL file resolver already exists with name '{name}'. They must have unique names.")
         self.resolver_name_to_generated_infos[name] = GeneratedSQLFileResolverInfo(
-            filepath=filepath,
-            sql_string=sql_string,
-            comment_dict=comment_dict,
+            filepath=filepath, sql_string=sql_string, comment_dict=comment_dict, postprocessing_expr=postprocessing_expr
         )
 
-    def get_generated_sql_file_resolvers(self) -> Iterable[SQLStringResult]:
+    def get_generated_sql_file_resolvers(self, filter_by_directory: Path | None = None) -> Iterable[SQLStringResult]:
+        """
+        Yield generated SQL file resolvers, optionally filtered by directory.
+
+        Args:
+            filter_by_directory: If provided, only yield resolvers whose filepath is under this directory.
+                If None, yield all generated resolvers (legacy behavior).
+        """
         for name, generated_info in self.resolver_name_to_generated_infos.items():
+            # If filtering by directory is requested, check if the resolver's filepath is under that directory
+            if filter_by_directory is not None:
+                # Special case: notebook resolvers (filepath == "<notebook>") should never be auto-yielded
+                # when scanning directories, only when explicitly requested
+                if generated_info.filepath == "<notebook>":
+                    continue
+
+                # Convert to absolute paths for comparison and check if resolver path is under filter directory
+                resolver_path = Path(generated_info.filepath).resolve()
+                filter_path = Path(filter_by_directory).resolve()
+
+                if not resolver_path.is_relative_to(filter_path):
+                    continue
+
             yield SQLStringResult(
                 path=generated_info.filepath,
                 sql_string=generated_info.sql_string,
@@ -1597,6 +1638,7 @@ class GeneratedSQLFileResolverRegistry:
                 override_comment_dict=generated_info.comment_dict,
                 override_name=name,
                 autogenerated=True,
+                postprocessing_expr=generated_info.postprocessing_expr,
             )
 
 
@@ -1625,6 +1667,7 @@ def make_sql_file_resolver(
     partitioned_by: Collection[Any] | None = None,
     total: Optional[bool] = None,
     skip_sql_validation: Optional[bool] = None,
+    postprocessing_expression: Optional[Underscore] = None,
 ):
     """Generate a Chalk SQL file resolver from a filepath and a sql string.
     This will generate a resolver in your web dashboard that can be queried,
@@ -1808,6 +1851,7 @@ def make_sql_file_resolver(
        sql_string=sql,
        comment_dict=comment_dict,
        name=name,
+       postprocessing_expr=postprocessing_expression,
    )
    if is_defined_in_notebook:
        from chalk.sql import SQLSourceGroup
@@ -1844,6 +1888,7 @@ def make_sql_file_resolver(
        override_comment_dict=generated_info.comment_dict,
        override_name=name,
        autogenerated=True,
+       postprocessing_expr=postprocessing_expression,
    )
    resolver_result = get_sql_file_resolver(
        sources=current_sql_sources, sql_string_result=info, has_import_errors=False
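The net effect of the registry changes: programmatically generated resolvers (for example via make_sql_file_resolver, which now also threads a postprocessing_expression through to the resolver) are only yielded when their filepath sits under the directory being scanned, and notebook-defined resolvers are never picked up by a directory scan. The containment test shown above reduces to:

    from pathlib import Path

    root = Path("resolvers").resolve()
    Path("resolvers/users.chalk.sql").resolve().is_relative_to(root)  # True: yielded
    Path("/tmp/users.chalk.sql").resolve().is_relative_to(root)       # False: skipped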
chalk/sql/_internal/sql_source.py
CHANGED
@@ -163,6 +163,7 @@ class SQLSourceKind(str, Enum):
     athena = "athena"
     duckdb = "duckdb"
     dynamodb = "dynamodb"
+    mssql = "mssql"
     mysql = "mysql"
     postgres = "postgres"
     redshift = "redshift"
@@ -213,11 +214,32 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         if getattr(self, "kind", None) != SQLSourceKind.trino:
             engine_args.setdefault("pool_pre_ping", env_var_bool("USE_CLIENT_POOL_PRE_PING"))
             async_engine_args.setdefault("pool_pre_ping", env_var_bool("USE_CLIENT_POOL_PRE_PING"))
-
-        self.
+        # Store raw args internally, expose filtered versions via properties
+        self._raw_engine_args = engine_args
+        self._raw_async_engine_args = async_engine_args
         self._engine = None
         self._async_engine = None
 
+    @property
+    def engine_args(self) -> Dict[str, Any]:
+        """Engine arguments with native_args filtered out for SQLAlchemy."""
+        return {k: v for k, v in self._raw_engine_args.items() if k != "native_args"}
+
+    @engine_args.setter
+    def engine_args(self, args: dict[str, Any]):
+        """Set raw engine args (for backward compatibility)."""
+        self._raw_engine_args = args
+
+    @property
+    def async_engine_args(self) -> Dict[str, Any]:
+        """Async engine arguments with native_args filtered out for SQLAlchemy."""
+        return {k: v for k, v in self._raw_async_engine_args.items() if k != "native_args"}
+
+    @async_engine_args.setter
+    def async_engine_args(self, args: dict[str, Any]):
+        """Set raw async engine args (for backward compatibility)."""
+        self._raw_async_engine_args = args
+
     @property
     def _engine_args(self):
         """Backcompat support for private subclassing of BaseSQLSource"""
@@ -238,6 +260,16 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         """Backcompat support for private subclassing of BaseSQLSource"""
         self.async_engine_args = args
 
+    @property
+    def native_args(self) -> Dict[str, Any]:
+        """Native arguments to be passed to the underlying database driver.
+
+        These arguments are extracted from engine_args and async_engine_args
+        and are not passed to SQLAlchemy's create_engine or create_async_engine.
+        Instead, they should be used by subclasses to configure native driver connections.
+        """
+        return self._raw_engine_args.get("native_args", {})
+
     def get_sqlglot_dialect(self) -> Union[str, None]:
         """Returns the name of the SQL dialect (if it has one) for `sqlglot` to parse the SQL string.
         This allows for use of dialect-specific syntax while parsing and modifying queries."""
@@ -831,6 +863,7 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         if self._engine is None:
             self.register_sqlalchemy_compiler_overrides()
             self._check_engine_isolation_level()
+            # engine_args property already filters out native_args
             self._engine = create_engine(url=self.local_engine_url(), **self.engine_args)
         return self._engine
 
@@ -840,6 +873,7 @@ class BaseSQLSource(BaseSQLSourceProtocol):
         if self._async_engine is None:
             self.register_sqlalchemy_compiler_overrides()
             self._check_engine_isolation_level()
+            # async_engine_args property already filters out native_args
             self._async_engine = create_async_engine(url=self.async_local_engine_url(), **self.async_engine_args)
         return self._async_engine
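A small sketch of the new native_args contract, assuming any BaseSQLSource subclass that forwards its engine_args (the source class and argument values here are illustrative only):

    from chalk.sql._internal.integrations.postgres import PostgreSQLSourceImpl

    source = PostgreSQLSourceImpl(
        name="pg",
        engine_args={"pool_size": 10, "native_args": {"fetch_size": 10_000}},
    )
    assert source.native_args == {"fetch_size": 10_000}
    assert "native_args" not in source.engine_args  # filtered before create_engine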
chalk/streams/__init__.py
CHANGED
@@ -1,7 +1,5 @@
 import inspect
-from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, TypeVar, Union
-
-from typing_extensions import ParamSpec
+from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, ParamSpec, TypeVar, Union
 
 from chalk._lsp.error_builder import get_resolver_error_builder
 from chalk.features.tag import Environments
chalk/streams/_kafka_source.py
CHANGED
@@ -27,6 +27,7 @@ _KAFKA_SASL_MECHANISM_NAME = "KAFKA_SASL_MECHANISM"
 _KAFKA_SASL_USERNAME_NAME = "KAFKA_SASL_USERNAME"
 _KAFKA_SASL_PASSWORD_NAME = "KAFKA_SASL_PASSWORD"
 _KAFKA_ADDITIONAL_KAFKA_ARGS_NAME = "KAFKA_ADDITIONAL_KAFKA_ARGS"
+_KAFKA_DEAD_LETTER_QUEUE_TOPIC = "KAFKA_DEAD_LETTER_QUEUE_TOPIC"
 
 
 class KafkaSource(StreamSource, SinkIntegrationProtocol, BaseModel, frozen=True):
@@ -165,7 +166,10 @@ class KafkaSource(StreamSource, SinkIntegrationProtocol, BaseModel, frozen=True):
             ),
             name=name,
             late_arrival_deadline=late_arrival_deadline,
-            dead_letter_queue_topic=dead_letter_queue_topic
+            dead_letter_queue_topic=dead_letter_queue_topic
+            or load_integration_variable(
+                name=_KAFKA_DEAD_LETTER_QUEUE_TOPIC, integration_name=name, override=integration_variable_override
+            ),
             ssl_ca_file=ssl_ca_file
             or load_integration_variable(
                 name=_KAFKA_SSL_CA_FILE_NAME, integration_name=name, override=integration_variable_override