chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
- chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
- chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
- chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
- chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
- chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
- chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
- chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
- chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_monitoring/Chart.py +1 -3
- chalk/_version.py +1 -1
- chalk/cli.py +5 -10
- chalk/client/client.py +178 -64
- chalk/client/client_async.py +154 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +738 -112
- chalk/client/client_impl.py +541 -136
- chalk/client/dataset.py +27 -6
- chalk/client/models.py +99 -2
- chalk/client/serialization/model_serialization.py +126 -10
- chalk/config/project_config.py +1 -1
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/df/ast_parser.py +2 -10
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_embedding/sentence_transformer.py +1 -1
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/_encoding/pyarrow.py +20 -4
- chalk/features/_encoding/rich.py +1 -3
- chalk/features/_tensor.py +1 -2
- chalk/features/dataframe/_filters.py +14 -5
- chalk/features/dataframe/_impl.py +91 -36
- chalk/features/dataframe/_validation.py +11 -7
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set.py +1 -2
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -12
- chalk/features/inference.py +65 -10
- chalk/features/resolver.py +338 -56
- chalk/features/tag.py +1 -3
- chalk/features/underscore_features.py +2 -1
- chalk/functions/__init__.py +456 -21
- chalk/functions/holidays.py +1 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +186 -74
- chalk/ml/__init__.py +6 -2
- chalk/ml/model_hooks.py +368 -51
- chalk/ml/model_reference.py +68 -10
- chalk/ml/model_version.py +34 -21
- chalk/ml/utils.py +143 -40
- chalk/operators/_utils.py +14 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +4 -0
- chalk/parsed/expressions.py +1 -3
- chalk/parsed/json_conversions.py +21 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +13 -1
- chalk/serialization/parsed_annotation.py +25 -12
- chalk/sql/__init__.py +221 -0
- chalk/sql/_internal/integrations/athena.py +6 -1
- chalk/sql/_internal/integrations/bigquery.py +22 -2
- chalk/sql/_internal/integrations/databricks.py +61 -18
- chalk/sql/_internal/integrations/mssql.py +281 -0
- chalk/sql/_internal/integrations/postgres.py +11 -3
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/integrations/snowflake.py +11 -2
- chalk/sql/_internal/integrations/util.py +2 -1
- chalk/sql/_internal/sql_file_resolver.py +55 -10
- chalk/sql/_internal/sql_source.py +36 -2
- chalk/streams/__init__.py +1 -3
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +16 -4
- chalk/streams/types.py +1 -2
- chalk/utils/__init__.py +1 -3
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +14 -5
- chalk/utils/df_utils.py +2 -2
- chalk/utils/duration.py +1 -3
- chalk/utils/job_log_display.py +538 -0
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/notebook.py +255 -2
- chalk/utils/pl_helpers.py +190 -37
- chalk/utils/pydanticutil/pydantic_compat.py +1 -2
- chalk/utils/storage_client.py +246 -0
- chalk/utils/threading.py +1 -3
- chalk/utils/tracing.py +194 -86
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/sql/__init__.py
CHANGED
|
@@ -10,6 +10,7 @@ from chalk.sql._internal.integrations.clickhouse import ClickhouseSourceImpl
|
|
|
10
10
|
from chalk.sql._internal.integrations.cloudsql import CloudSQLSourceImpl
|
|
11
11
|
from chalk.sql._internal.integrations.databricks import DatabricksSourceImpl
|
|
12
12
|
from chalk.sql._internal.integrations.dynamodb import DynamoDBSourceImpl
|
|
13
|
+
from chalk.sql._internal.integrations.mssql import MSSQLSourceImpl
|
|
13
14
|
from chalk.sql._internal.integrations.mysql import MySQLSourceImpl
|
|
14
15
|
from chalk.sql._internal.integrations.postgres import PostgreSQLSourceImpl
|
|
15
16
|
from chalk.sql._internal.integrations.redshift import RedshiftSourceImpl
|
|
@@ -464,6 +465,196 @@ def MySQLSource(
|
|
|
464
465
|
)
|
|
465
466
|
|
|
466
467
|
|
|
468
|
+
@overload
|
|
469
|
+
def MSSQLSource() -> SQLSourceWithTableIngestProtocol:
|
|
470
|
+
"""If you have only one MSSQL connection that you'd like
|
|
471
|
+
to add to Chalk, you do not need to specify any arguments
|
|
472
|
+
to construct the source in your code.
|
|
473
|
+
|
|
474
|
+
Returns
|
|
475
|
+
-------
|
|
476
|
+
SQLSourceWithTableIngestProtocol
|
|
477
|
+
The SQL source for use in Chalk resolvers.
|
|
478
|
+
|
|
479
|
+
Examples
|
|
480
|
+
--------
|
|
481
|
+
>>> mssql = MSSQLSource()
|
|
482
|
+
"""
|
|
483
|
+
...
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
@overload
|
|
487
|
+
def MSSQLSource(
|
|
488
|
+
*,
|
|
489
|
+
name: str,
|
|
490
|
+
engine_args: Optional[Dict[str, Any]] = ...,
|
|
491
|
+
async_engine_args: Optional[Dict[str, Any]] = ...,
|
|
492
|
+
) -> SQLSourceWithTableIngestProtocol:
|
|
493
|
+
"""If you have only one MSSQL integration, there's no need to provide
|
|
494
|
+
a distinguishing name.
|
|
495
|
+
|
|
496
|
+
But what happens when you have two data sources of the same kind?
|
|
497
|
+
When you create a new data source from your dashboard,
|
|
498
|
+
you have an option to provide a name for the integration.
|
|
499
|
+
You can then reference this name in the code directly.
|
|
500
|
+
|
|
501
|
+
Parameters
|
|
502
|
+
----------
|
|
503
|
+
name
|
|
504
|
+
Name of the integration, as configured in your dashboard.
|
|
505
|
+
engine_args
|
|
506
|
+
Additional arguments to use when constructing the SQLAlchemy engine. These arguments will be
|
|
507
|
+
merged with any default arguments from the named integration.
|
|
508
|
+
async_engine_args
|
|
509
|
+
Additional arguments to use when constructing an async SQLAlchemy engine.
|
|
510
|
+
|
|
511
|
+
Returns
|
|
512
|
+
-------
|
|
513
|
+
SQLSourceWithTableIngestProtocol
|
|
514
|
+
The SQL source for use in Chalk resolvers.
|
|
515
|
+
|
|
516
|
+
Examples
|
|
517
|
+
--------
|
|
518
|
+
>>> source = MSSQLSource(name="RISK")
|
|
519
|
+
"""
|
|
520
|
+
...
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
@overload
|
|
524
|
+
def MSSQLSource(
|
|
525
|
+
*,
|
|
526
|
+
name: str | None = ...,
|
|
527
|
+
host: str,
|
|
528
|
+
port: Union[int, str] = ...,
|
|
529
|
+
db: str = ...,
|
|
530
|
+
user: str = ...,
|
|
531
|
+
password: str = ...,
|
|
532
|
+
client_id: str = ...,
|
|
533
|
+
client_secret: str = ...,
|
|
534
|
+
tenant_id: str = ...,
|
|
535
|
+
engine_args: Optional[Dict[str, Any]] = ...,
|
|
536
|
+
async_engine_args: Optional[Dict[str, Any]] = ...,
|
|
537
|
+
) -> SQLSourceWithTableIngestProtocol:
|
|
538
|
+
"""
|
|
539
|
+
You can also configure the integration directly using environment
|
|
540
|
+
variables on your local machine or from those added through the
|
|
541
|
+
generic environment variable support (https://docs.chalk.ai/docs/env-vars).
|
|
542
|
+
|
|
543
|
+
Authentication Methods:
|
|
544
|
+
- SQL Authentication: Provide `user` and `password`
|
|
545
|
+
- Azure AD Managed Identity: Leave `user`, `password`, `client_id`, `client_secret`, and `tenant_id` empty
|
|
546
|
+
- Azure AD Service Principal: Provide `client_id`, `client_secret`, and `tenant_id`
|
|
547
|
+
|
|
548
|
+
Parameters
|
|
549
|
+
----------
|
|
550
|
+
name
|
|
551
|
+
Name of the integration. Not required unless this SQL Source is used within SQL File Resolvers.
|
|
552
|
+
host
|
|
553
|
+
Name of host to connect to.
|
|
554
|
+
port
|
|
555
|
+
The port number to connect to at the server host.
|
|
556
|
+
db
|
|
557
|
+
The database name.
|
|
558
|
+
user
|
|
559
|
+
MSSQL username to connect as (for SQL authentication).
|
|
560
|
+
password
|
|
561
|
+
The password to be used for SQL authentication.
|
|
562
|
+
client_id
|
|
563
|
+
Azure AD Client ID (for Service Principal authentication).
|
|
564
|
+
client_secret
|
|
565
|
+
Azure AD Client Secret (for Service Principal authentication).
|
|
566
|
+
tenant_id
|
|
567
|
+
Azure AD Tenant ID (for Service Principal authentication).
|
|
568
|
+
engine_args
|
|
569
|
+
Additional arguments to use when constructing the SQLAlchemy engine.
|
|
570
|
+
async_engine_args
|
|
571
|
+
Additional arguments to use when constructing an async SQLAlchemy engine.
|
|
572
|
+
|
|
573
|
+
Returns
|
|
574
|
+
-------
|
|
575
|
+
SQLSourceWithTableIngestProtocol
|
|
576
|
+
The SQL source for use in Chalk resolvers.
|
|
577
|
+
|
|
578
|
+
Examples
|
|
579
|
+
--------
|
|
580
|
+
SQL Authentication:
|
|
581
|
+
>>> import os
|
|
582
|
+
>>> mssql = MSSQLSource(
|
|
583
|
+
... host=os.getenv("MSSQL_HOST"),
|
|
584
|
+
... port=os.getenv("MSSQL_TCP_PORT"),
|
|
585
|
+
... db=os.getenv("MSSQL_DATABASE"),
|
|
586
|
+
... user=os.getenv("MSSQL_USER"),
|
|
587
|
+
... password=os.getenv("MSSQL_PWD"),
|
|
588
|
+
... )
|
|
589
|
+
|
|
590
|
+
Managed Identity (running in Azure):
|
|
591
|
+
>>> mssql = MSSQLSource(
|
|
592
|
+
... host=os.getenv("MSSQL_HOST"),
|
|
593
|
+
... port=os.getenv("MSSQL_TCP_PORT"),
|
|
594
|
+
... db=os.getenv("MSSQL_DATABASE"),
|
|
595
|
+
... )
|
|
596
|
+
|
|
597
|
+
Service Principal:
|
|
598
|
+
>>> mssql = MSSQLSource(
|
|
599
|
+
... host=os.getenv("MSSQL_HOST"),
|
|
600
|
+
... port=os.getenv("MSSQL_TCP_PORT"),
|
|
601
|
+
... db=os.getenv("MSSQL_DATABASE"),
|
|
602
|
+
... client_id=os.getenv("MSSQL_CLIENT_ID"),
|
|
603
|
+
... client_secret=os.getenv("MSSQL_CLIENT_SECRET"),
|
|
604
|
+
... tenant_id=os.getenv("MSSQL_TENANT_ID"),
|
|
605
|
+
... )
|
|
606
|
+
|
|
607
|
+
>>> from chalk.features import online
|
|
608
|
+
>>> @online
|
|
609
|
+
... def resolver_fn() -> User.name:
|
|
610
|
+
... return mssql.query_string("select name from users where id = 4").one()
|
|
611
|
+
"""
|
|
612
|
+
...
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def MSSQLSource(
|
|
616
|
+
*,
|
|
617
|
+
host: Optional[str] = None,
|
|
618
|
+
port: Optional[Union[int, str]] = None,
|
|
619
|
+
db: Optional[str] = None,
|
|
620
|
+
user: Optional[str] = None,
|
|
621
|
+
password: Optional[str] = None,
|
|
622
|
+
client_id: Optional[str] = None,
|
|
623
|
+
client_secret: Optional[str] = None,
|
|
624
|
+
tenant_id: Optional[str] = None,
|
|
625
|
+
name: Optional[str] = None,
|
|
626
|
+
engine_args: Optional[Dict[str, Any]] = None,
|
|
627
|
+
async_engine_args: Optional[Dict[str, Any]] = None,
|
|
628
|
+
) -> SQLSourceWithTableIngestProtocol:
|
|
629
|
+
"""Create a MSSQL data source. SQL-based data sources
|
|
630
|
+
created without arguments assume a configuration in your
|
|
631
|
+
Chalk Dashboard. Those created with the `name=` keyword
|
|
632
|
+
argument will use the configuration for the integration
|
|
633
|
+
with the given name. And finally, those created with
|
|
634
|
+
explicit arguments will use those arguments to configure
|
|
635
|
+
the data source. See the overloaded signatures for more
|
|
636
|
+
details.
|
|
637
|
+
|
|
638
|
+
Supports three authentication methods:
|
|
639
|
+
- SQL Authentication: user + password
|
|
640
|
+
- Azure AD Managed Identity: no credentials (automatic in Azure)
|
|
641
|
+
- Azure AD Service Principal: client_id + client_secret + tenant_id
|
|
642
|
+
"""
|
|
643
|
+
return MSSQLSourceImpl(
|
|
644
|
+
host=host,
|
|
645
|
+
port=port,
|
|
646
|
+
db=db,
|
|
647
|
+
user=user,
|
|
648
|
+
password=password,
|
|
649
|
+
client_id=client_id,
|
|
650
|
+
client_secret=client_secret,
|
|
651
|
+
tenant_id=tenant_id,
|
|
652
|
+
name=name,
|
|
653
|
+
engine_args=engine_args,
|
|
654
|
+
async_engine_args=async_engine_args,
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
|
|
467
658
|
def SQLiteInMemorySource(
|
|
468
659
|
name: Optional[str] = None,
|
|
469
660
|
engine_args: Optional[Dict[str, Any]] = None,
|
|
@@ -848,6 +1039,8 @@ def BigQuerySource(
|
|
|
848
1039
|
credentials_base64: Optional[str] = ...,
|
|
849
1040
|
credentials_path: Optional[str] = ...,
|
|
850
1041
|
engine_args: Optional[Dict[str, Any]] = ...,
|
|
1042
|
+
temp_project: Optional[str] = ...,
|
|
1043
|
+
temp_dataset: Optional[str] = ...,
|
|
851
1044
|
) -> BaseSQLSourceProtocol:
|
|
852
1045
|
"""You can also configure the integration directly using environment
|
|
853
1046
|
variables on your local machine or from those added through the
|
|
@@ -869,6 +1062,10 @@ def BigQuerySource(
|
|
|
869
1062
|
The path to the credentials file to use to connect.
|
|
870
1063
|
engine_args
|
|
871
1064
|
Additional arguments to use when constructing the SQLAlchemy engine.
|
|
1065
|
+
temp_project
|
|
1066
|
+
The BigQuery project to use for temporary tables.
|
|
1067
|
+
temp_dataset
|
|
1068
|
+
The BigQuery dataset to use for temporary tables.
|
|
872
1069
|
|
|
873
1070
|
Returns
|
|
874
1071
|
-------
|
|
@@ -897,6 +1094,8 @@ def BigQuerySource(
|
|
|
897
1094
|
credentials_base64: Optional[str] = None,
|
|
898
1095
|
credentials_path: Optional[str] = None,
|
|
899
1096
|
engine_args: Optional[Dict[str, Any]] = None,
|
|
1097
|
+
temp_project: Optional[str] = None,
|
|
1098
|
+
temp_dataset: Optional[str] = None,
|
|
900
1099
|
) -> BaseSQLSourceProtocol:
|
|
901
1100
|
"""Create a BigQuery data source. SQL-based data sources
|
|
902
1101
|
created without arguments assume a configuration in your
|
|
@@ -914,6 +1113,8 @@ def BigQuerySource(
|
|
|
914
1113
|
location=location,
|
|
915
1114
|
credentials_base64=credentials_base64,
|
|
916
1115
|
credentials_path=credentials_path,
|
|
1116
|
+
temp_project=temp_project,
|
|
1117
|
+
temp_dataset=temp_dataset,
|
|
917
1118
|
engine_args=engine_args,
|
|
918
1119
|
)
|
|
919
1120
|
|
|
@@ -1236,6 +1437,8 @@ def DatabricksSource(
|
|
|
1236
1437
|
access_token: str = ...,
|
|
1237
1438
|
db: str = ...,
|
|
1238
1439
|
port: str = ...,
|
|
1440
|
+
client_id: str = ...,
|
|
1441
|
+
client_secret: str = ...,
|
|
1239
1442
|
engine_args: Optional[Dict[str, Any]] = ...,
|
|
1240
1443
|
) -> BaseSQLSourceProtocol:
|
|
1241
1444
|
"""You can also configure the integration directly using environment
|
|
@@ -1256,6 +1459,10 @@ def DatabricksSource(
|
|
|
1256
1459
|
Database to use.
|
|
1257
1460
|
port
|
|
1258
1461
|
Port number to use.
|
|
1462
|
+
client_id
|
|
1463
|
+
OAuth service principal client ID (alternative to access_token).
|
|
1464
|
+
client_secret
|
|
1465
|
+
OAuth service principal client secret (alternative to access_token).
|
|
1259
1466
|
engine_args
|
|
1260
1467
|
Additional arguments to use when constructing the SQLAlchemy engine.
|
|
1261
1468
|
|
|
@@ -1274,6 +1481,14 @@ def DatabricksSource(
|
|
|
1274
1481
|
... db=os.getenv("DATABRICKS_DATABASE"),
|
|
1275
1482
|
... port=os.getenv("DATABRICKS_PORT"),
|
|
1276
1483
|
... )
|
|
1484
|
+
>>> databricks_with_oauth = DatabricksSource(
|
|
1485
|
+
... host=os.getenv("DATABRICKS_HOST"),
|
|
1486
|
+
... http_path=os.getenv("DATABRICKS_HTTP_PATH"),
|
|
1487
|
+
... client_id=os.getenv("DATABRICKS_CLIENT_ID"),
|
|
1488
|
+
... client_secret=os.getenv("DATABRICKS_CLIENT_SECRET"),
|
|
1489
|
+
... db=os.getenv("DATABRICKS_DATABASE"),
|
|
1490
|
+
... port=os.getenv("DATABRICKS_PORT"),
|
|
1491
|
+
... )
|
|
1277
1492
|
"""
|
|
1278
1493
|
...
|
|
1279
1494
|
|
|
@@ -1286,6 +1501,8 @@ def DatabricksSource(
|
|
|
1286
1501
|
access_token: Optional[str] = None,
|
|
1287
1502
|
db: Optional[str] = None,
|
|
1288
1503
|
port: Optional[Union[str, int]] = None,
|
|
1504
|
+
client_id: Optional[str] = None,
|
|
1505
|
+
client_secret: Optional[str] = None,
|
|
1289
1506
|
engine_args: Optional[Dict[str, Any]] = None,
|
|
1290
1507
|
) -> BaseSQLSourceProtocol:
|
|
1291
1508
|
"""Create a Databricks data source. SQL-based data sources
|
|
@@ -1304,6 +1521,8 @@ def DatabricksSource(
|
|
|
1304
1521
|
db=db,
|
|
1305
1522
|
port=port,
|
|
1306
1523
|
name=name,
|
|
1524
|
+
client_id=client_id,
|
|
1525
|
+
client_secret=client_secret,
|
|
1307
1526
|
engine_args=engine_args,
|
|
1308
1527
|
)
|
|
1309
1528
|
|
|
@@ -1747,6 +1966,8 @@ __all__ = (
|
|
|
1747
1966
|
"DynamoDBSource",
|
|
1748
1967
|
"FinalizedChalkQuery",
|
|
1749
1968
|
"IncrementalSettings",
|
|
1969
|
+
"MSSQLSource",
|
|
1970
|
+
"MSSQLSourceImpl",
|
|
1750
1971
|
"MySQLSource",
|
|
1751
1972
|
"PostgreSQLSource",
|
|
1752
1973
|
"RedshiftSource",
|
|
@@ -24,6 +24,7 @@ from chalk.sql.finalized_query import FinalizedChalkQuery
|
|
|
24
24
|
from chalk.utils.df_utils import pa_array_to_pl_series
|
|
25
25
|
from chalk.utils.log_with_context import get_logger
|
|
26
26
|
from chalk.utils.missing_dependency import missing_dependency_exception
|
|
27
|
+
from chalk.utils.pl_helpers import str_json_decode_compat
|
|
27
28
|
from chalk.utils.threading import DEFAULT_IO_EXECUTOR, MultiSemaphore
|
|
28
29
|
from chalk.utils.tracing import safe_incr, safe_set_gauge, safe_trace
|
|
29
30
|
|
|
@@ -485,7 +486,11 @@ class AthenaSourceImpl(BaseSQLSource):
|
|
|
485
486
|
if pa.types.is_list(expected_type) or pa.types.is_large_list(expected_type):
|
|
486
487
|
if pa.types.is_string(actual_type) or pa.types.is_large_string(actual_type):
|
|
487
488
|
series = pa_array_to_pl_series(tbl[col_name])
|
|
488
|
-
column =
|
|
489
|
+
column = (
|
|
490
|
+
str_json_decode_compat(series, feature.converter.polars_dtype)
|
|
491
|
+
.to_arrow()
|
|
492
|
+
.cast(expected_type)
|
|
493
|
+
)
|
|
489
494
|
if actual_type != expected_type:
|
|
490
495
|
column = column.cast(options=pc.CastOptions(target_type=expected_type, allow_time_truncate=True))
|
|
491
496
|
if isinstance(column, pa.ChunkedArray):
|
|
@@ -225,6 +225,8 @@ _BQ_DATASET_NAME = "BQ_DATASET"
|
|
|
225
225
|
_BQ_PROJECT_NAME = "BQ_PROJECT"
|
|
226
226
|
_BQ_CREDENTIALS_BASE64_NAME = "BQ_CREDENTIALS_BASE64"
|
|
227
227
|
_BQ_CREDENTIALS_PATH_NAME = "BQ_CREDENTIALS_PATH"
|
|
228
|
+
_BQ_TEMP_PROJECT_NAME = "BQ_TEMP_PROJECT"
|
|
229
|
+
_BQ_TEMP_DATASET_NAME = "BQ_TEMP_DATASET"
|
|
228
230
|
|
|
229
231
|
|
|
230
232
|
class BigQuerySourceImpl(BaseSQLSource):
|
|
@@ -239,6 +241,8 @@ class BigQuerySourceImpl(BaseSQLSource):
|
|
|
239
241
|
location: Optional[str] = None,
|
|
240
242
|
credentials_base64: Optional[str] = None,
|
|
241
243
|
credentials_path: Optional[str] = None,
|
|
244
|
+
temp_project: Optional[str] = None,
|
|
245
|
+
temp_dataset: Optional[str] = None,
|
|
242
246
|
engine_args: Optional[Dict[str, Any]] = None,
|
|
243
247
|
integration_variable_override: Optional[Mapping[str, str]] = None,
|
|
244
248
|
):
|
|
@@ -267,6 +271,12 @@ class BigQuerySourceImpl(BaseSQLSource):
|
|
|
267
271
|
self.credentials_path = credentials_path or load_integration_variable(
|
|
268
272
|
integration_name=name, name=_BQ_CREDENTIALS_PATH_NAME, override=integration_variable_override
|
|
269
273
|
)
|
|
274
|
+
self.temp_project = temp_project or load_integration_variable(
|
|
275
|
+
integration_name=name, name=_BQ_TEMP_PROJECT_NAME, override=integration_variable_override
|
|
276
|
+
)
|
|
277
|
+
self.temp_dataset = temp_dataset or load_integration_variable(
|
|
278
|
+
integration_name=name, name=_BQ_TEMP_DATASET_NAME, override=integration_variable_override
|
|
279
|
+
)
|
|
270
280
|
BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args={})
|
|
271
281
|
|
|
272
282
|
@functools.cached_property
|
|
@@ -397,6 +407,10 @@ class BigQuerySourceImpl(BaseSQLSource):
|
|
|
397
407
|
except ModuleNotFoundError:
|
|
398
408
|
raise missing_dependency_exception("chalkpy[bigquery]")
|
|
399
409
|
|
|
410
|
+
# Use temp_project/temp_dataset if specified, otherwise fall back to main project/dataset
|
|
411
|
+
temp_project = self.temp_project or self.project
|
|
412
|
+
temp_dataset = self.temp_dataset or self.dataset
|
|
413
|
+
|
|
400
414
|
create_table_sql = create_temp_table.compile(dialect=self.get_sqlalchemy_dialect()).string
|
|
401
415
|
create_table_sql = create_table_sql.replace("TEMPORARY", "", 1)
|
|
402
416
|
chalk_logger.info(f"Creating temporary table {temp_table.name} in BigQuery {session_id=}: {create_table_sql}")
|
|
@@ -412,9 +426,10 @@ class BigQuerySourceImpl(BaseSQLSource):
|
|
|
412
426
|
job_config=job_config,
|
|
413
427
|
).result()
|
|
414
428
|
try:
|
|
429
|
+
temp_table_fqn = f"{temp_project}.{temp_dataset}.{temp_table.name}"
|
|
415
430
|
connection.load_table_from_dataframe(
|
|
416
431
|
temp_value.to_pandas(),
|
|
417
|
-
|
|
432
|
+
temp_table_fqn,
|
|
418
433
|
job_config=google.cloud.bigquery.LoadJobConfig(connection_properties=connection_properties),
|
|
419
434
|
).result()
|
|
420
435
|
yield
|
|
@@ -433,7 +448,10 @@ class BigQuerySourceImpl(BaseSQLSource):
|
|
|
433
448
|
def _bigquery_output_table(self, client: google.cloud.bigquery.Client) -> Iterator[str]:
|
|
434
449
|
destination_table_name = f"temp_output_{str(uuid4()).replace('-', '_')}"
|
|
435
450
|
|
|
436
|
-
|
|
451
|
+
# Use temp_project/temp_dataset if specified, otherwise fall back to main project/dataset
|
|
452
|
+
temp_project = self.temp_project or self.project
|
|
453
|
+
temp_dataset = self.temp_dataset or self.dataset
|
|
454
|
+
destination = f"{temp_project}.{temp_dataset}.{destination_table_name}"
|
|
437
455
|
|
|
438
456
|
try:
|
|
439
457
|
yield destination
|
|
@@ -639,6 +657,8 @@ class BigQuerySourceImpl(BaseSQLSource):
|
|
|
639
657
|
create_integration_variable(_BQ_PROJECT_NAME, self.name, self.project),
|
|
640
658
|
create_integration_variable(_BQ_CREDENTIALS_BASE64_NAME, self.name, self.credentials_base64),
|
|
641
659
|
create_integration_variable(_BQ_CREDENTIALS_PATH_NAME, self.name, self.credentials_path),
|
|
660
|
+
create_integration_variable(_BQ_TEMP_PROJECT_NAME, self.name, self.temp_project),
|
|
661
|
+
create_integration_variable(_BQ_TEMP_DATASET_NAME, self.name, self.temp_dataset),
|
|
642
662
|
]
|
|
643
663
|
if v is not None
|
|
644
664
|
}
|
|
@@ -17,6 +17,7 @@ from chalk.sql.finalized_query import FinalizedChalkQuery
|
|
|
17
17
|
from chalk.utils.df_utils import pa_array_to_pl_series
|
|
18
18
|
from chalk.utils.log_with_context import get_logger
|
|
19
19
|
from chalk.utils.missing_dependency import missing_dependency_exception
|
|
20
|
+
from chalk.utils.pl_helpers import str_json_decode_compat
|
|
20
21
|
from chalk.utils.threading import DEFAULT_IO_EXECUTOR
|
|
21
22
|
from chalk.utils.tracing import safe_incr, safe_trace
|
|
22
23
|
|
|
@@ -32,6 +33,8 @@ _DATABRICKS_HTTP_PATH_NAME = "DATABRICKS_HTTP_PATH"
|
|
|
32
33
|
_DATABRICKS_TOKEN_NAME = "DATABRICKS_TOKEN"
|
|
33
34
|
_DATABRICKS_DATABASE_NAME = "DATABRICKS_DATABASE"
|
|
34
35
|
_DATABRICKS_PORT_NAME = "DATABRICKS_PORT"
|
|
36
|
+
_DATABRICKS_CLIENT_ID_NAME = "DATABRICKS_CLIENT_ID"
|
|
37
|
+
_DATABRICKS_CLIENT_SECRET_NAME = "DATABRICKS_CLIENT_SECRET"
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
class DatabricksSourceImpl(BaseSQLSource):
|
|
@@ -45,6 +48,8 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
45
48
|
db: Optional[str] = None,
|
|
46
49
|
port: Optional[Union[int, str]] = None,
|
|
47
50
|
name: Optional[str] = None,
|
|
51
|
+
client_id: Optional[str] = None,
|
|
52
|
+
client_secret: Optional[str] = None,
|
|
48
53
|
engine_args: Optional[Dict[str, Any]] = None,
|
|
49
54
|
executor: Optional[concurrent.futures.ThreadPoolExecutor] = None,
|
|
50
55
|
integration_variable_override: Optional[Mapping[str, str]] = None,
|
|
@@ -73,21 +78,53 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
73
78
|
name=_DATABRICKS_PORT_NAME, integration_name=name, parser=int, override=integration_variable_override
|
|
74
79
|
)
|
|
75
80
|
)
|
|
81
|
+
self.client_id = client_id or load_integration_variable(
|
|
82
|
+
name=_DATABRICKS_CLIENT_ID_NAME, integration_name=name, override=integration_variable_override
|
|
83
|
+
)
|
|
84
|
+
self.client_secret = client_secret or load_integration_variable(
|
|
85
|
+
name=_DATABRICKS_CLIENT_SECRET_NAME, integration_name=name, override=integration_variable_override
|
|
86
|
+
)
|
|
76
87
|
self.executor = executor or DEFAULT_IO_EXECUTOR
|
|
77
88
|
|
|
89
|
+
has_token = self.access_token is not None
|
|
90
|
+
has_oauth = self.client_id is not None and self.client_secret is not None
|
|
91
|
+
|
|
92
|
+
if has_token and has_oauth:
|
|
93
|
+
chalk_logger.warning(
|
|
94
|
+
"Both OAuth credentials and a personal access token were provided. Using OAuth authentication."
|
|
95
|
+
)
|
|
96
|
+
self.access_token = None
|
|
97
|
+
|
|
98
|
+
self._credentials_provider = None
|
|
99
|
+
if has_oauth:
|
|
100
|
+
try:
|
|
101
|
+
from databricks.sdk.core import Config, oauth_service_principal
|
|
102
|
+
except ImportError:
|
|
103
|
+
raise missing_dependency_exception("chalkpy[databricks]")
|
|
104
|
+
|
|
105
|
+
def credentials_provider():
|
|
106
|
+
config = Config(host=self.host, client_id=self.client_id, client_secret=self.client_secret)
|
|
107
|
+
return oauth_service_principal(config)
|
|
108
|
+
|
|
109
|
+
self._credentials_provider = credentials_provider
|
|
110
|
+
|
|
78
111
|
if engine_args is None:
|
|
79
112
|
engine_args = {}
|
|
113
|
+
|
|
114
|
+
connect_args: dict[str, Any] = {
|
|
115
|
+
"keepalives": 1,
|
|
116
|
+
"keepalives_idle": 30,
|
|
117
|
+
"keepalives_interval": 10,
|
|
118
|
+
"keepalives_count": 5,
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
if self._credentials_provider:
|
|
122
|
+
connect_args["credentials_provider"] = self._credentials_provider
|
|
123
|
+
|
|
80
124
|
engine_args.setdefault("pool_size", 20)
|
|
81
125
|
engine_args.setdefault("max_overflow", 60)
|
|
82
|
-
engine_args.setdefault(
|
|
83
|
-
|
|
84
|
-
{
|
|
85
|
-
"keepalives": 1,
|
|
86
|
-
"keepalives_idle": 30,
|
|
87
|
-
"keepalives_interval": 10,
|
|
88
|
-
"keepalives_count": 5,
|
|
89
|
-
},
|
|
90
|
-
)
|
|
126
|
+
engine_args.setdefault("connect_args", connect_args)
|
|
127
|
+
|
|
91
128
|
BaseSQLSource.__init__(self, name=name, engine_args=engine_args, async_engine_args={})
|
|
92
129
|
|
|
93
130
|
def supports_inefficient_fallback(self) -> bool:
|
|
@@ -96,6 +133,12 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
96
133
|
def get_sqlglot_dialect(self) -> str | None:
|
|
97
134
|
return "databricks"
|
|
98
135
|
|
|
136
|
+
def _get_client_auth(self) -> Dict[str, str | Callable | None]:
|
|
137
|
+
if self._credentials_provider:
|
|
138
|
+
return {"credentials_provider": self._credentials_provider}
|
|
139
|
+
else:
|
|
140
|
+
return {"access_token": self.access_token}
|
|
141
|
+
|
|
99
142
|
@contextlib.contextmanager
|
|
100
143
|
def _create_temp_table(
|
|
101
144
|
self,
|
|
@@ -167,10 +210,7 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
167
210
|
|
|
168
211
|
# Connect using databricks-sql-python for efficient Arrow fetching
|
|
169
212
|
with sql.connect(
|
|
170
|
-
server_hostname=self.host,
|
|
171
|
-
http_path=self.http_path,
|
|
172
|
-
access_token=self.access_token,
|
|
173
|
-
catalog=self.db,
|
|
213
|
+
server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
|
|
174
214
|
) as databricks_conn:
|
|
175
215
|
chalk_logger.info("Established connection with Databricks using databricks-sql-python")
|
|
176
216
|
|
|
@@ -226,7 +266,11 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
226
266
|
if pa.types.is_list(expected_type) or pa.types.is_large_list(expected_type):
|
|
227
267
|
if pa.types.is_string(actual_type) or pa.types.is_large_string(actual_type):
|
|
228
268
|
series = pa_array_to_pl_series(tbl[col_name])
|
|
229
|
-
column =
|
|
269
|
+
column = (
|
|
270
|
+
str_json_decode_compat(series, feature.converter.polars_dtype)
|
|
271
|
+
.to_arrow()
|
|
272
|
+
.cast(expected_type)
|
|
273
|
+
)
|
|
230
274
|
|
|
231
275
|
# Cast to expected type if needed
|
|
232
276
|
if actual_type != expected_type:
|
|
@@ -268,10 +312,7 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
268
312
|
|
|
269
313
|
# Connect using databricks-sql-python for efficient Arrow fetching
|
|
270
314
|
with sql.connect(
|
|
271
|
-
server_hostname=self.host,
|
|
272
|
-
http_path=self.http_path,
|
|
273
|
-
access_token=self.access_token,
|
|
274
|
-
catalog=self.db,
|
|
315
|
+
server_hostname=self.host, http_path=self.http_path, catalog=self.db, **self._get_client_auth()
|
|
275
316
|
) as databricks_cnx:
|
|
276
317
|
with databricks_cnx.cursor() as cursor:
|
|
277
318
|
formatted_op, positional_params, named_params = self.compile_query(finalized_query)
|
|
@@ -348,6 +389,8 @@ class DatabricksSourceImpl(BaseSQLSource):
|
|
|
348
389
|
create_integration_variable(_DATABRICKS_TOKEN_NAME, self.name, self.access_token),
|
|
349
390
|
create_integration_variable(_DATABRICKS_DATABASE_NAME, self.name, self.db),
|
|
350
391
|
create_integration_variable(_DATABRICKS_PORT_NAME, self.name, self.port),
|
|
392
|
+
create_integration_variable(_DATABRICKS_CLIENT_ID_NAME, self.name, self.client_id),
|
|
393
|
+
create_integration_variable(_DATABRICKS_CLIENT_SECRET_NAME, self.name, self.client_secret),
|
|
351
394
|
]
|
|
352
395
|
if v is not None
|
|
353
396
|
}
|