dagster-snowflake 0.24.2__py3-none-any.whl → 0.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_snowflake/__init__.py +8 -5
- dagster_snowflake/components/__init__.py +5 -0
- dagster_snowflake/components/sql_component/__init__.py +0 -0
- dagster_snowflake/components/sql_component/component.py +61 -0
- dagster_snowflake/resources.py +124 -19
- dagster_snowflake/snowflake_io_manager.py +32 -21
- dagster_snowflake/version.py +1 -1
- dagster_snowflake-0.28.1.dist-info/METADATA +36 -0
- dagster_snowflake-0.28.1.dist-info/RECORD +16 -0
- {dagster_snowflake-0.24.2.dist-info → dagster_snowflake-0.28.1.dist-info}/WHEEL +1 -1
- dagster_snowflake-0.28.1.dist-info/entry_points.txt +2 -0
- {dagster_snowflake-0.24.2.dist-info → dagster_snowflake-0.28.1.dist-info/licenses}/LICENSE +1 -1
- dagster_snowflake-0.24.2.dist-info/METADATA +0 -26
- dagster_snowflake-0.24.2.dist-info/RECORD +0 -12
- {dagster_snowflake-0.24.2.dist-info → dagster_snowflake-0.28.1.dist-info}/top_level.txt +0 -0
dagster_snowflake/__init__.py
CHANGED
@@ -1,16 +1,19 @@
-from dagster._core.libraries import DagsterLibraryRegistry
+from dagster_shared.libraries import DagsterLibraryRegistry
 
-from .ops import snowflake_op_for_query as snowflake_op_for_query
-from .resources import (
+from dagster_snowflake.components import (
+    SnowflakeConnectionComponent as SnowflakeConnectionComponent,
+)
+from dagster_snowflake.ops import snowflake_op_for_query as snowflake_op_for_query
+from dagster_snowflake.resources import (
     SnowflakeConnection as SnowflakeConnection,
     SnowflakeResource as SnowflakeResource,
     fetch_last_updated_timestamps as fetch_last_updated_timestamps,
     snowflake_resource as snowflake_resource,
 )
-from .snowflake_io_manager import (
+from dagster_snowflake.snowflake_io_manager import (
     SnowflakeIOManager as SnowflakeIOManager,
     build_snowflake_io_manager as build_snowflake_io_manager,
 )
-from .version import __version__
+from dagster_snowflake.version import __version__
 
 DagsterLibraryRegistry.register("dagster-snowflake", __version__)
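For reference, a quick sketch of the public import surface this change produces in 0.28.1. All names are re-exported in the __init__.py above; nothing here goes beyond what that file imports:

# Top-level names exposed by dagster_snowflake 0.28.1, per the __init__.py diff above.
from dagster_snowflake import (
    SnowflakeConnection,
    SnowflakeConnectionComponent,  # new in this release
    SnowflakeIOManager,
    SnowflakeResource,
    build_snowflake_io_manager,
    fetch_last_updated_timestamps,
    snowflake_op_for_query,
    snowflake_resource,
)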
dagster_snowflake/components/sql_component/__init__.py
File without changes
dagster_snowflake/components/sql_component/component.py
ADDED
@@ -0,0 +1,61 @@
+from functools import cached_property
+from typing import Any, cast
+
+import dagster as dg
+from dagster._annotations import preview, public
+from dagster._core.definitions.definitions_class import Definitions
+from dagster.components.core.context import ComponentLoadContext
+from dagster.components.lib.sql_component.sql_client import SQLClient
+from pydantic import BaseModel, create_model
+
+from dagster_snowflake.resources import SnowflakeResource
+
+
+@public
+@preview
+class SnowflakeConnectionComponentBase(dg.Component, dg.Resolvable, dg.Model, SQLClient):
+    """A component that represents a Snowflake connection. Use this component if you are
+    also using the TemplatedSqlComponent to execute SQL queries, and need to connect to Snowflake.
+    """
+
+    @cached_property
+    def _snowflake_resource(self) -> SnowflakeResource:
+        return SnowflakeResource(
+            **{
+                (field.alias or field_name): getattr(self, field_name)
+                for field_name, field in self.__class__.model_fields.items()
+            }
+        )
+
+    def connect_and_execute(self, sql: str) -> None:
+        """Connect to the SQL database and execute the SQL query."""
+        return self._snowflake_resource.connect_and_execute(sql)
+
+    def build_defs(self, context: ComponentLoadContext) -> Definitions:
+        return Definitions()
+
+
+def _copy_fields_to_model(
+    copy_from: type[BaseModel], copy_to: type[BaseModel], new_model_cls_name: str
+) -> None:
+    """Given two models, creates a copy of the second model with the fields of the first model."""
+    field_definitions: dict[str, tuple[type, Any]] = {
+        field_name: (cast("type", field.annotation), field)
+        for field_name, field in copy_from.model_fields.items()
+    }
+
+    return create_model(
+        new_model_cls_name,
+        __base__=copy_to,
+        __doc__=copy_to.__doc__,
+        **field_definitions,  # type: ignore
+    )
+
+
+SnowflakeConnectionComponent = public(preview)(
+    _copy_fields_to_model(
+        copy_from=SnowflakeResource,
+        copy_to=SnowflakeConnectionComponentBase,
+        new_model_cls_name="SnowflakeConnectionComponent",
+    )
+)
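A minimal usage sketch of the new component. The field names reused from SnowflakeResource (account, user, password, database, schema) come from the diff; the concrete values and the query string are illustrative, and the surrounding TemplatedSqlComponent wiring is assumed rather than shown here:

# Hedged sketch: construct the generated SnowflakeConnectionComponent directly and
# run a statement through its SQLClient interface. Concrete values are hypothetical.
from dagster_snowflake import SnowflakeConnectionComponent

connection = SnowflakeConnectionComponent(
    account="my_account",
    user="my_user",
    password="my_password",
    database="MY_DB",
    schema="MY_SCHEMA",
)

# connect_and_execute() (added to SnowflakeResource in this release) opens a
# connection and executes the statement; a TemplatedSqlComponent would call this
# same method when it needs to run its rendered SQL against Snowflake.
connection.connect_and_execute("SELECT 1")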
dagster_snowflake/resources.py
CHANGED
@@ -1,9 +1,10 @@
 import base64
 import sys
 import warnings
+from collections.abc import Iterator, Mapping, Sequence
 from contextlib import closing, contextmanager
 from datetime import datetime
-from typing import Any, …
+from typing import Any, Optional, Union
 
 import dagster._check as check
 from cryptography.hazmat.backends import default_backend
@@ -17,11 +18,11 @@ from dagster import (
 from dagster._annotations import public
 from dagster._core.definitions.resource_definition import dagster_maintained_resource
 from dagster._core.storage.event_log.sql_event_log import SqlDbConnection
-from dagster._model.pydantic_compat_layer import compat_model_validator
 from dagster._utils.cached_method import cached_method
-from …
+from dagster.components.lib.sql_component.sql_client import SQLClient
+from pydantic import Field, model_validator, validator
 
-from .constants import (
+from dagster_snowflake.constants import (
     SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER,
     SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER_SQLALCHEMY,
 )
@@ -40,7 +41,7 @@ except ImportError:
     raise
 
 
-class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext):
+class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext, SQLClient):
     """A resource for connecting to the Snowflake data warehouse.
 
     If connector configuration is not set, SnowflakeResource.get_connection() will return a
@@ -132,8 +133,8 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
             "Raw private key to use. See the `Snowflake documentation"
             " <https://docs.snowflake.com/en/user-guide/key-pair-auth.html>`__ for details."
             " Alternately, set private_key_path and private_key_password. To avoid issues with"
-            " newlines in the keys, you can base64 encode the key. You can retrieve…
-            " encoded key with this shell command: ``cat rsa_key.p8 | base64``"
+            " newlines in the keys, you can optionally base64 encode the key. You can retrieve"
+            " the base64 encoded key with this shell command: ``cat rsa_key.p8 | base64``"
         ),
     )
@@ -235,7 +236,7 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
             "Indicate alternative database connection engine. Permissible option is "
             "'sqlalchemy' otherwise defaults to use the Snowflake Connector for Python."
         ),
-        is_required=False,
+        is_required=False,  # type: ignore
     )
 
     cache_column_metadata: Optional[str] = Field(
@@ -259,6 +260,15 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
         default=None,
         description="Optional parameter to specify the authentication mechanism to use.",
     )
+    additional_snowflake_connection_args: Optional[dict[str, Any]] = Field(
+        default=None,
+        description=(
+            "Additional keyword arguments to pass to the snowflake.connector.connect function. For a full list of"
+            " available arguments, see the `Snowflake documentation"
+            " <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-connect>`__."
+            " This config will be ignored if using the sqlalchemy connector."
+        ),
+    )
 
     @validator("paramstyle")
     def validate_paramstyle(cls, v: Optional[str]) -> Optional[str]:
@@ -272,13 +282,13 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
 
     @validator("connector")
     def validate_connector(cls, v: Optional[str]) -> Optional[str]:
-        if v is not None and v…
+        if v is not None and v not in ["sqlalchemy", "adbc"]:
             raise ValueError(
-                "Snowflake Resource: 'connector' configuration value must be None or…
+                "Snowflake Resource: 'connector' configuration value must be None, sqlalchemy or adbc."
             )
         return v
 
-    @…
+    @model_validator(mode="before")
     def validate_authentication(cls, values):
         auths_set = 0
         auths_set += 1 if values.get("password") is not None else 0
@@ -339,12 +349,15 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
             conn_args["private_key"] = self._snowflake_private_key(self._resolved_config_dict)
 
         conn_args["application"] = SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER
+
+        if self._resolved_config_dict.get("additional_snowflake_connection_args") is not None:
+            conn_args.update(self._resolved_config_dict["additional_snowflake_connection_args"])
         return conn_args
 
     @property
     @cached_method
     def _sqlalchemy_connection_args(self) -> Mapping[str, Any]:
-        conn_args:…
+        conn_args: dict[str, Any] = {
             k: self._resolved_config_dict.get(k)
             for k in (
                 "account",
@@ -379,6 +392,81 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
 
         return sqlalchemy_engine_args
 
+    @property
+    @cached_method
+    def _adbc_connection_args(self) -> Mapping[str, Any]:
+        config = self._resolved_config_dict
+        adbc_engine_args = {}
+
+        if config.get("account"):
+            adbc_engine_args["adbc.snowflake.sql.account"] = config["account"]
+        if config.get("user"):
+            adbc_engine_args["username"] = config["user"]
+        if config.get("password"):
+            adbc_engine_args["password"] = config["password"]
+        if config.get("database"):
+            adbc_engine_args["adbc.snowflake.sql.db"] = config["database"]
+        if config.get("schema"):
+            adbc_engine_args["adbc.snowflake.sql.schema"] = config["schema"]
+        if config.get("role"):
+            adbc_engine_args["adbc.snowflake.sql.role"] = config["role"]
+        if config.get("warehouse"):
+            adbc_engine_args["adbc.snowflake.sql.warehouse"] = config["warehouse"]
+
+        if config.get("authenticator"):
+            auth_mapping = {
+                "snowflake": "auth_snowflake",
+                "oauth": "auth_oauth",
+                "externalbrowser": "auth_ext_browser",
+                "okta": "auth_okta",
+                "jwt": "auth_jwt",
+                "snowflake_jwt": "auth_jwt",
+            }
+            auth_type = auth_mapping.get(config["authenticator"].lower(), config["authenticator"])
+            adbc_engine_args["adbc.snowflake.sql.auth_type"] = auth_type
+
+        if config.get("private_key") or config.get("private_key_path"):
+            # ADBC expects the raw private key value as bytes for jwt_private_key_pkcs8_value
+            adbc_engine_args["adbc.snowflake.sql.auth_type"] = "auth_jwt"
+            if config.get("private_key"):
+                adbc_engine_args["adbc.snowflake.sql.client_option.jwt_private_key_pkcs8_value"] = (
+                    config["private_key"]
+                )
+            elif config.get("private_key_path"):
+                adbc_engine_args["adbc.snowflake.sql.client_option.jwt_private_key"] = config[
+                    "private_key_path"
+                ]
+
+        if config.get("private_key_password"):
+            adbc_engine_args[
+                "adbc.snowflake.sql.client_option.jwt_private_key_pkcs8_password"
+            ] = config["private_key_password"]
+
+        if config.get("login_timeout"):
+            adbc_engine_args["adbc.snowflake.sql.client_option.login_timeout"] = (
+                f"{config['login_timeout']}s"
+            )
+        if config.get("network_timeout"):
+            adbc_engine_args["adbc.snowflake.sql.client_option.request_timeout"] = (
+                f"{config['network_timeout']}s"
+            )
+        if config.get("client_session_keep_alive") is not None:
+            adbc_engine_args["adbc.snowflake.sql.client_option.keep_session_alive"] = str(
+                config["client_session_keep_alive"]
+            ).lower()
+
+        adbc_engine_args["adbc.snowflake.sql.client_option.app_name"] = (
+            SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER
+        )
+
+        if config.get("additional_snowflake_connection_args"):
+            for key, value in config["additional_snowflake_connection_args"].items():
+                # Allow direct ADBC option names to be passed through
+                if key.startswith("adbc.snowflake."):
+                    adbc_engine_args[key] = value  # noqa: PERF403
+
+        return adbc_engine_args
+
     def _snowflake_private_key(self, config) -> bytes:
         # If the user has defined a path to a private key, we will use that.
         if config.get("private_key_path", None) is not None:
@@ -386,7 +474,7 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
             with open(config.get("private_key_path"), "rb") as key:
                 private_key = key.read()
         else:
-            private_key = config.get("private_key", None)
+            private_key = config.get("private_key", None).encode()
 
         kwargs = {}
         if config.get("private_key_password", None) is not None:
@@ -398,7 +486,9 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
             p_key = serialization.load_pem_private_key(
                 private_key, backend=default_backend(), **kwargs
             )
-        …
+
+        # key fails to load, possibly indicating key is base64 encoded
+        except ValueError:
             try:
                 private_key = base64.b64decode(private_key)
                 p_key = serialization.load_pem_private_key(
@@ -462,6 +552,15 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
             yield conn
             conn.close()
             engine.dispose()
+        elif self.connector == "adbc":
+            import adbc_driver_snowflake.dbapi
+
+            conn = adbc_driver_snowflake.dbapi.connect(
+                db_kwargs=self._adbc_connection_args,  # pyright: ignore[reportArgumentType]
+            )
+
+            yield conn
+            conn.close()
         else:
             conn = snowflake.connector.connect(**self._connection_args)
@@ -479,6 +578,10 @@ class SnowflakeResource(ConfigurableResource, IAttachDifferentObjectToOpContext)
                 snowflake_connection_resource=self,
             )
 
+    def connect_and_execute(self, sql: str) -> None:
+        with self.get_connection() as conn:
+            conn.cursor().execute(sql)
+
 
 class SnowflakeConnection:
     """A connection to Snowflake that can execute queries. In general this class should not be
@@ -567,9 +670,6 @@ class SnowflakeConnection:
 
         with self.get_connection() as conn:
             with closing(conn.cursor()) as cursor:
-                if sys.version_info[0] < 3:
-                    sql = sql.encode("utf-8")
-
                 self.log.info("Executing query: " + sql)
                 parameters = dict(parameters) if isinstance(parameters, Mapping) else parameters
                 cursor.execute(sql, parameters)
@@ -620,7 +720,7 @@ class SnowflakeConnection:
         if not fetch_results and use_pandas_result:
             check.failed("If use_pandas_result is True, fetch_results must also be True.")
 
-        results: …
+        results: list[Any] = []
         with self.get_connection() as conn:
             with closing(conn.cursor()) as cursor:
                 for raw_sql in sql_queries:
@@ -729,6 +829,7 @@ def fetch_last_updated_timestamps(
     schema: str,
     tables: Sequence[str],
    database: Optional[str] = None,
+    ignore_missing_tables: Optional[bool] = False,
 ) -> Mapping[str, datetime]:
     """Fetch the last updated times of a list of tables in Snowflake.
 
@@ -742,6 +843,8 @@ def fetch_last_updated_timestamps(
         tables (Sequence[str]): A list of table names to fetch the last updated time for.
         database (Optional[str]): The database of the table. Only required if the connection
             has not been set with a database.
+        ignore_missing_tables (Optional[bool]): If True, tables not found in Snowflake
+            will be excluded from the result.
 
     Returns:
         Mapping[str, datetime]: A dictionary of table names to their last updated time in UTC.
@@ -755,7 +858,7 @@ def fetch_last_updated_timestamps(
     )
 
     query = f"""
-    SELECT table_name, CONVERT_TIMEZONE('UTC', last_altered) AS last_altered
+    SELECT table_name, CONVERT_TIMEZONE('UTC', last_altered) AS last_altered
     FROM {fully_qualified_table_name}
     WHERE table_schema = '{schema}' AND table_name IN ({tables_str});
     """
@@ -767,6 +870,8 @@ def fetch_last_updated_timestamps(
     result_correct_case = {}
     for table_name in tables:
         if table_name.upper() not in result_mapping:
+            if ignore_missing_tables:
+                continue
             raise ValueError(f"Table {table_name} could not be found.")
         last_altered = result_mapping[table_name.upper()]
         check.invariant(
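Taken together, the resources.py changes add four user-facing pieces: an additional_snowflake_connection_args pass-through, an "adbc" connector option, a connect_and_execute() helper, and an ignore_missing_tables flag on fetch_last_updated_timestamps. A hedged sketch of how they could be combined; the credentials, the prefetch option, and the keyword name of the connection argument to fetch_last_updated_timestamps are illustrative assumptions, not taken from this diff:

from dagster_snowflake import SnowflakeResource, fetch_last_updated_timestamps

# Extra kwargs are forwarded verbatim to snowflake.connector.connect
# (and ignored when connector="sqlalchemy"); the option shown is illustrative.
snowflake = SnowflakeResource(
    account="my_account",          # hypothetical credentials
    user="my_user",
    password="my_password",
    database="MY_DB",
    warehouse="MY_WH",
    additional_snowflake_connection_args={"client_prefetch_threads": 8},
)

# New helper added in this release: open a connection, run one statement, close it.
snowflake.connect_and_execute("CREATE SCHEMA IF NOT EXISTS MY_DB.STAGING")

# New connector option: route get_connection() through
# adbc_driver_snowflake.dbapi.connect (requires the adbc-driver-snowflake package).
adbc_snowflake = SnowflakeResource(
    account="my_account",
    user="my_user",
    password="my_password",
    connector="adbc",
)

with snowflake.get_connection() as conn:
    # ignore_missing_tables=True (new here) skips tables that information_schema
    # does not report instead of raising ValueError.
    freshness = fetch_last_updated_timestamps(
        snowflake_connection=conn,  # parameter name assumed from the existing API
        schema="MY_SCHEMA",
        tables=["orders", "customers"],
        ignore_missing_tables=True,
    )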
dagster_snowflake/snowflake_io_manager.py
CHANGED
@@ -1,10 +1,11 @@
 from abc import abstractmethod
+from collections.abc import Sequence
 from contextlib import contextmanager
-from typing import …
+from typing import Any, Optional, cast
 
 from dagster import IOManagerDefinition, OutputContext, io_manager
 from dagster._config.pythonic_config import ConfigurableIOManagerFactory
-from dagster._core.definitions.…
+from dagster._core.definitions.partitions.utils import TimeWindow
 from dagster._core.storage.db_io_manager import (
     DbClient,
     DbIOManager,
@@ -14,22 +15,21 @@ from dagster._core.storage.db_io_manager import (
 )
 from dagster._core.storage.io_manager import dagster_maintained_io_manager
 from pydantic import Field
-from snowflake.connector.errors import ProgrammingError
 
-from .resources import SnowflakeResource
+from dagster_snowflake.resources import SnowflakeResource
 
 SNOWFLAKE_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
 
 
 def build_snowflake_io_manager(
-    type_handlers: Sequence[DbTypeHandler], default_load_type: Optional[…
+    type_handlers: Sequence[DbTypeHandler], default_load_type: Optional[type] = None
 ) -> IOManagerDefinition:
     """Builds an IO manager definition that reads inputs from and writes outputs to Snowflake.
 
     Args:
         type_handlers (Sequence[DbTypeHandler]): Each handler defines how to translate between
             slices of Snowflake tables and an in-memory type - e.g. a Pandas DataFrame. If only
-            one DbTypeHandler is provided, it will be used as…
+            one DbTypeHandler is provided, it will be used as the default_load_type.
         default_load_type (Type): When an input has no type annotation, load it as this type.
 
     Returns:
@@ -58,7 +58,7 @@ def build_snowflake_io_manager(
 
             snowflake_io_manager = build_snowflake_io_manager([SnowflakePandasTypeHandler(), SnowflakePySparkTypeHandler()])
 
-            …
+            Definitions(
                 assets=[my_table, my_second_table],
                 resources={
                     "io_manager": snowflake_io_manager.configured({
@@ -74,7 +74,7 @@ def build_snowflake_io_manager(
 
         .. code-block:: python
 
-            …
+            Definitions(
                 assets=[my_table]
                 resources={"io_manager" snowflake_io_manager.configured(
                     {"database": "my_database", "schema": "my_schema", ...} # will be used as the schema
@@ -279,6 +279,15 @@ class SnowflakeIOManager(ConfigurableIOManagerFactory):
         default=None,
         description="Optional parameter to specify the authentication mechanism to use.",
     )
+    additional_snowflake_connection_args: Optional[dict[str, Any]] = Field(
+        default=None,
+        description=(
+            "Additional keyword arguments to pass to the snowflake.connector.connect function. For a full list of"
+            " available arguments, see the `Snowflake documentation"
+            " <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-connect>`__."
+            " This config will be ignored if using the sqlalchemy connector."
+        ),
+    )
 
     @staticmethod
     @abstractmethod
@@ -300,7 +309,7 @@ class SnowflakeIOManager(ConfigurableIOManagerFactory):
         ...
 
     @staticmethod
-    def default_load_type() -> Optional[…
+    def default_load_type() -> Optional[type]:
        """If an asset or op is not annotated with an return type, default_load_type will be used to
         determine which TypeHandler to use to store and load the output.
@@ -347,27 +356,29 @@ class SnowflakeDbClient(DbClient):
            if context.resource_config
            else {}
        )
-        with SnowflakeResource(schema=table_slice.schema, **no_schema_config).get_connection(
+        with SnowflakeResource(schema=table_slice.schema, **no_schema_config).get_connection(  # pyright: ignore[reportArgumentType]
            raw_conn=False
        ) as conn:
            yield conn
 
     @staticmethod
     def ensure_schema_exists(context: OutputContext, table_slice: TableSlice, connection) -> None:
-        …
-        …
-        …
-        …
-        …
+        with connection.cursor() as cursor:
+            cursor.execute(
+                f"show schemas like '{table_slice.schema}' in database {table_slice.database}"
+            )
+            schemas = cursor.fetchall()
+
         if len(schemas) == 0:
-            connection.cursor()…
+            with connection.cursor() as cursor:
+                cursor.execute(f"create schema {table_slice.schema};")
 
     @staticmethod
     def delete_table_slice(context: OutputContext, table_slice: TableSlice, connection) -> None:
         try:
             connection.cursor().execute(_get_cleanup_statement(table_slice))
-        except …
-            if "does not exist" in e…
+        except Exception as e:
+            if "does not exist or not authorized" in str(e):
                 # table doesn't exist yet, so ignore the error
                 return
             else:
@@ -376,7 +387,7 @@ class SnowflakeDbClient(DbClient):
     @staticmethod
     def get_select_statement(table_slice: TableSlice) -> str:
         col_str = ", ".join(table_slice.columns) if table_slice.columns else "*"
-        if table_slice.partition_dimensions…
+        if table_slice.partition_dimensions:
             query = (
                 f"SELECT {col_str} FROM"
                 f" {table_slice.database}.{table_slice.schema}.{table_slice.table} WHERE\n"
@@ -390,7 +401,7 @@ def _get_cleanup_statement(table_slice: TableSlice) -> str:
     """Returns a SQL statement that deletes data in the given table to make way for the output data
     being written.
     """
-    if table_slice.partition_dimensions…
+    if table_slice.partition_dimensions:
        query = (
            f"DELETE FROM {table_slice.database}.{table_slice.schema}.{table_slice.table} WHERE\n"
        )
@@ -411,7 +422,7 @@ def _partition_where_clause(partition_dimensions: Sequence[TablePartitionDimensi
 
 
 def _time_window_where_clause(table_partition: TablePartitionDimension) -> str:
-    partition = cast(TimeWindow, table_partition.partitions)
+    partition = cast("TimeWindow", table_partition.partitions)
     start_dt, end_dt = partition
     start_dt_str = start_dt.strftime(SNOWFLAKE_DATETIME_FORMAT)
     end_dt_str = end_dt.strftime(SNOWFLAKE_DATETIME_FORMAT)
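On the I/O manager side, the same additional_snowflake_connection_args field is now available on SnowflakeIOManager subclasses. A sketch under the assumption that the companion dagster-snowflake-pandas package (whose SnowflakePandasTypeHandler the docstring above already references) is installed and provides SnowflakePandasIOManager as such a subclass; all values are illustrative:

import pandas as pd
from dagster import Definitions, asset
from dagster_snowflake_pandas import SnowflakePandasIOManager  # assumed companion package


@asset
def my_table() -> pd.DataFrame:
    return pd.DataFrame({"id": [1, 2, 3]})


defs = Definitions(
    assets=[my_table],
    resources={
        "io_manager": SnowflakePandasIOManager(
            account="my_account",        # hypothetical values
            user="my_user",
            password="my_password",
            database="MY_DB",
            # New in this release: forwarded to snowflake.connector.connect,
            # ignored when connector="sqlalchemy".
            additional_snowflake_connection_args={"client_prefetch_threads": 8},
        )
    },
)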
dagster_snowflake/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.24.2"
+__version__ = "0.28.1"
dagster_snowflake-0.28.1.dist-info/METADATA
ADDED
@@ -0,0 +1,36 @@
+Metadata-Version: 2.4
+Name: dagster-snowflake
+Version: 0.28.1
+Summary: Package for Snowflake Dagster framework components.
+Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-snowflake
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9,<3.14
+License-File: LICENSE
+Requires-Dist: dagster==1.12.1
+Requires-Dist: snowflake-connector-python>=3.4.0
+Requires-Dist: pyOpenSSL>=22.1.0
+Provides-Extra: snowflake-sqlalchemy
+Requires-Dist: sqlalchemy!=1.4.42; extra == "snowflake-sqlalchemy"
+Requires-Dist: snowflake-sqlalchemy; extra == "snowflake-sqlalchemy"
+Provides-Extra: pandas
+Requires-Dist: pandas; extra == "pandas"
+Requires-Dist: snowflake-connector-python[pandas]>=3.4.0; extra == "pandas"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
dagster_snowflake-0.28.1.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+dagster_snowflake/__init__.py,sha256=aDqda8Dff_jKm6tjyvOX2BqiXYNIVVMmPbL2h7pZKec,785
+dagster_snowflake/constants.py,sha256=26PGL1eFncm0WfbgZjA7jaznfaRx75vVJnqbieVYeco,481
+dagster_snowflake/ops.py,sha256=L_MP28fLm7_hrJmzMoDocLwyvVnkpy1LVwUSLIArKWc,2225
+dagster_snowflake/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
+dagster_snowflake/resources.py,sha256=1s-vLJ3d9xGIXSgF04NqFkuggf_J4BXdjdXxSwWkghk,36042
+dagster_snowflake/snowflake_io_manager.py,sha256=E40E-09u4BMu-Rt5vFfjG8z4Y2N7uaebXVxbOfQqgcg,17682
+dagster_snowflake/version.py,sha256=ZRQKbgDaGz_yuLk-cUKuk6ZBKCSRKZC8nQd041NRNXk,23
+dagster_snowflake/components/__init__.py,sha256=RlBmjimRJNlrlkZKMrZftBjco1wmD4cOFyomfFyooks,145
+dagster_snowflake/components/sql_component/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dagster_snowflake/components/sql_component/component.py,sha256=jSQK0odxTY3KQ5BDhCAp-gJbfGW81-lVs7kcCXWKiiM,2141
+dagster_snowflake-0.28.1.dist-info/licenses/LICENSE,sha256=4lsMW-RCvfVD4_F57wrmpe3vX1xwUk_OAKKmV_XT7Z0,11348
+dagster_snowflake-0.28.1.dist-info/METADATA,sha256=maVAgut9C8nr48ST1zOZPuw3QGSMyhWPzqG670BBYy0,1350
+dagster_snowflake-0.28.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dagster_snowflake-0.28.1.dist-info/entry_points.txt,sha256=Ddljs88OCl4BNyhSyE8lIAe54bIqEms0cOkR9vUAqsM,72
+dagster_snowflake-0.28.1.dist-info/top_level.txt,sha256=uECYCiluOxLQ996SCUPBBwdK0CTyz45FjWqf7WDqMMc,18
+dagster_snowflake-0.28.1.dist-info/RECORD,,
{dagster_snowflake-0.24.2.dist-info → dagster_snowflake-0.28.1.dist-info/licenses}/LICENSE
RENAMED
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright …
+   Copyright 2025 Dagster Labs, Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
dagster_snowflake-0.24.2.dist-info/METADATA
REMOVED
@@ -1,26 +0,0 @@
-Metadata-Version: 2.1
-Name: dagster-snowflake
-Version: 0.24.2
-Summary: Package for Snowflake Dagster framework components.
-Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-snowflake
-Author: Dagster Labs
-Author-email: hello@dagsterlabs.com
-License: Apache-2.0
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.8,<3.13
-License-File: LICENSE
-Requires-Dist: dagster ==1.8.2
-Requires-Dist: snowflake-connector-python >=3.4.0
-Provides-Extra: pandas
-Requires-Dist: pandas ; extra == 'pandas'
-Requires-Dist: snowflake-connector-python[pandas] >=3.4.0 ; extra == 'pandas'
-Provides-Extra: snowflake.sqlalchemy
-Requires-Dist: sqlalchemy !=1.4.42 ; extra == 'snowflake.sqlalchemy'
-Requires-Dist: snowflake-sqlalchemy ; extra == 'snowflake.sqlalchemy'
-
dagster_snowflake-0.24.2.dist-info/RECORD
REMOVED
@@ -1,12 +0,0 @@
-dagster_snowflake/__init__.py,sha256=hPi8JkjApGPlu4ClTzx0BikfxMf440yW7jsRYZCkGcs,605
-dagster_snowflake/constants.py,sha256=26PGL1eFncm0WfbgZjA7jaznfaRx75vVJnqbieVYeco,481
-dagster_snowflake/ops.py,sha256=L_MP28fLm7_hrJmzMoDocLwyvVnkpy1LVwUSLIArKWc,2225
-dagster_snowflake/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
-dagster_snowflake/resources.py,sha256=QVh3IUyg5RZCKgjdYl_-Dv37XrdAp0SgkLqhL8RSZpA,31166
-dagster_snowflake/snowflake_io_manager.py,sha256=eP2zme6HzstCGQs0fONVAZqqmt54D7M1hvEYm-mqjZE,17188
-dagster_snowflake/version.py,sha256=UBF3OYTcBAovta3ux5ybxb0MZYAGpGO79WH_ax2NGeI,23
-dagster_snowflake-0.24.2.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
-dagster_snowflake-0.24.2.dist-info/METADATA,sha256=ZdtH_yk0ibEM8yIETF8WcRc62mt1GFm6fHje0LA7v6Q,1119
-dagster_snowflake-0.24.2.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-dagster_snowflake-0.24.2.dist-info/top_level.txt,sha256=uECYCiluOxLQ996SCUPBBwdK0CTyz45FjWqf7WDqMMc,18
-dagster_snowflake-0.24.2.dist-info/RECORD,,
{dagster_snowflake-0.24.2.dist-info → dagster_snowflake-0.28.1.dist-info}/top_level.txt
File without changes