dagster-snowflake-pyspark 0.20.2__py3-none-any.whl → 0.28.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the registry.
- dagster_snowflake_pyspark/__init__.py +3 -3
- dagster_snowflake_pyspark/constants.py +5 -0
- dagster_snowflake_pyspark/py.typed +1 -0
- dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py +78 -17
- dagster_snowflake_pyspark/version.py +1 -1
- dagster_snowflake_pyspark-0.28.2.dist-info/METADATA +28 -0
- dagster_snowflake_pyspark-0.28.2.dist-info/RECORD +10 -0
- {dagster_snowflake_pyspark-0.20.2.dist-info → dagster_snowflake_pyspark-0.28.2.dist-info}/WHEEL +1 -1
- dagster_snowflake_pyspark-0.20.2.dist-info/METADATA +0 -21
- dagster_snowflake_pyspark-0.20.2.dist-info/RECORD +0 -8
- {dagster_snowflake_pyspark-0.20.2.dist-info → dagster_snowflake_pyspark-0.28.2.dist-info/licenses}/LICENSE +0 -0
- {dagster_snowflake_pyspark-0.20.2.dist-info → dagster_snowflake_pyspark-0.28.2.dist-info}/top_level.txt +0 -0

dagster_snowflake_pyspark/__init__.py

@@ -1,10 +1,10 @@
-from
+from dagster_shared.libraries import DagsterLibraryRegistry
 
-from .snowflake_pyspark_type_handler import (
+from dagster_snowflake_pyspark.snowflake_pyspark_type_handler import (
     SnowflakePySparkIOManager as SnowflakePySparkIOManager,
     SnowflakePySparkTypeHandler as SnowflakePySparkTypeHandler,
     snowflake_pyspark_io_manager as snowflake_pyspark_io_manager,
 )
-from .version import __version__ as __version__
+from dagster_snowflake_pyspark.version import __version__ as __version__
 
 DagsterLibraryRegistry.register("dagster-snowflake-pyspark", __version__)

dagster_snowflake_pyspark/constants.py (new file)

@@ -0,0 +1,5 @@
+# Description: This file contains the Snowflake connection identifiers for the Snowflake partner account.
+# The connection identifiers are used to identify the partner account when connecting to Snowflake.
+# We use different connection identifiers for different connection code paths to ensure that each is
+# working as expected.
+SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER_PYSPARK = "DagsterLabs_Dagster_Pyspark"

dagster_snowflake_pyspark/py.typed (new file)

@@ -0,0 +1 @@
+partial
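
The "partial" value in py.typed is PEP 561's partial-marker: it tells type checkers that the package ships inline annotations but is not fully typed, so they should not treat every missing annotation as an error. The marker only has an effect if the wheel actually contains the file. A minimal sketch of how that inclusion is typically declared with setuptools follows; this is a hypothetical build snippet, not this package's actual configuration:

    # Hypothetical setup.py fragment: ship the PEP 561 marker inside the wheel.
    from setuptools import find_packages, setup

    setup(
        name="dagster-snowflake-pyspark",
        packages=find_packages(exclude=["dagster_snowflake_pyspark_tests*"]),
        # Without this, py.typed stays out of the wheel and type checkers
        # fall back to treating the package as untyped.
        package_data={"dagster_snowflake_pyspark": ["py.typed"]},
    )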

dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py

@@ -1,4 +1,5 @@
-from
+from collections.abc import Mapping, Sequence
+from typing import Optional
 
 import dagster._check as check
 from dagster import InputContext, MetadataValue, OutputContext, TableColumn, TableSchema

@@ -9,6 +10,8 @@ from dagster_snowflake.snowflake_io_manager import SnowflakeDbClient
 from pyspark.sql import DataFrame, SparkSession
 from pyspark.sql.types import StructType
 
+from dagster_snowflake_pyspark.constants import SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER_PYSPARK
+
 SNOWFLAKE_CONNECTOR = "net.snowflake.spark.snowflake"
 
 

@@ -25,6 +28,7 @@ def _get_snowflake_options(config, table_slice: TableSlice) -> Mapping[str, str]
         "sfDatabase": config["database"],
         "sfSchema": table_slice.schema,
         "sfWarehouse": config["warehouse"],
+        "APPLICATION": SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER_PYSPARK,
     }
 
     return conf
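
The new APPLICATION entry is how the partner identifier from constants.py reaches Snowflake: APPLICATION is a standard Snowflake connection parameter that tags each session with the name of the connecting application, which is what the "partner connection identifier" comments above refer to. A rough sketch of where this options mapping ends up follows; it is simplified from the handler code, and the credential values and table name are placeholders:

    # Sketch: the type handler passes the options mapping to Spark's Snowflake
    # connector, so APPLICATION travels with every read and write.
    from pyspark.sql import SparkSession

    SNOWFLAKE_CONNECTOR = "net.snowflake.spark.snowflake"

    # Stand-in for the mapping _get_snowflake_options() builds (values are fake).
    options = {
        "sfURL": "myaccount.snowflakecomputing.com",
        "sfUser": "my_user",
        "sfPassword": "my_password",
        "sfDatabase": "MY_DATABASE",
        "sfSchema": "MY_SCHEMA",
        "sfWarehouse": "MY_WAREHOUSE",
        "APPLICATION": "DagsterLabs_Dagster_Pyspark",
    }

    spark = SparkSession.builder.getOrCreate()
    df = (
        spark.read.format(SNOWFLAKE_CONNECTOR)
        .options(**options)
        .option("dbtable", "MY_TABLE")  # placeholder; the handler derives this from the TableSlice
        .load()
    )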

@@ -52,7 +56,7 @@ class SnowflakePySparkTypeHandler(DbTypeHandler[DataFrame]):
         def my_table() -> pd.DataFrame:  # the name of the asset will be the table name
             ...
 
-
+        Definitions(
             assets=[my_table],
             resources={
                 "io_manager": MySnowflakeIOManager(database="MY_DATABASE", account=EnvVar("SNOWFLAKE_ACCOUNT"), warehouse="my_warehouse", ...)

@@ -61,7 +65,7 @@ class SnowflakePySparkTypeHandler(DbTypeHandler[DataFrame]):
 
     """
 
-    def handle_output(
+    def handle_output(  # pyright: ignore[reportIncompatibleMethodOverride]
         self, context: OutputContext, table_slice: TableSlice, obj: DataFrame, _
     ) -> Mapping[str, RawMetadataValue]:
         options = _get_snowflake_options(context.resource_config, table_slice)

@@ -83,7 +87,7 @@ class SnowflakePySparkTypeHandler(DbTypeHandler[DataFrame]):
             ),
         }
 
-    def load_input(self, context: InputContext, table_slice: TableSlice, _) -> DataFrame:
+    def load_input(self, context: InputContext, table_slice: TableSlice, _) -> DataFrame:  # pyright: ignore[reportIncompatibleMethodOverride]
         options = _get_snowflake_options(context.resource_config, table_slice)
 
         spark = SparkSession.builder.getOrCreate()  # type: ignore

@@ -128,7 +132,7 @@ Examples:
         def my_table() -> DataFrame:  # the name of the asset will be the table name
             ...
 
-
+        Definitions(
             assets=[my_table],
             resources={
                 "io_manager": snowflake_pyspark_io_manager.configured({

@@ -143,10 +147,38 @@ Examples:
 
     Note that the warehouse configuration value is required when using the snowflake_pyspark_io_manager
 
-
-
-
-
+    You can set a default schema to store the assets using the ``schema`` configuration value of the Snowflake I/O
+    Manager. This schema will be used if no other schema is specified directly on an asset or op.
+
+    .. code-block:: python
+
+        Definitions(
+            assets=[my_table],
+            resources={"io_manager": snowflake_pyspark_io_manager.configured(
+                {"database": "my_database", "schema": "my_schema", ...}  # will be used as the schema
+            )}
+        )
+
+
+    On individual assets, you can also specify the schema where they should be stored using metadata or
+    by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
+    take precedence.
+
+    .. code-block:: python
+
+        @asset(
+            key_prefix=["my_schema"]  # will be used as the schema in snowflake
+        )
+        def my_table() -> DataFrame:
+            ...
+
+        @asset(
+            metadata={"schema": "my_schema"}  # will be used as the schema in snowflake
+        )
+        def my_other_table() -> DataFrame:
+            ...
+
+    For ops, the schema can be specified by including a "schema" entry in output metadata.
 
     .. code-block:: python
 
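
The precedence rule stated in the added docstring text is easy to miss, so here is a hypothetical asset (not from the package) that combines both mechanisms; per the rule, the metadata entry wins:

    from dagster import asset
    from pyspark.sql import DataFrame

    @asset(
        key_prefix=["prefix_schema"],            # would otherwise set the schema
        metadata={"schema": "metadata_schema"},  # metadata takes precedence
    )
    def my_table() -> DataFrame:
        # Stored at metadata_schema.my_table, not prefix_schema.my_table.
        ...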

@@ -154,9 +186,10 @@ Examples:
             out={"my_table": Out(metadata={"schema": "my_schema"})}
         )
         def make_my_table() -> DataFrame:
-            # the returned value will be stored at my_schema.my_table
             ...
 
+    If none of these is provided, the schema will default to "public".
+
     To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
     In or AssetIn.
 
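
To make the column-selection note concrete, here is a short sketch in the style of the surrounding docstring examples; the asset names are placeholders, and the "columns" metadata key is the one the docstring describes:

    from dagster import AssetIn, asset
    from pyspark.sql import DataFrame

    @asset(
        ins={"my_table": AssetIn("my_table", metadata={"columns": ["a"]})}
    )
    def my_table_a(my_table: DataFrame) -> DataFrame:
        # my_table contains only column "a" of the upstream table.
        ...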

@@ -193,7 +226,7 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
         def my_table() -> DataFrame:  # the name of the asset will be the table name
             ...
 
-
+        Definitions(
             assets=[my_table],
             resources={
                 "io_manager": SnowflakePySparkIOManager(

@@ -208,10 +241,38 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
 
     Note that the warehouse configuration value is required when using the SnowflakePySparkIOManager
 
-
-
-
-
+    You can set a default schema to store the assets using the ``schema`` configuration value of the Snowflake I/O
+    Manager. This schema will be used if no other schema is specified directly on an asset or op.
+
+    .. code-block:: python
+
+        Definitions(
+            assets=[my_table],
+            resources={
+                "io_manager": SnowflakePySparkIOManager(database="my_database", schema="my_schema", ...)
+            }
+        )
+
+
+    On individual assets, you can also specify the schema where they should be stored using metadata or
+    by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
+    take precedence.
+
+    .. code-block:: python
+
+        @asset(
+            key_prefix=["my_schema"]  # will be used as the schema in snowflake
+        )
+        def my_table() -> DataFrame:
+            ...
+
+        @asset(
+            metadata={"schema": "my_schema"}  # will be used as the schema in snowflake
+        )
+        def my_other_table() -> DataFrame:
+            ...
+
+    For ops, the schema can be specified by including a "schema" entry in output metadata.
 
     .. code-block:: python
 

@@ -219,9 +280,9 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
             out={"my_table": Out(metadata={"schema": "my_schema"})}
         )
         def make_my_table() -> DataFrame:
-            # the returned value will be stored at my_schema.my_table
             ...
 
+    If none of these is provided, the schema will default to "public".
     To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
     In or AssetIn.
 

@@ -245,5 +306,5 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
         return [SnowflakePySparkTypeHandler()]
 
     @staticmethod
-    def default_load_type() -> Optional[
+    def default_load_type() -> Optional[type]:
         return DataFrame
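
For readers unfamiliar with this hook: in Dagster's database I/O managers, default_load_type supplies the type to load when a downstream input carries no annotation. A hypothetical illustration, assuming that documented fallback behavior (this is not code from the package):

    from dagster import asset
    from pyspark.sql import DataFrame

    @asset
    def my_table() -> DataFrame:  # handled by SnowflakePySparkTypeHandler
        ...

    @asset
    def downstream(my_table):
        # No annotation on my_table: the I/O manager falls back to
        # default_load_type(), so my_table arrives as a pyspark.sql.DataFrame.
        ...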

dagster_snowflake_pyspark/version.py

@@ -1 +1 @@
-__version__ = "0.20.2"
+__version__ = "0.28.2"

dagster_snowflake_pyspark-0.28.2.dist-info/METADATA (new file)

@@ -0,0 +1,28 @@
+Metadata-Version: 2.4
+Name: dagster-snowflake-pyspark
+Version: 0.28.2
+Summary: Package for integrating Snowflake and PySpark with Dagster.
+Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-snowflake-pyspark
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.10
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10,<3.14
+License-File: LICENSE
+Requires-Dist: dagster==1.12.2
+Requires-Dist: dagster-snowflake==0.28.2
+Requires-Dist: pyspark<4
+Requires-Dist: requests
+Requires-Dist: sqlalchemy!=1.4.42
+Requires-Dist: snowflake-sqlalchemy>=1.2
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

dagster_snowflake_pyspark-0.28.2.dist-info/RECORD (new file)

@@ -0,0 +1,10 @@
+dagster_snowflake_pyspark/__init__.py,sha256=6uFEmuB7ctAVeYqjIvlpUkS3H6NsfTkCTGxDCnFdDOk,472
+dagster_snowflake_pyspark/constants.py,sha256=0GwhKlR3tzwIv2FbgK9e2D78iAPWXwhni_bSdfoFyNM,410
+dagster_snowflake_pyspark/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
+dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py,sha256=Hn3izqO4ctRBkFOYmocRsDtgWzyyqzEy0ZjvM1eSCcg,11157
+dagster_snowflake_pyspark/version.py,sha256=K-TM2fq9AmH_Dk8Cadam72wILDZ_6qftLHvY9P1Fc3I,23
+dagster_snowflake_pyspark-0.28.2.dist-info/licenses/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+dagster_snowflake_pyspark-0.28.2.dist-info/METADATA,sha256=DY1OKr4Dwnfn1Up_WY_4R_aJb3duDFOzzgJgnWi0VNs,918
+dagster_snowflake_pyspark-0.28.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dagster_snowflake_pyspark-0.28.2.dist-info/top_level.txt,sha256=NH48Qcesg34H5Ih-KKuOhwmWzvcaqVkN9lvADwCJv8U,26
+dagster_snowflake_pyspark-0.28.2.dist-info/RECORD,,

dagster_snowflake_pyspark-0.20.2.dist-info/METADATA (deleted)

@@ -1,21 +0,0 @@
-Metadata-Version: 2.1
-Name: dagster-snowflake-pyspark
-Version: 0.20.2
-Summary: Package for integrating Snowflake and PySpark with Dagster.
-Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-snowflake-pyspark
-Author: Elementl
-Author-email: hello@elementl.com
-License: Apache-2.0
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-License-File: LICENSE
-Requires-Dist: dagster (==1.4.2)
-Requires-Dist: dagster-snowflake (==0.20.2)
-Requires-Dist: pyspark
-Requires-Dist: requests
-Requires-Dist: sqlalchemy (!=1.4.42)
-Requires-Dist: snowflake-sqlalchemy (>=1.2)
-

dagster_snowflake_pyspark-0.20.2.dist-info/RECORD (deleted)

@@ -1,8 +0,0 @@
-dagster_snowflake_pyspark/__init__.py,sha256=dWjc48ASaSugpewtr71snkK6nm7bNXg4EYoloTdzv5E,421
-dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py,sha256=GBd6FpCINCKEsVFFPq1zPdTIjkfhLrny4w_xtZwYX24,9245
-dagster_snowflake_pyspark/version.py,sha256=WRDggaYcSjn5kcl6mpNUIM5VWOlcd9HZTeEz-I3_kvQ,23
-dagster_snowflake_pyspark-0.20.2.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
-dagster_snowflake_pyspark-0.20.2.dist-info/METADATA,sha256=HOCkicrfiMn_UA5_QQoKGY2nJgOCxTUChjPCIC9vVBc,810
-dagster_snowflake_pyspark-0.20.2.dist-info/WHEEL,sha256=p46_5Uhzqz6AzeSosiOnxK-zmFja1i22CrQCjmYe8ec,92
-dagster_snowflake_pyspark-0.20.2.dist-info/top_level.txt,sha256=NH48Qcesg34H5Ih-KKuOhwmWzvcaqVkN9lvADwCJv8U,26
-dagster_snowflake_pyspark-0.20.2.dist-info/RECORD,,

{dagster_snowflake_pyspark-0.20.2.dist-info → dagster_snowflake_pyspark-0.28.2.dist-info/licenses}/LICENSE (file without changes)

{dagster_snowflake_pyspark-0.20.2.dist-info → dagster_snowflake_pyspark-0.28.2.dist-info}/top_level.txt (file without changes)