dagster-snowflake-pyspark 0.20.2__py3-none-any.whl → 0.28.2__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
--- a/dagster_snowflake_pyspark/__init__.py
+++ b/dagster_snowflake_pyspark/__init__.py
@@ -1,10 +1,10 @@
-from dagster._core.libraries import DagsterLibraryRegistry
+from dagster_shared.libraries import DagsterLibraryRegistry
 
-from .snowflake_pyspark_type_handler import (
+from dagster_snowflake_pyspark.snowflake_pyspark_type_handler import (
     SnowflakePySparkIOManager as SnowflakePySparkIOManager,
     SnowflakePySparkTypeHandler as SnowflakePySparkTypeHandler,
     snowflake_pyspark_io_manager as snowflake_pyspark_io_manager,
 )
-from .version import __version__ as __version__
+from dagster_snowflake_pyspark.version import __version__ as __version__
 
 DagsterLibraryRegistry.register("dagster-snowflake-pyspark", __version__)
--- /dev/null
+++ b/dagster_snowflake_pyspark/constants.py
@@ -0,0 +1,5 @@
+# Description: This file contains the Snowflake connection identifiers for the Snowflake partner account.
+# The connection identifiers are used to identify the partner account when connecting to Snowflake.
+# We use different connection identifiers for different connection code paths to ensure that each is
+# working as expected.
+SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER_PYSPARK = "DagsterLabs_Dagster_Pyspark"
--- /dev/null
+++ b/dagster_snowflake_pyspark/py.typed
@@ -0,0 +1 @@
+partial
--- a/dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py
+++ b/dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py
@@ -1,4 +1,5 @@
-from typing import Mapping, Optional, Sequence, Type
+from collections.abc import Mapping, Sequence
+from typing import Optional
 
 import dagster._check as check
 from dagster import InputContext, MetadataValue, OutputContext, TableColumn, TableSchema
@@ -9,6 +10,8 @@ from dagster_snowflake.snowflake_io_manager import SnowflakeDbClient
 from pyspark.sql import DataFrame, SparkSession
 from pyspark.sql.types import StructType
 
+from dagster_snowflake_pyspark.constants import SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER_PYSPARK
+
 SNOWFLAKE_CONNECTOR = "net.snowflake.spark.snowflake"
 
 
@@ -25,6 +28,7 @@ def _get_snowflake_options(config, table_slice: TableSlice) -> Mapping[str, str]
         "sfDatabase": config["database"],
         "sfSchema": table_slice.schema,
         "sfWarehouse": config["warehouse"],
+        "APPLICATION": SNOWFLAKE_PARTNER_CONNECTION_IDENTIFIER_PYSPARK,
     }
 
     return conf
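
For context, the options mapping built by _get_snowflake_options is what the Spark
Snowflake connector consumes on every read and write; the APPLICATION key added in
this release tags each connection with the partner identifier from constants.py. A
minimal sketch of how such an options dict is consumed by the connector (the account,
credentials, and table name below are hypothetical placeholders, not values from this
package):

    from pyspark.sql import SparkSession

    SNOWFLAKE_CONNECTOR = "net.snowflake.spark.snowflake"

    spark = SparkSession.builder.getOrCreate()

    options = {
        "sfURL": "my_account.snowflakecomputing.com",  # hypothetical account
        "sfUser": "my_user",                           # hypothetical credentials
        "sfPassword": "...",
        "sfDatabase": "MY_DATABASE",
        "sfSchema": "PUBLIC",
        "sfWarehouse": "MY_WAREHOUSE",
        # The key added in this release: identifies Dagster as the partner
        # application on the Snowflake side.
        "APPLICATION": "DagsterLabs_Dagster_Pyspark",
    }

    # Read a whole table through the connector using the options above.
    df = (
        spark.read.format(SNOWFLAKE_CONNECTOR)
        .options(**options)
        .option("dbtable", "MY_TABLE")  # hypothetical table
        .load()
    )
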
@@ -52,7 +56,7 @@ class SnowflakePySparkTypeHandler(DbTypeHandler[DataFrame]):
         def my_table() -> pd.DataFrame:  # the name of the asset will be the table name
             ...
 
-        defs = Definitions(
+        Definitions(
             assets=[my_table],
             resources={
                 "io_manager": MySnowflakeIOManager(database="MY_DATABASE", account=EnvVar("SNOWFLAKE_ACCOUNT"), warehouse="my_warehouse", ...)
@@ -61,7 +65,7 @@ class SnowflakePySparkTypeHandler(DbTypeHandler[DataFrame]):
 
     """
 
-    def handle_output(
+    def handle_output(  # pyright: ignore[reportIncompatibleMethodOverride]
         self, context: OutputContext, table_slice: TableSlice, obj: DataFrame, _
     ) -> Mapping[str, RawMetadataValue]:
         options = _get_snowflake_options(context.resource_config, table_slice)
@@ -83,7 +87,7 @@ class SnowflakePySparkTypeHandler(DbTypeHandler[DataFrame]):
             ),
         }
 
-    def load_input(self, context: InputContext, table_slice: TableSlice, _) -> DataFrame:
+    def load_input(self, context: InputContext, table_slice: TableSlice, _) -> DataFrame:  # pyright: ignore[reportIncompatibleMethodOverride]
         options = _get_snowflake_options(context.resource_config, table_slice)
 
         spark = SparkSession.builder.getOrCreate()  # type: ignore
@@ -128,7 +132,7 @@ Examples:
         def my_table() -> DataFrame:  # the name of the asset will be the table name
             ...
 
-        defs = Definitions(
+        Definitions(
             assets=[my_table],
             resources={
                 "io_manager": snowflake_pyspark_io_manager.configured({
@@ -143,10 +147,38 @@ Examples:
 
     Note that the warehouse configuration value is required when using the snowflake_pyspark_io_manager
 
-    If you do not provide a schema, Dagster will determine a schema based on the assets and ops using
-    the I/O Manager. For assets, the schema will be determined from the asset key.
-    For ops, the schema can be specified by including a "schema" entry in output metadata. If "schema" is not provided
-    via config or on the asset/op, "public" will be used for the schema.
+    You can set a default schema to store the assets using the ``schema`` configuration value of the Snowflake I/O
+    Manager. This schema will be used if no other schema is specified directly on an asset or op.
+
+    .. code-block:: python
+
+        Definitions(
+            assets=[my_table],
+            resources={"io_manager": snowflake_pyspark_io_manager.configured(
+                {"database": "my_database", "schema": "my_schema", ...}  # will be used as the schema
+            )}
+        )
+
+
+    On individual assets, you can also specify the schema where they should be stored using metadata or
+    by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
+    take precedence.
+
+    .. code-block:: python
+
+        @asset(
+            key_prefix=["my_schema"]  # will be used as the schema in snowflake
+        )
+        def my_table() -> DataFrame:
+            ...
+
+        @asset(
+            metadata={"schema": "my_schema"}  # will be used as the schema in snowflake
+        )
+        def my_other_table() -> DataFrame:
+            ...
+
+    For ops, the schema can be specified by including a "schema" entry in output metadata.
 
     .. code-block:: python
 
@@ -154,9 +186,10 @@ Examples:
             out={"my_table": Out(metadata={"schema": "my_schema"})}
         )
         def make_my_table() -> DataFrame:
-            # the returned value will be stored at my_schema.my_table
             ...
 
+    If none of these is provided, the schema will default to "public".
+
     To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
     In or AssetIn.
 
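
The hunk above ends before the docstring's accompanying snippet, so here is a minimal
sketch of the "columns" metadata it describes (the asset and column names are
illustrative):

    from dagster import AssetIn, asset
    from pyspark.sql import DataFrame

    @asset(
        ins={"my_table": AssetIn(metadata={"columns": ["a"]})}
    )
    def my_new_table(my_table: DataFrame) -> DataFrame:
        # my_table is loaded with only the column "a"
        ...
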
@@ -193,7 +226,7 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
         def my_table() -> DataFrame:  # the name of the asset will be the table name
             ...
 
-        defs = Definitions(
+        Definitions(
             assets=[my_table],
             resources={
                 "io_manager": SnowflakePySparkIOManager(
@@ -208,10 +241,38 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
 
     Note that the warehouse configuration value is required when using the SnowflakePySparkIOManager
 
-    If you do not provide a schema, Dagster will determine a schema based on the assets and ops using
-    the I/O Manager. For assets, the schema will be determined from the asset key, as in the above example.
-    For ops, the schema can be specified by including a "schema" entry in output metadata. If "schema" is not provided
-    via config or on the asset/op, "public" will be used for the schema.
+    You can set a default schema to store the assets using the ``schema`` configuration value of the Snowflake I/O
+    Manager. This schema will be used if no other schema is specified directly on an asset or op.
+
+    .. code-block:: python
+
+        Definitions(
+            assets=[my_table],
+            resources={
+                "io_manager": SnowflakePySparkIOManager(database="my_database", schema="my_schema", ...)
+            }
+        )
+
+
+    On individual assets, you can also specify the schema where they should be stored using metadata or
+    by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
+    take precedence.
+
+    .. code-block:: python
+
+        @asset(
+            key_prefix=["my_schema"]  # will be used as the schema in snowflake
+        )
+        def my_table() -> DataFrame:
+            ...
+
+        @asset(
+            metadata={"schema": "my_schema"}  # will be used as the schema in snowflake
+        )
+        def my_other_table() -> DataFrame:
+            ...
+
+    For ops, the schema can be specified by including a "schema" entry in output metadata.
 
     .. code-block:: python
 
@@ -219,9 +280,9 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
             out={"my_table": Out(metadata={"schema": "my_schema"})}
         )
         def make_my_table() -> DataFrame:
-            # the returned value will be stored at my_schema.my_table
             ...
 
+    If none of these is provided, the schema will default to "public".
     To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
     In or AssetIn.
 
@@ -245,5 +306,5 @@ class SnowflakePySparkIOManager(SnowflakeIOManager):
         return [SnowflakePySparkTypeHandler()]
 
     @staticmethod
-    def default_load_type() -> Optional[Type]:
+    def default_load_type() -> Optional[type]:
         return DataFrame
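
Returning DataFrame from default_load_type tells the I/O manager which type to fall
back to when an asset or op leaves an input or output un-annotated. A minimal sketch
of the behavior this enables (the asset name is illustrative):

    from dagster import asset

    # With no return annotation, the I/O manager cannot infer a type from the
    # function signature, so it falls back to default_load_type() and handles
    # the value as a pyspark.sql.DataFrame.
    @asset
    def my_unannotated_table():
        ...
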
--- a/dagster_snowflake_pyspark/version.py
+++ b/dagster_snowflake_pyspark/version.py
@@ -1 +1 @@
-__version__ = "0.20.2"
+__version__ = "0.28.2"
--- /dev/null
+++ b/dagster_snowflake_pyspark-0.28.2.dist-info/METADATA
@@ -0,0 +1,28 @@
+Metadata-Version: 2.4
+Name: dagster-snowflake-pyspark
+Version: 0.28.2
+Summary: Package for integrating Snowflake and PySpark with Dagster.
+Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-snowflake-pyspark
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.10
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10,<3.14
+License-File: LICENSE
+Requires-Dist: dagster==1.12.2
+Requires-Dist: dagster-snowflake==0.28.2
+Requires-Dist: pyspark<4
+Requires-Dist: requests
+Requires-Dist: sqlalchemy!=1.4.42
+Requires-Dist: snowflake-sqlalchemy>=1.2
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
--- /dev/null
+++ b/dagster_snowflake_pyspark-0.28.2.dist-info/RECORD
@@ -0,0 +1,10 @@
+dagster_snowflake_pyspark/__init__.py,sha256=6uFEmuB7ctAVeYqjIvlpUkS3H6NsfTkCTGxDCnFdDOk,472
+dagster_snowflake_pyspark/constants.py,sha256=0GwhKlR3tzwIv2FbgK9e2D78iAPWXwhni_bSdfoFyNM,410
+dagster_snowflake_pyspark/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
+dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py,sha256=Hn3izqO4ctRBkFOYmocRsDtgWzyyqzEy0ZjvM1eSCcg,11157
+dagster_snowflake_pyspark/version.py,sha256=K-TM2fq9AmH_Dk8Cadam72wILDZ_6qftLHvY9P1Fc3I,23
+dagster_snowflake_pyspark-0.28.2.dist-info/licenses/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+dagster_snowflake_pyspark-0.28.2.dist-info/METADATA,sha256=DY1OKr4Dwnfn1Up_WY_4R_aJb3duDFOzzgJgnWi0VNs,918
+dagster_snowflake_pyspark-0.28.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dagster_snowflake_pyspark-0.28.2.dist-info/top_level.txt,sha256=NH48Qcesg34H5Ih-KKuOhwmWzvcaqVkN9lvADwCJv8U,26
+dagster_snowflake_pyspark-0.28.2.dist-info/RECORD,,
--- a/dagster_snowflake_pyspark-0.20.2.dist-info/WHEEL
+++ b/dagster_snowflake_pyspark-0.28.2.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.33.6)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
--- a/dagster_snowflake_pyspark-0.20.2.dist-info/METADATA
+++ /dev/null
@@ -1,21 +0,0 @@
-Metadata-Version: 2.1
-Name: dagster-snowflake-pyspark
-Version: 0.20.2
-Summary: Package for integrating Snowflake and PySpark with Dagster.
-Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-snowflake-pyspark
-Author: Elementl
-Author-email: hello@elementl.com
-License: Apache-2.0
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-License-File: LICENSE
-Requires-Dist: dagster (==1.4.2)
-Requires-Dist: dagster-snowflake (==0.20.2)
-Requires-Dist: pyspark
-Requires-Dist: requests
-Requires-Dist: sqlalchemy (!=1.4.42)
-Requires-Dist: snowflake-sqlalchemy (>=1.2)
-
--- a/dagster_snowflake_pyspark-0.20.2.dist-info/RECORD
+++ /dev/null
@@ -1,8 +0,0 @@
-dagster_snowflake_pyspark/__init__.py,sha256=dWjc48ASaSugpewtr71snkK6nm7bNXg4EYoloTdzv5E,421
-dagster_snowflake_pyspark/snowflake_pyspark_type_handler.py,sha256=GBd6FpCINCKEsVFFPq1zPdTIjkfhLrny4w_xtZwYX24,9245
-dagster_snowflake_pyspark/version.py,sha256=WRDggaYcSjn5kcl6mpNUIM5VWOlcd9HZTeEz-I3_kvQ,23
-dagster_snowflake_pyspark-0.20.2.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
-dagster_snowflake_pyspark-0.20.2.dist-info/METADATA,sha256=HOCkicrfiMn_UA5_QQoKGY2nJgOCxTUChjPCIC9vVBc,810
-dagster_snowflake_pyspark-0.20.2.dist-info/WHEEL,sha256=p46_5Uhzqz6AzeSosiOnxK-zmFja1i22CrQCjmYe8ec,92
-dagster_snowflake_pyspark-0.20.2.dist-info/top_level.txt,sha256=NH48Qcesg34H5Ih-KKuOhwmWzvcaqVkN9lvADwCJv8U,26
-dagster_snowflake_pyspark-0.20.2.dist-info/RECORD,,