dagster-duckdb-pyspark 0.21.8__py3-none-any.whl → 0.22.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-duckdb-pyspark might be problematic. Click here for more details.
- dagster_duckdb_pyspark/duckdb_pyspark_type_handler.py +65 -15
- dagster_duckdb_pyspark/version.py +1 -1
- {dagster_duckdb_pyspark-0.21.8.dist-info → dagster_duckdb_pyspark-0.22.13.dist-info}/METADATA +4 -3
- dagster_duckdb_pyspark-0.22.13.dist-info/RECORD +9 -0
- dagster_duckdb_pyspark-0.21.8.dist-info/RECORD +0 -9
- {dagster_duckdb_pyspark-0.21.8.dist-info → dagster_duckdb_pyspark-0.22.13.dist-info}/LICENSE +0 -0
- {dagster_duckdb_pyspark-0.21.8.dist-info → dagster_duckdb_pyspark-0.22.13.dist-info}/WHEEL +0 -0
- {dagster_duckdb_pyspark-0.21.8.dist-info → dagster_duckdb_pyspark-0.22.13.dist-info}/top_level.txt +0 -0
|
@@ -120,17 +120,40 @@ Examples:
|
|
|
120
120
|
def my_table() -> pyspark.sql.DataFrame: # the name of the asset will be the table name
|
|
121
121
|
...
|
|
122
122
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
123
|
+
defs = Definitions(
|
|
124
|
+
assets=[my_table],
|
|
125
|
+
resources={"io_manager": duckdb_pyspark_io_manager.configured({"database": "my_db.duckdb"})}
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
You can set a default schema to store the assets using the ``schema`` configuration value of the DuckDB I/O
|
|
129
|
+
Manager. This schema will be used if no other schema is specified directly on an asset or op.
|
|
130
|
+
|
|
131
|
+
.. code-block:: python
|
|
132
|
+
|
|
133
|
+
defs = Definitions(
|
|
134
|
+
assets=[my_table],
|
|
135
|
+
resources={"io_manager": duckdb_pyspark_io_manager.configured({"database": "my_db.duckdb", "schema": "my_schema"})}
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
On individual assets, you can also specify the schema where they should be stored using metadata or
|
|
139
|
+
by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
|
|
140
|
+
take precedence.
|
|
141
|
+
|
|
142
|
+
.. code-block:: python
|
|
143
|
+
|
|
144
|
+
@asset(
|
|
145
|
+
key_prefix=["my_schema"] # will be used as the schema in duckdb
|
|
128
146
|
)
|
|
147
|
+
def my_table() -> pyspark.sql.DataFrame:
|
|
148
|
+
...
|
|
129
149
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
150
|
+
@asset(
|
|
151
|
+
metadata={"schema": "my_schema"} # will be used as the schema in duckdb
|
|
152
|
+
)
|
|
153
|
+
def my_other_table() -> pyspark.sql.DataFrame:
|
|
154
|
+
...
|
|
155
|
+
|
|
156
|
+
For ops, the schema can be specified by including a "schema" entry in output metadata.
|
|
134
157
|
|
|
135
158
|
.. code-block:: python
|
|
136
159
|
|
|
@@ -138,9 +161,10 @@ Examples:
|
|
|
138
161
|
out={"my_table": Out(metadata={"schema": "my_schema"})}
|
|
139
162
|
)
|
|
140
163
|
def make_my_table() -> pyspark.sql.DataFrame:
|
|
141
|
-
# the returned value will be stored at my_schema.my_table
|
|
142
164
|
...
|
|
143
165
|
|
|
166
|
+
If none of these is provided, the schema will default to "public".
|
|
167
|
+
|
|
144
168
|
To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
|
|
145
169
|
In or AssetIn.
|
|
146
170
|
|
|
@@ -180,10 +204,35 @@ class DuckDBPySparkIOManager(DuckDBIOManager):
|
|
|
180
204
|
resources={"io_manager": DuckDBPySparkIOManager(database="my_db.duckdb")}
|
|
181
205
|
)
|
|
182
206
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
207
|
+
You can set a default schema to store the assets using the ``schema`` configuration value of the DuckDB I/O
|
|
208
|
+
Manager. This schema will be used if no other schema is specified directly on an asset or op.
|
|
209
|
+
|
|
210
|
+
.. code-block:: python
|
|
211
|
+
|
|
212
|
+
defs = Definitions(
|
|
213
|
+
assets=[my_table],
|
|
214
|
+
resources={"io_manager": DuckDBPySparkIOManager(database="my_db.duckdb", schema="my_schema")}
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
On individual assets, you can also specify the schema where they should be stored using metadata or
|
|
218
|
+
by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
|
|
219
|
+
take precedence.
|
|
220
|
+
|
|
221
|
+
.. code-block:: python
|
|
222
|
+
|
|
223
|
+
@asset(
|
|
224
|
+
key_prefix=["my_schema"] # will be used as the schema in duckdb
|
|
225
|
+
)
|
|
226
|
+
def my_table() -> pyspark.sql.DataFrame:
|
|
227
|
+
...
|
|
228
|
+
|
|
229
|
+
@asset(
|
|
230
|
+
metadata={"schema": "my_schema"} # will be used as the schema in duckdb
|
|
231
|
+
)
|
|
232
|
+
def my_other_table() -> pyspark.sql.DataFrame:
|
|
233
|
+
...
|
|
234
|
+
|
|
235
|
+
For ops, the schema can be specified by including a "schema" entry in output metadata.
|
|
187
236
|
|
|
188
237
|
.. code-block:: python
|
|
189
238
|
|
|
@@ -191,9 +240,10 @@ class DuckDBPySparkIOManager(DuckDBIOManager):
|
|
|
191
240
|
out={"my_table": Out(metadata={"schema": "my_schema"})}
|
|
192
241
|
)
|
|
193
242
|
def make_my_table() -> pyspark.sql.DataFrame:
|
|
194
|
-
# the returned value will be stored at my_schema.my_table
|
|
195
243
|
...
|
|
196
244
|
|
|
245
|
+
If none of these is provided, the schema will default to "public".
|
|
246
|
+
|
|
197
247
|
To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
|
|
198
248
|
In or AssetIn.
|
|
199
249
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.21.8"
|
|
1
|
+
__version__ = "0.22.13"
|
{dagster_duckdb_pyspark-0.21.8.dist-info → dagster_duckdb_pyspark-0.22.13.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dagster-duckdb-pyspark
|
|
3
|
-
Version: 0.21.8
|
|
3
|
+
Version: 0.22.13
|
|
4
4
|
Summary: Package for storing PySpark DataFrames in DuckDB.
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-duckb-pyspark
|
|
6
6
|
Author: Dagster Labs
|
|
@@ -11,9 +11,10 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
|
+
Requires-Python: >=3.8,<3.13
|
|
14
15
|
License-File: LICENSE
|
|
15
|
-
Requires-Dist: dagster ==1.5.8
|
|
16
|
-
Requires-Dist: dagster-duckdb ==0.21.8
|
|
16
|
+
Requires-Dist: dagster ==1.6.13
|
|
17
|
+
Requires-Dist: dagster-duckdb ==0.22.13
|
|
17
18
|
Requires-Dist: pyspark >=3
|
|
18
19
|
Requires-Dist: pandas <2.1
|
|
19
20
|
Requires-Dist: pyarrow
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
dagster_duckdb_pyspark/__init__.py,sha256=KjwD42HKQJslK2WPFg2F7mvHe1hPyrp02xSWM0Az39Y,382
|
|
2
|
+
dagster_duckdb_pyspark/duckdb_pyspark_type_handler.py,sha256=Tqo9McLXY_dmzgszA3nK5X7Hbws7jd8WuXMSXWfMDaQ,9588
|
|
3
|
+
dagster_duckdb_pyspark/py.typed,sha256=mDShSrm8qg9qjacQc2F-rI8ATllqP6EdgHuEYxuCXZ0,7
|
|
4
|
+
dagster_duckdb_pyspark/version.py,sha256=VAcGmKdxSYGbjyk0WfyoGR-XHmPIVhRnru9ULJVVTXg,24
|
|
5
|
+
dagster_duckdb_pyspark-0.22.13.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
|
|
6
|
+
dagster_duckdb_pyspark-0.22.13.dist-info/METADATA,sha256=VGH_daPnQXGLcgFaDM1EJbs52lV9TehLuhVuJ5BYNdc,774
|
|
7
|
+
dagster_duckdb_pyspark-0.22.13.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
8
|
+
dagster_duckdb_pyspark-0.22.13.dist-info/top_level.txt,sha256=UYh0E2YiAlK01-DAkx0eikRaH-TIk0n9jijQK2joJBs,23
|
|
9
|
+
dagster_duckdb_pyspark-0.22.13.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
dagster_duckdb_pyspark/__init__.py,sha256=KjwD42HKQJslK2WPFg2F7mvHe1hPyrp02xSWM0Az39Y,382
|
|
2
|
-
dagster_duckdb_pyspark/duckdb_pyspark_type_handler.py,sha256=t9lqCpo-ibaThEFzxjqownu_yF_tFpVvQO6_ITgPLlY,7980
|
|
3
|
-
dagster_duckdb_pyspark/py.typed,sha256=mDShSrm8qg9qjacQc2F-rI8ATllqP6EdgHuEYxuCXZ0,7
|
|
4
|
-
dagster_duckdb_pyspark/version.py,sha256=jPkfpQ5mRGK2m0RepyLzFn3H4QXjLiZoPEXnwuz2MwE,23
|
|
5
|
-
dagster_duckdb_pyspark-0.21.8.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
|
|
6
|
-
dagster_duckdb_pyspark-0.21.8.dist-info/METADATA,sha256=5RU5WccbNtJUkS61BEsOvcVCzUjDd8hCzmLKl6asWZ8,742
|
|
7
|
-
dagster_duckdb_pyspark-0.21.8.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
8
|
-
dagster_duckdb_pyspark-0.21.8.dist-info/top_level.txt,sha256=UYh0E2YiAlK01-DAkx0eikRaH-TIk0n9jijQK2joJBs,23
|
|
9
|
-
dagster_duckdb_pyspark-0.21.8.dist-info/RECORD,,
|
{dagster_duckdb_pyspark-0.21.8.dist-info → dagster_duckdb_pyspark-0.22.13.dist-info}/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|
{dagster_duckdb_pyspark-0.21.8.dist-info → dagster_duckdb_pyspark-0.22.13.dist-info}/top_level.txt
RENAMED
|
File without changes
|