dagster-duckdb-pyspark 0.20.15__py3-none-any.whl → 0.25.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dagster-duckdb-pyspark might be problematic. Click here for more details.

@@ -1,10 +1,10 @@
1
1
  from dagster._core.libraries import DagsterLibraryRegistry
2
2
 
3
- from .duckdb_pyspark_type_handler import (
3
+ from dagster_duckdb_pyspark.duckdb_pyspark_type_handler import (
4
4
  DuckDBPySparkIOManager as DuckDBPySparkIOManager,
5
5
  DuckDBPySparkTypeHandler as DuckDBPySparkTypeHandler,
6
6
  duckdb_pyspark_io_manager as duckdb_pyspark_io_manager,
7
7
  )
8
- from .version import __version__
8
+ from dagster_duckdb_pyspark.version import __version__
9
9
 
10
10
  DagsterLibraryRegistry.register("dagster-duckdb-pyspark", __version__)
@@ -1,15 +1,12 @@
1
- from typing import Optional, Sequence, Type
1
+ from collections.abc import Sequence
2
+ from typing import Optional
2
3
 
3
4
  import pyarrow as pa
4
5
  import pyspark
5
6
  import pyspark.sql
6
7
  from dagster import InputContext, MetadataValue, OutputContext, TableColumn, TableSchema
7
8
  from dagster._core.storage.db_io_manager import DbTypeHandler, TableSlice
8
- from dagster_duckdb.io_manager import (
9
- DuckDbClient,
10
- DuckDBIOManager,
11
- build_duckdb_io_manager,
12
- )
9
+ from dagster_duckdb.io_manager import DuckDbClient, DuckDBIOManager, build_duckdb_io_manager
13
10
  from pyspark.sql import SparkSession
14
11
  from pyspark.sql.types import StructType
15
12
 
@@ -120,17 +117,40 @@ Examples:
120
117
  def my_table() -> pyspark.sql.DataFrame: # the name of the asset will be the table name
121
118
  ...
122
119
 
123
- @repository
124
- def my_repo():
125
- return with_resources(
126
- [my_table],
127
- {"io_manager": duckdb_pyspark_io_manager.configured({"database": "my_db.duckdb"})}
120
+ defs = Definitions(
121
+ assets=[my_table],
122
+ resources={"io_manager": duckdb_pyspark_io_manager.configured({"database": "my_db.duckdb"})}
123
+ )
124
+
125
+ You can set a default schema to store the assets using the ``schema`` configuration value of the DuckDB I/O
126
+ Manager. This schema will be used if no other schema is specified directly on an asset or op.
127
+
128
+ .. code-block:: python
129
+
130
+ defs = Definitions(
131
+ assets=[my_table],
132
+ resources={"io_manager": duckdb_pyspark_io_manager.configured({"database": "my_db.duckdb", "schema": "my_schema"})}
133
+ )
134
+
135
+ On individual assets, you can also specify the schema where they should be stored using metadata or
136
+ by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
137
+ take precedence.
138
+
139
+ .. code-block:: python
140
+
141
+ @asset(
142
+ key_prefix=["my_schema"] # will be used as the schema in duckdb
143
+ )
144
+ def my_table() -> pyspark.sql.DataFrame:
145
+ ...
146
+
147
+ @asset(
148
+ metadata={"schema": "my_schema"} # will be used as the schema in duckdb
128
149
  )
150
+ def my_other_table() -> pyspark.sql.DataFrame:
151
+ ...
129
152
 
130
- If you do not provide a schema, Dagster will determine a schema based on the assets and ops using
131
- the I/O Manager. For assets, the schema will be determined from the asset key.
132
- For ops, the schema can be specified by including a "schema" entry in output metadata. If "schema" is not provided
133
- via config or on the asset/op, "public" will be used for the schema.
153
+ For ops, the schema can be specified by including a "schema" entry in output metadata.
134
154
 
135
155
  .. code-block:: python
136
156
 
@@ -138,9 +158,10 @@ Examples:
138
158
  out={"my_table": Out(metadata={"schema": "my_schema"})}
139
159
  )
140
160
  def make_my_table() -> pyspark.sql.DataFrame:
141
- # the returned value will be stored at my_schema.my_table
142
161
  ...
143
162
 
163
+ If none of these is provided, the schema will default to "public".
164
+
144
165
  To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
145
166
  In or AssetIn.
146
167
 
@@ -180,10 +201,35 @@ class DuckDBPySparkIOManager(DuckDBIOManager):
180
201
  resources={"io_manager": DuckDBPySparkIOManager(database="my_db.duckdb")}
181
202
  )
182
203
 
183
- If you do not provide a schema, Dagster will determine a schema based on the assets and ops using
184
- the I/O Manager. For assets, the schema will be determined from the asset key, as in the above example.
185
- For ops, the schema can be specified by including a "schema" entry in output metadata. If "schema" is not provided
186
- via config or on the asset/op, "public" will be used for the schema.
204
+ You can set a default schema to store the assets using the ``schema`` configuration value of the DuckDB I/O
205
+ Manager. This schema will be used if no other schema is specified directly on an asset or op.
206
+
207
+ .. code-block:: python
208
+
209
+ defs = Definitions(
210
+ assets=[my_table],
211
+ resources={"io_manager": DuckDBPySparkIOManager(database="my_db.duckdb", schema="my_schema")}
212
+ )
213
+
214
+ On individual assets, you can also specify the schema where they should be stored using metadata or
215
+ by adding a ``key_prefix`` to the asset key. If both ``key_prefix`` and metadata are defined, the metadata will
216
+ take precedence.
217
+
218
+ .. code-block:: python
219
+
220
+ @asset(
221
+ key_prefix=["my_schema"] # will be used as the schema in duckdb
222
+ )
223
+ def my_table() -> pyspark.sql.DataFrame:
224
+ ...
225
+
226
+ @asset(
227
+ metadata={"schema": "my_schema"} # will be used as the schema in duckdb
228
+ )
229
+ def my_other_table() -> pyspark.sql.DataFrame:
230
+ ...
231
+
232
+ For ops, the schema can be specified by including a "schema" entry in output metadata.
187
233
 
188
234
  .. code-block:: python
189
235
 
@@ -191,9 +237,10 @@ class DuckDBPySparkIOManager(DuckDBIOManager):
191
237
  out={"my_table": Out(metadata={"schema": "my_schema"})}
192
238
  )
193
239
  def make_my_table() -> pyspark.sql.DataFrame:
194
- # the returned value will be stored at my_schema.my_table
195
240
  ...
196
241
 
242
+ If none of these is provided, the schema will default to "public".
243
+
197
244
  To only use specific columns of a table as input to a downstream op or asset, add the metadata "columns" to the
198
245
  In or AssetIn.
199
246
 
@@ -217,5 +264,5 @@ class DuckDBPySparkIOManager(DuckDBIOManager):
217
264
  return [DuckDBPySparkTypeHandler()]
218
265
 
219
266
  @staticmethod
220
- def default_load_type() -> Optional[Type]:
267
+ def default_load_type() -> Optional[type]:
221
268
  return pyspark.sql.DataFrame
@@ -1 +1 @@
1
- __version__ = "0.20.15"
1
+ __version__ = "0.25.9"
@@ -1,20 +1,20 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-duckdb-pyspark
3
- Version: 0.20.15
3
+ Version: 0.25.9
4
4
  Summary: Package for storing PySpark DataFrames in DuckDB.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-duckb-pyspark
6
6
  Author: Dagster Labs
7
7
  Author-email: hello@dagsterlabs.com
8
8
  License: Apache-2.0
9
- Classifier: Programming Language :: Python :: 3.8
10
9
  Classifier: Programming Language :: Python :: 3.9
11
10
  Classifier: Programming Language :: Python :: 3.10
12
11
  Classifier: License :: OSI Approved :: Apache Software License
13
12
  Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.9,<3.13
14
14
  License-File: LICENSE
15
- Requires-Dist: dagster (==1.4.15)
16
- Requires-Dist: dagster-duckdb (==0.20.15)
17
- Requires-Dist: pyspark (>=3)
18
- Requires-Dist: pandas (<2.1)
15
+ Requires-Dist: dagster ==1.9.9
16
+ Requires-Dist: dagster-duckdb ==0.25.9
17
+ Requires-Dist: pyspark >=3
18
+ Requires-Dist: pandas
19
19
  Requires-Dist: pyarrow
20
20
 
@@ -0,0 +1,9 @@
1
+ dagster_duckdb_pyspark/__init__.py,sha256=nNQtyXTaozhmDECy2dlb2OvT-6zz5A6l9By91wxG6y0,426
2
+ dagster_duckdb_pyspark/duckdb_pyspark_type_handler.py,sha256=OJgWa3CMlE_LpSvqTGFzEdg_0zQnpQ_0ctrBYRNlkAU,9592
3
+ dagster_duckdb_pyspark/py.typed,sha256=mDShSrm8qg9qjacQc2F-rI8ATllqP6EdgHuEYxuCXZ0,7
4
+ dagster_duckdb_pyspark/version.py,sha256=NsKiCCQq5j7wW1paL-Bw27h63w_P0r0bIHvsX9TsjGY,23
5
+ dagster_duckdb_pyspark-0.25.9.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
6
+ dagster_duckdb_pyspark-0.25.9.dist-info/METADATA,sha256=MytvcUUtHjknO6PoDWecMuDt60LExUrOMFqxV3e_Tzg,716
7
+ dagster_duckdb_pyspark-0.25.9.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
8
+ dagster_duckdb_pyspark-0.25.9.dist-info/top_level.txt,sha256=UYh0E2YiAlK01-DAkx0eikRaH-TIk0n9jijQK2joJBs,23
9
+ dagster_duckdb_pyspark-0.25.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.33.6)
2
+ Generator: bdist_wheel (0.41.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,9 +0,0 @@
1
- dagster_duckdb_pyspark/__init__.py,sha256=KjwD42HKQJslK2WPFg2F7mvHe1hPyrp02xSWM0Az39Y,382
2
- dagster_duckdb_pyspark/duckdb_pyspark_type_handler.py,sha256=t9lqCpo-ibaThEFzxjqownu_yF_tFpVvQO6_ITgPLlY,7980
3
- dagster_duckdb_pyspark/py.typed,sha256=mDShSrm8qg9qjacQc2F-rI8ATllqP6EdgHuEYxuCXZ0,7
4
- dagster_duckdb_pyspark/version.py,sha256=qx2qBaxijq_GGtI51BGXX5lmkBS7HlcGbPCmaum1Gf8,24
5
- dagster_duckdb_pyspark-0.20.15.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
6
- dagster_duckdb_pyspark-0.20.15.dist-info/METADATA,sha256=-7wm5PZNbYKRTK9emLVdKn8zFWFFY6CzT-1ABRD6hZ0,753
7
- dagster_duckdb_pyspark-0.20.15.dist-info/WHEEL,sha256=p46_5Uhzqz6AzeSosiOnxK-zmFja1i22CrQCjmYe8ec,92
8
- dagster_duckdb_pyspark-0.20.15.dist-info/top_level.txt,sha256=UYh0E2YiAlK01-DAkx0eikRaH-TIk0n9jijQK2joJBs,23
9
- dagster_duckdb_pyspark-0.20.15.dist-info/RECORD,,