dagster-pyspark 0.20.17__py3-none-any.whl → 0.27.16__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- dagster_pyspark/__init__.py +12 -7
- dagster_pyspark/py.typed +1 -0
- dagster_pyspark/resources.py +7 -7
- dagster_pyspark/utils.py +21 -3
- dagster_pyspark/version.py +1 -1
- {dagster_pyspark-0.20.17.dist-info → dagster_pyspark-0.27.16.dist-info}/METADATA +17 -7
- dagster_pyspark-0.27.16.dist-info/RECORD +11 -0
- {dagster_pyspark-0.20.17.dist-info → dagster_pyspark-0.27.16.dist-info}/WHEEL +1 -1
- {dagster_pyspark-0.20.17.dist-info → dagster_pyspark-0.27.16.dist-info/licenses}/LICENSE +1 -1
- dagster_pyspark-0.20.17.dist-info/RECORD +0 -10
- {dagster_pyspark-0.20.17.dist-info → dagster_pyspark-0.27.16.dist-info}/top_level.txt +0 -0
dagster_pyspark/__init__.py
CHANGED
|
@@ -1,15 +1,20 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dagster_shared.libraries import DagsterLibraryRegistry
|
|
2
2
|
|
|
3
|
-
from .resources import
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from dagster_pyspark.resources import (
|
|
4
|
+
LazyPySparkResource,
|
|
5
|
+
PySparkResource,
|
|
6
|
+
lazy_pyspark_resource,
|
|
7
|
+
pyspark_resource,
|
|
8
|
+
)
|
|
9
|
+
from dagster_pyspark.types import DataFrame
|
|
10
|
+
from dagster_pyspark.version import __version__
|
|
6
11
|
|
|
7
12
|
DagsterLibraryRegistry.register("dagster-pyspark", __version__)
|
|
8
13
|
|
|
9
14
|
__all__ = [
|
|
10
15
|
"DataFrame",
|
|
11
|
-
"pyspark_resource",
|
|
12
|
-
"lazy_pyspark_resource",
|
|
13
|
-
"PySparkResource",
|
|
14
16
|
"LazyPySparkResource",
|
|
17
|
+
"PySparkResource",
|
|
18
|
+
"lazy_pyspark_resource",
|
|
19
|
+
"pyspark_resource",
|
|
15
20
|
]
|
dagster_pyspark/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
partial
|
dagster_pyspark/resources.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import dagster._check as check
|
|
4
4
|
from dagster import ConfigurableResource, resource
|
|
@@ -15,9 +15,9 @@ def spark_session_from_config(spark_conf=None):
|
|
|
15
15
|
builder = SparkSession.builder
|
|
16
16
|
flat = flatten_dict(spark_conf)
|
|
17
17
|
for key, value in flat:
|
|
18
|
-
builder = builder.config(key, value)
|
|
18
|
+
builder = builder.config(key, value) # pyright: ignore[reportAttributeAccessIssue]
|
|
19
19
|
|
|
20
|
-
return builder.getOrCreate()
|
|
20
|
+
return builder.getOrCreate() # pyright: ignore[reportAttributeAccessIssue]
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class PySparkResource(ConfigurableResource):
|
|
@@ -45,7 +45,7 @@ class PySparkResource(ConfigurableResource):
|
|
|
45
45
|
my_op()
|
|
46
46
|
"""
|
|
47
47
|
|
|
48
|
-
spark_config:
|
|
48
|
+
spark_config: dict[str, Any]
|
|
49
49
|
_spark_session = PrivateAttr(default=None)
|
|
50
50
|
|
|
51
51
|
@classmethod
|
|
@@ -118,7 +118,7 @@ class LazyPySparkResource(ConfigurableResource):
|
|
|
118
118
|
my_op()
|
|
119
119
|
"""
|
|
120
120
|
|
|
121
|
-
spark_config:
|
|
121
|
+
spark_config: dict[str, Any]
|
|
122
122
|
_spark_session = PrivateAttr(default=None)
|
|
123
123
|
|
|
124
124
|
@classmethod
|
|
@@ -132,12 +132,12 @@ class LazyPySparkResource(ConfigurableResource):
|
|
|
132
132
|
@property
|
|
133
133
|
def spark_session(self) -> Any:
|
|
134
134
|
self._init_session()
|
|
135
|
-
return self._spark_session
|
|
135
|
+
return check.not_none(self._spark_session)
|
|
136
136
|
|
|
137
137
|
@property
|
|
138
138
|
def spark_context(self) -> Any:
|
|
139
139
|
self._init_session()
|
|
140
|
-
return self._spark_session.sparkContext
|
|
140
|
+
return check.not_none(self._spark_session).sparkContext
|
|
141
141
|
|
|
142
142
|
|
|
143
143
|
@dagster_maintained_resource
|
dagster_pyspark/utils.py
CHANGED
|
@@ -1,11 +1,29 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import re
|
|
2
3
|
import zipfile
|
|
3
4
|
|
|
4
5
|
import dagster._check as check
|
|
5
6
|
|
|
7
|
+
DEFAULT_EXCLUDE = [
|
|
8
|
+
r".*pytest.*",
|
|
9
|
+
r".*__pycache__.*",
|
|
10
|
+
r".*pyc$",
|
|
11
|
+
r".*\/venv\/.*",
|
|
12
|
+
r".*\.egg-info$",
|
|
13
|
+
r".*\/logs\/.*",
|
|
14
|
+
]
|
|
6
15
|
|
|
7
|
-
|
|
8
|
-
|
|
16
|
+
|
|
17
|
+
def build_pyspark_zip(zip_file, path, exclude=DEFAULT_EXCLUDE) -> None:
|
|
18
|
+
"""Archives the current path into a file named `zip_file`.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
zip_file (str): The name of the zip file to create.
|
|
22
|
+
path (str): The path to archive.
|
|
23
|
+
exclude (Optional[List[str]]): A list of regular expression patterns to exclude paths from
|
|
24
|
+
the archive. Regular expressions will be matched against the absolute filepath with
|
|
25
|
+
`re.search`.
|
|
26
|
+
"""
|
|
9
27
|
check.str_param(zip_file, "zip_file")
|
|
10
28
|
check.str_param(path, "path")
|
|
11
29
|
|
|
@@ -15,7 +33,7 @@ def build_pyspark_zip(zip_file, path):
|
|
|
15
33
|
abs_fname = os.path.join(root, fname)
|
|
16
34
|
|
|
17
35
|
# Skip various artifacts
|
|
18
|
-
if
|
|
36
|
+
if any([re.search(pattern, abs_fname) for pattern in exclude]):
|
|
19
37
|
continue
|
|
20
38
|
|
|
21
39
|
zf.write(abs_fname, os.path.relpath(os.path.join(root, fname), path))
|
dagster_pyspark/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.20.17"
|
|
1
|
+
__version__ = "0.27.16"
|
|
@@ -1,18 +1,28 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: dagster-pyspark
|
|
3
|
-
Version: 0.20.17
|
|
3
|
+
Version: 0.27.16
|
|
4
4
|
Summary: Package for PySpark Dagster framework components.
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/dagster-framework/pyspark
|
|
6
6
|
Author: Dagster Labs
|
|
7
7
|
Author-email: hello@dagsterlabs.com
|
|
8
8
|
License: Apache-2.0
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
10
9
|
Classifier: Programming Language :: Python :: 3.9
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
13
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
14
|
Classifier: Operating System :: OS Independent
|
|
15
|
+
Requires-Python: >=3.9,<3.14
|
|
14
16
|
License-File: LICENSE
|
|
15
|
-
Requires-Dist: dagster
|
|
16
|
-
Requires-Dist:
|
|
17
|
-
Requires-Dist: pyspark
|
|
18
|
-
|
|
17
|
+
Requires-Dist: dagster==1.11.16
|
|
18
|
+
Requires-Dist: dagster_spark==0.27.16
|
|
19
|
+
Requires-Dist: pyspark<5,>=3
|
|
20
|
+
Dynamic: author
|
|
21
|
+
Dynamic: author-email
|
|
22
|
+
Dynamic: classifier
|
|
23
|
+
Dynamic: home-page
|
|
24
|
+
Dynamic: license
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
Dynamic: requires-dist
|
|
27
|
+
Dynamic: requires-python
|
|
28
|
+
Dynamic: summary
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
dagster_pyspark/__init__.py,sha256=peKeESvPFAqt8aeyT4Px_SL-rALE3Yr2cNxG7cdng5Q,490
|
|
2
|
+
dagster_pyspark/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
|
|
3
|
+
dagster_pyspark/resources.py,sha256=hXkBHQMgzUyHmZN8u4UIYLwpuI4lXmB1UVg1Z5XCDhs,6073
|
|
4
|
+
dagster_pyspark/types.py,sha256=pSc9mTMkpWQbkzxc3DhstUtZyUx5dFGORaqnGzGqf2o,31132
|
|
5
|
+
dagster_pyspark/utils.py,sha256=3DKp8guR52IlUwfBbN-54aMp6qz1JiM9nH7YTVpdPJo,1204
|
|
6
|
+
dagster_pyspark/version.py,sha256=gNIskoVbgqFeY6KiyOzkhfP7GLwnNVmVeil7tBvkL44,24
|
|
7
|
+
dagster_pyspark-0.27.16.dist-info/licenses/LICENSE,sha256=4lsMW-RCvfVD4_F57wrmpe3vX1xwUk_OAKKmV_XT7Z0,11348
|
|
8
|
+
dagster_pyspark-0.27.16.dist-info/METADATA,sha256=F9zdsnU-d0KOYrOiJF5sw0kSvNiC7CX3ZrU2DX_Er_4,943
|
|
9
|
+
dagster_pyspark-0.27.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
+
dagster_pyspark-0.27.16.dist-info/top_level.txt,sha256=GgRyO5yEM3TjS5GzHHCvW_aqA41ryZah_yAtNSeorUw,16
|
|
11
|
+
dagster_pyspark-0.27.16.dist-info/RECORD,,
|
|
@@ -186,7 +186,7 @@
|
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
|
187
187
|
identification within third-party archives.
|
|
188
188
|
|
|
189
|
-
Copyright
|
|
189
|
+
Copyright 2025 Dagster Labs, Inc.
|
|
190
190
|
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
192
|
you may not use this file except in compliance with the License.
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
dagster_pyspark/__init__.py,sha256=Jf745Ecqp5duFzOoFvnitmmj14s3TbQqk6gA126BLEE,423
|
|
2
|
-
dagster_pyspark/resources.py,sha256=weaH7Spi5JeE25i0UJGcIyfaH_f2-Rl3mCPF7vBitQM,5953
|
|
3
|
-
dagster_pyspark/types.py,sha256=pSc9mTMkpWQbkzxc3DhstUtZyUx5dFGORaqnGzGqf2o,31132
|
|
4
|
-
dagster_pyspark/utils.py,sha256=RmhaVR9S_QhnPffBIuwlZBSO-0Sb2RcRLemk3VBrxDc,693
|
|
5
|
-
dagster_pyspark/version.py,sha256=eM75ih3rRMeRs2KAFT1C-wtFC_fhUYsPhS2X2Aa8ILk,24
|
|
6
|
-
dagster_pyspark-0.20.17.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
|
|
7
|
-
dagster_pyspark-0.20.17.dist-info/METADATA,sha256=58Y-MKz48BUeo4Qx27BwjeR7m9kxZVJ4QbJzpgC4Q6g,687
|
|
8
|
-
dagster_pyspark-0.20.17.dist-info/WHEEL,sha256=p46_5Uhzqz6AzeSosiOnxK-zmFja1i22CrQCjmYe8ec,92
|
|
9
|
-
dagster_pyspark-0.20.17.dist-info/top_level.txt,sha256=GgRyO5yEM3TjS5GzHHCvW_aqA41ryZah_yAtNSeorUw,16
|
|
10
|
-
dagster_pyspark-0.20.17.dist-info/RECORD,,
|
|
File without changes
|