cryoflow-plugin-collections 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryoflow_plugin_collections-0.2.0/.gitignore +227 -0
- cryoflow_plugin_collections-0.2.0/PKG-INFO +24 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/__init__.py +41 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/input/__init__.py +1 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/input/csv_scan.py +52 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/input/ipc_scan.py +52 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/input/parquet_scan.py +52 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/libs/__init__.py +21 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/libs/core.py +32 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/libs/polars.py +26 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/libs/returns/__init__.py +7 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/libs/returns/maybe.py +13 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/libs/returns/result.py +15 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/output/__init__.py +5 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/output/parquet_writer.py +75 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/transform/__init__.py +5 -0
- cryoflow_plugin_collections-0.2.0/cryoflow_plugin_collections/transform/multiplier.py +85 -0
- cryoflow_plugin_collections-0.2.0/pyproject.toml +39 -0
- cryoflow_plugin_collections-0.2.0/tests/input/test_csv_scan_plugin.py +138 -0
- cryoflow_plugin_collections-0.2.0/tests/input/test_ipc_scan_plugin.py +138 -0
- cryoflow_plugin_collections-0.2.0/tests/input/test_parquet_scan_plugin.py +138 -0
- cryoflow_plugin_collections-0.2.0/tests/libs_imports/core/test_core_reexport.py +30 -0
- cryoflow_plugin_collections-0.2.0/tests/libs_imports/polars/test_polars_reexport.py +91 -0
- cryoflow_plugin_collections-0.2.0/tests/libs_imports/returns/test_returns_reexport.py +116 -0
- cryoflow_plugin_collections-0.2.0/tests/output/test_parquet_writer_plugin.py +171 -0
- cryoflow_plugin_collections-0.2.0/tests/transform/test_column_multiplier_plugin.py +163 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# Python.gitignore
|
|
2
|
+
# @see: https://github.com/github/gitignore/blob/main/Python.gitignore
|
|
3
|
+
# Byte-compiled / optimized / DLL files
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.py[codz]
|
|
6
|
+
*$py.class
|
|
7
|
+
|
|
8
|
+
# C extensions
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# Distribution / packaging
|
|
12
|
+
.Python
|
|
13
|
+
build/
|
|
14
|
+
develop-eggs/
|
|
15
|
+
dist/
|
|
16
|
+
downloads/
|
|
17
|
+
eggs/
|
|
18
|
+
.eggs/
|
|
19
|
+
lib/
|
|
20
|
+
lib64/
|
|
21
|
+
parts/
|
|
22
|
+
sdist/
|
|
23
|
+
var/
|
|
24
|
+
wheels/
|
|
25
|
+
share/python-wheels/
|
|
26
|
+
*.egg-info/
|
|
27
|
+
.installed.cfg
|
|
28
|
+
*.egg
|
|
29
|
+
MANIFEST
|
|
30
|
+
|
|
31
|
+
# PyInstaller
|
|
32
|
+
# Usually these files are written by a python script from a template
|
|
33
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
34
|
+
*.manifest
|
|
35
|
+
*.spec
|
|
36
|
+
|
|
37
|
+
# Installer logs
|
|
38
|
+
pip-log.txt
|
|
39
|
+
pip-delete-this-directory.txt
|
|
40
|
+
|
|
41
|
+
# Unit test / coverage reports
|
|
42
|
+
htmlcov/
|
|
43
|
+
.tox/
|
|
44
|
+
.nox/
|
|
45
|
+
.coverage
|
|
46
|
+
.coverage.*
|
|
47
|
+
.cache
|
|
48
|
+
nosetests.xml
|
|
49
|
+
coverage.xml
|
|
50
|
+
*.cover
|
|
51
|
+
*.py.cover
|
|
52
|
+
.hypothesis/
|
|
53
|
+
.pytest_cache/
|
|
54
|
+
cover/
|
|
55
|
+
|
|
56
|
+
# Translations
|
|
57
|
+
*.mo
|
|
58
|
+
*.pot
|
|
59
|
+
|
|
60
|
+
# Django stuff:
|
|
61
|
+
*.log
|
|
62
|
+
local_settings.py
|
|
63
|
+
db.sqlite3
|
|
64
|
+
db.sqlite3-journal
|
|
65
|
+
|
|
66
|
+
# Flask stuff:
|
|
67
|
+
instance/
|
|
68
|
+
.webassets-cache
|
|
69
|
+
|
|
70
|
+
# Scrapy stuff:
|
|
71
|
+
.scrapy
|
|
72
|
+
|
|
73
|
+
# Sphinx documentation
|
|
74
|
+
docs/_build/
|
|
75
|
+
|
|
76
|
+
# PyBuilder
|
|
77
|
+
.pybuilder/
|
|
78
|
+
target/
|
|
79
|
+
|
|
80
|
+
# Jupyter Notebook
|
|
81
|
+
.ipynb_checkpoints
|
|
82
|
+
|
|
83
|
+
# IPython
|
|
84
|
+
profile_default/
|
|
85
|
+
ipython_config.py
|
|
86
|
+
|
|
87
|
+
# pyenv
|
|
88
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
89
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
90
|
+
# .python-version
|
|
91
|
+
|
|
92
|
+
# pipenv
|
|
93
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
94
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
95
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
96
|
+
# install all needed dependencies.
|
|
97
|
+
# Pipfile.lock
|
|
98
|
+
|
|
99
|
+
# UV
|
|
100
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
101
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
102
|
+
# commonly ignored for libraries.
|
|
103
|
+
# uv.lock
|
|
104
|
+
|
|
105
|
+
# poetry
|
|
106
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
107
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
108
|
+
# commonly ignored for libraries.
|
|
109
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
110
|
+
# poetry.lock
|
|
111
|
+
# poetry.toml
|
|
112
|
+
|
|
113
|
+
# pdm
|
|
114
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
115
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
116
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
117
|
+
# pdm.lock
|
|
118
|
+
# pdm.toml
|
|
119
|
+
.pdm-python
|
|
120
|
+
.pdm-build/
|
|
121
|
+
|
|
122
|
+
# pixi
|
|
123
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
124
|
+
# pixi.lock
|
|
125
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
126
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
127
|
+
.pixi
|
|
128
|
+
|
|
129
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
130
|
+
__pypackages__/
|
|
131
|
+
|
|
132
|
+
# Celery stuff
|
|
133
|
+
celerybeat-schedule
|
|
134
|
+
celerybeat.pid
|
|
135
|
+
|
|
136
|
+
# Redis
|
|
137
|
+
*.rdb
|
|
138
|
+
*.aof
|
|
139
|
+
*.pid
|
|
140
|
+
|
|
141
|
+
# RabbitMQ
|
|
142
|
+
mnesia/
|
|
143
|
+
rabbitmq/
|
|
144
|
+
rabbitmq-data/
|
|
145
|
+
|
|
146
|
+
# ActiveMQ
|
|
147
|
+
activemq-data/
|
|
148
|
+
|
|
149
|
+
# SageMath parsed files
|
|
150
|
+
*.sage.py
|
|
151
|
+
|
|
152
|
+
# Environments
|
|
153
|
+
.env
|
|
154
|
+
.envrc
|
|
155
|
+
.venv
|
|
156
|
+
env/
|
|
157
|
+
venv/
|
|
158
|
+
ENV/
|
|
159
|
+
env.bak/
|
|
160
|
+
venv.bak/
|
|
161
|
+
|
|
162
|
+
# Spyder project settings
|
|
163
|
+
.spyderproject
|
|
164
|
+
.spyproject
|
|
165
|
+
|
|
166
|
+
# Rope project settings
|
|
167
|
+
.ropeproject
|
|
168
|
+
|
|
169
|
+
# mkdocs documentation
|
|
170
|
+
/site
|
|
171
|
+
|
|
172
|
+
# mypy
|
|
173
|
+
.mypy_cache/
|
|
174
|
+
.dmypy.json
|
|
175
|
+
dmypy.json
|
|
176
|
+
|
|
177
|
+
# Pyre type checker
|
|
178
|
+
.pyre/
|
|
179
|
+
|
|
180
|
+
# pytype static type analyzer
|
|
181
|
+
.pytype/
|
|
182
|
+
|
|
183
|
+
# Cython debug symbols
|
|
184
|
+
cython_debug/
|
|
185
|
+
|
|
186
|
+
# PyCharm
|
|
187
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
188
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
189
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
190
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
191
|
+
# .idea/
|
|
192
|
+
|
|
193
|
+
# Abstra
|
|
194
|
+
# Abstra is an AI-powered process automation framework.
|
|
195
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
196
|
+
# Learn more at https://abstra.io/docs
|
|
197
|
+
.abstra/
|
|
198
|
+
|
|
199
|
+
# Visual Studio Code
|
|
200
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
201
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
202
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
203
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
204
|
+
# .vscode/
|
|
205
|
+
|
|
206
|
+
# Ruff stuff:
|
|
207
|
+
.ruff_cache/
|
|
208
|
+
|
|
209
|
+
# PyPI configuration file
|
|
210
|
+
.pypirc
|
|
211
|
+
|
|
212
|
+
# Marimo
|
|
213
|
+
marimo/_static/
|
|
214
|
+
marimo/_lsp/
|
|
215
|
+
__marimo__/
|
|
216
|
+
|
|
217
|
+
# Streamlit
|
|
218
|
+
.streamlit/secrets.toml
|
|
219
|
+
|
|
220
|
+
# Nix.gitignore
|
|
221
|
+
# @see: https://github.com/github/gitignore/blob/main/Nix.gitignore
|
|
222
|
+
# Ignore build outputs from performing a nix-build or `nix build` command
|
|
223
|
+
result
|
|
224
|
+
result-*
|
|
225
|
+
|
|
226
|
+
# Ignore automatically generated direnv output
|
|
227
|
+
.direnv
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cryoflow-plugin-collections
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Sample plugin for cryoflow
|
|
5
|
+
Project-URL: Homepage, https://github.com/yasunori0418/cryoflow
|
|
6
|
+
Project-URL: Repository, https://github.com/yasunori0418/cryoflow
|
|
7
|
+
Project-URL: Issues, https://github.com/yasunori0418/cryoflow/issues
|
|
8
|
+
Author-email: yasunori0418 <yasunori.kirin0418@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Topic :: Utilities
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: cryoflow-core
|
|
23
|
+
Requires-Dist: polars>=1.37.1
|
|
24
|
+
Requires-Dist: returns>=0.23.0
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""cryoflow plugin collections.
|
|
2
|
+
|
|
3
|
+
This package provides:
|
|
4
|
+
1. Built-in sample plugins for cryoflow
|
|
5
|
+
- ColumnMultiplierPlugin: Multiply numeric columns by a factor
|
|
6
|
+
- ParquetWriterPlugin: Write data to Parquet files
|
|
7
|
+
|
|
8
|
+
2. Library re-exports for external plugin development (libs subpackage)
|
|
9
|
+
- Reduces additional dependencies for external plugin developers
|
|
10
|
+
- Ensures version compatibility with cryoflow-core
|
|
11
|
+
|
|
12
|
+
For External Plugin Developers:
|
|
13
|
+
# Recommended: Import from libs to reduce dependencies
|
|
14
|
+
from cryoflow_plugin_collections.libs.polars import pl
|
|
15
|
+
from cryoflow_plugin_collections.libs.returns import Result, Success, Failure
|
|
16
|
+
from cryoflow_plugin_collections.libs.core import TransformPlugin, FrameData
|
|
17
|
+
|
|
18
|
+
class MyCustomPlugin(TransformPlugin):
|
|
19
|
+
def execute(self, df: FrameData) -> Result[FrameData, Exception]:
|
|
20
|
+
# Your implementation
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
For Built-in Sample Plugins:
|
|
24
|
+
# Sample plugins use direct imports (internal implementation detail)
|
|
25
|
+
import polars as pl
|
|
26
|
+
from returns.result import Result, Success, Failure
|
|
27
|
+
from cryoflow_core.plugin import TransformPlugin, FrameData
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
31
|
+
|
|
32
|
+
from cryoflow_plugin_collections.output import ParquetWriterPlugin
|
|
33
|
+
from cryoflow_plugin_collections.transform import ColumnMultiplierPlugin
|
|
34
|
+
|
|
35
|
+
try:
|
|
36
|
+
__version__ = version('cryoflow-plugin-collections')
|
|
37
|
+
except PackageNotFoundError:
|
|
38
|
+
# Fallback for development environment
|
|
39
|
+
__version__ = 'unknown'
|
|
40
|
+
|
|
41
|
+
__all__ = ['ColumnMultiplierPlugin', 'ParquetWriterPlugin']
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Input plugins for cryoflow."""
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""CSV input plugin for cryoflow."""
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
from returns.result import Failure, Result, Success
|
|
5
|
+
|
|
6
|
+
from cryoflow_core.plugin import FrameData, InputPlugin
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CsvScanPlugin(InputPlugin):
    """Load data from a CSV file using lazy evaluation.

    Options:
        input_path (str | Path): Path to the input CSV file.
    """

    def name(self) -> str:
        """Return the plugin identifier name."""
        return 'csv_scan'

    def _checked_input_path(self):
        """Resolve and validate the 'input_path' option.

        Shared by execute() and dry_run() so the validation logic lives
        in exactly one place.

        Returns:
            Success with the resolved path, Failure(ValueError) when the
            option is missing, or Failure(FileNotFoundError) when the
            resolved file does not exist.
        """
        input_path_opt = self.options.get('input_path')
        if input_path_opt is None:
            return Failure(ValueError("Option 'input_path' is required"))
        input_path = self.resolve_path(input_path_opt)
        if not input_path.exists():
            return Failure(FileNotFoundError(f'Input file not found: {input_path}'))
        return Success(input_path)

    def execute(self) -> Result[FrameData, Exception]:
        """Load data from a CSV file.

        Returns:
            Result containing LazyFrame on success or Exception on failure.
        """
        try:
            # Result.map leaves a Failure untouched and does not swallow
            # exceptions, so the outer try still catches scan errors.
            return self._checked_input_path().map(pl.scan_csv)
        except Exception as e:
            return Failure(e)

    def dry_run(self) -> Result[dict[str, pl.DataType], Exception]:
        """Return the schema of the CSV file without loading data.

        Returns:
            Result containing schema dict on success or Exception on failure.
        """
        try:
            return self._checked_input_path().map(
                lambda path: dict(pl.scan_csv(path).collect_schema())
            )
        except Exception as e:
            return Failure(e)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""IPC (Arrow) input plugin for cryoflow."""
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
from returns.result import Failure, Result, Success
|
|
5
|
+
|
|
6
|
+
from cryoflow_core.plugin import FrameData, InputPlugin
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class IpcScanPlugin(InputPlugin):
    """Load data from an IPC (Arrow) file using lazy evaluation.

    Options:
        input_path (str | Path): Path to the input IPC file.
    """

    def name(self) -> str:
        """Return the plugin identifier name."""
        return 'ipc_scan'

    def _checked_input_path(self):
        """Resolve and validate the 'input_path' option.

        Shared by execute() and dry_run() so the validation logic lives
        in exactly one place.

        Returns:
            Success with the resolved path, Failure(ValueError) when the
            option is missing, or Failure(FileNotFoundError) when the
            resolved file does not exist.
        """
        input_path_opt = self.options.get('input_path')
        if input_path_opt is None:
            return Failure(ValueError("Option 'input_path' is required"))
        input_path = self.resolve_path(input_path_opt)
        if not input_path.exists():
            return Failure(FileNotFoundError(f'Input file not found: {input_path}'))
        return Success(input_path)

    def execute(self) -> Result[FrameData, Exception]:
        """Load data from an IPC file.

        Returns:
            Result containing LazyFrame on success or Exception on failure.
        """
        try:
            # Result.map leaves a Failure untouched and does not swallow
            # exceptions, so the outer try still catches scan errors.
            return self._checked_input_path().map(pl.scan_ipc)
        except Exception as e:
            return Failure(e)

    def dry_run(self) -> Result[dict[str, pl.DataType], Exception]:
        """Return the schema of the IPC file without loading data.

        Returns:
            Result containing schema dict on success or Exception on failure.
        """
        try:
            return self._checked_input_path().map(
                lambda path: dict(pl.scan_ipc(path).collect_schema())
            )
        except Exception as e:
            return Failure(e)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Parquet input plugin for cryoflow."""
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
from returns.result import Failure, Result, Success
|
|
5
|
+
|
|
6
|
+
from cryoflow_core.plugin import FrameData, InputPlugin
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ParquetScanPlugin(InputPlugin):
    """Load data from a Parquet file using lazy evaluation.

    Options:
        input_path (str | Path): Path to the input Parquet file.
    """

    def name(self) -> str:
        """Return the plugin identifier name."""
        return 'parquet_scan'

    def _checked_input_path(self):
        """Resolve and validate the 'input_path' option.

        Shared by execute() and dry_run() so the validation logic lives
        in exactly one place.

        Returns:
            Success with the resolved path, Failure(ValueError) when the
            option is missing, or Failure(FileNotFoundError) when the
            resolved file does not exist.
        """
        input_path_opt = self.options.get('input_path')
        if input_path_opt is None:
            return Failure(ValueError("Option 'input_path' is required"))
        input_path = self.resolve_path(input_path_opt)
        if not input_path.exists():
            return Failure(FileNotFoundError(f'Input file not found: {input_path}'))
        return Success(input_path)

    def execute(self) -> Result[FrameData, Exception]:
        """Load data from a Parquet file.

        Returns:
            Result containing LazyFrame on success or Exception on failure.
        """
        try:
            # Result.map leaves a Failure untouched and does not swallow
            # exceptions, so the outer try still catches scan errors.
            return self._checked_input_path().map(pl.scan_parquet)
        except Exception as e:
            return Failure(e)

    def dry_run(self) -> Result[dict[str, pl.DataType], Exception]:
        """Return the schema of the Parquet file without loading data.

        Returns:
            Result containing schema dict on success or Exception on failure.
        """
        try:
            return self._checked_input_path().map(
                lambda path: dict(pl.scan_parquet(path).collect_schema())
            )
        except Exception as e:
            return Failure(e)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Library re-exports for external cryoflow plugin development.
|
|
2
|
+
|
|
3
|
+
This subpackage provides convenient re-exports of commonly used libraries
|
|
4
|
+
for external plugin developers. By importing from this package, you can
|
|
5
|
+
reduce additional dependencies and ensure version compatibility.
|
|
6
|
+
|
|
7
|
+
Available modules:
|
|
8
|
+
- polars: Data processing with Polars
|
|
9
|
+
- returns: Error handling with Result monad
|
|
10
|
+
- core: cryoflow_core base classes and types
|
|
11
|
+
|
|
12
|
+
Usage (for external plugin developers):
|
|
13
|
+
from cryoflow_plugin_collections.libs.polars import pl
|
|
14
|
+
from cryoflow_plugin_collections.libs.returns.result import Result, Success, Failure
|
|
15
|
+
from cryoflow_plugin_collections.libs.core import TransformPlugin, FrameData
|
|
16
|
+
|
|
17
|
+
Note: Built-in sample plugins (transform/multiplier, output/parquet_writer)
|
|
18
|
+
use direct imports and do not rely on this re-export mechanism.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
# Intentionally minimal - import specific modules as needed
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Cryoflow core types re-export for plugin development.
|
|
2
|
+
|
|
3
|
+
Provides base classes and type definitions from cryoflow_core that are
|
|
4
|
+
commonly used when developing plugins.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from cryoflow_plugin_collections.libs.core import (
|
|
8
|
+
FrameData,
|
|
9
|
+
InputPlugin,
|
|
10
|
+
TransformPlugin,
|
|
11
|
+
OutputPlugin,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
class MyPlugin(TransformPlugin):
|
|
15
|
+
def execute(self, df: FrameData) -> Result[FrameData, Exception]:
|
|
16
|
+
# Your implementation
|
|
17
|
+
pass
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from cryoflow_core.plugin import (
|
|
21
|
+
FrameData as FrameData,
|
|
22
|
+
InputPlugin as InputPlugin,
|
|
23
|
+
OutputPlugin as OutputPlugin,
|
|
24
|
+
TransformPlugin as TransformPlugin,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
'FrameData', # Type alias: LazyFrame | DataFrame
|
|
29
|
+
'InputPlugin', # Base class for input plugins
|
|
30
|
+
'TransformPlugin', # Base class for transform plugins
|
|
31
|
+
'OutputPlugin', # Base class for output plugins
|
|
32
|
+
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Polars re-export for plugin development.
|
|
2
|
+
|
|
3
|
+
Provides complete polars API re-export for external plugin developers.
|
|
4
|
+
This module transparently re-exports all 228+ public APIs from polars,
|
|
5
|
+
ensuring version compatibility and reducing dependency management overhead.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# Import as module (enables pl.col(), pl.DataFrame(), etc.)
|
|
9
|
+
from cryoflow_plugin_collections.libs import polars as pl
|
|
10
|
+
pl.col("name")
|
|
11
|
+
|
|
12
|
+
# Import the polars module object
|
|
13
|
+
from cryoflow_plugin_collections.libs.polars import pl
|
|
14
|
+
pl.col("name")
|
|
15
|
+
|
|
16
|
+
# Import individual functions/types
|
|
17
|
+
from cryoflow_plugin_collections.libs.polars import col, lit, when, DataFrame, LazyFrame
|
|
18
|
+
|
|
19
|
+
All imports provide full type hints and IDE autocomplete support.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import polars as pl
|
|
23
|
+
from polars import * # noqa: F403, F401 # pyright: ignore[reportWildcardImportFromLibrary]
|
|
24
|
+
|
|
25
|
+
# Build __all__ dynamically to include all polars public APIs plus 'pl'
|
|
26
|
+
__all__ = [name for name in dir(pl) if not name.startswith('_')] # pyright: ignore[reportUnsupportedDunderAll]
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Sample output plugin for cryoflow."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
from returns.result import Failure, Result, Success
|
|
7
|
+
|
|
8
|
+
from cryoflow_core.plugin import FrameData, OutputPlugin
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ParquetWriterPlugin(OutputPlugin):
    """Write data frame to Parquet file.

    Options:
        output_path (str | Path): Path to the output Parquet file.
    """

    def name(self) -> str:
        """Return the plugin identifier name."""
        return 'parquet_writer'

    def _checked_output_path(self):
        """Resolve the 'output_path' option without touching the filesystem.

        Shared by execute() and dry_run() so option validation lives in
        exactly one place.

        Returns:
            Success with the resolved path, or Failure(ValueError) when
            the option is missing.
        """
        output_path_opt = self.options.get('output_path')
        if output_path_opt is None:
            return Failure(ValueError("Option 'output_path' is required"))
        return Success(self.resolve_path(output_path_opt))

    def _write_frame(self, df, output_path):
        """Write *df* to *output_path*, creating parent dirs as needed."""
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Lazy frames stream to disk; eager frames are written directly.
        if isinstance(df, pl.LazyFrame):
            df.sink_parquet(output_path)
        else:  # DataFrame
            df.write_parquet(output_path)
        return Success(None)

    def execute(self, df: FrameData) -> Result[None, Exception]:
        """Write the data frame to a Parquet file.

        Args:
            df: Input LazyFrame or DataFrame.

        Returns:
            Result containing None on success or Exception on failure.
        """
        try:
            # bind keeps a Failure as-is; write errors raised inside the
            # callback propagate and are caught by the outer except.
            return self._checked_output_path().bind(
                lambda output_path: self._write_frame(df, output_path)
            )
        except Exception as e:
            return Failure(e)

    def dry_run(self, schema: dict[str, pl.DataType]) -> Result[dict[str, pl.DataType], Exception]:
        """Validate that output path is writable.

        Args:
            schema: Input schema (not modified by output plugin).

        Returns:
            Result containing schema unchanged or Exception on failure.
        """
        try:
            path_result = self._checked_output_path()
            if not isinstance(path_result, Success):
                return path_result
            output_path = path_result.unwrap()

            # Check if parent directory can be created; wrap the cause in
            # a descriptive ValueError (unlike execute's generic Failure).
            try:
                output_path.parent.mkdir(parents=True, exist_ok=True)
            except Exception as e:
                return Failure(ValueError(f'Cannot create parent directory for {output_path}: {e}'))

            return Success(schema)
        except Exception as e:
            return Failure(e)
|