cornflex 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cornflex-1.0.1/.github/workflows/pr.yml +26 -0
- cornflex-1.0.1/.github/workflows/publish.yml +41 -0
- cornflex-1.0.1/.gitignore +165 -0
- cornflex-1.0.1/LICENSE +21 -0
- cornflex-1.0.1/PKG-INFO +133 -0
- cornflex-1.0.1/README.md +115 -0
- cornflex-1.0.1/conftest.py +0 -0
- cornflex-1.0.1/cornflex/__init__.py +3 -0
- cornflex-1.0.1/cornflex/reader.py +128 -0
- cornflex-1.0.1/pyproject.toml +28 -0
- cornflex-1.0.1/tests/test_reader.py +198 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# CI for pull requests targeting main: install the package, run the tests,
# then verify that the distribution still builds.
name: PR Build
on:
  pull_request:
    branches: [main]
jobs:
  build-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build pytest
          pip install -e .
      - name: Run tests
        run: pytest
      - name: Build package
        run: python -m build
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Release pipeline: on every pushed tag, test the package, build it and
# upload the resulting distributions to PyPI with twine.
name: Publish

on:
  push:
    tags:
      - '*'

jobs:
  build-and-publish:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      # pyproject.toml declares no optional "dev" extra, so the plain package
      # is installed; pytest is installed here together with the other tools,
      # mirroring the PR Build workflow.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine pytest
          pip install -e .

      - name: Run tests
        run: |
          pytest

      - name: Build package
        run: |
          python -m build

      # Credentials are read from repository secrets and handed to twine
      # through its standard environment variables.
      - name: Publish package to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python -m twine upload dist/*
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# poetry
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
102
|
+
#poetry.lock
|
|
103
|
+
|
|
104
|
+
# pdm
|
|
105
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
106
|
+
#pdm.lock
|
|
107
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
108
|
+
# in version control.
|
|
109
|
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
110
|
+
.pdm.toml
|
|
111
|
+
.pdm-python
|
|
112
|
+
.pdm-build/
|
|
113
|
+
|
|
114
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
115
|
+
__pypackages__/
|
|
116
|
+
|
|
117
|
+
# Celery stuff
|
|
118
|
+
celerybeat-schedule
|
|
119
|
+
celerybeat.pid
|
|
120
|
+
|
|
121
|
+
# SageMath parsed files
|
|
122
|
+
*.sage.py
|
|
123
|
+
|
|
124
|
+
# Environments
|
|
125
|
+
.env
|
|
126
|
+
.venv
|
|
127
|
+
env/
|
|
128
|
+
venv/
|
|
129
|
+
ENV/
|
|
130
|
+
env.bak/
|
|
131
|
+
venv.bak/
|
|
132
|
+
|
|
133
|
+
# Spyder project settings
|
|
134
|
+
.spyderproject
|
|
135
|
+
.spyproject
|
|
136
|
+
|
|
137
|
+
# Rope project settings
|
|
138
|
+
.ropeproject
|
|
139
|
+
|
|
140
|
+
# mkdocs documentation
|
|
141
|
+
/site
|
|
142
|
+
|
|
143
|
+
# mypy
|
|
144
|
+
.mypy_cache/
|
|
145
|
+
.dmypy.json
|
|
146
|
+
dmypy.json
|
|
147
|
+
|
|
148
|
+
# Pyre type checker
|
|
149
|
+
.pyre/
|
|
150
|
+
|
|
151
|
+
# pytype static type analyzer
|
|
152
|
+
.pytype/
|
|
153
|
+
|
|
154
|
+
# Cython debug symbols
|
|
155
|
+
cython_debug/
|
|
156
|
+
|
|
157
|
+
# PyCharm
|
|
158
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
159
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
160
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
161
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
162
|
+
#.idea/
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
.DS_Store
|
cornflex-1.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Erik Bremstedt
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cornflex-1.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cornflex
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: simple sftp reader
|
|
5
|
+
Project-URL: Homepage, https://github.com/ebremstedt/cornflex
|
|
6
|
+
Project-URL: Issues, https://github.com/ebremstedt/cornflex/issues
|
|
7
|
+
Author-email: Erik Bremstedt <erik.bremstedt@gmail.com>
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Requires-Python: >=3.11.0
|
|
13
|
+
Requires-Dist: chardet>=5.0.0
|
|
14
|
+
Requires-Dist: croniter>0.3.0
|
|
15
|
+
Requires-Dist: paramiko>=3.0.0
|
|
16
|
+
Requires-Dist: polars>=0.20.0
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# 🌽💪 Cornflex 🌽💪
|
|
20
|
+
|
|
21
|
+
A lightweight SFTP reader built on top of `paramiko` and `polars`. Connects to remote servers and pulls files down as strings, Polars DataFrames, or raw text.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
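pip install cornflex
```

Installing from PyPI pulls in the `paramiko`, `polars` and `chardet` dependencies automatically. Alternatively, run `pip install paramiko polars chardet` and drop `cornflex/` into your project.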
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from cornflex import SFTPReader
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Connect with password
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
reader = SFTPReader(
|
|
45
|
+
hostname="sftp.example.com",
|
|
46
|
+
username="myuser",
|
|
47
|
+
password="mypassword",
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Connect with PEM key
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
reader = SFTPReader(
|
|
55
|
+
hostname="sftp.example.com",
|
|
56
|
+
username="myuser",
|
|
57
|
+
pem_file="/path/to/key.pem",
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Methods
|
|
64
|
+
|
|
65
|
+
### `get_files(remote_path, file_pattern)`
|
|
66
|
+
|
|
67
|
+
Lists files in a remote directory. Supports glob-style patterns.
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
reader.get_files(remote_path="/data", file_pattern="*.csv")
|
|
71
|
+
# ["orders_2024.csv", "users_2024.csv"]
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
### `get_csv_file(file_name, remote_path, column_names)`
|
|
77
|
+
|
|
78
|
+
Fetches a CSV and returns it as a Polars DataFrame.
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
reader.connect()
|
|
82
|
+
df = reader.get_csv_file("orders.csv", remote_path="/data")
|
|
83
|
+
|
|
84
|
+
# Override column names (useful when file has no header)
|
|
85
|
+
df = reader.get_csv_file("orders.csv", remote_path="/data", column_names=["id", "amount", "date"])
|
|
86
|
+
reader.close()
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
### `get_xml_file_to_string(file_name, remote_path)`
|
|
92
|
+
|
|
93
|
+
Fetches an XML file and returns it as a string.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
reader.connect()
|
|
97
|
+
xml = reader.get_xml_file_to_string("feed.xml", remote_path="/exports")
|
|
98
|
+
reader.close()
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
### `file_to_string(file_name, remote_path, encoding)`
|
|
104
|
+
|
|
105
|
+
Fetches any file as a string. Auto-detects encoding via `chardet` if not specified.
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
reader.connect()
|
|
109
|
+
content = reader.file_to_string("report.txt", remote_path="/reports")
|
|
110
|
+
|
|
111
|
+
# Force encoding
|
|
112
|
+
content = reader.file_to_string("report.txt", encoding="latin-1")
|
|
113
|
+
reader.close()
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Notes
|
|
119
|
+
|
|
120
|
+
- `get_files()` handles connect/close internally.
|
|
121
|
+
- For all other methods, call `connect()` before and `close()` after.
|
|
122
|
+
- At least one of `password` or `pem_file` must be provided. If both are given, `pem_file` is used and `password` is ignored.
|
|
123
|
+
- `get_csv_file` assumes UTF-8. Use `file_to_string` for other encodings.
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Dependencies
|
|
128
|
+
|
|
129
|
+
| Package | Purpose |
|
|
130
|
+
|---|---|
|
|
131
|
+
| `paramiko` | SSH/SFTP connection |
|
|
132
|
+
| `polars` | DataFrame output |
|
|
133
|
+
| `chardet` | Encoding detection |
|
cornflex-1.0.1/README.md
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# 🌽💪 Cornflex 🌽💪
|
|
2
|
+
|
|
3
|
+
A lightweight SFTP reader built on top of `paramiko` and `polars`. Connects to remote servers and pulls files down as strings, Polars DataFrames, or raw text.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
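pip install cornflex
```

Installing from PyPI pulls in the `paramiko`, `polars` and `chardet` dependencies automatically. Alternatively, run `pip install paramiko polars chardet` and drop `cornflex/` into your project.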
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from cornflex import SFTPReader
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Connect with password
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
reader = SFTPReader(
|
|
27
|
+
hostname="sftp.example.com",
|
|
28
|
+
username="myuser",
|
|
29
|
+
password="mypassword",
|
|
30
|
+
)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Connect with PEM key
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
reader = SFTPReader(
|
|
37
|
+
hostname="sftp.example.com",
|
|
38
|
+
username="myuser",
|
|
39
|
+
pem_file="/path/to/key.pem",
|
|
40
|
+
)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Methods
|
|
46
|
+
|
|
47
|
+
### `get_files(remote_path, file_pattern)`
|
|
48
|
+
|
|
49
|
+
Lists files in a remote directory. Supports glob-style patterns.
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
reader.get_files(remote_path="/data", file_pattern="*.csv")
|
|
53
|
+
# ["orders_2024.csv", "users_2024.csv"]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
### `get_csv_file(file_name, remote_path, column_names)`
|
|
59
|
+
|
|
60
|
+
Fetches a CSV and returns it as a Polars DataFrame.
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
reader.connect()
|
|
64
|
+
df = reader.get_csv_file("orders.csv", remote_path="/data")
|
|
65
|
+
|
|
66
|
+
# Override column names (useful when file has no header)
|
|
67
|
+
df = reader.get_csv_file("orders.csv", remote_path="/data", column_names=["id", "amount", "date"])
|
|
68
|
+
reader.close()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
### `get_xml_file_to_string(file_name, remote_path)`
|
|
74
|
+
|
|
75
|
+
Fetches an XML file and returns it as a string.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
reader.connect()
|
|
79
|
+
xml = reader.get_xml_file_to_string("feed.xml", remote_path="/exports")
|
|
80
|
+
reader.close()
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
### `file_to_string(file_name, remote_path, encoding)`
|
|
86
|
+
|
|
87
|
+
Fetches any file as a string. Auto-detects encoding via `chardet` if not specified.
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
reader.connect()
|
|
91
|
+
content = reader.file_to_string("report.txt", remote_path="/reports")
|
|
92
|
+
|
|
93
|
+
# Force encoding
|
|
94
|
+
content = reader.file_to_string("report.txt", encoding="latin-1")
|
|
95
|
+
reader.close()
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Notes
|
|
101
|
+
|
|
102
|
+
- `get_files()` handles connect/close internally.
|
|
103
|
+
- For all other methods, call `connect()` before and `close()` after.
|
|
104
|
+
- At least one of `password` or `pem_file` must be provided. If both are given, `pem_file` is used and `password` is ignored.
|
|
105
|
+
- `get_csv_file` assumes UTF-8. Use `file_to_string` for other encodings.
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Dependencies
|
|
110
|
+
|
|
111
|
+
| Package | Purpose |
|
|
112
|
+
|---|---|
|
|
113
|
+
| `paramiko` | SSH/SFTP connection |
|
|
114
|
+
| `polars` | DataFrame output |
|
|
115
|
+
| `chardet` | Encoding detection |
|
|
File without changes
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from io import StringIO
|
|
3
|
+
from typing import Any, List, Optional
|
|
4
|
+
import chardet
|
|
5
|
+
import fnmatch
|
|
6
|
+
import paramiko
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class SFTPReader:
    """Read files from a remote server over SFTP.

    Authentication uses ``pem_file`` when it is set and ``password``
    otherwise; at least one of the two must be provided before
    :meth:`connect` is called.

    The reader can be used as a context manager, which calls
    :meth:`connect` on entry and :meth:`close` on exit::

        with SFTPReader(hostname="h", username="u", password="p") as reader:
            text = reader.file_to_string("report.txt")

    Notes:
        * ``pem_file`` is loaded with ``paramiko.RSAKey``, so only RSA
          private keys are accepted.
        * :meth:`close` leaves the (closed) connection handles on the
          instance, so the "Not connected" guard of the read methods only
          detects a reader that has never been connected.
    """

    hostname: str
    username: str
    pem_file: Optional[str] = None
    password: Optional[str] = None
    port: int = 22

    def __post_init__(self) -> None:
        # Connection handles are only created by connect().
        self._client: Optional[paramiko.SSHClient] = None
        self._sftp: Optional[paramiko.SFTPClient] = None

    def __enter__(self) -> "SFTPReader":
        """Open the connection and return the reader itself."""
        self.connect()
        return self

    def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> None:
        """Close the connection; exceptions from the ``with`` body propagate."""
        self.close()

    def connect(self) -> None:
        """Open the SSH connection and an SFTP session on top of it.

        Any connection previously opened by this reader is closed first so
        that repeated calls do not leak sockets.

        Raises:
            ValueError: If neither ``password`` nor ``pem_file`` is set.
        """
        if not self.password and not self.pem_file:
            raise ValueError("Either password or pem_file must be provided")

        # Release handles from an earlier connect() before replacing them.
        self.close()

        connect_kwargs: dict[str, Any] = {
            "hostname": self.hostname,
            "port": self.port,
            "username": self.username,
            "timeout": 30,
        }
        if self.pem_file:
            # pem_file takes precedence over password when both are given.
            connect_kwargs["pkey"] = paramiko.RSAKey.from_private_key_file(
                self.pem_file
            )
        else:
            connect_kwargs["password"] = self.password

        client = paramiko.SSHClient()
        # NOTE(security): AutoAddPolicy accepts unknown host keys without
        # verification; kept as-is because callers may depend on it.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            client.connect(**connect_kwargs)
            sftp = client.open_sftp()
        except Exception:
            # Do not leave a half-open SSH connection behind.
            client.close()
            raise

        self._client = client
        self._sftp = sftp

    def close(self) -> None:
        """Close the SFTP session and the SSH client, if they exist.

        Safe to call on a reader that was never connected.
        """
        try:
            if self._sftp:
                self._sftp.close()
        finally:
            # Close the SSH client even when closing the SFTP session failed.
            if self._client:
                self._client.close()

    def get_files(
        self,
        remote_path: str = ".",
        file_pattern: str = "*",
    ) -> List[str]:
        """List the entries of ``remote_path`` whose names match ``file_pattern``.

        This method opens its own connection and always closes it again, so
        call :meth:`connect` afterwards before using the read methods.

        Args:
            remote_path: Remote directory to list.
            file_pattern: ``fnmatch``-style pattern applied to each name.

        Returns:
            The matching names, in the order returned by the server.
        """
        self.connect()
        try:
            all_files: List[str] = self._sftp.listdir(remote_path)
            return [f for f in all_files if fnmatch.fnmatch(name=f, pat=file_pattern)]
        finally:
            self.close()

    def _require_connection(self) -> None:
        """Raise ``RuntimeError`` when no SFTP session has been opened."""
        if not self._sftp:
            raise RuntimeError("Not connected. Call connect() first.")

    def _read_remote_bytes(self, file_name: str, remote_path: str) -> bytes:
        """Return the raw content of ``remote_path``/``file_name``."""
        remote_file_path = f"{remote_path.rstrip('/')}/{file_name}"
        with self._sftp.file(remote_file_path, "rb") as remote_file:
            return remote_file.read()

    def get_csv_file(
        self,
        file_name: str,
        remote_path: str = ".",
        column_names: Optional[list[str]] = None,
    ) -> "Optional[pl.DataFrame]":
        """Fetch a UTF-8 encoded CSV file as a Polars DataFrame.

        Args:
            file_name: Name of the file inside ``remote_path``.
            remote_path: Remote directory containing the file.
            column_names: When given, the file is read as having no header
                row and these names are used for the columns.

        Returns:
            The parsed DataFrame, or ``None`` if reading or parsing failed
            (the error is printed).

        Raises:
            RuntimeError: If the reader has not been connected.
        """
        self._require_connection()

        try:
            content = self._read_remote_bytes(file_name, remote_path).decode("utf-8")
            if column_names:
                return pl.read_csv(
                    source=StringIO(content),
                    has_header=False,
                    new_columns=column_names,
                )
            return pl.read_csv(source=StringIO(content))
        except Exception as e:
            print(f"Error getting {file_name}: {e}")
            return None

    def get_xml_file_to_string(
        self,
        file_name: str,
        remote_path: str = ".",
    ) -> Optional[str]:
        """Fetch a UTF-8 encoded XML file as a string.

        Args:
            file_name: Name of the file inside ``remote_path``.
            remote_path: Remote directory containing the file.

        Returns:
            The decoded content, or ``None`` if reading or decoding failed
            (the error is printed).

        Raises:
            RuntimeError: If the reader has not been connected.
        """
        self._require_connection()

        try:
            return self._read_remote_bytes(file_name, remote_path).decode("utf-8")
        except Exception as e:
            print(f"Error getting {file_name}: {e}")
            return None

    def file_to_string(
        self,
        file_name: str,
        remote_path: str = ".",
        encoding: Optional[str] = None,
    ) -> Optional[str]:
        """Fetch any file as a string.

        Args:
            file_name: Name of the file inside ``remote_path``.
            remote_path: Remote directory containing the file.
            encoding: Encoding used to decode the bytes. When ``None`` the
                encoding is detected with ``chardet``; UTF-8 is used if the
                file is empty or no encoding could be detected.

        Returns:
            The decoded content, or ``None`` if reading or decoding failed
            (the error is printed).

        Raises:
            RuntimeError: If the reader has not been connected.
        """
        self._require_connection()

        try:
            raw_bytes = self._read_remote_bytes(file_name, remote_path)

            if encoding is None:
                # chardet reports encoding=None when it cannot decide (for
                # example on empty input); decoding with None would raise.
                detected = chardet.detect(raw_bytes)["encoding"] if raw_bytes else None
                encoding = detected or "utf-8"

            return raw_bytes.decode(encoding)
        except Exception as e:
            print(f"Error getting {file_name}: {e}")
            return None
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cornflex"
|
|
7
|
+
version = "1.0.1"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Erik Bremstedt", email="erik.bremstedt@gmail.com" },
|
|
10
|
+
]
|
|
11
|
+
description = "simple sftp reader"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.11.0"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"croniter>0.3.0",
|
|
21
|
+
"paramiko>=3.0.0",
|
|
22
|
+
"polars>=0.20.0",
|
|
23
|
+
"chardet>=5.0.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/ebremstedt/cornflex"
|
|
28
|
+
Issues = "https://github.com/ebremstedt/cornflex/issues"
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
from io import StringIO
|
|
2
|
+
from unittest.mock import MagicMock, patch, PropertyMock
|
|
3
|
+
import pytest
|
|
4
|
+
import polars as pl
|
|
5
|
+
from polars.testing import assert_frame_equal
|
|
6
|
+
|
|
7
|
+
from cornflex.reader import SFTPReader
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
def reader_password() -> SFTPReader:
    """Provide an unconnected reader configured for password authentication."""
    settings = {"hostname": "host", "username": "user", "password": "pass"}
    return SFTPReader(**settings)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.fixture
def reader_pem() -> SFTPReader:
    """Provide an unconnected reader configured for key-file authentication."""
    settings = {"hostname": "host", "username": "user", "pem_file": "/path/to/key.pem"}
    return SFTPReader(**settings)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture
def connected_reader(reader_password: SFTPReader) -> SFTPReader:
    """Provide the password reader with a mock SFTP session already attached."""
    fake_session = MagicMock()
    reader_password._sftp = fake_session
    return reader_password
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# --- connect ---
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_connect_raises_without_auth() -> None:
    """connect() must reject a reader that has neither password nor key file."""
    expected_message = "Either password or pem_file must be provided"
    unauthenticated = SFTPReader(hostname="host", username="user")
    with pytest.raises(ValueError, match=expected_message):
        unauthenticated.connect()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@patch("cornflex.reader.paramiko.SSHClient")
def test_connect_with_password(
    mock_ssh: MagicMock, reader_password: SFTPReader
) -> None:
    """Password readers pass ``password`` (and no ``pkey``) to SSHClient.connect."""
    reader_password.connect()

    ssh_connect = mock_ssh.return_value.connect
    ssh_connect.assert_called_once()
    passed = ssh_connect.call_args.kwargs
    assert passed["password"] == "pass"
    assert "pkey" not in passed
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@patch("cornflex.reader.paramiko.RSAKey.from_private_key_file")
@patch("cornflex.reader.paramiko.SSHClient")
def test_connect_with_pem(
    mock_ssh: MagicMock, mock_rsa: MagicMock, reader_pem: SFTPReader
) -> None:
    """Key-file readers load the key and pass ``pkey`` (and no ``password``)."""
    reader_pem.connect()

    mock_rsa.assert_called_once_with("/path/to/key.pem")
    passed = mock_ssh.return_value.connect.call_args.kwargs
    assert "pkey" in passed
    assert "password" not in passed
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# --- close ---
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_close_calls_sftp_and_client(reader_password: SFTPReader) -> None:
    """close() must close both the SFTP session and the SSH client once."""
    reader_password._sftp, reader_password._client = MagicMock(), MagicMock()

    reader_password.close()

    for handle in (reader_password._sftp, reader_password._client):
        handle.close.assert_called_once()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_close_without_connection_does_not_raise(reader_password: SFTPReader) -> None:
    """close() on a never-connected reader is a harmless no-op."""
    never_connected = reader_password
    never_connected.close()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# --- get_files ---
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@patch("cornflex.reader.paramiko.SSHClient")
def test_get_files_returns_filtered(
    mock_ssh: MagicMock, reader_password: SFTPReader
) -> None:
    """get_files() keeps only the directory entries matching the pattern."""
    listing = ["orders.csv", "users.csv", "config.json"]
    sftp_session = MagicMock()
    sftp_session.listdir.return_value = listing
    mock_ssh.return_value.open_sftp.return_value = sftp_session

    matches = reader_password.get_files(remote_path="/data", file_pattern="*.csv")

    assert matches == ["orders.csv", "users.csv"]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@patch("cornflex.reader.paramiko.SSHClient")
def test_get_files_no_match(mock_ssh: MagicMock, reader_password: SFTPReader) -> None:
    """get_files() returns an empty list when nothing matches the pattern."""
    sftp_session = MagicMock()
    sftp_session.listdir.return_value = ["config.json", "notes.txt"]
    mock_ssh.return_value.open_sftp.return_value = sftp_session

    matches = reader_password.get_files(file_pattern="*.csv")

    assert matches == []
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# --- get_csv_file ---
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_get_csv_file_raises_when_not_connected(reader_password: SFTPReader) -> None:
    """get_csv_file() refuses to run before connect() has been called."""
    not_connected = pytest.raises(RuntimeError, match="Not connected")
    with not_connected:
        reader_password.get_csv_file("file.csv")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_get_csv_file_returns_dataframe(connected_reader: SFTPReader) -> None:
    """A CSV with a header row is parsed into the matching DataFrame."""
    # The mock stands in for the remote file object used as a context manager.
    remote_file = MagicMock()
    remote_file.read.return_value = b"id,name\n1,Alice\n2,Bob"
    remote_file.__enter__.return_value = remote_file
    remote_file.__exit__.return_value = False
    connected_reader._sftp.file.return_value = remote_file

    frame = connected_reader.get_csv_file("data.csv", remote_path="/data")

    assert_frame_equal(frame, pl.DataFrame({"id": [1, 2], "name": ["Alice", "Bob"]}))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_get_csv_file_with_column_names(connected_reader: SFTPReader) -> None:
    """Explicit column names are applied to a header-less CSV."""
    wanted_columns = ["id", "name"]
    remote_file = MagicMock()
    remote_file.read.return_value = b"1,Alice\n2,Bob"
    remote_file.__enter__.return_value = remote_file
    remote_file.__exit__.return_value = False
    connected_reader._sftp.file.return_value = remote_file

    frame = connected_reader.get_csv_file("data.csv", column_names=wanted_columns)

    assert frame.columns == ["id", "name"]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_get_csv_file_returns_none_on_error(connected_reader: SFTPReader) -> None:
    """Failures while opening the remote file are swallowed and yield None."""
    failure = Exception("boom")
    connected_reader._sftp.file.side_effect = failure

    assert connected_reader.get_csv_file("bad.csv") is None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# --- get_xml_file_to_string ---
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def test_get_xml_raises_when_not_connected(reader_password: SFTPReader) -> None:
    """get_xml_file_to_string() refuses to run before connect() has been called."""
    not_connected = pytest.raises(RuntimeError, match="Not connected")
    with not_connected:
        reader_password.get_xml_file_to_string("file.xml")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def test_get_xml_returns_string(connected_reader: SFTPReader) -> None:
    """The remote XML bytes come back decoded as a string."""
    remote_file = MagicMock()
    remote_file.read.return_value = b"<root><item>1</item></root>"
    remote_file.__enter__.return_value = remote_file
    remote_file.__exit__.return_value = False
    connected_reader._sftp.file.return_value = remote_file

    text = connected_reader.get_xml_file_to_string("feed.xml", remote_path="/exports")

    assert text == "<root><item>1</item></root>"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_get_xml_returns_none_on_error(connected_reader: SFTPReader) -> None:
    """Failures while opening the remote file are swallowed and yield None."""
    failure = Exception("boom")
    connected_reader._sftp.file.side_effect = failure

    assert connected_reader.get_xml_file_to_string("bad.xml") is None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# --- file_to_string ---
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def test_file_to_string_raises_when_not_connected(reader_password: SFTPReader) -> None:
    """file_to_string() refuses to run before connect() has been called."""
    not_connected = pytest.raises(RuntimeError, match="Not connected")
    with not_connected:
        reader_password.file_to_string("file.txt")
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_file_to_string_with_explicit_encoding(connected_reader: SFTPReader) -> None:
    """An explicitly supplied encoding is used to decode the remote bytes."""
    remote_file = MagicMock()
    remote_file.read.return_value = "héllo".encode("latin-1")
    remote_file.__enter__.return_value = remote_file
    remote_file.__exit__.return_value = False
    connected_reader._sftp.file.return_value = remote_file

    text = connected_reader.file_to_string("report.txt", encoding="latin-1")

    assert text == "héllo"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_file_to_string_auto_detects_encoding(connected_reader: SFTPReader) -> None:
    """Without an explicit encoding the content is still decoded correctly."""
    remote_file = MagicMock()
    remote_file.read.return_value = "hello world".encode("utf-8")
    remote_file.__enter__.return_value = remote_file
    remote_file.__exit__.return_value = False
    connected_reader._sftp.file.return_value = remote_file

    text = connected_reader.file_to_string("report.txt")

    assert text == "hello world"
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def test_file_to_string_returns_none_on_error(connected_reader: SFTPReader) -> None:
    """Failures while opening the remote file are swallowed and yield None."""
    failure = Exception("boom")
    connected_reader._sftp.file.side_effect = failure

    assert connected_reader.file_to_string("bad.txt") is None
|