aiogzip 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiogzip-0.1.0/LICENSE +21 -0
- aiogzip-0.1.0/PKG-INFO +179 -0
- aiogzip-0.1.0/README.md +155 -0
- aiogzip-0.1.0/pyproject.toml +35 -0
- aiogzip-0.1.0/setup.cfg +4 -0
- aiogzip-0.1.0/src/__init__.py +12 -0
- aiogzip-0.1.0/src/aiogzip.egg-info/PKG-INFO +179 -0
- aiogzip-0.1.0/src/aiogzip.egg-info/SOURCES.txt +11 -0
- aiogzip-0.1.0/src/aiogzip.egg-info/dependency_links.txt +1 -0
- aiogzip-0.1.0/src/aiogzip.egg-info/requires.txt +9 -0
- aiogzip-0.1.0/src/aiogzip.egg-info/top_level.txt +2 -0
- aiogzip-0.1.0/src/aiogzip.py +594 -0
- aiogzip-0.1.0/tests/test_aiogzip.py +1140 -0
aiogzip-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Geoff Davis
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
aiogzip-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aiogzip
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Asynchronous gzip file reader/writer with aiocsv support.
|
|
5
|
+
Author-email: Geoff Davis <geoff@keksi.ai>
|
|
6
|
+
Project-URL: Homepage, https://github.com/geoff-davis/aiogzip
|
|
7
|
+
Project-URL: Issues, https://github.com/geoff-davis/aiogzip/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Framework :: AsyncIO
|
|
12
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: aiofiles>=23.0.0
|
|
17
|
+
Provides-Extra: csv
|
|
18
|
+
Requires-Dist: aiocsv>=1.2.0; extra == "csv"
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
22
|
+
Requires-Dist: psutil; extra == "dev"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# aiogzip ⚡️
|
|
26
|
+
|
|
27
|
+
**An asynchronous library for reading and writing gzip-compressed files.**
|
|
28
|
+
|
|
29
|
+
`aiogzip` provides a fast, simple, and asyncio-native interface for handling `.gz` files, making it a useful complement to Python's built-in `gzip` module for asynchronous applications.
|
|
30
|
+
|
|
31
|
+
It is designed for high-performance I/O operations, especially for text-based data pipelines, and integrates seamlessly with other `async` libraries like `aiocsv`.
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- **Truly Asynchronous**: Built with `asyncio` and `aiofiles` for non-blocking file I/O.
|
|
36
|
+
- **High-Performance Text Processing**: Significantly faster than the standard `gzip` library for text and JSONL file operations.
|
|
37
|
+
- **Simple API**: Mimics the interface of `gzip.open()`, making it easy to adopt.
|
|
38
|
+
- **Separate Binary and Text Modes**: `AsyncGzipBinaryFile` and `AsyncGzipTextFile` provide clear, type-safe handling of data.
|
|
39
|
+
- **Excellent Compression Quality**: Achieves compression ratios nearly identical to the standard `gzip` module.
|
|
40
|
+
- **`aiocsv` Integration**: Read and write compressed CSV files effortlessly.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
Install `aiogzip` using pip. To include optional `aiocsv` support, specify the `[csv]` extra.
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# Standard installation
|
|
50
|
+
pip install aiogzip
|
|
51
|
+
|
|
52
|
+
# With aiocsv support
|
|
53
|
+
pip install aiogzip[csv]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quickstart
|
|
59
|
+
|
|
60
|
+
Using `aiogzip` is as simple as using the standard `gzip` module, but with `async`/`await`.
|
|
61
|
+
|
|
62
|
+
### Writing to a Compressed File
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import asyncio
|
|
66
|
+
from aiogzip import AsyncGzipFile
|
|
67
|
+
|
|
68
|
+
async def main():
|
|
69
|
+
# Write binary data
|
|
70
|
+
async with AsyncGzipFile("file.gz", "wb") as f:
|
|
71
|
+
await f.write(b"Hello, async world!")
|
|
72
|
+
|
|
73
|
+
# Write text data
|
|
74
|
+
async with AsyncGzipFile("file.txt.gz", "wt") as f:
|
|
75
|
+
await f.write("This is a text file.")
|
|
76
|
+
|
|
77
|
+
asyncio.run(main())
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Reading from a Compressed File
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
import asyncio
|
|
84
|
+
from aiogzip import AsyncGzipFile
|
|
85
|
+
|
|
86
|
+
async def main():
|
|
87
|
+
# Read the entire file
|
|
88
|
+
async with AsyncGzipFile("file.gz", "rb") as f:
|
|
89
|
+
content = await f.read()
|
|
90
|
+
print(content)
|
|
91
|
+
|
|
92
|
+
# Iterate over lines in a text file
|
|
93
|
+
async with AsyncGzipFile("file.txt.gz", "rt") as f:
|
|
94
|
+
async for line in f:
|
|
95
|
+
print(line.strip())
|
|
96
|
+
|
|
97
|
+
asyncio.run(main())
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Performance
|
|
103
|
+
|
|
104
|
+
`aiogzip` is a specialized tool that excels in text-based, async workflows but may be slower than the standard library for binary operations.
|
|
105
|
+
|
|
106
|
+
According to the benchmarks, `aiogzip` is:
|
|
107
|
+
|
|
108
|
+
- **4.3x faster** for general text file operations.
|
|
109
|
+
- **1.6x faster** when processing structured text like JSONL files.
|
|
110
|
+
- **1.1x slower** for binary file operations using 1KB chunk sizes.
|
|
111
|
+
|
|
112
|
+
The key is to match the tool to the task. Use `aiogzip` where its async and text-handling capabilities provide the most significant advantage.
|
|
113
|
+
|
|
114
|
+
### Async and Concurrent Processing Benefits
|
|
115
|
+
|
|
116
|
+
`aiogzip` excels in scenarios where you need to process multiple files concurrently or integrate with other async libraries:
|
|
117
|
+
|
|
118
|
+
- **Concurrent file processing**: Process multiple `.gz` files simultaneously without blocking
|
|
119
|
+
- **Async pipeline integration**: Seamlessly works with `aiocsv`, `aiohttp`, and other async libraries
|
|
120
|
+
- **Non-blocking I/O**: Allows your application to handle other tasks while file operations are in progress
|
|
121
|
+
- **Better resource utilization**: More efficient use of system resources in I/O-bound applications
|
|
122
|
+
|
|
123
|
+
**Note**: The benefits of async are most visible when there's actual I/O latency (network storage, remote APIs, etc.) or when mixing file operations with other async tasks. For purely local file processing on SSDs, the async overhead may exceed the benefits due to minimal I/O wait times.
|
|
124
|
+
|
|
125
|
+
### When to Use `aiogzip`
|
|
126
|
+
|
|
127
|
+
✅ **Recommended for:**
|
|
128
|
+
|
|
129
|
+
- Async applications processing text, CSV, or JSONL files.
|
|
130
|
+
- Streaming text-based data pipelines.
|
|
131
|
+
- Applications where async integration and concurrent file processing are more important than raw binary I/O speed.
|
|
132
|
+
|
|
133
|
+
### When to Use Standard `gzip`
|
|
134
|
+
|
|
135
|
+
❌ **Consider standard `gzip` for:**
|
|
136
|
+
|
|
137
|
+
- Purely synchronous applications.
|
|
138
|
+
- Applications that are highly memory-constrained, as `aiogzip` may use more memory during decompression of highly compressible data due to internal buffering.
|
|
139
|
+
- Workloads dominated by binary file I/O where maximum performance is essential.
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Limitations
|
|
144
|
+
|
|
145
|
+
`aiogzip` focuses on the most common file-based read/write operations and does not implement the full API of the standard `gzip` module. Notably, it does not currently support:
|
|
146
|
+
|
|
147
|
+
- In-memory compression/decompression (e.g., `gzip.compress`/`gzip.decompress`).
|
|
148
|
+
- The `seek()` and `tell()` methods for navigating within a file stream.
|
|
149
|
+
- Reading or writing gzip headers and metadata like `mtime`.
|
|
150
|
+
|
|
151
|
+
## Development
|
|
152
|
+
|
|
153
|
+
This project uses `setuptools` for packaging.
|
|
154
|
+
|
|
155
|
+
1. **Clone the repository**:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
git clone https://github.com/geoff-davis/aiogzip.git
|
|
159
|
+
cd aiogzip
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
2. **Create a virtual environment and install dependencies**:
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
python -m venv .venv
|
|
166
|
+
source .venv/bin/activate
|
|
167
|
+
pip install -e ".[csv]" # Install in editable mode with extras
|
|
168
|
+
pip install -e ".[dev]" # Install dev dependencies
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
3. **Run tests**:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
pytest
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
This project is licensed under the **MIT License**. See the `LICENSE` file for details.
|
aiogzip-0.1.0/README.md
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# aiogzip ⚡️
|
|
2
|
+
|
|
3
|
+
**An asynchronous library for reading and writing gzip-compressed files.**
|
|
4
|
+
|
|
5
|
+
`aiogzip` provides a fast, simple, and asyncio-native interface for handling `.gz` files, making it a useful complement to Python's built-in `gzip` module for asynchronous applications.
|
|
6
|
+
|
|
7
|
+
It is designed for high-performance I/O operations, especially for text-based data pipelines, and integrates seamlessly with other `async` libraries like `aiocsv`.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Truly Asynchronous**: Built with `asyncio` and `aiofiles` for non-blocking file I/O.
|
|
12
|
+
- **High-Performance Text Processing**: Significantly faster than the standard `gzip` library for text and JSONL file operations.
|
|
13
|
+
- **Simple API**: Mimics the interface of `gzip.open()`, making it easy to adopt.
|
|
14
|
+
- **Separate Binary and Text Modes**: `AsyncGzipBinaryFile` and `AsyncGzipTextFile` provide clear, type-safe handling of data.
|
|
15
|
+
- **Excellent Compression Quality**: Achieves compression ratios nearly identical to the standard `gzip` module.
|
|
16
|
+
- **`aiocsv` Integration**: Read and write compressed CSV files effortlessly.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
Install `aiogzip` using pip. To include optional `aiocsv` support, specify the `[csv]` extra.
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Standard installation
|
|
26
|
+
pip install aiogzip
|
|
27
|
+
|
|
28
|
+
# With aiocsv support
|
|
29
|
+
pip install aiogzip[csv]
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Quickstart
|
|
35
|
+
|
|
36
|
+
Using `aiogzip` is as simple as using the standard `gzip` module, but with `async`/`await`.
|
|
37
|
+
|
|
38
|
+
### Writing to a Compressed File
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import asyncio
|
|
42
|
+
from aiogzip import AsyncGzipFile
|
|
43
|
+
|
|
44
|
+
async def main():
|
|
45
|
+
# Write binary data
|
|
46
|
+
async with AsyncGzipFile("file.gz", "wb") as f:
|
|
47
|
+
await f.write(b"Hello, async world!")
|
|
48
|
+
|
|
49
|
+
# Write text data
|
|
50
|
+
async with AsyncGzipFile("file.txt.gz", "wt") as f:
|
|
51
|
+
await f.write("This is a text file.")
|
|
52
|
+
|
|
53
|
+
asyncio.run(main())
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Reading from a Compressed File
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import asyncio
|
|
60
|
+
from aiogzip import AsyncGzipFile
|
|
61
|
+
|
|
62
|
+
async def main():
|
|
63
|
+
# Read the entire file
|
|
64
|
+
async with AsyncGzipFile("file.gz", "rb") as f:
|
|
65
|
+
content = await f.read()
|
|
66
|
+
print(content)
|
|
67
|
+
|
|
68
|
+
# Iterate over lines in a text file
|
|
69
|
+
async with AsyncGzipFile("file.txt.gz", "rt") as f:
|
|
70
|
+
async for line in f:
|
|
71
|
+
print(line.strip())
|
|
72
|
+
|
|
73
|
+
asyncio.run(main())
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Performance
|
|
79
|
+
|
|
80
|
+
`aiogzip` is a specialized tool that excels in text-based, async workflows but may be slower than the standard library for binary operations.
|
|
81
|
+
|
|
82
|
+
According to the benchmarks, `aiogzip` is:
|
|
83
|
+
|
|
84
|
+
- **4.3x faster** for general text file operations.
|
|
85
|
+
- **1.6x faster** when processing structured text like JSONL files.
|
|
86
|
+
- **1.1x slower** for binary file operations using 1KB chunk sizes.
|
|
87
|
+
|
|
88
|
+
The key is to match the tool to the task. Use `aiogzip` where its async and text-handling capabilities provide the most significant advantage.
|
|
89
|
+
|
|
90
|
+
### Async and Concurrent Processing Benefits
|
|
91
|
+
|
|
92
|
+
`aiogzip` excels in scenarios where you need to process multiple files concurrently or integrate with other async libraries:
|
|
93
|
+
|
|
94
|
+
- **Concurrent file processing**: Process multiple `.gz` files simultaneously without blocking.
|
|
95
|
+
- **Async pipeline integration**: Works seamlessly with `aiocsv`, `aiohttp`, and other async libraries.
|
|
96
|
+
- **Non-blocking I/O**: Allows your application to handle other tasks while file operations are in progress.
|
|
97
|
+
- **Better resource utilization**: More efficient use of system resources in I/O-bound applications.
|
|
98
|
+
|
|
99
|
+
**Note**: The benefits of async are most visible when there's actual I/O latency (network storage, remote APIs, etc.) or when mixing file operations with other async tasks. For purely local file processing on SSDs, the async overhead may exceed the benefits due to minimal I/O wait times.
|
|
100
|
+
|
|
101
|
+
### When to Use `aiogzip`
|
|
102
|
+
|
|
103
|
+
✅ **Recommended for:**
|
|
104
|
+
|
|
105
|
+
- Async applications processing text, CSV, or JSONL files.
|
|
106
|
+
- Streaming text-based data pipelines.
|
|
107
|
+
- Applications where async integration and concurrent file processing are more important than raw binary I/O speed.
|
|
108
|
+
|
|
109
|
+
### When to Use Standard `gzip`
|
|
110
|
+
|
|
111
|
+
❌ **Consider standard `gzip` for:**
|
|
112
|
+
|
|
113
|
+
- Purely synchronous applications.
|
|
114
|
+
- Applications that are highly memory-constrained, as `aiogzip` may use more memory during decompression of highly compressible data due to internal buffering.
|
|
115
|
+
- Workloads dominated by binary file I/O where maximum performance is essential.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Limitations
|
|
120
|
+
|
|
121
|
+
`aiogzip` focuses on the most common file-based read/write operations and does not implement the full API of the standard `gzip` module. Notably, it does not currently support:
|
|
122
|
+
|
|
123
|
+
- In-memory compression/decompression (e.g., `gzip.compress`/`gzip.decompress`).
|
|
124
|
+
- The `seek()` and `tell()` methods for navigating within a file stream.
|
|
125
|
+
- Reading or writing gzip headers and metadata like `mtime`.
|
|
126
|
+
|
|
127
|
+
## Development
|
|
128
|
+
|
|
129
|
+
This project uses `setuptools` for packaging.
|
|
130
|
+
|
|
131
|
+
1. **Clone the repository**:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
git clone https://github.com/geoff-davis/aiogzip.git
|
|
135
|
+
cd aiogzip
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
2. **Create a virtual environment and install dependencies**:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
python -m venv .venv
|
|
142
|
+
source .venv/bin/activate
|
|
143
|
+
pip install -e ".[csv]" # Install in editable mode with extras
|
|
144
|
+
pip install -e ".[dev]" # Install dev dependencies
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
3. **Run tests**:
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
pytest
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
This project is licensed under the **MIT License**. See the `LICENSE` file for details.
|
aiogzip-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "aiogzip"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Geoff Davis", email="geoff@keksi.ai" },
|
|
10
|
+
]
|
|
11
|
+
description = "Asynchronous gzip file reader/writer with aiocsv support."
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.9"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Framework :: AsyncIO",
|
|
19
|
+
"Topic :: System :: Archiving :: Compression",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"aiofiles>=23.0.0",
|
|
23
|
+
]
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
csv = [
|
|
26
|
+
"aiocsv>=1.2.0",
|
|
27
|
+
]
|
|
28
|
+
dev = [
|
|
29
|
+
"pytest>=8.0.0",
|
|
30
|
+
"pytest-asyncio",
|
|
31
|
+
"psutil",
|
|
32
|
+
]
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/geoff-davis/aiogzip"
|
|
35
|
+
Issues = "https://github.com/geoff-davis/aiogzip/issues"
|
aiogzip-0.1.0/setup.cfg
ADDED
|
aiogzip-0.1.0/src/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# src/__init__.py
|
|
2
|
+
"""AsyncGzipFile - Asynchronous gzip file reader/writer."""
|
|
3
|
+
|
|
4
|
+
__version__ = "0.1.0"
|
|
5
|
+
|
|
6
|
+
from .aiogzip import AsyncGzipBinaryFile, AsyncGzipFile, AsyncGzipTextFile
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"AsyncGzipFile",
|
|
10
|
+
"AsyncGzipBinaryFile",
|
|
11
|
+
"AsyncGzipTextFile",
|
|
12
|
+
]
|
aiogzip-0.1.0/src/aiogzip.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aiogzip
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Asynchronous gzip file reader/writer with aiocsv support.
|
|
5
|
+
Author-email: Geoff Davis <geoff@keksi.ai>
|
|
6
|
+
Project-URL: Homepage, https://github.com/geoff-davis/aiogzip
|
|
7
|
+
Project-URL: Issues, https://github.com/geoff-davis/aiogzip/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Framework :: AsyncIO
|
|
12
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: aiofiles>=23.0.0
|
|
17
|
+
Provides-Extra: csv
|
|
18
|
+
Requires-Dist: aiocsv>=1.2.0; extra == "csv"
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
22
|
+
Requires-Dist: psutil; extra == "dev"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# aiogzip ⚡️
|
|
26
|
+
|
|
27
|
+
**An asynchronous library for reading and writing gzip-compressed files.**
|
|
28
|
+
|
|
29
|
+
`aiogzip` provides a fast, simple, and asyncio-native interface for handling `.gz` files, making it a useful complement to Python's built-in `gzip` module for asynchronous applications.
|
|
30
|
+
|
|
31
|
+
It is designed for high-performance I/O operations, especially for text-based data pipelines, and integrates seamlessly with other `async` libraries like `aiocsv`.
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- **Truly Asynchronous**: Built with `asyncio` and `aiofiles` for non-blocking file I/O.
|
|
36
|
+
- **High-Performance Text Processing**: Significantly faster than the standard `gzip` library for text and JSONL file operations.
|
|
37
|
+
- **Simple API**: Mimics the interface of `gzip.open()`, making it easy to adopt.
|
|
38
|
+
- **Separate Binary and Text Modes**: `AsyncGzipBinaryFile` and `AsyncGzipTextFile` provide clear, type-safe handling of data.
|
|
39
|
+
- **Excellent Compression Quality**: Achieves compression ratios nearly identical to the standard `gzip` module.
|
|
40
|
+
- **`aiocsv` Integration**: Read and write compressed CSV files effortlessly.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
Install `aiogzip` using pip. To include optional `aiocsv` support, specify the `[csv]` extra.
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# Standard installation
|
|
50
|
+
pip install aiogzip
|
|
51
|
+
|
|
52
|
+
# With aiocsv support
|
|
53
|
+
pip install aiogzip[csv]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quickstart
|
|
59
|
+
|
|
60
|
+
Using `aiogzip` is as simple as using the standard `gzip` module, but with `async`/`await`.
|
|
61
|
+
|
|
62
|
+
### Writing to a Compressed File
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import asyncio
|
|
66
|
+
from aiogzip import AsyncGzipFile
|
|
67
|
+
|
|
68
|
+
async def main():
|
|
69
|
+
# Write binary data
|
|
70
|
+
async with AsyncGzipFile("file.gz", "wb") as f:
|
|
71
|
+
await f.write(b"Hello, async world!")
|
|
72
|
+
|
|
73
|
+
# Write text data
|
|
74
|
+
async with AsyncGzipFile("file.txt.gz", "wt") as f:
|
|
75
|
+
await f.write("This is a text file.")
|
|
76
|
+
|
|
77
|
+
asyncio.run(main())
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Reading from a Compressed File
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
import asyncio
|
|
84
|
+
from aiogzip import AsyncGzipFile
|
|
85
|
+
|
|
86
|
+
async def main():
|
|
87
|
+
# Read the entire file
|
|
88
|
+
async with AsyncGzipFile("file.gz", "rb") as f:
|
|
89
|
+
content = await f.read()
|
|
90
|
+
print(content)
|
|
91
|
+
|
|
92
|
+
# Iterate over lines in a text file
|
|
93
|
+
async with AsyncGzipFile("file.txt.gz", "rt") as f:
|
|
94
|
+
async for line in f:
|
|
95
|
+
print(line.strip())
|
|
96
|
+
|
|
97
|
+
asyncio.run(main())
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Performance
|
|
103
|
+
|
|
104
|
+
`aiogzip` is a specialized tool that excels in text-based, async workflows but may be slower than the standard library for binary operations.
|
|
105
|
+
|
|
106
|
+
According to the benchmarks, `aiogzip` is:
|
|
107
|
+
|
|
108
|
+
- **4.3x faster** for general text file operations.
|
|
109
|
+
- **1.6x faster** when processing structured text like JSONL files.
|
|
110
|
+
- **1.1x slower** for binary file operations using 1KB chunk sizes.
|
|
111
|
+
|
|
112
|
+
The key is to match the tool to the task. Use `aiogzip` where its async and text-handling capabilities provide the most significant advantage.
|
|
113
|
+
|
|
114
|
+
### Async and Concurrent Processing Benefits
|
|
115
|
+
|
|
116
|
+
`aiogzip` excels in scenarios where you need to process multiple files concurrently or integrate with other async libraries:
|
|
117
|
+
|
|
118
|
+
- **Concurrent file processing**: Process multiple `.gz` files simultaneously without blocking
|
|
119
|
+
- **Async pipeline integration**: Seamlessly works with `aiocsv`, `aiohttp`, and other async libraries
|
|
120
|
+
- **Non-blocking I/O**: Allows your application to handle other tasks while file operations are in progress
|
|
121
|
+
- **Better resource utilization**: More efficient use of system resources in I/O-bound applications
|
|
122
|
+
|
|
123
|
+
**Note**: The benefits of async are most visible when there's actual I/O latency (network storage, remote APIs, etc.) or when mixing file operations with other async tasks. For purely local file processing on SSDs, the async overhead may exceed the benefits due to minimal I/O wait times.
|
|
124
|
+
|
|
125
|
+
### When to Use `aiogzip`
|
|
126
|
+
|
|
127
|
+
✅ **Recommended for:**
|
|
128
|
+
|
|
129
|
+
- Async applications processing text, CSV, or JSONL files.
|
|
130
|
+
- Streaming text-based data pipelines.
|
|
131
|
+
- Applications where async integration and concurrent file processing are more important than raw binary I/O speed.
|
|
132
|
+
|
|
133
|
+
### When to Use Standard `gzip`
|
|
134
|
+
|
|
135
|
+
❌ **Consider standard `gzip` for:**
|
|
136
|
+
|
|
137
|
+
- Purely synchronous applications.
|
|
138
|
+
- Applications that are highly memory-constrained, as `aiogzip` may use more memory during decompression of highly compressible data due to internal buffering.
|
|
139
|
+
- Workloads dominated by binary file I/O where maximum performance is essential.
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Limitations
|
|
144
|
+
|
|
145
|
+
`aiogzip` focuses on the most common file-based read/write operations and does not implement the full API of the standard `gzip` module. Notably, it does not currently support:
|
|
146
|
+
|
|
147
|
+
- In-memory compression/decompression (e.g., `gzip.compress`/`gzip.decompress`).
|
|
148
|
+
- The `seek()` and `tell()` methods for navigating within a file stream.
|
|
149
|
+
- Reading or writing gzip headers and metadata like `mtime`.
|
|
150
|
+
|
|
151
|
+
## Development
|
|
152
|
+
|
|
153
|
+
This project uses `setuptools` for packaging.
|
|
154
|
+
|
|
155
|
+
1. **Clone the repository**:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
git clone https://github.com/geoff-davis/aiogzip.git
|
|
159
|
+
cd aiogzip
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
2. **Create a virtual environment and install dependencies**:
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
python -m venv .venv
|
|
166
|
+
source .venv/bin/activate
|
|
167
|
+
pip install -e ".[csv]" # Install in editable mode with extras
|
|
168
|
+
pip install -e ".[dev]" # Install dev dependencies
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
3. **Run tests**:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
pytest
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
This project is licensed under the **MIT License**. See the `LICENSE` file for details.
|
aiogzip-0.1.0/src/aiogzip.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/__init__.py
|
|
5
|
+
src/aiogzip.py
|
|
6
|
+
src/aiogzip.egg-info/PKG-INFO
|
|
7
|
+
src/aiogzip.egg-info/SOURCES.txt
|
|
8
|
+
src/aiogzip.egg-info/dependency_links.txt
|
|
9
|
+
src/aiogzip.egg-info/requires.txt
|
|
10
|
+
src/aiogzip.egg-info/top_level.txt
|
|
11
|
+
tests/test_aiogzip.py
|
aiogzip-0.1.0/src/aiogzip.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|