cafs-cache-cdn-client 1.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cafs_cache_cdn_client-1.0.5/PKG-INFO +99 -0
- cafs_cache_cdn_client-1.0.5/README.md +78 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/__init__.py +1 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/README.md +63 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/__init__.py +7 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/__init__.py +0 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/hash_.py +34 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/package.py +198 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/utils.py +37 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/client.py +535 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/exceptions.py +30 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/types.py +19 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/client.py +142 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/file_utils.py +96 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/repo/__init__.py +3 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/repo/client.py +102 -0
- cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/repo/datatypes.py +34 -0
- cafs_cache_cdn_client-1.0.5/pyproject.toml +74 -0
cafs_cache_cdn_client-1.0.5/PKG-INFO
@@ -0,0 +1,99 @@
Metadata-Version: 2.3
Name: cafs-cache-cdn-client
Version: 1.0.5
Summary: Async Cache CDN client implementation
Keywords: cafs,cache
Author: Konstantin Belov
Author-email: k.belov@gaijin.team
Requires-Python: >=3.11,<4.0
Classifier: License :: OSI Approved :: MIT License
Classifier: Development Status :: 5 - Production/Stable
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Dist: aiofiles
Requires-Dist: aiohttp
Requires-Dist: blake3
Requires-Dist: zstandard
Description-Content-Type: text/markdown

# Cache CDN Client

A Python client library for interacting with the Cache CDN service based on CAFS, allowing efficient pushing and pulling of cached content.

## Installation

```bash
pip install cafs-cache-cdn-client
```

## Features

- Asynchronous API for high-performance operations
- Push local directories to cache
- Pull cached content to local directories
- Check existence of cached references
- Tag references for easier access
- Attach additional files to existing references
- Delete references when no longer needed

## Usage Example

```python
import asyncio
import logging
from pathlib import Path

from cafs_cache_cdn_client import CacheCdnClient

# Configure logging to see detailed operation information
logging.basicConfig(level=logging.DEBUG)


async def main():
    # Initialize the client with the server URL
    # The connection_per_cafs_server parameter controls concurrency
    client = CacheCdnClient(
        'http://cache-server.example.com:8300',
        connection_per_cafs_server=10
    )

    # Use as an async context manager to ensure proper resource cleanup
    async with client:
        # Push a local directory to cache with a 2-hour TTL
        await client.push('project_name', 'build_artifacts',
                          '/path/to/build/output', ttl_hours=2,
                          comment='Build artifacts from CI run #123')

        # Check if a reference exists
        exists = await client.check('project_name', 'build_artifacts')
        print(f"Reference exists: {exists}")

        # Pull cached content to a local directory
        await client.pull('project_name', 'build_artifacts',
                          '/path/to/destination')

        # Tag a reference for easier access later
        await client.tag('project_name', 'build_artifacts', 'latest_stable')

        # Attach an additional file to an existing reference
        await client.attach('project_name', 'build_artifacts',
                            Path('/path/to/metadata.json'))

        # Delete a reference when no longer needed
        await client.delete('project_name', 'old_artifacts')


# Run the example
if __name__ == '__main__':
    asyncio.run(main())
```

## API Reference

### `CacheCdnClient`

- **Constructor**: `CacheCdnClient(server: str, connection_per_cafs_server: int = 1)`
  - `server`: URL of the cache server
  - `connection_per_cafs_server`: Number of concurrent connections per CAFS server

- **Methods**:
  - `push(repo: str, ref: str, directory: Path | str, ttl_hours: int = 0, comment: str | None = None)` - Push a local directory to cache
  - `pull(repo: str, ref: str, directory: Path | str)` - Pull cached content to a local directory
  - `check(repo: str, ref: str) -> bool` - Check if a reference exists
  - `tag(repo: str, ref: str, tag: str)` - Create a tag for a reference
  - `attach(repo: str, ref: str, file_path: Path)` - Attach a file to an existing reference
  - `delete(repo: str, ref: str)` - Delete a reference
cafs_cache_cdn_client-1.0.5/README.md
@@ -0,0 +1,78 @@
# Cache CDN Client

A Python client library for interacting with the Cache CDN service based on CAFS, allowing efficient pushing and pulling of cached content.

## Installation

```bash
pip install cafs-cache-cdn-client
```

## Features

- Asynchronous API for high-performance operations
- Push local directories to cache
- Pull cached content to local directories
- Check existence of cached references
- Tag references for easier access
- Attach additional files to existing references
- Delete references when no longer needed

## Usage Example

```python
import asyncio
import logging
from pathlib import Path

from cafs_cache_cdn_client import CacheCdnClient

# Configure logging to see detailed operation information
logging.basicConfig(level=logging.DEBUG)


async def main():
    # Initialize the client with the server URL
    # The connection_per_cafs_server parameter controls concurrency
    client = CacheCdnClient(
        'http://cache-server.example.com:8300',
        connection_per_cafs_server=10
    )

    # Use as an async context manager to ensure proper resource cleanup
    async with client:
        # Push a local directory to cache with a 2-hour TTL
        await client.push('project_name', 'build_artifacts',
                          '/path/to/build/output', ttl_hours=2,
                          comment='Build artifacts from CI run #123')

        # Check if a reference exists
        exists = await client.check('project_name', 'build_artifacts')
        print(f"Reference exists: {exists}")

        # Pull cached content to a local directory
        await client.pull('project_name', 'build_artifacts',
                          '/path/to/destination')

        # Tag a reference for easier access later
        await client.tag('project_name', 'build_artifacts', 'latest_stable')

        # Attach an additional file to an existing reference
        await client.attach('project_name', 'build_artifacts',
                            Path('/path/to/metadata.json'))

        # Delete a reference when no longer needed
        await client.delete('project_name', 'old_artifacts')


# Run the example
if __name__ == '__main__':
    asyncio.run(main())
```

## API Reference

### `CacheCdnClient`

- **Constructor**: `CacheCdnClient(server: str, connection_per_cafs_server: int = 1)`
  - `server`: URL of the cache server
  - `connection_per_cafs_server`: Number of concurrent connections per CAFS server

- **Methods**:
  - `push(repo: str, ref: str, directory: Path | str, ttl_hours: int = 0, comment: str | None = None)` - Push a local directory to cache
  - `pull(repo: str, ref: str, directory: Path | str)` - Pull cached content to a local directory
  - `check(repo: str, ref: str) -> bool` - Check if a reference exists
  - `tag(repo: str, ref: str, tag: str)` - Create a tag for a reference
  - `attach(repo: str, ref: str, file_path: Path)` - Attach a file to an existing reference
  - `delete(repo: str, ref: str)` - Delete a reference
cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/__init__.py
@@ -0,0 +1 @@
```python
from .client import CacheCdnClient
```
cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/README.md
@@ -0,0 +1,63 @@
# CAFS Client

CAFS Client is a Python library that provides an asynchronous interface for interacting with CAFS servers.

More information about the CAFS protocol can be found in the
[G-CVSNT](https://github.com/GaijinEntertainment/G-CVSNT/tree/master/cvsnt/cvsnt-2.5.05.3744/keyValueServer) repository.

## Usage Example

Below is a complete example demonstrating all major functionality of the CAFSClient:

```python
import asyncio
from pathlib import Path

from cafs_cache_cdn_client.cafs import CAFSClient, CompressionT


async def cafs_client_demo():
    client = CAFSClient(
        server_root='/data',
        servers=['localhost', 'example.com:2403'],
        connection_per_server=2,
        connect_timeout=5.0
    )

    async with client:
        # 1. Upload a file (stream operation)
        source_file = Path('./sample.txt')
        blob_hash = await client.stream(
            path=source_file,
            compression=CompressionT.ZSTD,
        )
        print(f'File uploaded with hash: {blob_hash}')

        # 2. Check if the file exists on the server
        exists = await client.check(blob_hash)
        print(f'File exists: {exists}')

        # 3. Get the file size
        size = await client.size(blob_hash)
        print(f'File size: {size} bytes')

        # 4. Download the file (pull operation)
        download_path = Path('./downloaded_sample.txt')
        await client.pull(blob_hash, download_path)


if __name__ == '__main__':
    asyncio.run(cafs_client_demo())
```

## Retry Mechanism

The CAFSClient implements a robust retry mechanism, so operations keep trying to complete even if some servers or connections are unavailable:

- When `retry=True` is specified (the default for most operations), the client automatically retries the operation across all available connections in the pool.
- The client iterates through the available connections until either:
  1. the operation succeeds, or
  2. all connections in the pool have been exhausted without success.

This behavior makes the client resilient to temporary network issues or server unavailability when multiple servers are configured. For critical operations, keep the default `retry=True` to maximize the chance of success in distributed environments.

If an operation needs to fail immediately without attempting other connections, disable this behavior by passing `retry=False` to methods such as `pull()`, `check()`, `size()`, and `stream()`.
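For illustration, here is a minimal sketch (not part of the package) of a fail-fast download. It reuses the constructor arguments from the usage example above; the blob hash passed at the bottom is a placeholder, and the concrete exception types live in `cafs/exceptions.py`.

```python
import asyncio
from pathlib import Path

from cafs_cache_cdn_client.cafs import CAFSClient


async def pull_fail_fast(blob_hash: str) -> None:
    client = CAFSClient(
        server_root='/data',
        servers=['localhost', 'example.com:2403'],
        connection_per_server=2,
        connect_timeout=5.0
    )
    async with client:
        try:
            # retry=False: give up after the first failed connection instead of
            # cycling through the remaining connections in the pool.
            await client.pull(blob_hash, Path('./downloaded_sample.txt'), retry=False)
        except Exception as exc:  # see cafs/exceptions.py for the concrete types
            print(f'Pull failed without retrying other connections: {exc}')


if __name__ == '__main__':
    # 'deadbeef...' is a placeholder; a real hash would come from stream().
    asyncio.run(pull_fail_fast('deadbeef...'))
```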
cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/__init__.py
File without changes
cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/hash_.py
@@ -0,0 +1,34 @@
```python
from pathlib import Path

import aiofiles
from blake3 import blake3

from cafs_cache_cdn_client.cafs.types import AsyncReader

__all__ = (
    'calc_hash',
    'calc_hash_file',
)

DEFAULT_BUFFER_SIZE = 4 * 1024 * 1024


async def calc_hash(
    reader: 'AsyncReader', buffer_size: int = DEFAULT_BUFFER_SIZE
) -> str:
    hasher = blake3()  # pylint: disable=not-callable

    while True:
        buffer = await reader.read(buffer_size)
        if not buffer:
            break
        hasher.update(buffer)

    return hasher.hexdigest()


async def calc_hash_file(
    file_path: Path, buffer_size: int = DEFAULT_BUFFER_SIZE
) -> str:
    async with aiofiles.open(file_path, 'rb') as f:
        return await calc_hash(f, buffer_size)
```
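As a quick illustration, a minimal sketch (not part of the package) of hashing a local file with the helper above; `./artifact.bin` is a hypothetical path.

```python
import asyncio
from pathlib import Path

from cafs_cache_cdn_client.cafs.blob.hash_ import calc_hash_file


async def main() -> None:
    # Hashes the file in 4 MiB reads using the default buffer size.
    digest = await calc_hash_file(Path('./artifact.bin'))
    print(f'BLAKE3 digest: {digest}')


if __name__ == '__main__':
    asyncio.run(main())
```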
cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/package.py
@@ -0,0 +1,198 @@
```python
import zlib
from enum import Enum
from logging import Logger, LoggerAdapter, getLogger
from typing import Protocol

try:
    import zstandard as zstd
except ImportError:
    zstd = None  # type: ignore[assignment]

from cafs_cache_cdn_client.cafs.types import AsyncReader, AsyncWriter

__all__ = (
    'CompressionT',
    'Packer',
    'Unpacker',
)


module_logger = getLogger(__name__)


class CompressionT(bytes, Enum):
    ZSTD = b'ZSTD'
    ZLIB = b'ZLIB'
    NONE = b'NONE'

    def __str__(self) -> str:
        return self.decode('utf-8')


FULL_HEADER_SIZE = 16
COMPRESSION_HEADER_SIZE = 4
DEFAULT_CHUNK_SIZE = 16 * 1024 * 1024


class Compressor(Protocol):
    def compress(self, data: bytes) -> bytes:
        pass

    def flush(self) -> bytes:
        pass


class Decompressor(Protocol):
    def decompress(self, data: bytes) -> bytes:
        pass

    def flush(self) -> bytes:
        pass


class Packer:
    logger: Logger | LoggerAdapter
    chunk_size: int

    _reader: 'AsyncReader'
    _eof_reached: bool
    _buffer: bytearray
    _compressor: Compressor | None

    def __init__(
        self,
        reader: 'AsyncReader',
        compression: CompressionT = CompressionT.NONE,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        logger: Logger | LoggerAdapter | None = None,
    ) -> None:
        self._reader = reader
        self._eof_reached = False
        self.chunk_size = chunk_size

        self._compressor = None
        if compression == CompressionT.ZLIB:
            self._compressor = zlib.compressobj()
        elif compression == CompressionT.ZSTD:
            if not zstd:
                raise RuntimeError(
                    'ZSTD compression is not available, please install zstandard'
                )
            self._compressor = zstd.ZstdCompressor().compressobj()

        self._buffer = bytearray(
            compression + b'\x00' * (FULL_HEADER_SIZE - COMPRESSION_HEADER_SIZE)
        )
        self.logger = logger or module_logger
        self.logger.debug('Initialized packer with compression: %s', compression)

    async def read(self, size: int = -1) -> bytes:
        if size == 0:
            return b''

        while (size > 0 and len(self._buffer) < size) and not self._eof_reached:
            await self._fill_buffer()

        if size < 0 or len(self._buffer) <= size:
            result = bytes(self._buffer)
            self._buffer.clear()
            return result

        result = bytes(self._buffer[:size])
        self._buffer = self._buffer[size:]
        return result

    async def _fill_buffer(self) -> None:
        chunk = await self._reader.read(self.chunk_size)
        self.logger.debug('Filling buffer with chunk of %d bytes', len(chunk))

        if not chunk:
            self._eof_reached = True
            self.logger.debug('EOF reached')
            if self._compressor:
                data = self._compressor.flush()
                self.logger.debug('Flushing compressor: %d bytes', len(data))
                self._buffer.extend(data)
            return

        if not self._compressor:
            self._buffer.extend(chunk)
            return

        data = self._compressor.compress(chunk)
        self.logger.debug('Got %d bytes from compressor', len(data))
        self._buffer.extend(data)


class Unpacker:
    logger: Logger | LoggerAdapter
    chunk_size: int

    _writer: 'AsyncWriter'
    _header: bytearray
    _buffer: bytearray
    _decompressor: Decompressor | None

    def __init__(
        self,
        writer: 'AsyncWriter',
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        logger: Logger | LoggerAdapter | None = None,
    ) -> None:
        self._writer = writer
        self._buffer = bytearray()
        self._decompressor = None
        self._header = bytearray()
        self.chunk_size = chunk_size
        self.logger = logger or module_logger

    async def write(self, data: bytes, /) -> None:
        if not data:
            return
        await self._fill_buffer(data)
        if len(self._buffer) >= self.chunk_size:
            await self._writer.write(self._buffer)
            self._buffer.clear()

    async def flush(self) -> None:
        if self._decompressor:
            data = self._decompressor.flush()
            self.logger.debug('Flushing decompressor: %d bytes', len(data))
            self._buffer.extend(data)
        if self._buffer:
            await self._writer.write(self._buffer)
            self._buffer.clear()
        await self._writer.flush()

    async def _fill_buffer(self, data: bytes) -> None:
        self.logger.debug('Filling buffer with chunk of %d bytes', len(data))
        if len(self._header) < FULL_HEADER_SIZE:
            header_offset = FULL_HEADER_SIZE - len(self._header)
            self._header.extend(data[:header_offset])
            data = data[header_offset:]
            if len(self._header) < FULL_HEADER_SIZE:
                return

            compression_type = CompressionT(self._header[:COMPRESSION_HEADER_SIZE])
            self.logger.debug('Extracted compression type: %s', compression_type)

            if compression_type == CompressionT.NONE:
                self._decompressor = None
            elif compression_type == CompressionT.ZLIB:
                d = zlib.decompressobj()
                self._decompressor = d
            elif compression_type == CompressionT.ZSTD:
                if not zstd:
                    raise RuntimeError('zstandard is not available')
                self._decompressor = zstd.ZstdDecompressor().decompressobj()

        if not data:
            return

        if not self._decompressor:
            self._buffer.extend(data)
            return

        data = self._decompressor.decompress(data)
        self.logger.debug('Got %d bytes from decompressor', len(data))
        self._buffer.extend(data)
```
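To show how the two classes above fit together, here is a minimal round-trip sketch (not part of the package). `BytesReader` and `BytesWriter` are hypothetical in-memory stand-ins for the `AsyncReader`/`AsyncWriter` protocols, used only for illustration; the 16-byte header written by `Packer` carries the compression marker that `Unpacker` uses to pick its decompressor.

```python
import asyncio

from cafs_cache_cdn_client.cafs.blob.package import CompressionT, Packer, Unpacker


class BytesReader:
    """In-memory AsyncReader stand-in that serves a bytes payload."""

    def __init__(self, data: bytes) -> None:
        self._data = data
        self._pos = 0

    async def read(self, size: int = -1) -> bytes:
        if size < 0:
            size = len(self._data) - self._pos
        chunk = self._data[self._pos:self._pos + size]
        self._pos += len(chunk)
        return chunk


class BytesWriter:
    """In-memory AsyncWriter stand-in that collects everything written."""

    def __init__(self) -> None:
        self.data = bytearray()

    async def write(self, data: bytes, /) -> None:
        self.data.extend(data)

    async def flush(self) -> None:
        pass


async def round_trip(payload: bytes) -> bytes:
    packer = Packer(BytesReader(payload), compression=CompressionT.ZLIB)
    writer = BytesWriter()
    unpacker = Unpacker(writer)
    while True:
        packed = await packer.read(64 * 1024)
        if not packed:
            break
        await unpacker.write(packed)
    await unpacker.flush()
    return bytes(writer.data)


if __name__ == '__main__':
    payload = b'hello cafs ' * 1000
    assert asyncio.run(round_trip(payload)) == payload
    print('round trip OK')
```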
cafs_cache_cdn_client-1.0.5/cafs_cache_cdn_client/cafs/blob/utils.py
@@ -0,0 +1,37 @@
```python
from pathlib import Path

from .package import CompressionT

__all__ = ('choose_compression',)

MAGIC_HEADER_SIZE = 4
MINIMAL_COMPRESSION_SIZE = 1024


# Magic header prefixes for various compression formats
MAGIC_HEADER_PREFIXES = [
    bytes([0x1F, 0x8B]),  # gzip
    bytes([0x42, 0x5A, 0x68]),  # bzip2
    bytes([0x50, 0x4B, 0x03]),  # zip
    bytes([0x28, 0xB5, 0x2F, 0xFD]),  # zstd
    bytes([0x78, 0x01]),  # zlib
]


def is_file_already_compressed(file_path: Path) -> bool:
    with open(file_path, 'rb') as file:
        magic_header_buff = file.read(MAGIC_HEADER_SIZE)

    return any(magic_header_buff.startswith(prefix) for prefix in MAGIC_HEADER_PREFIXES)


def choose_compression(
    file_path: Path, preferred_compression: CompressionT = CompressionT.NONE
) -> CompressionT:
    if file_path.stat().st_size < MINIMAL_COMPRESSION_SIZE:
        return CompressionT.NONE

    if is_file_already_compressed(file_path):
        return CompressionT.NONE

    return preferred_compression
```
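A small usage sketch (not part of the package): `./build/output.tar` is a hypothetical file, and `CompressionT` is imported from `cafs_cache_cdn_client.cafs` as in the CAFS README example above.

```python
from pathlib import Path

from cafs_cache_cdn_client.cafs import CompressionT
from cafs_cache_cdn_client.cafs.blob.utils import choose_compression

path = Path('./build/output.tar')
# Files smaller than MINIMAL_COMPRESSION_SIZE, or ones that already start with a
# known compression magic header, come back as CompressionT.NONE; otherwise the
# preferred codec is returned.
compression = choose_compression(path, preferred_compression=CompressionT.ZSTD)
print(compression)
```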