cafs-cache-cdn-client 1.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
+ Metadata-Version: 2.3
+ Name: cafs-cache-cdn-client
+ Version: 1.0.5
+ Summary: Async Cache CDN client implementation
+ Keywords: cafs,cache
+ Author: Konstantin Belov
+ Author-email: k.belov@gaijin.team
+ Requires-Python: >=3.11,<4.0
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Dist: aiofiles
+ Requires-Dist: aiohttp
+ Requires-Dist: blake3
+ Requires-Dist: zstandard
+ Description-Content-Type: text/markdown
+
+ # Cache CDN Client
+ A Python client library for the CAFS-based Cache CDN service, enabling efficient pushing and pulling of cached content.
+ ## Installation
+ ```bash
+ pip install cafs-cache-cdn-client
+ ```
+ ## Features
+ - Asynchronous API for high-performance operations
+ - Push local directories to cache
+ - Pull cached content to local directories
+ - Check existence of cached references
+ - Tag references for easier access
+ - Attach additional files to existing references
+ - Delete references when no longer needed
+
+ ## Usage Example
+ ```python
+ import asyncio
+ import logging
+ from pathlib import Path
+ from cafs_cache_cdn_client import CacheCdnClient
+
+ # Configure logging to see detailed operation information
+ logging.basicConfig(level=logging.DEBUG)
+
+
+ async def main():
+     # Initialize the client with the server URL
+     # The connection_per_cafs_server parameter controls concurrency
+     client = CacheCdnClient(
+         'http://cache-server.example.com:8300',
+         connection_per_cafs_server=10
+     )
+
+     # Use as an async context manager to ensure proper resource cleanup
+     async with client:
+         # Push a local directory to cache with a 2-hour TTL
+         await client.push('project_name', 'build_artifacts',
+                           '/path/to/build/output', ttl_hours=2,
+                           comment='Build artifacts from CI run #123')
+
+         # Check if a reference exists
+         exists = await client.check('project_name', 'build_artifacts')
+         print(f"Reference exists: {exists}")
+
+         # Pull cached content to a local directory
+         await client.pull('project_name', 'build_artifacts',
+                           '/path/to/destination')
+
+         # Tag a reference for easier access later
+         await client.tag('project_name', 'build_artifacts', 'latest_stable')
+
+         # Attach an additional file to an existing reference
+         await client.attach('project_name', 'build_artifacts',
+                             Path('/path/to/metadata.json'))
+
+         # Delete a reference when no longer needed
+         await client.delete('project_name', 'old_artifacts')
+
+
+ # Run the example
+ if __name__ == '__main__':
+     asyncio.run(main())
+ ```
+
+ ## API Reference
+ ### `CacheCdnClient`
+ - **Constructor**: `CacheCdnClient(server: str, connection_per_cafs_server: int = 1)`
+   - `server`: URL of the cache server
+   - `connection_per_cafs_server`: Number of concurrent connections per CAFS server
+
+ - **Methods**:
+   - `push(repo: str, ref: str, directory: Path | str, ttl_hours: int = 0, comment: str | None = None)` - Push a local directory to cache
+   - `pull(repo: str, ref: str, directory: Path | str)` - Pull cached content to a local directory
+   - `check(repo: str, ref: str) -> bool` - Check if a reference exists
+   - `tag(repo: str, ref: str, tag: str)` - Create a tag for a reference
+   - `attach(repo: str, ref: str, file_path: Path)` - Attach a file to an existing reference
+   - `delete(repo: str, ref: str)` - Delete a reference
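+
+ A common CI pattern is to pull a reference when it already exists and to build and push it otherwise. Below is a minimal sketch of that flow using the methods above (names and paths are illustrative, and `run_build` is a hypothetical build step):
+
+ ```python
+ async def restore_or_build(client: CacheCdnClient) -> None:
+     if await client.check('project_name', 'build_artifacts'):
+         # Cache hit: materialize the cached content locally.
+         await client.pull('project_name', 'build_artifacts', '/path/to/workdir')
+     else:
+         # Cache miss: build, then publish the result with a 2-hour TTL.
+         run_build('/path/to/workdir')  # hypothetical build step
+         await client.push('project_name', 'build_artifacts',
+                           '/path/to/workdir', ttl_hours=2)
+ ```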
@@ -0,0 +1 @@
+ from .client import CacheCdnClient
@@ -0,0 +1,63 @@
+ # CAFS Client
+
+ CAFS Client is a Python library that provides an asynchronous interface for interacting with CAFS servers.
+
+ More information about the CAFS protocol can be found in the
+ [G-CVSNT](https://github.com/GaijinEntertainment/G-CVSNT/tree/master/cvsnt/cvsnt-2.5.05.3744/keyValueServer) repository.
+
+ ## Usage Example
+
+ Below is a complete example demonstrating the major functionality of `CAFSClient`:
+
+ ```python
+ import asyncio
+ from pathlib import Path
+ from cafs_cache_cdn_client.cafs import CAFSClient, CompressionT
+
+
+ async def cafs_client_demo():
+     client = CAFSClient(
+         server_root='/data',
+         servers=['localhost', 'example.com:2403'],
+         connection_per_server=2,
+         connect_timeout=5.0
+     )
+
+     async with client:
+         # 1. Upload a file (stream operation)
+         source_file = Path('./sample.txt')
+         blob_hash = await client.stream(
+             path=source_file,
+             compression=CompressionT.ZSTD,
+         )
+         print(f'File uploaded with hash: {blob_hash}')
+
+         # 2. Check if the file exists on the server
+         exists = await client.check(blob_hash)
+         print(f'File exists: {exists}')
+
+         # 3. Get the file size
+         size = await client.size(blob_hash)
+         print(f'File size: {size} bytes')
+
+         # 4. Download the file (pull operation)
+         download_path = Path('./downloaded_sample.txt')
+         await client.pull(blob_hash, download_path)
+
+ if __name__ == '__main__':
+     asyncio.run(cafs_client_demo())
+ ```
+
+ ## Retry Mechanism
+
+ The `CAFSClient` implements a robust retry mechanism that lets operations complete even when some servers or connections are unavailable:
+
+ - When `retry=True` is specified (the default for most operations), the client automatically retries the operation across all available connections in the pool.
+ - The client iterates through all available connections until either:
+   1. The operation succeeds, or
+   2. All connections in the pool have been exhausted without success.
+
+ This behavior makes the client resilient to temporary network issues or server unavailability when multiple servers are configured. For critical operations, keep the default `retry=True` to maximize the chance of success in distributed environments.
+
+ If a specific operation needs to fail immediately without attempting other connections, disable this behavior by passing `retry=False` to the client's `pull()`, `check()`, `size()`, and `stream()` methods.
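+
+ For example, an existence probe that should fail fast rather than walk the whole connection pool might look like this (a minimal sketch, assuming the `retry` keyword described above):
+
+ ```python
+ # Fails immediately if the first connection cannot serve the request,
+ # instead of retrying across the remaining connections in the pool.
+ exists = await client.check(blob_hash, retry=False)
+ ```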
@@ -0,0 +1,7 @@
+ from .blob.package import CompressionT
+ from .client import CAFSClient
+
+ __all__ = (
+     'CAFSClient',
+     'CompressionT',
+ )
@@ -0,0 +1,34 @@
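+ """Async helpers for computing BLAKE3 hex digests of streams and files."""
+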
+ from pathlib import Path
+
+ import aiofiles
+ from blake3 import blake3
+
+ from cafs_cache_cdn_client.cafs.types import AsyncReader
+
+ __all__ = (
+     'calc_hash',
+     'calc_hash_file',
+ )
+
+ DEFAULT_BUFFER_SIZE = 4 * 1024 * 1024
+
+
+ async def calc_hash(
+     reader: 'AsyncReader', buffer_size: int = DEFAULT_BUFFER_SIZE
+ ) -> str:
+     hasher = blake3()  # pylint: disable=not-callable
+
+     while True:
+         buffer = await reader.read(buffer_size)
+         if not buffer:
+             break
+         hasher.update(buffer)
+
+     return hasher.hexdigest()
+
+
+ async def calc_hash_file(
+     file_path: Path, buffer_size: int = DEFAULT_BUFFER_SIZE
+ ) -> str:
+     async with aiofiles.open(file_path, 'rb') as f:
+         return await calc_hash(f, buffer_size)
@@ -0,0 +1,198 @@
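+ """Stream framing with optional zlib or zstd compression (Packer) and unframing (Unpacker)."""
+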
+ import zlib
+ from enum import Enum
+ from logging import Logger, LoggerAdapter, getLogger
+ from typing import Protocol
+
+ try:
+     import zstandard as zstd
+ except ImportError:
+     zstd = None  # type: ignore[assignment]
+
+ from cafs_cache_cdn_client.cafs.types import AsyncReader, AsyncWriter
+
+ __all__ = (
+     'CompressionT',
+     'Packer',
+     'Unpacker',
+ )
+
+
+ module_logger = getLogger(__name__)
+
+
+ class CompressionT(bytes, Enum):
+     ZSTD = b'ZSTD'
+     ZLIB = b'ZLIB'
+     NONE = b'NONE'
+
+     def __str__(self) -> str:
+         return self.decode('utf-8')
+
+
+ FULL_HEADER_SIZE = 16
+ COMPRESSION_HEADER_SIZE = 4
+ DEFAULT_CHUNK_SIZE = 16 * 1024 * 1024
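+
+ # Wire format produced by Packer and consumed by Unpacker: a 16-byte header
+ # (the 4-byte compression tag b'ZSTD' / b'ZLIB' / b'NONE' followed by 12 zero
+ # bytes, which Unpacker ignores), then the optionally compressed payload.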
+
+
+ class Compressor(Protocol):
+     def compress(self, data: bytes) -> bytes:
+         pass
+
+     def flush(self) -> bytes:
+         pass
+
+
+ class Decompressor(Protocol):
+     def decompress(self, data: bytes) -> bytes:
+         pass
+
+     def flush(self) -> bytes:
+         pass
+
+
+ class Packer:
+     logger: Logger | LoggerAdapter
+     chunk_size: int
+
+     _reader: 'AsyncReader'
+     _eof_reached: bool
+     _buffer: bytearray
+     _compressor: Compressor | None
+
+     def __init__(
+         self,
+         reader: 'AsyncReader',
+         compression: CompressionT = CompressionT.NONE,
+         chunk_size: int = DEFAULT_CHUNK_SIZE,
+         logger: Logger | LoggerAdapter | None = None,
+     ) -> None:
+         self._reader = reader
+         self._eof_reached = False
+         self.chunk_size = chunk_size
+
+         self._compressor = None
+         if compression == CompressionT.ZLIB:
+             self._compressor = zlib.compressobj()
+         elif compression == CompressionT.ZSTD:
+             if not zstd:
+                 raise RuntimeError(
+                     'ZSTD compression is not available, please install zstandard'
+                 )
+             self._compressor = zstd.ZstdCompressor().compressobj()
+
+         self._buffer = bytearray(
+             compression + b'\x00' * (FULL_HEADER_SIZE - COMPRESSION_HEADER_SIZE)
+         )
+         self.logger = logger or module_logger
+         self.logger.debug('Initialized packer with compression: %s', compression)
+
+     async def read(self, size: int = -1) -> bytes:
+         if size == 0:
+             return b''
+
+         while (size > 0 and len(self._buffer) < size) and not self._eof_reached:
+             await self._fill_buffer()
+
+         if size < 0 or len(self._buffer) <= size:
+             result = bytes(self._buffer)
+             self._buffer.clear()
+             return result
+
+         result = bytes(self._buffer[:size])
+         self._buffer = self._buffer[size:]
+         return result
+
+     async def _fill_buffer(self) -> None:
+         chunk = await self._reader.read(self.chunk_size)
+         self.logger.debug('Filling buffer with chunk of %d bytes', len(chunk))
+
+         if not chunk:
+             self._eof_reached = True
+             self.logger.debug('EOF reached')
+             if self._compressor:
+                 data = self._compressor.flush()
+                 self.logger.debug('Flushing compressor: %d bytes', len(data))
+                 self._buffer.extend(data)
+             return
+
+         if not self._compressor:
+             self._buffer.extend(chunk)
+             return
+
+         data = self._compressor.compress(chunk)
+         self.logger.debug('Got %d bytes from compressor', len(data))
+         self._buffer.extend(data)
+
+
+ class Unpacker:
+     logger: Logger | LoggerAdapter
+     chunk_size: int
+
+     _writer: 'AsyncWriter'
+     _header: bytearray
+     _buffer: bytearray
+     _decompressor: Decompressor | None
+
+     def __init__(
+         self,
+         writer: 'AsyncWriter',
+         chunk_size: int = DEFAULT_CHUNK_SIZE,
+         logger: Logger | LoggerAdapter | None = None,
+     ) -> None:
+         self._writer = writer
+         self._buffer = bytearray()
+         self._decompressor = None
+         self._header = bytearray()
+         self.chunk_size = chunk_size
+         self.logger = logger or module_logger
+
+     async def write(self, data: bytes, /) -> None:
+         if not data:
+             return
+         await self._fill_buffer(data)
+         if len(self._buffer) >= self.chunk_size:
+             await self._writer.write(self._buffer)
+             self._buffer.clear()
+
+     async def flush(self) -> None:
+         if self._decompressor:
+             data = self._decompressor.flush()
+             self.logger.debug('Flushing decompressor: %d bytes', len(data))
+             self._buffer.extend(data)
+         if self._buffer:
+             await self._writer.write(self._buffer)
+             self._buffer.clear()
+         await self._writer.flush()
+
+     async def _fill_buffer(self, data: bytes) -> None:
+         self.logger.debug('Filling buffer with chunk of %d bytes', len(data))
+         if len(self._header) < FULL_HEADER_SIZE:
+             header_offset = FULL_HEADER_SIZE - len(self._header)
+             self._header.extend(data[:header_offset])
+             data = data[header_offset:]
+             if len(self._header) < FULL_HEADER_SIZE:
+                 return
+
+             compression_type = CompressionT(self._header[:COMPRESSION_HEADER_SIZE])
+             self.logger.debug('Extracted compression type: %s', compression_type)
+
+             if compression_type == CompressionT.NONE:
+                 self._decompressor = None
+             elif compression_type == CompressionT.ZLIB:
+                 self._decompressor = zlib.decompressobj()
+             elif compression_type == CompressionT.ZSTD:
+                 if not zstd:
+                     raise RuntimeError('zstandard is not available')
+                 self._decompressor = zstd.ZstdDecompressor().decompressobj()
+
+         if not data:
+             return
+
+         if not self._decompressor:
+             self._buffer.extend(data)
+             return
+
+         data = self._decompressor.decompress(data)
+         self.logger.debug('Got %d bytes from decompressor', len(data))
+         self._buffer.extend(data)
@@ -0,0 +1,37 @@
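+ """Heuristics for choosing a compression codec based on file size and magic headers."""
+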
+ from pathlib import Path
+
+ from .package import CompressionT
+
+ __all__ = ('choose_compression',)
+
+ MAGIC_HEADER_SIZE = 4
+ MINIMAL_COMPRESSION_SIZE = 1024
+
+
+ # Magic header prefixes for various compression formats
+ MAGIC_HEADER_PREFIXES = [
+     bytes([0x1F, 0x8B]),  # gzip
+     bytes([0x42, 0x5A, 0x68]),  # bzip2
+     bytes([0x50, 0x4B, 0x03]),  # zip
+     bytes([0x28, 0xB5, 0x2F, 0xFD]),  # zstd
+     bytes([0x78, 0x01]),  # zlib (fastest compression level)
+ ]
+
+
+ def is_file_already_compressed(file_path: Path) -> bool:
+     with open(file_path, 'rb') as file:
+         magic_header_buff = file.read(MAGIC_HEADER_SIZE)
+
+     return any(magic_header_buff.startswith(prefix) for prefix in MAGIC_HEADER_PREFIXES)
+
+
+ def choose_compression(
+     file_path: Path, preferred_compression: CompressionT = CompressionT.NONE
+ ) -> CompressionT:
+     if file_path.stat().st_size < MINIMAL_COMPRESSION_SIZE:
+         return CompressionT.NONE
+
+     if is_file_already_compressed(file_path):
+         return CompressionT.NONE
+
+     return preferred_compression
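+
+ # Example (illustrative): prefer ZSTD, but fall back to NONE for files that
+ # are tiny or already carry a known compression magic header.
+ # compression = choose_compression(Path('artifact.bin'), CompressionT.ZSTD)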