fastsafetensor-3fs-reader 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. fastsafetensor_3fs_reader-0.3.3/MANIFEST.in +6 -0
  2. fastsafetensor_3fs_reader-0.3.3/PKG-INFO +218 -0
  3. fastsafetensor_3fs_reader-0.3.3/README.md +192 -0
  4. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/__init__.py +49 -0
  5. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/_backend.py +121 -0
  6. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/_cuda_utils.py +226 -0
  7. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/_lib_preload.py +171 -0
  8. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/_mount_utils.py +15 -0
  9. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/cpp/BS_thread_pool.hpp +2510 -0
  10. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/cpp/__init__.py +1 -0
  11. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/cpp/include/hf3fs_usrbio.h +179 -0
  12. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/cpp/usrbio_reader_v2.cpp +694 -0
  13. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/interface.py +51 -0
  14. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/mock.py +76 -0
  15. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/reader_cpp.py +103 -0
  16. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader/reader_py.py +603 -0
  17. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader.egg-info/PKG-INFO +218 -0
  18. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader.egg-info/SOURCES.txt +27 -0
  19. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader.egg-info/dependency_links.txt +1 -0
  20. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader.egg-info/requires.txt +10 -0
  21. fastsafetensor_3fs_reader-0.3.3/fastsafetensor_3fs_reader.egg-info/top_level.txt +1 -0
  22. fastsafetensor_3fs_reader-0.3.3/pyproject.toml +75 -0
  23. fastsafetensor_3fs_reader-0.3.3/setup.cfg +4 -0
  24. fastsafetensor_3fs_reader-0.3.3/setup.py +124 -0
  25. fastsafetensor_3fs_reader-0.3.3/tests/__init__.py +1 -0
  26. fastsafetensor_3fs_reader-0.3.3/tests/conftest.py +164 -0
  27. fastsafetensor_3fs_reader-0.3.3/tests/test_lib_preload.py +317 -0
  28. fastsafetensor_3fs_reader-0.3.3/tests/test_mock.py +374 -0
  29. fastsafetensor_3fs_reader-0.3.3/tests/test_threefs.py +434 -0
@@ -0,0 +1,6 @@
1
+ include LICENSE
2
+ include README.md
3
+ include pyproject.toml
4
+ include setup.py
5
+ recursive-include fastsafetensor_3fs_reader *.py *.cpp *.hpp *.h *.cc
6
+ recursive-include tests *.py
@@ -0,0 +1,218 @@
1
+ Metadata-Version: 2.4
2
+ Name: fastsafetensor-3fs-reader
3
+ Version: 0.3.3
4
+ Summary: 3FS USRBIO file reader for fastsafetensors
5
+ License: Apache-2.0
6
+ Project-URL: Repository, https://github.com/ABNER-1/fastsafetensor_3fs_reader
7
+ Keywords: 3fs,usrbio,safetensors,gpu,io
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Provides-Extra: test
20
+ Requires-Dist: pytest>=8.1.1; extra == "test"
21
+ Requires-Dist: numpy; extra == "test"
22
+ Provides-Extra: gpu
23
+ Requires-Dist: torch>=2.0; extra == "gpu"
24
+ Provides-Extra: lint
25
+ Requires-Dist: ruff>=0.6.0; extra == "lint"
26
+
27
+ # fastsafetensor-3fs-reader
28
+
29
+ 3FS USRBIO file reader for fastsafetensors.
30
+
31
+ This package provides a high-performance reader for 3FS USRBIO files with
32
+ two backend implementations (C++ and pure-Python) and a mock for testing.
33
+
34
+ ## Backends
35
+
36
+ | Backend | Module | Requirements | Performance |
37
+ |---------|--------|-------------|-------------|
38
+ | **C++** | `reader_cpp.py` | `libhf3fs_api_shared.so` + libtorch + CUDA | Best (GIL-free, native USRBIO async I/O) |
39
+ | **Python** | `reader_py.py` | `hf3fs_py_usrbio` (+ optional PyTorch for GPU) | Good (USRBIO via Client API or OS pread) |
40
+ | **Mock** | `mock.py` | None | For testing only |
41
+
42
+ The package auto-selects the best available backend at import time:
43
+ C++ → Python → Mock. Use `get_backend()` to check which one is active.
44
+
45
+ > **Note:** The C++ backend supports **pipelined mode** (double-buffered async
46
+ > H2D copy via `cudaMemcpyAsync`) which overlaps network I/O with GPU memory
47
+ > transfer for significantly better throughput. Pass `pipelined=True` to
48
+ > `read_chunked()` to enable it. The Python backend does not support
49
+ > pipelining and will silently fall back to non-pipelined mode.
50
+
51
+ ## Installation
52
+
53
+ ### Pure-Python mode (no C++ compilation)
54
+
55
+ ```bash
56
+ FST3FS_NO_EXT=1 pip install .
57
+ ```
58
+
59
+ ### With C++ extension
60
+
61
+ Requires `libhf3fs_api_shared.so` (from a 3FS build) and CUDA Runtime.
62
+ The `hf3fs_usrbio.h` header is bundled in the package, so no external
63
+ header dependency is needed:
64
+
65
+ ```bash
66
+ export HF3FS_LIB_DIR=/path/to/3FS/build/lib # directory with libhf3fs_api_shared.so
67
+ pip install .
68
+ ```
69
+
70
+ ### Automatic `libhf3fs_api_shared.so` discovery
71
+
72
+ At import time, the package automatically searches for
73
+ `libhf3fs_api_shared.so` using the following priority:
74
+
75
+ 1. **`HF3FS_LIB_DIR`** environment variable (user-explicit, highest priority).
76
+ 2. **`LD_LIBRARY_PATH`** directories (user already configured).
77
+ 3. **`hf3fs_py_usrbio` pip install path** — if `hf3fs_py_usrbio` is installed
78
+ via pip, the library is typically located in a sibling `.libs/` directory
79
+ (e.g. `site-packages/hf3fs_py_usrbio.libs/`). This is discovered
80
+ automatically so you don't need to set `LD_LIBRARY_PATH` manually.
81
+
82
+ The library is pre-loaded with `RTLD_GLOBAL` so that both the C++ and
83
+ Python backends can resolve its symbols. Use `get_hf3fs_lib_path()` to
84
+ check which path was loaded:
85
+
86
+ ```python
87
+ from fastsafetensor_3fs_reader import get_hf3fs_lib_path
88
+ print(get_hf3fs_lib_path()) # e.g. "/path/to/site-packages/hf3fs_py_usrbio.libs/libhf3fs_api_shared.so"
89
+ ```
90
+
91
+ ### Installing hf3fs_py_usrbio (for the Python backend)
92
+
93
+ `hf3fs_py_usrbio` is **not** available on PyPI. It must be built from the
94
+ [DeepSeek 3FS](https://github.com/deepseek-ai/3FS) source tree:
95
+
96
+ ```bash
97
+ git clone https://github.com/deepseek-ai/3FS
98
+ cd 3FS
99
+ git submodule update --init --recursive
100
+ # Follow 3FS build instructions (cmake, etc.)
101
+ # After build, install the Python package:
102
+ cd build && pip install ..
103
+ ```
104
+
105
+ > **Important:** The default pip-installed `hf3fs_py_usrbio` package is
106
+ > suitable for **testing and validation** but is **not recommended for
107
+ > production use**. For production deployments, build 3FS from source with
108
+ > optimized compiler flags tailored to your hardware. Refer to projects like
109
+ > [SGLang](https://github.com/sgl-project/sglang) for examples of
110
+ > production-grade 3FS compilation workflows.
111
+
112
+ ## Usage
113
+
114
+ ```python
115
+ from fastsafetensor_3fs_reader import (
116
+ ThreeFSFileReader,
117
+ MockFileReader,
118
+ is_available,
119
+ get_backend,
120
+ )
121
+
122
+ # Check which backend is active
123
+ print(f"Backend: {get_backend()}") # "cpp", "python", or "mock"
124
+
125
+ # Use mock reader for testing (always available)
126
+ reader = MockFileReader()
127
+ headers = reader.read_headers_batch(["/path/to/file.safetensors"])
128
+ reader.close()
129
+
130
+ # Use 3FS reader when available
131
+ if is_available():
132
+ reader = ThreeFSFileReader(mount_point="/mnt/3fs")
133
+ headers = reader.read_headers_batch([
134
+ "/mnt/3fs/model-00001.safetensors",
135
+ "/mnt/3fs/model-00002.safetensors",
136
+ ])
137
+
138
+ # Read tensor data into GPU memory
139
+ import torch
140
+ buf = torch.empty(1024 * 1024, dtype=torch.uint8, device="cuda")
141
+ bytes_read = reader.read_chunked(
142
+ path="/mnt/3fs/model-00001.safetensors",
143
+ dev_ptr=buf.data_ptr(),
144
+ file_offset=0,
145
+ total_length=1024 * 1024,
146
+ )
147
+ reader.close()
148
+ ```
149
+
150
+ ## Benchmark
151
+
152
+ The `hack/benchmark/` directory of the source repository (it is not shipped in the sdist) contains a comprehensive benchmarking suite.
153
+ Use `benchmark_runner.py` to measure read throughput across different backends,
154
+ buffer sizes, chunk sizes, and process counts.
155
+
156
+ ### Full benchmark (read + GPU copy)
157
+
158
+ ```bash
159
+ python hack/benchmark/benchmark_runner.py \
160
+ --mount-point /mnt/3fs \
161
+ --backends cpp,python \
162
+ --buffer-sizes 8,16,32,64,128,256,512 \
163
+ --chunk-sizes 8,16,32,64,128,256,512 \
164
+ --num-processes 1,2,4,8 \
165
+ --iterations 3
166
+ ```
167
+
168
+ ### Download-only benchmark (host memory only, no GPU copy)
169
+
170
+ ```bash
171
+ python hack/benchmark/benchmark_runner.py \
172
+ --mount-point /mnt/3fs \
173
+ --backends cpp,python \
174
+ --buffer-sizes 8,16,32,64,128,256,512 \
175
+ --chunk-sizes 8,16,32,64,128,256,512 \
176
+ --num-processes 1,2,4,8 \
177
+ --download-only \
178
+ --iterations 3
179
+ ```
180
+
181
+ ### Key parameters
182
+
183
+ | Parameter | Description | Default |
184
+ |-----------|-------------|-------------------------------|
185
+ | `--mount-point` | 3FS FUSE mount-point path | *(required)* |
186
+ | `--backends` | Comma-separated backend names | `mock,python,cpp` |
187
+ | `--buffer-sizes` | Buffer sizes in MB | `8,16,32,64,128,256,512,1024` |
188
+ | `--chunk-sizes` | Chunk sizes in MB | `8,16,32,64,128,256,512,1024` |
189
+ | `--num-processes` | Process counts | `1,2,4,8` |
190
+ | `--download-only` | Read into host memory only (skip GPU copy) | `false` |
191
+ | `--iterations` | Iterations per combination | `3` |
192
+ | `--mode` | `grid` (sweep all combos) or `single` | `grid` |
193
+ | `--output-dir` | Directory for CSV and chart output | `./benchmark_results` |
194
+
195
+ ### Performance Results
196
+
197
+ > **Test environment:** Single 400 Gbps RDMA NIC.
198
+ > These numbers represent a **loading baseline** under specific storage and
199
+ > network hardware conditions — they do **not** represent the performance
200
+ > ceiling of the system.
201
+
202
+ **Model:** DeepSeek-V3 (total ~640 GB safetensors)
203
+
204
+ | Configuration | Avg Throughput (GB/s) | Peak Throughput with fastsafetensors (GB/s) | Load Time (s) | Backend |
205
+ |---|---|---|---|---|
206
+ | 8 processes, buffer=8 MB | 35.0 | 32.0 | 30.34 | C++ (non-pipelined) |
207
+ | 8 processes, buffer=16 MB | 37.6 | 36.6 | 25.73 | C++ (pipelined) |
208
+
209
+ #### Benchmark: RDMA throughput across buffer sizes (8M / 16M / 32M)
210
+
211
+ ![RDMA throughput across buffer sizes](docs/images/cpp_performance.png)
212
+
213
+ #### Production: model weight loading with fastsafetensors (pipelined, peak 36.6 GB/s)
214
+
215
+ ![Model weight loading throughput](docs/images/cpp_load.png)
216
+ ## License
217
+
218
+ Apache-2.0
@@ -0,0 +1,192 @@
1
+ # fastsafetensor-3fs-reader
2
+
3
+ 3FS USRBIO file reader for fastsafetensors.
4
+
5
+ This package provides a high-performance reader for 3FS USRBIO files with
6
+ two backend implementations (C++ and pure-Python) and a mock for testing.
7
+
8
+ ## Backends
9
+
10
+ | Backend | Module | Requirements | Performance |
11
+ |---------|--------|-------------|-------------|
12
+ | **C++** | `reader_cpp.py` | `libhf3fs_api_shared.so` + libtorch + CUDA | Best (GIL-free, native USRBIO async I/O) |
13
+ | **Python** | `reader_py.py` | `hf3fs_py_usrbio` (+ optional PyTorch for GPU) | Good (USRBIO via Client API or OS pread) |
14
+ | **Mock** | `mock.py` | None | For testing only |
15
+
16
+ The package auto-selects the best available backend at import time:
17
+ C++ → Python → Mock. Use `get_backend()` to check which one is active.
18
+
19
+ > **Note:** The C++ backend supports **pipelined mode** (double-buffered async
20
+ > H2D copy via `cudaMemcpyAsync`) which overlaps network I/O with GPU memory
21
+ > transfer for significantly better throughput. Pass `pipelined=True` to
22
+ > `read_chunked()` to enable it. The Python backend does not support
23
+ > pipelining and will silently fall back to non-pipelined mode.
24
+
25
+ ## Installation
26
+
27
+ ### Pure-Python mode (no C++ compilation)
28
+
29
+ ```bash
30
+ FST3FS_NO_EXT=1 pip install .
31
+ ```
32
+
33
+ ### With C++ extension
34
+
35
+ Requires `libhf3fs_api_shared.so` (from a 3FS build) and CUDA Runtime.
36
+ The `hf3fs_usrbio.h` header is bundled in the package, so no external
37
+ header dependency is needed:
38
+
39
+ ```bash
40
+ export HF3FS_LIB_DIR=/path/to/3FS/build/lib # directory with libhf3fs_api_shared.so
41
+ pip install .
42
+ ```
43
+
44
+ ### Automatic `libhf3fs_api_shared.so` discovery
45
+
46
+ At import time, the package automatically searches for
47
+ `libhf3fs_api_shared.so` using the following priority:
48
+
49
+ 1. **`HF3FS_LIB_DIR`** environment variable (user-explicit, highest priority).
50
+ 2. **`LD_LIBRARY_PATH`** directories (user already configured).
51
+ 3. **`hf3fs_py_usrbio` pip install path** — if `hf3fs_py_usrbio` is installed
52
+ via pip, the library is typically located in a sibling `.libs/` directory
53
+ (e.g. `site-packages/hf3fs_py_usrbio.libs/`). This is discovered
54
+ automatically so you don't need to set `LD_LIBRARY_PATH` manually.
55
+
56
+ The library is pre-loaded with `RTLD_GLOBAL` so that both the C++ and
57
+ Python backends can resolve its symbols. Use `get_hf3fs_lib_path()` to
58
+ check which path was loaded:
59
+
60
+ ```python
61
+ from fastsafetensor_3fs_reader import get_hf3fs_lib_path
62
+ print(get_hf3fs_lib_path()) # e.g. "/path/to/site-packages/hf3fs_py_usrbio.libs/libhf3fs_api_shared.so"
63
+ ```
64
+
65
+ ### Installing hf3fs_py_usrbio (for the Python backend)
66
+
67
+ `hf3fs_py_usrbio` is **not** available on PyPI. It must be built from the
68
+ [DeepSeek 3FS](https://github.com/deepseek-ai/3FS) source tree:
69
+
70
+ ```bash
71
+ git clone https://github.com/deepseek-ai/3FS
72
+ cd 3FS
73
+ git submodule update --init --recursive
74
+ # Follow 3FS build instructions (cmake, etc.)
75
+ # After build, install the Python package:
76
+ cd build && pip install ..
77
+ ```
78
+
79
+ > **Important:** The default pip-installed `hf3fs_py_usrbio` package is
80
+ > suitable for **testing and validation** but is **not recommended for
81
+ > production use**. For production deployments, build 3FS from source with
82
+ > optimized compiler flags tailored to your hardware. Refer to projects like
83
+ > [SGLang](https://github.com/sgl-project/sglang) for examples of
84
+ > production-grade 3FS compilation workflows.
85
+
86
+ ## Usage
87
+
88
+ ```python
89
+ from fastsafetensor_3fs_reader import (
90
+ ThreeFSFileReader,
91
+ MockFileReader,
92
+ is_available,
93
+ get_backend,
94
+ )
95
+
96
+ # Check which backend is active
97
+ print(f"Backend: {get_backend()}") # "cpp", "python", or "mock"
98
+
99
+ # Use mock reader for testing (always available)
100
+ reader = MockFileReader()
101
+ headers = reader.read_headers_batch(["/path/to/file.safetensors"])
102
+ reader.close()
103
+
104
+ # Use 3FS reader when available
105
+ if is_available():
106
+ reader = ThreeFSFileReader(mount_point="/mnt/3fs")
107
+ headers = reader.read_headers_batch([
108
+ "/mnt/3fs/model-00001.safetensors",
109
+ "/mnt/3fs/model-00002.safetensors",
110
+ ])
111
+
112
+ # Read tensor data into GPU memory
113
+ import torch
114
+ buf = torch.empty(1024 * 1024, dtype=torch.uint8, device="cuda")
115
+ bytes_read = reader.read_chunked(
116
+ path="/mnt/3fs/model-00001.safetensors",
117
+ dev_ptr=buf.data_ptr(),
118
+ file_offset=0,
119
+ total_length=1024 * 1024,
120
+ )
121
+ reader.close()
122
+ ```
123
+
124
+ ## Benchmark
125
+
126
+ The `hack/benchmark/` directory of the source repository (it is not shipped in the sdist) contains a comprehensive benchmarking suite.
127
+ Use `benchmark_runner.py` to measure read throughput across different backends,
128
+ buffer sizes, chunk sizes, and process counts.
129
+
130
+ ### Full benchmark (read + GPU copy)
131
+
132
+ ```bash
133
+ python hack/benchmark/benchmark_runner.py \
134
+ --mount-point /mnt/3fs \
135
+ --backends cpp,python \
136
+ --buffer-sizes 8,16,32,64,128,256,512 \
137
+ --chunk-sizes 8,16,32,64,128,256,512 \
138
+ --num-processes 1,2,4,8 \
139
+ --iterations 3
140
+ ```
141
+
142
+ ### Download-only benchmark (host memory only, no GPU copy)
143
+
144
+ ```bash
145
+ python hack/benchmark/benchmark_runner.py \
146
+ --mount-point /mnt/3fs \
147
+ --backends cpp,python \
148
+ --buffer-sizes 8,16,32,64,128,256,512 \
149
+ --chunk-sizes 8,16,32,64,128,256,512 \
150
+ --num-processes 1,2,4,8 \
151
+ --download-only \
152
+ --iterations 3
153
+ ```
154
+
155
+ ### Key parameters
156
+
157
+ | Parameter | Description | Default |
158
+ |-----------|-------------|-------------------------------|
159
+ | `--mount-point` | 3FS FUSE mount-point path | *(required)* |
160
+ | `--backends` | Comma-separated backend names | `mock,python,cpp` |
161
+ | `--buffer-sizes` | Buffer sizes in MB | `8,16,32,64,128,256,512,1024` |
162
+ | `--chunk-sizes` | Chunk sizes in MB | `8,16,32,64,128,256,512,1024` |
163
+ | `--num-processes` | Process counts | `1,2,4,8` |
164
+ | `--download-only` | Read into host memory only (skip GPU copy) | `false` |
165
+ | `--iterations` | Iterations per combination | `3` |
166
+ | `--mode` | `grid` (sweep all combos) or `single` | `grid` |
167
+ | `--output-dir` | Directory for CSV and chart output | `./benchmark_results` |
168
+
169
+ ### Performance Results
170
+
171
+ > **Test environment:** Single 400 Gbps RDMA NIC.
172
+ > These numbers represent a **loading baseline** under specific storage and
173
+ > network hardware conditions — they do **not** represent the performance
174
+ > ceiling of the system.
175
+
176
+ **Model:** DeepSeek-V3 (total ~640 GB safetensors)
177
+
178
+ | Configuration | Avg Throughput (GB/s) | Peak Throughput with fastsafetensors (GB/s) | Load Time (s) | Backend |
179
+ |---|---|---|---|---|
180
+ | 8 processes, buffer=8 MB | 35.0 | 32.0 | 30.34 | C++ (non-pipelined) |
181
+ | 8 processes, buffer=16 MB | 37.6 | 36.6 | 25.73 | C++ (pipelined) |
182
+
183
+ #### Benchmark: RDMA throughput across buffer sizes (8M / 16M / 32M)
184
+
185
+ ![RDMA throughput across buffer sizes](docs/images/cpp_performance.png)
186
+
187
+ #### Production: model weight loading with fastsafetensors (pipelined, peak 36.6 GB/s)
188
+
189
+ ![Model weight loading throughput](docs/images/cpp_load.png)
190
+ ## License
191
+
192
+ Apache-2.0
@@ -0,0 +1,49 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ """fastsafetensor_3fs_reader -- 3FS USRBIO file reader for fastsafetensors.
4
+
5
+ Quick start::
6
+
7
+ from fastsafetensor_3fs_reader import ThreeFSFileReader, is_available
8
+
9
+ if is_available():
10
+ reader = ThreeFSFileReader(mount_point="/mnt/3fs")
11
+ headers = reader.read_headers_batch(["/mnt/3fs/model.safetensors"])
12
+ reader.close()
13
+
14
+ Backend auto-selection (override via ``FASTSAFETENSORS_BACKEND``)::
15
+
16
+ cpp -> python -> mock
17
+ """
18
+
19
+ from ._lib_preload import get_hf3fs_lib_path, preload_hf3fs_library
20
+
21
+ preload_hf3fs_library() # must run before any backend import
22
+
23
+ from ._mount_utils import extract_mount_point
24
+ from .interface import FileReaderInterface
25
+ from .mock import MockFileReader
26
+
27
+ from ._backend import ( # noqa: E402
28
+ create_reader,
29
+ get_backend,
30
+ init_backend,
31
+ is_available,
32
+ )
33
+
34
+ # init_backend() must run BEFORE importing ThreeFSFileReader: Python's
35
+ # ``from mod import name`` captures the value at import time.
36
+ init_backend()
37
+
38
+ from ._backend import ThreeFSFileReader # noqa: E402
39
+
40
+ __all__ = [
41
+ "FileReaderInterface",
42
+ "ThreeFSFileReader",
43
+ "MockFileReader",
44
+ "extract_mount_point",
45
+ "get_hf3fs_lib_path",
46
+ "is_available",
47
+ "get_backend",
48
+ "create_reader",
49
+ ]
@@ -0,0 +1,121 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ """Backend selection: ``FASTSAFETENSORS_BACKEND`` → cpp / python / mock."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ import os
9
+ from typing import Any
10
+
11
+ from .interface import FileReaderInterface
12
+ from .mock import MockFileReader
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ _VALID_BACKENDS = ("cpp", "python", "mock", "auto")
17
+
18
+ _BACKEND: str = "mock"
19
+ ThreeFSFileReader: type | None = None
20
+
21
+
22
def _load_backend(name: str) -> None:
    """Bind the module-level ``ThreeFSFileReader`` / ``_BACKEND`` to *name*.

    The reader modules for ``"cpp"`` and ``"python"`` are imported inside
    this function, so any ``ImportError`` raised by that import propagates
    to the caller and leaves both globals untouched.

    Args:
        name: ``"cpp"``, ``"python"`` or ``"mock"``.

    Raises:
        ValueError: if *name* is none of the three backend names.
    """
    global ThreeFSFileReader, _BACKEND

    # Resolve the class first; the globals are only rebound once the
    # (possibly failing) import has succeeded.
    if name == "mock":
        reader_cls = MockFileReader
    elif name == "python":
        from .reader_py import ThreeFSFileReaderPy as reader_cls
    elif name == "cpp":
        from .reader_cpp import ThreeFSFileReaderCpp as reader_cls
    else:
        raise ValueError(f"Unknown backend: {name!r}")

    ThreeFSFileReader = reader_cls
    _BACKEND = name
39
+
40
+
41
def init_backend() -> None:
    """Select and load the active backend.

    The ``FASTSAFETENSORS_BACKEND`` environment variable (lower-cased and
    stripped before use) controls the choice:

    * ``cpp``, ``python`` or ``mock`` -- force that backend.
    * unset, empty or ``auto`` -- try ``cpp`` then ``python``; if both fail
      to import, fall back to ``mock`` and log a warning.

    Raises:
        ValueError: if the variable holds any other value.
        ImportError: if a *forced* backend cannot be imported (propagated
            from ``_load_backend``; only auto-selection swallows it).
    """
    forced = os.environ.get("FASTSAFETENSORS_BACKEND", "").lower().strip()
    if forced and forced not in _VALID_BACKENDS:
        raise ValueError(
            f"FASTSAFETENSORS_BACKEND={forced!r} is invalid. "
            f"Valid values: {', '.join(_VALID_BACKENDS)} (or unset)"
        )

    if forced and forced != "auto":
        _load_backend(forced)
        logger.info(
            "using backend=%r (forced via FASTSAFETENSORS_BACKEND)",
            _BACKEND,
        )
        return

    for candidate in ("cpp", "python"):
        try:
            _load_backend(candidate)
        except ImportError as exc:
            logger.debug(
                "backend=%r not available (%s), trying next",
                candidate,
                exc,
            )
        else:
            logger.info("using backend=%r (auto-selected)", _BACKEND)
            break
    else:
        # No candidate loaded during *this* call.  Deciding via the loop's
        # ``else`` (rather than ``ThreeFSFileReader is None``) keeps the
        # fallback correct when init_backend() is invoked again after an
        # earlier call already populated the module globals.
        _load_backend("mock")
        logger.warning(
            "no real 3FS backend available "
            "(cpp/python both failed), falling back to mock backend"
        )
80
+
81
+
82
def is_available() -> bool:
    """Return ``True`` when the active backend is ``"cpp"`` or ``"python"``."""
    active = _BACKEND
    return active == "cpp" or active == "python"
84
+
85
+
86
def get_backend() -> str:
    """Return the current value of the module-level ``_BACKEND`` name."""
    active = _BACKEND
    return active
88
+
89
+
90
def create_reader(backend: str = "auto", **kwargs: Any) -> FileReaderInterface:
    """Instantiate a reader, optionally bypassing the auto-selected backend.

    Args:
        backend: ``"auto"`` uses the class currently bound to the module-level
            ``ThreeFSFileReader``; ``"cpp"``, ``"python"`` and ``"mock"``
            pick that implementation directly.
        **kwargs: forwarded unchanged to the reader constructor.

    Raises:
        RuntimeError: ``backend="auto"`` while ``ThreeFSFileReader`` is
            still ``None``.
        ValueError: *backend* is not one of the accepted names.
    """
    if backend == "auto":
        reader_cls = ThreeFSFileReader
        if reader_cls is None:
            raise RuntimeError("No backend is available")
        return reader_cls(**kwargs)

    if backend == "cpp":
        # Imported on demand so an unavailable backend only fails when
        # it is explicitly requested.
        from .reader_cpp import ThreeFSFileReaderCpp

        return ThreeFSFileReaderCpp(**kwargs)

    if backend == "python":
        from .reader_py import ThreeFSFileReaderPy

        return ThreeFSFileReaderPy(**kwargs)

    if backend == "mock":
        return MockFileReader(**kwargs)

    raise ValueError(
        f"backend={backend!r} is invalid. Valid values: {', '.join(_VALID_BACKENDS)}"
    )
113
+
114
+
115
+ __all__ = [
116
+ "ThreeFSFileReader",
117
+ "init_backend",
118
+ "is_available",
119
+ "get_backend",
120
+ "create_reader",
121
+ ]