fastsafetensor-3fs-reader 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastsafetensor_3fs_reader/__init__.py +49 -0
- fastsafetensor_3fs_reader/_backend.py +121 -0
- fastsafetensor_3fs_reader/_cuda_utils.py +226 -0
- fastsafetensor_3fs_reader/_lib_preload.py +171 -0
- fastsafetensor_3fs_reader/_mount_utils.py +15 -0
- fastsafetensor_3fs_reader/cpp/BS_thread_pool.hpp +2510 -0
- fastsafetensor_3fs_reader/cpp/__init__.py +1 -0
- fastsafetensor_3fs_reader/cpp/include/hf3fs_usrbio.h +179 -0
- fastsafetensor_3fs_reader/cpp/usrbio_reader_v2.cpp +694 -0
- fastsafetensor_3fs_reader/interface.py +51 -0
- fastsafetensor_3fs_reader/mock.py +76 -0
- fastsafetensor_3fs_reader/reader_cpp.py +103 -0
- fastsafetensor_3fs_reader/reader_py.py +603 -0
- fastsafetensor_3fs_reader-0.3.3.dist-info/METADATA +218 -0
- fastsafetensor_3fs_reader-0.3.3.dist-info/RECORD +17 -0
- fastsafetensor_3fs_reader-0.3.3.dist-info/WHEEL +5 -0
- fastsafetensor_3fs_reader-0.3.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
"""fastsafetensor_3fs_reader -- 3FS USRBIO file reader for fastsafetensors.
|
|
4
|
+
|
|
5
|
+
Quick start::
|
|
6
|
+
|
|
7
|
+
from fastsafetensor_3fs_reader import ThreeFSFileReader, is_available
|
|
8
|
+
|
|
9
|
+
if is_available():
|
|
10
|
+
reader = ThreeFSFileReader(mount_point="/mnt/3fs")
|
|
11
|
+
headers = reader.read_headers_batch(["/mnt/3fs/model.safetensors"])
|
|
12
|
+
reader.close()
|
|
13
|
+
|
|
14
|
+
Backend auto-selection (override via ``FASTSAFETENSORS_BACKEND``)::
|
|
15
|
+
|
|
16
|
+
cpp -> python -> mock
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from ._lib_preload import get_hf3fs_lib_path, preload_hf3fs_library
|
|
20
|
+
|
|
21
|
+
preload_hf3fs_library() # must run before any backend import
|
|
22
|
+
|
|
23
|
+
from ._mount_utils import extract_mount_point
|
|
24
|
+
from .interface import FileReaderInterface
|
|
25
|
+
from .mock import MockFileReader
|
|
26
|
+
|
|
27
|
+
from ._backend import ( # noqa: E402
|
|
28
|
+
create_reader,
|
|
29
|
+
get_backend,
|
|
30
|
+
init_backend,
|
|
31
|
+
is_available,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# init_backend() must run BEFORE importing ThreeFSFileReader: Python's
|
|
35
|
+
# ``from mod import name`` captures the value at import time.
|
|
36
|
+
init_backend()
|
|
37
|
+
|
|
38
|
+
from ._backend import ThreeFSFileReader # noqa: E402
|
|
39
|
+
|
|
40
|
+
__all__ = [
|
|
41
|
+
"FileReaderInterface",
|
|
42
|
+
"ThreeFSFileReader",
|
|
43
|
+
"MockFileReader",
|
|
44
|
+
"extract_mount_point",
|
|
45
|
+
"get_hf3fs_lib_path",
|
|
46
|
+
"is_available",
|
|
47
|
+
"get_backend",
|
|
48
|
+
"create_reader",
|
|
49
|
+
]
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
"""Backend selection: ``FASTSAFETENSORS_BACKEND`` → cpp / python / mock."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .interface import FileReaderInterface
|
|
12
|
+
from .mock import MockFileReader
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
_VALID_BACKENDS = ("cpp", "python", "mock", "auto")
|
|
17
|
+
|
|
18
|
+
_BACKEND: str = "mock"
|
|
19
|
+
ThreeFSFileReader: type | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _load_backend(name: str) -> None:
    """Bind the module-level ``ThreeFSFileReader`` / ``_BACKEND`` to *name*.

    Args:
        name: One of ``"cpp"``, ``"python"`` or ``"mock"``.

    Raises:
        ValueError: If *name* is not one of the three backends above.
        ImportError: Propagated from the lazy import of the ``cpp`` or
            ``python`` reader module; in that case neither global is changed.
    """
    global ThreeFSFileReader, _BACKEND

    # Resolve the reader class first so that a failing import leaves both
    # globals untouched.
    if name == "cpp":
        from .reader_cpp import ThreeFSFileReaderCpp as reader_cls
    elif name == "python":
        from .reader_py import ThreeFSFileReaderPy as reader_cls
    elif name == "mock":
        reader_cls = MockFileReader
    else:
        raise ValueError(f"Unknown backend: {name!r}")

    ThreeFSFileReader = reader_cls
    _BACKEND = name
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def init_backend() -> None:
    """Pick the reader backend for this process.

    The ``FASTSAFETENSORS_BACKEND`` environment variable is read,
    lower-cased and stripped.  A non-empty value other than ``auto`` forces
    that backend.  Otherwise ``cpp`` and then ``python`` are tried in order,
    skipping any candidate whose import raises ``ImportError``; if no reader
    class has been bound afterwards, the mock backend is loaded.

    Raises:
        ValueError: If the environment variable holds an unsupported value.
    """
    requested = os.environ.get("FASTSAFETENSORS_BACKEND", "").lower().strip()

    if requested:
        if requested not in _VALID_BACKENDS:
            raise ValueError(
                f"FASTSAFETENSORS_BACKEND={requested!r} is invalid. "
                f"Valid values: {', '.join(_VALID_BACKENDS)} (or unset)"
            )
        if requested != "auto":
            # Explicit choice: import errors are allowed to propagate.
            _load_backend(requested)
            logger.info(
                "using backend=%r (forced via FASTSAFETENSORS_BACKEND)",
                _BACKEND,
            )
            return

    for candidate in ("cpp", "python"):
        try:
            _load_backend(candidate)
        except ImportError as exc:
            logger.debug(
                "backend=%r not available (%s), trying next",
                candidate,
                exc,
            )
        else:
            logger.info("using backend=%r (auto-selected)", _BACKEND)
            break

    if ThreeFSFileReader is None:
        _load_backend("mock")
        logger.warning(
            "no real 3FS backend available "
            "(cpp/python both failed), falling back to mock backend"
        )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_available() -> bool:
    """Return ``True`` when the active backend is ``cpp`` or ``python``."""
    return _BACKEND == "cpp" or _BACKEND == "python"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_backend() -> str:
    """Return the name stored in the module-level ``_BACKEND``."""
    active = _BACKEND
    return active
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def create_reader(backend: str = "auto", **kwargs: Any) -> FileReaderInterface:
    """Instantiate a reader, optionally forcing a specific backend.

    Args:
        backend: ``"auto"`` uses the class currently bound to the
            module-level ``ThreeFSFileReader``; ``"cpp"``, ``"python"`` and
            ``"mock"`` select that implementation directly.
        **kwargs: Forwarded unchanged to the reader constructor.

    Raises:
        RuntimeError: If ``backend="auto"`` and no reader class is bound.
        ValueError: If *backend* is not a recognised name.
        ImportError: Propagated from the lazy ``cpp`` / ``python`` import.
    """
    if backend == "auto":
        reader_cls = ThreeFSFileReader
        if reader_cls is None:
            raise RuntimeError("No backend is available")
    elif backend == "cpp":
        from .reader_cpp import ThreeFSFileReaderCpp as reader_cls
    elif backend == "python":
        from .reader_py import ThreeFSFileReaderPy as reader_cls
    elif backend == "mock":
        reader_cls = MockFileReader
    else:
        raise ValueError(
            f"backend={backend!r} is invalid. Valid values: {', '.join(_VALID_BACKENDS)}"
        )

    return reader_cls(**kwargs)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
__all__ = [
|
|
116
|
+
"ThreeFSFileReader",
|
|
117
|
+
"init_backend",
|
|
118
|
+
"is_available",
|
|
119
|
+
"get_backend",
|
|
120
|
+
"create_reader",
|
|
121
|
+
]
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
"""CUDA Runtime helpers for safe host-to-target memory copies via ctypes."""
|
|
4
|
+
|
|
5
|
+
import ctypes
|
|
6
|
+
import ctypes.util
|
|
7
|
+
|
|
8
|
+
_cudart_lib = None
|
|
9
|
+
|
|
10
|
+
class _CudaPointerAttributes(ctypes.Structure):
    """ctypes structure used as the out-parameter of ``cudaPointerGetAttributes``.

    NOTE(review): the field order and types presumably mirror the CUDA
    runtime's ``cudaPointerAttributes`` struct -- confirm against the
    ``driver_types.h`` of every CUDA version this package supports.
    """

    # ctypes lays fields out in declaration order; do not reorder.
    _fields_ = [
        # Memory-type code; this module treats 2 and 3 as device memory.
        ("type", ctypes.c_int),
        ("device", ctypes.c_int),
        ("devicePointer", ctypes.c_void_p),
        ("hostPointer", ctypes.c_void_p),
    ]
|
|
17
|
+
|
|
18
|
+
def _load_cudart():
    """Return the CUDA runtime as a ``ctypes.CDLL``, or ``None`` if not found.

    A successful load is cached in the module-level ``_cudart_lib``.  The
    well-known sonames are tried first; ``ctypes.util.find_library`` is only
    consulted when all of them fail to load.
    """
    global _cudart_lib

    if _cudart_lib is None:
        for soname in ("libcudart.so", "libcudart.so.12", "libcudart.so.11"):
            try:
                _cudart_lib = ctypes.CDLL(soname)
            except OSError:
                continue
            break
        else:
            # None of the fixed names loaded; ask the platform's library finder.
            located = ctypes.util.find_library("cudart")
            if located:
                try:
                    _cudart_lib = ctypes.CDLL(located)
                except OSError:
                    pass

    return _cudart_lib
|
|
39
|
+
|
|
40
|
+
def _get_cuda_ptr_type(ptr: int) -> str:
    """Classify the raw address *ptr* via ``cudaPointerGetAttributes``.

    Returns:
        ``'device'``  -- the reported type code is 2 (device) or 3 (managed).
        ``'host'``    -- the call succeeded with any other type code.
        ``'unknown'`` -- the CUDA runtime could not be loaded, the call
        returned a non-zero status, or any exception was raised.
    """
    cudart = _load_cudart()
    if cudart is None:
        return "unknown"

    try:
        attrs = _CudaPointerAttributes()
        status = cudart.cudaPointerGetAttributes(
            ctypes.byref(attrs), ctypes.c_void_p(ptr)
        )
        if status != 0:
            # Clear the sticky CUDA error so subsequent calls are not affected.
            if hasattr(cudart, "cudaGetLastError"):
                cudart.cudaGetLastError()
            return "unknown"
        # cudaMemoryTypeDevice == 2, cudaMemoryTypeManaged == 3
        return "device" if attrs.type in (2, 3) else "host"
    except Exception:
        return "unknown"
|
|
67
|
+
|
|
68
|
+
def _is_cuda_ptr(ptr: int) -> bool:
    """Return ``True`` when ``_get_cuda_ptr_type(ptr)`` reports ``'device'``.

    Retained for backward compatibility; ``_get_cuda_ptr_type`` is preferred.
    """
    ptr_kind = _get_cuda_ptr_type(ptr)
    return ptr_kind == "device"
|
|
71
|
+
|
|
72
|
+
def _cuda_memcpy(dst: int, src: int, nbytes: int, kind: int = 1) -> None:
    """Invoke ``cudaMemcpy`` through ctypes with explicitly typed arguments.

    Args:
        dst: Destination address.
        src: Source address.
        nbytes: Number of bytes to copy.
        kind: ``cudaMemcpyKind`` value.
              ``1`` = ``cudaMemcpyHostToDevice`` (default).
              ``4`` = ``cudaMemcpyDefault`` -- CUDA runtime infers
              src/dst memory types at call time.

    Raises:
        OSError: If ``libcudart.so`` cannot be loaded.
        RuntimeError: If ``cudaMemcpy`` returns a non-zero error code.
    """
    cudart = _load_cudart()
    if cudart is None:
        raise OSError("libcudart.so not found; cannot perform cudaMemcpy")

    call_args = (
        ctypes.c_void_p(dst),
        ctypes.c_void_p(src),
        ctypes.c_size_t(nbytes),
        ctypes.c_int(kind),
    )
    status = cudart.cudaMemcpy(*call_args)
    if status != 0:
        raise RuntimeError(f"cudaMemcpy failed with CUDA error code {status}")
|
|
96
|
+
|
|
97
|
+
_cudaMemcpy_fn = None
|
|
98
|
+
_cudaHostRegister_fn = None
|
|
99
|
+
_cudaHostUnregister_fn = None
|
|
100
|
+
|
|
101
|
+
def _get_fast_cudaMemcpy():
    """Return the ``cudaMemcpy`` ctypes function with its prototype set.

    The configured function is cached in the module-level ``_cudaMemcpy_fn``.
    ``None`` is returned (and nothing is cached) when the CUDA runtime
    cannot be loaded.
    """
    global _cudaMemcpy_fn

    if _cudaMemcpy_fn is not None:
        return _cudaMemcpy_fn

    cudart = _load_cudart()
    if cudart is None:
        return None

    memcpy = cudart.cudaMemcpy
    memcpy.restype = ctypes.c_int
    memcpy.argtypes = [
        ctypes.c_void_p,
        ctypes.c_void_p,
        ctypes.c_size_t,
        ctypes.c_int,
    ]
    _cudaMemcpy_fn = memcpy
    return memcpy
|
|
117
|
+
|
|
118
|
+
def _fast_cuda_memcpy(dst: int, src: int, nbytes: int, kind: int = 4) -> None:
    """Invoke ``cudaMemcpy`` through the cached, pre-typed function pointer.

    Args:
        dst: Destination address.
        src: Source address.
        nbytes: Number of bytes to copy.
        kind: ``cudaMemcpyKind``.  Default ``4`` = ``cudaMemcpyDefault``.

    Raises:
        OSError: If ``libcudart.so`` cannot be loaded.
        RuntimeError: If ``cudaMemcpy`` returns a non-zero error code.
    """
    memcpy = _get_fast_cudaMemcpy()
    if memcpy is None:
        raise OSError("libcudart.so not found; cannot perform cudaMemcpy")

    status = memcpy(dst, src, nbytes, kind)
    if status != 0:
        raise RuntimeError(f"cudaMemcpy failed with CUDA error code {status}")
|
|
134
|
+
|
|
135
|
+
def _cuda_host_register(ptr: int, size: int) -> bool:
    """Pin *size* bytes of host memory at *ptr* via ``cudaHostRegister``.

    The call is made with flags ``0``.

    Returns:
        ``True`` if registration succeeded; ``False`` if the CUDA runtime is
        unavailable, the call returned a non-zero status, or any exception
        was raised.
    """
    cudart = _load_cudart()
    if cudart is None:
        return False

    try:
        register = cudart.cudaHostRegister
        register.restype = ctypes.c_int
        register.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_uint]
        status = register(
            ctypes.c_void_p(ptr), ctypes.c_size_t(size), ctypes.c_uint(0)
        )
        if status == 0:
            return True
        # Failed: read back the last error before reporting failure.
        cudart.cudaGetLastError()
    except Exception:
        pass
    return False
|
|
155
|
+
|
|
156
|
+
def _cuda_host_unregister(ptr: int) -> None:
    """Undo a previous ``cudaHostRegister`` for the region starting at *ptr*.

    Best effort: a missing CUDA runtime, a non-zero status, or any raised
    exception is ignored.  The CUDA context may already be destroyed at
    process exit, in which case CUDA cleans up automatically.
    """
    cudart = _load_cudart()
    if cudart is None:
        return

    try:
        unregister = cudart.cudaHostUnregister
        unregister.restype = ctypes.c_int
        unregister.argtypes = [ctypes.c_void_p]
        unregister(ctypes.c_void_p(ptr))
        # Status is deliberately not inspected; just read back the last error.
        cudart.cudaGetLastError()
    except Exception:
        pass
|
|
173
|
+
|
|
174
|
+
def _copy_host_to_target(
    staging_buf: bytearray,
    staging_ptr: int,
    target_ptr: int,
    nbytes: int,
) -> None:
    """Copy *nbytes* from the staging area at *staging_ptr* to *target_ptr*.

    * Does nothing when *target_ptr* or *nbytes* is 0.
    * Confirmed device pointers are copied with ``cudaMemcpyHostToDevice``
      (kind 1); errors from that copy propagate.
    * Any other pointer is first tried with ``cudaMemcpyDefault`` (kind 4);
      if that raises ``RuntimeError`` or ``OSError`` the copy is done with
      ``ctypes.memmove`` instead.

    *staging_buf* is accepted but not read; only the raw *staging_ptr* is used.
    """
    if target_ptr == 0 or nbytes == 0:
        return

    if _get_cuda_ptr_type(target_ptr) == "device":
        _cuda_memcpy(target_ptr, staging_ptr, nbytes, kind=1)
        return

    try:
        _cuda_memcpy(target_ptr, staging_ptr, nbytes, kind=4)
    except (RuntimeError, OSError):
        pass
    else:
        return

    # CUDA path unavailable or failed: plain host-side copy.
    ctypes.memmove(target_ptr, staging_ptr, nbytes)
|
|
201
|
+
|
|
202
|
+
def _copy_target_to_host(
    src_ptr: int,
    dst_buf: bytearray,
    nbytes: int,
) -> None:
    """Inverse of ``_copy_host_to_target``: copy from *src_ptr* to *dst_buf*.

    * Does nothing when *src_ptr* or *nbytes* is 0.
    * Confirmed device pointers are copied with ``cudaMemcpy`` kind 2
      (``cudaMemcpyDeviceToHost``); errors from that copy propagate.
    * Any other pointer is first tried with ``cudaMemcpyDefault`` (kind 4);
      if that raises ``RuntimeError`` or ``OSError`` the copy is done with
      ``ctypes.memmove`` instead.

    Args:
        src_ptr: Raw source address.
        dst_buf: Writable destination buffer of at least *nbytes* bytes.
        nbytes: Number of bytes to copy.

    Raises:
        ValueError: If *dst_buf* is shorter than *nbytes*.
    """
    if src_ptr == 0 or nbytes == 0:
        return

    if len(dst_buf) < nbytes:
        raise ValueError(f"dst_buf too small: {len(dst_buf)} < {nbytes}")

    # Keep the ctypes view referenced for the whole copy: while it is alive
    # the bytearray holds a buffer export and cannot be resized, so the
    # address below stays valid.
    dst_view = (ctypes.c_char * len(dst_buf)).from_buffer(dst_buf)
    dst_ptr = ctypes.addressof(dst_view)

    if _get_cuda_ptr_type(src_ptr) == "device":
        _cuda_memcpy(dst_ptr, src_ptr, nbytes, kind=2)
        return

    try:
        _cuda_memcpy(dst_ptr, src_ptr, nbytes, kind=4)
        return
    except (RuntimeError, OSError):
        pass

    # ctypes cannot convert a bytearray into a pointer argument, so the
    # fallback must be given the destination address, not ``dst_buf`` itself.
    ctypes.memmove(dst_ptr, src_ptr, nbytes)
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
"""Auto-discover and preload ``libhf3fs_api_shared.so`` with ``RTLD_GLOBAL``.
|
|
4
|
+
|
|
5
|
+
Pre-loads the library so that the C++ extension (``_core_v2``) and the
|
|
6
|
+
Python backend (``hf3fs_fuse.io``) can resolve symbols at import time
|
|
7
|
+
without requiring the user to set ``LD_LIBRARY_PATH``.
|
|
8
|
+
|
|
9
|
+
Discovery priority (first match wins):
|
|
10
|
+
1. ``HF3FS_LIB_DIR`` environment variable.
|
|
11
|
+
2. ``LD_LIBRARY_PATH`` directories.
|
|
12
|
+
3. ``hf3fs_py_usrbio`` pip install path.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import ctypes
|
|
18
|
+
import importlib.util
|
|
19
|
+
import logging
|
|
20
|
+
import os
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
_LIB_NAME = "libhf3fs_api_shared.so"
|
|
26
|
+
_preloaded_path: str | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _glob_lib(directory: Path) -> str | None:
|
|
30
|
+
"""Return the first matching ``libhf3fs_api_shared.so*`` in *directory*."""
|
|
31
|
+
if not directory.is_dir():
|
|
32
|
+
return None
|
|
33
|
+
for entry in directory.glob(f"{_LIB_NAME}*"):
|
|
34
|
+
if entry.is_file():
|
|
35
|
+
return str(entry)
|
|
36
|
+
return None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
# Discovery helpers
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
def _find_in_env_var() -> str | None:
    """Look for the library in the directory named by ``HF3FS_LIB_DIR``.

    Returns ``None`` when the variable is unset or empty.  An exact
    ``libhf3fs_api_shared.so`` file is preferred; otherwise the result of
    ``_glob_lib`` on that directory is returned.
    """
    lib_dir = os.environ.get("HF3FS_LIB_DIR")
    if lib_dir:
        exact = os.path.join(lib_dir, _LIB_NAME)
        return exact if os.path.isfile(exact) else _glob_lib(Path(lib_dir))
    return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _find_in_ld_library_path() -> str | None:
    """Look for the library in each ``LD_LIBRARY_PATH`` directory, in order.

    Entries are split on ``os.pathsep`` and stripped; blank entries are
    skipped.  Within a directory an exact ``libhf3fs_api_shared.so`` file is
    preferred over a ``_glob_lib`` match.  Returns ``None`` if nothing is
    found or the variable is unset/empty.
    """
    ld_path = os.environ.get("LD_LIBRARY_PATH", "")
    if not ld_path:
        return None

    for raw_entry in ld_path.split(os.pathsep):
        directory = raw_entry.strip()
        if directory:
            exact = os.path.join(directory, _LIB_NAME)
            hit = exact if os.path.isfile(exact) else _glob_lib(Path(directory))
            if hit:
                return hit
    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _find_in_pip_packages() -> str | None:
|
|
71
|
+
"""Locate the library relative to ``hf3fs_py_usrbio``'s install path.
|
|
72
|
+
|
|
73
|
+
Handles two install layouts:
|
|
74
|
+
* Package directory: ``site-packages/hf3fs_py_usrbio/__init__.py``
|
|
75
|
+
* Single-file C extension: ``site-packages/hf3fs_py_usrbio.cpython-*.so``
|
|
76
|
+
"""
|
|
77
|
+
try:
|
|
78
|
+
spec = importlib.util.find_spec("hf3fs_py_usrbio")
|
|
79
|
+
except (ModuleNotFoundError, ValueError):
|
|
80
|
+
return None
|
|
81
|
+
|
|
82
|
+
if spec is None or spec.origin is None:
|
|
83
|
+
return None
|
|
84
|
+
|
|
85
|
+
origin = Path(spec.origin)
|
|
86
|
+
|
|
87
|
+
if origin.name == "__init__.py" or origin.suffix == ".py":
|
|
88
|
+
pkg_dir = origin.parent
|
|
89
|
+
site_packages = pkg_dir.parent
|
|
90
|
+
else:
|
|
91
|
+
# Single-file .so sitting directly in site-packages
|
|
92
|
+
pkg_dir = None
|
|
93
|
+
site_packages = origin.parent
|
|
94
|
+
|
|
95
|
+
candidates = [
|
|
96
|
+
site_packages / "hf3fs_py_usrbio.libs", # auditwheel convention
|
|
97
|
+
site_packages,
|
|
98
|
+
]
|
|
99
|
+
if pkg_dir is not None:
|
|
100
|
+
candidates.extend([pkg_dir / "lib", pkg_dir])
|
|
101
|
+
|
|
102
|
+
for directory in candidates:
|
|
103
|
+
found = _glob_lib(directory)
|
|
104
|
+
if found:
|
|
105
|
+
return found
|
|
106
|
+
|
|
107
|
+
return None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
# Public API
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
def preload_hf3fs_library() -> bool:
    """Discover and pre-load ``libhf3fs_api_shared.so`` with ``RTLD_GLOBAL``.

    The finders are consulted in priority order (``HF3FS_LIB_DIR``,
    ``LD_LIBRARY_PATH``, the ``hf3fs_py_usrbio`` install location).  The
    first finder that yields a path decides the outcome: that path is handed
    to ``_do_preload`` and its result is returned without trying later
    finders.

    Returns:
        ``True`` if the library was loaded (or already loaded), ``False``
        otherwise.
    """
    if _preloaded_path is not None:
        return True

    search_order = (
        ("HF3FS_LIB_DIR", _find_in_env_var),
        ("LD_LIBRARY_PATH", _find_in_ld_library_path),
        ("hf3fs_py_usrbio pip package", _find_in_pip_packages),
    )
    for source, finder in search_order:
        path = finder()
        if not path:
            continue
        return _do_preload(path, source=source)

    logger.debug(
        "preload_hf3fs_library: %s not found in any search path", _LIB_NAME
    )
    return False
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def get_hf3fs_lib_path() -> str | None:
    """Return the path recorded by a successful preload, or ``None``."""
    loaded_from = _preloaded_path
    return loaded_from
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# Internal helpers
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
def _do_preload(path: str, *, source: str) -> bool:
    """Load the shared library at *path* with ``RTLD_GLOBAL``.

    Args:
        path: Filesystem path of the library to load.
        source: Label naming where *path* was discovered; used in log
            messages only.

    Returns:
        ``True`` after a successful load (the path is recorded in the
        module-level ``_preloaded_path``); ``False`` if loading raised
        ``OSError``, which is logged as a warning.
    """
    global _preloaded_path

    try:
        ctypes.CDLL(path, mode=ctypes.RTLD_GLOBAL)
    except OSError as exc:
        logger.warning(
            "preload_hf3fs_library: failed to load %s from %s (source: %s): %s",
            _LIB_NAME,
            path,
            source,
            exc,
        )
        return False

    _preloaded_path = path
    logger.info(
        "preload_hf3fs_library: loaded %s from %s (source: %s)",
        _LIB_NAME,
        path,
        source,
    )
    return True
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
__all__ = ["preload_hf3fs_library", "get_hf3fs_lib_path"]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""Utilities for resolving the 3FS mount point from file paths."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
try:
    # Re-export the implementation provided by hf3fs_py_usrbio when present.
    from hf3fs_py_usrbio import extract_mount_point

except ImportError:
    # No-op stub when hf3fs_py_usrbio is not installed.
    def extract_mount_point(path: str) -> str:  # type: ignore[misc]
        """Fallback used when ``hf3fs_py_usrbio`` cannot be imported.

        Ignores *path* and always returns an empty string.
        """
        return ""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
__all__ = ["extract_mount_point"]
|