fsspec 2025.2.0__tar.gz → 2025.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fsspec-2025.2.0 → fsspec-2025.3.1}/PKG-INFO +1 -1
- fsspec-2025.3.1/Untitled.ipynb +6 -0
- fsspec-2025.3.1/correct_permissions.bin +1 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/changelog.rst +33 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/_version.py +9 -4
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/asyn.py +12 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/caching.py +40 -1
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/core.py +1 -1
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/asyn_wrapper.py +9 -5
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/cached.py +13 -1
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/dirfs.py +8 -4
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/github.py +46 -18
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/http.py +24 -0
- fsspec-2025.3.1/fsspec/implementations/http_sync.py +931 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/local.py +8 -7
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/reference.py +2 -3
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/registry.py +4 -1
- {fsspec-2025.2.0 → fsspec-2025.3.1}/.codespellrc +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/.coveragerc +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/.gitattributes +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/.github/workflows/main.yaml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/.github/workflows/pypipublish.yaml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/.gitignore +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/.pre-commit-config.yaml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/LICENSE +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/README.md +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-downstream.yml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-friends.yml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-py38.yml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-typecheck.yml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-win.yml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/Makefile +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/README.md +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/environment.yml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/make.bat +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/_static/custom.css +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/api.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/async.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/conf.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/copying.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/developer.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/features.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/img/gui.png +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/index.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/intro.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/usage.rst +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/__init__.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/archive.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/callbacks.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/compression.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/config.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/conftest.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/dircache.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/exceptions.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/fuse.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/generic.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/gui.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/__init__.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/arrow.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/cache_mapper.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/cache_metadata.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/dask.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/data.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/dbfs.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/ftp.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/git.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/jupyter.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/libarchive.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/memory.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/sftp.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/smb.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/tar.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/webhdfs.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/zip.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/json.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/mapping.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/parquet.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/spec.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/__init__.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/common.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/copy.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/get.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/mv.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/open.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/pipe.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/put.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/transaction.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/utils.py +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/install_s3fs.sh +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/pyproject.toml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/readthedocs.yml +0 -0
- {fsspec-2025.2.0 → fsspec-2025.3.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fsspec
|
|
3
|
-
Version: 2025.2.0
|
|
3
|
+
Version: 2025.3.1
|
|
4
4
|
Summary: File-system specification
|
|
5
5
|
Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
|
|
6
6
|
Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
hello
|
|
@@ -1,6 +1,39 @@
|
|
|
1
1
|
Changelog
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
+
2025.3.1
|
|
5
|
+
--------
|
|
6
|
+
|
|
7
|
+
Enhancements
|
|
8
|
+
|
|
9
|
+
- LFS support in github: (#1810)
|
|
10
|
+
|
|
11
|
+
Fixes
|
|
12
|
+
|
|
13
|
+
- json should be a method of requests shim (#1814)
|
|
14
|
+
- don't raise if known_implementation has no given error string (#1804)
|
|
15
|
+
|
|
16
|
+
Other
|
|
17
|
+
|
|
18
|
+
- rename protocols for sync-http (#1810)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
2025.3.0
|
|
22
|
+
--------
|
|
23
|
+
|
|
24
|
+
Enhancements
|
|
25
|
+
|
|
26
|
+
- add pipe_file to HTTP (#1799, 1801)
|
|
27
|
+
- add sync http for pyodide (#1177)
|
|
28
|
+
- ls performance for local and detail=False (#1789)
|
|
29
|
+
|
|
30
|
+
Fixes
|
|
31
|
+
|
|
32
|
+
- dir/info consistency in dirfs (#1798)
|
|
33
|
+
- referenceFS async consistency (#1794, 1795)
|
|
34
|
+
- CI (#1793)
|
|
35
|
+
|
|
36
|
+
|
|
4
37
|
2025.2.0
|
|
5
38
|
--------
|
|
6
39
|
|
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
# file generated by
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
+
|
|
3
6
|
TYPE_CHECKING = False
|
|
4
7
|
if TYPE_CHECKING:
|
|
5
|
-
from typing import Tuple
|
|
8
|
+
from typing import Tuple
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
6
11
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
12
|
else:
|
|
8
13
|
VERSION_TUPLE = object
|
|
@@ -12,5 +17,5 @@ __version__: str
|
|
|
12
17
|
__version_tuple__: VERSION_TUPLE
|
|
13
18
|
version_tuple: VERSION_TUPLE
|
|
14
19
|
|
|
15
|
-
__version__ = version = '2025.
|
|
16
|
-
__version_tuple__ = version_tuple = (2025,
|
|
20
|
+
__version__ = version = '2025.3.1'
|
|
21
|
+
__version_tuple__ = version_tuple = (2025, 3, 1)
|
|
@@ -151,6 +151,18 @@ def get_loop():
|
|
|
151
151
|
return loop[0]
|
|
152
152
|
|
|
153
153
|
|
|
154
|
+
def reset_after_fork():
|
|
155
|
+
global lock
|
|
156
|
+
loop[0] = None
|
|
157
|
+
iothread[0] = None
|
|
158
|
+
lock = None
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
if hasattr(os, "register_at_fork"):
|
|
162
|
+
# should be posix; this will do nothing for spawn or forkserver subprocesses
|
|
163
|
+
os.register_at_fork(after_in_child=reset_after_fork)
|
|
164
|
+
|
|
165
|
+
|
|
154
166
|
if TYPE_CHECKING:
|
|
155
167
|
import resource
|
|
156
168
|
|
|
@@ -37,6 +37,7 @@ T = TypeVar("T")
|
|
|
37
37
|
logger = logging.getLogger("fsspec")
|
|
38
38
|
|
|
39
39
|
Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
|
|
40
|
+
MultiFetcher = Callable[list[[int, int]], bytes] # Maps [(start, end)] to bytes
|
|
40
41
|
|
|
41
42
|
|
|
42
43
|
class BaseCache:
|
|
@@ -109,6 +110,26 @@ class MMapCache(BaseCache):
|
|
|
109
110
|
Ensure there is enough disc space in the temporary location.
|
|
110
111
|
|
|
111
112
|
This cache method might only work on posix
|
|
113
|
+
|
|
114
|
+
Parameters
|
|
115
|
+
----------
|
|
116
|
+
blocksize: int
|
|
117
|
+
How far to read ahead in numbers of bytes
|
|
118
|
+
fetcher: Fetcher
|
|
119
|
+
Function of the form f(start, end) which gets bytes from remote as
|
|
120
|
+
specified
|
|
121
|
+
size: int
|
|
122
|
+
How big this file is
|
|
123
|
+
location: str
|
|
124
|
+
Where to create the temporary file. If None, a temporary file is
|
|
125
|
+
created using tempfile.TemporaryFile().
|
|
126
|
+
blocks: set[int]
|
|
127
|
+
Set of block numbers that have already been fetched. If None, an empty
|
|
128
|
+
set is created.
|
|
129
|
+
multi_fetcher: MultiFetcher
|
|
130
|
+
Function of the form f([(start, end)]) which gets bytes from remote
|
|
131
|
+
as specified. This function is used to fetch multiple blocks at once.
|
|
132
|
+
If not specified, the fetcher function is used instead.
|
|
112
133
|
"""
|
|
113
134
|
|
|
114
135
|
name = "mmap"
|
|
@@ -120,10 +141,12 @@ class MMapCache(BaseCache):
|
|
|
120
141
|
size: int,
|
|
121
142
|
location: str | None = None,
|
|
122
143
|
blocks: set[int] | None = None,
|
|
144
|
+
multi_fetcher: MultiFetcher | None = None,
|
|
123
145
|
) -> None:
|
|
124
146
|
super().__init__(blocksize, fetcher, size)
|
|
125
147
|
self.blocks = set() if blocks is None else blocks
|
|
126
148
|
self.location = location
|
|
149
|
+
self.multi_fetcher = multi_fetcher
|
|
127
150
|
self.cache = self._makefile()
|
|
128
151
|
|
|
129
152
|
def _makefile(self) -> mmap.mmap | bytearray:
|
|
@@ -164,6 +187,8 @@ class MMapCache(BaseCache):
|
|
|
164
187
|
# Count the number of blocks already cached
|
|
165
188
|
self.hit_count += sum(1 for i in block_range if i in self.blocks)
|
|
166
189
|
|
|
190
|
+
ranges = []
|
|
191
|
+
|
|
167
192
|
# Consolidate needed blocks.
|
|
168
193
|
# Algorithm adapted from Python 2.x itertools documentation.
|
|
169
194
|
# We are grouping an enumerated sequence of blocks. By comparing when the difference
|
|
@@ -185,13 +210,27 @@ class MMapCache(BaseCache):
|
|
|
185
210
|
logger.debug(
|
|
186
211
|
f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
|
|
187
212
|
)
|
|
188
|
-
|
|
213
|
+
ranges.append((sstart, send))
|
|
189
214
|
|
|
190
215
|
# Update set of cached blocks
|
|
191
216
|
self.blocks.update(_blocks)
|
|
192
217
|
# Update cache statistics with number of blocks we had to cache
|
|
193
218
|
self.miss_count += len(_blocks)
|
|
194
219
|
|
|
220
|
+
if not ranges:
|
|
221
|
+
return self.cache[start:end]
|
|
222
|
+
|
|
223
|
+
if self.multi_fetcher:
|
|
224
|
+
logger.debug(f"MMap get blocks {ranges}")
|
|
225
|
+
for idx, r in enumerate(self.multi_fetcher(ranges)):
|
|
226
|
+
(sstart, send) = ranges[idx]
|
|
227
|
+
logger.debug(f"MMap copy block ({sstart}-{send}")
|
|
228
|
+
self.cache[sstart:send] = r
|
|
229
|
+
else:
|
|
230
|
+
for sstart, send in ranges:
|
|
231
|
+
logger.debug(f"MMap get block ({sstart}-{send}")
|
|
232
|
+
self.cache[sstart:send] = self.fetcher(sstart, send)
|
|
233
|
+
|
|
195
234
|
return self.cache[start:end]
|
|
196
235
|
|
|
197
236
|
def __getstate__(self) -> dict[str, Any]:
|
|
@@ -452,7 +452,7 @@ def open(
|
|
|
452
452
|
newline: bytes or None
|
|
453
453
|
Used for line terminator in text mode. If None, uses system default;
|
|
454
454
|
if blank, uses no translation.
|
|
455
|
-
expand: bool or
|
|
455
|
+
expand: bool or None
|
|
456
456
|
Whether to regard file paths containing special glob characters as needing
|
|
457
457
|
expansion (finding the first match) or absolute. Setting False allows using
|
|
458
458
|
paths which do embed such characters. If None (default), this argument
|
|
@@ -2,7 +2,7 @@ import asyncio
|
|
|
2
2
|
import functools
|
|
3
3
|
import inspect
|
|
4
4
|
|
|
5
|
-
from fsspec.asyn import AsyncFileSystem
|
|
5
|
+
from fsspec.asyn import AsyncFileSystem, running_async
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def async_wrapper(func, obj=None):
|
|
@@ -42,10 +42,14 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
|
|
|
42
42
|
The synchronous filesystem instance to wrap.
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
protocol = "async_wrapper"
|
|
46
|
+
cachable = False
|
|
47
|
+
|
|
48
|
+
def __init__(self, fs, *args, asynchronous=None, **kwargs):
|
|
49
|
+
if asynchronous is None:
|
|
50
|
+
asynchronous = running_async()
|
|
51
|
+
super().__init__(*args, asynchronous=asynchronous, **kwargs)
|
|
52
|
+
self.sync_fs = fs
|
|
49
53
|
self.protocol = self.sync_fs.protocol
|
|
50
54
|
self._wrap_all_sync_methods()
|
|
51
55
|
|
|
@@ -362,7 +362,19 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
362
362
|
)
|
|
363
363
|
else:
|
|
364
364
|
detail["blocksize"] = f.blocksize
|
|
365
|
-
|
|
365
|
+
|
|
366
|
+
def _fetch_ranges(ranges):
|
|
367
|
+
return self.fs.cat_ranges(
|
|
368
|
+
[path] * len(ranges),
|
|
369
|
+
[r[0] for r in ranges],
|
|
370
|
+
[r[1] for r in ranges],
|
|
371
|
+
**kwargs,
|
|
372
|
+
)
|
|
373
|
+
|
|
374
|
+
multi_fetcher = None if self.compression else _fetch_ranges
|
|
375
|
+
f.cache = MMapCache(
|
|
376
|
+
f.blocksize, f._fetch_range, f.size, fn, blocks, multi_fetcher=multi_fetcher
|
|
377
|
+
)
|
|
366
378
|
close = f.close
|
|
367
379
|
f.close = lambda: self.close_and_update(f, close)
|
|
368
380
|
self.save_cache()
|
|
@@ -36,8 +36,6 @@ class DirFileSystem(AsyncFileSystem):
|
|
|
36
36
|
super().__init__(**storage_options)
|
|
37
37
|
if fs is None:
|
|
38
38
|
fs = filesystem(protocol=target_protocol, **(target_options or {}))
|
|
39
|
-
if (path is not None) ^ (fo is not None) is False:
|
|
40
|
-
raise ValueError("Provide path or fo, not both")
|
|
41
39
|
path = path or fo
|
|
42
40
|
|
|
43
41
|
if self.asynchronous and not fs.async_impl:
|
|
@@ -233,10 +231,16 @@ class DirFileSystem(AsyncFileSystem):
|
|
|
233
231
|
return self.fs.exists(self._join(path))
|
|
234
232
|
|
|
235
233
|
async def _info(self, path, **kwargs):
|
|
236
|
-
|
|
234
|
+
info = await self.fs._info(self._join(path), **kwargs)
|
|
235
|
+
info = info.copy()
|
|
236
|
+
info["name"] = self._relpath(info["name"])
|
|
237
|
+
return info
|
|
237
238
|
|
|
238
239
|
def info(self, path, **kwargs):
|
|
239
|
-
|
|
240
|
+
info = self.fs.info(self._join(path), **kwargs)
|
|
241
|
+
info = info.copy()
|
|
242
|
+
info["name"] = self._relpath(info["name"])
|
|
243
|
+
return info
|
|
240
244
|
|
|
241
245
|
async def _ls(self, path, detail=True, **kwargs):
|
|
242
246
|
ret = (await self.fs._ls(self._join(path), detail=detail, **kwargs)).copy()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import
|
|
1
|
+
import base64
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
import requests
|
|
4
4
|
|
|
5
5
|
from ..spec import AbstractFileSystem
|
|
6
6
|
from ..utils import infer_storage_options
|
|
@@ -16,8 +16,10 @@ class GithubFileSystem(AbstractFileSystem):
|
|
|
16
16
|
repository. You may specify a point in the repos history, by SHA, branch
|
|
17
17
|
or tag (default is current master).
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
For files less than 1 MB in size, file content is returned directly in a
|
|
20
|
+
MemoryFile. For larger files, or for files tracked by git-lfs, file content
|
|
21
|
+
is returned as an HTTPFile wrapping the ``download_url`` provided by the
|
|
22
|
+
GitHub API.
|
|
21
23
|
|
|
22
24
|
When using fsspec.open, allows URIs of the form:
|
|
23
25
|
|
|
@@ -36,7 +38,7 @@ class GithubFileSystem(AbstractFileSystem):
|
|
|
36
38
|
"""
|
|
37
39
|
|
|
38
40
|
url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}"
|
|
39
|
-
|
|
41
|
+
content_url = "https://api.github.com/repos/{org}/{repo}/contents/{path}?ref={sha}"
|
|
40
42
|
protocol = "github"
|
|
41
43
|
timeout = (60, 60) # connect, read timeouts
|
|
42
44
|
|
|
@@ -63,6 +65,12 @@ class GithubFileSystem(AbstractFileSystem):
|
|
|
63
65
|
|
|
64
66
|
self.root = sha
|
|
65
67
|
self.ls("")
|
|
68
|
+
try:
|
|
69
|
+
from .http import HTTPFileSystem
|
|
70
|
+
|
|
71
|
+
self.http_fs = HTTPFileSystem(**kwargs)
|
|
72
|
+
except ImportError:
|
|
73
|
+
self.http_fs = None
|
|
66
74
|
|
|
67
75
|
@property
|
|
68
76
|
def kw(self):
|
|
@@ -212,28 +220,48 @@ class GithubFileSystem(AbstractFileSystem):
|
|
|
212
220
|
path,
|
|
213
221
|
mode="rb",
|
|
214
222
|
block_size=None,
|
|
215
|
-
autocommit=True,
|
|
216
223
|
cache_options=None,
|
|
217
224
|
sha=None,
|
|
218
225
|
**kwargs,
|
|
219
226
|
):
|
|
220
227
|
if mode != "rb":
|
|
221
228
|
raise NotImplementedError
|
|
222
|
-
|
|
229
|
+
|
|
230
|
+
# construct a url to hit the GitHub API's repo contents API
|
|
231
|
+
url = self.content_url.format(
|
|
223
232
|
org=self.org, repo=self.repo, path=path, sha=sha or self.root
|
|
224
233
|
)
|
|
234
|
+
|
|
235
|
+
# make a request to this API, and parse the response as JSON
|
|
225
236
|
r = requests.get(url, timeout=self.timeout, **self.kw)
|
|
226
237
|
if r.status_code == 404:
|
|
227
238
|
raise FileNotFoundError(path)
|
|
228
239
|
r.raise_for_status()
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
+
content_json = r.json()
|
|
241
|
+
|
|
242
|
+
# if the response's content key is not empty, try to parse it as base64
|
|
243
|
+
if content_json["content"]:
|
|
244
|
+
content = base64.b64decode(content_json["content"])
|
|
245
|
+
|
|
246
|
+
# as long as the content does not start with the string
|
|
247
|
+
# "version https://git-lfs.github.com/"
|
|
248
|
+
# then it is probably not a git-lfs pointer and we can just return
|
|
249
|
+
# the content directly
|
|
250
|
+
if not content.startswith(b"version https://git-lfs.github.com/"):
|
|
251
|
+
return MemoryFile(None, None, content)
|
|
252
|
+
|
|
253
|
+
# we land here if the content was not present in the first response
|
|
254
|
+
# (regular file over 1MB or git-lfs tracked file)
|
|
255
|
+
# in this case, we let the HTTPFileSystem handle the download
|
|
256
|
+
if self.http_fs is None:
|
|
257
|
+
raise ImportError(
|
|
258
|
+
"Please install fsspec[http] to access github files >1 MB "
|
|
259
|
+
"or git-lfs tracked files."
|
|
260
|
+
)
|
|
261
|
+
return self.http_fs.open(
|
|
262
|
+
content_json["download_url"],
|
|
263
|
+
mode=mode,
|
|
264
|
+
block_size=block_size,
|
|
265
|
+
cache_options=cache_options,
|
|
266
|
+
**kwargs,
|
|
267
|
+
)
|
|
@@ -522,6 +522,30 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
522
522
|
except (FileNotFoundError, ValueError):
|
|
523
523
|
return False
|
|
524
524
|
|
|
525
|
+
async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
|
|
526
|
+
"""
|
|
527
|
+
Write bytes to a remote file over HTTP.
|
|
528
|
+
|
|
529
|
+
Parameters
|
|
530
|
+
----------
|
|
531
|
+
path : str
|
|
532
|
+
Target URL where the data should be written
|
|
533
|
+
value : bytes
|
|
534
|
+
Data to be written
|
|
535
|
+
mode : str
|
|
536
|
+
How to write to the file - 'overwrite' or 'append'
|
|
537
|
+
**kwargs : dict
|
|
538
|
+
Additional parameters to pass to the HTTP request
|
|
539
|
+
"""
|
|
540
|
+
url = self._strip_protocol(path)
|
|
541
|
+
headers = kwargs.pop("headers", {})
|
|
542
|
+
headers["Content-Length"] = str(len(value))
|
|
543
|
+
|
|
544
|
+
session = await self.set_session()
|
|
545
|
+
|
|
546
|
+
async with session.put(url, data=value, headers=headers, **kwargs) as r:
|
|
547
|
+
r.raise_for_status()
|
|
548
|
+
|
|
525
549
|
|
|
526
550
|
class HTTPFile(AbstractBufferedFile):
|
|
527
551
|
"""
|