fsspec 2025.2.0__tar.gz → 2025.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {fsspec-2025.2.0 → fsspec-2025.3.1}/PKG-INFO +1 -1
  2. fsspec-2025.3.1/Untitled.ipynb +6 -0
  3. fsspec-2025.3.1/correct_permissions.bin +1 -0
  4. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/changelog.rst +33 -0
  5. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/_version.py +9 -4
  6. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/asyn.py +12 -0
  7. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/caching.py +40 -1
  8. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/core.py +1 -1
  9. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/asyn_wrapper.py +9 -5
  10. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/cached.py +13 -1
  11. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/dirfs.py +8 -4
  12. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/github.py +46 -18
  13. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/http.py +24 -0
  14. fsspec-2025.3.1/fsspec/implementations/http_sync.py +931 -0
  15. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/local.py +8 -7
  16. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/reference.py +2 -3
  17. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/registry.py +4 -1
  18. {fsspec-2025.2.0 → fsspec-2025.3.1}/.codespellrc +0 -0
  19. {fsspec-2025.2.0 → fsspec-2025.3.1}/.coveragerc +0 -0
  20. {fsspec-2025.2.0 → fsspec-2025.3.1}/.gitattributes +0 -0
  21. {fsspec-2025.2.0 → fsspec-2025.3.1}/.github/workflows/main.yaml +0 -0
  22. {fsspec-2025.2.0 → fsspec-2025.3.1}/.github/workflows/pypipublish.yaml +0 -0
  23. {fsspec-2025.2.0 → fsspec-2025.3.1}/.gitignore +0 -0
  24. {fsspec-2025.2.0 → fsspec-2025.3.1}/.pre-commit-config.yaml +0 -0
  25. {fsspec-2025.2.0 → fsspec-2025.3.1}/LICENSE +0 -0
  26. {fsspec-2025.2.0 → fsspec-2025.3.1}/README.md +0 -0
  27. {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-downstream.yml +0 -0
  28. {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-friends.yml +0 -0
  29. {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-py38.yml +0 -0
  30. {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-typecheck.yml +0 -0
  31. {fsspec-2025.2.0 → fsspec-2025.3.1}/ci/environment-win.yml +0 -0
  32. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/Makefile +0 -0
  33. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/README.md +0 -0
  34. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/environment.yml +0 -0
  35. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/make.bat +0 -0
  36. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/_static/custom.css +0 -0
  37. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/api.rst +0 -0
  38. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/async.rst +0 -0
  39. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/conf.py +0 -0
  40. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/copying.rst +0 -0
  41. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/developer.rst +0 -0
  42. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/features.rst +0 -0
  43. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/img/gui.png +0 -0
  44. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/index.rst +0 -0
  45. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/intro.rst +0 -0
  46. {fsspec-2025.2.0 → fsspec-2025.3.1}/docs/source/usage.rst +0 -0
  47. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/__init__.py +0 -0
  48. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/archive.py +0 -0
  49. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/callbacks.py +0 -0
  50. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/compression.py +0 -0
  51. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/config.py +0 -0
  52. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/conftest.py +0 -0
  53. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/dircache.py +0 -0
  54. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/exceptions.py +0 -0
  55. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/fuse.py +0 -0
  56. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/generic.py +0 -0
  57. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/gui.py +0 -0
  58. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/__init__.py +0 -0
  59. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/arrow.py +0 -0
  60. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/cache_mapper.py +0 -0
  61. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/cache_metadata.py +0 -0
  62. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/dask.py +0 -0
  63. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/data.py +0 -0
  64. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/dbfs.py +0 -0
  65. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/ftp.py +0 -0
  66. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/git.py +0 -0
  67. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/jupyter.py +0 -0
  68. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/libarchive.py +0 -0
  69. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/memory.py +0 -0
  70. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/sftp.py +0 -0
  71. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/smb.py +0 -0
  72. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/tar.py +0 -0
  73. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/webhdfs.py +0 -0
  74. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/implementations/zip.py +0 -0
  75. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/json.py +0 -0
  76. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/mapping.py +0 -0
  77. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/parquet.py +0 -0
  78. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/spec.py +0 -0
  79. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/__init__.py +0 -0
  80. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/common.py +0 -0
  81. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/copy.py +0 -0
  82. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/get.py +0 -0
  83. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/mv.py +0 -0
  84. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/open.py +0 -0
  85. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/pipe.py +0 -0
  86. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/tests/abstract/put.py +0 -0
  87. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/transaction.py +0 -0
  88. {fsspec-2025.2.0 → fsspec-2025.3.1}/fsspec/utils.py +0 -0
  89. {fsspec-2025.2.0 → fsspec-2025.3.1}/install_s3fs.sh +0 -0
  90. {fsspec-2025.2.0 → fsspec-2025.3.1}/pyproject.toml +0 -0
  91. {fsspec-2025.2.0 → fsspec-2025.3.1}/readthedocs.yml +0 -0
  92. {fsspec-2025.2.0 → fsspec-2025.3.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fsspec
3
- Version: 2025.2.0
3
+ Version: 2025.3.1
4
4
  Summary: File-system specification
5
5
  Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
6
6
  Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -0,0 +1,6 @@
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
@@ -0,0 +1 @@
1
+ hello
@@ -1,6 +1,39 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ 2025.3.1
5
+ --------
6
+
7
+ Enhancements
8
+
9
+ - LFS support in github: (#1810)
10
+
11
+ Fixes
12
+
13
+ - json should be a method of the requests shim (#1814)
14
+ - don't raise if known_implementation has no given error string (#1804)
15
+
16
+ Other
17
+
18
+ - rename protocols for sync-http (#1810)
19
+
20
+
21
+ 2025.3.0
22
+ --------
23
+
24
+ Enhancements
25
+
26
+ - add pipe_file to HTTP (#1799, 1801)
27
+ - add sync http for pyodide (#1177)
28
+ - ls performance for local and detail=False (#1789)
29
+
30
+ Fixes
31
+
32
+ - dir/info consistency in dirfs (#1798)
33
+ - referenceFS async consistency (#1794, 1795)
34
+ - CI (#1793)
35
+
36
+
4
37
  2025.2.0
5
38
  --------
6
39
 
@@ -1,8 +1,13 @@
1
- # file generated by setuptools_scm
1
+ # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
3
6
  TYPE_CHECKING = False
4
7
  if TYPE_CHECKING:
5
- from typing import Tuple, Union
8
+ from typing import Tuple
9
+ from typing import Union
10
+
6
11
  VERSION_TUPLE = Tuple[Union[int, str], ...]
7
12
  else:
8
13
  VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
12
17
  __version_tuple__: VERSION_TUPLE
13
18
  version_tuple: VERSION_TUPLE
14
19
 
15
- __version__ = version = '2025.2.0'
16
- __version_tuple__ = version_tuple = (2025, 2, 0)
20
+ __version__ = version = '2025.3.1'
21
+ __version_tuple__ = version_tuple = (2025, 3, 1)
@@ -151,6 +151,18 @@ def get_loop():
151
151
  return loop[0]
152
152
 
153
153
 
154
+ def reset_after_fork():
155
+ global lock
156
+ loop[0] = None
157
+ iothread[0] = None
158
+ lock = None
159
+
160
+
161
+ if hasattr(os, "register_at_fork"):
162
+ # should be posix; this will do nothing for spawn or forkserver subprocesses
163
+ os.register_at_fork(after_in_child=reset_after_fork)
164
+
165
+
154
166
  if TYPE_CHECKING:
155
167
  import resource
156
168
 
@@ -37,6 +37,7 @@ T = TypeVar("T")
37
37
  logger = logging.getLogger("fsspec")
38
38
 
39
39
  Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
40
+ MultiFetcher = Callable[list[[int, int]], bytes] # Maps [(start, end)] to bytes
40
41
 
41
42
 
42
43
  class BaseCache:
@@ -109,6 +110,26 @@ class MMapCache(BaseCache):
109
110
  Ensure there is enough disc space in the temporary location.
110
111
 
111
112
  This cache method might only work on posix
113
+
114
+ Parameters
115
+ ----------
116
+ blocksize: int
117
+ How far to read ahead in numbers of bytes
118
+ fetcher: Fetcher
119
+ Function of the form f(start, end) which gets bytes from remote as
120
+ specified
121
+ size: int
122
+ How big this file is
123
+ location: str
124
+ Where to create the temporary file. If None, a temporary file is
125
+ created using tempfile.TemporaryFile().
126
+ blocks: set[int]
127
+ Set of block numbers that have already been fetched. If None, an empty
128
+ set is created.
129
+ multi_fetcher: MultiFetcher
130
+ Function of the form f([(start, end)]) which gets bytes from remote
131
+ as specified. This function is used to fetch multiple blocks at once.
132
+ If not specified, the fetcher function is used instead.
112
133
  """
113
134
 
114
135
  name = "mmap"
@@ -120,10 +141,12 @@ class MMapCache(BaseCache):
120
141
  size: int,
121
142
  location: str | None = None,
122
143
  blocks: set[int] | None = None,
144
+ multi_fetcher: MultiFetcher | None = None,
123
145
  ) -> None:
124
146
  super().__init__(blocksize, fetcher, size)
125
147
  self.blocks = set() if blocks is None else blocks
126
148
  self.location = location
149
+ self.multi_fetcher = multi_fetcher
127
150
  self.cache = self._makefile()
128
151
 
129
152
  def _makefile(self) -> mmap.mmap | bytearray:
@@ -164,6 +187,8 @@ class MMapCache(BaseCache):
164
187
  # Count the number of blocks already cached
165
188
  self.hit_count += sum(1 for i in block_range if i in self.blocks)
166
189
 
190
+ ranges = []
191
+
167
192
  # Consolidate needed blocks.
168
193
  # Algorithm adapted from Python 2.x itertools documentation.
169
194
  # We are grouping an enumerated sequence of blocks. By comparing when the difference
@@ -185,13 +210,27 @@ class MMapCache(BaseCache):
185
210
  logger.debug(
186
211
  f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
187
212
  )
188
- self.cache[sstart:send] = self.fetcher(sstart, send)
213
+ ranges.append((sstart, send))
189
214
 
190
215
  # Update set of cached blocks
191
216
  self.blocks.update(_blocks)
192
217
  # Update cache statistics with number of blocks we had to cache
193
218
  self.miss_count += len(_blocks)
194
219
 
220
+ if not ranges:
221
+ return self.cache[start:end]
222
+
223
+ if self.multi_fetcher:
224
+ logger.debug(f"MMap get blocks {ranges}")
225
+ for idx, r in enumerate(self.multi_fetcher(ranges)):
226
+ (sstart, send) = ranges[idx]
227
+ logger.debug(f"MMap copy block ({sstart}-{send}")
228
+ self.cache[sstart:send] = r
229
+ else:
230
+ for sstart, send in ranges:
231
+ logger.debug(f"MMap get block ({sstart}-{send}")
232
+ self.cache[sstart:send] = self.fetcher(sstart, send)
233
+
195
234
  return self.cache[start:end]
196
235
 
197
236
  def __getstate__(self) -> dict[str, Any]:
@@ -452,7 +452,7 @@ def open(
452
452
  newline: bytes or None
453
453
  Used for line terminator in text mode. If None, uses system default;
454
454
  if blank, uses no translation.
455
- expand: bool or Nonw
455
+ expand: bool or None
456
456
  Whether to regard file paths containing special glob characters as needing
457
457
  expansion (finding the first match) or absolute. Setting False allows using
458
458
  paths which do embed such characters. If None (default), this argument
@@ -2,7 +2,7 @@ import asyncio
2
2
  import functools
3
3
  import inspect
4
4
 
5
- from fsspec.asyn import AsyncFileSystem
5
+ from fsspec.asyn import AsyncFileSystem, running_async
6
6
 
7
7
 
8
8
  def async_wrapper(func, obj=None):
@@ -42,10 +42,14 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
42
42
  The synchronous filesystem instance to wrap.
43
43
  """
44
44
 
45
- def __init__(self, sync_fs, *args, **kwargs):
46
- super().__init__(*args, **kwargs)
47
- self.asynchronous = True
48
- self.sync_fs = sync_fs
45
+ protocol = "async_wrapper"
46
+ cachable = False
47
+
48
+ def __init__(self, fs, *args, asynchronous=None, **kwargs):
49
+ if asynchronous is None:
50
+ asynchronous = running_async()
51
+ super().__init__(*args, asynchronous=asynchronous, **kwargs)
52
+ self.sync_fs = fs
49
53
  self.protocol = self.sync_fs.protocol
50
54
  self._wrap_all_sync_methods()
51
55
 
@@ -362,7 +362,19 @@ class CachingFileSystem(AbstractFileSystem):
362
362
  )
363
363
  else:
364
364
  detail["blocksize"] = f.blocksize
365
- f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
365
+
366
+ def _fetch_ranges(ranges):
367
+ return self.fs.cat_ranges(
368
+ [path] * len(ranges),
369
+ [r[0] for r in ranges],
370
+ [r[1] for r in ranges],
371
+ **kwargs,
372
+ )
373
+
374
+ multi_fetcher = None if self.compression else _fetch_ranges
375
+ f.cache = MMapCache(
376
+ f.blocksize, f._fetch_range, f.size, fn, blocks, multi_fetcher=multi_fetcher
377
+ )
366
378
  close = f.close
367
379
  f.close = lambda: self.close_and_update(f, close)
368
380
  self.save_cache()
@@ -36,8 +36,6 @@ class DirFileSystem(AsyncFileSystem):
36
36
  super().__init__(**storage_options)
37
37
  if fs is None:
38
38
  fs = filesystem(protocol=target_protocol, **(target_options or {}))
39
- if (path is not None) ^ (fo is not None) is False:
40
- raise ValueError("Provide path or fo, not both")
41
39
  path = path or fo
42
40
 
43
41
  if self.asynchronous and not fs.async_impl:
@@ -233,10 +231,16 @@ class DirFileSystem(AsyncFileSystem):
233
231
  return self.fs.exists(self._join(path))
234
232
 
235
233
  async def _info(self, path, **kwargs):
236
- return await self.fs._info(self._join(path), **kwargs)
234
+ info = await self.fs._info(self._join(path), **kwargs)
235
+ info = info.copy()
236
+ info["name"] = self._relpath(info["name"])
237
+ return info
237
238
 
238
239
  def info(self, path, **kwargs):
239
- return self.fs.info(self._join(path), **kwargs)
240
+ info = self.fs.info(self._join(path), **kwargs)
241
+ info = info.copy()
242
+ info["name"] = self._relpath(info["name"])
243
+ return info
240
244
 
241
245
  async def _ls(self, path, detail=True, **kwargs):
242
246
  ret = (await self.fs._ls(self._join(path), detail=detail, **kwargs)).copy()
@@ -1,6 +1,6 @@
1
- import requests
1
+ import base64
2
2
 
3
- import fsspec
3
+ import requests
4
4
 
5
5
  from ..spec import AbstractFileSystem
6
6
  from ..utils import infer_storage_options
@@ -16,8 +16,10 @@ class GithubFileSystem(AbstractFileSystem):
16
16
  repository. You may specify a point in the repos history, by SHA, branch
17
17
  or tag (default is current master).
18
18
 
19
- Given that code files tend to be small, and that github does not support
20
- retrieving partial content, we always fetch whole files.
19
+ For files less than 1 MB in size, file content is returned directly in a
20
+ MemoryFile. For larger files, or for files tracked by git-lfs, file content
21
+ is returned as an HTTPFile wrapping the ``download_url`` provided by the
22
+ GitHub API.
21
23
 
22
24
  When using fsspec.open, allows URIs of the form:
23
25
 
@@ -36,7 +38,7 @@ class GithubFileSystem(AbstractFileSystem):
36
38
  """
37
39
 
38
40
  url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}"
39
- rurl = "https://raw.githubusercontent.com/{org}/{repo}/{sha}/{path}"
41
+ content_url = "https://api.github.com/repos/{org}/{repo}/contents/{path}?ref={sha}"
40
42
  protocol = "github"
41
43
  timeout = (60, 60) # connect, read timeouts
42
44
 
@@ -63,6 +65,12 @@ class GithubFileSystem(AbstractFileSystem):
63
65
 
64
66
  self.root = sha
65
67
  self.ls("")
68
+ try:
69
+ from .http import HTTPFileSystem
70
+
71
+ self.http_fs = HTTPFileSystem(**kwargs)
72
+ except ImportError:
73
+ self.http_fs = None
66
74
 
67
75
  @property
68
76
  def kw(self):
@@ -212,28 +220,48 @@ class GithubFileSystem(AbstractFileSystem):
212
220
  path,
213
221
  mode="rb",
214
222
  block_size=None,
215
- autocommit=True,
216
223
  cache_options=None,
217
224
  sha=None,
218
225
  **kwargs,
219
226
  ):
220
227
  if mode != "rb":
221
228
  raise NotImplementedError
222
- url = self.rurl.format(
229
+
230
+ # construct a url to hit the GitHub API's repo contents API
231
+ url = self.content_url.format(
223
232
  org=self.org, repo=self.repo, path=path, sha=sha or self.root
224
233
  )
234
+
235
+ # make a request to this API, and parse the response as JSON
225
236
  r = requests.get(url, timeout=self.timeout, **self.kw)
226
237
  if r.status_code == 404:
227
238
  raise FileNotFoundError(path)
228
239
  r.raise_for_status()
229
- return MemoryFile(None, None, r.content)
230
-
231
- def cat(self, path, recursive=False, on_error="raise", **kwargs):
232
- paths = self.expand_path(path, recursive=recursive)
233
- urls = [
234
- self.rurl.format(org=self.org, repo=self.repo, path=u, sha=self.root)
235
- for u, sh in paths
236
- ]
237
- fs = fsspec.filesystem("http")
238
- data = fs.cat(urls, on_error="return")
239
- return {u: v for ((k, v), u) in zip(data.items(), urls)}
240
+ content_json = r.json()
241
+
242
+ # if the response's content key is not empty, try to parse it as base64
243
+ if content_json["content"]:
244
+ content = base64.b64decode(content_json["content"])
245
+
246
+ # as long as the content does not start with the string
247
+ # "version https://git-lfs.github.com/"
248
+ # then it is probably not a git-lfs pointer and we can just return
249
+ # the content directly
250
+ if not content.startswith(b"version https://git-lfs.github.com/"):
251
+ return MemoryFile(None, None, content)
252
+
253
+ # we land here if the content was not present in the first response
254
+ # (regular file over 1MB or git-lfs tracked file)
255
+ # in this case, we let the HTTPFileSystem handle the download
256
+ if self.http_fs is None:
257
+ raise ImportError(
258
+ "Please install fsspec[http] to access github files >1 MB "
259
+ "or git-lfs tracked files."
260
+ )
261
+ return self.http_fs.open(
262
+ content_json["download_url"],
263
+ mode=mode,
264
+ block_size=block_size,
265
+ cache_options=cache_options,
266
+ **kwargs,
267
+ )
@@ -522,6 +522,30 @@ class HTTPFileSystem(AsyncFileSystem):
522
522
  except (FileNotFoundError, ValueError):
523
523
  return False
524
524
 
525
+ async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
526
+ """
527
+ Write bytes to a remote file over HTTP.
528
+
529
+ Parameters
530
+ ----------
531
+ path : str
532
+ Target URL where the data should be written
533
+ value : bytes
534
+ Data to be written
535
+ mode : str
536
+ How to write to the file - 'overwrite' or 'append'
537
+ **kwargs : dict
538
+ Additional parameters to pass to the HTTP request
539
+ """
540
+ url = self._strip_protocol(path)
541
+ headers = kwargs.pop("headers", {})
542
+ headers["Content-Length"] = str(len(value))
543
+
544
+ session = await self.set_session()
545
+
546
+ async with session.put(url, data=value, headers=headers, **kwargs) as r:
547
+ r.raise_for_status()
548
+
525
549
 
526
550
  class HTTPFile(AbstractBufferedFile):
527
551
  """