fsspec 2023.12.2__py3-none-any.whl → 2024.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2023-12-11T16:18:48-0500",
11
+ "date": "2024-02-04T20:21:42-0500",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "dd8cb9bf620be4d9153e854dd1431c23a2be6db0",
15
- "version": "2023.12.2"
14
+ "full-revisionid": "5dc364e13b63609717d77b7361e80cfa64e3b8fd",
15
+ "version": "2024.2.0"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
fsspec/asyn.py CHANGED
@@ -11,7 +11,7 @@ from contextlib import contextmanager
11
11
  from glob import has_magic
12
12
  from typing import TYPE_CHECKING, Iterable
13
13
 
14
- from .callbacks import _DEFAULT_CALLBACK
14
+ from .callbacks import DEFAULT_CALLBACK
15
15
  from .exceptions import FSTimeoutError
16
16
  from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
17
  from .spec import AbstractBufferedFile, AbstractFileSystem
@@ -205,7 +205,7 @@ def running_async() -> bool:
205
205
  async def _run_coros_in_chunks(
206
206
  coros,
207
207
  batch_size=None,
208
- callback=_DEFAULT_CALLBACK,
208
+ callback=DEFAULT_CALLBACK,
209
209
  timeout=None,
210
210
  return_exceptions=False,
211
211
  nofiles=False,
@@ -245,7 +245,7 @@ async def _run_coros_in_chunks(
245
245
  asyncio.Task(asyncio.wait_for(c, timeout=timeout))
246
246
  for c in coros[start : start + batch_size]
247
247
  ]
248
- if callback is not _DEFAULT_CALLBACK:
248
+ if callback is not DEFAULT_CALLBACK:
249
249
  [
250
250
  t.add_done_callback(lambda *_, **__: callback.relative_update(1))
251
251
  for t in chunk
@@ -506,7 +506,7 @@ class AsyncFileSystem(AbstractFileSystem):
506
506
  lpath,
507
507
  rpath,
508
508
  recursive=False,
509
- callback=_DEFAULT_CALLBACK,
509
+ callback=DEFAULT_CALLBACK,
510
510
  batch_size=None,
511
511
  maxdepth=None,
512
512
  **kwargs,
@@ -568,8 +568,8 @@ class AsyncFileSystem(AbstractFileSystem):
568
568
  coros = []
569
569
  callback.set_size(len(file_pairs))
570
570
  for lfile, rfile in file_pairs:
571
- callback.branch(lfile, rfile, kwargs)
572
- coros.append(self._put_file(lfile, rfile, **kwargs))
571
+ put_file = callback.branch_coro(self._put_file)
572
+ coros.append(put_file(lfile, rfile, **kwargs))
573
573
 
574
574
  return await _run_coros_in_chunks(
575
575
  coros, batch_size=batch_size, callback=callback
@@ -583,7 +583,7 @@ class AsyncFileSystem(AbstractFileSystem):
583
583
  rpath,
584
584
  lpath,
585
585
  recursive=False,
586
- callback=_DEFAULT_CALLBACK,
586
+ callback=DEFAULT_CALLBACK,
587
587
  maxdepth=None,
588
588
  **kwargs,
589
589
  ):
@@ -645,8 +645,8 @@ class AsyncFileSystem(AbstractFileSystem):
645
645
  coros = []
646
646
  callback.set_size(len(lpaths))
647
647
  for lpath, rpath in zip(lpaths, rpaths):
648
- callback.branch(rpath, lpath, kwargs)
649
- coros.append(self._get_file(rpath, lpath, **kwargs))
648
+ get_file = callback.branch_coro(self._get_file)
649
+ coros.append(get_file(rpath, lpath, **kwargs))
650
650
  return await _run_coros_in_chunks(
651
651
  coros, batch_size=batch_size, callback=callback
652
652
  )
fsspec/callbacks.py CHANGED
@@ -1,3 +1,6 @@
1
+ from functools import wraps
2
+
3
+
1
4
  class Callback:
2
5
  """
3
6
  Base class and interface for callback mechanism
@@ -25,6 +28,60 @@ class Callback:
25
28
  self.hooks = hooks or {}
26
29
  self.kw = kwargs
27
30
 
31
+ def __enter__(self):
32
+ return self
33
+
34
+ def __exit__(self, *exc_args):
35
+ self.close()
36
+
37
+ def close(self):
38
+ """Close callback."""
39
+
40
+ def branched(self, path_1, path_2, **kwargs):
41
+ """
42
+ Return callback for child transfers
43
+
44
+ If this callback is operating at a higher level, e.g., put, which may
45
+ trigger transfers that can also be monitored. The function returns a callback
46
+ that has to be passed to the child method, e.g., put_file,
47
+ as `callback=` argument.
48
+
49
+ The implementation uses `callback.branch` for compatibility.
50
+ When implementing callbacks, it is recommended to override this function instead
51
+ of `branch` and avoid calling `super().branched(...)`.
52
+
53
+ Prefer using this function over `branch`.
54
+
55
+ Parameters
56
+ ----------
57
+ path_1: str
58
+ Child's source path
59
+ path_2: str
60
+ Child's destination path
61
+ **kwargs:
62
+ Arbitrary keyword arguments
63
+
64
+ Returns
65
+ -------
66
+ callback: Callback
67
+ A callback instance to be passed to the child method
68
+ """
69
+ self.branch(path_1, path_2, kwargs)
70
+ # mutate kwargs so that we can force the caller to pass "callback=" explicitly
71
+ return kwargs.pop("callback", DEFAULT_CALLBACK)
72
+
73
+ def branch_coro(self, fn):
74
+ """
75
+ Wraps a coroutine, and pass a new child callback to it.
76
+ """
77
+
78
+ @wraps(fn)
79
+ async def func(path1, path2: str, **kwargs):
80
+ with self.branched(path1, path2, **kwargs) as child:
81
+ return await fn(path1, path2, callback=child, **kwargs)
82
+
83
+ return func
84
+
28
85
  def set_size(self, size):
29
86
  """
30
87
  Set the internal maximum size attribute
@@ -140,10 +197,10 @@ class Callback:
140
197
 
141
198
  For the special value of ``None``, return the global instance of
142
199
  ``NoOpCallback``. This is an alternative to including
143
- ``callback=_DEFAULT_CALLBACK`` directly in a method signature.
200
+ ``callback=DEFAULT_CALLBACK`` directly in a method signature.
144
201
  """
145
202
  if maybe_callback is None:
146
- return _DEFAULT_CALLBACK
203
+ return DEFAULT_CALLBACK
147
204
  return maybe_callback
148
205
 
149
206
 
@@ -186,7 +243,9 @@ class TqdmCallback(Callback):
186
243
  tqdm_kwargs : dict, (optional)
187
244
  Any argument accepted by the tqdm constructor.
188
245
  See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
189
- Will be forwarded to tqdm.
246
+ Will be forwarded to `tqdm_cls`.
247
+ tqdm_cls: (optional)
248
+ subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.
190
249
 
191
250
  Examples
192
251
  --------
@@ -209,30 +268,57 @@ class TqdmCallback(Callback):
209
268
  recursive=True,
210
269
  callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
211
270
  )
271
+
272
+ You can also customize the progress bar by passing a subclass of `tqdm`.
273
+
274
+ .. code-block:: python
275
+
276
+ class TqdmFormat(tqdm):
277
+ '''Provides a `total_time` format parameter'''
278
+ @property
279
+ def format_dict(self):
280
+ d = super().format_dict
281
+ total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
282
+ d.update(total_time=self.format_interval(total_time) + " in total")
283
+ return d
284
+
285
+ >>> with TqdmCallback(
286
+ tqdm_kwargs={
287
+ "desc": "desc",
288
+ "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
289
+ },
290
+ tqdm_cls=TqdmFormat,
291
+ ) as callback:
292
+ fs.upload(".", path2distant_data, recursive=True, callback=callback)
212
293
  """
213
294
 
214
295
  def __init__(self, tqdm_kwargs=None, *args, **kwargs):
215
296
  try:
216
- import tqdm
297
+ from tqdm import tqdm
217
298
 
218
- self._tqdm = tqdm
219
299
  except ImportError as exce:
220
300
  raise ImportError(
221
301
  "Using TqdmCallback requires tqdm to be installed"
222
302
  ) from exce
223
303
 
304
+ self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
224
305
  self._tqdm_kwargs = tqdm_kwargs or {}
306
+ self.tqdm = None
225
307
  super().__init__(*args, **kwargs)
226
308
 
227
- def set_size(self, size):
228
- self.tqdm = self._tqdm.tqdm(total=size, **self._tqdm_kwargs)
309
+ def call(self, *args, **kwargs):
310
+ if self.tqdm is None:
311
+ self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
312
+ self.tqdm.total = self.size
313
+ self.tqdm.update(self.value - self.tqdm.n)
229
314
 
230
- def relative_update(self, inc=1):
231
- self.tqdm.update(inc)
315
+ def close(self):
316
+ if self.tqdm is not None:
317
+ self.tqdm.close()
318
+ self.tqdm = None
232
319
 
233
320
  def __del__(self):
234
- self.tqdm.close()
235
- self.tqdm = None
321
+ return self.close()
236
322
 
237
323
 
238
- _DEFAULT_CALLBACK = NoOpCallback()
324
+ DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
fsspec/compression.py CHANGED
@@ -90,15 +90,15 @@ except ImportError:
90
90
  try:
91
91
  from lzma import LZMAFile
92
92
 
93
- register_compression("lzma", LZMAFile, "xz")
94
- register_compression("xz", LZMAFile, "xz", force=True)
93
+ register_compression("lzma", LZMAFile, "lzma")
94
+ register_compression("xz", LZMAFile, "xz")
95
95
  except ImportError:
96
96
  pass
97
97
 
98
98
  try:
99
99
  import lzmaffi
100
100
 
101
- register_compression("lzma", lzmaffi.LZMAFile, "xz", force=True)
101
+ register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
102
102
  register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
103
103
  except ImportError:
104
104
  pass
fsspec/exceptions.py CHANGED
@@ -10,12 +10,8 @@ class BlocksizeMismatchError(ValueError):
10
10
  written with
11
11
  """
12
12
 
13
- ...
14
-
15
13
 
16
14
  class FSTimeoutError(asyncio.TimeoutError):
17
15
  """
18
16
  Raised when a fsspec function timed out occurs
19
17
  """
20
-
21
- ...
fsspec/generic.py CHANGED
@@ -8,7 +8,7 @@ import uuid
8
8
  from typing import Optional
9
9
 
10
10
  from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
11
- from .callbacks import _DEFAULT_CALLBACK
11
+ from .callbacks import DEFAULT_CALLBACK
12
12
  from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
13
13
 
14
14
  _generic_fs = {}
@@ -279,7 +279,7 @@ class GenericFileSystem(AsyncFileSystem):
279
279
  url,
280
280
  url2,
281
281
  blocksize=2**20,
282
- callback=_DEFAULT_CALLBACK,
282
+ callback=DEFAULT_CALLBACK,
283
283
  **kwargs,
284
284
  ):
285
285
  fs = _resolve_fs(url, self.method)
fsspec/gui.py CHANGED
@@ -153,8 +153,9 @@ class SigSlot:
153
153
  break
154
154
  except Exception as e:
155
155
  logger.exception(
156
- "Exception (%s) while executing callback for signal: %s"
157
- "" % (e, sig)
156
+ "Exception (%s) while executing callback for signal: %s",
157
+ e,
158
+ sig,
158
159
  )
159
160
 
160
161
  def show(self, threads=False):
@@ -5,6 +5,7 @@ import secrets
5
5
  import shutil
6
6
  from contextlib import suppress
7
7
  from functools import cached_property, wraps
8
+ from urllib.parse import parse_qs
8
9
 
9
10
  from fsspec.spec import AbstractFileSystem
10
11
  from fsspec.utils import (
@@ -255,6 +256,7 @@ class HadoopFileSystem(ArrowFSWrapper):
255
256
  port=0,
256
257
  user=None,
257
258
  kerb_ticket=None,
259
+ replication=3,
258
260
  extra_conf=None,
259
261
  **kwargs,
260
262
  ):
@@ -270,6 +272,8 @@ class HadoopFileSystem(ArrowFSWrapper):
270
272
  If given, connect as this username
271
273
  kerb_ticket: str or None
272
274
  If given, use this ticket for authentication
275
+ replication: int
276
+ set replication factor of file for write operations. default value is 3.
273
277
  extra_conf: None or dict
274
278
  Passed on to HadoopFileSystem
275
279
  """
@@ -280,6 +284,7 @@ class HadoopFileSystem(ArrowFSWrapper):
280
284
  port=port,
281
285
  user=user,
282
286
  kerb_ticket=kerb_ticket,
287
+ replication=replication,
283
288
  extra_conf=extra_conf,
284
289
  )
285
290
  super().__init__(fs=fs, **kwargs)
@@ -294,4 +299,8 @@ class HadoopFileSystem(ArrowFSWrapper):
294
299
  out["user"] = ops["username"]
295
300
  if ops.get("port", None):
296
301
  out["port"] = ops["port"]
302
+ if ops.get("url_query", None):
303
+ queries = parse_qs(ops["url_query"])
304
+ if queries.get("replication", None):
305
+ out["replication"] = int(queries["replication"][0])
297
306
  return out
@@ -2,13 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  import hashlib
5
- from typing import TYPE_CHECKING
6
5
 
7
6
  from fsspec.implementations.local import make_path_posix
8
7
 
9
- if TYPE_CHECKING:
10
- from typing import Any
11
-
12
8
 
13
9
  class AbstractCacheMapper(abc.ABC):
14
10
  """Abstract super-class for mappers from remote URLs to local cached
@@ -19,7 +15,7 @@ class AbstractCacheMapper(abc.ABC):
19
15
  def __call__(self, path: str) -> str:
20
16
  ...
21
17
 
22
- def __eq__(self, other: Any) -> bool:
18
+ def __eq__(self, other: object) -> bool:
23
19
  # Identity only depends on class. When derived classes have attributes
24
20
  # they will need to be included.
25
21
  return isinstance(other, type(self))
@@ -56,7 +52,7 @@ class BasenameCacheMapper(AbstractCacheMapper):
56
52
  else:
57
53
  return prefix # No separator found, simple filename
58
54
 
59
- def __eq__(self, other: Any) -> bool:
55
+ def __eq__(self, other: object) -> bool:
60
56
  return super().__eq__(other) and self.directory_levels == other.directory_levels
61
57
 
62
58
  def __hash__(self) -> int:
@@ -10,7 +10,7 @@ from shutil import rmtree
10
10
  from typing import TYPE_CHECKING, Any, Callable, ClassVar
11
11
 
12
12
  from fsspec import AbstractFileSystem, filesystem
13
- from fsspec.callbacks import _DEFAULT_CALLBACK
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
14
  from fsspec.compression import compr
15
15
  from fsspec.core import BaseCache, MMapCache
16
16
  from fsspec.exceptions import BlocksizeMismatchError
@@ -524,7 +524,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
524
524
  protocol = "filecache"
525
525
  local_file = True
526
526
 
527
- def open_many(self, open_files):
527
+ def open_many(self, open_files, **kwargs):
528
528
  paths = [of.path for of in open_files]
529
529
  if "r" in open_files.mode:
530
530
  self._mkcache()
@@ -535,6 +535,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
535
535
  path,
536
536
  mode=open_files.mode,
537
537
  fn=os.path.join(self.storage[-1], self._mapper(path)),
538
+ **kwargs,
538
539
  )
539
540
  for path in paths
540
541
  ]
@@ -606,7 +607,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
606
607
  path,
607
608
  recursive=False,
608
609
  on_error="raise",
609
- callback=_DEFAULT_CALLBACK,
610
+ callback=DEFAULT_CALLBACK,
610
611
  **kwargs,
611
612
  ):
612
613
  paths = self.expand_path(
@@ -650,7 +651,13 @@ class WholeFileCacheFileSystem(CachingFileSystem):
650
651
  path = self._strip_protocol(path)
651
652
  if "r" not in mode:
652
653
  fn = self._make_local_details(path)
653
- return LocalTempFile(self, path, mode=mode, fn=fn)
654
+ user_specified_kwargs = {
655
+ k: v
656
+ for k, v in kwargs.items()
657
+ # those kwargs were added by open(), we don't want them
658
+ if k not in ["autocommit", "block_size", "cache_options"]
659
+ }
660
+ return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
654
661
  detail = self._check_file(path)
655
662
  if detail:
656
663
  detail, fn = detail
@@ -775,8 +782,18 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
775
782
 
776
783
  if "r" not in mode:
777
784
  fn = os.path.join(self.storage[-1], sha)
785
+ user_specified_kwargs = {
786
+ k: v
787
+ for k, v in kwargs.items()
788
+ if k not in ["autocommit", "block_size", "cache_options"]
789
+ } # those were added by open()
778
790
  return LocalTempFile(
779
- self, path, mode=mode, autocommit=not self._intrans, fn=fn
791
+ self,
792
+ path,
793
+ mode=mode,
794
+ autocommit=not self._intrans,
795
+ fn=fn,
796
+ **user_specified_kwargs,
780
797
  )
781
798
  fn = self._check_file(path)
782
799
  if fn:
@@ -812,7 +829,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
812
829
  class LocalTempFile:
813
830
  """A temporary local file, which will be uploaded on commit"""
814
831
 
815
- def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0):
832
+ def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
816
833
  self.fn = fn
817
834
  self.fh = open(fn, mode)
818
835
  self.mode = mode
@@ -822,6 +839,7 @@ class LocalTempFile:
822
839
  self.fs = fs
823
840
  self.closed = False
824
841
  self.autocommit = autocommit
842
+ self.kwargs = kwargs
825
843
 
826
844
  def __reduce__(self):
827
845
  # always open in r+b to allow continuing writing at a location
@@ -849,7 +867,7 @@ class LocalTempFile:
849
867
  os.remove(self.fn)
850
868
 
851
869
  def commit(self):
852
- self.fs.put(self.fn, self.path)
870
+ self.fs.put(self.fn, self.path, **self.kwargs)
853
871
  try:
854
872
  os.remove(self.fn)
855
873
  except (PermissionError, FileNotFoundError):
@@ -2,6 +2,8 @@ import base64
2
2
  import urllib
3
3
 
4
4
  import requests
5
+ import requests.exceptions
6
+ from requests.adapters import HTTPAdapter, Retry
5
7
 
6
8
  from fsspec import AbstractFileSystem
7
9
  from fsspec.spec import AbstractBufferedFile
@@ -42,13 +44,19 @@ class DatabricksFileSystem(AbstractFileSystem):
42
44
  """
43
45
  self.instance = instance
44
46
  self.token = token
45
-
46
47
  self.session = requests.Session()
48
+ self.retries = Retry(
49
+ total=10,
50
+ backoff_factor=0.05,
51
+ status_forcelist=[408, 429, 500, 502, 503, 504],
52
+ )
53
+
54
+ self.session.mount("https://", HTTPAdapter(max_retries=self.retries))
47
55
  self.session.headers.update({"Authorization": f"Bearer {self.token}"})
48
56
 
49
57
  super().__init__(**kwargs)
50
58
 
51
- def ls(self, path, detail=True):
59
+ def ls(self, path, detail=True, **kwargs):
52
60
  """
53
61
  List the contents of the given path.
54
62
 
@@ -137,7 +145,7 @@ class DatabricksFileSystem(AbstractFileSystem):
137
145
 
138
146
  self.mkdirs(path, **kwargs)
139
147
 
140
- def rm(self, path, recursive=False):
148
+ def rm(self, path, recursive=False, **kwargs):
141
149
  """
142
150
  Remove the file or folder at the given absolute path.
143
151
 
@@ -166,7 +174,9 @@ class DatabricksFileSystem(AbstractFileSystem):
166
174
  raise e
167
175
  self.invalidate_cache(self._parent(path))
168
176
 
169
- def mv(self, source_path, destination_path, recursive=False, maxdepth=None):
177
+ def mv(
178
+ self, source_path, destination_path, recursive=False, maxdepth=None, **kwargs
179
+ ):
170
180
  """
171
181
  Move a source to a destination path.
172
182
 
@@ -124,6 +124,12 @@ class DirFileSystem(AsyncFileSystem):
124
124
  def pipe(self, path, *args, **kwargs):
125
125
  return self.fs.pipe(self._join(path), *args, **kwargs)
126
126
 
127
+ async def _pipe_file(self, path, *args, **kwargs):
128
+ return await self.fs._pipe_file(self._join(path), *args, **kwargs)
129
+
130
+ def pipe_file(self, path, *args, **kwargs):
131
+ return self.fs.pipe_file(self._join(path), *args, **kwargs)
132
+
127
133
  async def _cat_file(self, path, *args, **kwargs):
128
134
  return await self.fs._cat_file(self._join(path), *args, **kwargs)
129
135
 
@@ -171,12 +171,15 @@ class FTPFileSystem(AbstractFileSystem):
171
171
  def cb(x):
172
172
  out.append(x)
173
173
 
174
- self.ftp.retrbinary(
175
- f"RETR {path}",
176
- blocksize=self.blocksize,
177
- rest=start,
178
- callback=cb,
179
- )
174
+ try:
175
+ self.ftp.retrbinary(
176
+ f"RETR {path}",
177
+ blocksize=self.blocksize,
178
+ rest=start,
179
+ callback=cb,
180
+ )
181
+ except (Error, error_perm) as orig_exc:
182
+ raise FileNotFoundError(path) from orig_exc
180
183
  return b"".join(out)
181
184
 
182
185
  def _open(
@@ -361,15 +364,17 @@ def _mlsd2(ftp, path="."):
361
364
  minfo = []
362
365
  ftp.dir(path, lines.append)
363
366
  for line in lines:
364
- line = line.split()
367
+ split_line = line.split()
368
+ if len(split_line) < 9:
369
+ continue
365
370
  this = (
366
- line[-1],
371
+ split_line[-1],
367
372
  {
368
- "modify": " ".join(line[5:8]),
369
- "unix.owner": line[2],
370
- "unix.group": line[3],
371
- "unix.mode": line[0],
372
- "size": line[4],
373
+ "modify": " ".join(split_line[5:8]),
374
+ "unix.owner": split_line[2],
375
+ "unix.group": split_line[3],
376
+ "unix.mode": split_line[0],
377
+ "size": split_line[4],
373
378
  },
374
379
  )
375
380
  if "d" == this[1]["unix.mode"][0]:
@@ -36,8 +36,11 @@ class GithubFileSystem(AbstractFileSystem):
36
36
  url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}"
37
37
  rurl = "https://raw.githubusercontent.com/{org}/{repo}/{sha}/{path}"
38
38
  protocol = "github"
39
+ timeout = (60, 60) # connect, read timeouts
39
40
 
40
- def __init__(self, org, repo, sha=None, username=None, token=None, **kwargs):
41
+ def __init__(
42
+ self, org, repo, sha=None, username=None, token=None, timeout=None, **kwargs
43
+ ):
41
44
  super().__init__(**kwargs)
42
45
  self.org = org
43
46
  self.repo = repo
@@ -45,10 +48,14 @@ class GithubFileSystem(AbstractFileSystem):
45
48
  raise ValueError("Auth required both username and token")
46
49
  self.username = username
47
50
  self.token = token
51
+ if timeout is not None:
52
+ self.timeout = timeout
48
53
  if sha is None:
49
54
  # look up default branch (not necessarily "master")
50
55
  u = "https://api.github.com/repos/{org}/{repo}"
51
- r = requests.get(u.format(org=org, repo=repo), **self.kw)
56
+ r = requests.get(
57
+ u.format(org=org, repo=repo), timeout=self.timeout, **self.kw
58
+ )
52
59
  r.raise_for_status()
53
60
  sha = r.json()["default_branch"]
54
61
 
@@ -79,7 +86,8 @@ class GithubFileSystem(AbstractFileSystem):
79
86
  List of string
80
87
  """
81
88
  r = requests.get(
82
- f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos"
89
+ f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos",
90
+ timeout=cls.timeout,
83
91
  )
84
92
  r.raise_for_status()
85
93
  return [repo["name"] for repo in r.json()]
@@ -89,6 +97,7 @@ class GithubFileSystem(AbstractFileSystem):
89
97
  """Names of tags in the repo"""
90
98
  r = requests.get(
91
99
  f"https://api.github.com/repos/{self.org}/{self.repo}/tags",
100
+ timeout=self.timeout,
92
101
  **self.kw,
93
102
  )
94
103
  r.raise_for_status()
@@ -99,6 +108,7 @@ class GithubFileSystem(AbstractFileSystem):
99
108
  """Names of branches in the repo"""
100
109
  r = requests.get(
101
110
  f"https://api.github.com/repos/{self.org}/{self.repo}/branches",
111
+ timeout=self.timeout,
102
112
  **self.kw,
103
113
  )
104
114
  r.raise_for_status()
@@ -147,7 +157,9 @@ class GithubFileSystem(AbstractFileSystem):
147
157
  _sha = out["sha"]
148
158
  if path not in self.dircache or sha not in [self.root, None]:
149
159
  r = requests.get(
150
- self.url.format(org=self.org, repo=self.repo, sha=_sha), **self.kw
160
+ self.url.format(org=self.org, repo=self.repo, sha=_sha),
161
+ timeout=self.timeout,
162
+ **self.kw,
151
163
  )
152
164
  if r.status_code == 404:
153
165
  raise FileNotFoundError(path)
@@ -208,7 +220,7 @@ class GithubFileSystem(AbstractFileSystem):
208
220
  url = self.rurl.format(
209
221
  org=self.org, repo=self.repo, path=path, sha=sha or self.root
210
222
  )
211
- r = requests.get(url, **self.kw)
223
+ r = requests.get(url, timeout=self.timeout, **self.kw)
212
224
  if r.status_code == 404:
213
225
  raise FileNotFoundError(path)
214
226
  r.raise_for_status()