scmrepo 3.0.0__tar.gz → 3.2.0__tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scmrepo might be problematic. Click here for more details.

Files changed (71)
  1. {scmrepo-3.0.0 → scmrepo-3.2.0}/.pre-commit-config.yaml +1 -1
  2. {scmrepo-3.0.0/src/scmrepo.egg-info → scmrepo-3.2.0}/PKG-INFO +5 -5
  3. {scmrepo-3.0.0 → scmrepo-3.2.0}/pyproject.toml +4 -5
  4. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/dulwich/__init__.py +14 -1
  5. scmrepo-3.2.0/src/scmrepo/git/lfs/client.py +275 -0
  6. scmrepo-3.2.0/src/scmrepo/git/lfs/progress.py +159 -0
  7. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/lfs/smudge.py +4 -1
  8. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/lfs/storage.py +4 -2
  9. {scmrepo-3.0.0 → scmrepo-3.2.0/src/scmrepo.egg-info}/PKG-INFO +5 -5
  10. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo.egg-info/requires.txt +4 -4
  11. scmrepo-3.0.0/src/scmrepo/git/lfs/client.py +0 -171
  12. scmrepo-3.0.0/src/scmrepo/git/lfs/progress.py +0 -52
  13. {scmrepo-3.0.0 → scmrepo-3.2.0}/.coveragerc +0 -0
  14. {scmrepo-3.0.0 → scmrepo-3.2.0}/.cruft.json +0 -0
  15. {scmrepo-3.0.0 → scmrepo-3.2.0}/.gitattributes +0 -0
  16. {scmrepo-3.0.0 → scmrepo-3.2.0}/.github/dependabot.yml +0 -0
  17. {scmrepo-3.0.0 → scmrepo-3.2.0}/.github/workflows/release.yaml +0 -0
  18. {scmrepo-3.0.0 → scmrepo-3.2.0}/.github/workflows/tests.yaml +0 -0
  19. {scmrepo-3.0.0 → scmrepo-3.2.0}/.github/workflows/update-template.yaml +0 -0
  20. {scmrepo-3.0.0 → scmrepo-3.2.0}/.gitignore +0 -0
  21. {scmrepo-3.0.0 → scmrepo-3.2.0}/CODE_OF_CONDUCT.rst +0 -0
  22. {scmrepo-3.0.0 → scmrepo-3.2.0}/CONTRIBUTING.rst +0 -0
  23. {scmrepo-3.0.0 → scmrepo-3.2.0}/LICENSE +0 -0
  24. {scmrepo-3.0.0 → scmrepo-3.2.0}/README.rst +0 -0
  25. {scmrepo-3.0.0 → scmrepo-3.2.0}/noxfile.py +0 -0
  26. {scmrepo-3.0.0 → scmrepo-3.2.0}/setup.cfg +0 -0
  27. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/__init__.py +0 -0
  28. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/asyn.py +0 -0
  29. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/base.py +0 -0
  30. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/exceptions.py +0 -0
  31. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/fs.py +0 -0
  32. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/__init__.py +0 -0
  33. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/__init__.py +0 -0
  34. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/base.py +0 -0
  35. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/dulwich/asyncssh_vendor.py +0 -0
  36. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/dulwich/client.py +0 -0
  37. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/gitpython.py +0 -0
  38. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/pygit2/__init__.py +0 -0
  39. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/pygit2/callbacks.py +0 -0
  40. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/backend/pygit2/filter.py +0 -0
  41. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/config.py +0 -0
  42. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/credentials.py +0 -0
  43. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/lfs/__init__.py +0 -0
  44. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/lfs/exceptions.py +0 -0
  45. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/lfs/fetch.py +0 -0
  46. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/lfs/object.py +0 -0
  47. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/lfs/pointer.py +0 -0
  48. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/objects.py +0 -0
  49. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/git/stash.py +0 -0
  50. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/noscm.py +0 -0
  51. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/progress.py +0 -0
  52. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/py.typed +0 -0
  53. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo/utils.py +0 -0
  54. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo.egg-info/SOURCES.txt +0 -0
  55. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo.egg-info/dependency_links.txt +0 -0
  56. {scmrepo-3.0.0 → scmrepo-3.2.0}/src/scmrepo.egg-info/top_level.txt +0 -0
  57. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/__init__.py +0 -0
  58. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/conftest.py +0 -0
  59. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/docker-compose.yml +0 -0
  60. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/git-init/git.sh +0 -0
  61. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_credentials.py +0 -0
  62. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_dulwich.py +0 -0
  63. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_fs.py +0 -0
  64. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_git.py +0 -0
  65. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_lfs.py +0 -0
  66. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_noscm.py +0 -0
  67. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_pygit2.py +0 -0
  68. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_scmrepo.py +0 -0
  69. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/test_stash.py +0 -0
  70. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/user.key +0 -0
  71. {scmrepo-3.0.0 → scmrepo-3.2.0}/tests/user.key.pub +0 -0
@@ -20,7 +20,7 @@ repos:
20
20
  - id: sort-simple-yaml
21
21
  - id: trailing-whitespace
22
22
  - repo: https://github.com/astral-sh/ruff-pre-commit
23
- rev: 'v0.1.13'
23
+ rev: 'v0.2.2'
24
24
  hooks:
25
25
  - id: ruff
26
26
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scmrepo
3
- Version: 3.0.0
3
+ Version: 3.2.0
4
4
  Summary: scmrepo
5
5
  Author-email: Iterative <support@dvc.org>
6
6
  License: Apache-2.0
@@ -19,13 +19,12 @@ Requires-Dist: gitpython>3
19
19
  Requires-Dist: dulwich>=0.21.6
20
20
  Requires-Dist: pygit2>=1.14.0
21
21
  Requires-Dist: pygtrie>=2.3.2
22
- Requires-Dist: fsspec>=2024.2.0
22
+ Requires-Dist: fsspec[tqdm]>=2024.2.0
23
23
  Requires-Dist: pathspec>=0.9.0
24
24
  Requires-Dist: asyncssh<3,>=2.13.1
25
25
  Requires-Dist: funcy>=1.14
26
- Requires-Dist: shortuuid>=0.5.0
27
- Requires-Dist: dvc-objects<5,>=4
28
- Requires-Dist: dvc-http>=2.29.0
26
+ Requires-Dist: aiohttp-retry>=2.5.0
27
+ Requires-Dist: tqdm
29
28
  Provides-Extra: tests
30
29
  Requires-Dist: pytest==7.2.0; extra == "tests"
31
30
  Requires-Dist: pytest-sugar==0.9.5; extra == "tests"
@@ -40,6 +39,7 @@ Requires-Dist: paramiko==3.3.1; extra == "tests"
40
39
  Requires-Dist: types-certifi==2021.10.8.3; extra == "tests"
41
40
  Requires-Dist: types-mock==5.1.0.2; extra == "tests"
42
41
  Requires-Dist: types-paramiko==3.4.0.20240120; extra == "tests"
42
+ Requires-Dist: types-tqdm; extra == "tests"
43
43
  Provides-Extra: dev
44
44
  Requires-Dist: scmrepo[tests]; extra == "dev"
45
45
 
@@ -25,13 +25,12 @@ dependencies = [
25
25
  "dulwich>=0.21.6",
26
26
  "pygit2>=1.14.0",
27
27
  "pygtrie>=2.3.2",
28
- "fsspec>=2024.2.0",
28
+ "fsspec[tqdm]>=2024.2.0",
29
29
  "pathspec>=0.9.0",
30
30
  "asyncssh>=2.13.1,<3",
31
31
  "funcy>=1.14",
32
- "shortuuid>=0.5.0",
33
- "dvc-objects>=4,<5",
34
- "dvc-http>=2.29.0",
32
+ "aiohttp-retry>=2.5.0",
33
+ "tqdm",
35
34
  ]
36
35
 
37
36
  [project.urls]
@@ -54,6 +53,7 @@ tests = [
54
53
  "types-certifi==2021.10.8.3",
55
54
  "types-mock==5.1.0.2",
56
55
  "types-paramiko==3.4.0.20240120",
56
+ "types-tqdm",
57
57
  ]
58
58
  dev = [
59
59
  "scmrepo[tests]",
@@ -109,7 +109,6 @@ files = ["src", "tests"]
109
109
  [[tool.mypy.overrides]]
110
110
  module = [
111
111
  "pygtrie",
112
- "dvc_http.*",
113
112
  "funcy",
114
113
  "git",
115
114
  "gitdb.*",
@@ -842,7 +842,7 @@ class DulwichBackend(BaseGitBackend): # pylint:disable=abstract-method
842
842
  if revision and revision not in rev_mapping:
843
843
  rev_mapping[revision] = ref
844
844
  for rev in revs:
845
- results[rev] = rev_mapping.get(rev, None)
845
+ results[rev] = rev_mapping.get(rev)
846
846
  return results
847
847
 
848
848
  def diff(self, rev_a: str, rev_b: str, binary=False) -> str:
@@ -978,3 +978,16 @@ def _parse_identity(identity: str) -> tuple[str, str]:
978
978
  if not m:
979
979
  raise SCMError("Could not parse tagger identity '{identity}'")
980
980
  return m.group("name"), m.group("email")
981
+
982
+
983
+ def ls_remote(url: str) -> dict[str, str]:
984
+ from dulwich import porcelain
985
+ from dulwich.client import HTTPUnauthorized
986
+
987
+ try:
988
+ refs = porcelain.ls_remote(url)
989
+ return {os.fsdecode(ref): sha.decode("ascii") for ref, sha in refs.items()}
990
+ except HTTPUnauthorized as exc:
991
+ raise AuthError(url) from exc
992
+ except Exception as exc: # noqa: BLE001
993
+ raise InvalidRemote(url) from exc
@@ -0,0 +1,275 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import re
5
+ import shutil
6
+ from abc import abstractmethod
7
+ from collections.abc import Iterable, Iterator
8
+ from contextlib import AbstractContextManager, contextmanager, suppress
9
+ from tempfile import NamedTemporaryFile
10
+ from typing import TYPE_CHECKING, Any, Optional
11
+
12
+ import aiohttp
13
+ from aiohttp_retry import ExponentialRetry, RetryClient
14
+ from fsspec.asyn import _run_coros_in_chunks, sync_wrapper
15
+ from fsspec.callbacks import DEFAULT_CALLBACK
16
+ from fsspec.implementations.http import HTTPFileSystem
17
+ from funcy import cached_property
18
+
19
+ from scmrepo.git.backend.dulwich import _get_ssh_vendor
20
+ from scmrepo.git.credentials import Credential, CredentialNotFoundError
21
+
22
+ from .exceptions import LFSError
23
+ from .pointer import Pointer
24
+
25
+ if TYPE_CHECKING:
26
+ from fsspec.callbacks import Callback
27
+
28
+ from .storage import LFSStorage
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ class LFSClient(AbstractContextManager):
34
+ """Naive read-only LFS HTTP client."""
35
+
36
+ JSON_CONTENT_TYPE = "application/vnd.git-lfs+json"
37
+
38
+ _REQUEST_TIMEOUT = 60
39
+ _SESSION_RETRIES = 5
40
+ _SESSION_BACKOFF_FACTOR = 0.1
41
+
42
+ def __init__(self, url: str):
43
+ """
44
+ Args:
45
+ url: LFS server URL.
46
+ """
47
+ self.url = url
48
+
49
+ def __exit__(self, *args, **kwargs):
50
+ self.close()
51
+
52
+ @cached_property
53
+ def _fs(self) -> HTTPFileSystem:
54
+ async def get_client(**kwargs):
55
+ return RetryClient(
56
+ connector=aiohttp.TCPConnector(
57
+ # Force cleanup of closed SSL transports.
58
+ # See https://github.com/iterative/dvc/issues/7414
59
+ enable_cleanup_closed=True,
60
+ ),
61
+ timeout=aiohttp.ClientTimeout(
62
+ total=None,
63
+ connect=self._REQUEST_TIMEOUT,
64
+ sock_connect=self._REQUEST_TIMEOUT,
65
+ sock_read=self._REQUEST_TIMEOUT,
66
+ ),
67
+ retry_options=ExponentialRetry(
68
+ attempts=self._SESSION_RETRIES,
69
+ factor=self._SESSION_BACKOFF_FACTOR,
70
+ max_timeout=self._REQUEST_TIMEOUT,
71
+ exceptions={aiohttp.ClientError},
72
+ ),
73
+ **kwargs,
74
+ )
75
+
76
+ return HTTPFileSystem(get_client=get_client)
77
+
78
+ @property
79
+ def loop(self):
80
+ return self._fs.loop
81
+
82
+ @classmethod
83
+ def from_git_url(cls, git_url: str) -> "LFSClient":
84
+ if git_url.startswith(("ssh://", "git@")):
85
+ return _SSHLFSClient.from_git_url(git_url)
86
+ if git_url.startswith("https://"):
87
+ return _HTTPLFSClient.from_git_url(git_url)
88
+ raise NotImplementedError(f"Unsupported Git URL: {git_url}")
89
+
90
+ def close(self):
91
+ pass
92
+
93
+ @abstractmethod
94
+ def _get_auth_header(self, *, upload: bool) -> dict:
95
+ ...
96
+
97
+ async def _batch_request(
98
+ self,
99
+ objects: Iterable[Pointer],
100
+ upload: bool = False,
101
+ ref: Optional[str] = None,
102
+ hash_algo: str = "sha256",
103
+ ) -> dict[str, Any]:
104
+ """Send LFS API /objects/batch request."""
105
+ url = f"{self.url}/objects/batch"
106
+ body: dict[str, Any] = {
107
+ "operation": "upload" if upload else "download",
108
+ "transfers": ["basic"],
109
+ "objects": [{"oid": obj.oid, "size": obj.size} for obj in objects],
110
+ "hash_algo": hash_algo,
111
+ }
112
+ if ref:
113
+ body["ref"] = [{"name": ref}]
114
+ session = await self._fs.set_session()
115
+ headers = {
116
+ "Accept": self.JSON_CONTENT_TYPE,
117
+ "Content-Type": self.JSON_CONTENT_TYPE,
118
+ }
119
+ try:
120
+ async with session.post(
121
+ url,
122
+ headers=headers,
123
+ json=body,
124
+ raise_for_status=True,
125
+ ) as resp:
126
+ data = await resp.json()
127
+ except aiohttp.ClientResponseError as exc:
128
+ if exc.status != 401:
129
+ raise
130
+ auth_header = self._get_auth_header(upload=upload)
131
+ if not auth_header:
132
+ raise
133
+ async with session.post(
134
+ url,
135
+ headers={**headers, **auth_header},
136
+ json=body,
137
+ raise_for_status=True,
138
+ ) as resp:
139
+ data = await resp.json()
140
+ return data
141
+
142
+ async def _download(
143
+ self,
144
+ storage: "LFSStorage",
145
+ objects: Iterable[Pointer],
146
+ callback: "Callback" = DEFAULT_CALLBACK,
147
+ batch_size: Optional[int] = None,
148
+ **kwargs,
149
+ ):
150
+ async def _get_one(from_path: str, to_path: str, **kwargs):
151
+ with _as_atomic(to_path, create_parents=True) as tmp_file:
152
+ with callback.branched(from_path, tmp_file) as child:
153
+ await self._fs._get_file(
154
+ from_path, tmp_file, callback=child, **kwargs
155
+ )
156
+ callback.relative_update()
157
+
158
+ resp_data = await self._batch_request(objects, **kwargs)
159
+ if resp_data.get("transfer", "basic") != "basic":
160
+ raise LFSError("Unsupported LFS transfer type")
161
+ coros = []
162
+ for data in resp_data.get("objects", []):
163
+ obj = Pointer(data["oid"], data["size"])
164
+ download = data.get("actions", {}).get("download", {})
165
+ url = download.get("href")
166
+ if not url:
167
+ logger.debug("No download URL for LFS object '%s'", obj)
168
+ continue
169
+ headers = download.get("header", {})
170
+ to_path = storage.oid_to_path(obj.oid)
171
+ coros.append(_get_one(url, to_path, headers=headers))
172
+ for result in await _run_coros_in_chunks(
173
+ coros, batch_size=batch_size, return_exceptions=True
174
+ ):
175
+ if isinstance(result, BaseException):
176
+ raise result
177
+
178
+ download = sync_wrapper(_download)
179
+
180
+
181
+ class _HTTPLFSClient(LFSClient):
182
+ def __init__(self, url: str, git_url: str):
183
+ """
184
+ Args:
185
+ url: LFS server URL.
186
+ git_url: Git HTTP URL.
187
+ """
188
+ super().__init__(url)
189
+ self.git_url = git_url
190
+
191
+ @classmethod
192
+ def from_git_url(cls, git_url: str) -> "_HTTPLFSClient":
193
+ if git_url.endswith(".git"):
194
+ url = f"{git_url}/info/lfs"
195
+ else:
196
+ url = f"{git_url}.git/info/lfs"
197
+ return cls(url, git_url=git_url)
198
+
199
+ def _get_auth_header(self, *, upload: bool) -> dict:
200
+ try:
201
+ creds = Credential(url=self.git_url).fill()
202
+ if creds.username and creds.password:
203
+ return {
204
+ aiohttp.hdrs.AUTHORIZATION: aiohttp.BasicAuth(
205
+ creds.username, creds.password
206
+ ).encode()
207
+ }
208
+ except CredentialNotFoundError:
209
+ pass
210
+ return {}
211
+
212
+
213
+ class _SSHLFSClient(LFSClient):
214
+ _URL_PATTERN = re.compile(
215
+ r"(?:ssh://)?git@(?P<host>\S+?)(?::(?P<port>\d+))?(?:[:/])(?P<path>\S+?)\.git"
216
+ )
217
+
218
+ def __init__(self, url: str, host: str, port: int, path: str):
219
+ """
220
+ Args:
221
+ url: LFS server URL.
222
+ host: Git SSH server host.
223
+ port: Git SSH server port.
224
+ path: Git project path.
225
+ """
226
+ super().__init__(url)
227
+ self.host = host
228
+ self.port = port
229
+ self.path = path
230
+ self._ssh = _get_ssh_vendor()
231
+
232
+ @classmethod
233
+ def from_git_url(cls, git_url: str) -> "_SSHLFSClient":
234
+ result = cls._URL_PATTERN.match(git_url)
235
+ if not result:
236
+ raise ValueError(f"Invalid Git SSH URL: {git_url}")
237
+ host, port, path = result.group("host", "port", "path")
238
+ url = f"https://{host}/{path}.git/info/lfs"
239
+ return cls(url, host, int(port or 22), path)
240
+
241
+ def _get_auth_header(self, *, upload: bool) -> dict:
242
+ return self._git_lfs_authenticate(
243
+ self.host, self.port, f"{self.path}.git", upload=upload
244
+ ).get("header", {})
245
+
246
+ def _git_lfs_authenticate(
247
+ self, host: str, port: int, path: str, *, upload: bool = False
248
+ ) -> dict:
249
+ action = "upload" if upload else "download"
250
+ return json.loads(
251
+ self._ssh.run_command(
252
+ command=f"git-lfs-authenticate {path} {action}",
253
+ host=host,
254
+ port=port,
255
+ username="git",
256
+ ).read()
257
+ )
258
+
259
+
260
+ @contextmanager
261
+ def _as_atomic(to_info: str, create_parents: bool = False) -> Iterator[str]:
262
+ parent = os.path.dirname(to_info)
263
+ if create_parents:
264
+ os.makedirs(parent, exist_ok=True)
265
+
266
+ tmp_file = NamedTemporaryFile(dir=parent, delete=False)
267
+ tmp_file.close()
268
+ try:
269
+ yield tmp_file.name
270
+ except BaseException:
271
+ with suppress(FileNotFoundError):
272
+ os.unlink(tmp_file.name)
273
+ raise
274
+ else:
275
+ shutil.move(tmp_file.name, to_info)
@@ -0,0 +1,159 @@
1
+ import logging
2
+ import sys
3
+ from typing import Any, BinaryIO, Callable, ClassVar, Optional, Union
4
+
5
+ from fsspec.callbacks import DEFAULT_CALLBACK, Callback, TqdmCallback
6
+ from tqdm import tqdm
7
+
8
+ from scmrepo.progress import GitProgressEvent
9
+
10
+
11
+ class _Tqdm(tqdm):
12
+ """
13
+ maximum-compatibility tqdm-based progressbars
14
+ """
15
+
16
+ BAR_FMT_DEFAULT = (
17
+ "{percentage:3.0f}% {desc}|{bar}|"
18
+ "{postfix[info]}{n_fmt}/{total_fmt}"
19
+ " [{elapsed}<{remaining}, {rate_fmt:>11}]"
20
+ )
21
+ # nested bars should have fixed bar widths to align nicely
22
+ BAR_FMT_DEFAULT_NESTED = (
23
+ "{percentage:3.0f}%|{bar:10}|{desc:{ncols_desc}.{ncols_desc}}"
24
+ "{postfix[info]}{n_fmt}/{total_fmt}"
25
+ " [{elapsed}<{remaining}, {rate_fmt:>11}]"
26
+ )
27
+ BAR_FMT_NOTOTAL = "{desc}{bar:b}|{postfix[info]}{n_fmt} [{elapsed}, {rate_fmt:>11}]"
28
+ BYTES_DEFAULTS: ClassVar[dict[str, Any]] = {
29
+ "unit": "B",
30
+ "unit_scale": True,
31
+ "unit_divisor": 1024,
32
+ "miniters": 1,
33
+ }
34
+
35
+ def __init__( # noqa: PLR0913
36
+ self,
37
+ iterable=None,
38
+ disable=None,
39
+ level=logging.ERROR,
40
+ desc=None,
41
+ leave=False,
42
+ bar_format=None,
43
+ bytes=False, # noqa: A002
44
+ file=None,
45
+ total=None,
46
+ postfix=None,
47
+ **kwargs,
48
+ ):
49
+ kwargs = kwargs.copy()
50
+ if bytes:
51
+ kwargs = {**self.BYTES_DEFAULTS, **kwargs}
52
+ else:
53
+ kwargs.setdefault("unit_scale", total > 999 if total else True)
54
+ if file is None:
55
+ file = sys.stderr
56
+ super().__init__(
57
+ iterable=iterable,
58
+ disable=disable,
59
+ leave=leave,
60
+ desc=desc,
61
+ bar_format="!",
62
+ lock_args=(False,),
63
+ total=total,
64
+ **kwargs,
65
+ )
66
+ self.postfix = postfix or {"info": ""}
67
+ if bar_format is None:
68
+ if self.__len__():
69
+ self.bar_format = (
70
+ self.BAR_FMT_DEFAULT_NESTED if self.pos else self.BAR_FMT_DEFAULT
71
+ )
72
+ else:
73
+ self.bar_format = self.BAR_FMT_NOTOTAL
74
+ else:
75
+ self.bar_format = bar_format
76
+ self.refresh()
77
+
78
+ def update_to(self, current, total=None):
79
+ if total:
80
+ self.total = total
81
+ self.update(current - self.n)
82
+
83
+ def close(self):
84
+ self.postfix["info"] = ""
85
+ # remove ETA (either unknown or zero); remove completed bar
86
+ self.bar_format = self.bar_format.replace("<{remaining}", "").replace(
87
+ "|{bar:10}|", " "
88
+ )
89
+ super().close()
90
+
91
+ @property
92
+ def format_dict(self):
93
+ """inject `ncols_desc` to fill the display width (`ncols`)"""
94
+ d = super().format_dict
95
+ ncols = d["ncols"] or 80
96
+ # assumes `bar_format` has max one of ("ncols_desc" & "ncols_info")
97
+
98
+ meter = self.format_meter( # type: ignore[call-arg]
99
+ ncols_desc=1, ncols_info=1, **d
100
+ )
101
+ ncols_left = ncols - len(meter) + 1
102
+ ncols_left = max(ncols_left, 0)
103
+ if ncols_left:
104
+ d["ncols_desc"] = d["ncols_info"] = ncols_left
105
+ else:
106
+ # work-around for zero-width description
107
+ d["ncols_desc"] = d["ncols_info"] = 1
108
+ d["prefix"] = ""
109
+ return d
110
+
111
+
112
+ class LFSCallback(Callback):
113
+ """Callback subclass to generate Git/LFS style progress."""
114
+
115
+ def __init__(
116
+ self,
117
+ *args,
118
+ git_progress: Optional[Callable[[GitProgressEvent], None]] = None,
119
+ direction: str = "Downloading",
120
+ **kwargs,
121
+ ):
122
+ super().__init__(*args, **kwargs)
123
+ self.direction = direction
124
+ self.git_progress = git_progress
125
+
126
+ def call(self, *args, **kwargs):
127
+ super().call(*args, **kwargs)
128
+ self._update_git()
129
+
130
+ def _update_git(self):
131
+ if not self.git_progress:
132
+ return
133
+ event = GitProgressEvent(
134
+ phase=f"{self.direction} LFS objects",
135
+ completed=self.value,
136
+ total=self.size,
137
+ )
138
+ self.git_progress(event)
139
+
140
+ def branched(self, path_1: Union[str, BinaryIO], path_2: str, **kwargs):
141
+ if self.git_progress:
142
+ return TqdmCallback(
143
+ tqdm_kwargs={
144
+ "desc": path_1 if isinstance(path_1, str) else path_2,
145
+ "bytes": True,
146
+ },
147
+ tqdm_cls=_Tqdm,
148
+ )
149
+ return DEFAULT_CALLBACK
150
+
151
+ @classmethod
152
+ def as_lfs_callback(
153
+ cls,
154
+ git_progress: Optional[Callable[[GitProgressEvent], None]] = None,
155
+ **kwargs,
156
+ ):
157
+ if git_progress is None:
158
+ return DEFAULT_CALLBACK
159
+ return cls(git_progress=git_progress, **kwargs)
@@ -11,7 +11,10 @@ logger = logging.getLogger(__name__)
11
11
 
12
12
 
13
13
  def smudge(
14
- storage: "LFSStorage", fobj: BinaryIO, url: Optional[str] = None
14
+ storage: "LFSStorage",
15
+ fobj: BinaryIO,
16
+ url: Optional[str] = None,
17
+ batch_size: Optional[int] = None,
15
18
  ) -> BinaryIO:
16
19
  """Wrap the specified binary IO stream and run LFS smudge if necessary."""
17
20
  reader = io.BufferedReader(fobj) # type: ignore[arg-type]
@@ -20,13 +20,14 @@ class LFSStorage:
20
20
  url: str,
21
21
  objects: Collection[Pointer],
22
22
  progress: Optional[Callable[["GitProgressEvent"], None]] = None,
23
+ batch_size: Optional[int] = None,
23
24
  ):
24
25
  from .client import LFSClient
25
26
 
26
27
  with LFSCallback.as_lfs_callback(progress) as cb:
27
28
  cb.set_size(len(objects))
28
29
  with LFSClient.from_git_url(url) as client:
29
- client.download(self, objects, callback=cb)
30
+ client.download(self, objects, callback=cb, batch_size=batch_size)
30
31
 
31
32
  def oid_to_path(self, oid: str):
32
33
  return os.path.join(self.path, "objects", oid[0:2], oid[2:4], oid)
@@ -40,6 +41,7 @@ class LFSStorage:
40
41
  self,
41
42
  obj: Union[Pointer, str],
42
43
  fetch_url: Optional[str] = None,
44
+ batch_size: Optional[int] = None,
43
45
  **kwargs,
44
46
  ) -> BinaryIO:
45
47
  oid = obj if isinstance(obj, str) else obj.oid
@@ -50,7 +52,7 @@ class LFSStorage:
50
52
  if not fetch_url or not isinstance(obj, Pointer):
51
53
  raise
52
54
  try:
53
- self.fetch(fetch_url, [obj])
55
+ self.fetch(fetch_url, [obj], batch_size=batch_size)
54
56
  except BaseException as exc: # noqa: BLE001
55
57
  raise FileNotFoundError(
56
58
  errno.ENOENT, os.strerror(errno.ENOENT), path
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scmrepo
3
- Version: 3.0.0
3
+ Version: 3.2.0
4
4
  Summary: scmrepo
5
5
  Author-email: Iterative <support@dvc.org>
6
6
  License: Apache-2.0
@@ -19,13 +19,12 @@ Requires-Dist: gitpython>3
19
19
  Requires-Dist: dulwich>=0.21.6
20
20
  Requires-Dist: pygit2>=1.14.0
21
21
  Requires-Dist: pygtrie>=2.3.2
22
- Requires-Dist: fsspec>=2024.2.0
22
+ Requires-Dist: fsspec[tqdm]>=2024.2.0
23
23
  Requires-Dist: pathspec>=0.9.0
24
24
  Requires-Dist: asyncssh<3,>=2.13.1
25
25
  Requires-Dist: funcy>=1.14
26
- Requires-Dist: shortuuid>=0.5.0
27
- Requires-Dist: dvc-objects<5,>=4
28
- Requires-Dist: dvc-http>=2.29.0
26
+ Requires-Dist: aiohttp-retry>=2.5.0
27
+ Requires-Dist: tqdm
29
28
  Provides-Extra: tests
30
29
  Requires-Dist: pytest==7.2.0; extra == "tests"
31
30
  Requires-Dist: pytest-sugar==0.9.5; extra == "tests"
@@ -40,6 +39,7 @@ Requires-Dist: paramiko==3.3.1; extra == "tests"
40
39
  Requires-Dist: types-certifi==2021.10.8.3; extra == "tests"
41
40
  Requires-Dist: types-mock==5.1.0.2; extra == "tests"
42
41
  Requires-Dist: types-paramiko==3.4.0.20240120; extra == "tests"
42
+ Requires-Dist: types-tqdm; extra == "tests"
43
43
  Provides-Extra: dev
44
44
  Requires-Dist: scmrepo[tests]; extra == "dev"
45
45
 
@@ -2,13 +2,12 @@ gitpython>3
2
2
  dulwich>=0.21.6
3
3
  pygit2>=1.14.0
4
4
  pygtrie>=2.3.2
5
- fsspec>=2024.2.0
5
+ fsspec[tqdm]>=2024.2.0
6
6
  pathspec>=0.9.0
7
7
  asyncssh<3,>=2.13.1
8
8
  funcy>=1.14
9
- shortuuid>=0.5.0
10
- dvc-objects<5,>=4
11
- dvc-http>=2.29.0
9
+ aiohttp-retry>=2.5.0
10
+ tqdm
12
11
 
13
12
  [dev]
14
13
  scmrepo[tests]
@@ -26,6 +25,7 @@ paramiko==3.3.1
26
25
  types-certifi==2021.10.8.3
27
26
  types-mock==5.1.0.2
28
27
  types-paramiko==3.4.0.20240120
28
+ types-tqdm
29
29
 
30
30
  [tests:python_version < "3.10" and implementation_name != "pypy"]
31
31
  pytest-docker==2.2.0
@@ -1,171 +0,0 @@
1
- import logging
2
- from collections.abc import Iterable
3
- from contextlib import AbstractContextManager
4
- from typing import TYPE_CHECKING, Any, Optional
5
-
6
- import aiohttp
7
- from dvc_http import HTTPFileSystem
8
- from dvc_objects.executors import batch_coros
9
- from dvc_objects.fs import localfs
10
- from dvc_objects.fs.utils import as_atomic
11
- from fsspec.asyn import sync_wrapper
12
- from fsspec.callbacks import DEFAULT_CALLBACK
13
- from funcy import cached_property
14
-
15
- from scmrepo.git.credentials import Credential, CredentialNotFoundError
16
-
17
- from .exceptions import LFSError
18
- from .pointer import Pointer
19
-
20
- if TYPE_CHECKING:
21
- from fsspec.callbacks import Callback
22
-
23
- from .storage import LFSStorage
24
-
25
- logger = logging.getLogger(__name__)
26
-
27
-
28
- # pylint: disable=abstract-method
29
- class _LFSFileSystem(HTTPFileSystem):
30
- def _prepare_credentials(self, **config):
31
- return {}
32
-
33
-
34
- class LFSClient(AbstractContextManager):
35
- """Naive read-only LFS HTTP client."""
36
-
37
- JSON_CONTENT_TYPE = "application/vnd.git-lfs+json"
38
-
39
- def __init__(
40
- self,
41
- url: str,
42
- git_url: Optional[str] = None,
43
- headers: Optional[dict[str, str]] = None,
44
- ):
45
- """
46
- Args:
47
- url: LFS server URL.
48
- """
49
- self.url = url
50
- self.git_url = git_url
51
- self.headers: dict[str, str] = headers or {}
52
-
53
- def __exit__(self, *args, **kwargs):
54
- self.close()
55
-
56
- @cached_property
57
- def fs(self) -> "_LFSFileSystem":
58
- return _LFSFileSystem()
59
-
60
- @property
61
- def httpfs(self) -> "HTTPFileSystem":
62
- return self.fs.fs
63
-
64
- @property
65
- def loop(self):
66
- return self.httpfs.loop
67
-
68
- @classmethod
69
- def from_git_url(cls, git_url: str) -> "LFSClient":
70
- if git_url.endswith(".git"):
71
- url = f"{git_url}/info/lfs"
72
- else:
73
- url = f"{git_url}.git/info/lfs"
74
- return cls(url, git_url=git_url)
75
-
76
- def close(self):
77
- pass
78
-
79
- def _get_auth(self) -> Optional[aiohttp.BasicAuth]:
80
- try:
81
- creds = Credential(url=self.git_url).fill()
82
- if creds.username and creds.password:
83
- return aiohttp.BasicAuth(creds.username, creds.password)
84
- except CredentialNotFoundError:
85
- pass
86
- return None
87
-
88
- async def _set_session(self) -> aiohttp.ClientSession:
89
- return await self.fs.fs.set_session()
90
-
91
- async def _batch_request(
92
- self,
93
- objects: Iterable[Pointer],
94
- upload: bool = False,
95
- ref: Optional[str] = None,
96
- hash_algo: str = "sha256",
97
- ) -> dict[str, Any]:
98
- """Send LFS API /objects/batch request."""
99
- url = f"{self.url}/objects/batch"
100
- body: dict[str, Any] = {
101
- "operation": "upload" if upload else "download",
102
- "transfers": ["basic"],
103
- "objects": [{"oid": obj.oid, "size": obj.size} for obj in objects],
104
- "hash_algo": hash_algo,
105
- }
106
- if ref:
107
- body["ref"] = [{"name": ref}]
108
- session = await self._set_session()
109
- headers = dict(self.headers)
110
- headers["Accept"] = self.JSON_CONTENT_TYPE
111
- headers["Content-Type"] = self.JSON_CONTENT_TYPE
112
- try:
113
- async with session.post(
114
- url,
115
- headers=headers,
116
- json=body,
117
- raise_for_status=True,
118
- ) as resp:
119
- data = await resp.json()
120
- except aiohttp.ClientResponseError as exc:
121
- if exc.status != 401:
122
- raise
123
- auth = self._get_auth()
124
- if auth is None:
125
- raise
126
- async with session.post(
127
- url,
128
- auth=auth,
129
- headers=headers,
130
- json=body,
131
- raise_for_status=True,
132
- ) as resp:
133
- data = await resp.json()
134
- return data
135
-
136
- async def _download(
137
- self,
138
- storage: "LFSStorage",
139
- objects: Iterable[Pointer],
140
- callback: "Callback" = DEFAULT_CALLBACK,
141
- **kwargs,
142
- ):
143
- async def _get_one(from_path: str, to_path: str, **kwargs):
144
- with as_atomic(localfs, to_path, create_parents=True) as tmp_file:
145
- with callback.branched(from_path, tmp_file) as child:
146
- await self.httpfs._get_file(
147
- from_path, tmp_file, callback=child, **kwargs
148
- ) # pylint: disable=protected-access
149
- callback.relative_update()
150
-
151
- resp_data = await self._batch_request(objects, **kwargs)
152
- if resp_data.get("transfer", "basic") != "basic":
153
- raise LFSError("Unsupported LFS transfer type")
154
- coros = []
155
- for data in resp_data.get("objects", []):
156
- obj = Pointer(data["oid"], data["size"])
157
- download = data.get("actions", {}).get("download", {})
158
- url = download.get("href")
159
- if not url:
160
- logger.debug("No download URL for LFS object '%s'", obj)
161
- continue
162
- headers = download.get("header", {})
163
- to_path = storage.oid_to_path(obj.oid)
164
- coros.append(_get_one(url, to_path, headers=headers))
165
- for result in await batch_coros(
166
- coros, batch_size=self.fs.jobs, return_exceptions=True
167
- ):
168
- if isinstance(result, BaseException):
169
- raise result
170
-
171
- download = sync_wrapper(_download)
@@ -1,52 +0,0 @@
1
- from typing import BinaryIO, Callable, Optional, Union
2
-
3
- from dvc_objects.fs.callbacks import TqdmCallback
4
- from fsspec.callbacks import DEFAULT_CALLBACK, Callback
5
-
6
- from scmrepo.progress import GitProgressEvent
7
-
8
-
9
- class LFSCallback(Callback):
10
- """Callback subclass to generate Git/LFS style progress."""
11
-
12
- def __init__(
13
- self,
14
- *args,
15
- git_progress: Optional[Callable[[GitProgressEvent], None]] = None,
16
- direction: str = "Downloading",
17
- **kwargs,
18
- ):
19
- super().__init__(*args, **kwargs)
20
- self.direction = direction
21
- self.git_progress = git_progress
22
-
23
- def call(self, *args, **kwargs):
24
- super().call(*args, **kwargs)
25
- self._update_git()
26
-
27
- def _update_git(self):
28
- if not self.git_progress:
29
- return
30
- event = GitProgressEvent(
31
- phase=f"{self.direction} LFS objects",
32
- completed=self.value,
33
- total=self.size,
34
- )
35
- self.git_progress(event)
36
-
37
- def branched(self, path_1: Union[str, BinaryIO], path_2: str, **kwargs):
38
- if self.git_progress:
39
- return TqdmCallback(
40
- bytes=True, desc=path_1 if isinstance(path_1, str) else path_2
41
- )
42
- return DEFAULT_CALLBACK
43
-
44
- @classmethod
45
- def as_lfs_callback(
46
- cls,
47
- git_progress: Optional[Callable[[GitProgressEvent], None]] = None,
48
- **kwargs,
49
- ):
50
- if git_progress is None:
51
- return DEFAULT_CALLBACK
52
- return cls(git_progress=git_progress, **kwargs)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes