arcfs-fsspec 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arcfs_fsspec-0.1.7/PKG-INFO +11 -0
- arcfs_fsspec-0.1.7/pyproject.toml +26 -0
- arcfs_fsspec-0.1.7/setup.cfg +4 -0
- arcfs_fsspec-0.1.7/src/arcfs/__init__.py +0 -0
- arcfs_fsspec-0.1.7/src/arcfs/async_lfs_file.py +187 -0
- arcfs_fsspec-0.1.7/src/arcfs/fs.py +605 -0
- arcfs_fsspec-0.1.7/src/arcfs/gitlab_client.py +1173 -0
- arcfs_fsspec-0.1.7/src/arcfs/transactions.py +156 -0
- arcfs_fsspec-0.1.7/src/arcfs/utils.py +112 -0
- arcfs_fsspec-0.1.7/src/arcfs_fsspec.egg-info/PKG-INFO +11 -0
- arcfs_fsspec-0.1.7/src/arcfs_fsspec.egg-info/SOURCES.txt +13 -0
- arcfs_fsspec-0.1.7/src/arcfs_fsspec.egg-info/dependency_links.txt +1 -0
- arcfs_fsspec-0.1.7/src/arcfs_fsspec.egg-info/requires.txt +7 -0
- arcfs_fsspec-0.1.7/src/arcfs_fsspec.egg-info/top_level.txt +1 -0
- arcfs_fsspec-0.1.7/tests/test_gl_arc_fs_unit.py +484 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arcfs-fsspec
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: GitLab ARC filesystem backend for fsspec
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: aiofiles
|
|
7
|
+
Requires-Dist: aiohttp
|
|
8
|
+
Requires-Dist: fsspec
|
|
9
|
+
Provides-Extra: test
|
|
10
|
+
Requires-Dist: pytest; extra == "test"
|
|
11
|
+
Requires-Dist: pytest-asyncio; extra == "test"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "arcfs-fsspec"
|
|
7
|
+
version = "0.1.7"
|
|
8
|
+
description = "GitLab ARC filesystem backend for fsspec"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"aiofiles",
|
|
12
|
+
"aiohttp",
|
|
13
|
+
"fsspec",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
test = [
|
|
18
|
+
"pytest",
|
|
19
|
+
"pytest-asyncio",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[tool.setuptools]
|
|
23
|
+
package-dir = {"" = "src"}
|
|
24
|
+
|
|
25
|
+
[tool.setuptools.packages.find]
|
|
26
|
+
where = ["src"]
|
|
File without changes
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""async_lfs_file.py
|
|
2
|
+
|
|
3
|
+
Async streamed file used by GitLabARCFileSystem.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import io
|
|
9
|
+
from hashlib import sha256
|
|
10
|
+
|
|
11
|
+
import aiofiles
|
|
12
|
+
from fsspec.asyn import AbstractAsyncStreamedFile
|
|
13
|
+
|
|
14
|
+
from .transactions import commit_lfs_transaction
|
|
15
|
+
|
|
16
|
+
# Module-level alias for the async temporary-file helpers shipped with
# aiofiles; AsyncLFSFile uses it to create its backing file.
# NOTE: within this module the name ``tempfile`` refers to this alias, which
# has the same name as the standard-library ``tempfile`` module.
tempfile = aiofiles.tempfile
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AsyncLFSFile(AbstractAsyncStreamedFile):
    """
    Async fsspec streamed file backed by a temporary file and Git LFS commits.

    Reads lazily download the GitLab file into a temporary file. Writes update
    the temporary file and commit changed content through the LFS transaction
    workflow when the context manager exits successfully. The temporary file
    is always closed on exit, even when the commit itself fails.
    """

    def __init__(self, fs, path, token, repo_id, ref, mode="rb", **kwargs):
        """
        Create an async streamed file that reads from GitLab and writes via LFS.

        Args:
            fs: Owning ``GitLabARCFileSystem`` instance.
            path: Repository-internal file path.
            token: GitLab token used for LFS upload authentication.
            repo_id: Numeric GitLab project id.
            ref: Branch, tag, or commit SHA to read from or base writes on.
            mode: File mode such as ``"rb"`` or ``"wb"``.
            **kwargs: Additional fsspec streamed-file arguments.
        """
        super().__init__(fs=fs, path=path, mode=mode, **kwargs)
        self.path = path
        self.token = token
        self.repo_id = repo_id
        self.ref = ref
        self.mode = mode

        # Backing temporary file; created lazily by ``_ensure_tmp``.
        self._tmp = None
        # Running SHA-256 of every chunk passed to ``write``.
        self._shasum = sha256()
        # True once ``write`` has been called; gates the commit on exit.
        self._changed = False
        # True once the remote content has been copied into the temp file.
        self._downloaded = False
        self.fs = fs

    async def _ensure_tmp(self):
        """
        Create the temporary backing file if it does not already exist.

        Returns:
            None.
        """
        if self._tmp is None:
            self._tmp = await tempfile.NamedTemporaryFile(mode="w+b", delete=True)

    async def __aenter__(self):
        """
        Enter the async context manager and prepare the backing file.

        In read modes the remote file is downloaded up front (once).

        Returns:
            This ``AsyncLFSFile`` instance, ready for async reads or writes.
        """
        await self._ensure_tmp()
        if "r" in self.mode and not self._downloaded:
            await self._download_from_gitlab()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """
        Exit the async context manager, committing changed data when successful.

        The commit is attempted only when the ``async with`` body raised no
        exception. The temporary file is released and the file is marked
        closed in every case, including when the commit raises; the commit
        error (if any) still propagates to the caller.

        Args:
            exc_type: Exception type raised inside the context, or ``None``.
            exc: Exception instance raised inside the context, or ``None``.
            tb: Traceback for the exception, or ``None``.

        Returns:
            None.
        """
        try:
            if exc_type is None:
                await self._commit()
        finally:
            # Detach the handle before closing so the object is left in a
            # consistent "closed" state even if closing the temp file fails.
            tmp, self._tmp = self._tmp, None
            self.closed = True
            if tmp is not None:
                await tmp.close()

    async def _download_from_gitlab(self):
        """
        Download the remote GitLab file into the temporary backing file.

        Uses ``self.ref`` when set, otherwise the project's default branch as
        reported by the client. After the download the temp file is rewound
        to offset 0 and the file is flagged as downloaded.

        Returns:
            None.
        """
        ref = self.ref or await self.fs.client.get_default_branch(self.repo_id)

        await self._ensure_tmp()
        async for chunk in self.fs.client.stream_file(
            repo_id=self.repo_id,
            path=self.path,
            ref=ref,
        ):
            # Written straight to the temp file: downloaded bytes are NOT fed
            # into ``self._shasum`` and do not mark the file as changed.
            await self._tmp.write(chunk)

        await self._tmp.seek(0)
        self._downloaded = True

    async def read(self, length=-1):
        """
        Read bytes from the temporary file, downloading first when needed.

        Args:
            length: Maximum number of bytes to read, or ``-1`` for the rest of
                the file.

        Returns:
            Bytes read from the file.
        """
        await self._ensure_tmp()
        if "r" in self.mode and not self._downloaded:
            await self._download_from_gitlab()
        return await self._tmp.read(length)

    async def write(self, data):
        """
        Write bytes to the temporary file and update the upload checksum.

        Args:
            data: ``bytes`` or ``str`` data to write. Strings are encoded as
                UTF-8 bytes before writing.

        Returns:
            Number of bytes written, as returned by the temporary file.
        """
        await self._ensure_tmp()
        if isinstance(data, str):
            data = data.encode()
        self._changed = True
        # NOTE(review): the checksum only sees bytes that pass through here.
        # If a file opened for reading is also written to, the hash will not
        # match the full temp-file content uploaded by ``_commit`` — confirm
        # whether writes in read mode should be rejected.
        self._shasum.update(data)
        return await self._tmp.write(data)

    async def _commit(self, feature_branch_prefix: str = "run_results"):
        """
        Commit changed temporary-file content through the Git LFS workflow.

        Does nothing when ``write`` was never called. Otherwise measures the
        temp file, rewinds it, resolves the project, and hands everything to
        ``commit_lfs_transaction``. Afterwards the owning filesystem is asked
        to invalidate its caches for this path, if it supports that hook.

        Args:
            feature_branch_prefix: Prefix used for the generated feature branch.

        Returns:
            None.

        Raises:
            FileNotFoundError: If the project id cannot be resolved.
        """
        if not self._changed:
            return

        await self._ensure_tmp()
        # Seek to the end to learn the total size, then rewind so the upload
        # streams the file from its first byte.
        await self._tmp.seek(0, io.SEEK_END)
        size = await self._tmp.tell()
        await self._tmp.seek(0)
        sha = self._shasum.hexdigest()

        repo = await self.fs.client.get_project_by_id(self.repo_id)
        if not repo:
            raise FileNotFoundError(f"Project id {self.repo_id} not found")

        await commit_lfs_transaction(
            client=self.fs.client,
            token=str(self.token or ""),
            repo=repo,
            # NOTE(review): ``self.ref`` may be None here, whereas the download
            # path falls back to the default branch — confirm the transaction
            # helper handles a missing base branch.
            base_branch=self.ref,
            final_path=self.path,
            sha=sha,
            size=size,
            data_stream=self._tmp,
            feature_branch_prefix=feature_branch_prefix,
            tmp_pointer_name=True,
            create_mr=True,
        )

        # Optional hook: only filesystems that define it get a cache refresh.
        if hasattr(self.fs, "_invalidate_after_write"):
            self.fs._invalidate_after_write(repo=repo, inside_path=self.path)
|