msgraphfs 0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msgraphfs/__init__.py +1 -0
- msgraphfs/core.py +1331 -0
- msgraphfs-0.1.dist-info/METADATA +250 -0
- msgraphfs-0.1.dist-info/RECORD +7 -0
- msgraphfs-0.1.dist-info/WHEEL +4 -0
- msgraphfs-0.1.dist-info/entry_points.txt +2 -0
- msgraphfs-0.1.dist-info/licenses/LICENSE +21 -0
msgraphfs/core.py
ADDED
|
@@ -0,0 +1,1331 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import datetime
|
|
3
|
+
import logging
|
|
4
|
+
import mimetypes
|
|
5
|
+
import re
|
|
6
|
+
import weakref
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from authlib.integrations.httpx_client import AsyncOAuth2Client
|
|
10
|
+
from fsspec.asyn import (
|
|
11
|
+
AbstractAsyncStreamedFile,
|
|
12
|
+
AbstractBufferedFile,
|
|
13
|
+
AsyncFileSystem,
|
|
14
|
+
FSTimeoutError,
|
|
15
|
+
sync,
|
|
16
|
+
sync_wrapper,
|
|
17
|
+
)
|
|
18
|
+
from fsspec.utils import tokenize
|
|
19
|
+
from httpx import HTTPStatusError, Response
|
|
20
|
+
from httpx._types import URLTypes
|
|
21
|
+
|
|
22
|
+
HTTPX_RETRYABLE_ERRORS = (
|
|
23
|
+
asyncio.TimeoutError,
|
|
24
|
+
httpx.NetworkError,
|
|
25
|
+
httpx.ProxyError,
|
|
26
|
+
httpx.TimeoutException,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
HTTPX_RETRYABLE_HTTP_STATUS_CODES = (500, 502, 503, 504)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
_logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def parse_range_header(range_header):
    """Extract the numeric bounds from an HTTP ``Range`` header value.

    The value is expected to look like ``bytes=0-499``; either bound may be
    left out (``bytes=100-`` or ``bytes=-500``).

    Parameters:
        range_header (str): The raw value of the ``Range`` header.

    Returns:
        tuple: ``(start, end)`` where each element is an ``int``, or ``None``
        when that bound is absent from the header.

    Raises:
        ValueError: If the value does not begin with a ``bytes=<n>-<m>`` range.
    """
    parsed = re.match(r"bytes=(\d+)?-(\d+)?", range_header)
    if parsed is None:
        raise ValueError("Invalid Range header format")
    first, last = parsed.groups()
    # An optional group that did not take part in the match is None.
    return (int(first) if first else None, int(last) if last else None)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def wrap_http_not_found_exceptions(func):
    """Wrap a coroutine function so that HTTP 404 errors become ``FileNotFoundError``.

    Parameters:
        func: The coroutine function performing the HTTP request.

    Returns:
        A coroutine function with the same metadata as ``func``. It returns
        whatever ``func`` returns and re-raises every ``HTTPStatusError``
        unchanged except for status 404, which is converted into a
        ``FileNotFoundError`` chained to the original error.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        try:
            return await func(*args, **kwargs)
        except HTTPStatusError as e:
            if e.response.status_code != 404:
                raise
            path = e.request.url.path
            if "root:" in path:
                # Keep only the part that follows the last "root:" marker.
                path = path.split("root:")[-1]
                # endswith() is safe on an empty string, unlike path[-1].
                if path.endswith(":"):
                    path = path[:-1]
            raise FileNotFoundError(f"File not found: {path}") from e

    return wrapper
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@wrap_http_not_found_exceptions
async def _http_call_with_retry(func, *, args=(), kwargs=None, retries) -> Response:
    """Await ``func(*args, **kwargs)`` and retry on transient failures.

    Parameters:
        func: Coroutine function returning a response object.
        args (tuple): Positional arguments forwarded to ``func``.
        kwargs (dict | None): Keyword arguments forwarded to ``func``.
        retries (int): Maximum number of attempts. At least one attempt is
            always made.

    Returns:
        Response: The first response whose ``raise_for_status()`` succeeds.

    Raises:
        The last retryable error once all attempts are used, or immediately
        any ``HTTPStatusError`` whose status code is not listed in
        ``HTTPX_RETRYABLE_HTTP_STATUS_CODES``.
    """
    kwargs = kwargs or {}
    # Honour the caller's value (it used to be overwritten with 1, which
    # disabled retrying) while still guaranteeing one attempt.
    attempts = max(1, retries)
    for attempt in range(attempts):
        is_last_attempt = attempt == attempts - 1
        try:
            response = await func(*args, **kwargs)
            response.raise_for_status()
            return response
        except HTTPX_RETRYABLE_ERRORS as e:
            if is_last_attempt:
                raise
            _logger.debug("Retryable error: %s", e)
        except HTTPStatusError as e:
            status_code = e.response.status_code
            if status_code not in HTTPX_RETRYABLE_HTTP_STATUS_CODES:
                # 404 is not logged as an error here.
                if status_code != 404:
                    _logger.error(
                        "HTTP error %s: %s", status_code, e.response.content
                    )
                raise
            if is_last_attempt:
                raise
            _logger.debug("Retryable HTTP status code: %s", status_code)
        # Exponential back-off, capped at 15 seconds.
        await asyncio.sleep(min(1.7**attempt * 0.1, 15))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class AbstractMSGraphFS(AsyncFileSystem):
|
|
99
|
+
"""A filesystem that represents microsoft files exposed through the microsoft graph
|
|
100
|
+
API.
|
|
101
|
+
|
|
102
|
+
parameters:
|
|
103
|
+
oauth2_client_params (dict): Parameters for the OAuth2 client to use for
|
|
104
|
+
authentication. see https://docs.authlib.org/en/latest/client/api.html#authlib.integrations.httpx_client.AsyncOAuth2Client
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
retries = 5
|
|
108
|
+
blocksize = 10 * 1024 * 1024 # 10 MB
|
|
109
|
+
|
|
110
|
+
def __init__(
    self,
    oauth2_client_params: dict,
    **kwargs,
):
    """Set up the filesystem and the OAuth2 HTTP client it uses.

    Parameters:
        oauth2_client_params (dict): Keyword arguments forwarded to
            ``AsyncOAuth2Client``.
        **kwargs: Forwarded to the fsspec superclass, minus the listing-cache
            options. ``use_recycle_bin`` (default ``False``) is also read
            from here and stored on the instance.
    """
    # Listing-cache options are removed before reaching the fsspec
    # superclass... we don't support directory caching
    ignored_options = ("use_listings_cache", "listings_expiry_time", "max_paths")
    super_kwargs = {
        key: value for key, value in kwargs.items() if key not in ignored_options
    }
    super().__init__(**super_kwargs)

    self.client: AsyncOAuth2Client = AsyncOAuth2Client(
        **oauth2_client_params,
        follow_redirects=True,
    )
    if not self.asynchronous:
        # Close the HTTP client when this instance is garbage collected.
        weakref.finalize(self, self.close_http_session, self.client, self.loop)
    self.use_recycle_bin = kwargs.get("use_recycle_bin", False)
|
|
129
|
+
|
|
130
|
+
@staticmethod
def close_http_session(
    client: AsyncOAuth2Client, loop: asyncio.AbstractEventLoop | None = None
):
    """Close the HTTP session on a best-effort basis.

    Nothing is done unless ``loop`` is given, running and not closed.
    Otherwise the close is first scheduled as a task on the current event
    loop; if that raises ``RuntimeError`` it is attempted through ``sync``
    with a short timeout, and a timeout there is ignored.
    """
    if loop is None or not loop.is_running() or loop.is_closed():
        return
    try:
        loop = asyncio.get_event_loop()
        loop.create_task(client.aclose())
    except RuntimeError:
        pass
    else:
        return
    try:
        sync(loop, client.aclose, timeout=0.1)
    except FSTimeoutError:
        pass
|
|
147
|
+
|
|
148
|
+
def _path_to_url(self, path, item_id=None, action=None) -> str:
|
|
149
|
+
"""This method must be implemented by subclasses to convert a path to a valid
|
|
150
|
+
URL to call the Microsoft Graph API for the given path according to the target
|
|
151
|
+
service.
|
|
152
|
+
|
|
153
|
+
(OneDrive, SharePoint, etc.)
|
|
154
|
+
"""
|
|
155
|
+
raise NotImplementedError
|
|
156
|
+
|
|
157
|
+
def _get_path(self, drive_item_info: dict) -> str:
|
|
158
|
+
parent_path = drive_item_info["parentReference"].get("path")
|
|
159
|
+
if not parent_path:
|
|
160
|
+
return "/"
|
|
161
|
+
# remove all the part before the "root:"
|
|
162
|
+
parent_path = parent_path.split("root:")[1]
|
|
163
|
+
if parent_path and not parent_path.startswith("/"):
|
|
164
|
+
parent_path = "/" + parent_path
|
|
165
|
+
return parent_path + "/" + drive_item_info["name"]
|
|
166
|
+
|
|
167
|
+
def _drive_item_info_to_fsspec_info(self, drive_item_info: dict) -> dict:
|
|
168
|
+
"""Convert a drive item info to a fsspec info dictionary.
|
|
169
|
+
|
|
170
|
+
see
|
|
171
|
+
https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
|
|
172
|
+
"""
|
|
173
|
+
_type = "other"
|
|
174
|
+
if drive_item_info.get("folder"):
|
|
175
|
+
_type = "directory"
|
|
176
|
+
elif drive_item_info.get("file"):
|
|
177
|
+
_type = "file"
|
|
178
|
+
data = {
|
|
179
|
+
"name": self._get_path(drive_item_info),
|
|
180
|
+
"size": drive_item_info.get("size", 0),
|
|
181
|
+
"type": _type,
|
|
182
|
+
"item_info": drive_item_info,
|
|
183
|
+
"time": datetime.datetime.fromisoformat(
|
|
184
|
+
drive_item_info.get("createdDateTime", "1970-01-01T00:00:00Z")
|
|
185
|
+
),
|
|
186
|
+
"mtime": datetime.datetime.fromisoformat(
|
|
187
|
+
drive_item_info.get("lastModifiedDateTime", "1970-01-01T00:00:00Z")
|
|
188
|
+
),
|
|
189
|
+
"id": drive_item_info.get("id"),
|
|
190
|
+
}
|
|
191
|
+
if _type == "file":
|
|
192
|
+
data["mimetype"] = drive_item_info.get("file", {}).get("mimeType", "")
|
|
193
|
+
return data
|
|
194
|
+
|
|
195
|
+
async def _get_item_id(self, path: str, throw_on_missing=False) -> str | None:
    """Look up the item ID of a file or directory.

    Parameters:
        path (str): The path to the file or directory.
        throw_on_missing (bool): When True, a missing item raises instead of
            yielding None.

    Returns:
        str: The item ID, or None when the item does not exist and
        ``throw_on_missing`` is False.

    Raises:
        FileNotFoundError: If the item is missing and ``throw_on_missing``
            is True.
    """
    item_url = self._path_to_url(path)
    try:
        # Only the "id" property is requested.
        reply = await self._msgraph_get(item_url, params={"select": "id"})
        return reply.json()["id"]
    except FileNotFoundError:
        if not throw_on_missing:
            return None
        raise
|
|
212
|
+
|
|
213
|
+
get_item_id = sync_wrapper(_get_item_id)
|
|
214
|
+
|
|
215
|
+
async def _get_item_reference(self, path: str, item_id: str | None = None) -> dict:
    """Fetch the properties that form an item reference for ``path``.

    The result can be used wherever another call needs an itemReference,
    see
    https://docs.microsoft.com/en-us/graph/api/resources/itemreference?view=graph-rest-1.0

    Parameters:
        path (str): Path of the item.
        item_id (str | None): Passed to ``_path_to_url`` together with the
            path.

    Returns:
        dict: The decoded JSON response.
    """
    selected = "id,driveId,driveType,name,path,shareId,sharepointIds,siteId"
    reference_url = self._path_to_url(path, item_id=item_id)
    reply = await self._msgraph_get(reference_url, params={"select": selected})
    return reply.json()
|
|
231
|
+
|
|
232
|
+
@staticmethod
|
|
233
|
+
def _guess_type(path: str) -> str:
|
|
234
|
+
return mimetypes.guess_type(path)[0] or "application/octet-stream"
|
|
235
|
+
|
|
236
|
+
################################################
|
|
237
|
+
# Helper methods to call the Microsoft Graph API
|
|
238
|
+
################################################
|
|
239
|
+
async def _call_msgraph(
    self, http_method: str, url: URLTypes, *args, **kwargs
) -> Response:
    """Send a request to the Microsoft Graph API through the retry helper.

    Parameters:
        http_method (str): HTTP verb, e.g. ``"GET"``.
        url: Target URL.
        *args, **kwargs: Forwarded to ``self.client.request``.

    Returns:
        Response: The response returned by ``_http_call_with_retry``.
    """
    request_args = (http_method, url, *args)
    return await _http_call_with_retry(
        self.client.request,
        args=request_args,
        kwargs=kwargs,
        retries=self.retries,
    )
|
|
249
|
+
|
|
250
|
+
call_msgraph = sync_wrapper(_call_msgraph)
|
|
251
|
+
|
|
252
|
+
async def _msgraph_get(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a GET request against the Microsoft Graph API."""
    reply = await self._call_msgraph("GET", url, *args, **kwargs)
    return reply
|
|
255
|
+
|
|
256
|
+
msgraph_get = sync_wrapper(_msgraph_get)
|
|
257
|
+
|
|
258
|
+
async def _msgraph_post(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a POST request against the Microsoft Graph API."""
    reply = await self._call_msgraph("POST", url, *args, **kwargs)
    return reply
|
|
261
|
+
|
|
262
|
+
msgraph_post = sync_wrapper(_msgraph_post)
|
|
263
|
+
|
|
264
|
+
async def _msgraph_put(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a PUT request against the Microsoft Graph API."""
    reply = await self._call_msgraph("PUT", url, *args, **kwargs)
    return reply
|
|
267
|
+
|
|
268
|
+
msgraph_put = sync_wrapper(_msgraph_put)
|
|
269
|
+
|
|
270
|
+
async def _msgraph_delete(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a DELETE request against the Microsoft Graph API."""
    reply = await self._call_msgraph("DELETE", url, *args, **kwargs)
    return reply
|
|
273
|
+
|
|
274
|
+
msgraph_delete = sync_wrapper(_msgraph_delete)
|
|
275
|
+
|
|
276
|
+
async def _msgraph_patch(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a PATCH request against the Microsoft Graph API."""
    reply = await self._call_msgraph("PATCH", url, *args, **kwargs)
    return reply
|
|
279
|
+
|
|
280
|
+
msgraph_patch = sync_wrapper(_msgraph_patch)
|
|
281
|
+
|
|
282
|
+
################################################
|
|
283
|
+
# Others methods
|
|
284
|
+
################################################
|
|
285
|
+
|
|
286
|
+
async def _get_copy_status(self, url: str) -> dict:
    """Get the status of a copy operation.

    Parameters:
        url (str): The monitoring URL of the copy operation.

    Returns:
        dict: With the following keys, each taken from the JSON response
        (None when the response does not contain it):

        "status": The status of the copy operation. Possible values are:
            "completed", "failed", "inProgress", "notStarted"
        "resource_id": The ID of the resource that was copied.
        "percent_complete": The percentage of the copy operation that has
            completed.
    """
    # A dedicated client (not self.client) is used for this request; the
    # context manager makes sure its connections are released.
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
    value = response.json()
    return {
        "status": value.get("status"),
        "resource_id": value.get("resourceId"),
        "percent_complete": value.get("percentageComplete"),
    }
|
|
302
|
+
|
|
303
|
+
get_copy_status = sync_wrapper(_get_copy_status)
|
|
304
|
+
|
|
305
|
+
async def _msggraph_item_copy(
    self, path1: str, path2: str, wait_completion=True, **kwargs
):
    """Copy the item at ``path1`` to ``path2``.

    Parameters
    ----------
    path1 : str
        Source path
    path2 : str
        Destination path; its last component is the new name and the rest
        is the destination parent.
    wait_completion : bool (=True)
        The copy request only returns a monitoring URL (the ``Location``
        response header). If wait_completion is True, that URL is polled
        once per second until the reported status is "completed"; a
        "failed" status raises ``RuntimeError``.
        If wait_completion is False, the monitoring URL is returned right
        after the copy request. It can be given to get_copy_status (or
        _get_copy_status when running async) to follow the operation.

    Note: ``_get_copy_status`` queries the monitoring URL with a plain,
    unauthenticated HTTP client, so the URL can also be polled from another
    process or machine (for example, in a web application).
    """
    source_item_id = await self._get_item_id(path1, throw_on_missing=True)
    copy_url = self._path_to_url(path1, item_id=source_item_id, action="copy")
    path2 = self._strip_protocol(path2)
    parent_path, new_name = path2.rsplit("/", 1)
    payload = {
        "parentReference": await self._get_item_reference(parent_path),
        "name": new_name,
    }
    response = await self._msgraph_post(copy_url, json=payload)
    status_url = response.headers.get("Location")
    if not wait_completion:
        return status_url
    while True:
        progress = await self._get_copy_status(status_url)
        state = progress["status"]
        if state == "completed":
            return None
        if state == "failed":
            raise RuntimeError("Copy operation failed")
        await asyncio.sleep(1)
|
|
355
|
+
|
|
356
|
+
async def __delete_item(self, path: str, item_id: str | None = None, **kwargs):
    """Delete an item, either to the recycle bin or permanently.

    Parameters:
        path (str): Path of the item.
        item_id (str | None): ID of the item; looked up from ``path`` when
            not given.
        use_recycle_bin (bool, in kwargs): Overrides the instance default.
            When true a DELETE request is sent, otherwise the
            ``permanentDelete`` action is posted.

    Raises:
        FileNotFoundError: If the ID must be looked up and the item is
            missing.
    """
    if not item_id:
        item_id = await self._get_item_id(path, throw_on_missing=True)
    to_recycle_bin = kwargs.get("use_recycle_bin", self.use_recycle_bin)
    if not to_recycle_bin:
        url = self._path_to_url(path, item_id=item_id, action="permanentDelete")
        await self._msgraph_post(url)
    else:
        url = self._path_to_url(path, item_id=item_id)
        await self._msgraph_delete(url)
    self.invalidate_cache(path)
|
|
366
|
+
|
|
367
|
+
#############################################################
|
|
368
|
+
# Implement required async methods for the fsspec interface
|
|
369
|
+
#############################################################
|
|
370
|
+
async def _created(self, path: str) -> datetime.datetime:
    """Return the ``time`` entry of the info of ``path``."""
    details = await self._info(path)
    return details["time"]
|
|
372
|
+
|
|
373
|
+
created = sync_wrapper(_created)
|
|
374
|
+
|
|
375
|
+
async def _modified(self, path) -> datetime.datetime:
    """Return the ``mtime`` entry of the info of ``path``."""
    details = await self._info(path)
    return details["mtime"]
|
|
377
|
+
|
|
378
|
+
modified = sync_wrapper(_modified)
|
|
379
|
+
|
|
380
|
+
async def _exists(self, path: str, **kwargs) -> bool:
    """Tell whether an item ID can be found for ``path``."""
    found_id = await self._get_item_id(path)
    return found_id is not None
|
|
382
|
+
|
|
383
|
+
async def _info(
    self, path: str, item_id: str | None = None, expand: str | None = None, **kwargs
) -> dict:
    """Return the fsspec info dictionary of a file or directory.

    Parameters
    ----------
    path : str
        Path to get information about
    item_id: str
        Passed to ``_path_to_url`` together with the path, so the item can
        be addressed by ID.
    expand: str
        Sent as the ``expand`` query parameter when set. see
        https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
        For example, pass "thumbnails" to include the thumbnails.
    """
    query = {"expand": expand} if expand else {}
    item_url = self._path_to_url(path, item_id=item_id)
    reply = await self._msgraph_get(item_url, params=query)
    return self._drive_item_info_to_fsspec_info(reply.json())
|
|
408
|
+
|
|
409
|
+
async def _ls(
    self,
    path: str,
    detail: bool = True,
    item_id: str | None = None,
    expand: str | None = None,
    **kwargs,
) -> list[dict | str]:
    """List files in the given path.

    Parameters
    ----------
    path : str
        Path to list files in
    detail: bool
        if True, gives a list of dictionaries, where each is the same as
        the result of ``info(path)``. If False, gives a list of paths
        (str).
    item_id: str
        Passed to ``_path_to_url`` together with the path, so the folder
        can be addressed by ID.
    expand: str
        Sent as the ``expand`` query parameter when set. see
        https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
        For example, pass "thumbnails" to include the thumbnails.
    kwargs: forwarded to ``_info`` when the path turns out not to list any
        children.

    Raises
    ------
    ValueError
        If ``expand`` is given while ``detail`` is False.
    """
    url = self._path_to_url(path, item_id=item_id, action="children")
    params = None
    if expand and not detail:
        raise ValueError(
            "The expand parameter can only be used when detail is True"
        )
    if not detail:
        params = {"select": "name,parentReference"}
    if expand:
        params = {"expand": expand}
    items = []
    while url:
        response = await self._msgraph_get(url, params=params)
        payload = response.json()
        items.extend(payload.get("value", []))
        # Large collections are paged: follow "@odata.nextLink" until it is
        # absent. The link already embeds the query string, so no params
        # are sent with it.
        url = payload.get("@odata.nextLink")
        params = None
    if not items:
        # maybe the path is a file
        try:
            item = await self._info(path, expand=expand, **kwargs)
            if item["type"] == "file":
                items = [item["item_info"]]
        except FileNotFoundError:
            pass
    if detail:
        return [self._drive_item_info_to_fsspec_info(item) for item in items]
    return [self._get_path(item) for item in items]
|
|
462
|
+
|
|
463
|
+
async def _cat_file(
    self,
    path: str,
    start: int = None,
    end: int = None,
    item_id: str | None = None,
    **kwargs,
):
    """Return the content of a file, optionally limited to a byte range.

    Parameters
    ----------
    path : str
        Path of the file
    start, end : int
        Byte limits, turned into a ``Range`` header by ``_process_limits``.
    item_id : str
        Passed to ``_path_to_url`` together with the path.
    headers : dict (in kwargs)
        Extra request headers. The dictionary is copied, never modified.

    Returns
    -------
    bytes
        The response content, or ``b""`` when the requested range is found
        to be empty.
    """
    url = self._path_to_url(path, item_id=item_id, action="content")
    # Copy, so adding the Range header never leaks into the caller's dict.
    headers = dict(kwargs.get("headers") or {})
    if start is not None or end is not None:
        byte_range = await self._process_limits(path, start, end)
        # byte_range is expressed as "bytes={start}-{end}"; its bounds are
        # extracted to know if we are at the end of the file
        rstart, rend = parse_range_header(byte_range)
        if rend is not None:
            size = await self._size(path)
            if rend > size:
                rend = size
            if rstart and rend and (rstart > rend or rstart == rend == size):
                return b""
        headers["Range"] = byte_range
    response = await self._msgraph_get(url, headers=headers)
    return response.content
|
|
488
|
+
|
|
489
|
+
async def _pipe_file(self, path: str, value: bytes, **kwargs):
    """Write ``value`` to ``path`` through a file opened with ``open_async``."""
    stream = await self.open_async(path, "wb")
    async with stream as target:
        await target.write(value)
|
|
492
|
+
|
|
493
|
+
async def _get_file(self, rpath: str, lpath: str, **kwargs):
    """Download the remote file ``rpath`` into the local file ``lpath``.

    Parameters:
        rpath (str): Remote path to read.
        lpath (str): Local path, opened for binary writing.
        **kwargs: Forwarded to ``_cat_file``; may include ``headers``.
    """
    # Pop "headers" so it is not passed twice (once inside **kwargs and once
    # explicitly), which raised TypeError whenever a caller supplied it.
    headers = kwargs.pop("headers", None) or {}
    content = await self._cat_file(rpath, headers=headers, **kwargs)
    with open(lpath, "wb") as f:
        f.write(content)
|
|
498
|
+
|
|
499
|
+
async def _put_file(self, lpath: str, rpath: str, **kwargs):
    """Upload the local file ``lpath`` to the remote path ``rpath``.

    The whole file is read and handed to ``_pipe_file``; afterwards the
    cache is invalidated for ``rpath`` and each of its parents.
    """
    with open(lpath, "rb") as source:
        payload = source.read()
    await self._pipe_file(rpath, payload, **kwargs)
    current = rpath
    while current:
        self.invalidate_cache(current)
        current = self._parent(current)
|
|
506
|
+
|
|
507
|
+
async def _rm_file(self, path: str, item_id: str | None = None, **kwargs):
    """Delete a single file.

    Raises:
        FileNotFoundError: If ``path`` is not an existing file.
    """
    is_file = await self._isfile(path)
    if not is_file:
        raise FileNotFoundError(f"File not found: {path}")
    await self.__delete_item(path, item_id=item_id, **kwargs)
|
|
511
|
+
|
|
512
|
+
async def _copy(
    self,
    path1,
    path2,
    recursive=False,
    on_error=None,
    maxdepth=None,
    batch_size=None,
    wait_completion=True,
    **kwargs,
):
    """Copy ``path1`` to ``path2``.

    A recursive copy is done with a single ``_msggraph_item_copy`` call; any
    other copy is delegated to the superclass implementation with all
    arguments passed through.
    """
    if not recursive:
        return await super()._copy(
            path1,
            path2,
            recursive=recursive,
            on_error=on_error,
            maxdepth=maxdepth,
            batch_size=batch_size,
            wait_completion=wait_completion,
            **kwargs,
        )
    return await self._msggraph_item_copy(
        path1, path2, wait_completion=wait_completion, **kwargs
    )
|
|
537
|
+
|
|
538
|
+
async def _cp_file(self, path1: str, path2: str, wait_completion=True, **kwargs):
    """Copy a single file by delegating to ``_msggraph_item_copy``."""
    outcome = await self._msggraph_item_copy(
        path1, path2, wait_completion=wait_completion, **kwargs
    )
    return outcome
|
|
542
|
+
|
|
543
|
+
async def _isfile(self, path: str) -> bool:
    """Tell whether ``path`` exists and carries a ``file`` property."""
    item_url = self._path_to_url(path)
    try:
        reply = await self._msgraph_get(item_url, params={"select": "file"})
    except FileNotFoundError:
        return False
    else:
        return reply.json().get("file") is not None
|
|
550
|
+
|
|
551
|
+
async def _isdir(self, path: str) -> bool:
    """Tell whether ``path`` exists and carries a ``folder`` property."""
    item_url = self._path_to_url(path)
    try:
        reply = await self._msgraph_get(item_url, params={"select": "folder"})
    except FileNotFoundError:
        return False
    else:
        return reply.json().get("folder") is not None
|
|
558
|
+
|
|
559
|
+
async def _size(self, path: str) -> int:
    """Return the ``size`` property of ``path`` (0 when it is absent)."""
    item_url = self._path_to_url(path)
    reply = await self._msgraph_get(item_url, params={"select": "size"})
    properties = reply.json()
    return properties.get("size", 0)
|
|
563
|
+
|
|
564
|
+
async def _mkdir(self, path, create_parents=True, exist_ok=False, **kwargs) -> str:
    """Create a directory and return the ID of the created item.

    Parameters:
        path (str): Directory to create.
        create_parents (bool): Create the parent first when it is missing.
        exist_ok (bool): Accepted for interface compatibility; not used here.

    Raises:
        FileNotFoundError: If the parent is missing and ``create_parents``
            is False.
    """
    path = self._strip_protocol(path).rstrip("/")
    parent, child = path.rsplit("/", 1)
    parent_id = await self._get_item_id(parent)
    if not parent_id:
        if not create_parents:
            raise FileNotFoundError(f"Parent directory does not exists: {parent}")
        await self._mkdir(parent, create_parents=create_parents)
        parent_id = await self._get_item_id(parent)
    new_folder = {
        "name": child,
        "folder": {},
        # Ask the API to fail if an item with this name already exists.
        "@microsoft.graph.conflictBehavior": "fail",
    }
    url = self._path_to_url(path, item_id=parent_id, action="children")
    response = await self._msgraph_post(url, json=new_folder)
    return response.json()["id"]
|
|
583
|
+
|
|
584
|
+
async def _makedirs(self, path: str, exist_ok: bool = False):
    """Create ``path`` together with any missing parent.

    Raises:
        FileExistsError: If the API answers 409 and ``exist_ok`` is False.
        HTTPStatusError: For any other HTTP error status.
    """
    try:
        await self._mkdir(path, create_parents=True)
    except HTTPStatusError as e:
        if e.response.status_code != 409:
            raise e
        # 409 is treated as "already exists".
        if not exist_ok:
            raise FileExistsError(f"Directory already exists: {path}") from e
|
|
593
|
+
|
|
594
|
+
async def _rmdir(self, path: str, **kwargs):
    """Remove a directory, provided it is empty.

    Parameters
    ----------
    path : str
        Path of the directory to remove

    use_recycle_bin : bool
        If given, overrides the ``use_recycle_bin`` attribute of the
        instance. True moves the directory to the recycle bin, False
        deletes it permanently.

    Raises
    ------
    FileNotFoundError
        If ``path`` is not an existing directory.
    OSError
        If the directory is not empty.
    """
    is_directory = await self._isdir(path)
    if not is_directory:
        raise FileNotFoundError(f"Directory not found: {path}")
    children = await self._ls(path)
    if children:
        raise OSError(f"Directory not empty: {path}")
    item_id = await self._get_item_id(path, throw_on_missing=True)
    await self.__delete_item(path, item_id=item_id, **kwargs)
|
|
614
|
+
|
|
615
|
+
rmdir = sync_wrapper(_rmdir) # not into the list of async methods to auto wrap
|
|
616
|
+
|
|
617
|
+
async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
    """Delete one path or a list of paths.

    Parameters:
        path (str | list): Path, or list of paths, to delete.
        recursive (bool): When False, a non-empty directory is refused.
        batch_size: Accepted for interface compatibility; not used here.

    Raises:
        OSError: If a directory is not empty and ``recursive`` is False.
    """
    targets = path if isinstance(path, list) else [path]
    for target in targets:
        if not recursive and await self._isdir(target) and await self._ls(target):
            raise OSError(f"Directory not empty: {target}")
        await self.__delete_item(target, **kwargs)
|
|
625
|
+
|
|
626
|
+
async def _mv(self, path1, path2, **kwargs):
    """Move ``path1`` to ``path2``.

    When ``path2`` already exists it is used as the new parent and the name
    is kept. Otherwise the last component of ``path2`` becomes the new name
    and the rest is the new parent.

    Raises:
        FileNotFoundError: If ``path1`` does not exist.
    """
    source_item_id = await self._get_item_id(path1, throw_on_missing=True)
    url = self._path_to_url(path1, item_id=source_item_id)
    path2 = self._strip_protocol(path2)
    changes = {}
    if await self._get_item_id(path2):
        changes["parentReference"] = await self._get_item_reference(path2)
    else:
        parent_path, new_name = path2.rsplit("/", 1)
        changes["parentReference"] = await self._get_item_reference(parent_path)
        if new_name:
            changes["name"] = new_name

    await self._msgraph_patch(url, json=changes)
    self.invalidate_cache(path1)
|
|
646
|
+
|
|
647
|
+
mv = sync_wrapper(_mv)
|
|
648
|
+
|
|
649
|
+
def _open(
    self,
    path,
    mode="rb",
    block_size="default",
    cache_type="readahead",
    autocommit=True,
    size=None,
    cache_options=None,
    item_id=None,
    **kwargs,
):
    """Open a file for reading or writing.

    Parameters
    ----------
    path: string
        Path of file
    mode: string
        One of 'r', 'w', 'a', 'rb', 'wb', or 'ab'. These have the same meaning
        as they do for the built-in `open` function.
    block_size: int
        Forwarded to the buffered file.
    cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
        Caching policy in read mode, forwarded to the buffered file.
    autocommit: bool
        Forwarded to the buffered file.
    size: int
        Size of the file; looked up in append mode when not given.
    cache_options : dict
        Additional options passed to the constructor for the cache specified
        by `cache_type`.
    item_id: str
        ID of the file; looked up from the path when not given.
    kwargs: dict-like
        Additional parameters forwarded to the buffered file.

    Raises
    ------
    FileNotFoundError
        If the mode reads or appends and ``path`` is not an existing file.
    """
    needs_existing_file = "r" in mode or "a" in mode
    if needs_existing_file and not self.isfile(path):
        raise FileNotFoundError(f"File not found: {path}")
    if "a" in mode and not size:
        size = self.size(path)
    resolved_item_id = item_id or self.get_item_id(path)
    return MSGraphBuffredFile(
        fs=self,
        path=path,
        mode=mode,
        block_size=block_size,
        autocommit=autocommit,
        cache_type=cache_type,
        cache_options=cache_options,
        size=size,
        item_id=resolved_item_id,
        **kwargs,
    )
|
|
704
|
+
|
|
705
|
+
async def open_async(self, path, mode="rb", **kwargs):
    """Open a file as an async streamed file.

    Parameters:
        path (str): Path of the file.
        mode (str): Must be a binary mode (contain ``"b"``).
        **kwargs: Forwarded to the streamed file. ``item_id`` may be given
            here to skip the ID lookup; ``compression`` is not supported.

    Raises:
        FileNotFoundError: If the mode reads or appends and ``path`` is not
            an existing file.
        ValueError: If the mode is not binary or ``compression`` is set.
    """
    if ("r" in mode or "a" in mode) and not await self._isfile(path):
        raise FileNotFoundError(f"File not found: {path}")
    if "b" not in mode or kwargs.get("compression"):
        raise ValueError
    size = None
    # Pop "item_id" so it is not passed twice to the constructor below
    # (explicitly and inside **kwargs), which raised TypeError.
    item_id = kwargs.pop("item_id", None) or await self._get_item_id(
        path, throw_on_missing=False
    )
    if "rb" in mode or "a" in mode:
        # The size is passed to the constructor so that it does not have to
        # call the info method itself: in async mode _info is a coroutine
        # and cannot be awaited from a constructor.
        info = await self._info(path)
        size = info["size"]
    return MSGrpahStreamedFile(
        self, path, mode, size=size, item_id=item_id, **kwargs
    )
|
|
725
|
+
|
|
726
|
+
async def _touch(self, path, truncate=True, item_id=None, **kwargs):
    """Create an empty file, or truncate / re-stamp an existing one.

    Parameters:
        path (str): Path of the file.
        truncate (bool): For an existing file, True empties its content and
            False only updates its last modified date time.
        item_id (str | None): ID of the file; looked up from ``path`` when
            not given.

    Raises:
        FileNotFoundError: If the file has to be created and its parent
            does not exist.
    """
    item_id = item_id or await self._get_item_id(path)
    if item_id:
        if truncate:
            url = self._path_to_url(path, item_id=item_id, action="content")
            await self._msgraph_put(
                url,
                content=b"",
                headers={"Content-Type": "application/octet-stream"},
            )
        else:
            url = self._path_to_url(path, item_id=item_id)
            # ``datetime`` is the module in this file, so the class has to
            # be spelled out; a timezone-aware UTC timestamp is sent.
            now = datetime.datetime.now(datetime.timezone.utc)
            # NOTE(review): Graph may only accept this value nested under
            # "fileSystemInfo" -- confirm against the driveItem update docs.
            await self._msgraph_patch(
                url, json={"lastModifiedDateTime": now.isoformat()}
            )
    else:
        # The file does not exist: upload empty content addressed relative
        # to the parent item.
        parent_path, file_name = path.rsplit("/", 1)
        parent_id = await self._get_item_id(parent_path, throw_on_missing=True)
        item_id = f"{parent_id}:/{file_name}:"
        url = self._path_to_url(path, item_id=item_id, action="content")
        headers = {"Content-Type": self._guess_type(path)}
        await self._msgraph_put(url, content=b"", headers=headers)
    self.invalidate_cache(path)
|
|
751
|
+
|
|
752
|
+
touch = sync_wrapper(_touch)
|
|
753
|
+
|
|
754
|
+
async def _checksum(self, path, refresh=False):
    """Return an integer identifying the current version of ``path``.

    An unchanged checksum means unchanged content; a different checksum
    means the content *might* have changed.

    Parameters
    ----------
    path : string/bytes
        Path of the file (or directory) to compute the checksum for.
    refresh : bool (=False)
        Forwarded to ``_info``; if False the local cache is consulted first.

    Returns
    -------
    int
        For a directory, the fsspec token of its info dict read as hex.
        Otherwise the part of the ``ETag`` before the first "-" (surrounding
        double quotes removed) read as hex.
    """
    details = await self._info(path, refresh=refresh)

    if details["type"] == "directory":
        return int(tokenize(details), 16)

    etag = details["ETag"].strip('"')
    leading_part = etag.split("-")[0]
    return int(leading_part, 16)
|
|
775
|
+
|
|
776
|
+
# Blocking entry point: ``sync_wrapper`` exposes the ``_checksum`` coroutine as a regular method.
checksum = sync_wrapper(_checksum)
|
|
777
|
+
|
|
778
|
+
########################################################
|
|
779
|
+
# Additional methods specific to the Microsoft Graph API
|
|
780
|
+
########################################################
|
|
781
|
+
async def _get_content(self, path, item_id=None, params=None) -> bytes:
    """Download the content of an item.

    Parameters:
        path (str): Path of the item.
        item_id (str): The ID of the item to get the content of.
        params (dict): Extra entries expanded as keyword arguments of the
            underlying ``_msgraph_get`` call (the original documentation
            mentions ``format`` to convert e.g. docx to pdf).

    Returns:
        bytes: the body of the response
    """
    extra = params if params else {}
    content_url = self._path_to_url(path, item_id=item_id, action="content")
    reply = await self._msgraph_get(content_url, **extra)
    return reply.content
|
|
797
|
+
|
|
798
|
+
# Blocking entry point: ``sync_wrapper`` exposes the ``_get_content`` coroutine as a regular method.
get_content = sync_wrapper(_get_content)
|
|
799
|
+
|
|
800
|
+
async def _preview(self, path, item_id: str | None = None) -> str:
|
|
801
|
+
if not await self._isfile(path):
|
|
802
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
803
|
+
url = self._path_to_url(path, item_id=item_id, action="preview")
|
|
804
|
+
response = await self._msgraph_post(url)
|
|
805
|
+
return response.json().get("getUrl", [])
|
|
806
|
+
|
|
807
|
+
# Blocking entry point: ``sync_wrapper`` exposes the ``_preview`` coroutine as a regular method.
preview = sync_wrapper(_preview)
|
|
808
|
+
|
|
809
|
+
async def _checkout(self, path: str, item_id: str | None = None):
|
|
810
|
+
"""Check out a file to prevent others from editing the document, and prevent
|
|
811
|
+
your changes from being visible until the documented is checked in.
|
|
812
|
+
|
|
813
|
+
Parameters
|
|
814
|
+
----------
|
|
815
|
+
path : str
|
|
816
|
+
Path of the file to check out
|
|
817
|
+
item_id: str
|
|
818
|
+
If given, the item_id will be used instead of the path to check
|
|
819
|
+
out the file.
|
|
820
|
+
"""
|
|
821
|
+
if not await self._isfile(path):
|
|
822
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
823
|
+
url = self._path_to_url(path, item_id=item_id, action="checkout")
|
|
824
|
+
await self._msgraph_post(url)
|
|
825
|
+
|
|
826
|
+
# Blocking entry point: ``sync_wrapper`` exposes the ``_checkout`` coroutine as a regular method.
checkout = sync_wrapper(_checkout)
|
|
827
|
+
|
|
828
|
+
async def _checkin(self, path: str, comment: str, item_id: str | None = None):
|
|
829
|
+
"""Check in a checked out file, which makes the version of the document
|
|
830
|
+
available to others.
|
|
831
|
+
|
|
832
|
+
Parameters
|
|
833
|
+
----------
|
|
834
|
+
path : str
|
|
835
|
+
Path of the file to check in
|
|
836
|
+
comment : str
|
|
837
|
+
Comment to add to the check-in
|
|
838
|
+
item_id: str
|
|
839
|
+
If given, the item_id will be used instead of the path to check
|
|
840
|
+
in the file.
|
|
841
|
+
"""
|
|
842
|
+
if not await self._isfile(path):
|
|
843
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
844
|
+
url = self._path_to_url(path, item_id=item_id, action="checkin")
|
|
845
|
+
await self._msgraph_post(url, json={"comment": comment})
|
|
846
|
+
|
|
847
|
+
# Blocking entry point: ``sync_wrapper`` exposes the ``_checkin`` coroutine as a regular method.
checkin = sync_wrapper(_checkin)
|
|
848
|
+
|
|
849
|
+
async def _get_versions(self, path: str, item_id: str | None = None) -> list[dict]:
|
|
850
|
+
"""Get the versions of a file.
|
|
851
|
+
|
|
852
|
+
Parameters
|
|
853
|
+
----------
|
|
854
|
+
path : str
|
|
855
|
+
Path of the file to get the versions of
|
|
856
|
+
item_id: str
|
|
857
|
+
If given, the item_id will be used instead of the path to get
|
|
858
|
+
the versions of the file.
|
|
859
|
+
"""
|
|
860
|
+
if not await self._isfile(path):
|
|
861
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
862
|
+
url = self._path_to_url(path, item_id=item_id, action="versions")
|
|
863
|
+
response = await self._msgraph_get(url)
|
|
864
|
+
return response.json().get("value", [])
|
|
865
|
+
|
|
866
|
+
# Blocking entry point: ``sync_wrapper`` exposes the ``_get_versions`` coroutine as a regular method.
get_versions = sync_wrapper(_get_versions)
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
class MSGDriveFS(AbstractMSGraphFS):
    """A filesystem that represents a SharePoint site drive.

    Parameters
    ----------
    drive_id : str
        The ID of the SharePoint drive.
    oauth2_client_params : dict
        Parameters for the OAuth2 client to use for authentication. see
        https://docs.authlib.org/en/latest/client/api.html#authlib.integrations.httpx_client.AsyncOAuth2Client
    site_name : str, optional
        The name of the SharePoint site (only used to list the recycle bin
        items).
    **kwargs
        Forwarded unchanged to the parent class constructor. The original
        documentation lists ``use_recycle_bin: bool (=False)`` here (move
        deleted files to the recycle bin instead of deleting them
        permanently); presumably it is handled by the parent class.
    """

    protocol = ["msgd"]

    def __init__(
        self,
        drive_id: str,
        oauth2_client_params: dict,
        site_name: str | None = None,
        **kwargs,
    ):
        super().__init__(oauth2_client_params=oauth2_client_params, **kwargs)
        self.site_name: str | None = site_name
        self.drive_id: str = drive_id
        self.drive_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}"

    def _path_to_url(self, path, item_id=None, action=None) -> str:
        """Build the Graph URL of an item of this drive.

        When ``item_id`` is given the item is addressed by id and ``path`` is
        ignored; otherwise it is addressed relative to the drive root with
        the ``root:/path:`` syntax. ``action`` (e.g. ``"content"``) is
        appended as a trailing URL segment.
        """
        suffix = f"/{action}" if action else ""
        if item_id:
            # The path plays no role here, so it is not even normalised.
            return f"{self.drive_url}/items/{item_id}{suffix}"

        path = self._strip_protocol(path).rstrip("/")
        if not path:
            # Drive root.
            return f"{self.drive_url}/root{suffix}"
        if not path.startswith("/"):
            path = "/" + path
        return f"{self.drive_url}/root:{path}:{suffix}"

    async def _get_site_id(self) -> str:
        """Return the id of the first site matching ``self.site_name``.

        Raises
        ------
        ValueError
            If no ``site_name`` was given to the constructor.
        IndexError
            If the search returns no site.
        """
        from urllib.parse import quote

        if not self.site_name:
            raise ValueError("site_name is required to look up the SharePoint site")
        # The name is percent-encoded so that characters such as "&" or "#"
        # cannot alter the query string.
        query = quote(self.site_name, safe="")
        url = f"https://graph.microsoft.com/v1.0/sites?search={query}"
        response = await self._msgraph_get(url)
        sites = response.json()["value"]
        if not sites:
            raise IndexError(f"No SharePoint site found for {self.site_name!r}")
        return sites[0]["id"]

    async def _get_item_reference(self, path: str, item_id: str | None = None) -> dict:
        """Return an item reference made of this drive's id and the item id
        taken from the parent class reference."""
        item_reference = await super()._get_item_reference(path, item_id=item_id)
        return {
            "driveId": self.drive_id,
            "id": item_reference["id"],
        }

    async def _get_recycle_bin_items(self) -> list[dict]:
        """Get the items in the recycle bin. (Beta!!)

        Uses the ``beta`` Graph endpoint and requires ``site_name`` to
        resolve the site id.

        Returns:
            list[dict]: A list of dictionaries with information about the items in the recycle bin.

        see https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
        """
        site_id = await self._get_site_id()
        url = f"https://graph.microsoft.com/beta/sites/{site_id}/recycleBin/items"
        response = await self._msgraph_get(url)
        return response.json().get("value", [])

    # Blocking counterpart of ``_get_recycle_bin_items``.
    get_recycle_bin_items = sync_wrapper(_get_recycle_bin_items)
|
|
934
|
+
|
|
935
|
+
|
|
936
|
+
class AsyncStreamedFileMixin:
    """Mixin for streamed file-like objects using async iterators.

    Holds the Microsoft Graph upload logic shared by the file classes of
    this module: a file smaller than one block is sent with a single PUT,
    anything larger goes through an upload session, in chunks of
    ``blocksize`` bytes.

    The host class is expected to provide ``fs``, ``path``, ``mode``,
    ``kwargs``, ``buffer``, ``loc``, ``autocommit`` and ``tell()``.
    """

    def _init__mixin(self, **kwargs):
        """Initialise the mixin state; call it after the base file ``__init__``.

        Only ``block_size``, ``item_id`` and ``size`` are read from ``kwargs``.
        """
        self.path = self.fs._strip_protocol(self.path)
        block_size = kwargs.get("block_size", "default")
        if block_size == "default":
            block_size = None
        self.blocksize = block_size if block_size is not None else self.fs.blocksize
        if "w" in self.mode or "a" in self.mode:
            # block_size must be a multiple of 320 KiB
            if self.blocksize % (320 * 1024) != 0:
                raise ValueError("block_size must be a multiple of 320 KiB")
        self._item_id = kwargs.get("item_id")
        # ``item_id`` is an async property and cannot be awaited from this
        # synchronous initialiser, so whether the file already exists is only
        # resolved on the first write (see ``_init_write_append_mode``).
        self._append_mode = "a" in self.mode
        if self._append_mode:
            # ``size`` may be passed explicitly as None.
            self.loc = kwargs.get("size") or 0
        self._reset_session_info()

    @property
    async def item_id(self):
        """Awaitable id of the file, looked up once and then cached."""
        if self._item_id is None:
            self._item_id = await self.fs._get_item_id(self.path)
        return self._item_id

    async def _create_upload_session(self) -> tuple[str, datetime.datetime]:
        """Create a new upload session for the file.

        Returns:
            tuple[str, datetime.datetime]: The URL of the upload session and the expiration date time.

        see https://learn.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0
        """
        item_id = await self.item_id
        if not item_id:
            # New file: address it relative to its parent, which must exist
            # (same lookup as in ``_upload_content_at_once``).
            parent_path, _, file_name = self.path.rpartition("/")
            parent_id = await self.fs._get_item_id(parent_path, throw_on_missing=True)
            item_id = f"{parent_id}:/{file_name}:"
        url = self.fs._path_to_url(
            self.path, item_id=item_id, action="createUploadSession"
        )
        response = await self.fs._msgraph_post(
            url,
            json={
                "@microsoft.graph.conflictBehavior": "replace",
                # We don't know the size of the file. Explicit commit is required.
                "deferCommit": True,
            },
        )
        session = response.json()
        expiration_dt = datetime.datetime.fromisoformat(session["expirationDateTime"])
        return session["uploadUrl"], expiration_dt

    @property
    def _is_upload_session_expired(self) -> bool:
        """Check if the current upload session is expired."""
        if not self._upload_expiration_dt:
            return True
        now = datetime.datetime.now(datetime.UTC)
        return now > self._upload_expiration_dt

    def _reset_session_info(self):
        """Reset the upload session information."""
        self._upload_session_url = None
        self._upload_expiration_dt = None
        self._chunk_start_pos = 0
        self._remaining_bytes = None
        self._write_called = False

    async def _upload_content_at_once(self, data):
        """Upload ``data`` as the whole file content with a single PUT."""
        # Work on a copy: the dict stored in ``self.kwargs`` belongs to the
        # caller and must not be modified.
        headers = dict(self.kwargs.get("headers") or {})
        if not any(name.lower() == "content-type" for name in headers):
            headers["content-type"] = self.fs._guess_type(self.path)
        item_id = await self.item_id
        if not item_id:
            parent_path, _, file_name = self.path.rpartition("/")
            parent_id = await self.fs._get_item_id(parent_path, throw_on_missing=True)
            item_id = f"{parent_id}:/{file_name}:"
        url = self.fs._path_to_url(self.path, item_id=item_id, action="content")
        await self.fs._msgraph_put(url, content=data, headers=headers)
        self.fs.invalidate_cache(self.path)

    async def _abort_upload_session(self):
        """Abort the current upload session."""
        if self._upload_session_url and not self._is_upload_session_expired:
            await self.fs._msgraph_delete(self._upload_session_url)
        self._reset_session_info()

    async def _commit_upload_session(self):
        """Commit the current upload session.

        Raises RuntimeError if a session exists but has expired.
        """
        if self._upload_session_url and self._is_upload_session_expired:
            raise RuntimeError("The upload session has expired.")
        if self._upload_session_url:
            await self.fs._msgraph_post(self._upload_session_url)
        self._reset_session_info()

    async def _commit(self):
        """Make the written content final.

        An empty file is created with ``fs._touch``, a file that never needed
        an upload session is sent in one shot, and an open upload session is
        committed (or aborted when the file was opened for appending but
        never written to).
        """
        _logger.debug("Commit %s", self)
        # Avoid resetting a file that has been opened in append mode
        # and has not been written to.
        append_no_write = self._append_mode and not self._write_called
        if self.tell() == 0:
            if self.buffer is not None:
                _logger.debug("Empty file committed %s", self)
                await self._abort_upload_session()
                await self.fs._touch(self.path, **self.kwargs)
        elif not self._upload_session_url:
            if self.buffer is not None:
                if not append_no_write:
                    _logger.debug("One-shot upload of %s", self)
                    self.buffer.seek(0)
                    data = self.buffer.read()
                    await self._upload_content_at_once(data)
            else:
                raise RuntimeError

        if append_no_write:
            # if not written, we must abort the upload session otherwise the file
            # will be truncated
            await self._abort_upload_session()
        else:
            await self._commit_upload_session()

        # complex cache invalidation, since file's appearance can cause several
        # directories
        parts = self.path.split("/")
        path = parts[0]
        for p in parts[1:]:
            if path in self.fs.dircache and not [
                True for f in self.fs.dircache[path] if f["name"] == path + "/" + p
            ]:
                self.fs.invalidate_cache(path)
            path = path + "/" + p

    commit = sync_wrapper(_commit)

    async def _discard(self):
        """Drop the pending upload by aborting the upload session."""
        await self._abort_upload_session()

    discard = sync_wrapper(_discard)

    async def _init_write_append_mode(self):
        """Add the initial content of the file to the buffer."""
        if not self._append_mode or self._write_called:
            return
        item_id = await self.item_id
        if item_id is None:
            # Nothing exists yet at this path: there is no content to keep,
            # appending then behaves like a plain write.
            return
        # If the file is opened in append mode, we must get the current content
        # of the file and add it to the buffer.
        content = await self.fs._cat_file(self.path, item_id=item_id)
        self.buffer.write(content)
        self.loc = len(content)

    ########################################################
    ## AbstractBufferedFile methods to implement or override
    ########################################################

    async def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload.

        Parameters
        ==========
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.

        Returns ``not final``.
        """
        if self.autocommit and final and self.tell() < self.blocksize:
            # only happens when closing small file, use one-shot PUT
            chunk_to_write = False
        else:
            self.buffer.seek(0)
            if self._remaining_bytes:
                # Complete the bytes left over from the previous call up to
                # one full block.
                chunk_to_write = self._remaining_bytes + self.buffer.read(
                    self.blocksize - len(self._remaining_bytes)
                )
                self._remaining_bytes = None
            else:
                chunk_to_write = self.buffer.read(self.blocksize)
        # we must write into chunk of the same block size. We therefore need to
        # buffer the remaining bytes if the buffer is not a multiple of the block size
        while chunk_to_write:
            chunk_size = len(chunk_to_write)
            if chunk_size < self.blocksize and not final:
                self._remaining_bytes = chunk_to_write
                break

            range_end = self._chunk_start_pos + chunk_size - 1
            headers = {
                "Content-Length": str(chunk_size),
                # The total size is sent as "*".
                "Content-Range": f"bytes {self._chunk_start_pos}-{range_end}/*",
            }
            response = await self.fs._msgraph_put(
                self._upload_session_url,
                content=chunk_to_write,
                headers=headers,
            )
            self._upload_expiration_dt = datetime.datetime.fromisoformat(
                response.json()["expirationDateTime"]
            )
            self._chunk_start_pos += chunk_size
            chunk_to_write = self.buffer.read(self.blocksize)

        if self.autocommit and final:
            await self._commit()
        return not final

    async def _initiate_upload(self):
        """Open an upload session, unless the file fits in a one-shot PUT."""
        if self.autocommit and self.tell() < self.blocksize:
            # only happens when closing small file, use one-shot PUT
            return
        # If the file to be uploaded is larger than the block size, then we need to
        # create an upload session to upload the file in chunks.
        self._chunk_start_pos = 0
        (
            self._upload_session_url,
            self._upload_expiration_dt,
        ) = await self._create_upload_session()

    async def _fetch_range(self, start, end) -> bytes:
        """Get the specified set of bytes from remote."""
        # Go through the cached id instead of looking it up on every read.
        item_id = await self.item_id
        return await self.fs._cat_file(self.path, start=start, end=end, item_id=item_id)

    @property
    def loop(self):
        """Event loop of the owning filesystem."""
        return self.fs.loop
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
class MSGraphBuffredFile(AsyncStreamedFileMixin, AbstractBufferedFile):
    """Buffered, synchronous file object for a file of a SharePoint drive.

    Parameters
    ----------
    fs: MSGDriveFS
        Filesystem the file belongs to.
    path: str
        Path of the file.
    mode: str
        One of 'rb', 'wb', 'ab', with the meaning they have for the
        built-in `open` function.
    block_size: int
        Buffer size for reading or writing, 'default' for class default
    autocommit: bool
        Whether to write to final destination; may only impact what
        happens when file is being closed.
    cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
        Caching policy in read mode. See the definitions in ``core``.
    cache_options : dict
        Additional options passed to the constructor for the cache specified
        by `cache_type`.
    size: int
        If given and in read mode, suppressed having to look up the file size
    kwargs:
        Gets stored as self.kwargs
    """

    def __init__(
        self,
        fs: MSGDriveFS,
        path: str,
        mode: str = "rb",
        block_size: int | None = None,
        autocommit: bool = True,
        cache_type: str = "readahead",
        cache_options: dict | None = None,
        size: int | None = None,
        **kwargs,
    ):
        AbstractBufferedFile.__init__(
            self,
            fs,
            path,
            mode,
            block_size,
            autocommit,
            cache_type,
            cache_options,
            size,
            **kwargs,
        )
        # The mixin receives the extra keyword arguments plus every named
        # argument; the named ones win over same-named extras.
        mixin_options = {
            **kwargs,
            "fs": fs,
            "path": path,
            "mode": mode,
            "block_size": block_size,
            "autocommit": autocommit,
            "cache_type": cache_type,
            "cache_options": cache_options,
            "size": size,
        }
        AsyncStreamedFileMixin._init__mixin(self, **mixin_options)

    def write(self, data):
        """Write ``data``; the first call first runs the append-mode setup."""
        first_write = not self._write_called
        if first_write:
            self._init_write_append_mode()
            self._write_called = True
        return super().write(data)

    # Blocking version of the mixin coroutine, used by ``write`` above.
    _init_write_append_mode = sync_wrapper(
        AsyncStreamedFileMixin._init_write_append_mode
    )

    ########################################################
    ## AbstractBufferedFile methods to implement or override
    ########################################################
    _upload_chunk = sync_wrapper(AsyncStreamedFileMixin._upload_chunk)
    _initiate_upload = sync_wrapper(AsyncStreamedFileMixin._initiate_upload)
    _fetch_range = sync_wrapper(AsyncStreamedFileMixin._fetch_range)
|
|
1246
|
+
|
|
1247
|
+
|
|
1248
|
+
class MSGrpahStreamedFile(AsyncStreamedFileMixin, AbstractAsyncStreamedFile):
    """Asynchronous streamed file object for a file of a SharePoint drive.

    Parameters
    ----------
    fs: MSGDriveFS
        Filesystem the file belongs to.
    path: str
        Path of the file.
    mode: str
        One of 'rb', 'wb', 'ab', with the meaning they have for the
        built-in `open` function.
    block_size: int
        Buffer size for reading or writing, 'default' for class default
    autocommit: bool
        Whether to write to final destination; may only impact what
        happens when file is being closed.
    cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
        Caching policy in read mode. See the definitions in ``core``.
    cache_options : dict
        Additional options passed to the constructor for the cache specified
        by `cache_type`.
    size: int
        If given and in read mode, suppressed having to look up the file size
    kwargs:
        Gets stored as self.kwargs
    """

    def __init__(
        self,
        fs: MSGDriveFS,
        path: str,
        mode: str = "rb",
        block_size: int | None = None,
        autocommit: bool = True,
        cache_type: str = "readahead",
        cache_options: dict | None = None,
        size: int | None = None,
        **kwargs,
    ):
        AbstractAsyncStreamedFile.__init__(
            self,
            fs,
            path,
            mode,
            block_size,
            autocommit,
            cache_type,
            cache_options,
            size,
            **kwargs,
        )
        # The mixin receives the extra keyword arguments plus every named
        # argument; the named ones win over same-named extras.
        mixin_options = {
            **kwargs,
            "fs": fs,
            "path": path,
            "mode": mode,
            "block_size": block_size,
            "autocommit": autocommit,
            "cache_type": cache_type,
            "cache_options": cache_options,
            "size": size,
        }
        AsyncStreamedFileMixin._init__mixin(self, **mixin_options)

    async def write(self, data):
        """Write ``data``; the first call first runs the append-mode setup."""
        first_write = not self._write_called
        if first_write:
            await self._init_write_append_mode()
            self._write_called = True
        return await super().write(data)

    async def readinto(self, b):
        """Read into the buffer ``b`` and return the number of bytes read.

        Async counterpart of the builtin file's readinto method, see
        https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
        """
        target = memoryview(b).cast("B")
        chunk = await self.read(target.nbytes)
        count = len(chunk)
        target[:count] = chunk
        return count
|