msgraphfs 0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msgraphfs/core.py ADDED
@@ -0,0 +1,1331 @@
1
+ import asyncio
2
+ import datetime
3
+ import logging
4
+ import mimetypes
5
+ import re
6
+ import weakref
7
+
8
+ import httpx
9
+ from authlib.integrations.httpx_client import AsyncOAuth2Client
10
+ from fsspec.asyn import (
11
+ AbstractAsyncStreamedFile,
12
+ AbstractBufferedFile,
13
+ AsyncFileSystem,
14
+ FSTimeoutError,
15
+ sync,
16
+ sync_wrapper,
17
+ )
18
+ from fsspec.utils import tokenize
19
+ from httpx import HTTPStatusError, Response
20
+ from httpx._types import URLTypes
21
+
22
# Exception types caught by the ``except HTTPX_RETRYABLE_ERRORS`` clause of
# _http_call_with_retry.
HTTPX_RETRYABLE_ERRORS = (
    asyncio.TimeoutError,
    httpx.NetworkError,
    httpx.ProxyError,
    httpx.TimeoutException,
)

# HTTP status codes that _http_call_with_retry treats as retryable.
HTTPX_RETRYABLE_HTTP_STATUS_CODES = (500, 502, 503, 504)


# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
33
+
34
+
35
def parse_range_header(range_header):
    """Extract the numeric bounds from an HTTP ``Range`` header value.

    The value is matched from its start against ``bytes=<start>-<end>``;
    either number may be absent (``bytes=100-``, ``bytes=-500``).

    Returns:
        tuple: ``(start, end)`` where each element is an ``int``, or ``None``
        when the corresponding number is missing.

    Raises:
        ValueError: if the value does not start with a ``bytes=`` range.
    """
    found = re.match(r"bytes=(\d+)?-(\d+)?", range_header)
    if found is None:
        raise ValueError("Invalid Range header format")
    # A missing group is None; a present one is a non-empty digit string.
    first, last = (int(text) if text else None for text in found.groups())
    return first, last
49
+
50
+
51
def wrap_http_not_found_exceptions(func):
    """Decorate an async HTTP call so that a 404 becomes ``FileNotFoundError``.

    Parameters:
        func: coroutine function performing the HTTP request.

    Returns:
        A coroutine function with the same signature and metadata as ``func``.
        Any ``HTTPStatusError`` whose status is 404 is re-raised as
        ``FileNotFoundError`` (chained to the original error); every other
        exception propagates unchanged.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        try:
            return await func(*args, **kwargs)
        except HTTPStatusError as e:
            if e.response.status_code != 404:
                raise
            path = e.request.url.path
            if "root:" in path:
                # keep only what follows the last "root:" marker
                path = path.split("root:")[-1]
            # drop the closing ":" of the ":/path:" addressing syntax; unlike
            # indexing path[-1], this is safe when nothing follows "root:"
            path = path.removesuffix(":")
            raise FileNotFoundError(f"File not found: {path}") from e

    return wrapper
67
+
68
+
69
@wrap_http_not_found_exceptions
async def _http_call_with_retry(func, *, args=(), kwargs=None, retries) -> Response:
    """Await ``func(*args, **kwargs)`` and retry on transient failures.

    Parameters:
        func: coroutine function returning an ``httpx.Response``.
        args: positional arguments for ``func``.
        kwargs: keyword arguments for ``func``.
        retries: maximum number of attempts; at least one attempt is always
            made.

    Returns:
        The first response for which ``raise_for_status()`` succeeds.

    Raises:
        One of ``HTTPX_RETRYABLE_ERRORS`` or ``HTTPStatusError`` once the
        attempts are exhausted; a non-retryable ``HTTPStatusError``
        immediately. (The decorator turns a 404 into ``FileNotFoundError``.)
    """
    kwargs = kwargs or {}
    # The original overwrote ``retries`` with 1, which disabled retrying.
    attempts = max(1, retries)
    for attempt in range(attempts):
        is_last = attempt == attempts - 1
        try:
            response = await func(*args, **kwargs)
            response.raise_for_status()
            return response
        except HTTPX_RETRYABLE_ERRORS as e:
            if is_last:
                raise
            _logger.debug("Retryable error: %s", e)
        except HTTPStatusError as e:
            status = e.response.status_code
            if status not in HTTPX_RETRYABLE_HTTP_STATUS_CODES:
                # 404 is an expected outcome (existence checks): not logged
                if status != 404:
                    _logger.error("HTTP error %s: %s", status, e.response.content)
                raise
            if is_last:
                raise
            _logger.debug("Retryable HTTP status code: %s", status)
        # exponential backoff, capped at 15 seconds
        await asyncio.sleep(min(1.7**attempt * 0.1, 15))
96
+
97
+
98
+ class AbstractMSGraphFS(AsyncFileSystem):
99
+ """A filesystem that represents Microsoft files exposed through the Microsoft Graph
100
+ API.
101
+
102
+ parameters:
103
+ oauth2_client_params (dict): Parameters for the OAuth2 client to use for
104
+ authentication. see https://docs.authlib.org/en/latest/client/api.html#authlib.integrations.httpx_client.AsyncOAuth2Client
105
+ """
106
+
107
+ retries = 5
108
+ blocksize = 10 * 1024 * 1024 # 10 MB
109
+
110
def __init__(
    self,
    oauth2_client_params: dict,
    **kwargs,
):
    """Create the filesystem and its OAuth2 HTTP client.

    Parameters:
        oauth2_client_params (dict): keyword arguments forwarded to
            ``AsyncOAuth2Client``.
        **kwargs: forwarded to the fsspec superclass, except the listing
            cache options, which are dropped. ``use_recycle_bin`` (default
            False) is stored on the instance.
    """
    # directory caching is not supported, so these never reach fsspec
    unsupported = ("use_listings_cache", "listings_expiry_time", "max_paths")
    super_kwargs = {
        key: value for key, value in kwargs.items() if key not in unsupported
    }
    super().__init__(**super_kwargs)

    self.client: AsyncOAuth2Client = AsyncOAuth2Client(
        **oauth2_client_params,
        follow_redirects=True,
    )
    if not self.asynchronous:
        # close the HTTP client when this instance is garbage collected
        weakref.finalize(self, self.close_http_session, self.client, self.loop)
    self.use_recycle_bin = kwargs.get("use_recycle_bin", False)
129
+
130
@staticmethod
def close_http_session(
    client: AsyncOAuth2Client, loop: asyncio.AbstractEventLoop | None = None
):
    """Close the HTTP session.

    Nothing is done unless ``loop`` is given, running and not closed.

    Parameters:
        client: the HTTP client whose ``aclose`` coroutine is run.
        loop: the event loop used to run it.
    """
    # NOTE(review): the nesting of the second ``try`` inside the ``if`` is
    # reconstructed from an unindented listing; confirm against the file.
    if loop is not None and loop.is_running() and not loop.is_closed():
        try:
            # first attempt: schedule the close as a task, without waiting
            loop = asyncio.get_event_loop()
            loop.create_task(client.aclose())
            return
        except RuntimeError:
            pass
        try:
            # second attempt: run the close through fsspec's sync(),
            # giving up after 0.1 second
            sync(loop, client.aclose, timeout=0.1)
            return
        except FSTimeoutError:
            pass
147
+
148
+ def _path_to_url(self, path, item_id=None, action=None) -> str:
149
+ """This method must be implemented by subclasses to convert a path to a valid
150
+ URL to call the Microsoft Graph API for the given path according to the target
151
+ service.
152
+
153
+ (OneDrive, SharePoint, etc.)
154
+ """
155
+ raise NotImplementedError
156
+
157
+ def _get_path(self, drive_item_info: dict) -> str:
158
+ parent_path = drive_item_info["parentReference"].get("path")
159
+ if not parent_path:
160
+ return "/"
161
+ # remove all the part before the "root:"
162
+ parent_path = parent_path.split("root:")[1]
163
+ if parent_path and not parent_path.startswith("/"):
164
+ parent_path = "/" + parent_path
165
+ return parent_path + "/" + drive_item_info["name"]
166
+
167
+ def _drive_item_info_to_fsspec_info(self, drive_item_info: dict) -> dict:
168
+ """Convert a drive item info to a fsspec info dictionary.
169
+
170
+ see
171
+ https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
172
+ """
173
+ _type = "other"
174
+ if drive_item_info.get("folder"):
175
+ _type = "directory"
176
+ elif drive_item_info.get("file"):
177
+ _type = "file"
178
+ data = {
179
+ "name": self._get_path(drive_item_info),
180
+ "size": drive_item_info.get("size", 0),
181
+ "type": _type,
182
+ "item_info": drive_item_info,
183
+ "time": datetime.datetime.fromisoformat(
184
+ drive_item_info.get("createdDateTime", "1970-01-01T00:00:00Z")
185
+ ),
186
+ "mtime": datetime.datetime.fromisoformat(
187
+ drive_item_info.get("lastModifiedDateTime", "1970-01-01T00:00:00Z")
188
+ ),
189
+ "id": drive_item_info.get("id"),
190
+ }
191
+ if _type == "file":
192
+ data["mimetype"] = drive_item_info.get("file", {}).get("mimeType", "")
193
+ return data
194
+
195
async def _get_item_id(self, path: str, throw_on_missing=False) -> str | None:
    """Look up the drive item ID for ``path``.

    Parameters:
        path (str): The path to the file or directory.
        throw_on_missing (bool): when True, a missing item raises
            ``FileNotFoundError`` instead of yielding None.

    Returns:
        str: The item ID if the item exists, otherwise None.
    """
    url = self._path_to_url(path)
    try:
        # only the "id" property is requested
        response = await self._msgraph_get(url, params={"select": "id"})
    except FileNotFoundError:
        if not throw_on_missing:
            return None
        raise
    else:
        return response.json()["id"]
212
+
213
+ get_item_id = sync_wrapper(_get_item_id)
214
+
215
async def _get_item_reference(self, path: str, item_id: str | None = None) -> dict:
    """Fetch the properties that make up an itemReference for ``path``.

    The result is meant to be passed as an itemReference argument to other
    Graph calls. see
    https://docs.microsoft.com/en-us/graph/api/resources/itemreference?view=graph-rest-1.0

    Returns:
        dict: the decoded JSON body of the response.
    """
    selected = "id,driveId,driveType,name,path,shareId,sharepointIds,siteId"
    target = self._path_to_url(path, item_id=item_id)
    response = await self._msgraph_get(target, params={"select": selected})
    return response.json()
231
+
232
+ @staticmethod
233
+ def _guess_type(path: str) -> str:
234
+ return mimetypes.guess_type(path)[0] or "application/octet-stream"
235
+
236
+ ################################################
237
+ # Helper methods to call the Microsoft Graph API
238
+ ################################################
239
async def _call_msgraph(
    self, http_method: str, url: URLTypes, *args, **kwargs
) -> Response:
    """Send one request to the Microsoft Graph API through the retry helper.

    ``http_method``, ``url`` and any extra positional/keyword arguments are
    handed to ``self.client.request``; ``self.retries`` is passed as the
    retry budget.
    """
    request_args = (http_method, url, *args)
    response = await _http_call_with_retry(
        self.client.request,
        args=request_args,
        kwargs=kwargs,
        retries=self.retries,
    )
    return response
249
+
250
+ call_msgraph = sync_wrapper(_call_msgraph)
251
+
252
async def _msgraph_get(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a GET request to the Microsoft Graph API and return the response."""
    response = await self._call_msgraph("GET", url, *args, **kwargs)
    return response
255
+
256
+ msgraph_get = sync_wrapper(_msgraph_get)
257
+
258
async def _msgraph_post(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a POST request to the Microsoft Graph API and return the response."""
    response = await self._call_msgraph("POST", url, *args, **kwargs)
    return response
261
+
262
+ msgraph_post = sync_wrapper(_msgraph_post)
263
+
264
async def _msgraph_put(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a PUT request to the Microsoft Graph API and return the response."""
    response = await self._call_msgraph("PUT", url, *args, **kwargs)
    return response
267
+
268
+ msgraph_put = sync_wrapper(_msgraph_put)
269
+
270
async def _msgraph_delete(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a DELETE request to the Microsoft Graph API and return the response."""
    response = await self._call_msgraph("DELETE", url, *args, **kwargs)
    return response
273
+
274
+ msgraph_delete = sync_wrapper(_msgraph_delete)
275
+
276
async def _msgraph_patch(self, url: URLTypes, *args, **kwargs) -> Response:
    """Issue a PATCH request to the Microsoft Graph API and return the response."""
    response = await self._call_msgraph("PATCH", url, *args, **kwargs)
    return response
279
+
280
+ msgraph_patch = sync_wrapper(_msgraph_patch)
281
+
282
+ ################################################
283
+ # Others methods
284
+ ################################################
285
+
286
async def _get_copy_status(self, url: str) -> dict:
    """Get the status of a copy operation.

    Parameters:
        url (str): the monitoring URL of the copy operation. It is fetched
            with a plain, unauthenticated HTTP client.

    Returns:
        dict with the following keys:
            "status": the ``status`` field of the response. Possible values
                are: "completed", "failed", "inProgress", "notStarted".
            "resource_id": the ID of the resource that was copied
                (``resourceId`` field).
            "percent_complete": the percentage of the copy operation that
                has completed (``percentageComplete`` field).
        A field absent from the response is reported as None.
    """
    # The context manager closes the client (and its connections); the
    # original created a client on every poll and never closed it.
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
    value = response.json()
    return {
        "status": value.get("status"),
        "resource_id": value.get("resourceId"),
        "percent_complete": value.get("percentageComplete"),
    }
302
+
303
+ get_copy_status = sync_wrapper(_get_copy_status)
304
+
305
async def _msggraph_item_copy(
    self, path1: str, path2: str, wait_completion=True, **kwargs
):
    """Copy the item at ``path1`` to ``path2`` with the Graph ``copy`` action.

    Parameters
    ----------
    path1 : str
        Source path
    path2 : str
        Destination path; its last segment is the name of the copy and the
        rest is the destination folder.
    wait_completion : bool (=True)
        The Graph API usually performs the copy asynchronously and only
        reports whether the request was accepted. When True, the status URL
        is polled once per second until the operation is "completed"; a
        "failed" status raises ``RuntimeError``. When False, the status URL
        is returned right after the request so the caller can poll it with
        ``get_copy_status`` (or ``_get_copy_status`` when running async).

    Note: the status URL is fetched without authentication by
    ``_get_copy_status``, so it can be monitored from another process or
    machine (for example, in a web application).
    """
    source_item_id = await self._get_item_id(path1, throw_on_missing=True)
    copy_url = self._path_to_url(path1, item_id=source_item_id, action="copy")
    destination = self._strip_protocol(path2)
    parent_path, new_name = destination.rsplit("/", 1)
    payload = {
        "parentReference": await self._get_item_reference(parent_path),
        "name": new_name,
    }
    response = await self._msgraph_post(copy_url, json=payload)
    status_url = response.headers.get("Location")
    if not wait_completion:
        return status_url
    while True:
        state = (await self._get_copy_status(status_url))["status"]
        if state == "completed":
            return None
        if state == "failed":
            raise RuntimeError("Copy operation failed")
        await asyncio.sleep(1)
355
+
356
async def __delete_item(self, path: str, item_id: str | None = None, **kwargs):
    """Delete the item at ``path`` and invalidate its cache entry.

    ``use_recycle_bin`` may be given in ``kwargs`` to override the instance
    default: when true a DELETE request is sent for the item, otherwise the
    ``permanentDelete`` action is posted.

    Raises:
        FileNotFoundError: if no ``item_id`` is given and the path is missing.
    """
    if not item_id:
        item_id = await self._get_item_id(path, throw_on_missing=True)
    if kwargs.get("use_recycle_bin", self.use_recycle_bin):
        await self._msgraph_delete(self._path_to_url(path, item_id=item_id))
    else:
        await self._msgraph_post(
            self._path_to_url(path, item_id=item_id, action="permanentDelete")
        )
    self.invalidate_cache(path)
366
+
367
+ #############################################################
368
+ # Implement required async methods for the fsspec interface
369
+ #############################################################
370
async def _created(self, path: str) -> datetime.datetime:
    """Return the ``time`` entry of the info dictionary for ``path``."""
    info = await self._info(path)
    return info["time"]
372
+
373
+ created = sync_wrapper(_created)
374
+
375
async def _modified(self, path) -> datetime.datetime:
    """Return the ``mtime`` entry of the info dictionary for ``path``."""
    info = await self._info(path)
    return info["mtime"]
377
+
378
+ modified = sync_wrapper(_modified)
379
+
380
async def _exists(self, path: str, **kwargs) -> bool:
    """Return True when an item ID can be resolved for ``path``."""
    item_id = await self._get_item_id(path)
    return item_id is not None
382
+
383
async def _info(
    self, path: str, item_id: str | None = None, expand: str | None = None, **kwargs
) -> dict:
    """Fetch a drive item and return it as a fsspec info dictionary.

    Parameters
    ----------
    path : str
        Path to get information about
    item_id: str
        If given, the item is addressed by this ID instead of by its path.
    expand: str
        Value of the ``expand`` query parameter, used to include related
        properties of the item (for example "thumbnails"). see
        https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
    """
    query = {"expand": expand} if expand else {}
    item_url = self._path_to_url(path, item_id=item_id)
    response = await self._msgraph_get(item_url, params=query)
    drive_item = response.json()
    return self._drive_item_info_to_fsspec_info(drive_item)
408
+
409
async def _ls(
    self,
    path: str,
    detail: bool = True,
    item_id: str | None = None,
    expand: str | None = None,
    **kwargs,
) -> list[dict | str]:
    """List files in the given path.

    Parameters
    ----------
    path : str
        Path to list files in
    detail: bool
        if True, gives a list of dictionaries, where each is the same as
        the result of ``info(path)``. If False, gives a list of paths
        (str).
    item_id: str
        If given, the item_id will be used instead of the path to list
        the files in the given path.
    expand: str
        Value of the ``expand`` query parameter (for example "thumbnails");
        only allowed when ``detail`` is True. see
        https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
    kwargs: forwarded to ``_info`` when the path turns out to be a file.

    Raises
    ------
    ValueError
        If ``expand`` is given while ``detail`` is False.
    """
    url = self._path_to_url(path, item_id=item_id, action="children")
    if expand and not detail:
        raise ValueError(
            "The expand parameter can only be used when detail is True"
        )
    params = None
    if not detail:
        # names and parent references are enough to rebuild the paths
        params = {"select": "name,parentReference"}
    if expand:
        params = {"expand": expand}

    # The children collection is paged: follow "@odata.nextLink" until it is
    # absent, otherwise large folders are silently truncated.
    items = []
    next_url, next_params = url, params
    while next_url:
        page = (await self._msgraph_get(next_url, params=next_params)).json()
        items.extend(page.get("value", []))
        next_url = page.get("@odata.nextLink")
        next_params = None  # the next link already embeds the query string

    if not items:
        # maybe the path is a file
        try:
            item = await self._info(path, item_id=item_id, expand=expand, **kwargs)
            if item["type"] == "file":
                items = [item["item_info"]]
        except FileNotFoundError:
            pass
    if detail:
        return [self._drive_item_info_to_fsspec_info(item) for item in items]
    return [self._get_path(item) for item in items]
462
+
463
async def _cat_file(
    self,
    path: str,
    start: int | None = None,
    end: int | None = None,
    item_id: str | None = None,
    **kwargs,
):
    """Return the content of a file, or a byte range of it.

    Parameters:
        path (str): path of the file.
        start, end: optional byte limits; they are turned into a ``Range``
            header by ``_process_limits``.
        item_id (str): if given, used instead of the path to address the file.
        **kwargs: ``headers`` (dict) may supply extra request headers.

    Returns:
        bytes: the response body, or ``b""`` when the requested range is
        empty or starts at the end of the file.
    """
    url = self._path_to_url(path, item_id=item_id, action="content")
    # work on a copy so the caller's dictionary is never modified
    headers = dict(kwargs.get("headers") or {})
    if start is not None or end is not None:
        range_header = await self._process_limits(path, start, end)
        # range_header is expressed as "bytes={start}-{end}"; extract both
        # values to know if we are at the end of the file
        rstart, rend = parse_range_header(range_header)
        if rend is not None:
            size = await self._size(path)
            rend = min(rend, size)
            # compare against None: offset 0 is a valid position
            if rstart is not None and (rstart > rend or rstart == rend == size):
                return b""
        headers["Range"] = range_header
    response = await self._msgraph_get(url, headers=headers)
    return response.content
488
+
489
async def _pipe_file(self, path: str, value: bytes, **kwargs):
    """Write ``value`` to ``path`` through a file opened with ``open_async``."""
    stream = await self.open_async(path, "wb")
    async with stream as f:
        await f.write(value)
492
+
493
async def _get_file(self, rpath: str, lpath: str, **kwargs):
    """Download the remote file ``rpath`` into the local file ``lpath``.

    Parameters:
        rpath (str): remote path, read with ``_cat_file``.
        lpath (str): local path, opened for binary writing.
        **kwargs: forwarded to ``_cat_file``; ``headers`` may supply extra
            request headers.
    """
    # Take "headers" out of kwargs: passing it both explicitly and through
    # **kwargs raised TypeError whenever the caller supplied it.
    headers = kwargs.pop("headers", None) or {}
    content = await self._cat_file(rpath, headers=headers, **kwargs)
    with open(lpath, "wb") as f:
        f.write(content)
498
+
499
async def _put_file(self, lpath: str, rpath: str, **kwargs):
    """Upload the local file ``lpath`` to the remote path ``rpath``.

    The whole file is read into memory and written with ``_pipe_file``;
    afterwards the cache is invalidated for ``rpath`` and each of its parents.
    """
    with open(lpath, "rb") as source:
        payload = source.read()
    await self._pipe_file(rpath, payload, **kwargs)
    current = rpath
    while current:
        self.invalidate_cache(current)
        current = self._parent(current)
506
+
507
async def _rm_file(self, path: str, item_id: str | None = None, **kwargs):
    """Delete the file at ``path``.

    Raises:
        FileNotFoundError: if ``path`` is not an existing file.
    """
    is_file = await self._isfile(path)
    if not is_file:
        raise FileNotFoundError(f"File not found: {path}")
    await self.__delete_item(path, item_id=item_id, **kwargs)
511
+
512
async def _copy(
    self,
    path1,
    path2,
    recursive=False,
    on_error=None,
    maxdepth=None,
    batch_size=None,
    wait_completion=True,
    **kwargs,
):
    """Copy ``path1`` to ``path2``.

    A recursive copy is done with a single Graph copy request on the source
    item; anything else is delegated to the fsspec implementation.
    """
    if not recursive:
        return await super()._copy(
            path1,
            path2,
            recursive=recursive,
            on_error=on_error,
            maxdepth=maxdepth,
            batch_size=batch_size,
            wait_completion=wait_completion,
            **kwargs,
        )
    return await self._msggraph_item_copy(
        path1, path2, wait_completion=wait_completion, **kwargs
    )
537
+
538
async def _cp_file(self, path1: str, path2: str, wait_completion=True, **kwargs):
    """Copy a single file by delegating to ``_msggraph_item_copy``."""
    result = await self._msggraph_item_copy(
        path1, path2, wait_completion=wait_completion, **kwargs
    )
    return result
542
+
543
async def _isfile(self, path: str) -> bool:
    """Return True when the item at ``path`` exists and has a ``file`` facet."""
    target = self._path_to_url(path)
    try:
        response = await self._msgraph_get(target, params={"select": "file"})
    except FileNotFoundError:
        return False
    else:
        facet = response.json().get("file")
        return facet is not None
550
+
551
async def _isdir(self, path: str) -> bool:
    """Return True when the item at ``path`` exists and has a ``folder`` facet."""
    target = self._path_to_url(path)
    try:
        response = await self._msgraph_get(target, params={"select": "folder"})
    except FileNotFoundError:
        return False
    else:
        facet = response.json().get("folder")
        return facet is not None
558
+
559
async def _size(self, path: str) -> int:
    """Return the ``size`` property of the item at ``path`` (0 when absent)."""
    target = self._path_to_url(path)
    response = await self._msgraph_get(target, params={"select": "size"})
    item = response.json()
    return item.get("size", 0)
563
+
564
async def _mkdir(self, path, create_parents=True, exist_ok=False, **kwargs) -> str:
    """Create the directory ``path`` and return its item ID.

    Parameters:
        path: path of the directory to create.
        create_parents (bool): create the missing parent directories.
        exist_ok (bool): when True and a directory already exists at
            ``path``, return its ID instead of raising.

    Raises:
        FileNotFoundError: if the parent is missing and ``create_parents``
            is False.
        HTTPStatusError: status 409 when the name is already taken (unless
            ``exist_ok`` applies), or any other API error.
    """
    path = self._strip_protocol(path).rstrip("/")
    # rpartition copes with a path without "/" (parent is then the root)
    parent, _, child = path.rpartition("/")
    parent_id = await self._get_item_id(parent)
    if not parent_id:
        if not create_parents:
            raise FileNotFoundError(f"Parent directory does not exists: {parent}")
        # the recursive call already returns the ID of the created parent
        parent_id = await self._mkdir(parent, create_parents=True, exist_ok=True)
    url = self._path_to_url(path, item_id=parent_id, action="children")
    try:
        response = await self._msgraph_post(
            url,
            json={
                "name": child,
                "folder": {},
                "@microsoft.graph.conflictBehavior": "fail",
            },
        )
    except HTTPStatusError as e:
        # 409: the name is taken; tolerate it only for an existing directory
        if exist_ok and e.response.status_code == 409 and await self._isdir(path):
            return await self._get_item_id(path, throw_on_missing=True)
        raise
    return response.json()["id"]
583
+
584
async def _makedirs(self, path: str, exist_ok: bool = False):
    """Create ``path`` together with any missing parent directory.

    Raises:
        FileExistsError: on a 409 response when ``exist_ok`` is False.
        HTTPStatusError: for any other error status.
    """
    try:
        await self._mkdir(path, create_parents=True)
    except HTTPStatusError as e:
        if e.response.status_code != 409:
            raise e
        # 409 means the name is already taken
        if not exist_ok:
            raise FileExistsError(f"Directory already exists: {path}") from e
593
+
594
async def _rmdir(self, path: str, **kwargs):
    """Remove a directory, provided it is empty.

    Parameters
    ----------
    path : str
        Path of the directory to remove
    use_recycle_bin : bool
        Optional override of the instance's ``use_recycle_bin`` attribute.
        True sends the directory to the recycle bin; False deletes it
        permanently.

    Raises
    ------
    FileNotFoundError
        If ``path`` is not an existing directory.
    OSError
        If the directory is not empty.
    """
    is_directory = await self._isdir(path)
    if not is_directory:
        raise FileNotFoundError(f"Directory not found: {path}")
    children = await self._ls(path)
    if children:
        raise OSError(f"Directory not empty: {path}")
    directory_id = await self._get_item_id(path, throw_on_missing=True)
    await self.__delete_item(path, item_id=directory_id, **kwargs)
614
+
615
+ rmdir = sync_wrapper(_rmdir) # not into the list of async methods to auto wrap
616
+
617
async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
    """Delete one path or a list of paths.

    Without ``recursive``, a directory that still has children raises
    ``OSError``. Remaining keyword arguments go to the delete helper.
    """
    targets = path if isinstance(path, list) else [path]
    for target in targets:
        if not recursive and await self._isdir(target) and await self._ls(target):
            raise OSError(f"Directory not empty: {target}")
        await self.__delete_item(target, **kwargs)
625
+
626
async def _mv(self, path1, path2, **kwargs):
    """Move or rename the item at ``path1``.

    When ``path2`` already exists it is used as the new parent; otherwise
    the item goes under the parent of ``path2`` and takes its last segment
    as the new name. The cache entry of ``path1`` is invalidated.
    """
    source_item_id = await self._get_item_id(path1, throw_on_missing=True)
    source_url = self._path_to_url(path1, item_id=source_item_id)
    destination = self._strip_protocol(path2)
    new_name = None
    if await self._get_item_id(destination):
        new_parent = await self._get_item_reference(destination)
    else:
        parent_path, new_name = destination.rsplit("/", 1)
        new_parent = await self._get_item_reference(parent_path)
    payload = {"parentReference": new_parent}
    if new_name:
        payload["name"] = new_name

    await self._msgraph_patch(source_url, json=payload)
    self.invalidate_cache(path1)
646
+
647
+ mv = sync_wrapper(_mv)
648
+
649
def _open(
    self,
    path,
    mode="rb",
    block_size="default",
    cache_type="readahead",
    autocommit=True,
    size=None,
    cache_options=None,
    item_id=None,
    **kwargs,
):
    """Open a file for reading or writing (synchronous API).

    Parameters
    ----------
    path: string
        Path of file
    mode: string
        One of 'r', 'w', 'a', 'rb', 'wb', or 'ab', with the same meaning as
        for the built-in `open` function.
    block_size: int
        Block size handed to the buffered file.
    cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
        Caching policy in read mode.
    autocommit: bool
        Handed to the buffered file.
    size: int
        Known size of the file; looked up in append mode when not given.
    cache_options : dict
        Additional options passed to the constructor for the cache specified
        by `cache_type`.
    item_id: str
        If given, the item_id will be used instead of the path to open the file.
    kwargs: dict-like
        Additional parameters handed to the buffered file.

    Raises
    ------
    FileNotFoundError
        In read or append mode, when ``path`` is not an existing file.
    """
    needs_existing_file = "r" in mode or "a" in mode
    if needs_existing_file and not self.isfile(path):
        raise FileNotFoundError(f"File not found: {path}")
    if "a" in mode and not size:
        size = self.size(path)
    resolved_item_id = item_id or self.get_item_id(path)
    return MSGraphBuffredFile(
        fs=self,
        path=path,
        mode=mode,
        block_size=block_size,
        autocommit=autocommit,
        cache_type=cache_type,
        cache_options=cache_options,
        size=size,
        item_id=resolved_item_id,
        **kwargs,
    )
704
+
705
async def open_async(self, path, mode="rb", **kwargs):
    """Open a file for asynchronous, streamed reading or writing.

    Parameters:
        path: path of the file.
        mode: a binary mode ("rb", "wb", "ab", ...).
        **kwargs: ``item_id`` may be given to address the file by ID; the
            remaining arguments go to the streamed file.

    Raises:
        FileNotFoundError: in read or append mode, when ``path`` is not an
            existing file.
        ValueError: for a text mode or when ``compression`` is requested.
    """
    if ("r" in mode or "a" in mode) and not await self._isfile(path):
        raise FileNotFoundError(f"File not found: {path}")
    if "b" not in mode or kwargs.get("compression"):
        raise ValueError
    size = None
    # pop (not get): item_id is passed explicitly below, so leaving it in
    # kwargs would pass the keyword twice and raise TypeError
    item_id = kwargs.pop("item_id", None) or await self._get_item_id(
        path, throw_on_missing=False
    )
    if "rb" in mode or "a" in mode:
        # The size is given to the constructor so that it does not have to
        # call the info method itself: _info is a coroutine and cannot be
        # awaited from a constructor.
        info = await self._info(path)
        size = info["size"]
    return MSGrpahStreamedFile(
        self, path, mode, size=size, item_id=item_id, **kwargs
    )
725
+
726
async def _touch(self, path, truncate=True, item_id=None, **kwargs):
    """Create an empty file, or truncate / update the timestamp of a file.

    Parameters:
        path: path of the file.
        truncate (bool): for an existing file, True replaces its content
            with an empty payload; False only sets its last-modified time
            to the current UTC time.
        item_id (str): if given, used instead of looking the path up.

    Raises:
        FileNotFoundError: when the file has to be created and its parent
            directory does not exist.
    """
    item_id = item_id or await self._get_item_id(path)
    if item_id:
        # The original tested ``item_id and not truncate`` first, which made
        # the truncate branch unreachable.
        if truncate:
            url = self._path_to_url(path, item_id=item_id, action="content")
            await self._msgraph_put(
                url,
                content=b"",
                headers={"Content-Type": "application/octet-stream"},
            )
        else:
            url = self._path_to_url(path, item_id=item_id)
            # ``datetime`` is the module here: the class is datetime.datetime
            now = datetime.datetime.now(datetime.timezone.utc)
            # The top-level lastModifiedDateTime of a driveItem is read-only;
            # the writable timestamp lives in the fileSystemInfo facet.
            await self._msgraph_patch(
                url,
                json={
                    "fileSystemInfo": {
                        "lastModifiedDateTime": now.strftime("%Y-%m-%dT%H:%M:%SZ")
                    }
                },
            )
    else:
        # the file does not exist: create it empty, addressed relative to
        # its parent directory
        parent_path, file_name = path.rsplit("/", 1)
        parent_id = await self._get_item_id(parent_path, throw_on_missing=True)
        item_id = f"{parent_id}:/{file_name}:"
        url = self._path_to_url(path, item_id=item_id, action="content")
        headers = {"Content-Type": self._guess_type(path)}
        await self._msgraph_put(url, content=b"", headers=headers)
    self.invalidate_cache(path)
751
+
752
+ touch = sync_wrapper(_touch)
753
+
754
async def _checksum(self, path, refresh=False):
    """Unique value for current version of file.

    If the checksum is the same from one moment to another, the contents
    are guaranteed to be the same. If the checksum changes, the contents
    *might* have changed.

    Parameters
    ----------
    path : string/bytes
        path of file to get checksum for
    refresh : bool (=False)
        forwarded to ``_info``

    Returns
    -------
    int
        For a file, a token derived from the ``eTag`` (or ``cTag``) of the
        drive item; for a directory, or a file without such a tag, a token
        of the whole info dictionary.
    """
    info = await self._info(path, refresh=refresh)

    if info["type"] != "directory":
        # The info dictionary has no "ETag" key: the tag is on the raw drive
        # item. It is not a hexadecimal string, so it is tokenized instead
        # of being parsed with int(..., 16).
        drive_item = info.get("item_info") or {}
        tag = drive_item.get("eTag") or drive_item.get("cTag")
        if tag:
            return int(tokenize(tag), 16)
    return int(tokenize(info), 16)
775
+
776
+ checksum = sync_wrapper(_checksum)
777
+
778
+ ########################################################
779
+ # Additional methods specific to the Microsoft Graph API
780
+ ########################################################
781
async def _get_content(self, path, item_id=None, params=None) -> bytes:
    """Get the item content.

    ``params`` is sent as the query string of the request; a ``format``
    entry can be used to ask for a converted output (useful to convert docx
    to pdf).

    Parameters:
        path (str): The path of the item.
        item_id (str): The ID of the item to get the content of.
        params (dict): Query parameters to send with the request.

    Returns:
        bytes: the response body
    """
    params = params or {}
    url = self._path_to_url(path, item_id=item_id, action="content")
    # pass the dict as query parameters; spreading it with ** turned each
    # entry into an (invalid) keyword argument of the HTTP client
    response = await self._msgraph_get(url, params=params)
    return response.content
797
+
798
+ get_content = sync_wrapper(_get_content)
799
+
800
async def _preview(self, path, item_id: str | None = None) -> str:
    """Post the ``preview`` action for a file and return its ``getUrl`` value.

    An empty list is returned when the response has no ``getUrl`` field.

    Raises:
        FileNotFoundError: if ``path`` is not an existing file.
    """
    is_file = await self._isfile(path)
    if not is_file:
        raise FileNotFoundError(f"File not found: {path}")
    preview_url = self._path_to_url(path, item_id=item_id, action="preview")
    response = await self._msgraph_post(preview_url)
    body = response.json()
    return body.get("getUrl", [])
806
+
807
+ preview = sync_wrapper(_preview)
808
+
809
async def _checkout(self, path: str, item_id: str | None = None):
    """Check out a file.

    Checking out prevents others from editing the document and keeps your
    changes invisible until the document is checked in.

    Parameters
    ----------
    path : str
        Path of the file to check out
    item_id: str
        If given, the item_id will be used instead of the path to check
        out the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` is not an existing file.
    """
    is_file = await self._isfile(path)
    if not is_file:
        raise FileNotFoundError(f"File not found: {path}")
    await self._msgraph_post(
        self._path_to_url(path, item_id=item_id, action="checkout")
    )
825
+
826
+ checkout = sync_wrapper(_checkout)
827
+
828
async def _checkin(self, path: str, comment: str, item_id: str | None = None):
    """Check in a checked out file, making its version available to others.

    Parameters
    ----------
    path : str
        Path of the file to check in
    comment : str
        Comment to add to the check-in
    item_id: str
        If given, the item_id will be used instead of the path to check
        in the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` is not an existing file.
    """
    is_file = await self._isfile(path)
    if not is_file:
        raise FileNotFoundError(f"File not found: {path}")
    checkin_url = self._path_to_url(path, item_id=item_id, action="checkin")
    payload = {"comment": comment}
    await self._msgraph_post(checkin_url, json=payload)
846
+
847
+ checkin = sync_wrapper(_checkin)
848
+
849
async def _get_versions(self, path: str, item_id: str | None = None) -> list[dict]:
    """Return the ``value`` list of the ``versions`` collection of a file.

    Parameters
    ----------
    path : str
        Path of the file to get the versions of
    item_id: str
        If given, the item_id will be used instead of the path to get
        the versions of the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` is not an existing file.
    """
    is_file = await self._isfile(path)
    if not is_file:
        raise FileNotFoundError(f"File not found: {path}")
    versions_url = self._path_to_url(path, item_id=item_id, action="versions")
    response = await self._msgraph_get(versions_url)
    body = response.json()
    return body.get("value", [])
865
+
866
+ get_versions = sync_wrapper(_get_versions)
867
+
868
+
869
+ class MSGDriveFS(AbstractMSGraphFS):
870
+ """A filesystem that represents a SharePoint site drive as a filesystem.
871
+
872
+ parameters:
873
+ drive_id (str): The ID of the SharePoint drive.
874
+ site_name (str): The name of the SharePoint site (optional, only used to list the recycle bin items).
875
+ use_recycle_bin: bool (=False)
876
+ If True, when a file is deleted, it will be moved to the recycle bin.
877
+ If False, the file will be permanently deleted. Default is False.
878
+ oauth2_client_params (dict): Parameters for the OAuth2 client to use for
879
+ authentication. see https://docs.authlib.org/en/latest/client/api.html#authlib.integrations.httpx_client.AsyncOAuth2Client
880
+ """
881
+
882
+ protocol = ["msgd"]
883
+
884
+ def __init__(
885
+ self,
886
+ drive_id: str,
887
+ oauth2_client_params: dict,
888
+ site_name: str | None = None,
889
+ **kwargs,
890
+ ):
891
+ super().__init__(oauth2_client_params=oauth2_client_params, **kwargs)
892
+ self.site_name: str = site_name
893
+ self.drive_id: str = drive_id
894
+ self.drive_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}"
895
+
896
+ def _path_to_url(self, path, item_id=None, action=None) -> str:
897
+ action = action and f"/{action}" if action else ""
898
+ path = self._strip_protocol(path).rstrip("/")
899
+ if path and not path.startswith("/"):
900
+ path = "/" + path
901
+ if path:
902
+ path = f":{path}:"
903
+ if item_id:
904
+ return f"{self.drive_url}/items/{item_id}{action}"
905
+
906
+ return f"{self.drive_url}/root{path}{action}"
907
+
908
+ async def _get_site_id(self) -> str:
909
+ url = f"https://graph.microsoft.com/v1.0/sites?search=¼{self.site_name}"
910
+ response = await self._msgraph_get(url)
911
+ return response.json()["value"][0]["id"]
912
+
913
+ async def _get_item_reference(self, path: str, item_id: str | None = None) -> dict:
914
+ item_reference = await super()._get_item_reference(path, item_id=item_id)
915
+ return {
916
+ "driveId": self.drive_id,
917
+ "id": item_reference["id"],
918
+ }
919
+
920
+ async def _get_recycle_bin_items(self) -> list[dict]:
921
+ """Get the items in the recycle bin. (Beta!!)
922
+
923
+ Returns:
924
+ list[dict]: A list of dictionaries with information about the items in the recycle bin.
925
+
926
+ see https://docs.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0
927
+ """
928
+ site_id = await self._get_site_id()
929
+ url = f"https://graph.microsoft.com/beta/sites/{site_id}/recycleBin/items"
930
+ response = await self._msgraph_get(url)
931
+ return response.json().get("value", [])
932
+
933
+ get_recycle_bin_items = sync_wrapper(_get_recycle_bin_items)
934
+
935
+
936
class AsyncStreamedFileMixin:
    """Mixin for streamed file-like objects using async iterators.

    It holds the asynchronous implementation of the upload and download
    hooks shared by the buffered and the streamed file classes. The
    concrete class must have set ``fs``, ``path``, ``mode``, ``buffer``,
    ``loc``, ``autocommit`` and ``kwargs`` before ``_init__mixin`` is
    called.
    """

    # block_size must be a multiple of 320 KiB when writing.
    _UPLOAD_BLOCK_MULTIPLE = 320 * 1024

    def _init__mixin(self, **kwargs):
        """Initialise the mixin state from the file constructor arguments.

        Parameters
        ----------
        **kwargs
            Constructor arguments of the file object. The keys read here
            are ``block_size``, ``item_id`` and ``size``.

        Raises
        ------
        ValueError
            If the file is opened for writing or appending with a block
            size that is not a multiple of 320 KiB.
        """
        self.path = self.fs._strip_protocol(self.path)
        block_size = kwargs.get("block_size", "default")
        if block_size == "default":
            block_size = None
        self.blocksize = block_size if block_size is not None else self.fs.blocksize
        if "w" in self.mode or "a" in self.mode:
            if self.blocksize % self._UPLOAD_BLOCK_MULTIPLE != 0:
                raise ValueError("block_size must be a multiple of 320 KiB")
        self._item_id = kwargs.get("item_id")
        # NOTE(review): the original expression was
        #     "a" in self.mode and self.item_id is not None
        # ``item_id`` is an async property, so the second operand was a
        # coroutine object: always "not None" and never awaited. The
        # effective behaviour (append mode whenever "a" is in the mode) is
        # kept here without creating the coroutine. Whether append mode
        # should also require an existing item must be confirmed against
        # the callers.
        self._append_mode = "a" in self.mode
        if self._append_mode:
            # ``size`` may be passed explicitly as None; keep ``loc`` an int.
            self.loc = kwargs.get("size") or 0
        self._reset_session_info()

    @property
    async def item_id(self):
        """Awaitable item id of the file, looked up lazily and then cached."""
        if self._item_id is None:
            self._item_id = await self.fs._get_item_id(self.path)
        return self._item_id

    async def _get_upload_target_id(self) -> str:
        """Return the item address to upload to.

        This is the item id when it is known. Otherwise the file is
        addressed relative to its parent folder as
        ``{parent_id}:/{file_name}:``.
        """
        item_id = await self.item_id
        if item_id:
            return item_id
        # rpartition also copes with a path without "/": the parent is then
        # the empty path (presumably the drive root -- TODO confirm).
        parent_path, _, file_name = self.path.rpartition("/")
        parent_id = await self.fs._get_item_id(parent_path, throw_on_missing=True)
        return f"{parent_id}:/{file_name}:"

    async def _create_upload_session(self) -> tuple[str, datetime.datetime]:
        """Create a new upload session for the file.

        Returns:
            tuple[str, datetime.datetime]: The URL of the upload session and the expiration date time.

        see https://learn.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0
        """
        item_id = await self._get_upload_target_id()
        url = self.fs._path_to_url(
            self.path, item_id=item_id, action="createUploadSession"
        )
        response = await self.fs._msgraph_post(
            url,
            json={
                "@microsoft.graph.conflictBehavior": "replace",
                # We don't know the size of the file. Explicit commit is required.
                "deferCommit": True,
            },
        )
        session = response.json()
        expiration_dt = datetime.datetime.fromisoformat(session["expirationDateTime"])
        return session["uploadUrl"], expiration_dt

    @property
    def _is_upload_session_expired(self) -> bool:
        """Check if the current upload session is expired.

        A missing expiration date is reported as expired.
        """
        if not self._upload_expiration_dt:
            return True
        now = datetime.datetime.now(datetime.timezone.utc)
        return now > self._upload_expiration_dt

    def _reset_session_info(self):
        """Reset the upload session information."""
        self._upload_session_url = None
        self._upload_expiration_dt = None
        self._chunk_start_pos = 0
        self._remaining_bytes = None
        self._write_called = False

    async def _upload_content_at_once(self, data):
        """Upload ``data`` as the whole file content with a single PUT.

        A ``content-type`` header guessed from the path is added unless
        the ``headers`` entry of ``self.kwargs`` already provides one.
        """
        # Work on a copy so the dict supplied by the caller is not modified.
        headers = dict(self.kwargs.get("headers") or {})
        if not any(name.lower() == "content-type" for name in headers):
            headers["content-type"] = self.fs._guess_type(self.path)
        item_id = await self._get_upload_target_id()
        url = self.fs._path_to_url(self.path, item_id=item_id, action="content")
        await self.fs._msgraph_put(url, content=data, headers=headers)
        self.fs.invalidate_cache(self.path)

    async def _abort_upload_session(self):
        """Abort the current upload session.

        The session is deleted remotely only when it exists and has not
        expired; the local session state is reset in every case.
        """
        if self._upload_session_url and not self._is_upload_session_expired:
            await self.fs._msgraph_delete(self._upload_session_url)
        self._reset_session_info()

    async def _commit_upload_session(self):
        """Commit the current upload session.

        Raises
        ------
        RuntimeError
            If a session exists but has expired.
        """
        if self._upload_session_url:
            if self._is_upload_session_expired:
                raise RuntimeError("The upload session has expired.")
            await self.fs._msgraph_post(self._upload_session_url)
        self._reset_session_info()

    def _invalidate_parent_listings(self):
        """Invalidate cached listings that do not know the file yet.

        Complex cache invalidation, since the file's appearance can create
        several directories: every cached ancestor listing that lacks the
        next path component is invalidated.
        """
        parts = self.path.split("/")
        path = parts[0]
        for part in parts[1:]:
            child = path + "/" + part
            if path in self.fs.dircache and not any(
                entry["name"] == child for entry in self.fs.dircache[path]
            ):
                self.fs.invalidate_cache(path)
            path = child

    async def _commit(self):
        """Move the written data to its final destination.

        * Nothing written: any session is aborted and the file is touched.
        * No upload session: the buffer is sent with a one-shot upload,
          unless the file was opened in append mode and never written to.
        * Otherwise the upload session is committed.

        Raises
        ------
        RuntimeError
            If there is neither an upload session nor a buffer, or if the
            upload session has expired.
        """
        _logger.debug("Commit %s", self)
        # Avoid resetting a file that has been opened in append mode
        # and has not been written to.
        append_no_write = self._append_mode and not self._write_called
        if self.tell() == 0:
            if self.buffer is not None:
                _logger.debug("Empty file committed %s", self)
                await self._abort_upload_session()
                await self.fs._touch(self.path, **self.kwargs)
        elif not self._upload_session_url:
            if self.buffer is None:
                raise RuntimeError(
                    f"Cannot commit {self.path!r}: "
                    "no upload session and no buffered data"
                )
            if not append_no_write:
                _logger.debug("One-shot upload of %s", self)
                self.buffer.seek(0)
                data = self.buffer.read()
                await self._upload_content_at_once(data)

        if append_no_write:
            # if not written, we must abort the upload session otherwise the file
            # will be truncated
            await self._abort_upload_session()
        else:
            await self._commit_upload_session()

        self._invalidate_parent_listings()

    # Blocking variant of ``_commit``.
    commit = sync_wrapper(_commit)

    async def _discard(self):
        """Throw away the pending upload by aborting the upload session."""
        await self._abort_upload_session()

    # Blocking variant of ``_discard``.
    discard = sync_wrapper(_discard)

    async def _init_write_append_mode(self):
        """Add the initial content of the file to the buffer."""
        if self._append_mode and not self._write_called:
            # If the file is opened in append mode, we must get the current content
            # of the file and add it to the buffer.
            content = await self.fs._cat_file(self.path, item_id=self._item_id)
            self.buffer.write(content)
            self.loc = len(content)

    ########################################################
    ## AbstractBufferedFile methods to implement or override
    ########################################################

    async def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload.

        Parameters
        ==========
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.

        Returns
        =======
        bool
            ``not final``.
        """
        if self.autocommit and final and self.tell() < self.blocksize:
            # only happens when closing small file, use one-shot PUT
            chunk_to_write = b""
        else:
            self.buffer.seek(0)
            if self._remaining_bytes:
                # Complete the bytes left over from the previous call up to
                # one full block.
                chunk_to_write = self._remaining_bytes + self.buffer.read(
                    self.blocksize - len(self._remaining_bytes)
                )
                self._remaining_bytes = None
            else:
                chunk_to_write = self.buffer.read(self.blocksize)
        # we must write into chunk of the same block size. We therefore need to
        # buffer the remaining bytes if the buffer is not a multiple of the block size
        while chunk_to_write:
            chunk_size = len(chunk_to_write)
            if chunk_size < self.blocksize and not final:
                self._remaining_bytes = chunk_to_write
                break

            last_byte = self._chunk_start_pos + chunk_size - 1
            headers = {
                "Content-Length": str(chunk_size),
                "Content-Range": f"bytes {self._chunk_start_pos}-{last_byte}/*",
            }
            response = await self.fs._msgraph_put(
                self._upload_session_url,
                content=chunk_to_write,
                headers=headers,
            )
            # Each response carries the refreshed expiration of the session.
            self._upload_expiration_dt = datetime.datetime.fromisoformat(
                response.json()["expirationDateTime"]
            )
            self._chunk_start_pos += chunk_size
            chunk_to_write = self.buffer.read(self.blocksize)

        if self.autocommit and final:
            await self._commit()
        return not final

    async def _initiate_upload(self):
        """Open an upload session unless a one-shot upload will be used."""
        if self.autocommit and self.tell() < self.blocksize:
            # only happens when closing small file, use one-shot PUT
            return
        # If the file to be uploaded is larger than the block size, then we need to
        # create an upload session to upload the file in chunks.
        self._chunk_start_pos = 0
        (
            self._upload_session_url,
            self._upload_expiration_dt,
        ) = await self._create_upload_session()

    async def _fetch_range(self, start, end) -> bytes:
        """Get the specified set of bytes from remote."""
        # Reuse the cached item id instead of resolving the path every time.
        item_id = await self.item_id
        return await self.fs._cat_file(self.path, start=start, end=end, item_id=item_id)

    @property
    def loop(self):
        """Event loop of the underlying filesystem."""
        return self.fs.loop
1159
+
1160
+
1161
class MSGraphBuffredFile(AsyncStreamedFileMixin, AbstractBufferedFile):
    """Buffered, blocking file object for a file stored in a SharePoint drive.

    Parameters
    ----------
    fs: MSGDriveFS
        Filesystem the file belongs to.
    path: str
        Location of the file.
    mode: str
        Opening mode: 'rb', 'wb' or 'ab', with the same meaning as for the
        built-in `open` function.
    block_size: int
        Size of the read/write buffer; 'default' selects the class default.
    autocommit: bool
        Whether data is written to its final destination; may only matter
        at the time the file is closed.
    cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
        Read-mode caching policy. See the definitions in ``core``.
    cache_options : dict
        Extra options handed to the constructor of the cache selected by
        `cache_type`.
    size: int
        When given in read mode, avoids having to look up the file size.
    kwargs:
        Kept as self.kwargs
    """

    # Blocking variants of the coroutines implemented by the mixin.
    _init_write_append_mode = sync_wrapper(
        AsyncStreamedFileMixin._init_write_append_mode
    )

    ########################################################
    ## AbstractBufferedFile methods to implement or override
    ########################################################
    _upload_chunk = sync_wrapper(AsyncStreamedFileMixin._upload_chunk)
    _initiate_upload = sync_wrapper(AsyncStreamedFileMixin._initiate_upload)
    _fetch_range = sync_wrapper(AsyncStreamedFileMixin._fetch_range)

    def __init__(
        self,
        fs: MSGDriveFS,
        path: str,
        mode: str = "rb",
        block_size: int | None = None,
        autocommit: bool = True,
        cache_type: str = "readahead",
        cache_options: dict | None = None,
        size: int | None = None,
        **kwargs,
    ):
        AbstractBufferedFile.__init__(
            self,
            fs,
            path,
            mode,
            block_size,
            autocommit,
            cache_type,
            cache_options,
            size,
            **kwargs,
        )
        # The mixin receives the extra keyword arguments plus every named
        # constructor argument; the named ones take precedence.
        mixin_options = {
            **kwargs,
            "fs": fs,
            "path": path,
            "mode": mode,
            "block_size": block_size,
            "autocommit": autocommit,
            "cache_type": cache_type,
            "cache_options": cache_options,
            "size": size,
        }
        AsyncStreamedFileMixin._init__mixin(self, **mixin_options)

    def write(self, data):
        """Write ``data``, preparing append mode on the first call."""
        is_first_write = not self._write_called
        if is_first_write:
            self._init_write_append_mode()
            self._write_called = True
        return super().write(data)
1246
+
1247
+
1248
class MSGrpahStreamedFile(AsyncStreamedFileMixin, AbstractAsyncStreamedFile):
    """Asynchronous streamed file object for a file stored in a SharePoint drive.

    Parameters
    ----------
    fs: MSGDriveFS
        Filesystem the file belongs to.
    path: str
        Location of the file.
    mode: str
        Opening mode: 'rb', 'wb' or 'ab', with the same meaning as for the
        built-in `open` function.
    block_size: int
        Size of the read/write buffer; 'default' selects the class default.
    autocommit: bool
        Whether data is written to its final destination; may only matter
        at the time the file is closed.
    cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
        Read-mode caching policy. See the definitions in ``core``.
    cache_options : dict
        Extra options handed to the constructor of the cache selected by
        `cache_type`.
    size: int
        When given in read mode, avoids having to look up the file size.
    kwargs:
        Kept as self.kwargs
    """

    def __init__(
        self,
        fs: MSGDriveFS,
        path: str,
        mode: str = "rb",
        block_size: int | None = None,
        autocommit: bool = True,
        cache_type: str = "readahead",
        cache_options: dict | None = None,
        size: int | None = None,
        **kwargs,
    ):
        AbstractAsyncStreamedFile.__init__(
            self,
            fs,
            path,
            mode,
            block_size,
            autocommit,
            cache_type,
            cache_options,
            size,
            **kwargs,
        )
        # The mixin receives the extra keyword arguments plus every named
        # constructor argument; the named ones take precedence.
        mixin_options = {
            **kwargs,
            "fs": fs,
            "path": path,
            "mode": mode,
            "block_size": block_size,
            "autocommit": autocommit,
            "cache_type": cache_type,
            "cache_options": cache_options,
            "size": size,
        }
        AsyncStreamedFileMixin._init__mixin(self, **mixin_options)

    async def write(self, data):
        """Write ``data``, preparing append mode on the first call."""
        is_first_write = not self._write_called
        if is_first_write:
            await self._init_write_append_mode()
            self._write_called = True
        return await super().write(data)

    async def readinto(self, b):
        """Asynchronous equivalent of the builtin file ``readinto`` method.

        Reads at most as many bytes as ``b`` can hold, copies them to the
        start of ``b`` and returns the number of bytes read.

        https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
        """
        target = memoryview(b).cast("B")
        chunk = await self.read(target.nbytes)
        n_read = len(chunk)
        target[:n_read] = chunk
        return n_read