msgraphfs 0.1__tar.gz → 0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msgraphfs-0.4/PKG-INFO +232 -0
- msgraphfs-0.4/README.md +186 -0
- {msgraphfs-0.1 → msgraphfs-0.4}/pyproject.toml +14 -1
- msgraphfs-0.4/src/msgraphfs/__init__.py +14 -0
- {msgraphfs-0.1 → msgraphfs-0.4}/src/msgraphfs/core.py +1000 -64
- msgraphfs-0.1/PKG-INFO +0 -250
- msgraphfs-0.1/README.md +0 -204
- msgraphfs-0.1/src/msgraphfs/__init__.py +0 -1
- {msgraphfs-0.1 → msgraphfs-0.4}/.gitignore +0 -0
- {msgraphfs-0.1 → msgraphfs-0.4}/LICENSE +0 -0
msgraphfs-0.4/PKG-INFO
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: msgraphfs
|
|
3
|
+
Version: 0.4
|
|
4
|
+
Dynamic: Summary
|
|
5
|
+
Project-URL: Source, https://github.com/acsone/msgraphfs
|
|
6
|
+
Author-email: Laurent Mignon <laurent.mignon@acsone.eu>
|
|
7
|
+
License: The MIT License (MIT)
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2024 Laurent Mignon (ACSONE)
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Classifier: Intended Audience :: Developers
|
|
30
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
31
|
+
Classifier: Operating System :: OS Independent
|
|
32
|
+
Requires-Python: >=3.10
|
|
33
|
+
Requires-Dist: authlib
|
|
34
|
+
Requires-Dist: fsspec>=0.7.5
|
|
35
|
+
Requires-Dist: httpx[http2]
|
|
36
|
+
Provides-Extra: release
|
|
37
|
+
Requires-Dist: towncrier; extra == 'release'
|
|
38
|
+
Provides-Extra: test
|
|
39
|
+
Requires-Dist: coverage[toml]; extra == 'test'
|
|
40
|
+
Requires-Dist: keyring; extra == 'test'
|
|
41
|
+
Requires-Dist: pytest; extra == 'test'
|
|
42
|
+
Requires-Dist: pytest-asyncio; extra == 'test'
|
|
43
|
+
Requires-Dist: pytest-cov; extra == 'test'
|
|
44
|
+
Requires-Dist: requests; extra == 'test'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
Filesystem interface to Microsoft Graph API (SharePoint, OneDrive)
|
|
48
|
+
------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
[msgraphfs on PyPI](https://pypi.python.org/pypi/msgraphfs/)
|
|
51
|
+
|
|
52
|
+
Quickstart
|
|
53
|
+
----------
|
|
54
|
+
|
|
55
|
+
This package can be installed using:
|
|
56
|
+
|
|
57
|
+
`pip install msgraphfs`
|
|
58
|
+
|
|
59
|
+
or
|
|
60
|
+
|
|
61
|
+
`uv add msgraphfs`
|
|
62
|
+
|
|
63
|
+
The `msgd://`, `sharepoint://`, and `onedrive://` protocols are included in fsspec's known_implementations registry, allowing seamless integration with fsspec-compatible libraries.
|
|
64
|
+
|
|
65
|
+
To use the filesystem with specific site and drive:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import pandas as pd
|
|
69
|
+
|
|
70
|
+
storage_options = {
|
|
71
|
+
'client_id': 'your-client-id',
|
|
72
|
+
'tenant_id': 'your-tenant-id',
|
|
73
|
+
'client_secret': 'your-client-secret',
|
|
74
|
+
'site_name': 'YourSiteName',
|
|
75
|
+
'drive_name': 'Documents'
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
df = pd.read_csv('msgd://folder/data.csv', storage_options=storage_options)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
To use multi-site mode where site and drive are specified in the URL:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import pandas as pd
|
|
85
|
+
|
|
86
|
+
storage_options = {
|
|
87
|
+
'client_id': 'your-client-id',
|
|
88
|
+
'tenant_id': 'your-tenant-id',
|
|
89
|
+
'client_secret': 'your-client-secret'
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
df = pd.read_csv('msgd://YourSite/Documents/folder/data.csv', storage_options=storage_options)
|
|
93
|
+
df = pd.read_parquet('sharepoint://AnotherSite/Reports/data.parquet', storage_options=storage_options)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Accepted protocol / uri formats include:
|
|
97
|
+
- `msgd://site/drive/path/file` (multi-site mode)
|
|
98
|
+
- `sharepoint://site/drive/path/file` (multi-site mode)
|
|
99
|
+
- `onedrive://drive/path/file` (OneDrive personal)
|
|
100
|
+
- `msgd://path/file` (single-site mode when site_name and drive_name specified in storage_options)
|
|
101
|
+
|
|
102
|
+
You can optionally set the `MSGRAPHFS_CLIENT_ID`, `MSGRAPHFS_TENANT_ID`, and `MSGRAPHFS_CLIENT_SECRET` environment variables; the credentials are then read from the environment and can be omitted from `storage_options`:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
import pandas as pd
|
|
106
|
+
|
|
107
|
+
# With environment variables set, you can omit credentials from storage_options
|
|
108
|
+
storage_options = {'site_name': 'YourSite', 'drive_name': 'Documents'}
|
|
109
|
+
df = pd.read_csv('msgd://folder/data.csv', storage_options=storage_options)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Details
|
|
113
|
+
-------
|
|
114
|
+
|
|
115
|
+
The package provides a pythonic filesystem implementation for Microsoft Graph API drives (SharePoint and OneDrive), facilitating interactions between Microsoft 365 services and data processing libraries like Pandas, Dask, and others. It is implemented on top of the [fsspec](https://filesystem-spec.readthedocs.io/) base class and talks to the Microsoft Graph REST API using `httpx` and `authlib`.
|
|
116
|
+
|
|
117
|
+
Operations work with Azure AD application credentials using the client credentials flow, suitable for server-to-server authentication scenarios.
|
|
118
|
+
|
|
119
|
+
The filesystem automatically handles OAuth2 token management, site and drive discovery, and provides fork-safe lazy initialization perfect for multi-process environments like Apache Airflow.
|
|
120
|
+
|
|
121
|
+
### Setting credentials
|
|
122
|
+
|
|
123
|
+
The `storage_options` can be instantiated with the following authentication parameters:
|
|
124
|
+
|
|
125
|
+
**Required for authentication:**
|
|
126
|
+
- `client_id`: Azure AD application (client) ID
|
|
127
|
+
- `tenant_id`: Azure AD directory (tenant) ID
|
|
128
|
+
- `client_secret`: Azure AD application client secret
|
|
129
|
+
|
|
130
|
+
**Optional filesystem parameters:**
|
|
131
|
+
- `site_name`: SharePoint site name (for single-site mode or site discovery)
|
|
132
|
+
- `drive_name`: Drive/library name (e.g., "Documents", "CustomLibrary")
|
|
133
|
+
- `drive_id`: Specific drive ID (bypasses site/drive discovery)
|
|
134
|
+
- `oauth2_client_params`: Pre-built OAuth2 parameters dict
|
|
135
|
+
- `use_recycle_bin`: Enable recycle bin operations (default: False)
|
|
136
|
+
|
|
137
|
+
For more details on all available parameters, see the [MSGDriveFS documentation](https://github.com/acsone/msgraphfs).
|
|
138
|
+
|
|
139
|
+
The following environment variables can be set and will be automatically detected:
|
|
140
|
+
- `MSGRAPHFS_CLIENT_ID` (or `AZURE_CLIENT_ID` as fallback)
|
|
141
|
+
- `MSGRAPHFS_TENANT_ID` (or `AZURE_TENANT_ID` as fallback)
|
|
142
|
+
- `MSGRAPHFS_CLIENT_SECRET` (or `AZURE_CLIENT_SECRET` as fallback)
|
|
143
|
+
|
|
144
|
+
### Usage modes
|
|
145
|
+
|
|
146
|
+
The filesystem can be used in different modes based on the `storage_options` provided:
|
|
147
|
+
|
|
148
|
+
1. **Single-site mode**: Specify `site_name` and `drive_name` in storage_options, then use relative paths in URLs:
|
|
149
|
+
```python
|
|
150
|
+
storage_options = {
|
|
151
|
+
'client_id': CLIENT_ID,
|
|
152
|
+
'tenant_id': TENANT_ID,
|
|
153
|
+
'client_secret': CLIENT_SECRET,
|
|
154
|
+
'site_name': 'YourSite',
|
|
155
|
+
'drive_name': 'Documents'
|
|
156
|
+
}
|
|
157
|
+
df = pd.read_csv('msgd://folder/file.csv', storage_options=storage_options)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
2. **Multi-site mode**: Omit `site_name` and `drive_name` from storage_options, specify them in the URL:
|
|
161
|
+
```python
|
|
162
|
+
storage_options = {
|
|
163
|
+
'client_id': CLIENT_ID,
|
|
164
|
+
'tenant_id': TENANT_ID,
|
|
165
|
+
'client_secret': CLIENT_SECRET
|
|
166
|
+
}
|
|
167
|
+
df = pd.read_csv('msgd://YourSite/Documents/folder/file.csv', storage_options=storage_options)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
3. **Direct drive access**: Use `drive_id` to bypass site discovery:
|
|
171
|
+
```python
|
|
172
|
+
storage_options = {
|
|
173
|
+
'client_id': CLIENT_ID,
|
|
174
|
+
'tenant_id': TENANT_ID,
|
|
175
|
+
'client_secret': CLIENT_SECRET,
|
|
176
|
+
'drive_id': 'specific-drive-id'
|
|
177
|
+
}
|
|
178
|
+
df = pd.read_csv('msgd://folder/file.csv', storage_options=storage_options)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Advanced features
|
|
182
|
+
|
|
183
|
+
#### File operations with metadata
|
|
184
|
+
```python
|
|
185
|
+
import fsspec
|
|
186
|
+
|
|
187
|
+
fs = fsspec.filesystem('msgd', **storage_options)
|
|
188
|
+
|
|
189
|
+
# List files with detailed metadata
|
|
190
|
+
files = fs.ls('/folder', detail=True)
|
|
191
|
+
|
|
192
|
+
# Get file information with permissions
|
|
193
|
+
info = fs.info('/document.pdf', expand='permissions')
|
|
194
|
+
|
|
195
|
+
# Read file with version control
|
|
196
|
+
with fs.open('/document.docx', mode='r') as f:
|
|
197
|
+
content = f.read()
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
#### Permission management
|
|
201
|
+
```python
|
|
202
|
+
# Get detailed permissions for files and folders
|
|
203
|
+
permissions = fs.get_permissions('/sensitive-folder')
|
|
204
|
+
print(f"Total permissions: {permissions['summary']['total_permissions']}")
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
#### Integration with data processing libraries
|
|
208
|
+
```python
|
|
209
|
+
import dask.dataframe as dd
|
|
210
|
+
|
|
211
|
+
# Read multiple CSV files using Dask
|
|
212
|
+
ddf = dd.read_csv('msgd://YourSite/Data/*.csv', storage_options=storage_options)
|
|
213
|
+
|
|
214
|
+
# Read Parquet files
|
|
215
|
+
ddf = dd.read_parquet('sharepoint://Reports/Analytics/data.parquet', storage_options=storage_options)
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### Azure AD Setup
|
|
219
|
+
|
|
220
|
+
To use this filesystem, you need to register an Azure AD application:
|
|
221
|
+
|
|
222
|
+
1. Go to the [Azure Portal](https://portal.azure.com)
|
|
223
|
+
2. Register a new application under "Azure Active Directory" > "App registrations"
|
|
224
|
+
3. Configure API permissions (Application permissions). Choose based on your needs:
|
|
225
|
+
- For read-only access: `Sites.Read.All`
|
|
226
|
+
- For read-write access: `Sites.ReadWrite.All`
|
|
227
|
+
- Optional for enhanced functionality: `Files.Read.All` or `Files.ReadWrite.All`
|
|
228
|
+
4. Grant admin consent for your organization
|
|
229
|
+
5. Create a client secret
|
|
230
|
+
6. Note the Application (client) ID, Directory (tenant) ID, and client secret
|
|
231
|
+
|
|
232
|
+
The filesystem uses the OAuth2 client credentials flow with the default scope (`https://graph.microsoft.com/.default`), which automatically includes all application permissions granted to your Azure AD application.
|
msgraphfs-0.4/README.md
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Filesystem interface to Microsoft Graph API (SharePoint, OneDrive)
|
|
2
|
+
------------------------------------------------------------
|
|
3
|
+
|
|
4
|
+
[msgraphfs on PyPI](https://pypi.python.org/pypi/msgraphfs/)
|
|
5
|
+
|
|
6
|
+
Quickstart
|
|
7
|
+
----------
|
|
8
|
+
|
|
9
|
+
This package can be installed using:
|
|
10
|
+
|
|
11
|
+
`pip install msgraphfs`
|
|
12
|
+
|
|
13
|
+
or
|
|
14
|
+
|
|
15
|
+
`uv add msgraphfs`
|
|
16
|
+
|
|
17
|
+
The `msgd://`, `sharepoint://`, and `onedrive://` protocols are included in fsspec's known_implementations registry, allowing seamless integration with fsspec-compatible libraries.
|
|
18
|
+
|
|
19
|
+
To use the filesystem with specific site and drive:
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
storage_options = {
|
|
25
|
+
'client_id': 'your-client-id',
|
|
26
|
+
'tenant_id': 'your-tenant-id',
|
|
27
|
+
'client_secret': 'your-client-secret',
|
|
28
|
+
'site_name': 'YourSiteName',
|
|
29
|
+
'drive_name': 'Documents'
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
df = pd.read_csv('msgd://folder/data.csv', storage_options=storage_options)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
To use multi-site mode where site and drive are specified in the URL:
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import pandas as pd
|
|
39
|
+
|
|
40
|
+
storage_options = {
|
|
41
|
+
'client_id': 'your-client-id',
|
|
42
|
+
'tenant_id': 'your-tenant-id',
|
|
43
|
+
'client_secret': 'your-client-secret'
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
df = pd.read_csv('msgd://YourSite/Documents/folder/data.csv', storage_options=storage_options)
|
|
47
|
+
df = pd.read_parquet('sharepoint://AnotherSite/Reports/data.parquet', storage_options=storage_options)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Accepted protocol / uri formats include:
|
|
51
|
+
- `msgd://site/drive/path/file` (multi-site mode)
|
|
52
|
+
- `sharepoint://site/drive/path/file` (multi-site mode)
|
|
53
|
+
- `onedrive://drive/path/file` (OneDrive personal)
|
|
54
|
+
- `msgd://path/file` (single-site mode when site_name and drive_name specified in storage_options)
|
|
55
|
+
|
|
56
|
+
You can optionally set the `MSGRAPHFS_CLIENT_ID`, `MSGRAPHFS_TENANT_ID`, and `MSGRAPHFS_CLIENT_SECRET` environment variables; the credentials are then read from the environment and can be omitted from `storage_options`:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import pandas as pd
|
|
60
|
+
|
|
61
|
+
# With environment variables set, you can omit credentials from storage_options
|
|
62
|
+
storage_options = {'site_name': 'YourSite', 'drive_name': 'Documents'}
|
|
63
|
+
df = pd.read_csv('msgd://folder/data.csv', storage_options=storage_options)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Details
|
|
67
|
+
-------
|
|
68
|
+
|
|
69
|
+
The package provides a pythonic filesystem implementation for Microsoft Graph API drives (SharePoint and OneDrive), facilitating interactions between Microsoft 365 services and data processing libraries like Pandas, Dask, and others. It is implemented on top of the [fsspec](https://filesystem-spec.readthedocs.io/) base class and talks to the Microsoft Graph REST API using `httpx` and `authlib`.
|
|
70
|
+
|
|
71
|
+
Operations work with Azure AD application credentials using the client credentials flow, suitable for server-to-server authentication scenarios.
|
|
72
|
+
|
|
73
|
+
The filesystem automatically handles OAuth2 token management, site and drive discovery, and provides fork-safe lazy initialization perfect for multi-process environments like Apache Airflow.
|
|
74
|
+
|
|
75
|
+
### Setting credentials
|
|
76
|
+
|
|
77
|
+
The `storage_options` can be instantiated with the following authentication parameters:
|
|
78
|
+
|
|
79
|
+
**Required for authentication:**
|
|
80
|
+
- `client_id`: Azure AD application (client) ID
|
|
81
|
+
- `tenant_id`: Azure AD directory (tenant) ID
|
|
82
|
+
- `client_secret`: Azure AD application client secret
|
|
83
|
+
|
|
84
|
+
**Optional filesystem parameters:**
|
|
85
|
+
- `site_name`: SharePoint site name (for single-site mode or site discovery)
|
|
86
|
+
- `drive_name`: Drive/library name (e.g., "Documents", "CustomLibrary")
|
|
87
|
+
- `drive_id`: Specific drive ID (bypasses site/drive discovery)
|
|
88
|
+
- `oauth2_client_params`: Pre-built OAuth2 parameters dict
|
|
89
|
+
- `use_recycle_bin`: Enable recycle bin operations (default: False)
|
|
90
|
+
|
|
91
|
+
For more details on all available parameters, see the [MSGDriveFS documentation](https://github.com/acsone/msgraphfs).
|
|
92
|
+
|
|
93
|
+
The following environment variables can be set and will be automatically detected:
|
|
94
|
+
- `MSGRAPHFS_CLIENT_ID` (or `AZURE_CLIENT_ID` as fallback)
|
|
95
|
+
- `MSGRAPHFS_TENANT_ID` (or `AZURE_TENANT_ID` as fallback)
|
|
96
|
+
- `MSGRAPHFS_CLIENT_SECRET` (or `AZURE_CLIENT_SECRET` as fallback)
|
|
97
|
+
|
|
98
|
+
### Usage modes
|
|
99
|
+
|
|
100
|
+
The filesystem can be used in different modes based on the `storage_options` provided:
|
|
101
|
+
|
|
102
|
+
1. **Single-site mode**: Specify `site_name` and `drive_name` in storage_options, then use relative paths in URLs:
|
|
103
|
+
```python
|
|
104
|
+
storage_options = {
|
|
105
|
+
'client_id': CLIENT_ID,
|
|
106
|
+
'tenant_id': TENANT_ID,
|
|
107
|
+
'client_secret': CLIENT_SECRET,
|
|
108
|
+
'site_name': 'YourSite',
|
|
109
|
+
'drive_name': 'Documents'
|
|
110
|
+
}
|
|
111
|
+
df = pd.read_csv('msgd://folder/file.csv', storage_options=storage_options)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
2. **Multi-site mode**: Omit `site_name` and `drive_name` from storage_options, specify them in the URL:
|
|
115
|
+
```python
|
|
116
|
+
storage_options = {
|
|
117
|
+
'client_id': CLIENT_ID,
|
|
118
|
+
'tenant_id': TENANT_ID,
|
|
119
|
+
'client_secret': CLIENT_SECRET
|
|
120
|
+
}
|
|
121
|
+
df = pd.read_csv('msgd://YourSite/Documents/folder/file.csv', storage_options=storage_options)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
3. **Direct drive access**: Use `drive_id` to bypass site discovery:
|
|
125
|
+
```python
|
|
126
|
+
storage_options = {
|
|
127
|
+
'client_id': CLIENT_ID,
|
|
128
|
+
'tenant_id': TENANT_ID,
|
|
129
|
+
'client_secret': CLIENT_SECRET,
|
|
130
|
+
'drive_id': 'specific-drive-id'
|
|
131
|
+
}
|
|
132
|
+
df = pd.read_csv('msgd://folder/file.csv', storage_options=storage_options)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Advanced features
|
|
136
|
+
|
|
137
|
+
#### File operations with metadata
|
|
138
|
+
```python
|
|
139
|
+
import fsspec
|
|
140
|
+
|
|
141
|
+
fs = fsspec.filesystem('msgd', **storage_options)
|
|
142
|
+
|
|
143
|
+
# List files with detailed metadata
|
|
144
|
+
files = fs.ls('/folder', detail=True)
|
|
145
|
+
|
|
146
|
+
# Get file information with permissions
|
|
147
|
+
info = fs.info('/document.pdf', expand='permissions')
|
|
148
|
+
|
|
149
|
+
# Read file with version control
|
|
150
|
+
with fs.open('/document.docx', mode='r') as f:
|
|
151
|
+
content = f.read()
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
#### Permission management
|
|
155
|
+
```python
|
|
156
|
+
# Get detailed permissions for files and folders
|
|
157
|
+
permissions = fs.get_permissions('/sensitive-folder')
|
|
158
|
+
print(f"Total permissions: {permissions['summary']['total_permissions']}")
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
#### Integration with data processing libraries
|
|
162
|
+
```python
|
|
163
|
+
import dask.dataframe as dd
|
|
164
|
+
|
|
165
|
+
# Read multiple CSV files using Dask
|
|
166
|
+
ddf = dd.read_csv('msgd://YourSite/Data/*.csv', storage_options=storage_options)
|
|
167
|
+
|
|
168
|
+
# Read Parquet files
|
|
169
|
+
ddf = dd.read_parquet('sharepoint://Reports/Analytics/data.parquet', storage_options=storage_options)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Azure AD Setup
|
|
173
|
+
|
|
174
|
+
To use this filesystem, you need to register an Azure AD application:
|
|
175
|
+
|
|
176
|
+
1. Go to the [Azure Portal](https://portal.azure.com)
|
|
177
|
+
2. Register a new application under "Azure Active Directory" > "App registrations"
|
|
178
|
+
3. Configure API permissions (Application permissions). Choose based on your needs:
|
|
179
|
+
- For read-only access: `Sites.Read.All`
|
|
180
|
+
- For read-write access: `Sites.ReadWrite.All`
|
|
181
|
+
- Optional for enhanced functionality: `Files.Read.All` or `Files.ReadWrite.All`
|
|
182
|
+
4. Grant admin consent for your organization
|
|
183
|
+
5. Create a client secret
|
|
184
|
+
6. Note the Application (client) ID, Directory (tenant) ID, and client secret
|
|
185
|
+
|
|
186
|
+
The filesystem uses the OAuth2 client credentials flow with the default scope (`https://graph.microsoft.com/.default`), which automatically includes all application permissions granted to your Azure AD application.
|
|
@@ -19,7 +19,7 @@ dependencies = [
|
|
|
19
19
|
"httpx[http2]",
|
|
20
20
|
"authlib",
|
|
21
21
|
]
|
|
22
|
-
requires-python = ">=3.
|
|
22
|
+
requires-python = ">=3.10"
|
|
23
23
|
|
|
24
24
|
[project.optional-dependencies]
|
|
25
25
|
test = [
|
|
@@ -88,3 +88,16 @@ filename = "HISTORY.md"
|
|
|
88
88
|
directory = "news"
|
|
89
89
|
issue_format = "`#[{issue}](https://acsone.plan.io/issues/{issue}>`_"
|
|
90
90
|
title_format = "{version} ({project_date})"
|
|
91
|
+
|
|
92
|
+
[dependency-groups]
|
|
93
|
+
dev = [
|
|
94
|
+
"pytest>=8.4.2",
|
|
95
|
+
"ruff",
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
[tool.pytest.ini_options]
|
|
99
|
+
markers = [
|
|
100
|
+
"live: marks tests as requiring live credentials (deselect with '-m \"not live\"')",
|
|
101
|
+
"credentials: marks tests as requiring credentials (deselect with '-m \"not credentials\"')",
|
|
102
|
+
]
|
|
103
|
+
addopts = "-v"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import fsspec
|
|
2
|
+
|
|
3
|
+
from .core import (
|
|
4
|
+
MSGDriveFS,
|
|
5
|
+
MSGraphBufferedFile,
|
|
6
|
+
MSGraphStreamedFile,
|
|
7
|
+
parse_msgraph_url,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
# Register MSGDriveFS for all supported protocols
|
|
11
|
+
# Use clobber=True to allow re-registration
|
|
12
|
+
fsspec.register_implementation("msgd", MSGDriveFS, clobber=True)
|
|
13
|
+
fsspec.register_implementation("sharepoint", MSGDriveFS, clobber=True)
|
|
14
|
+
fsspec.register_implementation("onedrive", MSGDriveFS, clobber=True)
|