dagster-sharepoint 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_sharepoint-0.0.1/PKG-INFO +224 -0
- dagster_sharepoint-0.0.1/README.md +215 -0
- dagster_sharepoint-0.0.1/dagster_sharepoint/__init__.py +16 -0
- dagster_sharepoint-0.0.1/dagster_sharepoint/resource.py +1052 -0
- dagster_sharepoint-0.0.1/dagster_sharepoint.egg-info/PKG-INFO +224 -0
- dagster_sharepoint-0.0.1/dagster_sharepoint.egg-info/SOURCES.txt +9 -0
- dagster_sharepoint-0.0.1/dagster_sharepoint.egg-info/dependency_links.txt +1 -0
- dagster_sharepoint-0.0.1/dagster_sharepoint.egg-info/requires.txt +2 -0
- dagster_sharepoint-0.0.1/dagster_sharepoint.egg-info/top_level.txt +1 -0
- dagster_sharepoint-0.0.1/pyproject.toml +29 -0
- dagster_sharepoint-0.0.1/setup.cfg +4 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dagster-sharepoint
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Dagster integration for SharePoint document management using Microsoft Graph API
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: dagster>=1.8.0
|
|
8
|
+
Requires-Dist: requests>=2.31.0
|
|
9
|
+
|
|
10
|
+
# dagster-sharepoint
|
|
11
|
+
|
|
12
|
+
A Dagster integration for interacting with SharePoint document libraries using the Microsoft Graph API. This integration provides a Dagster resource that enables file operations, folder management, and data extraction from SharePoint.
|
|
13
|
+
|
|
14
|
+
## Features
|
|
15
|
+
|
|
16
|
+
- **Authentication**: Secure authentication using Azure AD client credentials
|
|
17
|
+
- **File Operations**: Upload, download, delete, move, and rename files
|
|
18
|
+
- **Folder Management**: Create folders, list contents, and navigate folder structures
|
|
19
|
+
- **Search**: Search for files across SharePoint document libraries
|
|
20
|
+
- **Batch Operations**: List newly created files, filter by extension, recursive operations
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install dagster-sharepoint
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Prerequisites
|
|
29
|
+
|
|
30
|
+
Before using this integration, you need to set up Azure AD authentication:
|
|
31
|
+
|
|
32
|
+
1. Register an application in Azure AD
|
|
33
|
+
2. Grant the application appropriate SharePoint permissions (e.g., `Sites.ReadWrite.All`)
|
|
34
|
+
3. Create a client secret for the application
|
|
35
|
+
4. Note down:
|
|
36
|
+
- Tenant ID
|
|
37
|
+
- Client ID (Application ID)
|
|
38
|
+
- Client Secret
|
|
39
|
+
- SharePoint Site ID
|
|
40
|
+
|
|
41
|
+
## Usage
|
|
42
|
+
|
|
43
|
+
### Basic Setup
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import dagster as dg
|
|
47
|
+
from dagster_sharepoint import SharePointResource
|
|
48
|
+
import os
|
|
49
|
+
|
|
50
|
+
# Configure the resource
|
|
51
|
+
defs = dg.Definitions(
|
|
52
|
+
resources={
|
|
53
|
+
"sharepoint": SharePointResource(
|
|
54
|
+
site_id=os.getenv("SHAREPOINT_SITE_ID"),
|
|
55
|
+
tenant_id=os.getenv("AZURE_TENANT_ID"),
|
|
56
|
+
client_id=os.getenv("AZURE_CLIENT_ID"),
|
|
57
|
+
client_secret=os.getenv("AZURE_CLIENT_SECRET")
|
|
58
|
+
)
|
|
59
|
+
}
|
|
60
|
+
)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### File Operations
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import dagster as dg
|
|
67
|
+
from dagster_sharepoint import SharePointResource
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dg.asset
|
|
71
|
+
def sharepoint_file_operations(sharepoint: SharePointResource):
|
|
72
|
+
# Upload a file
|
|
73
|
+
with open("local_report.xlsx", "rb") as f:
|
|
74
|
+
result = sharepoint.upload_file(
|
|
75
|
+
file_name="report_2024.xlsx",
|
|
76
|
+
content=f,
|
|
77
|
+
folder_path="Documents/Reports/2024"
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
if result.success:
|
|
81
|
+
print(f"Uploaded: {result.file_info.name}")
|
|
82
|
+
|
|
83
|
+
# Download a file
|
|
84
|
+
content = sharepoint.download_file_by_path("Documents/Reports/report.xlsx")
|
|
85
|
+
|
|
86
|
+
# Move a file
|
|
87
|
+
moved_file = sharepoint.move_file_by_path(
|
|
88
|
+
source_file_path="Documents/Temp/draft.docx",
|
|
89
|
+
destination_folder_path="Documents/Final",
|
|
90
|
+
new_name="final_report.docx"
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Delete a file
|
|
94
|
+
sharepoint.delete_file_by_path("Documents/Temp/old_file.xlsx")
|
|
95
|
+
```
|
|
96
|
+
### Folder Operations
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
import dagster as dg
|
|
100
|
+
from dagster_sharepoint import SharePointResource
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dg.asset
|
|
104
|
+
def manage_folders(sharepoint: SharePointResource):
|
|
105
|
+
# Create a new folder
|
|
106
|
+
new_folder = sharepoint.create_folder(
|
|
107
|
+
folder_name="2024_Q4",
|
|
108
|
+
parent_path="Documents/Reports"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
# List all folders recursively
|
|
112
|
+
folders = sharepoint.list_folders(
|
|
113
|
+
folder_path="Documents",
|
|
114
|
+
recursive=True
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
for folder in folders:
|
|
118
|
+
print(f"Folder: {folder.name} (contains {folder.child_count} items)")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Sensor and Asset Pattern
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
import dagster as dg
|
|
125
|
+
from datetime import datetime, timedelta
|
|
126
|
+
from dagster_sharepoint import SharePointResource, FileInfoConfig
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dg.asset
|
|
130
|
+
def my_asset(context: dg.AssetExecutionContext, sharepoint: SharePointResource, config: FileInfoConfig):
|
|
131
|
+
"""
|
|
132
|
+
Example dg.asset that processes SharePoint files.
|
|
133
|
+
|
|
134
|
+
This would be triggered by the sharepoint_new_files sensor.
|
|
135
|
+
"""
|
|
136
|
+
context.log.info(f"Processing file from SharePoint {config}")
|
|
137
|
+
contents = sharepoint.download_file(config.id)
|
|
138
|
+
context.log.info(f"Downloaded file {config.parent_path}/{config.name}")
|
|
139
|
+
|
|
140
|
+
# Process file contents...
|
|
141
|
+
|
|
142
|
+
return contents
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@dg.sensor(
|
|
146
|
+
name="sharepoint_new_files",
|
|
147
|
+
minimum_interval_seconds=600,
|
|
148
|
+
target=[my_asset],
|
|
149
|
+
)
|
|
150
|
+
def sharepoint_new_files(
|
|
151
|
+
context: dg.SensorEvaluationContext,
|
|
152
|
+
sharepoint: SharePointResource,
|
|
153
|
+
) -> dg.SensorResult:
|
|
154
|
+
"""
|
|
155
|
+
Sensor that checks for newly created files in SharePoint.
|
|
156
|
+
|
|
157
|
+
This dg.sensor:
|
|
158
|
+
1. Checks a configured SharePoint folder for files created since the last run
|
|
159
|
+
2. Triggers runs for each new file found
|
|
160
|
+
3. Stores the last check timestamp in cursor storage
|
|
161
|
+
"""
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
last_check = datetime.fromisoformat(context.cursor) if context.cursor else datetime.now() - timedelta(weeks=999)
|
|
165
|
+
current_check = datetime.now()
|
|
166
|
+
|
|
167
|
+
try:
|
|
168
|
+
newly_created_files = sharepoint.list_newly_created_files(
|
|
169
|
+
since_timestamp=last_check,
|
|
170
|
+
file_name_glob_pattern="*/my/file/pattern*.csv",
|
|
171
|
+
recursive=True,
|
|
172
|
+
)
|
|
173
|
+
if not newly_created_files:
|
|
174
|
+
return dg.SkipReason(f"No new files found since {last_check.isoformat()}")
|
|
175
|
+
|
|
176
|
+
return dg.SensorResult(
|
|
177
|
+
run_requests=[
|
|
178
|
+
dg.RunRequest(
|
|
179
|
+
asset_selection=[my_asset.key],
|
|
180
|
+
run_key=file.id,
|
|
181
|
+
run_config=dg.RunConfig(
|
|
182
|
+
ops={my_asset.key.to_python_identifier(): {"config": file.to_config_dict()}}
|
|
183
|
+
),
|
|
184
|
+
)
|
|
185
|
+
for file in newly_created_files
|
|
186
|
+
],
|
|
187
|
+
cursor=current_check.isoformat(),
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
except Exception as e:
|
|
191
|
+
context.log.error(f"Error checking SharePoint: {str(e)}")
|
|
192
|
+
raise dg.Failure(f"Error checking SharePoint: {str(e)}") from e
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## Testing
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
# Run tests
|
|
199
|
+
make test
|
|
200
|
+
|
|
201
|
+
# Run linting and formatting
|
|
202
|
+
make ruff
|
|
203
|
+
|
|
204
|
+
# Run type checking
|
|
205
|
+
make check
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Development
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
# Install development dependencies
|
|
212
|
+
make install
|
|
213
|
+
|
|
214
|
+
# Build the package
|
|
215
|
+
make build
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## License
|
|
219
|
+
|
|
220
|
+
See LICENSE file in the repository.
|
|
221
|
+
|
|
222
|
+
## Contributing
|
|
223
|
+
|
|
224
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# dagster-sharepoint
|
|
2
|
+
|
|
3
|
+
A Dagster integration for interacting with SharePoint document libraries using the Microsoft Graph API. This integration provides a Dagster resource that enables file operations, folder management, and data extraction from SharePoint.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Authentication**: Secure authentication using Azure AD client credentials
|
|
8
|
+
- **File Operations**: Upload, download, delete, move, and rename files
|
|
9
|
+
- **Folder Management**: Create folders, list contents, and navigate folder structures
|
|
10
|
+
- **Search**: Search for files across SharePoint document libraries
|
|
11
|
+
- **Batch Operations**: List newly created files, filter by extension, recursive operations
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install dagster-sharepoint
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Prerequisites
|
|
20
|
+
|
|
21
|
+
Before using this integration, you need to set up Azure AD authentication:
|
|
22
|
+
|
|
23
|
+
1. Register an application in Azure AD
|
|
24
|
+
2. Grant the application appropriate SharePoint permissions (e.g., `Sites.ReadWrite.All`)
|
|
25
|
+
3. Create a client secret for the application
|
|
26
|
+
4. Note down:
|
|
27
|
+
- Tenant ID
|
|
28
|
+
- Client ID (Application ID)
|
|
29
|
+
- Client Secret
|
|
30
|
+
- SharePoint Site ID
|
|
31
|
+
|
|
32
|
+
## Usage
|
|
33
|
+
|
|
34
|
+
### Basic Setup
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import dagster as dg
|
|
38
|
+
from dagster_sharepoint import SharePointResource
|
|
39
|
+
import os
|
|
40
|
+
|
|
41
|
+
# Configure the resource
|
|
42
|
+
defs = dg.Definitions(
|
|
43
|
+
resources={
|
|
44
|
+
"sharepoint": SharePointResource(
|
|
45
|
+
site_id=os.getenv("SHAREPOINT_SITE_ID"),
|
|
46
|
+
tenant_id=os.getenv("AZURE_TENANT_ID"),
|
|
47
|
+
client_id=os.getenv("AZURE_CLIENT_ID"),
|
|
48
|
+
client_secret=os.getenv("AZURE_CLIENT_SECRET")
|
|
49
|
+
)
|
|
50
|
+
}
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### File Operations
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import dagster as dg
|
|
58
|
+
from dagster_sharepoint import SharePointResource
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dg.asset
|
|
62
|
+
def sharepoint_file_operations(sharepoint: SharePointResource):
|
|
63
|
+
# Upload a file
|
|
64
|
+
with open("local_report.xlsx", "rb") as f:
|
|
65
|
+
result = sharepoint.upload_file(
|
|
66
|
+
file_name="report_2024.xlsx",
|
|
67
|
+
content=f,
|
|
68
|
+
folder_path="Documents/Reports/2024"
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
if result.success:
|
|
72
|
+
print(f"Uploaded: {result.file_info.name}")
|
|
73
|
+
|
|
74
|
+
# Download a file
|
|
75
|
+
content = sharepoint.download_file_by_path("Documents/Reports/report.xlsx")
|
|
76
|
+
|
|
77
|
+
# Move a file
|
|
78
|
+
moved_file = sharepoint.move_file_by_path(
|
|
79
|
+
source_file_path="Documents/Temp/draft.docx",
|
|
80
|
+
destination_folder_path="Documents/Final",
|
|
81
|
+
new_name="final_report.docx"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Delete a file
|
|
85
|
+
sharepoint.delete_file_by_path("Documents/Temp/old_file.xlsx")
|
|
86
|
+
```
|
|
87
|
+
### Folder Operations
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
import dagster as dg
|
|
91
|
+
from dagster_sharepoint import SharePointResource
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dg.asset
|
|
95
|
+
def manage_folders(sharepoint: SharePointResource):
|
|
96
|
+
# Create a new folder
|
|
97
|
+
new_folder = sharepoint.create_folder(
|
|
98
|
+
folder_name="2024_Q4",
|
|
99
|
+
parent_path="Documents/Reports"
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
# List all folders recursively
|
|
103
|
+
folders = sharepoint.list_folders(
|
|
104
|
+
folder_path="Documents",
|
|
105
|
+
recursive=True
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
for folder in folders:
|
|
109
|
+
print(f"Folder: {folder.name} (contains {folder.child_count} items)")
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Sensor and Asset Pattern
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
import dagster as dg
|
|
116
|
+
from datetime import datetime, timedelta
|
|
117
|
+
from dagster_sharepoint import SharePointResource, FileInfoConfig
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dg.asset
|
|
121
|
+
def my_asset(context: dg.AssetExecutionContext, sharepoint: SharePointResource, config: FileInfoConfig):
|
|
122
|
+
"""
|
|
123
|
+
Example dg.asset that processes SharePoint files.
|
|
124
|
+
|
|
125
|
+
This would be triggered by the sharepoint_new_files sensor.
|
|
126
|
+
"""
|
|
127
|
+
context.log.info(f"Processing file from SharePoint {config}")
|
|
128
|
+
contents = sharepoint.download_file(config.id)
|
|
129
|
+
context.log.info(f"Downloaded file {config.parent_path}/{config.name}")
|
|
130
|
+
|
|
131
|
+
# Process file contents...
|
|
132
|
+
|
|
133
|
+
return contents
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@dg.sensor(
|
|
137
|
+
name="sharepoint_new_files",
|
|
138
|
+
minimum_interval_seconds=600,
|
|
139
|
+
target=[my_asset],
|
|
140
|
+
)
|
|
141
|
+
def sharepoint_new_files(
|
|
142
|
+
context: dg.SensorEvaluationContext,
|
|
143
|
+
sharepoint: SharePointResource,
|
|
144
|
+
) -> dg.SensorResult:
|
|
145
|
+
"""
|
|
146
|
+
Sensor that checks for newly created files in SharePoint.
|
|
147
|
+
|
|
148
|
+
This dg.sensor:
|
|
149
|
+
1. Checks a configured SharePoint folder for files created since the last run
|
|
150
|
+
2. Triggers runs for each new file found
|
|
151
|
+
3. Stores the last check timestamp in cursor storage
|
|
152
|
+
"""
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
last_check = datetime.fromisoformat(context.cursor) if context.cursor else datetime.now() - timedelta(weeks=999)
|
|
156
|
+
current_check = datetime.now()
|
|
157
|
+
|
|
158
|
+
try:
|
|
159
|
+
newly_created_files = sharepoint.list_newly_created_files(
|
|
160
|
+
since_timestamp=last_check,
|
|
161
|
+
file_name_glob_pattern="*/my/file/pattern*.csv",
|
|
162
|
+
recursive=True,
|
|
163
|
+
)
|
|
164
|
+
if not newly_created_files:
|
|
165
|
+
return dg.SkipReason(f"No new files found since {last_check.isoformat()}")
|
|
166
|
+
|
|
167
|
+
return dg.SensorResult(
|
|
168
|
+
run_requests=[
|
|
169
|
+
dg.RunRequest(
|
|
170
|
+
asset_selection=[my_asset.key],
|
|
171
|
+
run_key=file.id,
|
|
172
|
+
run_config=dg.RunConfig(
|
|
173
|
+
ops={my_asset.key.to_python_identifier(): {"config": file.to_config_dict()}}
|
|
174
|
+
),
|
|
175
|
+
)
|
|
176
|
+
for file in newly_created_files
|
|
177
|
+
],
|
|
178
|
+
cursor=current_check.isoformat(),
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
except Exception as e:
|
|
182
|
+
context.log.error(f"Error checking SharePoint: {str(e)}")
|
|
183
|
+
raise dg.Failure(f"Error checking SharePoint: {str(e)}") from e
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Testing
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
# Run tests
|
|
190
|
+
make test
|
|
191
|
+
|
|
192
|
+
# Run linting and formatting
|
|
193
|
+
make ruff
|
|
194
|
+
|
|
195
|
+
# Run type checking
|
|
196
|
+
make check
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Development
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# Install development dependencies
|
|
203
|
+
make install
|
|
204
|
+
|
|
205
|
+
# Build the package
|
|
206
|
+
make build
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## License
|
|
210
|
+
|
|
211
|
+
See LICENSE file in the repository.
|
|
212
|
+
|
|
213
|
+
## Contributing
|
|
214
|
+
|
|
215
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from dagster._core.libraries import DagsterLibraryRegistry
|
|
2
|
+
|
|
3
|
+
from dagster_sharepoint.resource import (
|
|
4
|
+
SharePointResource as SharePointResource,
|
|
5
|
+
FileInfoConfig as FileInfoConfig,
|
|
6
|
+
FileInfo as FileInfo,
|
|
7
|
+
FolderInfo as FolderInfo,
|
|
8
|
+
DriveInfo as DriveInfo,
|
|
9
|
+
UploadResult as UploadResult,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__version__ = "0.0.1"
|
|
13
|
+
|
|
14
|
+
DagsterLibraryRegistry.register(
|
|
15
|
+
"dagster-sharepoint", __version__, is_dagster_package=False
|
|
16
|
+
)
|