dagster-sharepoint 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,224 @@
1
+ Metadata-Version: 2.4
2
+ Name: dagster-sharepoint
3
+ Version: 0.0.1
4
+ Summary: Dagster integration for SharePoint document management using Microsoft Graph API
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: dagster>=1.8.0
8
+ Requires-Dist: requests>=2.31.0
9
+
10
+ # dagster-sharepoint
11
+
12
+ A Dagster integration for interacting with SharePoint document libraries using the Microsoft Graph API. This integration provides a Dagster resource that enables file operations, folder management, and data extraction from SharePoint.
13
+
14
+ ## Features
15
+
16
+ - **Authentication**: Secure authentication using Azure AD client credentials
17
+ - **File Operations**: Upload, download, delete, move, and rename files
18
+ - **Folder Management**: Create folders, list contents, and navigate folder structures
19
+ - **Search**: Search for files across SharePoint document libraries
20
+ - **Batch Operations**: List newly created files, filter by extension, recursive operations
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install dagster-sharepoint
26
+ ```
27
+
28
+ ## Prerequisites
29
+
30
+ Before using this integration, you need to set up Azure AD authentication:
31
+
32
+ 1. Register an application in Azure AD
33
+ 2. Grant the application the appropriate Microsoft Graph application permissions (e.g., `Sites.ReadWrite.All`) and have an administrator grant consent
34
+ 3. Create a client secret for the application
35
+ 4. Note down:
36
+ - Tenant ID
37
+ - Client ID (Application ID)
38
+ - Client Secret
39
+ - SharePoint Site ID
40
+
41
+ ## Usage
42
+
43
+ ### Basic Setup
44
+
45
+ ```python
46
+ import dagster as dg
47
+ from dagster_sharepoint import SharePointResource
48
+ import os
49
+
50
+ # Configure the resource
51
+ defs = dg.Definitions(
52
+ resources={
53
+ "sharepoint": SharePointResource(
54
+ site_id=os.getenv("SHAREPOINT_SITE_ID"),
55
+ tenant_id=os.getenv("AZURE_TENANT_ID"),
56
+ client_id=os.getenv("AZURE_CLIENT_ID"),
57
+ client_secret=os.getenv("AZURE_CLIENT_SECRET")
58
+ )
59
+ }
60
+ )
61
+ ```
62
+
63
+ ### File Operations
64
+
65
+ ```python
66
+ import dagster as dg
67
+ from dagster_sharepoint import SharePointResource
68
+
69
+
70
+ @dg.asset
71
+ def sharepoint_file_operations(sharepoint: SharePointResource):
72
+ # Upload a file
73
+ with open("local_report.xlsx", "rb") as f:
74
+ result = sharepoint.upload_file(
75
+ file_name="report_2024.xlsx",
76
+ content=f,
77
+ folder_path="Documents/Reports/2024"
78
+ )
79
+
80
+ if result.success:
81
+ print(f"Uploaded: {result.file_info.name}")
82
+
83
+ # Download a file
84
+ content = sharepoint.download_file_by_path("Documents/Reports/report.xlsx")
85
+
86
+ # Move a file
87
+ moved_file = sharepoint.move_file_by_path(
88
+ source_file_path="Documents/Temp/draft.docx",
89
+ destination_folder_path="Documents/Final",
90
+ new_name="final_report.docx"
91
+ )
92
+
93
+ # Delete a file
94
+ sharepoint.delete_file_by_path("Documents/Temp/old_file.xlsx")
95
+ ```
96
+ ### Folder Operations
97
+
98
+ ```python
99
+ import dagster as dg
100
+ from dagster_sharepoint import SharePointResource
101
+
102
+
103
+ @dg.asset
104
+ def manage_folders(sharepoint: SharePointResource):
105
+ # Create a new folder
106
+ new_folder = sharepoint.create_folder(
107
+ folder_name="2024_Q4",
108
+ parent_path="Documents/Reports"
109
+ )
110
+
111
+ # List all folders recursively
112
+ folders = sharepoint.list_folders(
113
+ folder_path="Documents",
114
+ recursive=True
115
+ )
116
+
117
+ for folder in folders:
118
+ print(f"Folder: {folder.name} (contains {folder.child_count} items)")
119
+ ```
120
+
121
+ ### Sensor and Asset Pattern
122
+
123
+ ```python
124
+ import dagster as dg
125
+ from datetime import datetime, timedelta
126
+ from dagster_sharepoint import SharePointResource, FileInfoConfig
127
+
128
+
129
+ @dg.asset
130
+ def my_asset(context: dg.AssetExecutionContext, sharepoint: SharePointResource, config: FileInfoConfig):
131
+ """
132
+ Example dg.asset that processes SharePoint files.
133
+
134
+ This asset is triggered by the sharepoint_new_files sensor.
135
+ """
136
+ context.log.info(f"Processing file from SharePoint {config}")
137
+ contents = sharepoint.download_file(config.id)
138
+ context.log.info(f"Downloaded file {config.parent_path}/{config.name}")
139
+
140
+ # Process file contents...
141
+
142
+ return contents
143
+
144
+
145
+ @dg.sensor(
146
+ name="sharepoint_new_files",
147
+ minimum_interval_seconds=600,
148
+ target=[my_asset],
149
+ )
150
+ def sharepoint_new_files(
151
+ context: dg.SensorEvaluationContext,
152
+ sharepoint: SharePointResource,
153
+ ) -> dg.SensorResult | dg.SkipReason:
154
+ """
155
+ Sensor that checks for newly created files in SharePoint.
156
+
157
+ This dg.sensor:
158
+ 1. Checks a configured SharePoint folder for files created since the last run
159
+ 2. Triggers runs for each new file found
160
+ 3. Stores the last check timestamp in cursor storage
161
+ """
162
+
163
+
164
+ last_check = datetime.fromisoformat(context.cursor) if context.cursor else datetime.now() - timedelta(weeks=999)
165
+ current_check = datetime.now()
166
+
167
+ try:
168
+ newly_created_files = sharepoint.list_newly_created_files(
169
+ since_timestamp=last_check,
170
+ file_name_glob_pattern="*/my/file/pattern*.csv",
171
+ recursive=True,
172
+ )
173
+ if not newly_created_files:
174
+ return dg.SkipReason(f"No new files found since {last_check.isoformat()}")
175
+
176
+ return dg.SensorResult(
177
+ run_requests=[
178
+ dg.RunRequest(
179
+ asset_selection=[my_asset.key],
180
+ run_key=file.id,
181
+ run_config=dg.RunConfig(
182
+ ops={my_asset.key.to_python_identifier(): {"config": file.to_config_dict()}}
183
+ ),
184
+ )
185
+ for file in newly_created_files
186
+ ],
187
+ cursor=current_check.isoformat(),
188
+ )
189
+
190
+ except Exception as e:
191
+ context.log.error(f"Error checking SharePoint: {str(e)}")
192
+ raise dg.Failure(f"Error checking SharePoint: {str(e)}") from e
193
+ ```
194
+
195
+ ## Testing
196
+
197
+ ```bash
198
+ # Run tests
199
+ make test
200
+
201
+ # Run linting and formatting
202
+ make ruff
203
+
204
+ # Run type checking
205
+ make check
206
+ ```
207
+
208
+ ## Development
209
+
210
+ ```bash
211
+ # Install development dependencies
212
+ make install
213
+
214
+ # Build the package
215
+ make build
216
+ ```
217
+
218
+ ## License
219
+
220
+ See the LICENSE file in the repository.
221
+
222
+ ## Contributing
223
+
224
+ Contributions are welcome! Please feel free to submit a Pull Request.
@@ -0,0 +1,215 @@
1
+ # dagster-sharepoint
2
+
3
+ A Dagster integration for interacting with SharePoint document libraries using the Microsoft Graph API. This integration provides a Dagster resource that enables file operations, folder management, and data extraction from SharePoint.
4
+
5
+ ## Features
6
+
7
+ - **Authentication**: Secure authentication using Azure AD client credentials
8
+ - **File Operations**: Upload, download, delete, move, and rename files
9
+ - **Folder Management**: Create folders, list contents, and navigate folder structures
10
+ - **Search**: Search for files across SharePoint document libraries
11
+ - **Batch Operations**: List newly created files, filter by extension, recursive operations
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install dagster-sharepoint
17
+ ```
18
+
19
+ ## Prerequisites
20
+
21
+ Before using this integration, you need to set up Azure AD authentication:
22
+
23
+ 1. Register an application in Azure AD
24
+ 2. Grant the application the appropriate Microsoft Graph application permissions (e.g., `Sites.ReadWrite.All`) and have an administrator grant consent
25
+ 3. Create a client secret for the application
26
+ 4. Note down:
27
+ - Tenant ID
28
+ - Client ID (Application ID)
29
+ - Client Secret
30
+ - SharePoint Site ID
31
+
32
+ ## Usage
33
+
34
+ ### Basic Setup
35
+
36
+ ```python
37
+ import dagster as dg
38
+ from dagster_sharepoint import SharePointResource
39
+ import os
40
+
41
+ # Configure the resource
42
+ defs = dg.Definitions(
43
+ resources={
44
+ "sharepoint": SharePointResource(
45
+ site_id=os.getenv("SHAREPOINT_SITE_ID"),
46
+ tenant_id=os.getenv("AZURE_TENANT_ID"),
47
+ client_id=os.getenv("AZURE_CLIENT_ID"),
48
+ client_secret=os.getenv("AZURE_CLIENT_SECRET")
49
+ )
50
+ }
51
+ )
52
+ ```
53
+
54
+ ### File Operations
55
+
56
+ ```python
57
+ import dagster as dg
58
+ from dagster_sharepoint import SharePointResource
59
+
60
+
61
+ @dg.asset
62
+ def sharepoint_file_operations(sharepoint: SharePointResource):
63
+ # Upload a file
64
+ with open("local_report.xlsx", "rb") as f:
65
+ result = sharepoint.upload_file(
66
+ file_name="report_2024.xlsx",
67
+ content=f,
68
+ folder_path="Documents/Reports/2024"
69
+ )
70
+
71
+ if result.success:
72
+ print(f"Uploaded: {result.file_info.name}")
73
+
74
+ # Download a file
75
+ content = sharepoint.download_file_by_path("Documents/Reports/report.xlsx")
76
+
77
+ # Move a file
78
+ moved_file = sharepoint.move_file_by_path(
79
+ source_file_path="Documents/Temp/draft.docx",
80
+ destination_folder_path="Documents/Final",
81
+ new_name="final_report.docx"
82
+ )
83
+
84
+ # Delete a file
85
+ sharepoint.delete_file_by_path("Documents/Temp/old_file.xlsx")
86
+ ```
87
+ ### Folder Operations
88
+
89
+ ```python
90
+ import dagster as dg
91
+ from dagster_sharepoint import SharePointResource
92
+
93
+
94
+ @dg.asset
95
+ def manage_folders(sharepoint: SharePointResource):
96
+ # Create a new folder
97
+ new_folder = sharepoint.create_folder(
98
+ folder_name="2024_Q4",
99
+ parent_path="Documents/Reports"
100
+ )
101
+
102
+ # List all folders recursively
103
+ folders = sharepoint.list_folders(
104
+ folder_path="Documents",
105
+ recursive=True
106
+ )
107
+
108
+ for folder in folders:
109
+ print(f"Folder: {folder.name} (contains {folder.child_count} items)")
110
+ ```
111
+
112
+ ### Sensor and Asset Pattern
113
+
114
+ ```python
115
+ import dagster as dg
116
+ from datetime import datetime, timedelta
117
+ from dagster_sharepoint import SharePointResource, FileInfoConfig
118
+
119
+
120
+ @dg.asset
121
+ def my_asset(context: dg.AssetExecutionContext, sharepoint: SharePointResource, config: FileInfoConfig):
122
+ """
123
+ Example dg.asset that processes SharePoint files.
124
+
125
+ This asset is triggered by the sharepoint_new_files sensor.
126
+ """
127
+ context.log.info(f"Processing file from SharePoint {config}")
128
+ contents = sharepoint.download_file(config.id)
129
+ context.log.info(f"Downloaded file {config.parent_path}/{config.name}")
130
+
131
+ # Process file contents...
132
+
133
+ return contents
134
+
135
+
136
+ @dg.sensor(
137
+ name="sharepoint_new_files",
138
+ minimum_interval_seconds=600,
139
+ target=[my_asset],
140
+ )
141
+ def sharepoint_new_files(
142
+ context: dg.SensorEvaluationContext,
143
+ sharepoint: SharePointResource,
144
+ ) -> dg.SensorResult | dg.SkipReason:
145
+ """
146
+ Sensor that checks for newly created files in SharePoint.
147
+
148
+ This dg.sensor:
149
+ 1. Checks a configured SharePoint folder for files created since the last run
150
+ 2. Triggers runs for each new file found
151
+ 3. Stores the last check timestamp in cursor storage
152
+ """
153
+
154
+
155
+ last_check = datetime.fromisoformat(context.cursor) if context.cursor else datetime.now() - timedelta(weeks=999)
156
+ current_check = datetime.now()
157
+
158
+ try:
159
+ newly_created_files = sharepoint.list_newly_created_files(
160
+ since_timestamp=last_check,
161
+ file_name_glob_pattern="*/my/file/pattern*.csv",
162
+ recursive=True,
163
+ )
164
+ if not newly_created_files:
165
+ return dg.SkipReason(f"No new files found since {last_check.isoformat()}")
166
+
167
+ return dg.SensorResult(
168
+ run_requests=[
169
+ dg.RunRequest(
170
+ asset_selection=[my_asset.key],
171
+ run_key=file.id,
172
+ run_config=dg.RunConfig(
173
+ ops={my_asset.key.to_python_identifier(): {"config": file.to_config_dict()}}
174
+ ),
175
+ )
176
+ for file in newly_created_files
177
+ ],
178
+ cursor=current_check.isoformat(),
179
+ )
180
+
181
+ except Exception as e:
182
+ context.log.error(f"Error checking SharePoint: {str(e)}")
183
+ raise dg.Failure(f"Error checking SharePoint: {str(e)}") from e
184
+ ```
185
+
186
+ ## Testing
187
+
188
+ ```bash
189
+ # Run tests
190
+ make test
191
+
192
+ # Run linting and formatting
193
+ make ruff
194
+
195
+ # Run type checking
196
+ make check
197
+ ```
198
+
199
+ ## Development
200
+
201
+ ```bash
202
+ # Install development dependencies
203
+ make install
204
+
205
+ # Build the package
206
+ make build
207
+ ```
208
+
209
+ ## License
210
+
211
+ See the LICENSE file in the repository.
212
+
213
+ ## Contributing
214
+
215
+ Contributions are welcome! Please feel free to submit a Pull Request.
@@ -0,0 +1,16 @@
1
+ from dagster._core.libraries import DagsterLibraryRegistry
2
+
3
+ from dagster_sharepoint.resource import (
4
+ SharePointResource as SharePointResource,
5
+ FileInfoConfig as FileInfoConfig,
6
+ FileInfo as FileInfo,
7
+ FolderInfo as FolderInfo,
8
+ DriveInfo as DriveInfo,
9
+ UploadResult as UploadResult,
10
+ )
11
+
12
+ __version__ = "0.0.1"
13
+
14
+ DagsterLibraryRegistry.register(
15
+ "dagster-sharepoint", __version__, is_dagster_package=False
16
+ )