sapiopycommons 2024.8.28a313__py3-none-any.whl → 2024.8.28a314__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sapiopycommons might be problematic. Click here for more details.
- sapiopycommons/callbacks/callback_util.py +277 -35
- sapiopycommons/chem/IndigoMolecules.py +1 -0
- sapiopycommons/chem/Molecules.py +1 -0
- sapiopycommons/eln/experiment_report_util.py +214 -0
- sapiopycommons/files/file_bridge.py +16 -10
- sapiopycommons/files/file_bridge_handler.py +318 -0
- sapiopycommons/files/file_util.py +13 -6
- sapiopycommons/files/file_validator.py +71 -0
- sapiopycommons/general/accession_service.py +375 -0
- sapiopycommons/general/custom_report_util.py +199 -27
- sapiopycommons/multimodal/multimodal.py +146 -0
- sapiopycommons/multimodal/multimodal_data.py +487 -0
- sapiopycommons/recordmodel/record_handler.py +278 -45
- sapiopycommons/webhook/webhook_handlers.py +58 -1
- {sapiopycommons-2024.8.28a313.dist-info → sapiopycommons-2024.8.28a314.dist-info}/METADATA +4 -2
- {sapiopycommons-2024.8.28a313.dist-info → sapiopycommons-2024.8.28a314.dist-info}/RECORD +18 -13
- {sapiopycommons-2024.8.28a313.dist-info → sapiopycommons-2024.8.28a314.dist-info}/WHEEL +0 -0
- {sapiopycommons-2024.8.28a313.dist-info → sapiopycommons-2024.8.28a314.dist-info}/licenses/LICENSE +0 -0
|
@@ -16,7 +16,8 @@ class FileBridge:
|
|
|
16
16
|
Read a file from FileBridge.
|
|
17
17
|
|
|
18
18
|
:param context: The current webhook context or a user object to send requests from.
|
|
19
|
-
:param bridge_name: The name of the bridge to use.
|
|
19
|
+
:param bridge_name: The name of the bridge to use. This is the "connection name" in the
|
|
20
|
+
file bridge configurations.
|
|
20
21
|
:param file_path: The path to read the file from.
|
|
21
22
|
:param base64_decode: If true, base64 decode the file. Files are by default base64 encoded when retrieved from
|
|
22
23
|
FileBridge.
|
|
@@ -42,7 +43,8 @@ class FileBridge:
|
|
|
42
43
|
Write a file to FileBridge.
|
|
43
44
|
|
|
44
45
|
:param context: The current webhook context or a user object to send requests from.
|
|
45
|
-
:param bridge_name: The name of the bridge to use.
|
|
46
|
+
:param bridge_name: The name of the bridge to use. This is the "connection name" in the
|
|
47
|
+
file bridge configurations.
|
|
46
48
|
:param file_path: The path to write the file to. If a file already exists at the given path then the file is
|
|
47
49
|
overwritten.
|
|
48
50
|
:param file_data: A string or bytes of the file to be written.
|
|
@@ -63,9 +65,10 @@ class FileBridge:
|
|
|
63
65
|
List the contents of a FileBridge directory.
|
|
64
66
|
|
|
65
67
|
:param context: The current webhook context or a user object to send requests from.
|
|
66
|
-
:param bridge_name: The name of the bridge to use.
|
|
68
|
+
:param bridge_name: The name of the bridge to use. This is the "connection name" in the
|
|
69
|
+
file bridge configurations.
|
|
67
70
|
:param file_path: The path to read the directory from.
|
|
68
|
-
:return: A list of
|
|
71
|
+
:return: A list of names of files and folders in the directory.
|
|
69
72
|
"""
|
|
70
73
|
sub_path = '/ext/filebridge/listDirectory'
|
|
71
74
|
params = {
|
|
@@ -77,7 +80,7 @@ class FileBridge:
|
|
|
77
80
|
|
|
78
81
|
response_body: list[str] = response.json()
|
|
79
82
|
path_length = len(f"bridge://{bridge_name}/")
|
|
80
|
-
return [urllib.parse.unquote(value[path_length:]
|
|
83
|
+
return [urllib.parse.unquote(value)[path_length:] for value in response_body]
|
|
81
84
|
|
|
82
85
|
@staticmethod
|
|
83
86
|
def create_directory(context: SapioWebhookContext | SapioUser, bridge_name: str, file_path: str) -> None:
|
|
@@ -85,7 +88,8 @@ class FileBridge:
|
|
|
85
88
|
Create a new directory in FileBridge.
|
|
86
89
|
|
|
87
90
|
:param context: The current webhook context or a user object to send requests from.
|
|
88
|
-
:param bridge_name: The name of the bridge to use.
|
|
91
|
+
:param bridge_name: The name of the bridge to use. This is the "connection name" in the
|
|
92
|
+
file bridge configurations.
|
|
89
93
|
:param file_path: The path to create the directory at. If a directory already exists at the given path then an
|
|
90
94
|
exception is raised.
|
|
91
95
|
"""
|
|
@@ -103,7 +107,8 @@ class FileBridge:
|
|
|
103
107
|
Delete an existing file in FileBridge.
|
|
104
108
|
|
|
105
109
|
:param context: The current webhook context or a user object to send requests from.
|
|
106
|
-
:param bridge_name: The name of the bridge to use.
|
|
110
|
+
:param bridge_name: The name of the bridge to use. This is the "connection name" in the
|
|
111
|
+
file bridge configurations.
|
|
107
112
|
:param file_path: The path to the file to delete.
|
|
108
113
|
"""
|
|
109
114
|
sub_path = '/ext/filebridge/deleteFile'
|
|
@@ -111,7 +116,7 @@ class FileBridge:
|
|
|
111
116
|
'Filepath': f"bridge://{bridge_name}/{file_path}"
|
|
112
117
|
}
|
|
113
118
|
user: SapioUser = context if isinstance(context, SapioUser) else context.user
|
|
114
|
-
response = user.
|
|
119
|
+
response = user.delete(sub_path, params=params)
|
|
115
120
|
user.raise_for_status(response)
|
|
116
121
|
|
|
117
122
|
@staticmethod
|
|
@@ -120,7 +125,8 @@ class FileBridge:
|
|
|
120
125
|
Delete an existing directory in FileBridge.
|
|
121
126
|
|
|
122
127
|
:param context: The current webhook context or a user object to send requests from.
|
|
123
|
-
:param bridge_name: The name of the bridge to use.
|
|
128
|
+
:param bridge_name: The name of the bridge to use. This is the "connection name" in the
|
|
129
|
+
file bridge configurations.
|
|
124
130
|
:param file_path: The path to the directory to delete.
|
|
125
131
|
"""
|
|
126
132
|
sub_path = '/ext/filebridge/deleteDirectory'
|
|
@@ -128,5 +134,5 @@ class FileBridge:
|
|
|
128
134
|
'Filepath': f"bridge://{bridge_name}/{file_path}"
|
|
129
135
|
}
|
|
130
136
|
user: SapioUser = context if isinstance(context, SapioUser) else context.user
|
|
131
|
-
response = user.
|
|
137
|
+
response = user.delete(sub_path, params=params)
|
|
132
138
|
user.raise_for_status(response)
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import abstractmethod, ABC
|
|
4
|
+
|
|
5
|
+
from sapiopycommons.files.file_bridge import FileBridge
|
|
6
|
+
from sapiopylib.rest.User import SapioUser
|
|
7
|
+
from sapiopylib.rest.pojo.webhook.WebhookContext import SapioWebhookContext
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileBridgeHandler:
    """
    The FileBridgeHandler provides caching of the results of file bridge endpoint calls while also containing quality
    of life functions for common file bridge actions.

    All caches are keyed by the file path strings exactly as they are passed to the methods of this class; paths are
    not normalized, so "a/b" and "a/b/" are treated as different keys.
    """
    user: SapioUser
    __bridge: str
    # A cache of file paths to file bytes, as returned by read_file with base64_decode=True or as last written
    # through write_file.
    __file_cache: dict[str, bytes]
    # A cache of file paths to file bytes, as returned by read_file with base64_decode=False. Kept separate so that
    # the two forms of the same file are never confused with one another.
    __encoded_file_cache: dict[str, bytes]
    # A cache of file paths to File objects.
    __files: dict[str, File]
    # A cache of directory file paths to the names of the files or nested directories within it.
    __dir_cache: dict[str, list[str]]
    # A cache of directory file paths to Directory objects.
    __directories: dict[str, Directory]

    def __init__(self, context: SapioWebhookContext | SapioUser, bridge_name: str):
        """
        :param context: The current webhook context or a user object to send requests from.
        :param bridge_name: The name of the bridge to communicate with. This is the "connection name" in the
            file bridge configurations.
        """
        self.user = context if isinstance(context, SapioUser) else context.user
        self.__bridge = bridge_name
        self.__file_cache = {}
        self.__encoded_file_cache = {}
        self.__files = {}
        self.__dir_cache = {}
        self.__directories = {}

    @property
    def connection_name(self) -> str:
        """
        :return: The name of the file bridge connection that this handler communicates with.
        """
        return self.__bridge

    @staticmethod
    def _split_parent(file_path: str) -> tuple[str, str]:
        """
        Split a path on its last slash.

        :param file_path: A file path where directories are separated by "/" characters.
        :return: A tuple of the parent directory path and the name of the object. A path without any slash yields
            an empty parent path.
        """
        # NOTE(review): a path without a slash is assumed to live in the directory listed with the empty path "".
        # Confirm that this is how callers list the bridge root.
        parent, _, name = file_path.rpartition("/")
        return parent, name

    def _add_to_cached_listing(self, file_path: str) -> None:
        """
        Add the name of the object at the given path to the cached listing of its parent directory, if the parent
        is cached and doesn't already contain that name.
        """
        parent, name = self._split_parent(file_path)
        if not name:
            return
        listing = self.__dir_cache.get(parent)
        if listing is not None and name not in listing:
            listing.append(name)

    def _remove_from_cached_listing(self, file_path: str) -> None:
        """
        Remove the name of the object at the given path from the cached listing of its parent directory, if the
        parent is cached and contains that name.
        """
        parent, name = self._split_parent(file_path)
        listing = self.__dir_cache.get(parent)
        if listing is not None and name in listing:
            listing.remove(name)

    def clear_caches(self) -> None:
        """
        Clear the file and directory caches of this handler.
        """
        self.__file_cache.clear()
        self.__encoded_file_cache.clear()
        self.__files.clear()
        self.__dir_cache.clear()
        self.__directories.clear()

    def read_file(self, file_path: str, base64_decode: bool = True) -> bytes:
        """
        Read a file from FileBridge. The bytes of the given file will be cached so that any subsequent reads of this
        file with the same base64_decode setting will not make an additional webservice call.

        :param file_path: The path to read the file from.
        :param base64_decode: If true, base64 decode the file. Files are by default base64 encoded when retrieved from
            FileBridge.
        :return: The bytes of the file.
        """
        # Decoded and non-decoded reads are cached separately; otherwise a read with one setting could be answered
        # with the bytes cached by a read with the other setting.
        cache = self.__file_cache if base64_decode else self.__encoded_file_cache
        if file_path in cache:
            return cache[file_path]
        file_bytes: bytes = FileBridge.read_file(self.user, self.__bridge, file_path, base64_decode)
        cache[file_path] = file_bytes
        return file_bytes

    def write_file(self, file_path: str, file_data: bytes | str) -> None:
        """
        Write a file to FileBridge. The bytes of the given file will be cached so that any subsequent reads of this
        file will not make an additional webservice call.

        :param file_path: The path to write the file to. If a file already exists at the given path then the file is
            overwritten.
        :param file_data: A string or bytes of the file to be written.
        """
        FileBridge.write_file(self.user, self.__bridge, file_path, file_data)
        self.__file_cache[file_path] = file_data if isinstance(file_data, bytes) else file_data.encode()
        # Any previously cached non-decoded copy describes the overwritten file, so drop it.
        self.__encoded_file_cache.pop(file_path, None)

        # Add the file name to the cached list of files for its directory, assuming we have this directory cached
        # and the file isn't already in it.
        self._add_to_cached_listing(file_path)

    def delete_file(self, file_path: str) -> None:
        """
        Delete an existing file in FileBridge. If this file is in the cache, it will also be deleted from the cache,
        including from the cached listing of the directory that contained it.

        :param file_path: The path to the file to delete.
        """
        FileBridge.delete_file(self.user, self.__bridge, file_path)
        self.__file_cache.pop(file_path, None)
        self.__encoded_file_cache.pop(file_path, None)
        self.__files.pop(file_path, None)
        self._remove_from_cached_listing(file_path)

    def list_directory(self, file_path: str) -> list[str]:
        """
        List the contents of a FileBridge directory. The contents of this directory will be cached so that any
        subsequent lists of this directory will not make an additional webservice call.

        :param file_path: The path to read the directory from.
        :return: A list of names of files and folders in the directory. This is the cached list object itself, so
            callers should not modify it.
        """
        if file_path in self.__dir_cache:
            return self.__dir_cache[file_path]
        files: list[str] = FileBridge.list_directory(self.user, self.__bridge, file_path)
        self.__dir_cache[file_path] = files
        return files

    def create_directory(self, file_path: str) -> None:
        """
        Create a new directory in FileBridge. This new directory will be added to the cache as empty so that listing
        the same directory does not make an additional webservice call.

        :param file_path: The path to create the directory at. If a directory already exists at the given path then an
            exception is raised.
        """
        FileBridge.create_directory(self.user, self.__bridge, file_path)
        # This directory was just created, so we know it's empty.
        self.__dir_cache[file_path] = []
        # Keep the cached listing of the parent directory (if any) in sync with the new directory.
        self._add_to_cached_listing(file_path)

    def delete_directory(self, file_path: str) -> None:
        """
        Delete an existing directory in FileBridge. If this directory is in the cache, it will also be deleted
        from the cache, along with every cached file or directory located beneath it.

        :param file_path: The path to the directory to delete.
        """
        FileBridge.delete_directory(self.user, self.__bridge, file_path)
        self.__dir_cache.pop(file_path, None)
        self.__directories.pop(file_path, None)
        self._remove_from_cached_listing(file_path)

        # Once the directory is gone, nothing beneath it exists anymore, so any cache entry for a nested path
        # would be stale.
        prefix: str = file_path + "/"
        for cache in (self.__file_cache, self.__encoded_file_cache, self.__files,
                      self.__dir_cache, self.__directories):
            for key in [x for x in cache if x.startswith(prefix)]:
                del cache[key]

    def is_file(self, file_path: str) -> bool:
        """
        Determine if the given file path points to a file or a directory. If the answer isn't already known from the
        caches, this is achieved by trying to call list_directory on the given file path. If an exception is thrown,
        that's assumed to be because the function was called on a file. If no exception is thrown, then we know that
        this is a directory, and we have now also cached the contents of that directory.

        :param file_path: A file path.
        :return: True if the file path points to a file. False if it points to a directory.
        """
        if file_path in self.__dir_cache:
            return False
        # A path only has cached bytes if it was successfully read or written as a file, so there's no need to
        # make a webservice call that is expected to fail.
        if file_path in self.__file_cache or file_path in self.__encoded_file_cache:
            return True
        try:
            self.list_directory(file_path)
        except Exception:
            # NOTE(review): every failure, including connection problems or a path that doesn't exist at all, is
            # reported as "this is a file" because the exception type raised for a non-directory isn't known here.
            return True
        return False

    def move_file(self, move_from: str, move_to: str, old_name: str, new_name: str | None = None) -> None:
        """
        Move a file from one location to another within File Bridge. This is done by reading the file into memory,
        writing a copy of the file in the new location, then deleting the original file.

        :param move_from: The path to the current location of the file.
        :param move_to: The path to move the file to.
        :param old_name: The current name of the file.
        :param new_name: The name that the file should have after it is moved. If this is not provided, then the new
            name will be the same as the old name.
        """
        if not new_name:
            new_name = old_name

        source: str = move_from + "/" + old_name
        destination: str = move_to + "/" + new_name
        # Moving a file onto itself must be a no-op; otherwise the delete below would remove the only copy.
        if source == destination:
            return

        # Read the file into memory.
        file_bytes: bytes = self.read_file(source)
        # Write the file into the new location.
        self.write_file(destination, file_bytes)
        # Delete the file from the old location. We do this last in case the write call fails.
        self.delete_file(source)

    def get_file_object(self, file_path: str) -> File:
        """
        Get a File object from a file path. This object can be used to get the contents of the file at this path
        and traverse up the file hierarchy to the directory that the file is contained within.

        There is no guarantee that this file actually exists within the current file bridge connection when it is
        constructed. If the file doesn't exist, then retrieving its contents will fail.

        :param file_path: A file path.
        :return: A File object constructed from the given file path.
        """
        if file_path in self.__files:
            return self.__files[file_path]
        file = File(self, file_path)
        self.__files[file_path] = file
        return file

    def get_directory_object(self, file_path: str | None) -> Directory | None:
        """
        Get a Directory object from a file path. This object can be used to traverse up and down the file hierarchy
        by going up to the parent directory that this directory is contained within or going down to the contents of
        this directory.

        There is no guarantee that this directory actually exists within the current file bridge connection when it is
        constructed. If the directory doesn't exist, then retrieving its contents will fail.

        :param file_path: A file path, or None.
        :return: A Directory object constructed from the given file path, or None if the given path is None.
        """
        if file_path is None:
            return None
        if file_path in self.__directories:
            return self.__directories[file_path]
        directory = Directory(self, file_path)
        self.__directories[file_path] = directory
        return directory
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class FileBridgeObject(ABC):
    """
    Common base of the objects that live in file bridge: files and directories. An object knows its own name and
    the parent directory that holds it; the parent is None when the object's path has no directory part (i.e. it is
    located in the bridge root). The full path of an object is rebuilt from these two pieces.
    """
    _handler: FileBridgeHandler
    name: str
    parent: Directory | None

    def __init__(self, handler: FileBridgeHandler, file_path: str):
        """
        :param handler: The FileBridgeHandler that this object is retrieved through.
        :param file_path: The path to this object. It is split on its last slash into the object's name and the
            path of its parent directory.
        """
        self._handler = handler

        object_name, parent_path = split_path(file_path)
        self.name = object_name
        # The handler returns None for a None path, which is how root-level objects end up without a parent.
        self.parent = handler.get_directory_object(parent_path)

    @abstractmethod
    def is_file(self) -> bool:
        """
        :return: True if this object is a file. False if it is a directory.
        """
        pass

    def get_path(self) -> str:
        """
        :return: The file path that leads to this object, built by joining the path of the parent directory (when
            there is one) and this object's name with a "/".
        """
        container = self.parent
        if container is not None:
            return "/".join((container.get_path(), self.name))
        return self.name
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class File(FileBridgeObject):
    """
    A FileBridgeObject that represents a single file.
    """

    def __init__(self, handler: FileBridgeHandler, file_path: str):
        """
        :param handler: A FileBridgeHandler for the connection that this file came from.
        :param file_path: The path to this file.
        """
        super().__init__(handler, file_path)

    @property
    def contents(self) -> bytes:
        """
        :return: The bytes of this file, as read through this object's related FileBridgeHandler (and therefore
            through that handler's cache).
        """
        own_path: str = self.get_path()
        return self._handler.read_file(own_path)

    def is_file(self) -> bool:
        """
        :return: Always True, as this object is a file.
        """
        return True
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class Directory(FileBridgeObject):
    """
    A FileBridgeObject that represents a directory, which may hold files and further directories.
    """

    def __init__(self, handler: FileBridgeHandler, file_path: str):
        """
        :param handler: A FileBridgeHandler for the connection that this directory came from.
        :param file_path: The path to this directory.
        """
        super().__init__(handler, file_path)

    @property
    def contents(self) -> dict[str, FileBridgeObject]:
        """
        :return: A dictionary of object names to the objects (Files or Directories) contained within this Directory.
            The listing and the objects are retrieved through this object's related FileBridgeHandler. The
            dictionary is rebuilt on every access.
        """
        handler = self._handler
        base: str = self.get_path()
        found: dict[str, FileBridgeObject] = {}
        for entry in handler.list_directory(base):
            entry_path: str = f"{base}/{entry}"
            # Ask the handler what kind of object this entry is, then fetch the matching wrapper object.
            getter = handler.get_file_object if handler.is_file(entry_path) else handler.get_directory_object
            found[entry] = getter(entry_path)
        return found

    def is_file(self) -> bool:
        """
        :return: Always False, as this object is a directory.
        """
        return False

    def get_files(self) -> dict[str, File]:
        """
        :return: A mapping of file name to File for every file in this Directory.
            This is a filtered view of this Directory's contents.
        """
        return {name: obj for name, obj in self.contents.items() if obj.is_file()}

    def get_directories(self) -> dict[str, Directory]:
        """
        :return: A mapping of directory name to Directory for every directory in this Directory.
            This is a filtered view of this Directory's contents.
        """
        return {name: obj for name, obj in self.contents.items() if not obj.is_file()}
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def split_path(file_path: str) -> tuple[str, str | None]:
    """
    Split a file path on its last slash.

    :param file_path: A file path where directories are separated by "/" characters.
    :return: A tuple of two values. The first is the name of the file/directory at the given file path (everything
        after the last slash) and the second is the location of that file (everything before the last slash). If
        the path contains no slash, then the name is the entire path and the location is None.
    """
    location, slash, name = file_path.rpartition("/")
    # rpartition reports an empty separator when the path has no slash at all.
    if not slash:
        return file_path, None
    return name, location
|
|
@@ -21,7 +21,7 @@ class FileUtil:
|
|
|
21
21
|
"""
|
|
22
22
|
@staticmethod
|
|
23
23
|
def tokenize_csv(file_bytes: bytes, required_headers: list[str] | None = None, header_row_index: int | None = 0,
|
|
24
|
-
seperator: str = ",") -> tuple[list[dict[str, str]], list[list[str]]]:
|
|
24
|
+
seperator: str = ",", *, encoding: str | None = None) -> tuple[list[dict[str, str]], list[list[str]]]:
|
|
25
25
|
"""
|
|
26
26
|
Tokenize a CSV file. The provided file must be uniform. That is, if row 1 has 10 cells, all the rows in the file
|
|
27
27
|
must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
|
|
@@ -34,13 +34,17 @@ class FileUtil:
|
|
|
34
34
|
meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
|
|
35
35
|
is assumed to be the header row.
|
|
36
36
|
:param seperator: The character that separates cells in the table.
|
|
37
|
+
:param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
|
|
38
|
+
contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
|
|
39
|
+
ISO-8859-1 as the encoding.
|
|
37
40
|
:return: The CSV parsed into a list of dicts where each dict is a row, mapping the headers to the cells for
|
|
38
41
|
that row. Also returns a list of each row above the headers (the metadata), parsed into a list of each cell.
|
|
39
42
|
If the header row index is 0 or None, this list will be empty.
|
|
40
43
|
"""
|
|
41
44
|
# Parse the file bytes into two DataFrames. The first is metadata of the file located above the header row,
|
|
42
45
|
# while the second is the body of the file below the header row.
|
|
43
|
-
file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator
|
|
46
|
+
file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator,
|
|
47
|
+
encoding=encoding)
|
|
44
48
|
# Parse the metadata from above the header row index into a list of lists.
|
|
45
49
|
metadata: list[list[str]] = FileUtil.data_frame_to_lists(file_metadata)
|
|
46
50
|
# Parse the data from the file body into a list of dicts.
|
|
@@ -74,8 +78,8 @@ class FileUtil:
|
|
|
74
78
|
return rows, metadata
|
|
75
79
|
|
|
76
80
|
@staticmethod
|
|
77
|
-
def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ","
|
|
78
|
-
|
|
81
|
+
def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ",",
|
|
82
|
+
*, encoding: str | None = None) -> tuple[DataFrame, DataFrame | None]:
|
|
79
83
|
"""
|
|
80
84
|
Parse the file bytes for a CSV into DataFrames. The provided file must be uniform. That is, if row 1 has 10
|
|
81
85
|
cells, all the rows in the file must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
|
|
@@ -86,6 +90,9 @@ class FileUtil:
|
|
|
86
90
|
meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
|
|
87
91
|
is assumed to be the header row.
|
|
88
92
|
:param seperator: The character that separates cells in the table.
|
|
93
|
+
:param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
|
|
94
|
+
contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
|
|
95
|
+
ISO-8859-1 as the encoding.
|
|
89
96
|
:return: A tuple of two DataFrames. The first is the frame for the CSV table body, while the second is for the
|
|
90
97
|
metadata from above the header row, or None if there is no metadata.
|
|
91
98
|
"""
|
|
@@ -97,13 +104,13 @@ class FileUtil:
|
|
|
97
104
|
# can throw off the header row index.
|
|
98
105
|
file_metadata = pandas.read_csv(file_io, header=None, dtype=dtype(str),
|
|
99
106
|
skiprows=lambda x: x >= header_row_index,
|
|
100
|
-
skip_blank_lines=False, sep=seperator)
|
|
107
|
+
skip_blank_lines=False, sep=seperator, encoding=encoding)
|
|
101
108
|
with io.BytesIO(file_bytes) as file_io:
|
|
102
109
|
# The use of the dtype argument is to ensure that everything from the file gets read as a string. Added
|
|
103
110
|
# because some numerical values would get ".0" appended to them, even when casting the DataFrame cell to a
|
|
104
111
|
# string.
|
|
105
112
|
file_body: DataFrame = pandas.read_csv(file_io, header=header_row_index, dtype=dtype(str),
|
|
106
|
-
skip_blank_lines=False, sep=seperator)
|
|
113
|
+
skip_blank_lines=False, sep=seperator, encoding=encoding)
|
|
107
114
|
|
|
108
115
|
return file_body, file_metadata
|
|
109
116
|
|
|
@@ -4,12 +4,15 @@ from abc import abstractmethod
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
from sapiopylib.rest.User import SapioUser
|
|
7
|
+
from sapiopylib.rest.pojo.CustomReport import RawReportTerm, RawTermOperation
|
|
7
8
|
from sapiopylib.rest.pojo.datatype.FieldDefinition import VeloxIntegerFieldDefinition, VeloxStringFieldDefinition, \
|
|
8
9
|
AbstractVeloxFieldDefinition
|
|
10
|
+
from sapiopylib.rest.pojo.webhook.WebhookContext import SapioWebhookContext
|
|
9
11
|
from sapiopylib.rest.pojo.webhook.WebhookResult import SapioWebhookResult
|
|
10
12
|
|
|
11
13
|
from sapiopycommons.callbacks.callback_util import CallbackUtil
|
|
12
14
|
from sapiopycommons.files.file_data_handler import FileDataHandler, FilterList
|
|
15
|
+
from sapiopycommons.general.custom_report_util import CustomReportUtil
|
|
13
16
|
from sapiopycommons.general.time_util import TimeUtil
|
|
14
17
|
|
|
15
18
|
|
|
@@ -480,3 +483,71 @@ class ContainsSubstringFromCellRule(RowRule):
|
|
|
480
483
|
|
|
481
484
|
def validate(self, row: dict[str, Any]) -> bool:
|
|
482
485
|
return row.get(self.second) in row.get(self.first)
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
class UniqueSystemValueRule(ColumnRule):
    """
    A column rule that fails every cell whose value is already in use in the system for a given data type and
    field name. In other words, every value in the column is required to be new to the system.
    """
    user: SapioUser
    data_type_name: str
    data_field_name: str

    def __init__(self, context: SapioWebhookContext | SapioUser, header: str, data_type_name: str,
                 data_field_name: str):
        """
        :param context: The current webhook context or a user object to send requests from.
        :param header: The header that this rule acts upon.
        :param data_type_name: The data type name to search on.
        :param data_field_name: The data field name to search on. This is expected to be a string field.
        """
        if isinstance(context, SapioWebhookContext):
            self.user = context.user
        else:
            self.user = context
        self.data_type_name = data_type_name
        self.data_field_name = data_field_name
        super().__init__(header, "This value already exists in the system.")

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: The rows of the file being validated.
        :return: The result of the file data handler's get_in_list call for this rule's header against the values
            found in the system; presumably the indices of the offending rows.
        """
        handler = FileDataHandler(rows)

        # Search the system with a single quick report for records of this type whose field matches any of the
        # column's values. The values are sent as one comma separated list wrapped in curly braces.
        search_values: str = "{" + ",".join(handler.get_values_list(self.header)) + "}"
        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
                             search_values)
        matches: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)

        in_system: list[Any] = [match.get(self.data_field_name) for match in matches]
        return handler.get_in_list(self.header, in_system)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
class ExistingSystemValueRule(ColumnRule):
    """
    A column rule that fails every cell whose value is not already in use in the system for a given data type and
    field name. In other words, every value in the column is required to match something in the system.
    """
    user: SapioUser
    data_type_name: str
    data_field_name: str

    def __init__(self, context: SapioWebhookContext | SapioUser, header: str, data_type_name: str,
                 data_field_name: str):
        """
        :param context: The current webhook context or a user object to send requests from.
        :param header: The header that this rule acts upon.
        :param data_type_name: The data type name to search on.
        :param data_field_name: The data field name to search on. This is expected to be a string field.
        """
        if isinstance(context, SapioWebhookContext):
            self.user = context.user
        else:
            self.user = context
        self.data_type_name = data_type_name
        self.data_field_name = data_field_name
        super().__init__(header, "This value doesn't exist in the system.")

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: The rows of the file being validated.
        :return: The result of the file data handler's get_not_in_list call for this rule's header against the
            values found in the system; presumably the indices of the offending rows.
        """
        handler = FileDataHandler(rows)

        # Search the system with a single quick report for records of this type whose field matches any of the
        # column's values. The values are sent as one comma separated list wrapped in curly braces.
        search_values: str = "{" + ",".join(handler.get_values_list(self.header)) + "}"
        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
                             search_values)
        matches: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)

        in_system: list[Any] = [match.get(self.data_field_name) for match in matches]
        return handler.get_not_in_list(self.header, in_system)
|