sapiopycommons 2025.7.10a595__py3-none-any.whl → 2025.7.15a611__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sapiopycommons might be problematic. Click here for more details.
- sapiopycommons/ai/__init__.py +0 -0
- sapiopycommons/ai/tool_of_tools.py +917 -0
- sapiopycommons/callbacks/callback_util.py +22 -15
- sapiopycommons/chem/IndigoMolecules.py +0 -2
- sapiopycommons/files/file_text_converter.py +207 -0
- sapiopycommons/recordmodel/record_handler.py +161 -107
- sapiopycommons/webhook/webhook_handlers.py +6 -4
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.15a611.dist-info}/METADATA +1 -1
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.15a611.dist-info}/RECORD +11 -9
- sapiopycommons/chem/ps_commons.py +0 -523
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.15a611.dist-info}/WHEEL +0 -0
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.15a611.dist-info}/licenses/LICENSE +0 -0
|
@@ -780,7 +780,7 @@ class CallbackUtil:
|
|
|
780
780
|
# FR-47690: Set default values for fields that aren't present.
|
|
781
781
|
for row in values:
|
|
782
782
|
for field in fields:
|
|
783
|
-
if field.data_field_name not in
|
|
783
|
+
if field.data_field_name not in row:
|
|
784
784
|
row[field.data_field_name] = field.default_value
|
|
785
785
|
|
|
786
786
|
# Convert the group_by parameter to a field name.
|
|
@@ -1812,7 +1812,8 @@ class CallbackUtil:
|
|
|
1812
1812
|
return response
|
|
1813
1813
|
|
|
1814
1814
|
def request_file(self, title: str, exts: Iterable[str] | None = None,
|
|
1815
|
-
show_image_editor: bool = False, show_camera_button: bool = False
|
|
1815
|
+
show_image_editor: bool = False, show_camera_button: bool = False,
|
|
1816
|
+
*, enforce_file_extensions: bool = True) -> tuple[str, bytes]:
|
|
1816
1817
|
"""
|
|
1817
1818
|
Request a single file from the user.
|
|
1818
1819
|
|
|
@@ -1822,6 +1823,8 @@ class CallbackUtil:
|
|
|
1822
1823
|
:param show_image_editor: Whether the user will see an image editor when image is uploaded in this file prompt.
|
|
1823
1824
|
:param show_camera_button: Whether the user will be able to use camera to take a picture as an upload request,
|
|
1824
1825
|
rather than selecting an existing file.
|
|
1826
|
+
:param enforce_file_extensions: If true, then the file extensions provided in the exts parameter will be
|
|
1827
|
+
enforced. If false, then the user may upload any file type.
|
|
1825
1828
|
:return: The file name and bytes of the uploaded file.
|
|
1826
1829
|
"""
|
|
1827
1830
|
# If no extensions were provided, use an empty list for the extensions instead.
|
|
@@ -1841,11 +1844,12 @@ class CallbackUtil:
|
|
|
1841
1844
|
file_path: str = self.__send_dialog(request, self.callback.show_file_dialog, data_sink=do_consume)
|
|
1842
1845
|
|
|
1843
1846
|
# Verify that the given file matches the expected extension(s).
|
|
1844
|
-
self.__verify_file(file_path, sink.data, exts)
|
|
1847
|
+
self.__verify_file(file_path, sink.data, exts if enforce_file_extensions else None)
|
|
1845
1848
|
return file_path, sink.data
|
|
1846
1849
|
|
|
1847
1850
|
def request_files(self, title: str, exts: Iterable[str] | None = None,
|
|
1848
|
-
show_image_editor: bool = False, show_camera_button: bool = False
|
|
1851
|
+
show_image_editor: bool = False, show_camera_button: bool = False,
|
|
1852
|
+
*, enforce_file_extensions: bool = True) -> dict[str, bytes]:
|
|
1849
1853
|
"""
|
|
1850
1854
|
Request multiple files from the user.
|
|
1851
1855
|
|
|
@@ -1855,6 +1859,8 @@ class CallbackUtil:
|
|
|
1855
1859
|
:param show_image_editor: Whether the user will see an image editor when image is uploaded in this file prompt.
|
|
1856
1860
|
:param show_camera_button: Whether the user will be able to use camera to take a picture as an upload request,
|
|
1857
1861
|
rather than selecting an existing file.
|
|
1862
|
+
:param enforce_file_extensions: If true, then the file extensions provided in the exts parameter will be
|
|
1863
|
+
enforced. If false, then the user may upload any file type.
|
|
1858
1864
|
:return: A dictionary of file name to file bytes for each file the user uploaded.
|
|
1859
1865
|
"""
|
|
1860
1866
|
# If no extensions were provided, use an empty list for the extensions instead.
|
|
@@ -1870,7 +1876,7 @@ class CallbackUtil:
|
|
|
1870
1876
|
for file_path in file_paths:
|
|
1871
1877
|
sink = InMemoryRecordDataSink(self.user)
|
|
1872
1878
|
sink.consume_client_callback_file_path_data(file_path)
|
|
1873
|
-
self.__verify_file(file_path, sink.data, exts)
|
|
1879
|
+
self.__verify_file(file_path, sink.data, exts if enforce_file_extensions else None)
|
|
1874
1880
|
ret_dict.update({file_path: sink.data})
|
|
1875
1881
|
|
|
1876
1882
|
return ret_dict
|
|
@@ -1887,16 +1893,17 @@ class CallbackUtil:
|
|
|
1887
1893
|
"""
|
|
1888
1894
|
if file_path is None or len(file_path) == 0 or file_bytes is None or len(file_bytes) == 0:
|
|
1889
1895
|
raise SapioUserErrorException("Empty file provided or file unable to be read.")
|
|
1890
|
-
if allowed_extensions:
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1896
|
+
if not allowed_extensions:
|
|
1897
|
+
return
|
|
1898
|
+
matches: bool = False
|
|
1899
|
+
for ext in allowed_extensions:
|
|
1900
|
+
# FR-47690: Changed to a case-insensitive match.
|
|
1901
|
+
if file_path.casefold().endswith("." + ext.lstrip(".").casefold()):
|
|
1902
|
+
matches = True
|
|
1903
|
+
break
|
|
1904
|
+
if not matches:
|
|
1905
|
+
raise SapioUserErrorException("Unsupported file type. Expecting the following extension(s): "
|
|
1906
|
+
+ (",".join(allowed_extensions)))
|
|
1900
1907
|
|
|
1901
1908
|
def write_file(self, file_name: str, file_data: str | bytes) -> None:
|
|
1902
1909
|
"""
|
|
@@ -6,8 +6,6 @@ indigo = Indigo()
|
|
|
6
6
|
renderer = IndigoRenderer(indigo)
|
|
7
7
|
indigo.setOption("render-output-format", "svg")
|
|
8
8
|
indigo.setOption("ignore-stereochemistry-errors", True)
|
|
9
|
-
# Ignore only if loading as non-query object. That is the meaning of this flag. Does nothing if it's query molecule.
|
|
10
|
-
indigo.setOption("ignore-noncritical-query-features", True)
|
|
11
9
|
indigo.setOption("render-stereo-style", "ext")
|
|
12
10
|
indigo.setOption("aromaticity-model", "generic")
|
|
13
11
|
indigo.setOption("render-coloring", True)
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import os
|
|
3
|
+
import tempfile
|
|
4
|
+
from enum import Enum, auto
|
|
5
|
+
|
|
6
|
+
class FileType(Enum):
    """File formats that the text converter can recognize."""

    # Plain-text formats.
    TXT = 1
    MD = 2
    CSV = 3

    # Word processing, spreadsheet, and presentation formats (legacy and XML-based variants).
    DOC = 4
    DOCX = 5
    XLS = 6
    XLSX = 7
    PPT = 8
    PPTX = 9

    # Portable Document Format.
    PDF = 10

    # Fallback for anything that is not one of the formats above.
    UNKNOWN = 11
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FileToTextConverter:
|
|
22
|
+
"""
|
|
23
|
+
A class for converting various file types to raw text.
|
|
24
|
+
"""
|
|
25
|
+
@staticmethod
|
|
26
|
+
def mime_type_to_enum(mime_type: str) -> FileType:
|
|
27
|
+
"""
|
|
28
|
+
Converts a MIME type to a FileType enum.
|
|
29
|
+
|
|
30
|
+
:param mime_type: The MIME type string to convert.
|
|
31
|
+
:return: The corresponding FileType enum, or UNKNOWN if not recognized.
|
|
32
|
+
"""
|
|
33
|
+
if not mime_type or not mime_type.strip():
|
|
34
|
+
return FileType.UNKNOWN
|
|
35
|
+
|
|
36
|
+
mime_map = {
|
|
37
|
+
"text/plain": FileType.TXT,
|
|
38
|
+
"text/markdown": FileType.MD,
|
|
39
|
+
"text/csv": FileType.CSV,
|
|
40
|
+
"application/msword": FileType.DOC,
|
|
41
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": FileType.DOCX,
|
|
42
|
+
"application/vnd.ms-excel": FileType.XLS,
|
|
43
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": FileType.XLSX,
|
|
44
|
+
"application/vnd.ms-powerpoint": FileType.PPT,
|
|
45
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation": FileType.PPTX,
|
|
46
|
+
"application/pdf": FileType.PDF,
|
|
47
|
+
}
|
|
48
|
+
return mime_map.get(mime_type, FileType.UNKNOWN)
|
|
49
|
+
|
|
50
|
+
@staticmethod
|
|
51
|
+
def file_extension_to_enum(file_path: str) -> FileType:
|
|
52
|
+
"""
|
|
53
|
+
Converts a file path or extension to a FileType enum.
|
|
54
|
+
|
|
55
|
+
:param file_path: The file path or extension to convert.
|
|
56
|
+
:return: The corresponding FileType enum, or UNKNOWN if not recognized.
|
|
57
|
+
"""
|
|
58
|
+
if not file_path or not file_path.strip():
|
|
59
|
+
return FileType.UNKNOWN
|
|
60
|
+
|
|
61
|
+
# Extract the file extension, removing the leading dot and making it lowercase
|
|
62
|
+
file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()
|
|
63
|
+
|
|
64
|
+
ext_map = {
|
|
65
|
+
"txt": FileType.TXT,
|
|
66
|
+
"md": FileType.MD,
|
|
67
|
+
"csv": FileType.CSV,
|
|
68
|
+
"doc": FileType.DOC,
|
|
69
|
+
"docx": FileType.DOCX,
|
|
70
|
+
"xls": FileType.XLS,
|
|
71
|
+
"xlsx": FileType.XLSX,
|
|
72
|
+
"ppt": FileType.PPT,
|
|
73
|
+
"pptx": FileType.PPTX,
|
|
74
|
+
"pdf": FileType.PDF,
|
|
75
|
+
}
|
|
76
|
+
return ext_map.get(file_extension, FileType.UNKNOWN)
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def parse_file(cls, file_type: FileType, file_bytes: bytes) -> str | None:
|
|
80
|
+
"""
|
|
81
|
+
Parses file bytes based on the FileType and returns the text content.
|
|
82
|
+
|
|
83
|
+
:param file_type: The type of the file to parse.
|
|
84
|
+
:param file_bytes: The raw bytes of the file to parse.
|
|
85
|
+
:return: The text content of the file, or None if the file type is not supported or parsing fails.
|
|
86
|
+
"""
|
|
87
|
+
if file_type is None or file_bytes is None:
|
|
88
|
+
return None
|
|
89
|
+
if not file_bytes:
|
|
90
|
+
return ""
|
|
91
|
+
|
|
92
|
+
# Dispatch to the correct parser method
|
|
93
|
+
parser_map = {
|
|
94
|
+
FileType.TXT: cls._parse_plain_text,
|
|
95
|
+
FileType.MD: cls._parse_plain_text,
|
|
96
|
+
FileType.CSV: cls._parse_plain_text,
|
|
97
|
+
FileType.DOC: cls._parse_doc,
|
|
98
|
+
FileType.DOCX: cls._parse_docx,
|
|
99
|
+
FileType.XLS: cls._parse_xls,
|
|
100
|
+
FileType.XLSX: cls._parse_xlsx,
|
|
101
|
+
FileType.PPT: cls._parse_ppt,
|
|
102
|
+
FileType.PPTX: cls._parse_pptx,
|
|
103
|
+
FileType.PDF: cls._parse_pdf,
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
parser_func = parser_map.get(file_type)
|
|
107
|
+
|
|
108
|
+
if parser_func:
|
|
109
|
+
return parser_func(file_bytes)
|
|
110
|
+
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
@staticmethod
|
|
114
|
+
def _parse_plain_text(file_bytes: bytes) -> str:
|
|
115
|
+
return file_bytes.decode('utf-8')
|
|
116
|
+
|
|
117
|
+
@staticmethod
|
|
118
|
+
def _run_textract(file_bytes: bytes, extension: str) -> str:
|
|
119
|
+
"""
|
|
120
|
+
Helper to run textract on in-memory bytes by writing to a temp file.
|
|
121
|
+
Note: textract may require external system dependencies.
|
|
122
|
+
"""
|
|
123
|
+
import textract
|
|
124
|
+
with tempfile.NamedTemporaryFile(suffix=f".{extension}", delete=True) as temp_file:
|
|
125
|
+
temp_file.write(file_bytes)
|
|
126
|
+
temp_file.flush() # Ensure all bytes are written to disk
|
|
127
|
+
text = textract.process(temp_file.name).decode('utf-8')
|
|
128
|
+
return text
|
|
129
|
+
|
|
130
|
+
@classmethod
|
|
131
|
+
def _parse_doc(cls, file_bytes: bytes) -> str:
|
|
132
|
+
return cls._run_textract(file_bytes, 'doc')
|
|
133
|
+
|
|
134
|
+
@staticmethod
|
|
135
|
+
def _parse_docx(file_bytes: bytes) -> str:
|
|
136
|
+
import docx
|
|
137
|
+
with io.BytesIO(file_bytes) as stream:
|
|
138
|
+
document = docx.Document(stream)
|
|
139
|
+
return "\n".join(para.text for para in document.paragraphs if para.text.strip())
|
|
140
|
+
|
|
141
|
+
@staticmethod
|
|
142
|
+
def _parse_xls(file_bytes: bytes) -> str:
|
|
143
|
+
import xlrd
|
|
144
|
+
workbook = xlrd.open_workbook(file_contents=file_bytes)
|
|
145
|
+
text_parts = []
|
|
146
|
+
for sheet in workbook.sheets():
|
|
147
|
+
text_parts.append(f"Sheet: {sheet.name}\n")
|
|
148
|
+
for row_idx in range(sheet.nrows):
|
|
149
|
+
row_cells = []
|
|
150
|
+
for col_idx in range(sheet.ncols):
|
|
151
|
+
cell_text = str(sheet.cell_value(row_idx, col_idx))
|
|
152
|
+
if cell_text.strip():
|
|
153
|
+
row_cells.append(cell_text + "\t")
|
|
154
|
+
if row_cells:
|
|
155
|
+
text_parts.append("".join(row_cells))
|
|
156
|
+
text_parts.append("\n")
|
|
157
|
+
text_parts.append("\n")
|
|
158
|
+
return "".join(text_parts)
|
|
159
|
+
|
|
160
|
+
@staticmethod
|
|
161
|
+
def _parse_xlsx(file_bytes: bytes) -> str:
|
|
162
|
+
import openpyxl
|
|
163
|
+
with io.BytesIO(file_bytes) as stream:
|
|
164
|
+
workbook = openpyxl.load_workbook(stream, read_only=True)
|
|
165
|
+
text_parts = []
|
|
166
|
+
for sheet in workbook.worksheets:
|
|
167
|
+
text_parts.append(f"Sheet: {sheet.title}\n")
|
|
168
|
+
for row in sheet.iter_rows():
|
|
169
|
+
row_cells = []
|
|
170
|
+
for cell in row:
|
|
171
|
+
cell_text = str(cell.value) if cell.value is not None else ""
|
|
172
|
+
if cell_text.strip():
|
|
173
|
+
row_cells.append(cell_text + "\t")
|
|
174
|
+
if row_cells:
|
|
175
|
+
text_parts.append("".join(row_cells))
|
|
176
|
+
text_parts.append("\n")
|
|
177
|
+
text_parts.append("\n")
|
|
178
|
+
return "".join(text_parts)
|
|
179
|
+
|
|
180
|
+
@classmethod
|
|
181
|
+
def _parse_ppt(cls, file_bytes: bytes) -> str:
|
|
182
|
+
return cls._run_textract(file_bytes, 'ppt')
|
|
183
|
+
|
|
184
|
+
@staticmethod
|
|
185
|
+
def _parse_pptx(file_bytes: bytes) -> str:
|
|
186
|
+
import pptx
|
|
187
|
+
with io.BytesIO(file_bytes) as stream:
|
|
188
|
+
presentation = pptx.Presentation(stream)
|
|
189
|
+
text_parts = []
|
|
190
|
+
for slide in presentation.slides:
|
|
191
|
+
for shape in slide.shapes:
|
|
192
|
+
if shape.has_text_frame:
|
|
193
|
+
text = shape.text_frame.text
|
|
194
|
+
if text and text.strip():
|
|
195
|
+
text_parts.append(text)
|
|
196
|
+
return "\n".join(text_parts)
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def _parse_pdf(file_bytes: bytes) -> str:
|
|
200
|
+
"""Parses a PDF file's bytes and extracts text using PyMuPDF."""
|
|
201
|
+
import pymupdf
|
|
202
|
+
text_parts = []
|
|
203
|
+
with io.BytesIO(file_bytes) as stream:
|
|
204
|
+
with pymupdf.open(stream=stream) as doc:
|
|
205
|
+
for page in doc:
|
|
206
|
+
text_parts.append(page.get_text())
|
|
207
|
+
return "\n".join(text_parts)
|