sapiopycommons 2025.6.19a564__py3-none-any.whl → 2026.1.22a847__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sapiopycommons/ai/__init__.py +0 -0
- sapiopycommons/ai/agent_service_base.py +2051 -0
- sapiopycommons/ai/converter_service_base.py +163 -0
- sapiopycommons/ai/external_credentials.py +131 -0
- sapiopycommons/ai/protoapi/agent/agent_pb2.py +87 -0
- sapiopycommons/ai/protoapi/agent/agent_pb2.pyi +282 -0
- sapiopycommons/ai/protoapi/agent/agent_pb2_grpc.py +154 -0
- sapiopycommons/ai/protoapi/agent/entry_pb2.py +49 -0
- sapiopycommons/ai/protoapi/agent/entry_pb2.pyi +40 -0
- sapiopycommons/ai/protoapi/agent/entry_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/agent/item/item_container_pb2.py +61 -0
- sapiopycommons/ai/protoapi/agent/item/item_container_pb2.pyi +181 -0
- sapiopycommons/ai/protoapi/agent/item/item_container_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.py +41 -0
- sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.pyi +36 -0
- sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.py +51 -0
- sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.pyi +59 -0
- sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.py +123 -0
- sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.pyi +599 -0
- sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2.py +59 -0
- sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2.pyi +68 -0
- sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2_grpc.py +149 -0
- sapiopycommons/ai/protoapi/pipeline/script/script_pb2.py +69 -0
- sapiopycommons/ai/protoapi/pipeline/script/script_pb2.pyi +109 -0
- sapiopycommons/ai/protoapi/pipeline/script/script_pb2_grpc.py +153 -0
- sapiopycommons/ai/protoapi/pipeline/step_output_pb2.py +49 -0
- sapiopycommons/ai/protoapi/pipeline/step_output_pb2.pyi +56 -0
- sapiopycommons/ai/protoapi/pipeline/step_output_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/pipeline/step_pb2.py +43 -0
- sapiopycommons/ai/protoapi/pipeline/step_pb2.pyi +44 -0
- sapiopycommons/ai/protoapi/pipeline/step_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.py +39 -0
- sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.pyi +33 -0
- sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2_grpc.py +24 -0
- sapiopycommons/ai/protobuf_utils.py +583 -0
- sapiopycommons/ai/request_validation.py +561 -0
- sapiopycommons/ai/server.py +152 -0
- sapiopycommons/ai/test_client.py +534 -0
- sapiopycommons/callbacks/callback_util.py +53 -24
- sapiopycommons/eln/experiment_handler.py +12 -5
- sapiopycommons/files/assay_plate_reader.py +93 -0
- sapiopycommons/files/file_text_converter.py +207 -0
- sapiopycommons/files/file_util.py +128 -1
- sapiopycommons/files/temp_files.py +82 -0
- sapiopycommons/flowcyto/flow_cyto.py +2 -24
- sapiopycommons/general/accession_service.py +2 -28
- sapiopycommons/general/aliases.py +4 -1
- sapiopycommons/general/macros.py +172 -0
- sapiopycommons/general/time_util.py +199 -4
- sapiopycommons/multimodal/multimodal.py +2 -24
- sapiopycommons/recordmodel/record_handler.py +200 -111
- sapiopycommons/rules/eln_rule_handler.py +3 -0
- sapiopycommons/rules/on_save_rule_handler.py +3 -0
- sapiopycommons/webhook/webhook_handlers.py +6 -4
- sapiopycommons/webhook/webservice_handlers.py +1 -1
- {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/METADATA +2 -2
- sapiopycommons-2026.1.22a847.dist-info/RECORD +113 -0
- sapiopycommons-2025.6.19a564.dist-info/RECORD +0 -68
- {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/WHEEL +0 -0
- {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/licenses/LICENSE +0 -0
|
@@ -780,7 +780,7 @@ class CallbackUtil:
|
|
|
780
780
|
# FR-47690: Set default values for fields that aren't present.
|
|
781
781
|
for row in values:
|
|
782
782
|
for field in fields:
|
|
783
|
-
if field.data_field_name not in
|
|
783
|
+
if field.data_field_name not in row:
|
|
784
784
|
row[field.data_field_name] = field.default_value
|
|
785
785
|
|
|
786
786
|
# Convert the group_by parameter to a field name.
|
|
@@ -858,9 +858,9 @@ class CallbackUtil:
|
|
|
858
858
|
raise SapioException("No records provided.")
|
|
859
859
|
data_type: str = AliasUtil.to_singular_data_type_name(records)
|
|
860
860
|
if index_field is not None:
|
|
861
|
-
field_map_list: list[FieldMap] = self.__get_indexed_field_maps(records, index_field)
|
|
861
|
+
field_map_list: list[FieldMap] = self.__get_indexed_field_maps(records, index_field, True)
|
|
862
862
|
else:
|
|
863
|
-
field_map_list: list[FieldMap] = AliasUtil.to_field_map_list(records)
|
|
863
|
+
field_map_list: list[FieldMap] = AliasUtil.to_field_map_list(records, True)
|
|
864
864
|
|
|
865
865
|
# Convert the group_by parameter to a field name.
|
|
866
866
|
if group_by is not None:
|
|
@@ -882,6 +882,18 @@ class CallbackUtil:
|
|
|
882
882
|
temp_dt = self.__temp_dt_from_field_names(data_type, fields, None, default_modifier, field_modifiers)
|
|
883
883
|
temp_dt.record_image_assignable = bool(image_data)
|
|
884
884
|
|
|
885
|
+
# PR-47894: If the RecordId field is not present in the layout, then it should not be included in the field
|
|
886
|
+
# maps, as otherwise selection list fields can break.
|
|
887
|
+
remove_record_id: bool = True
|
|
888
|
+
for field_def in temp_dt.get_field_def_list():
|
|
889
|
+
if field_def.data_field_name == "RecordId":
|
|
890
|
+
remove_record_id = False
|
|
891
|
+
break
|
|
892
|
+
if remove_record_id:
|
|
893
|
+
for field_map in field_map_list:
|
|
894
|
+
if "RecordId" in field_map:
|
|
895
|
+
del field_map["RecordId"]
|
|
896
|
+
|
|
885
897
|
# Send the request to the user.
|
|
886
898
|
request = TableEntryDialogRequest(title, msg, temp_dt, field_map_list,
|
|
887
899
|
record_image_data_list=image_data, group_by_field=group_by,
|
|
@@ -1765,8 +1777,11 @@ class CallbackUtil:
|
|
|
1765
1777
|
blank_result_handling = BlankResultHandling.REPEAT
|
|
1766
1778
|
def not_blank_func(r: list[DataRecord]) -> bool:
|
|
1767
1779
|
return bool(r)
|
|
1768
|
-
|
|
1769
|
-
|
|
1780
|
+
response: list[DataRecord] = self.__send_dialog_blank_results(request,
|
|
1781
|
+
self.callback.show_input_selection_dialog,
|
|
1782
|
+
not_blank_func, blank_result_handling,
|
|
1783
|
+
repeat_message, cancel_message)
|
|
1784
|
+
return self.rec_handler.wrap_models(response, wrapper_type)
|
|
1770
1785
|
|
|
1771
1786
|
# FR-47690: Deprecated the require_authentication parameter.
|
|
1772
1787
|
# noinspection PyUnusedLocal
|
|
@@ -1812,7 +1827,8 @@ class CallbackUtil:
|
|
|
1812
1827
|
return response
|
|
1813
1828
|
|
|
1814
1829
|
def request_file(self, title: str, exts: Iterable[str] | None = None,
|
|
1815
|
-
show_image_editor: bool = False, show_camera_button: bool = False
|
|
1830
|
+
show_image_editor: bool = False, show_camera_button: bool = False,
|
|
1831
|
+
*, enforce_file_extensions: bool = True) -> tuple[str, bytes]:
|
|
1816
1832
|
"""
|
|
1817
1833
|
Request a single file from the user.
|
|
1818
1834
|
|
|
@@ -1822,6 +1838,8 @@ class CallbackUtil:
|
|
|
1822
1838
|
:param show_image_editor: Whether the user will see an image editor when image is uploaded in this file prompt.
|
|
1823
1839
|
:param show_camera_button: Whether the user will be able to use camera to take a picture as an upload request,
|
|
1824
1840
|
rather than selecting an existing file.
|
|
1841
|
+
:param enforce_file_extensions: If true, then the file extensions provided in the exts parameter will be
|
|
1842
|
+
enforced. If false, then the user may upload any file type.
|
|
1825
1843
|
:return: The file name and bytes of the uploaded file.
|
|
1826
1844
|
"""
|
|
1827
1845
|
# If no extensions were provided, use an empty list for the extensions instead.
|
|
@@ -1841,11 +1859,12 @@ class CallbackUtil:
|
|
|
1841
1859
|
file_path: str = self.__send_dialog(request, self.callback.show_file_dialog, data_sink=do_consume)
|
|
1842
1860
|
|
|
1843
1861
|
# Verify that each of the file given matches the expected extension(s).
|
|
1844
|
-
self.__verify_file(file_path, sink.data, exts)
|
|
1862
|
+
self.__verify_file(file_path, sink.data, exts if enforce_file_extensions else None)
|
|
1845
1863
|
return file_path, sink.data
|
|
1846
1864
|
|
|
1847
1865
|
def request_files(self, title: str, exts: Iterable[str] | None = None,
|
|
1848
|
-
show_image_editor: bool = False, show_camera_button: bool = False
|
|
1866
|
+
show_image_editor: bool = False, show_camera_button: bool = False,
|
|
1867
|
+
*, enforce_file_extensions: bool = True) -> dict[str, bytes]:
|
|
1849
1868
|
"""
|
|
1850
1869
|
Request multiple files from the user.
|
|
1851
1870
|
|
|
@@ -1855,6 +1874,8 @@ class CallbackUtil:
|
|
|
1855
1874
|
:param show_image_editor: Whether the user will see an image editor when image is uploaded in this file prompt.
|
|
1856
1875
|
:param show_camera_button: Whether the user will be able to use camera to take a picture as an upload request,
|
|
1857
1876
|
rather than selecting an existing file.
|
|
1877
|
+
:param enforce_file_extensions: If true, then the file extensions provided in the exts parameter will be
|
|
1878
|
+
enforced. If false, then the user may upload any file type.
|
|
1858
1879
|
:return: A dictionary of file name to file bytes for each file the user uploaded.
|
|
1859
1880
|
"""
|
|
1860
1881
|
# If no extensions were provided, use an empty list for the extensions instead.
|
|
@@ -1870,7 +1891,7 @@ class CallbackUtil:
|
|
|
1870
1891
|
for file_path in file_paths:
|
|
1871
1892
|
sink = InMemoryRecordDataSink(self.user)
|
|
1872
1893
|
sink.consume_client_callback_file_path_data(file_path)
|
|
1873
|
-
self.__verify_file(file_path, sink.data, exts)
|
|
1894
|
+
self.__verify_file(file_path, sink.data, exts if enforce_file_extensions else None)
|
|
1874
1895
|
ret_dict.update({file_path: sink.data})
|
|
1875
1896
|
|
|
1876
1897
|
return ret_dict
|
|
@@ -1887,16 +1908,17 @@ class CallbackUtil:
|
|
|
1887
1908
|
"""
|
|
1888
1909
|
if file_path is None or len(file_path) == 0 or file_bytes is None or len(file_bytes) == 0:
|
|
1889
1910
|
raise SapioUserErrorException("Empty file provided or file unable to be read.")
|
|
1890
|
-
if allowed_extensions:
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1911
|
+
if not allowed_extensions:
|
|
1912
|
+
return
|
|
1913
|
+
matches: bool = False
|
|
1914
|
+
for ext in allowed_extensions:
|
|
1915
|
+
# FR-47690: Changed to a case-insensitive match.
|
|
1916
|
+
if file_path.casefold().endswith("." + ext.lstrip(".").casefold()):
|
|
1917
|
+
matches = True
|
|
1918
|
+
break
|
|
1919
|
+
if not matches:
|
|
1920
|
+
raise SapioUserErrorException("Unsupported file type. Expecting the following extension(s): "
|
|
1921
|
+
+ (",".join(allowed_extensions)))
|
|
1900
1922
|
|
|
1901
1923
|
def write_file(self, file_name: str, file_data: str | bytes) -> None:
|
|
1902
1924
|
"""
|
|
@@ -1918,7 +1940,8 @@ class CallbackUtil:
|
|
|
1918
1940
|
self.write_file(zip_name, FileUtil.zip_files(files))
|
|
1919
1941
|
|
|
1920
1942
|
@staticmethod
|
|
1921
|
-
def __get_indexed_field_maps(records: Iterable[SapioRecord], index_field: str)
|
|
1943
|
+
def __get_indexed_field_maps(records: Iterable[SapioRecord], index_field: str, include_record_id: bool = False) \
|
|
1944
|
+
-> list[FieldMap]:
|
|
1922
1945
|
"""
|
|
1923
1946
|
For dialogs that accept multiple records, we may want to be able to match the returned results back to the
|
|
1924
1947
|
records that they're for. In this case, we need to add an index to each record so that we can match them back
|
|
@@ -1928,12 +1951,13 @@ class CallbackUtil:
|
|
|
1928
1951
|
:param records: The records to return indexed field maps of.
|
|
1929
1952
|
:param index_field: The name of the field to use as the index. Make sure that this field doesn't exist on the
|
|
1930
1953
|
records, as then it will overwrite the existing value.
|
|
1954
|
+
:param include_record_id: Whether to include the RecordId field in the field maps.
|
|
1931
1955
|
:return: A list of field maps for the records, with an index field added to each. The value of the index on
|
|
1932
1956
|
each field map is the record's record ID (even if it's a record model with a negative ID).
|
|
1933
1957
|
"""
|
|
1934
1958
|
ret_val: list[FieldMap] = []
|
|
1935
1959
|
for record in records:
|
|
1936
|
-
field_map: FieldMap = AliasUtil.to_field_map(record)
|
|
1960
|
+
field_map: FieldMap = AliasUtil.to_field_map(record, include_record_id)
|
|
1937
1961
|
field_map[index_field] = AliasUtil.to_record_id(record)
|
|
1938
1962
|
ret_val.append(field_map)
|
|
1939
1963
|
return ret_val
|
|
@@ -1974,7 +1998,10 @@ class CallbackUtil:
|
|
|
1974
1998
|
if field_def.key_field:
|
|
1975
1999
|
field_def = modifier.modify_field(field_def)
|
|
1976
2000
|
builder.add_field(field_def, column, span)
|
|
1977
|
-
|
|
2001
|
+
# PR-47917: Set fill_view to false on the layout of temp data types created by CallbackUtil.
|
|
2002
|
+
temp_dt = builder.get_temporary_data_type()
|
|
2003
|
+
temp_dt.data_type_layout.fill_view = False
|
|
2004
|
+
return temp_dt
|
|
1978
2005
|
|
|
1979
2006
|
def __temp_dt_from_field_names(self, data_type: str, fields: Iterable[FieldIdentifier | FieldFilterCriteria],
|
|
1980
2007
|
column_positions: dict[str, tuple[int, int]] | None,
|
|
@@ -2045,8 +2072,10 @@ class CallbackUtil:
|
|
|
2045
2072
|
modifier: FieldModifier = field_modifiers.get(field_name, default_modifier)
|
|
2046
2073
|
builder.add_field(modifier.modify_field(field_def), current_column, span)
|
|
2047
2074
|
current_column += span
|
|
2048
|
-
|
|
2049
|
-
|
|
2075
|
+
# PR-47917: Set fill_view to false on the layout of temp data types created by CallbackUtil.
|
|
2076
|
+
temp_dt = builder.get_temporary_data_type()
|
|
2077
|
+
temp_dt.data_type_layout.fill_view = False
|
|
2078
|
+
return temp_dt
|
|
2050
2079
|
|
|
2051
2080
|
# CR-47309: Allow layouts to be provided in place of field names for record dialogs.
|
|
2052
2081
|
def __temp_dt_from_layout(self, data_type: str, layout: DataTypeLayoutIdentifier,
|
|
@@ -206,12 +206,11 @@ class ExperimentHandler:
|
|
|
206
206
|
else:
|
|
207
207
|
user = context
|
|
208
208
|
context = None
|
|
209
|
-
if context is not None and context.eln_experiment is not None and experiment is None:
|
|
210
|
-
experiment = context.eln_experiment
|
|
211
209
|
# FR-46495 - Allow the init function of ExperimentHandler to take in an ElnExperiment that is separate from the
|
|
212
210
|
# context.
|
|
213
211
|
# CR-37038 - Allow other experiment object types to be provided. Convert them all down to ElnExperiment.
|
|
214
|
-
|
|
212
|
+
# PR-47793 - Fix cases where both a SapioWebhookContext and an experiment parameter are provided.
|
|
213
|
+
if experiment is not None:
|
|
215
214
|
eln_manager = DataMgmtServer.get_eln_manager(user)
|
|
216
215
|
# If this object is already an ElnExperiment, do nothing.
|
|
217
216
|
if isinstance(experiment, ElnExperiment):
|
|
@@ -227,13 +226,19 @@ class ExperimentHandler:
|
|
|
227
226
|
raise SapioException(f"No experiment with notebook ID {notebook_id} located in the system.")
|
|
228
227
|
# If this object is a record, assume it is an experiment record that we can query the system with.
|
|
229
228
|
else:
|
|
230
|
-
record_id: int = AliasUtil.
|
|
229
|
+
record_id: int = AliasUtil.to_record_id(experiment)
|
|
231
230
|
experiment: ElnExperiment = eln_manager.get_eln_experiment_by_record_id(record_id)
|
|
232
231
|
if not experiment:
|
|
233
232
|
raise SapioException(f"No experiment with record ID {record_id} located in the system.")
|
|
233
|
+
elif context is not None and context.eln_experiment is not None:
|
|
234
|
+
experiment = context.eln_experiment
|
|
235
|
+
|
|
234
236
|
if experiment is None:
|
|
235
237
|
raise SapioException("Cannot initialize ExperimentHandler. No ELN Experiment found in the provided "
|
|
236
238
|
"parameters.")
|
|
239
|
+
elif not isinstance(experiment, ElnExperiment):
|
|
240
|
+
raise SapioException("Cannot initialize ExperimentHandler. The experiment variable is not an "
|
|
241
|
+
"ElnExperiment!")
|
|
237
242
|
|
|
238
243
|
return user, context, experiment
|
|
239
244
|
|
|
@@ -1425,7 +1430,9 @@ class ExperimentHandler:
|
|
|
1425
1430
|
:return: The map of options for the input step.
|
|
1426
1431
|
"""
|
|
1427
1432
|
step: ElnEntryStep = self.get_step(step)
|
|
1428
|
-
|
|
1433
|
+
# PR-47796: Fix the get_step_options function making a webservice query every time it is called instead of
|
|
1434
|
+
# properly checking its cache of entry options.
|
|
1435
|
+
if step.get_id() not in self._step_options:
|
|
1429
1436
|
self._step_options.update(ExperimentReportUtil.get_experiment_entry_options(self.user,
|
|
1430
1437
|
self.get_all_steps()))
|
|
1431
1438
|
return self._step_options[step.get_id()]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import dataclasses
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from databind.core.dataclasses import dataclass
|
|
6
|
+
from databind.json import loads
|
|
7
|
+
from sapiopylib.rest.utils.singletons import SapioContextManager
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclasses.dataclass
|
|
11
|
+
class ProcessAssayPlateRequest:
|
|
12
|
+
"""
|
|
13
|
+
A request to process the results of assay plate reader with a configuration set in Sapio.
|
|
14
|
+
|
|
15
|
+
Attributes:
|
|
16
|
+
num_rows (int): The number of rows in the plate.
|
|
17
|
+
num_columns (int): The number of columns in the plate.
|
|
18
|
+
plate_ids_in_context (list[str]): List of plate IDs that are in context for this request.
|
|
19
|
+
filename (str): The name of the file containing the assay data.
|
|
20
|
+
file_data (bytes): The binary content of the file.
|
|
21
|
+
plate_reader_config_name (str): The name of the plate reader configuration to use.
|
|
22
|
+
"""
|
|
23
|
+
num_rows: int
|
|
24
|
+
num_columns: int
|
|
25
|
+
plate_ids_in_context: list[str] | None
|
|
26
|
+
filename: str
|
|
27
|
+
file_data: bytes
|
|
28
|
+
plate_reader_config_name: str
|
|
29
|
+
|
|
30
|
+
def to_json(self) -> dict[str, Any]:
|
|
31
|
+
return {
|
|
32
|
+
"numRows": self.num_rows,
|
|
33
|
+
"numCols": self.num_columns,
|
|
34
|
+
"plateIdsInContext": self.plate_ids_in_context,
|
|
35
|
+
"fileName": self.filename,
|
|
36
|
+
"fileDataBase64": base64.b64encode(self.file_data).decode('utf-8'),
|
|
37
|
+
"plateReaderName": self.plate_reader_config_name
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class AssayPlateResultIdent:
    """
    The identifying fields of a single plate result, as used by AssayPlateResult.resultIdent.

    NOTE(review): the camelCase field names presumably mirror the JSON keys of the server response that is
    deserialized into these classes - confirm before renaming any of them.
    """
    # Presumably the ID of the plate that the result belongs to - confirm against the server response.
    plateId: str
    # A channel ID or a block, going by the field name - TODO confirm which one applies when.
    channelIdOrBlock: str
    # May be None; presumably only populated for kinetic assays - confirm.
    kineticAssaySeconds: float | None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class AssayResultDatum:
    """
    The data received from an assay plate reader for one row/column position.

    Usually there is only a single value, but there may be several of them, particularly for kinetic data.
    """
    # NOTE(review): this is annotated as a plain str rather than a ClassVar, so the dataclass decorator
    # treats it as a field with a default value instead of a class-level constant - confirm that is intended.
    DEFAULT_PROPERTY_NAME: str = "read"
    rowPosition: str
    columnPosition: str
    # Numeric values, keyed by property name.
    valueByPropertyName: dict[str, float]
    # Text values, keyed by property name.
    textValueByPropertyName: dict[str, str]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class AssayPlateResult:
    """
    The load result for one plate in an assay plate reader file. A file that contains the data of multiple
    plates produces more than one of these results.
    """
    # The fields that identify which plate result this is.
    resultIdent: AssayPlateResultIdent
    numRows: int
    numColumns: int
    # The individual data entries of this plate result.
    resultDatum: list[AssayResultDatum]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class AssayFileLoadResult:
    """
    The top-level result of loading an assay plate reader file, covering the file as a whole.
    """
    filename: str
    # One entry per plate result that was found in the file.
    plateResultList: list[AssayPlateResult]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class AssayPlateReader(SapioContextManager):
    """
    Provides the services of the Sapio Assay Plate Reader.
    """

    def process_plate_reader_data(self, request: ProcessAssayPlateRequest) -> AssayFileLoadResult:
        """
        Send assay plate reader data to Sapio for processing and return the structured result. The processing
        uses the plate reader configuration defined in Sapio that is named by the request.

        :param request: The request describing the plate and the file to process.
        :return: The file load result deserialized from the text of the response.
        """
        response = self.user.plugin_post("assayplatereader/process", payload=request.to_json())
        # Check the status of the response before attempting to deserialize its body.
        self.user.raise_for_status(response)
        result: AssayFileLoadResult = loads(response.text, AssayFileLoadResult)
        return result
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import os
|
|
3
|
+
import tempfile
|
|
4
|
+
from enum import Enum, auto
|
|
5
|
+
|
|
6
|
+
class FileType(Enum):
|
|
7
|
+
"""Supported file types for conversion."""
|
|
8
|
+
TXT = auto()
|
|
9
|
+
MD = auto()
|
|
10
|
+
CSV = auto()
|
|
11
|
+
DOC = auto()
|
|
12
|
+
DOCX = auto()
|
|
13
|
+
XLS = auto()
|
|
14
|
+
XLSX = auto()
|
|
15
|
+
PPT = auto()
|
|
16
|
+
PPTX = auto()
|
|
17
|
+
PDF = auto()
|
|
18
|
+
UNKNOWN = auto()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FileToTextConverter:
|
|
22
|
+
"""
|
|
23
|
+
A class for converting various file types to raw text.
|
|
24
|
+
"""
|
|
25
|
+
@staticmethod
|
|
26
|
+
def mime_type_to_enum(mime_type: str) -> FileType:
|
|
27
|
+
"""
|
|
28
|
+
Converts a MIME type to a FileType enum.
|
|
29
|
+
|
|
30
|
+
:param mime_type: The MIME type string to convert.
|
|
31
|
+
:return: The corresponding FileType enum, or UNKNOWN if not recognized.
|
|
32
|
+
"""
|
|
33
|
+
if not mime_type or not mime_type.strip():
|
|
34
|
+
return FileType.UNKNOWN
|
|
35
|
+
|
|
36
|
+
mime_map = {
|
|
37
|
+
"text/plain": FileType.TXT,
|
|
38
|
+
"text/markdown": FileType.MD,
|
|
39
|
+
"text/csv": FileType.CSV,
|
|
40
|
+
"application/msword": FileType.DOC,
|
|
41
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": FileType.DOCX,
|
|
42
|
+
"application/vnd.ms-excel": FileType.XLS,
|
|
43
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": FileType.XLSX,
|
|
44
|
+
"application/vnd.ms-powerpoint": FileType.PPT,
|
|
45
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation": FileType.PPTX,
|
|
46
|
+
"application/pdf": FileType.PDF,
|
|
47
|
+
}
|
|
48
|
+
return mime_map.get(mime_type, FileType.UNKNOWN)
|
|
49
|
+
|
|
50
|
+
@staticmethod
|
|
51
|
+
def file_extension_to_enum(file_path: str) -> FileType:
|
|
52
|
+
"""
|
|
53
|
+
Converts a file path or extension to a FileType enum.
|
|
54
|
+
|
|
55
|
+
:param file_path: The file path or extension to convert.
|
|
56
|
+
:return: The corresponding FileType enum, or UNKNOWN if not recognized.
|
|
57
|
+
"""
|
|
58
|
+
if not file_path or not file_path.strip():
|
|
59
|
+
return FileType.UNKNOWN
|
|
60
|
+
|
|
61
|
+
# Extract the file extension, removing the leading dot and making it lowercase
|
|
62
|
+
file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()
|
|
63
|
+
|
|
64
|
+
ext_map = {
|
|
65
|
+
"txt": FileType.TXT,
|
|
66
|
+
"md": FileType.MD,
|
|
67
|
+
"csv": FileType.CSV,
|
|
68
|
+
"doc": FileType.DOC,
|
|
69
|
+
"docx": FileType.DOCX,
|
|
70
|
+
"xls": FileType.XLS,
|
|
71
|
+
"xlsx": FileType.XLSX,
|
|
72
|
+
"ppt": FileType.PPT,
|
|
73
|
+
"pptx": FileType.PPTX,
|
|
74
|
+
"pdf": FileType.PDF,
|
|
75
|
+
}
|
|
76
|
+
return ext_map.get(file_extension, FileType.UNKNOWN)
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def parse_file(cls, file_type: FileType, file_bytes: bytes) -> str | None:
|
|
80
|
+
"""
|
|
81
|
+
Parses file bytes based on the FileType and returns the text content.
|
|
82
|
+
|
|
83
|
+
:param file_type: The type of the file to parse.
|
|
84
|
+
:param file_bytes: The raw bytes of the file to parse.
|
|
85
|
+
:return: The text content of the file, or None if the file type is not supported or parsing fails.
|
|
86
|
+
"""
|
|
87
|
+
if file_type is None or file_bytes is None:
|
|
88
|
+
return None
|
|
89
|
+
if not file_bytes:
|
|
90
|
+
return ""
|
|
91
|
+
|
|
92
|
+
# Dispatch to the correct parser method
|
|
93
|
+
parser_map = {
|
|
94
|
+
FileType.TXT: cls._parse_plain_text,
|
|
95
|
+
FileType.MD: cls._parse_plain_text,
|
|
96
|
+
FileType.CSV: cls._parse_plain_text,
|
|
97
|
+
FileType.DOC: cls._parse_doc,
|
|
98
|
+
FileType.DOCX: cls._parse_docx,
|
|
99
|
+
FileType.XLS: cls._parse_xls,
|
|
100
|
+
FileType.XLSX: cls._parse_xlsx,
|
|
101
|
+
FileType.PPT: cls._parse_ppt,
|
|
102
|
+
FileType.PPTX: cls._parse_pptx,
|
|
103
|
+
FileType.PDF: cls._parse_pdf,
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
parser_func = parser_map.get(file_type)
|
|
107
|
+
|
|
108
|
+
if parser_func:
|
|
109
|
+
return parser_func(file_bytes)
|
|
110
|
+
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
@staticmethod
|
|
114
|
+
def _parse_plain_text(file_bytes: bytes) -> str:
|
|
115
|
+
return file_bytes.decode('utf-8')
|
|
116
|
+
|
|
117
|
+
@staticmethod
|
|
118
|
+
def _run_textract(file_bytes: bytes, extension: str) -> str:
|
|
119
|
+
"""
|
|
120
|
+
Helper to run textract on in-memory bytes by writing to a temp file.
|
|
121
|
+
Note: textract may require external system dependencies.
|
|
122
|
+
"""
|
|
123
|
+
import textract
|
|
124
|
+
with tempfile.NamedTemporaryFile(suffix=f".{extension}", delete=True) as temp_file:
|
|
125
|
+
temp_file.write(file_bytes)
|
|
126
|
+
temp_file.flush() # Ensure all bytes are written to disk
|
|
127
|
+
text = textract.process(temp_file.name).decode('utf-8')
|
|
128
|
+
return text
|
|
129
|
+
|
|
130
|
+
@classmethod
|
|
131
|
+
def _parse_doc(cls, file_bytes: bytes) -> str:
|
|
132
|
+
return cls._run_textract(file_bytes, 'doc')
|
|
133
|
+
|
|
134
|
+
@staticmethod
|
|
135
|
+
def _parse_docx(file_bytes: bytes) -> str:
|
|
136
|
+
import docx
|
|
137
|
+
with io.BytesIO(file_bytes) as stream:
|
|
138
|
+
document = docx.Document(stream)
|
|
139
|
+
return "\n".join(para.text for para in document.paragraphs if para.text.strip())
|
|
140
|
+
|
|
141
|
+
@staticmethod
|
|
142
|
+
def _parse_xls(file_bytes: bytes) -> str:
|
|
143
|
+
import xlrd
|
|
144
|
+
workbook = xlrd.open_workbook(file_contents=file_bytes)
|
|
145
|
+
text_parts = []
|
|
146
|
+
for sheet in workbook.sheets():
|
|
147
|
+
text_parts.append(f"Sheet: {sheet.name}\n")
|
|
148
|
+
for row_idx in range(sheet.nrows):
|
|
149
|
+
row_cells = []
|
|
150
|
+
for col_idx in range(sheet.ncols):
|
|
151
|
+
cell_text = str(sheet.cell_value(row_idx, col_idx))
|
|
152
|
+
if cell_text.strip():
|
|
153
|
+
row_cells.append(cell_text + "\t")
|
|
154
|
+
if row_cells:
|
|
155
|
+
text_parts.append("".join(row_cells))
|
|
156
|
+
text_parts.append("\n")
|
|
157
|
+
text_parts.append("\n")
|
|
158
|
+
return "".join(text_parts)
|
|
159
|
+
|
|
160
|
+
@staticmethod
|
|
161
|
+
def _parse_xlsx(file_bytes: bytes) -> str:
|
|
162
|
+
import openpyxl
|
|
163
|
+
with io.BytesIO(file_bytes) as stream:
|
|
164
|
+
workbook = openpyxl.load_workbook(stream, read_only=True)
|
|
165
|
+
text_parts = []
|
|
166
|
+
for sheet in workbook.worksheets:
|
|
167
|
+
text_parts.append(f"Sheet: {sheet.title}\n")
|
|
168
|
+
for row in sheet.iter_rows():
|
|
169
|
+
row_cells = []
|
|
170
|
+
for cell in row:
|
|
171
|
+
cell_text = str(cell.value) if cell.value is not None else ""
|
|
172
|
+
if cell_text.strip():
|
|
173
|
+
row_cells.append(cell_text + "\t")
|
|
174
|
+
if row_cells:
|
|
175
|
+
text_parts.append("".join(row_cells))
|
|
176
|
+
text_parts.append("\n")
|
|
177
|
+
text_parts.append("\n")
|
|
178
|
+
return "".join(text_parts)
|
|
179
|
+
|
|
180
|
+
@classmethod
|
|
181
|
+
def _parse_ppt(cls, file_bytes: bytes) -> str:
|
|
182
|
+
return cls._run_textract(file_bytes, 'ppt')
|
|
183
|
+
|
|
184
|
+
@staticmethod
|
|
185
|
+
def _parse_pptx(file_bytes: bytes) -> str:
|
|
186
|
+
import pptx
|
|
187
|
+
with io.BytesIO(file_bytes) as stream:
|
|
188
|
+
presentation = pptx.Presentation(stream)
|
|
189
|
+
text_parts = []
|
|
190
|
+
for slide in presentation.slides:
|
|
191
|
+
for shape in slide.shapes:
|
|
192
|
+
if shape.has_text_frame:
|
|
193
|
+
text = shape.text_frame.text
|
|
194
|
+
if text and text.strip():
|
|
195
|
+
text_parts.append(text)
|
|
196
|
+
return "\n".join(text_parts)
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def _parse_pdf(file_bytes: bytes) -> str:
|
|
200
|
+
"""Parses a PDF file's bytes and extracts text using PyMuPDF."""
|
|
201
|
+
import pymupdf
|
|
202
|
+
text_parts = []
|
|
203
|
+
with io.BytesIO(file_bytes) as stream:
|
|
204
|
+
with pymupdf.open(stream=stream) as doc:
|
|
205
|
+
for page in doc:
|
|
206
|
+
text_parts.append(page.get_text())
|
|
207
|
+
return "\n".join(text_parts)
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import gzip
|
|
1
2
|
import io
|
|
3
|
+
import tarfile
|
|
4
|
+
import time
|
|
2
5
|
import warnings
|
|
3
6
|
import zipfile
|
|
4
7
|
|
|
@@ -322,7 +325,7 @@ class FileUtil:
|
|
|
322
325
|
@staticmethod
|
|
323
326
|
def zip_files(files: dict[str, str | bytes]) -> bytes:
|
|
324
327
|
"""
|
|
325
|
-
Create a zip file for a collection of files.
|
|
328
|
+
Create a .zip file for a collection of files.
|
|
326
329
|
|
|
327
330
|
:param files: A dictionary of file name to file data as a string or bytes.
|
|
328
331
|
:return: The bytes for a zip file containing the input files.
|
|
@@ -335,6 +338,130 @@ class FileUtil:
|
|
|
335
338
|
# throws an I/O exception.
|
|
336
339
|
return zip_buffer.getvalue()
|
|
337
340
|
|
|
341
|
+
# FR-47422: Add a function for unzipping files that may have been zipped by the above function.
|
|
342
|
+
@staticmethod
|
|
343
|
+
def unzip_files(zip_file: bytes) -> dict[str, bytes]:
|
|
344
|
+
"""
|
|
345
|
+
Decompress a .zip file from an in-memory bytes object and extracts all files into a dictionary.
|
|
346
|
+
|
|
347
|
+
:param zip_file: The bytes of the zip file to be decompressed.
|
|
348
|
+
:return: A dictionary of file name to file bytes for each file in the zip.
|
|
349
|
+
"""
|
|
350
|
+
extracted_files: dict[str, bytes] = {}
|
|
351
|
+
with io.BytesIO(zip_file) as zip_buffer:
|
|
352
|
+
with zipfile.ZipFile(zip_buffer, "r") as zip_file:
|
|
353
|
+
for file_name in zip_file.namelist():
|
|
354
|
+
with zip_file.open(file_name) as file:
|
|
355
|
+
extracted_files[file_name] = file.read()
|
|
356
|
+
return extracted_files
|
|
357
|
+
|
|
358
|
+
# FR-47422: Add functions for compressing and decompressing .gz, .tar, and .tar.gz files.
|
|
359
|
+
@staticmethod
|
|
360
|
+
def gzip_file(file_data: bytes | str) -> bytes:
|
|
361
|
+
"""
|
|
362
|
+
Create a .gz file for a single file.
|
|
363
|
+
|
|
364
|
+
:param file_data: The file data to be compressed as bytes or a string.
|
|
365
|
+
:return: The bytes of the gzip-compressed file.
|
|
366
|
+
"""
|
|
367
|
+
return gzip.compress(file_data.encode() if isinstance(file_data, str) else file_data)
|
|
368
|
+
|
|
369
|
+
@staticmethod
|
|
370
|
+
def ungzip_file(gzip_file: bytes) -> bytes:
|
|
371
|
+
"""
|
|
372
|
+
Decompress a .gz file.
|
|
373
|
+
|
|
374
|
+
:param gzip_file: The bytes of the gzip-compressed file.
|
|
375
|
+
:return: The decompressed file data as bytes.
|
|
376
|
+
"""
|
|
377
|
+
return gzip.decompress(gzip_file)
|
|
378
|
+
|
|
379
|
+
@staticmethod
|
|
380
|
+
def tar_files(files: dict[str, str | bytes]) -> bytes:
|
|
381
|
+
"""
|
|
382
|
+
Create a .tar file for a collection of files.
|
|
383
|
+
|
|
384
|
+
:param files: A dictionary of file name to file data as a string or bytes.
|
|
385
|
+
:return: The bytes for a tar file containing the input files.
|
|
386
|
+
"""
|
|
387
|
+
with io.BytesIO() as tar_buffer:
|
|
388
|
+
with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
|
|
389
|
+
for name, data in files.items():
|
|
390
|
+
if isinstance(data, str):
|
|
391
|
+
data: bytes = data.encode('utf-8')
|
|
392
|
+
|
|
393
|
+
tarinfo = tarfile.TarInfo(name=name)
|
|
394
|
+
tarinfo.size = len(data)
|
|
395
|
+
tarinfo.mtime = int(time.time())
|
|
396
|
+
|
|
397
|
+
with io.BytesIO(data) as file:
|
|
398
|
+
tar.addfile(tarinfo=tarinfo, fileobj=file)
|
|
399
|
+
|
|
400
|
+
tar_buffer.seek(0)
|
|
401
|
+
return tar_buffer.getvalue()
|
|
402
|
+
|
|
403
|
+
@staticmethod
|
|
404
|
+
def untar_files(tar_file: bytes) -> dict[str, bytes]:
|
|
405
|
+
"""
|
|
406
|
+
Decompress a .tar file from an in-memory bytes object and extracts all files into a dictionary.
|
|
407
|
+
|
|
408
|
+
:param tar_file: The bytes of the tar file to be decompressed.
|
|
409
|
+
:return: A dictionary of file name to file bytes for each file in the tar.
|
|
410
|
+
"""
|
|
411
|
+
extracted_files: dict[str, bytes] = {}
|
|
412
|
+
with io.BytesIO(tar_file) as tar_buffer:
|
|
413
|
+
with tarfile.open(fileobj=tar_buffer, mode="r") as tar:
|
|
414
|
+
for member in tar.getmembers():
|
|
415
|
+
if member.isfile():
|
|
416
|
+
file_obj = tar.extractfile(member)
|
|
417
|
+
if file_obj:
|
|
418
|
+
with file_obj:
|
|
419
|
+
extracted_files[member.name] = file_obj.read()
|
|
420
|
+
return extracted_files
|
|
421
|
+
|
|
422
|
+
@staticmethod
|
|
423
|
+
def tar_gzip_files(files: dict[str, str | bytes]) -> bytes:
|
|
424
|
+
"""
|
|
425
|
+
Create a .tar.gz file for a collection of files.
|
|
426
|
+
|
|
427
|
+
:param files: A dictionary of file name to file data as a string or bytes.
|
|
428
|
+
:return: The bytes for a tar.gz file containing the input files.
|
|
429
|
+
"""
|
|
430
|
+
with io.BytesIO() as tar_buffer:
|
|
431
|
+
with tarfile.open(fileobj=tar_buffer, mode="w:gz") as tar:
|
|
432
|
+
for name, data in files.items():
|
|
433
|
+
if isinstance(data, str):
|
|
434
|
+
data: bytes = data.encode('utf-8')
|
|
435
|
+
|
|
436
|
+
tarinfo = tarfile.TarInfo(name=name)
|
|
437
|
+
tarinfo.size = len(data)
|
|
438
|
+
tarinfo.mtime = int(time.time())
|
|
439
|
+
|
|
440
|
+
with io.BytesIO(data) as file:
|
|
441
|
+
tar.addfile(tarinfo=tarinfo, fileobj=file)
|
|
442
|
+
|
|
443
|
+
tar_buffer.seek(0)
|
|
444
|
+
return tar_buffer.getvalue()
|
|
445
|
+
|
|
446
|
+
@staticmethod
|
|
447
|
+
def untar_gzip_files(tar_gzip_file: bytes) -> dict[str, bytes]:
|
|
448
|
+
"""
|
|
449
|
+
Decompress a .tar.gz file from an in-memory bytes object and extracts all files into a dictionary.
|
|
450
|
+
|
|
451
|
+
:param tar_gzip_file: The bytes of the tar.gz file to be decompressed.
|
|
452
|
+
:return: A dictionary of file name to file bytes for each file in the tar.gz
|
|
453
|
+
"""
|
|
454
|
+
extracted_files: dict[str, bytes] = {}
|
|
455
|
+
with io.BytesIO(tar_gzip_file) as tar_buffer:
|
|
456
|
+
with tarfile.open(fileobj=tar_buffer, mode="r:gz") as tar:
|
|
457
|
+
for member in tar.getmembers():
|
|
458
|
+
if member.isfile():
|
|
459
|
+
file_obj = tar.extractfile(member)
|
|
460
|
+
if file_obj:
|
|
461
|
+
with file_obj:
|
|
462
|
+
extracted_files[member.name] = file_obj.read()
|
|
463
|
+
return extracted_files
|
|
464
|
+
|
|
338
465
|
# Deprecated functions:
|
|
339
466
|
|
|
340
467
|
# FR-46097 - Add write file request shorthand functions to FileUtil.
|