PyPI - sapiopycommons - Versions diffs - 2024.8.28a313__py3-none-any.whl → 2024.8.28a315__py3-none-any.whl - Mend

sapiopycommons 2024.8.28a313__py3-none-any.whl → 2024.8.28a315__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sapiopycommons might be problematic. Click here for more details.

Files changed (37)

sapiopycommons/callbacks/callback_util.py +407 -69
sapiopycommons/chem/IndigoMolecules.py +1 -0
sapiopycommons/chem/Molecules.py +1 -0
sapiopycommons/customreport/__init__.py +0 -0
sapiopycommons/customreport/column_builder.py +60 -0
sapiopycommons/customreport/custom_report_builder.py +125 -0
sapiopycommons/customreport/term_builder.py +299 -0
sapiopycommons/datatype/attachment_util.py +11 -10
sapiopycommons/eln/experiment_handler.py +209 -48
sapiopycommons/eln/experiment_report_util.py +118 -0
sapiopycommons/files/complex_data_loader.py +5 -4
sapiopycommons/files/file_bridge.py +31 -24
sapiopycommons/files/file_bridge_handler.py +340 -0
sapiopycommons/files/file_data_handler.py +2 -5
sapiopycommons/files/file_util.py +50 -10
sapiopycommons/files/file_validator.py +92 -6
sapiopycommons/files/file_writer.py +44 -15
sapiopycommons/general/accession_service.py +375 -0
sapiopycommons/general/aliases.py +147 -3
sapiopycommons/general/audit_log.py +196 -0
sapiopycommons/general/custom_report_util.py +211 -37
sapiopycommons/general/popup_util.py +17 -0
sapiopycommons/general/sapio_links.py +50 -0
sapiopycommons/general/time_util.py +40 -0
sapiopycommons/multimodal/multimodal.py +146 -0
sapiopycommons/multimodal/multimodal_data.py +486 -0
sapiopycommons/processtracking/endpoints.py +22 -22
sapiopycommons/recordmodel/record_handler.py +481 -97
sapiopycommons/rules/eln_rule_handler.py +34 -25
sapiopycommons/rules/on_save_rule_handler.py +34 -31
sapiopycommons/webhook/webhook_handlers.py +147 -26
sapiopycommons/webhook/webservice_handlers.py +67 -0
{sapiopycommons-2024.8.28a313.dist-info → sapiopycommons-2024.8.28a315.dist-info}/METADATA +4 -2
sapiopycommons-2024.8.28a315.dist-info/RECORD +50 -0
sapiopycommons-2024.8.28a313.dist-info/RECORD +0 -38
{sapiopycommons-2024.8.28a313.dist-info → sapiopycommons-2024.8.28a315.dist-info}/WHEEL +0 -0
{sapiopycommons-2024.8.28a313.dist-info → sapiopycommons-2024.8.28a315.dist-info}/licenses/LICENSE +0 -0

sapiopycommons/files/file_util.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import io
+import warnings
+import zipfile
 import pandas
 from numpy import dtype
@@ -21,7 +23,8 @@ class FileUtil:
     """
     @staticmethod
     def tokenize_csv(file_bytes: bytes, required_headers: list[str] | None = None, header_row_index: int | None = 0,
-                     seperator: str = ",") -> tuple[list[dict[str, str]], list[list[str]]]:
+                     seperator: str = ",", *, encoding: str | None = None, exception_on_empty: bool = True) \
+            -> tuple[list[dict[str, str]], list[list[str]]]:
         """
         Tokenize a CSV file. The provided file must be uniform. That is, if row 1 has 10 cells, all the rows in the file
         must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
@@ -34,22 +37,30 @@ class FileUtil:
             meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
             is assumed to be the header row.
         :param seperator: The character that separates cells in the table.
+        :param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
+            contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
+            ISO-8859-1 as the encoding.
+        :param exception_on_empty: Throw a user error exception if the provided file bytes result in an empty list in
+            the first element of the returned tuple.
         :return: The CSV parsed into a list of dicts where each dict is a row, mapping the headers to the cells for
             that row. Also returns a list of each row above the headers (the metadata), parsed into a list of each cell.
             If the header row index is 0 or None, this list will be empty.
         """
         # Parse the file bytes into two DataFrames. The first is metadata of the file located above the header row,
         # while the second is the body of the file below the header row.
-        file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator)
+        file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator,
+                                                               encoding=encoding)
         # Parse the metadata from above the header row index into a list of lists.
         metadata: list[list[str]] = FileUtil.data_frame_to_lists(file_metadata)
         # Parse the data from the file body into a list of dicts.
         rows: list[dict[str, str]] = FileUtil.data_frame_to_dicts(file_body, required_headers, header_row_index)
+        if exception_on_empty and not rows:
+            raise SapioUserErrorException("The provided file contains no rows of information below the headers.")
         return rows, metadata
     @staticmethod
-    def tokenize_xlsx(file_bytes: bytes, required_headers: list[str] | None = None, header_row_index: int | None = 0) \
-            -> tuple[list[dict[str, str]], list[list[str]]]:
+    def tokenize_xlsx(file_bytes: bytes, required_headers: list[str] | None = None, header_row_index: int | None = 0,
+                      *, exception_on_empty: bool = True) -> tuple[list[dict[str, str]], list[list[str]]]:
         """
         Tokenize an XLSX file row by row.
@@ -60,6 +71,8 @@ class FileUtil:
             row is returned in the metadata list. If input is None, then no row is considered to be the header row,
             meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
             is assumed to be the header row.
+        :param exception_on_empty: Throw a user error exception if the provided file bytes result in an empty list in
+            the first element of the returned tuple.
         :return: The XLSX parsed into a list of dicts where each dict is a row, mapping the headers to the cells for
             that row. Also returns a list of each row above the headers (the metadata), parsed into a list of each cell.
             If the header row index is 0 or None, this list will be empty.
@@ -71,11 +84,13 @@ class FileUtil:
         metadata: list[list[str]] = FileUtil.data_frame_to_lists(file_metadata)
         # Parse the data from the file body into a list of dicts.
         rows: list[dict[str, str]] = FileUtil.data_frame_to_dicts(file_body, required_headers, header_row_index)
+        if exception_on_empty and not rows:
+            raise SapioUserErrorException("The provided file contains no rows of information below the headers.")
         return rows, metadata
     @staticmethod
-    def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ",") \
-            -> tuple[DataFrame, DataFrame | None]:
+    def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ",",
+                           *, encoding: str | None = None) -> tuple[DataFrame, DataFrame | None]:
         """
         Parse the file bytes for a CSV into DataFrames. The provided file must be uniform. That is, if row 1 has 10
         cells, all the rows in the file must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
@@ -86,6 +101,9 @@ class FileUtil:
             meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
             is assumed to be the header row.
         :param seperator: The character that separates cells in the table.
+        :param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
+            contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
+            ISO-8859-1 as the encoding.
         :return: A tuple of two DataFrames. The first is the frame for the CSV table body, while the second is for the
             metadata from above the header row, or None if there is no metadata.
         """
@@ -97,13 +115,13 @@ class FileUtil:
                 # can throw off the header row index.
                 file_metadata = pandas.read_csv(file_io, header=None, dtype=dtype(str),
                                                 skiprows=lambda x: x >= header_row_index,
-                                                skip_blank_lines=False, sep=seperator)
+                                                skip_blank_lines=False, sep=seperator, encoding=encoding)
         with io.BytesIO(file_bytes) as file_io:
             # The use of the dtype argument is to ensure that everything from the file gets read as a string. Added
             # because some numerical values would get ".0" appended to them, even when casting the DataFrame cell to a
             # string.
             file_body: DataFrame = pandas.read_csv(file_io, header=header_row_index, dtype=dtype(str),
-                                                   skip_blank_lines=False, sep=seperator)
+                                                   skip_blank_lines=False, sep=seperator, encoding=encoding)
         return file_body, file_metadata
@@ -222,7 +240,7 @@ class FileUtil:
         :param file_data: The CSV file to be converted.
         :return: The bytes of the CSV file converted to an XLSX file.
         """
-        with (io.BytesIO(file_data) if isinstance(file_data, bytes) else io.StringIO(file_data)) as csv:
+        with (io.BytesIO(file_data.encode() if isinstance(file_data, str) else file_data)) as csv:
             # Setting header to false makes pandas read the CSV as-is.
             data_frame = pandas.read_csv(csv, sep=",", header=None)
@@ -266,6 +284,20 @@ class FileUtil:
             file_bytes: bytes = buffer.getvalue()
         return file_bytes
+    @staticmethod
+    def zip_files(files: dict[str, str | bytes]) -> bytes:
+        """
+        Create a zip file for a collection of files.
+        :param files: A dictionary of file name to file data as a string or bytes.
+        :return: The bytes for a zip file containing the input files.
+        """
+        zip_buffer: io.BytesIO = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
+            for file_name, file_data in files.items():
+                zip_file.writestr(file_name, file_data)
+        return zip_buffer.getvalue()
     # Deprecated functions:
     # FR-46097 - Add write file request shorthand functions to FileUtil.
@@ -283,6 +315,8 @@ class FileUtil:
         :param request_context: Context that will be returned to the webhook server in the client callback result.
         :return: A SapioWebhookResult with the write request as its client callback request.
         """
+        warnings.warn("FileUtil.write_file is deprecated as of 24.5+. Use CallbackUtil.write_file instead.",
+                      DeprecationWarning)
         return SapioWebhookResult(True, client_callback_request=WriteFileRequest(file_bytes, file_name,
                                                                                  request_context))
@@ -299,6 +333,8 @@ class FileUtil:
         :param request_context: Context that will be returned to the webhook server in the client callback result.
         :return: A SapioWebhookResult with the write request as its client callback request.
         """
+        warnings.warn("FileUtil.write_files is deprecated as of 24.5+. Use CallbackUtil.write_file instead.",
+                      DeprecationWarning)
         return SapioWebhookResult(True, client_callback_request=MultiFileRequest(files, request_context))
     @staticmethod
@@ -326,6 +362,8 @@ class FileUtil:
             1 - The file name of the requested file if the user provided one.
             2 - The file bytes of the requested file if the user provided one.
         """
+        warnings.warn("FileUtil.request_file is deprecated as of 24.5+. Use CallbackUtil.request_file instead.",
+                      DeprecationWarning)
         client_callback = context.client_callback_result
         result_context: str | None = client_callback.callback_context_data if client_callback else None
         # If the user cancels, terminate the interaction.
@@ -378,6 +416,8 @@ class FileUtil:
             May also contain a result that will terminate the client interaction if the user canceled the prompt.
             1 - A dictionary that maps the file names to the file bytes for each provided file.
         """
+        warnings.warn("FileUtil.request_files is deprecated as of 24.5+. Use CallbackUtil.request_files instead.",
+                      DeprecationWarning)
         client_callback = context.client_callback_result
         result_context: str | None = client_callback.callback_context_data if client_callback else None
         # If the user cancels, terminate the interaction.
@@ -420,7 +460,7 @@ class FileUtil:
         if len(allowed_extensions) != 0:
             matches: bool = False
             for ext in allowed_extensions:
-                if file_path.endswith("." + ext):
+                if file_path.endswith("." + ext.lstrip(".")):
                     matches = True
                     break
             if matches is False:

sapiopycommons/files/file_validator.py CHANGED Viewed

@@ -4,12 +4,15 @@ from abc import abstractmethod
 from typing import Any
 from sapiopylib.rest.User import SapioUser
+from sapiopylib.rest.pojo.CustomReport import RawReportTerm, RawTermOperation
 from sapiopylib.rest.pojo.datatype.FieldDefinition import VeloxIntegerFieldDefinition, VeloxStringFieldDefinition, \
     AbstractVeloxFieldDefinition
-from sapiopylib.rest.pojo.webhook.WebhookResult import SapioWebhookResult
 from sapiopycommons.callbacks.callback_util import CallbackUtil
 from sapiopycommons.files.file_data_handler import FileDataHandler, FilterList
+from sapiopycommons.general.aliases import UserIdentifier, AliasUtil
+from sapiopycommons.general.custom_report_util import CustomReportUtil
+from sapiopycommons.general.exceptions import SapioUserCancelledException
 from sapiopycommons.general.time_util import TimeUtil
@@ -77,10 +80,10 @@ class FileValidator:
         return failed_rows
-    def build_violation_report(self, context: SapioWebhookResult | SapioUser,
+    def build_violation_report(self, context: UserIdentifier,
                                rule_violations: dict[int, list[ValidationRule]]) -> None:
         """
-        Build a simple report of any rule violations in the file to display to the user as a table dialog.
+        Display a simple report of any rule violations in the file to the user as a table dialog.
         :param context: The current webhook context or a user object to send requests from.
         :param rule_violations: A dict of rule violations generated by a call to validate_file.
@@ -118,9 +121,24 @@ class FileValidator:
                         "Reason": violation.reason[:2000]
                     })
-        callback_util = CallbackUtil(context)
-        callback_util.table_dialog("Errors", "The following rule violations were encountered in the provided file.",
-                                   columns, rows)
+        callback = CallbackUtil(context)
+        callback.table_dialog("Errors", "The following rule violations were encountered in the provided file.",
+                              columns, rows)
+    def validate_and_report_errors(self, context: UserIdentifier) -> None:
+        """
+        Validate the file. If any rule violations are found, display a simple report of any rule violations in the file
+        to the user as a table dialog and throw a SapioUserCancelled exception after the user acknowledges the dialog
+        to end the webhook interaction.
+        Shorthand for calling validate_file() and then build_violation_report() if there are any errors.
+        :param context: The current webhook context or a user object to send requests from.
+        """
+        violations = self.validate_file()
+        if violations:
+            self.build_violation_report(context, violations)
+            raise SapioUserCancelledException()
 class ValidationRule:
@@ -480,3 +498,71 @@ class ContainsSubstringFromCellRule(RowRule):
     def validate(self, row: dict[str, Any]) -> bool:
         return row.get(self.second) in row.get(self.first)
+class UniqueSystemValueRule(ColumnRule):
+    """
+    Requires that every cell in the column has a value that is not already in use in the system for a given data type
+    and field name.
+    """
+    user: SapioUser
+    data_type_name: str
+    data_field_name: str
+    def __init__(self, context: UserIdentifier, header: str, data_type_name: str,
+                 data_field_name: str):
+        """
+        :param context: The current webhook context or a user object to send requests from.
+        :param header: The header that this rule acts upon.
+        :param data_type_name: The data type name to search on.
+        :param data_field_name: The data field name to search on. This is expected to be a string field.
+        """
+        self.user = AliasUtil.to_sapio_user(context)
+        self.data_type_name = data_type_name
+        self.data_field_name = data_field_name
+        super().__init__(header, f"This value already exists in the system.")
+    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
+        file_handler = FileDataHandler(rows)
+        values: list[str] = file_handler.get_values_list(self.header)
+        # Run a quick report for all records of this type that match these field values.
+        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
+                             "{" + ",".join(values) + "}")
+        results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
+        existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
+        return file_handler.get_in_list(self.header, existing_values)
+class ExistingSystemValueRule(ColumnRule):
+    """
+    Requires that every cell in the column has a value that is already in use in the system for a given data type
+    and field name.
+    """
+    user: SapioUser
+    data_type_name: str
+    data_field_name: str
+    def __init__(self, context: UserIdentifier, header: str, data_type_name: str,
+                 data_field_name: str):
+        """
+        :param context: The current webhook context or a user object to send requests from.
+        :param header: The header that this rule acts upon.
+        :param data_type_name: The data type name to search on.
+        :param data_field_name: The data field name to search on. This is expected to be a string field.
+        """
+        self.user = AliasUtil.to_sapio_user(context)
+        self.data_type_name = data_type_name
+        self.data_field_name = data_field_name
+        super().__init__(header, f"This value doesn't exist in the system.")
+    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
+        file_handler = FileDataHandler(rows)
+        values: list[str] = file_handler.get_values_list(self.header)
+        # Run a quick report for all records of this type that match these field values.
+        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
+                             "{" + ",".join(values) + "}")
+        results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
+        existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
+        return file_handler.get_not_in_list(self.header, existing_values)

sapiopycommons/files/file_writer.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import warnings
 from abc import abstractmethod
 from enum import Enum
 from typing import Any
@@ -18,7 +19,7 @@ class FileWriter:
     body: list[list[Any]]
     delimiter: str
     line_break: str
-    column_definitions: list[ColumnDef]
+    column_definitions: dict[str, ColumnDef]
     def __init__(self, headers: list[str], delimiter: str = ",", line_break: str = "\r\n"):
         """
@@ -30,7 +31,7 @@ class FileWriter:
         self.delimiter = delimiter
         self.line_break = line_break
         self.body = []
-        self.column_definitions = []
+        self.column_definitions = {}
     def add_row_list(self, row: list[Any]) -> None:
         """
@@ -65,21 +66,49 @@ class FileWriter:
             new_row.append(row.get(header, ""))
         self.body.append(new_row)
-    def add_column_definitions(self, column_defs: list[ColumnDef]) -> None:
+    def add_column_definition(self, header: str, column_def: ColumnDef) -> None:
         """
-        Add new column definitions to this FileWriter. Column definitions are evaluated in the order they are added,
-        meaning that they map to the header with the equivalent index. Before the file is built, the number of column
-        definitions must equal the number of headers if any column definition is provided.
+        Add a new column definition to this FileWriter for a specific header.
-        ColumnDefs are only used if the build_file function is provided with a list of RowBundles.
+        ColumnDefs are only used if the build_file function is provided with a list of RowBundles. Every header must
+        have a column definition if this is the case.
         Custom column definitions can be created by defining a class that extends ColumnDef and implements the print
         method.
-        :param column_defs: A list of column definitions to be used to construct the file when build_file is
+        :param column_def: A column definitions to be used to construct the file when build_file is
             called.
+        :param header: The header that this column definition is for. If a header is provided that isn't in the headers
+            list, the header is appended to the end of the list.
         """
-        self.column_definitions.extend(column_defs)
+        if header not in self.headers:
+            self.headers.append(header)
+        self.column_definitions[header] = column_def
+    def add_column_definitions(self, column_defs: dict[str, ColumnDef]) -> None:
+        """
+        Add new column definitions to this FileWriter.
+        ColumnDefs are only used if the build_file function is provided with a list of RowBundles. Every header must
+        have a column definition if this is the case.
+        Custom column definitions can be created by defining a class that extends ColumnDef and implements the print
+        method.
+        :param column_defs: A dictionary of header names to column definitions to be used to construct the file when
+            build_file is called.
+        """
+        # For backwards compatibility purposes, if column definitions are provided as a list,
+        # add them in order of appearance of the headers. This will only work if the headers are defined first, though.
+        if isinstance(column_defs, list):
+            warnings.warn("Adding column definitions is no longer expected as a list. Continuing to provide a list to "
+                          "this function may result in undesirable behavior.", UserWarning)
+            if not self.headers:
+                raise SapioException("No headers provided to FileWriter before the column definitions were added.")
+            for header, column_def in zip(self.headers, column_defs):
+                self.column_definitions[header] = column_def
+        for header, column_def in column_defs.items():
+            self.add_column_definition(header, column_def)
     def build_file(self, rows: list[RowBundle] | None = None, sorter=None, reverse: bool = False) -> str:
         """
@@ -100,11 +129,10 @@ class FileWriter:
         """
         # If any column definitions have been provided, the number of column definitions and headers must be equal.
         if self.column_definitions:
-            def_count: int = len(self.column_definitions)
-            header_count: int = len(self.headers)
-            if def_count != header_count:
-                raise SapioException(f"FileWriter has {def_count} column definitions defined but {header_count} "
-                                     f"headers. The number of column definitions must equal the number of headers.")
+            for header in self.headers:
+                if header not in self.column_definitions:
+                    raise SapioException(f"FileWriter has no column definition for the header {header}. If any column "
+                                         f"definitions are provided, then all headers must have a column definition.")
         # If any RowBundles have been provided, there must be column definitions for mapping them to the file.
         elif rows:
             raise SapioException(f"FileWriter was given RowBundles but contains no column definitions for mapping "
@@ -130,7 +158,8 @@ class FileWriter:
         rows.sort(key=lambda x: x.index)
         for row in rows:
             new_row: list[Any] = []
-            for column in self.column_definitions:
+            for header in self.headers:
+                column = self.column_definitions[header]
                 if column.may_skip and row.may_skip:
                     new_row.append("")
                 else:

sapiopycommons 2024.8.28a313__py3-none-any.whl → 2024.8.28a315__py3-none-any.whl

Potentially problematic release.

sapiopycommons 2024.8.28a313__py3-none-any.whl → 2024.8.28a315__py3-none-any.whl