PyPI - sapiopycommons - Versions diffs - 2025.8.14a703__py3-none-any.whl → 2025.8.15a704__py3-none-any.whl - Mend

sapiopycommons 2025.8.14a703__py3-none-any.whl → 2025.8.15a704__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sapiopycommons might be problematic. Click here for more details.

Files changed (46) hide show

sapiopycommons/files/assay_plate_reader.py DELETED Viewed

@@ -1,93 +0,0 @@
-import base64
-import dataclasses
-from typing import Any
-from databind.core.dataclasses import dataclass
-from databind.json import loads
-from sapiopylib.rest.utils.singletons import SapioContextManager
-@dataclasses.dataclass
-class ProcessAssayPlateRequest:
-    """
-    A request to process the results of assay plate reader with a configuration set in Sapio.
-    Attributes:
-        num_rows (int): The number of rows in the plate.
-        num_columns (int): The number of columns in the plate.
-        plate_ids_in_context (list[str]): List of plate IDs that are in context for this request.
-        filename (str): The name of the file containing the assay data.
-        file_data (bytes): The binary content of the file.
-        plate_reader_config_name (str): The name of the plate reader configuration to use.
-    """
-    num_rows: int
-    num_columns: int
-    plate_ids_in_context: list[str] | None
-    filename: str
-    file_data: bytes
-    plate_reader_config_name: str
-    def to_json(self) -> dict[str, Any]:
-        return {
-            "numRows": self.num_rows,
-            "numCols": self.num_columns,
-            "plateIdsInContext": self.plate_ids_in_context,
-            "fileName": self.filename,
-            "fileDataBase64": base64.b64encode(self.file_data).decode('utf-8'),
-            "plateReaderName": self.plate_reader_config_name
-        }
-@dataclass
-class AssayPlateResultIdent:
-    plateId: str
-    channelIdOrBlock: str
-    kineticAssaySeconds: float | None
-@dataclass
-class AssayResultDatum:
-    """
-    Describes the data received from an assay plate reader.
-    Most of the time, the data is a single value, but sometimes it can be multiple values, especially for kinetic data.
-    """
-    DEFAULT_PROPERTY_NAME: str = "read"
-    rowPosition: str
-    columnPosition: str
-    valueByPropertyName: dict[str, float]
-    textValueByPropertyName: dict[str, str]
-@dataclass
-class AssayPlateResult:
-    """
-    Assay plate load result for a single plate in a file. A file can have more than one of this result if it has multiple plate of data in a single file.
-    """
-    resultIdent: AssayPlateResultIdent
-    numRows: int
-    numColumns: int
-    resultDatum: list[AssayResultDatum]
-@dataclass
-class AssayFileLoadResult:
-    """
-    The entire top-level file loading result for an assay plate reader file.
-    """
-    filename: str
-    plateResultList: list[AssayPlateResult]
-class AssayPlateReader(SapioContextManager):
-    """
-    This class contains services for Sapio Assay Plate Reader.
-    """
-    def process_plate_reader_data(self, request: ProcessAssayPlateRequest) -> AssayFileLoadResult:
-        """
-        Processes the assay plate reader data using provided request into a structured result using configuration defined in Sapio.
-        """
-        payload = request.to_json()
-        response = self.user.plugin_post("assayplatereader/process", payload=payload)
-        self.user.raise_for_status(response)
-        return loads(response.text, AssayFileLoadResult)

sapiopycommons/files/file_text_converter.py DELETED Viewed

@@ -1,207 +0,0 @@
-import io
-import os
-import tempfile
-from enum import Enum, auto
-class FileType(Enum):
-    """Supported file types for conversion."""
-    TXT = auto()
-    MD = auto()
-    CSV = auto()
-    DOC = auto()
-    DOCX = auto()
-    XLS = auto()
-    XLSX = auto()
-    PPT = auto()
-    PPTX = auto()
-    PDF = auto()
-    UNKNOWN = auto()
-class FileToTextConverter:
-    """
-    A class for converting various file types to raw text.
-    """
-    @staticmethod
-    def mime_type_to_enum(mime_type: str) -> FileType:
-        """
-        Converts a MIME type to a FileType enum.
-        :param mime_type: The MIME type string to convert.
-        :return: The corresponding FileType enum, or UNKNOWN if not recognized.
-        """
-        if not mime_type or not mime_type.strip():
-            return FileType.UNKNOWN
-        mime_map = {
-            "text/plain": FileType.TXT,
-            "text/markdown": FileType.MD,
-            "text/csv": FileType.CSV,
-            "application/msword": FileType.DOC,
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": FileType.DOCX,
-            "application/vnd.ms-excel": FileType.XLS,
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": FileType.XLSX,
-            "application/vnd.ms-powerpoint": FileType.PPT,
-            "application/vnd.openxmlformats-officedocument.presentationml.presentation": FileType.PPTX,
-            "application/pdf": FileType.PDF,
-        }
-        return mime_map.get(mime_type, FileType.UNKNOWN)
-    @staticmethod
-    def file_extension_to_enum(file_path: str) -> FileType:
-        """
-        Converts a file path or extension to a FileType enum.
-        :param file_path: The file path or extension to convert.
-        :return: The corresponding FileType enum, or UNKNOWN if not recognized.
-        """
-        if not file_path or not file_path.strip():
-            return FileType.UNKNOWN
-        # Extract the file extension, removing the leading dot and making it lowercase
-        file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()
-        ext_map = {
-            "txt": FileType.TXT,
-            "md": FileType.MD,
-            "csv": FileType.CSV,
-            "doc": FileType.DOC,
-            "docx": FileType.DOCX,
-            "xls": FileType.XLS,
-            "xlsx": FileType.XLSX,
-            "ppt": FileType.PPT,
-            "pptx": FileType.PPTX,
-            "pdf": FileType.PDF,
-        }
-        return ext_map.get(file_extension, FileType.UNKNOWN)
-    @classmethod
-    def parse_file(cls, file_type: FileType, file_bytes: bytes) -> str | None:
-        """
-        Parses file bytes based on the FileType and returns the text content.
-        :param file_type: The type of the file to parse.
-        :param file_bytes: The raw bytes of the file to parse.
-        :return: The text content of the file, or None if the file type is not supported or parsing fails.
-        """
-        if file_type is None or file_bytes is None:
-            return None
-        if not file_bytes:
-            return ""
-        # Dispatch to the correct parser method
-        parser_map = {
-            FileType.TXT: cls._parse_plain_text,
-            FileType.MD: cls._parse_plain_text,
-            FileType.CSV: cls._parse_plain_text,
-            FileType.DOC: cls._parse_doc,
-            FileType.DOCX: cls._parse_docx,
-            FileType.XLS: cls._parse_xls,
-            FileType.XLSX: cls._parse_xlsx,
-            FileType.PPT: cls._parse_ppt,
-            FileType.PPTX: cls._parse_pptx,
-            FileType.PDF: cls._parse_pdf,
-        }
-        parser_func = parser_map.get(file_type)
-        if parser_func:
-            return parser_func(file_bytes)
-        return None
-    @staticmethod
-    def _parse_plain_text(file_bytes: bytes) -> str:
-        return file_bytes.decode('utf-8')
-    @staticmethod
-    def _run_textract(file_bytes: bytes, extension: str) -> str:
-        """
-        Helper to run textract on in-memory bytes by writing to a temp file.
-        Note: textract may require external system dependencies.
-        """
-        import textract
-        with tempfile.NamedTemporaryFile(suffix=f".{extension}", delete=True) as temp_file:
-            temp_file.write(file_bytes)
-            temp_file.flush()  # Ensure all bytes are written to disk
-            text = textract.process(temp_file.name).decode('utf-8')
-        return text
-    @classmethod
-    def _parse_doc(cls, file_bytes: bytes) -> str:
-        return cls._run_textract(file_bytes, 'doc')
-    @staticmethod
-    def _parse_docx(file_bytes: bytes) -> str:
-        import docx
-        with io.BytesIO(file_bytes) as stream:
-            document = docx.Document(stream)
-            return "\n".join(para.text for para in document.paragraphs if para.text.strip())
-    @staticmethod
-    def _parse_xls(file_bytes: bytes) -> str:
-        import xlrd
-        workbook = xlrd.open_workbook(file_contents=file_bytes)
-        text_parts = []
-        for sheet in workbook.sheets():
-            text_parts.append(f"Sheet: {sheet.name}\n")
-            for row_idx in range(sheet.nrows):
-                row_cells = []
-                for col_idx in range(sheet.ncols):
-                    cell_text = str(sheet.cell_value(row_idx, col_idx))
-                    if cell_text.strip():
-                        row_cells.append(cell_text + "\t")
-                if row_cells:
-                    text_parts.append("".join(row_cells))
-                text_parts.append("\n")
-            text_parts.append("\n")
-        return "".join(text_parts)
-    @staticmethod
-    def _parse_xlsx(file_bytes: bytes) -> str:
-        import openpyxl
-        with io.BytesIO(file_bytes) as stream:
-            workbook = openpyxl.load_workbook(stream, read_only=True)
-            text_parts = []
-            for sheet in workbook.worksheets:
-                text_parts.append(f"Sheet: {sheet.title}\n")
-                for row in sheet.iter_rows():
-                    row_cells = []
-                    for cell in row:
-                        cell_text = str(cell.value) if cell.value is not None else ""
-                        if cell_text.strip():
-                            row_cells.append(cell_text + "\t")
-                    if row_cells:
-                        text_parts.append("".join(row_cells))
-                    text_parts.append("\n")
-                text_parts.append("\n")
-            return "".join(text_parts)
-    @classmethod
-    def _parse_ppt(cls, file_bytes: bytes) -> str:
-        return cls._run_textract(file_bytes, 'ppt')
-    @staticmethod
-    def _parse_pptx(file_bytes: bytes) -> str:
-        import pptx
-        with io.BytesIO(file_bytes) as stream:
-            presentation = pptx.Presentation(stream)
-            text_parts = []
-            for slide in presentation.slides:
-                for shape in slide.shapes:
-                    if shape.has_text_frame:
-                        text = shape.text_frame.text
-                        if text and text.strip():
-                            text_parts.append(text)
-            return "\n".join(text_parts)
-    @staticmethod
-    def _parse_pdf(file_bytes: bytes) -> str:
-        """Parses a PDF file's bytes and extracts text using PyMuPDF."""
-        import pymupdf
-        text_parts = []
-        with io.BytesIO(file_bytes) as stream:
-            with pymupdf.open(stream=stream) as doc:
-                for page in doc:
-                    text_parts.append(page.get_text())
-        return "\n".join(text_parts)

{sapiopycommons-2025.8.14a703.dist-info → sapiopycommons-2025.8.15a704.dist-info}/WHEEL RENAMED Viewed

File without changes

{sapiopycommons-2025.8.14a703.dist-info → sapiopycommons-2025.8.15a704.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sapiopycommons 2025.8.14a703__py3-none-any.whl → 2025.8.15a704__py3-none-any.whl

Potentially problematic release.

sapiopycommons 2025.8.14a703__py3-none-any.whl → 2025.8.15a704__py3-none-any.whl