PyPI - parseet - Versions diffs - 0.2.0__py3-none-any.whl - Mend

parseet 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

parseet/.DS_Store +0 -0
parseet/__init__.py +2 -0
parseet/app.py +63 -0
parseet/backend.py +283 -0
parseet/cli.py +177 -0
parseet/config/__init__.py +6 -0
parseet/config/config.toml +7 -0
parseet/core/single_process.py +45 -0
parseet/core/utils/__init__.py +5 -0
parseet/core/utils/build_parser.py +33 -0
parseet/core/utils/check_order.py +426 -0
parseet/core/utils/lcms_check_samples.py +397 -0
parseet/core/utils/parse_samplesheet.py +363 -0
parseet/core/utils/setup_logger.py +24 -0
parseet/main.qml +1057 -0
parseet/version.py +1 -0
parseet-0.2.0.dist-info/METADATA +56 -0
parseet-0.2.0.dist-info/RECORD +20 -0
parseet-0.2.0.dist-info/WHEEL +4 -0
parseet-0.2.0.dist-info/entry_points.txt +6 -0

parseet/.DS_Store ADDED Viewed

Binary file

parseet/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ from parseet import *
2	+ from .version import __version__

parseet/app.py ADDED Viewed

@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+"""
+app.py
+======
+Entry point: registers ``Backend`` with the QML engine and opens the window.
+Usage
+-----
+::
+    python -m your_package.app
+    # or
+    python app.py
+"""
+from __future__ import annotations
+import sys
+from pathlib import Path
def _resource(relative: str) -> Path:
    """Return the location of a bundled resource file.

    When the interpreter exposes ``sys._MEIPASS`` (the attribute PyInstaller
    sets to its unpacked bundle directory) the path is resolved against it;
    otherwise it is resolved against the directory containing this module.
    """
    if hasattr(sys, "_MEIPASS"):
        return Path(sys._MEIPASS) / relative
    return Path(__file__).parent / relative


def main() -> None:
    """Create the Qt application, expose ``Backend`` to QML and run it.

    Exits with status 1 when PySide6 cannot be imported or when ``main.qml``
    produces no root objects; otherwise exits with the return code of the Qt
    event loop.
    """
    try:
        from PySide6.QtCore import QUrl  # pylint: disable=no-name-in-module
        from PySide6.QtGui import QGuiApplication  # pylint: disable=no-name-in-module
        from PySide6.QtQml import QQmlApplicationEngine  # pylint: disable=no-name-in-module
    except ImportError:
        print(
            "Error: the GUI requires PySide6.\n"
            "Install it with:  pip install parseet[gui]",
            file=sys.stderr,
        )
        sys.exit(1)

    # Imported here rather than at module level: parseet.backend imports
    # PySide6 itself, so a top-level import would raise ImportError before the
    # friendly message above could ever be printed.
    from parseet.backend import Backend

    app = QGuiApplication(sys.argv)
    app.setApplicationName("Samplesheet Tools")
    app.setOrganizationName("YourOrg")

    engine = QQmlApplicationEngine()

    # Expose the backend object to QML under the name ``backend``.
    # The local reference keeps it alive for the lifetime of the event loop.
    backend = Backend()
    engine.rootContext().setContextProperty("backend", backend)

    qml_file = _resource("main.qml")
    engine.load(QUrl.fromLocalFile(str(qml_file)))
    if not engine.rootObjects():
        # Nothing was instantiated from the QML file, so there is no window.
        sys.exit(1)

    sys.exit(app.exec())


if __name__ == "__main__":
    main()

parseet/backend.py ADDED Viewed

@@ -0,0 +1,283 @@
+"""
+backend.py
+==========
+QML-exposed backend that runs ``check_raw_file_order`` and ``check_samples``
+in a worker thread and emits structured result signals back to the UI.
+"""
+from __future__ import annotations
+import logging
+import traceback
+from pathlib import Path
+import pandas as pd
+from PySide6.QtCore import ( #pylint: disable=no-name-in-module
+    Property,
+    QObject,
+    QRunnable,
+    QThreadPool,
+    Signal,
+    Slot,
+) #pylint: disable=no-name-in-module
+from parseet.core.utils import (
+    check_raw_file_order,
+    check_samples,
+    configure_logging,
+    parse_samplesheet,
+)
+configure_logging(level=logging.WARNING)
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Worker infrastructure
+# ---------------------------------------------------------------------------
class _WorkerSignals(QObject):
    """Signal carrier owned by each ``_Worker``.

    ``finished`` carries three lists (errors, warnings, ok messages);
    ``failed`` carries a single string (a formatted traceback).
    """

    # Payload order: errors, warnings, ok
    finished = Signal(list, list, list)
    # Payload: traceback / exception message
    failed = Signal(str)
class _Worker(QRunnable):
    """Thread-pool task that calls a function and reports through signals.

    The wrapped callable must return a 3-tuple ``(errors, warnings, ok)``.
    Any exception raised while calling it, unpacking its result or emitting
    ``finished`` is reported through ``signals.failed`` as a traceback string.
    """

    def __init__(self, fn, *args, **kwargs):
        super().__init__()
        self._fn = fn
        self._args = args
        self._kwargs = kwargs
        self.signals = _WorkerSignals()

    def run(self) -> None:
        """Execute the stored callable and emit exactly one result signal."""
        try:
            outcome = self._fn(*self._args, **self._kwargs)
            errors, warnings, ok = outcome
            self.signals.finished.emit(errors, warnings, ok)
        except Exception:
            trace = traceback.format_exc()
            self.signals.failed.emit(trace)
+# ---------------------------------------------------------------------------
+# Shared samplesheet loader
+# ---------------------------------------------------------------------------
def _load_and_parse(path: str) -> tuple[pd.DataFrame, list[str], list[str], list[str]]:
    """
    Load the Excel file at *path* without a header row and parse it.

    Returns ``(df, errors, warnings, ok)`` as produced by
    ``parse_samplesheet`` (its fifth result, the column descriptions, is
    dropped).  If the parser reports any errors a ``ValueError`` listing all
    of them is raised instead, so ``errors`` is always empty on return.
    """
    sheet = pd.read_excel(path, header=None)
    df, errors, warnings, ok, _col_descriptions = parse_samplesheet(sheet)
    if not errors:
        return df, errors, warnings, ok
    bullet_lines = [f"  • {e}" for e in errors]
    raise ValueError("Samplesheet parse errors:\n" + "\n".join(bullet_lines))
+# ---------------------------------------------------------------------------
+# Command implementations (run inside the worker thread)
+# ---------------------------------------------------------------------------
def _run_check(
    samplesheet_path: str,
    folder_path: str,
    dry_run: bool,
    ignore_id: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Run the ``check`` command: parse the samplesheet, then check file order.

    Returns ``(errors, warnings, ok)`` where warnings and ok messages from the
    samplesheet parse come first, followed by those of the order check.  The
    rename map returned by ``check_raw_file_order`` is not used here.
    """
    frame, _parse_errors, sheet_warnings, sheet_ok = _load_and_parse(samplesheet_path)
    order_result = check_raw_file_order(
        folder_path=folder_path,
        samplesheet=frame,
        dry_run=dry_run,
        ignore_id=ignore_id,
    )
    _rename_map, errors, warnings, ok = order_result
    combined_warnings = sheet_warnings + warnings
    combined_ok = sheet_ok + ok
    return errors, combined_warnings, combined_ok
def _run_create(
    samplesheet_path: str,
    file_paths: list[str],
    output_folder: str,
    software: str,
    dry_run: bool,
    ignore_warnings: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Worker body for the ``create`` command.

    Parses the samplesheet once, then runs ``check_samples`` for every entry
    of *file_paths*.  For each file that produced no errors (and no warnings,
    unless *ignore_warnings* is set) a ``<stem>_samplesheet.csv`` and a
    ``<stem>_samplesheet_schema.json`` are written to *output_folder*, which
    is created if it does not exist yet.  With *dry_run* nothing is written.

    Returns the accumulated ``(errors, warnings, ok)`` message lists.
    Raises ``ValueError`` (from ``_load_and_parse``) when the samplesheet
    itself has parse errors.
    """
    import csv
    import json

    df, _errors, ss_warnings, ss_ok = _load_and_parse(samplesheet_path)

    # Copy the parser's lists so accumulating below never mutates them.
    all_errors: list[str] = []
    all_warnings: list[str] = list(ss_warnings)
    all_ok: list[str] = list(ss_ok)

    out_dir = Path(output_folder)

    for file_path in file_paths:
        logger.info("Processing '%s'", file_path)
        processed_sheet, message, errors, warnings, ok = check_samples(
            df,
            file_path,
            software=software,
        )
        all_errors.extend(errors)
        all_warnings.extend(warnings)
        all_ok.extend(ok)

        # Errors always block output, even in dry-run mode.
        if errors:
            all_errors.append(f"Skipped output for '{file_path}' due to errors above.")
            continue
        if dry_run:
            all_ok.append(f"[DRY-RUN] Would write output for '{file_path}'")
            continue
        if warnings and not ignore_warnings:
            all_warnings.append(
                f"Skipped output for '{file_path}' — warnings present. "
                "Enable 'Ignore warnings' to force output."
            )
            continue

        stem = Path(file_path).stem
        csv_path = out_dir / f"{stem}_samplesheet.csv"
        schema_path = out_dir / f"{stem}_samplesheet_schema.json"

        # Make sure the destination exists before the first write.
        out_dir.mkdir(parents=True, exist_ok=True)

        processed_sheet.replace("<NA>", "").to_csv(
            csv_path,
            quotechar='"',
            quoting=csv.QUOTE_STRINGS,
            na_rep="",
            index=False,
        )

        # Record each column's dtype next to the CSV.
        schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
        with open(schema_path, "w", encoding="utf-8") as fh:
            json.dump(schema, fh, indent=2)

        all_ok.append(f"Saved '{csv_path}'")
        all_ok.append(f"Saved schema '{schema_path}'")
        # Any message returned by check_samples is surfaced as a warning.
        if message:
            all_warnings.append(message)

    return all_errors, all_warnings, all_ok
+# ---------------------------------------------------------------------------
+# QML-exposed backend object
+# ---------------------------------------------------------------------------
class Backend(QObject):
    """
    Object registered with QML under the name ``backend``.

    It accepts ``runCheck`` / ``runCreate`` requests, executes them on the
    global Qt thread pool one at a time, and reports back through signals.

    Signals
    -------
    runStarted()
        Emitted right before a worker is handed to the thread pool.
    runFinished(errors, warnings, ok)
        Emitted with the three result lists when a run returns normally.
    runFailed(message)
        Emitted with a traceback string when the run raised an exception.
    busyChanged()
        Change notification for the ``busy`` property.
    """

    runStarted = Signal()
    runFinished = Signal(list, list, list)
    runFailed = Signal(str)
    busyChanged = Signal()

    def __init__(self, parent: QObject | None = None) -> None:
        super().__init__(parent)
        self._busy = False
        self._thread_pool = QThreadPool.globalInstance()

    # ------------------------------------------------------------------
    # ``busy`` property (True while a run is in flight)
    # ------------------------------------------------------------------
    def _get_busy(self) -> bool:
        return self._busy

    def _set_busy(self, value: bool) -> None:
        # Only notify when the value really changes.
        if self._busy == value:
            return
        self._busy = value
        self.busyChanged.emit()

    busy = Property(bool, _get_busy, _set_busy, notify=busyChanged)

    # ------------------------------------------------------------------
    # Entry points invoked from QML
    # ------------------------------------------------------------------
    @Slot(str, str, bool, bool)
    def runCheck(
        self,
        samplesheet_path: str,
        folder_path: str,
        dry_run: bool,
        ignore_id: bool,
    ) -> None:
        """Queue ``_run_check`` with the given arguments on a worker thread."""
        self._dispatch(_run_check, samplesheet_path, folder_path, dry_run, ignore_id)

    @Slot(str, list, str, str, bool, bool)
    def runCreate(
        self,
        samplesheet_path: str,
        file_paths: list,
        output_folder: str,
        software: str,
        dry_run: bool,
        ignore_warnings: bool,
    ) -> None:
        """Queue ``_run_create`` with the given arguments on a worker thread."""
        # Hand the worker its own plain list copy of the file paths.
        paths = list(file_paths)
        self._dispatch(
            _run_create,
            samplesheet_path,
            paths,
            output_folder,
            software,
            dry_run,
            ignore_warnings,
        )

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _dispatch(self, fn, *args) -> None:
        """Start ``fn(*args)`` in the thread pool unless a run is active."""
        if self._busy:
            logger.warning("A run is already in progress — ignoring request")
            return
        self._set_busy(True)
        self.runStarted.emit()
        task = _Worker(fn, *args)
        task.signals.finished.connect(self._on_finished)
        task.signals.failed.connect(self._on_failed)
        self._thread_pool.start(task)

    def _on_finished(self, errors: list, warnings: list, ok: list) -> None:
        """Clear the busy flag and forward the result lists to QML."""
        self._set_busy(False)
        self.runFinished.emit(errors, warnings, ok)

    def _on_failed(self, message: str) -> None:
        """Clear the busy flag, log the traceback and forward it to QML."""
        self._set_busy(False)
        logger.error("Worker failed:\n%s", message)
        self.runFailed.emit(message)

parseet/cli.py ADDED Viewed

@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+"""
+Samplesheet CLI
+===============
+.. program:: samplesheet
+Command-line interface for validating, parsing, and generating samplesheets.
+This CLI provides two subcommands:
+``check``
+    Check the order of the sample files in a folder against a samplesheet.
+``create``
+    Validate a samplesheet against integration files and generate derived
+    samplesheet CSV files.
+Configuration is provided via a TOML file and merged with the default
+:data:`base_config`.
+Subcommands
+-----------
+check
+^^^^^
+Check the order of the sample files in a folder against a samplesheet.
+.. option:: check FOLDER
+   Folder containing the sample files to check.
+.. option:: -s, --samplesheet PATH
+   Path to the samplesheet Excel file (required).
+.. option:: --dry-run
+   Run the order check in dry-run mode.
+.. option:: --ignore-id
+   Run the order check with ``ignore_id`` enabled.
+create
+^^^^^^
+Validate a samplesheet and generate per-input CSV files.
+.. option:: create FILES [FILES ...]
+   Integration files to process.
+.. option:: -s, --samplesheet PATH
+   Path to the samplesheet Excel file (required).
+.. option:: -o, --outdir DIR
+   Output directory for generated samplesheets (default: ``.``).
+.. option:: --dry-run
+   Do not write output files.
+.. option:: --ignore-warnings
+   Save output even if warnings are present.
+.. option:: --software {crommy,msdial}
+   Software for which to generate the output samplesheet (default: ``msdial``).
+"""
+from __future__ import annotations
+import logging
+from pathlib import Path
+import json
+import sys
+from typing import List
+import pandas as pd
+import csv
+from .core.utils import (
+    parse_samplesheet,
+    check_samples,
+    build_parser,
+    check_raw_file_order,
+    configure_logging
+)
def main(argv: list[str] | None = None) -> None:
    """
    Entry point for the samplesheet command-line interface.

    Parses *argv* (``sys.argv[1:]`` when ``None``) and dispatches to the
    ``check`` or ``create`` subcommand.

    ``check`` parses the samplesheet and runs ``check_raw_file_order`` on the
    given folder.  ``create`` parses the samplesheet, runs ``check_samples``
    for every input file and writes ``<stem>_samplesheet.csv`` plus
    ``<stem>_samplesheet_schema.json`` into the output directory.

    Raises
    ------
    SystemExit
        With status 1 when the samplesheet has parse errors, when ``create``
        is given no input files, when a file produces warnings and
        ``--ignore-warnings`` is not set, or when any input file produced
        validation errors (its output is skipped, remaining files are still
        processed).
    """
    configure_logging(level=logging.INFO)
    # The parser is built by a separate function; per the original note this
    # is needed for the documentation build.
    parser = build_parser()
    args = parser.parse_args(argv)

    # ---------------------
    # CHECK SAMPLE ORDER
    # ---------------------
    if args.command == "check":
        raw_sheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(raw_sheet)
        if errors:
            # Do not run the order check on a samplesheet that failed to parse
            # (the GUI backend aborts in the same situation).
            logging.error("Samplesheet has %d parse error(s); aborting.", len(errors))
            sys.exit(1)
        check_raw_file_order(
            folder_path=args.folder[0],  # ``folder`` is declared with nargs=1
            dry_run=args.dry_run,
            samplesheet=samplesheet,
            ignore_id=args.ignore_id,
        )
        return

    # ----------
    # CREATE
    # ----------
    if args.command == "create":
        files: List[str] = args.files
        if not files:
            logging.error("No input files provided. Use CLI arguments or config.")
            sys.exit(1)

        output_folder: str = args.outdir
        dry_run: bool = args.dry_run
        ignore_warnings: bool = args.ignore_warnings

        raw_sheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(raw_sheet)
        if errors:
            sys.exit(1)

        any_file_failed = False
        for file_path in files:
            processed_sheet, message, errors, warnings, _ok = check_samples(
                samplesheet,
                file_path,
                software=args.software,
            )
            if errors:
                # Never write output derived from a failed validation.
                logging.error(
                    "Skipped output for '%s': %d validation error(s).",
                    file_path,
                    len(errors),
                )
                any_file_failed = True
                continue

            stem = Path(file_path).stem
            output_file: Path = Path(output_folder) / f"{stem}_samplesheet.csv"
            if dry_run:
                print("[DRY-RUN] File not saved.")
                continue
            if warnings and not ignore_warnings:
                logging.warning("Warnings were found. Use --ignore-warnings to proceed.")
                sys.exit(1)

            processed_sheet = processed_sheet.replace("<NA>", "").sort_values("sample_order")
            processed_sheet.to_csv(
                output_file,
                quotechar='"',
                quoting=csv.QUOTE_STRINGS,
                na_rep="",
                index=False,
            )
            print(f"Samplesheet saved as {output_file}. {message}")

            # Record each column's dtype next to the CSV.
            schema_file: Path = Path(output_folder) / f"{stem}_samplesheet_schema.json"
            schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
            with open(schema_file, "w", encoding="utf-8") as f:
                json.dump(schema, f, indent=2)
            print(f"Schema file saved as {schema_file}")

        if any_file_failed:
            sys.exit(1)


if __name__ == "__main__":
    main()

parseet/config/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+import pathlib
+import tomllib
# Load the packaged default configuration that sits next to this module.
path = pathlib.Path(__file__).with_name("config.toml")
with path.open("rb") as fp:
    # ``tomllib`` requires a binary file handle.
    base_config = tomllib.load(fp)

parseet/config/config.toml ADDED Viewed

@@ -0,0 +1,7 @@
+[output]
+dir = "."
+[run]
+dry_run = false
+ignore_warnings = false

parseet/core/single_process.py ADDED Viewed

@@ -0,0 +1,45 @@
+from datetime import datetime
+import pandas as pd
+from .utils.check_order import check_raw_file_order
+from .utils.parse_samplesheet import parse_samplesheet
class SingleProcess:
    """State holder for one samplesheet run: inputs, parsed sheet, results.

    Typical use: ``set_samplesheet`` -> ``load_samplesheet`` ->
    ``run_check_order``.
    """

    def __init__(self):
        self.datetime: datetime | None = None
        self.exit_code: int | None = None
        # Error lists keyed by the step that produced them.
        self.errors = {}
        self.inputs = {"samplesheet": None}
        self.samplesheet: pd.DataFrame | None = None

    def set_samplesheet(self, samplesheet_path):
        """Remember the path of the samplesheet Excel file to load later."""
        self.inputs["samplesheet"] = samplesheet_path

    def load_samplesheet(self):
        """Read the ``Input`` sheet of the configured file and parse it.

        On success the parsed DataFrame is stored in ``self.samplesheet``.

        Raises
        ------
        ValueError
            If no path was set, the file or its ``Input`` sheet cannot be
            read, or ``parse_samplesheet`` fails or reports errors (those
            errors are also stored under ``self.errors["samplesheet"]``).
        """
        path = self.inputs["samplesheet"]
        if path is None:
            raise ValueError("No samplesheet path provided.")

        # Only the Excel read is mapped to the "sheet not found" message, so a
        # ValueError raised later by the parser is not mislabelled.
        try:
            # The context manager closes the workbook handle after reading.
            with pd.ExcelFile(path) as xls:
                raw_sheet = pd.read_excel(xls, sheet_name="Input", header=None)
        except ValueError as ve:
            raise ValueError(f"Sheet 'Input' not found in samplesheet: {path}") from ve
        except FileNotFoundError as fnfe:
            raise ValueError(f"Samplesheet file not found: {path}") from fnfe
        except Exception as e:
            raise ValueError(f"Error reading samplesheet: {e}") from e

        try:
            # parse_samplesheet returns five values; only the DataFrame is
            # kept as ``self.samplesheet``.
            samplesheet, errors, _warnings, _ok, _col_descriptions = parse_samplesheet(raw_sheet)
        except Exception as e:
            raise ValueError(f"Error reading samplesheet: {e}") from e

        if errors:
            self.errors["samplesheet"] = errors
            raise ValueError(
                "Samplesheet parse errors:\n" + "\n".join(f"  • {err}" for err in errors)
            )
        self.samplesheet = samplesheet

    def run_check_order(self, folder_path, output_txt="correct_time_order.txt", dry_run=True, ignore_id=False):
        """Run ``check_raw_file_order`` on *folder_path* with the loaded sheet.

        Stores the reported errors under ``self.errors["check_order"]``, sets
        ``self.exit_code`` to 1 if there were errors and 0 otherwise, and
        returns ``(errors, warnings, ok)``.

        Raises
        ------
        ValueError
            If ``load_samplesheet`` has not been called successfully.
        """
        if self.samplesheet is None:
            raise ValueError("Samplesheet not loaded. Please load the samplesheet before running check_order.")
        # NOTE(review): other call sites do not pass ``output_txt``; confirm
        # that check_raw_file_order still accepts this keyword.
        _rename_map, errors, warnings, ok = check_raw_file_order(
            folder_path=folder_path,
            samplesheet=self.samplesheet,
            output_txt=output_txt,
            dry_run=dry_run,
            ignore_id=ignore_id,
        )
        self.errors["check_order"] = errors
        self.exit_code = 1 if errors else 0
        return errors, warnings, ok

parseet/core/utils/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .lcms_check_samples import check_samples
+from .parse_samplesheet import parse_samplesheet
+from .build_parser import build_parser
+from .check_order import check_raw_file_order
+from .setup_logger import configure_logging

parseet/core/utils/build_parser.py ADDED Viewed

@@ -0,0 +1,33 @@
+import argparse
def build_parser() -> argparse.ArgumentParser:
    """
    Build and return the argument parser for the CLI.

    The parser requires one of two subcommands, stored in ``args.command``:

    ``check``
        ``folder`` (exactly one, stored as a one-element list),
        ``-s/--samplesheet`` (required), ``--dry-run``, ``--ignore-id``.
    ``create``
        ``files`` (zero or more), ``-s/--samplesheet`` (required),
        ``-o/--outdir`` (default ``"."``), ``--dry-run``,
        ``--ignore-warnings``, ``--software`` (``crommy`` or ``msdial``,
        default ``msdial``).
    """
    parser = argparse.ArgumentParser(
        description="Samplesheet validator and parser."
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    checkorder_parser = subparsers.add_parser(
        "check",
        help="Check order of samples files in a folder and reorder them.",
    )
    checkorder_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Run the order check in dry-run mode.",
    )
    checkorder_parser.add_argument(
        "-s",
        "--samplesheet",
        help="Path to the samplesheet file.",
        required=True,
    )
    # nargs=1 keeps the value as a one-element list; callers index it with [0].
    checkorder_parser.add_argument(
        "folder",
        nargs=1,
        help="Folder containing the sample files to check.",
    )
    checkorder_parser.add_argument(
        "--ignore-id",
        action="store_true",
        help="Run the order check with ignore_id enabled.",
    )

    create_parser = subparsers.add_parser(
        "create",
        help="Validate samplesheet and generate CSV outputs",
    )
    # nargs="*" accepts an empty list here; the CLI entry point reports the
    # missing files itself.
    create_parser.add_argument(
        "files",
        nargs="*",
        help="Integration files to process.",
    )
    create_parser.add_argument(
        "-s",
        "--samplesheet",
        required=True,
        help="Path to the samplesheet file.",
    )
    create_parser.add_argument(
        "-o",
        "--outdir",
        default=".",
        help="Output directory for generated files.",
    )
    create_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Do not write output files.",
    )
    create_parser.add_argument(
        "--ignore-warnings",
        action="store_true",
        help="Save output even if warnings are present.",
    )
    create_parser.add_argument(
        "--software",
        choices=["crommy", "msdial"],
        default="msdial",
        help="Software for which to generate the output samplesheet.",
    )
    return parser