parseet 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parseet/.DS_Store ADDED
Binary file
parseet/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from parseet import *
2
+ from .version import __version__
parseet/app.py ADDED
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ app.py
4
+ ======
5
+
6
+ Entry point: registers ``Backend`` with the QML engine and opens the window.
7
+
8
+ Usage
9
+ -----
10
+ ::
11
+
12
+ python -m parseet.app
13
+ # or
14
+ python app.py
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import sys
20
+ from pathlib import Path
21
+
22
+ from parseet.backend import Backend
23
+
24
+ def _resource(relative: str) -> Path:
25
+ """Resolve a path that works both in development and when bundled."""
26
+ if hasattr(sys, '_MEIPASS'):
27
+ return Path(sys._MEIPASS) / relative
28
+ return Path(__file__).parent / relative
29
+
30
def main() -> None:
    """Create the Qt application, expose ``Backend`` to QML and run the event loop.

    PySide6 is imported inside the function so that a missing optional
    dependency yields an installation hint on stderr and exit status 1.
    The process also exits with status 1 (after printing a message) when
    the QML engine ends up with no root objects after loading ``main.qml``.
    Otherwise the function exits with the return value of ``app.exec()``.
    """
    # NOTE(review): this module imports ``parseet.backend`` at top level, and
    # that module imports PySide6 itself, so when PySide6 is absent the import
    # of this module fails before this guard can run — consider importing
    # ``Backend`` lazily here instead.
    try:
        from PySide6.QtGui import QGuiApplication  # pylint: disable=no-name-in-module
        from PySide6.QtQml import QQmlApplicationEngine  # pylint: disable=no-name-in-module
        from PySide6.QtCore import QUrl  # pylint: disable=no-name-in-module
    except ImportError:
        print(
            "Error: the GUI requires PySide6.\n"
            "Install it with: pip install parseet[gui]",
            file=sys.stderr,
        )
        sys.exit(1)

    app = QGuiApplication(sys.argv)
    app.setApplicationName("Samplesheet Tools")
    app.setOrganizationName("YourOrg")

    engine = QQmlApplicationEngine()

    # Expose the backend object to QML under the name ``backend``.
    backend = Backend()
    engine.rootContext().setContextProperty("backend", backend)

    qml_file = _resource("main.qml")
    engine.load(QUrl.fromLocalFile(str(qml_file)))

    # No root objects means the QML document could not be instantiated.
    if not engine.rootObjects():
        print(f"Error: failed to load QML file '{qml_file}'.", file=sys.stderr)
        sys.exit(1)

    sys.exit(app.exec())
60
+
61
+
62
+ if __name__ == "__main__":
63
+ main()
parseet/backend.py ADDED
@@ -0,0 +1,283 @@
1
+ """
2
+ backend.py
3
+ ==========
4
+
5
+ QML-exposed backend that runs ``check_raw_file_order`` and ``check_samples``
6
+ in a worker thread and emits structured result signals back to the UI.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import traceback
13
+ from pathlib import Path
14
+
15
+ import pandas as pd
16
+ from PySide6.QtCore import ( #pylint: disable=no-name-in-module
17
+ Property,
18
+ QObject,
19
+ QRunnable,
20
+ QThreadPool,
21
+ Signal,
22
+ Slot,
23
+ ) #pylint: disable=no-name-in-module
24
+
25
+ from parseet.core.utils import (
26
+ check_raw_file_order,
27
+ check_samples,
28
+ configure_logging,
29
+ parse_samplesheet,
30
+ )
31
+
32
+ configure_logging(level=logging.WARNING)
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Worker infrastructure
38
+ # ---------------------------------------------------------------------------
39
+
40
class _WorkerSignals(QObject):
    """Signal holder attached to each worker.

    ``finished`` carries three lists (errors, warnings, ok) and ``failed``
    carries one string (a traceback / exception message).
    """

    # errors, warnings, ok
    finished = Signal(list, list, list)
    # traceback / exception message
    failed = Signal(str)
44
+
45
+
46
class _Worker(QRunnable):
    """Runnable that calls a function and reports its outcome through signals.

    The wrapped callable must return a 3-tuple ``(errors, warnings, ok)``;
    that tuple is forwarded via ``signals.finished``.  Any ``Exception``
    raised while calling it (or while emitting) is formatted with
    ``traceback.format_exc`` and sent via ``signals.failed`` instead.
    """

    def __init__(self, fn, *args, **kwargs):
        super().__init__()
        self._fn, self._args, self._kwargs = fn, args, kwargs
        self.signals = _WorkerSignals()

    def run(self) -> None:
        try:
            outcome = self._fn(*self._args, **self._kwargs)
            errors, warnings, ok = outcome
            self.signals.finished.emit(errors, warnings, ok)
        except Exception:
            report = traceback.format_exc()
            self.signals.failed.emit(report)
62
+
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Shared samplesheet loader
66
+ # ---------------------------------------------------------------------------
67
+
68
def _load_and_parse(path: str) -> tuple[pd.DataFrame, list[str], list[str], list[str]]:
    """
    Load the Excel file at *path* (no header row) and pass it to
    ``parse_samplesheet``.

    Returns ``(df, errors, warnings, ok)``; the column descriptions that
    ``parse_samplesheet`` also returns are discarded.  When the parser
    reports any errors a ``ValueError`` listing them is raised, so on a
    normal return ``errors`` is always empty.
    """
    parsed = parse_samplesheet(pd.read_excel(path, header=None))
    df, errors, warnings, ok, _col_descriptions = parsed
    if not errors:
        return df, errors, warnings, ok

    bullet_lines = "\n".join(f"  • {e}" for e in errors)
    raise ValueError("Samplesheet parse errors:\n" + bullet_lines)
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Command implementations (run inside the worker thread)
86
+ # ---------------------------------------------------------------------------
87
+
88
def _run_check(
    samplesheet_path: str,
    folder_path: str,
    dry_run: bool,
    ignore_id: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Worker body for the ``check`` command.

    Parses the samplesheet, runs ``check_raw_file_order`` on *folder_path*
    and returns ``(errors, warnings, ok)``.  The warnings and ok messages
    from parsing are placed in front of those from the order check; the
    errors come from the order check only (parse errors raise earlier).
    """
    sheet, _parse_errors, sheet_warnings, sheet_ok = _load_and_parse(samplesheet_path)

    order_result = check_raw_file_order(
        folder_path=folder_path,
        samplesheet=sheet,
        dry_run=dry_run,
        ignore_id=ignore_id,
    )
    _rename_map, errors, warnings, ok = order_result

    combined_warnings = sheet_warnings + warnings
    combined_ok = sheet_ok + ok
    return errors, combined_warnings, combined_ok
105
+
106
+
107
def _run_create(
    samplesheet_path: str,
    file_paths: list[str],
    output_folder: str,
    software: str,
    dry_run: bool,
    ignore_warnings: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Worker body for the ``create`` command.

    Parses the samplesheet once, then runs ``check_samples`` for every entry
    of *file_paths*.  For each file a ``<stem>_samplesheet.csv`` and a
    ``<stem>_samplesheet_schema.json`` are written into *output_folder*
    unless one of the following holds, checked in this order:

    * ``check_samples`` reported errors for the file,
    * *dry_run* is true,
    * warnings were reported and *ignore_warnings* is false.

    Returns the accumulated ``(errors, warnings, ok)`` message lists.
    Raises ``ValueError`` (from ``_load_and_parse``) when the samplesheet
    itself has parse errors.
    """
    import csv
    import json

    df, _errors, ss_warnings, ss_ok = _load_and_parse(samplesheet_path)

    # Copy the parser's lists so that appending below never mutates objects
    # owned by the caller/parser.
    all_errors: list[str] = []
    all_warnings: list[str] = list(ss_warnings)
    all_ok: list[str] = list(ss_ok)

    out_dir = Path(output_folder)

    for file_path in file_paths:
        logger.info("Processing '%s'", file_path)
        processed_sheet, message, errors, warnings, ok = check_samples(
            df,
            file_path,
            software=software,
        )

        all_errors.extend(errors)
        all_warnings.extend(warnings)
        all_ok.extend(ok)

        if errors:
            all_errors.append(f"Skipped output for '{file_path}' due to errors above.")
            continue

        if dry_run:
            all_ok.append(f"[DRY-RUN] Would write output for '{file_path}'")
            continue

        if warnings and not ignore_warnings:
            all_warnings.append(
                f"Skipped output for '{file_path}' — warnings present. "
                "Enable 'Ignore warnings' to force output."
            )
            continue

        stem = Path(file_path).stem
        csv_path = out_dir / f"{stem}_samplesheet.csv"
        schema_path = out_dir / f"{stem}_samplesheet_schema.json"

        # NOTE(review): the CLI additionally sorts by "sample_order" before
        # writing; this path does not — confirm which behaviour is intended.
        # csv.QUOTE_STRINGS is only available on Python 3.12+.
        processed_sheet.replace("<NA>", "").to_csv(
            csv_path,
            quotechar='"',
            quoting=csv.QUOTE_STRINGS,
            na_rep="",
            index=False,
        )

        # The schema records the dtypes of the sheet as returned by
        # check_samples (before the "<NA>" replacement above).
        schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
        # Explicit encoding keeps the output independent of the platform locale.
        with open(schema_path, "w", encoding="utf-8") as fh:
            json.dump(schema, fh, indent=2)

        all_ok.append(f"Saved '{csv_path}'")
        all_ok.append(f"Saved schema '{schema_path}'")
        if message:
            all_warnings.append(message)

    return all_errors, all_warnings, all_ok
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # QML-exposed backend object
175
+ # ---------------------------------------------------------------------------
176
+
177
class Backend(QObject):
    """
    Object registered with the QML context under the name ``backend``.

    Signals
    -------
    runStarted()
        Emitted by ``_dispatch`` right before a worker is handed to the
        thread pool.
    runFinished(errors, warnings, ok)
        Re-emits the three result lists of a worker that completed.
    runFailed(message)
        Re-emits the traceback string of a worker that raised.
    busyChanged()
        Notify signal of the ``busy`` property.
    """

    runStarted = Signal()
    runFinished = Signal(list, list, list)
    runFailed = Signal(str)
    busyChanged = Signal()

    def __init__(self, parent: QObject | None = None) -> None:
        super().__init__(parent)
        self._thread_pool = QThreadPool.globalInstance()
        self._busy = False

    # ------------------------------------------------------------------
    # busy property
    # ------------------------------------------------------------------

    def _get_busy(self) -> bool:
        return self._busy

    def _set_busy(self, value: bool) -> None:
        # Only emit the notify signal on an actual change.
        if self._busy == value:
            return
        self._busy = value
        self.busyChanged.emit()

    busy = Property(bool, _get_busy, _set_busy, notify=busyChanged)

    # ------------------------------------------------------------------
    # Slots called from QML
    # ------------------------------------------------------------------

    @Slot(str, str, bool, bool)
    def runCheck(
        self,
        samplesheet_path: str,
        folder_path: str,
        dry_run: bool,
        ignore_id: bool,
    ) -> None:
        """Queue ``_run_check`` with the given arguments on a worker."""
        self._dispatch(_run_check, samplesheet_path, folder_path, dry_run, ignore_id)

    @Slot(str, list, str, str, bool, bool)
    def runCreate(
        self,
        samplesheet_path: str,
        file_paths: list,
        output_folder: str,
        software: str,
        dry_run: bool,
        ignore_warnings: bool,
    ) -> None:
        """Queue ``_run_create`` with the given arguments on a worker."""
        # Hand the worker its own list rather than the object received from QML.
        paths = list(file_paths)
        self._dispatch(
            _run_create,
            samplesheet_path,
            paths,
            output_folder,
            software,
            dry_run,
            ignore_warnings,
        )

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _dispatch(self, fn, *args) -> None:
        """Start *fn* in the thread pool unless a run is already active."""
        if self._busy:
            logger.warning("A run is already in progress — ignoring request")
            return

        self._set_busy(True)
        self.runStarted.emit()

        job = _Worker(fn, *args)
        job.signals.finished.connect(self._on_finished)
        job.signals.failed.connect(self._on_failed)
        self._thread_pool.start(job)

    def _on_finished(self, errors: list, warnings: list, ok: list) -> None:
        """Clear the busy flag and forward the worker's result lists."""
        self._set_busy(False)
        self.runFinished.emit(errors, warnings, ok)

    def _on_failed(self, message: str) -> None:
        """Clear the busy flag, log the failure and forward its message."""
        self._set_busy(False)
        logger.error("Worker failed:\n%s", message)
        self.runFailed.emit(message)
parseet/cli.py ADDED
@@ -0,0 +1,177 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Samplesheet CLI
4
+ ===============
5
+
6
+ .. program:: samplesheet
7
+
8
+ Command-line interface for validating, parsing, and generating samplesheets.
9
+
10
+ This CLI provides two subcommands:
11
+
12
+ ``check``
13
+ Check the order of sample files in a folder and reorder them.
14
+
15
+ ``create``
16
+ Validate a samplesheet against integration files and generate derived
17
+ samplesheet CSV files.
18
+
19
+ Configuration is provided via a TOML file and merged with the default
20
+ :data:`base_config`.
21
+
22
+ Subcommands
23
+ -----------
24
+
25
+ check
26
+ ^^^^^
27
+ Check the order of sample files in a folder and reorder them.
28
+
29
+ .. option:: check FOLDER
30
+
31
+ Folder containing the sample files to check.
32
+
33
+ .. option:: -s, --samplesheet PATH
34
+
35
+ Path to the samplesheet Excel file.
36
+
37
+ create
38
+ ^^^^^^
39
+ Validate a samplesheet and generate per-input CSV files.
40
+
41
+ .. option:: create FILES [FILES ...]
42
+
43
+ Integration files to process.
44
+
45
+ .. option:: -s, --samplesheet PATH
46
+
47
+ Path to the samplesheet Excel file.
48
+
49
+ .. option:: -o, --outdir DIR
50
+
51
+ Output directory for generated samplesheets.
52
+
53
+ .. option:: --dry-run
54
+
55
+ Do not write output files.
56
+
57
+ .. option:: --ignore-warnings
58
+
59
+ Save output even if warnings are present.
60
+
61
+ .. option:: -c, --config CONFIG
62
+
63
+ Optional TOML configuration file.
64
+ """
65
+
66
+ from __future__ import annotations
67
+
68
+ import logging
69
+ from pathlib import Path
70
+ import json
71
+ import sys
72
+ from typing import List
73
+ import pandas as pd
74
+
75
+ import csv
76
+ from .core.utils import (
77
+ parse_samplesheet,
78
+ check_samples,
79
+ build_parser,
80
+ check_raw_file_order,
81
+ configure_logging
82
+ )
83
+
84
+
85
def main(argv: list[str] | None = None) -> None:
    """
    Entry point for the samplesheet command-line interface.

    Parses *argv* with :func:`build_parser` and runs the selected
    subcommand:

    ``check``
        Parse the samplesheet and run ``check_raw_file_order`` on the
        given folder.
    ``create``
        Parse the samplesheet, run ``check_samples`` for every input file
        and write ``<stem>_samplesheet.csv`` plus
        ``<stem>_samplesheet_schema.json`` into the output directory.

    Parameters
    ----------
    argv
        Argument list; ``None`` lets ``argparse`` read ``sys.argv``.

    Raises
    ------
    SystemExit
        With status 1 when no input files are given, when the samplesheet
        or an input file has errors, or when warnings are present and
        ``--ignore-warnings`` was not passed.
    """

    configure_logging(level=logging.INFO)

    # This is necessary to build docs
    parser = build_parser()
    args = parser.parse_args(argv)

    # ---------------------
    # CHECK SAMPLE ORDER
    # ---------------------
    if args.command == "check":
        raw_sheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(raw_sheet)

        # Do not run the order check on a samplesheet that failed to parse.
        if errors:
            logging.error("Samplesheet could not be parsed; aborting.")
            sys.exit(1)

        _rename_map, errors, _warnings, _ok = check_raw_file_order(
            folder_path=args.folder[0],
            dry_run=args.dry_run,
            samplesheet=samplesheet,
            ignore_id=args.ignore_id,
        )
        # Reflect a failed check in the process exit status.
        if errors:
            sys.exit(1)

    # ----------
    # CREATE
    # ----------
    elif args.command == "create":

        files: list[str] = args.files

        if not files:
            logging.error("No input files provided. Use CLI arguments or config.")
            sys.exit(1)

        output_folder = Path(args.outdir)
        dry_run: bool = args.dry_run
        ignore_warnings: bool = args.ignore_warnings

        raw_sheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(raw_sheet)

        if errors:
            sys.exit(1)

        for file_path in files:
            processed_sheet, message, errors, warnings, _ok = check_samples(
                samplesheet,
                file_path,
                software=args.software,
            )

            # Never write output for a file that failed validation
            # (the GUI backend applies the same rule).
            if errors:
                logging.error(
                    "Errors were found while processing '%s'; no output written.",
                    file_path,
                )
                sys.exit(1)

            stem = Path(file_path).stem
            output_file: Path = output_folder / f"{stem}_samplesheet.csv"

            if dry_run:
                print("[DRY-RUN] File not saved.")
                continue

            if warnings and not ignore_warnings:
                logging.warning("Warnings were found. Use --ignore-warnings to proceed.")
                sys.exit(1)

            processed_sheet = processed_sheet.replace("<NA>", "").sort_values("sample_order")
            # csv.QUOTE_STRINGS is only available on Python 3.12+.
            processed_sheet.to_csv(
                output_file,
                quotechar='"',
                quoting=csv.QUOTE_STRINGS,
                na_rep="",
                index=False,
            )

            print(f"Samplesheet saved as {output_file}. {message}")

            schema_file: Path = output_folder / f"{stem}_samplesheet_schema.json"

            schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
            # Explicit encoding keeps the output independent of the platform locale.
            with open(schema_file, "w", encoding="utf-8") as f:
                json.dump(schema, f, indent=2)
            print(f"Schema file saved as {schema_file}")
175
+
176
+ if __name__ == "__main__":
177
+ main()
@@ -0,0 +1,6 @@
1
+ import pathlib
2
+ import tomllib
3
+
4
# Read the ``config.toml`` file located in the same directory as this module
# and expose its parsed contents as ``base_config``.
path = pathlib.Path(__file__).with_name("config.toml")
with open(path, "rb") as fp:
    base_config = tomllib.load(fp)
@@ -0,0 +1,7 @@
1
+ [output]
2
+ dir = "."
3
+
4
+ [run]
5
+ dry_run = false
6
+ ignore_warnings = false
7
+
@@ -0,0 +1,45 @@
1
+ from datetime import datetime
2
+ import pandas as pd
3
+
4
+ from .utils.check_order import check_raw_file_order
5
+ from .utils.parse_samplesheet import parse_samplesheet
6
+
7
class SingleProcess:
    """Holds the inputs, parsed samplesheet and results of one processing run."""

    def __init__(self):
        # Placeholder for a run timestamp; nothing in this class sets it.
        self.datetime: datetime | None = None
        # 0 / 1 after ``run_check_order``; ``None`` until then.
        self.exit_code: int | None = None
        # Error lists keyed by the step that produced them.
        self.errors = {}
        self.inputs = {"samplesheet": None}
        self.samplesheet: pd.DataFrame | None = None

    def set_samplesheet(self, samplesheet_path):
        """Remember the path of the samplesheet Excel file to load."""
        self.inputs["samplesheet"] = samplesheet_path

    def load_samplesheet(self):
        """Read the ``Input`` sheet of the configured file and parse it.

        On success the parsed frame is stored in ``self.samplesheet``.

        Raises
        ------
        ValueError
            If no path was set, the file is missing or unreadable, the
            ``Input`` sheet does not exist, or ``parse_samplesheet``
            reports errors (also stored under ``self.errors["samplesheet"]``).
        """
        samplesheet_path = self.inputs["samplesheet"]
        if samplesheet_path is None:
            raise ValueError("No samplesheet path provided.")

        try:
            # The context manager closes the workbook handle again.
            with pd.ExcelFile(samplesheet_path) as xls:
                if "Input" in xls.sheet_names:
                    raw_sheet = pd.read_excel(xls, sheet_name="Input", header=None)
                else:
                    raw_sheet = None
        except FileNotFoundError as fnfe:
            raise ValueError(f"Samplesheet file not found: {samplesheet_path}") from fnfe
        except Exception as e:
            raise ValueError(f"Error reading samplesheet: {e}") from e

        if raw_sheet is None:
            raise ValueError(f"Sheet 'Input' not found in samplesheet: {samplesheet_path}")

        try:
            # parse_samplesheet returns (df, errors, warnings, ok, col_descriptions).
            parsed, errors, _warnings, _ok, _col_descriptions = parse_samplesheet(raw_sheet)
        except Exception as e:
            raise ValueError(f"Error reading samplesheet: {e}") from e

        if errors:
            self.errors["samplesheet"] = errors
            raise ValueError(
                "Samplesheet parse errors:\n" + "\n".join(f"  • {e}" for e in errors)
            )

        self.samplesheet = parsed

    def run_check_order(self, folder_path, output_txt="correct_time_order.txt", dry_run=True, ignore_id=False):
        """Run ``check_raw_file_order`` on *folder_path* with the loaded samplesheet.

        Stores the reported errors under ``self.errors["check_order"]``, sets
        ``self.exit_code`` to 1 when there are errors and 0 otherwise, and
        returns ``(errors, warnings, ok)``.

        Raises
        ------
        ValueError
            If ``load_samplesheet`` has not been called successfully.
        """
        if self.samplesheet is None:
            raise ValueError("Samplesheet not loaded. Please load the samplesheet before running check_order.")

        # NOTE(review): other callers in this package do not pass ``output_txt``;
        # confirm that check_raw_file_order still accepts it.
        _rename_map, errors, warnings, ok = check_raw_file_order(
            folder_path=folder_path,
            samplesheet=self.samplesheet,
            output_txt=output_txt,
            dry_run=dry_run,
            ignore_id=ignore_id,
        )

        self.errors["check_order"] = errors
        self.exit_code = 1 if errors else 0
        return errors, warnings, ok
@@ -0,0 +1,5 @@
1
+ from .lcms_check_samples import check_samples
2
+ from .parse_samplesheet import parse_samplesheet
3
+ from .build_parser import build_parser
4
+ from .check_order import check_raw_file_order
5
+ from .setup_logger import configure_logging
@@ -0,0 +1,33 @@
1
+ import argparse
2
+
3
def build_parser() -> argparse.ArgumentParser:
    """
    Assemble the CLI argument parser.

    The parser requires one of two subcommands, stored in ``command``:
    ``check`` (folder order check) and ``create`` (CSV generation).
    """
    root = argparse.ArgumentParser(description="Samplesheet validator and parser.")
    commands = root.add_subparsers(dest="command", required=True)

    # Each entry is (subcommand name, help text, argument specs in order).
    layout = (
        (
            "check",
            "Check order of samples files in a folder and reorder them.",
            (
                (("--dry-run",), {"action": "store_true"}),
                (("-s", "--samplesheet"), {"help": "Path to the samplesheet file.", "required": True}),
                (("folder",), {"nargs": 1}),
                (("--ignore-id",), {"action": "store_true"}),
            ),
        ),
        (
            "create",
            "Validate samplesheet and generate CSV outputs",
            (
                (("files",), {"nargs": "*"}),
                (("-s", "--samplesheet"), {"required": True, "help": "Path to the samplesheet file."}),
                (("-o", "--outdir"), {"default": ".", "help": "Output directory for generated files."}),
                (("--dry-run",), {"action": "store_true"}),
                (("--ignore-warnings",), {"action": "store_true"}),
                (
                    ("--software",),
                    {
                        "choices": ["crommy", "msdial"],
                        "default": "msdial",
                        "help": "Software for which to generate the output samplesheet.",
                    },
                ),
            ),
        ),
    )

    for command_name, command_help, argument_specs in layout:
        sub = commands.add_parser(command_name, help=command_help)
        for flags, options in argument_specs:
            sub.add_argument(*flags, **options)

    return root