parseet 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parseet-0.2.0/PKG-INFO +56 -0
- parseet-0.2.0/README.md +39 -0
- parseet-0.2.0/pyproject.toml +57 -0
- parseet-0.2.0/src/parseet/.DS_Store +0 -0
- parseet-0.2.0/src/parseet/__init__.py +2 -0
- parseet-0.2.0/src/parseet/app.py +63 -0
- parseet-0.2.0/src/parseet/backend.py +283 -0
- parseet-0.2.0/src/parseet/cli.py +177 -0
- parseet-0.2.0/src/parseet/config/__init__.py +6 -0
- parseet-0.2.0/src/parseet/config/config.toml +7 -0
- parseet-0.2.0/src/parseet/core/single_process.py +45 -0
- parseet-0.2.0/src/parseet/core/utils/__init__.py +5 -0
- parseet-0.2.0/src/parseet/core/utils/build_parser.py +33 -0
- parseet-0.2.0/src/parseet/core/utils/check_order.py +426 -0
- parseet-0.2.0/src/parseet/core/utils/lcms_check_samples.py +397 -0
- parseet-0.2.0/src/parseet/core/utils/parse_samplesheet.py +363 -0
- parseet-0.2.0/src/parseet/core/utils/setup_logger.py +24 -0
- parseet-0.2.0/src/parseet/main.qml +1057 -0
- parseet-0.2.0/src/parseet/version.py +1 -0
- parseet-0.2.0/tests/__init__.py +0 -0
parseet-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: parseet
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: This package is used to parse ms samplesheet for further analyses.
|
|
5
|
+
Keywords: samplesheet,parser,data analyses,mass spectrometry,msdial
|
|
6
|
+
Author-Email: Matteo Miotto <miotsdata@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: <3.13,>=3.12
|
|
12
|
+
Requires-Dist: pandas>=2.3.3
|
|
13
|
+
Requires-Dist: openpyxl>=3.1.5
|
|
14
|
+
Provides-Extra: gui
|
|
15
|
+
Requires-Dist: pyside6>=6.11.1; extra == "gui"
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# BULK METABOLOMICS SAMPLESHEET
|
|
19
|
+
|
|
20
|
+
This folder contains the files necessary for the samplesheet of LC-MS and spatial metabolomics.
|
|
21
|
+
Samplesheet template (LC-MS_metabolomics_samplesheet.xlsx or spatial_metabolomics_samplesheet.xlsx) should be given to clients to be compiled. Once received back, it should be tested with the parser (LC-MS_parse_samplesheet.py or spatial_parse_samplesheet.py).
|
|
22
|
+
|
|
23
|
+
## Samplesheet template
|
|
24
|
+
Samplesheet templates contain 3 sheets:
|
|
25
|
+
|
|
26
|
+
- input: the one the user should fill. It contains two sections, one with user info and the other with samples metadata.
|
|
27
|
+
- example_ok: an example of a good samplesheet
|
|
28
|
+
- example_fail: an example of a problematic samplesheet
|
|
29
|
+
|
|
30
|
+
## Parser
|
|
31
|
+
The parser will check the samplesheet for its correct compilation and raises warnings and/or errors if any.
|
|
32
|
+
If no errors are found, the samples metadata section is saved as a CSV file (at the path given with -o). Warnings also prevent the file from being saved, but this can be overridden with --ignore-warnings.
|
|
33
|
+
|
|
34
|
+
## Test
|
|
35
|
+
In the test folder, there are 3 test samplesheets for each technology:
|
|
36
|
+
|
|
37
|
+
- test_error.xlsx: a test samplesheet with errors
|
|
38
|
+
- test_ok.xlsx: a test samplesheet correctly compiled
|
|
39
|
+
- test_warning.xlsx: a test samplesheet with warnings
|
|
40
|
+
|
|
41
|
+
Several tests should be run after changing any file:
|
|
42
|
+
|
|
43
|
+
- `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
|
|
44
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
|
|
45
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
|
|
46
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
|
|
47
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
|
|
48
|
+
- `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
|
|
49
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
|
|
50
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok_timecourse.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
|
|
51
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
|
|
52
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
|
|
53
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
|
|
54
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but the test.csv file should be created)
|
|
55
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but the test.csv file should be created)
|
|
56
|
+
|
parseet-0.2.0/README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# BULK METABOLOMICS SAMPLESHEET
|
|
2
|
+
|
|
3
|
+
This folder contains the files necessary for the samplesheet of LC-MS and spatial metabolomics.
|
|
4
|
+
Samplesheet template (LC-MS_metabolomics_samplesheet.xlsx or spatial_metabolomics_samplesheet.xlsx) should be given to clients to be compiled. Once received back, it should be tested with the parser (LC-MS_parse_samplesheet.py or spatial_parse_samplesheet.py).
|
|
5
|
+
|
|
6
|
+
## Samplesheet template
|
|
7
|
+
Samplesheet templates contain 3 sheets:
|
|
8
|
+
|
|
9
|
+
- input: the one the user should fill. It contains two sections, one with user info and the other with samples metadata.
|
|
10
|
+
- example_ok: an example of a good samplesheet
|
|
11
|
+
- example_fail: an example of a problematic samplesheet
|
|
12
|
+
|
|
13
|
+
## Parser
|
|
14
|
+
The parser will check the samplesheet for its correct compilation and raises warnings and/or errors if any.
|
|
15
|
+
If no errors are found, the samples metadata section is saved as a CSV file (at the path given with -o). Warnings also prevent the file from being saved, but this can be overridden with --ignore-warnings.
|
|
16
|
+
|
|
17
|
+
## Test
|
|
18
|
+
In the test folder, there are 3 test samplesheets for each technology:
|
|
19
|
+
|
|
20
|
+
- test_error.xlsx: a test samplesheet with errors
|
|
21
|
+
- test_ok.xlsx: a test samplesheet correctly compiled
|
|
22
|
+
- test_warning.xlsx: a test samplesheet with warnings
|
|
23
|
+
|
|
24
|
+
Several tests should be run after changing any file:
|
|
25
|
+
|
|
26
|
+
- `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
|
|
27
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
|
|
28
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
|
|
29
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
|
|
30
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
|
|
31
|
+
- `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
|
|
32
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
|
|
33
|
+
- `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok_timecourse.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
|
|
34
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
|
|
35
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
|
|
36
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
|
|
37
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but the test.csv file should be created)
|
|
38
|
+
- `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but the test.csv file should be created)
|
|
39
|
+
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "parseet"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "This package is used to parse ms samplesheet for further analyses."
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Matteo Miotto", email = "miotsdata@gmail.com" },
|
|
7
|
+
]
|
|
8
|
+
dependencies = [
|
|
9
|
+
"pandas>=2.3.3",
|
|
10
|
+
"openpyxl>=3.1.5",
|
|
11
|
+
]
|
|
12
|
+
requires-python = "<3.13,>=3.12"
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
keywords = [
|
|
20
|
+
"samplesheet",
|
|
21
|
+
"parser",
|
|
22
|
+
"data analyses",
|
|
23
|
+
"mass spectrometry",
|
|
24
|
+
"msdial",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.license]
|
|
28
|
+
text = "MIT"
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
gui = [
|
|
32
|
+
"pyside6>=6.11.1",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.scripts]
|
|
36
|
+
parseet = "parseet.cli:main"
|
|
37
|
+
|
|
38
|
+
[project.gui-scripts]
|
|
39
|
+
parseet-gui = "parseet.app:main"
|
|
40
|
+
|
|
41
|
+
[build-system]
|
|
42
|
+
requires = [
|
|
43
|
+
"pdm-backend",
|
|
44
|
+
]
|
|
45
|
+
build-backend = "pdm.backend"
|
|
46
|
+
|
|
47
|
+
[tool.pdm]
|
|
48
|
+
distribution = true
|
|
49
|
+
|
|
50
|
+
[dependency-groups]
|
|
51
|
+
dev = [
|
|
52
|
+
"sphinx-rtd-theme>=0.5.1",
|
|
53
|
+
"sphinx-argparse>=0.5.2",
|
|
54
|
+
"sphinx<8,>=6",
|
|
55
|
+
"sphinx-autodoc-typehints<3.6",
|
|
56
|
+
"pyinstaller>=6.20.0",
|
|
57
|
+
]
|
|
Binary file
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
app.py
|
|
4
|
+
======
|
|
5
|
+
|
|
6
|
+
Entry point: registers ``Backend`` with the QML engine and opens the window.
|
|
7
|
+
|
|
8
|
+
Usage
|
|
9
|
+
-----
|
|
10
|
+
::
|
|
11
|
+
|
|
12
|
+
python -m parseet.app
|
|
13
|
+
# or
|
|
14
|
+
python app.py
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import sys
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from parseet.backend import Backend
|
|
23
|
+
|
|
24
|
+
def _resource(relative: str) -> Path:
    """Return the path of a resource shipped next to this module.

    When the interpreter exposes ``sys._MEIPASS`` (the bundled case), the
    path is resolved against that directory; otherwise it is resolved
    against the directory containing this file.

    Parameters
    ----------
    relative:
        Path of the resource relative to the base directory.
    """
    # Development layout: resources live beside this source file.
    if not hasattr(sys, '_MEIPASS'):
        return Path(__file__).parent / relative

    # Bundled layout: resources live under the directory named by _MEIPASS.
    bundle_root = Path(sys._MEIPASS)
    return bundle_root / relative
|
|
29
|
+
|
|
30
|
+
def main() -> None:
    """Start the QML GUI and block until the window is closed.

    PySide6 is imported lazily so that a missing optional dependency is
    reported with an install hint on stderr and exit status 1 instead of a
    raw traceback. On success the function creates the application, exposes
    a ``Backend`` instance to QML under the context-property name
    ``backend``, loads ``main.qml`` and exits the process with the event
    loop's return code. If the QML file produces no root object the process
    exits with status 1.
    """
    # NOTE(review): this module imports ``parseet.backend`` at import time,
    # and that module imports PySide6 itself, so this guard may never be
    # reached when PySide6 is missing — confirm.
    try:
        from PySide6.QtGui import QGuiApplication  # pylint: disable=no-name-in-module
        from PySide6.QtQml import QQmlApplicationEngine  # pylint: disable=no-name-in-module
        from PySide6.QtCore import QUrl  # pylint: disable=no-name-in-module
    except ImportError:
        # ``sys`` is already imported at module level; use it directly.
        print(
            "Error: the GUI requires PySide6.\n"
            "Install it with:  pip install parseet[gui]",
            file=sys.stderr,
        )
        sys.exit(1)

    app = QGuiApplication(sys.argv)
    app.setApplicationName("Samplesheet Tools")
    # NOTE(review): "YourOrg" looks like a template placeholder — confirm.
    app.setOrganizationName("YourOrg")

    engine = QQmlApplicationEngine()

    # Expose the backend singleton to QML. The local reference keeps the
    # Python object alive for the lifetime of the event loop.
    backend = Backend()
    engine.rootContext().setContextProperty("backend", backend)

    qml_file = _resource("main.qml")
    engine.load(QUrl.fromLocalFile(str(qml_file)))

    # An empty root-object list means the QML failed to load.
    if not engine.rootObjects():
        sys.exit(1)

    sys.exit(app.exec())
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
if __name__ == "__main__":
|
|
63
|
+
main()
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""
|
|
2
|
+
backend.py
|
|
3
|
+
==========
|
|
4
|
+
|
|
5
|
+
QML-exposed backend that runs ``check_raw_file_order`` and ``check_samples``
|
|
6
|
+
in a worker thread and emits structured result signals back to the UI.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import traceback
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from PySide6.QtCore import ( #pylint: disable=no-name-in-module
|
|
17
|
+
Property,
|
|
18
|
+
QObject,
|
|
19
|
+
QRunnable,
|
|
20
|
+
QThreadPool,
|
|
21
|
+
Signal,
|
|
22
|
+
Slot,
|
|
23
|
+
) #pylint: disable=no-name-in-module
|
|
24
|
+
|
|
25
|
+
from parseet.core.utils import (
|
|
26
|
+
check_raw_file_order,
|
|
27
|
+
check_samples,
|
|
28
|
+
configure_logging,
|
|
29
|
+
parse_samplesheet,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
configure_logging(level=logging.WARNING)
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Worker infrastructure
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
class _WorkerSignals(QObject):
    """Signal holder through which a ``_Worker`` reports its outcome."""

    # Payload: three lists, in order — errors, warnings, ok.
    finished = Signal(list, list, list)

    # Payload: one string — the traceback / exception message.
    failed = Signal(str)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class _Worker(QRunnable):
    """Thread-pool task that calls a function and reports back via signals.

    The wrapped callable must return a 3-tuple ``(errors, warnings, ok)``.
    That tuple is forwarded through ``signals.finished``; any exception
    raised while running is formatted as a traceback string and forwarded
    through ``signals.failed`` instead.
    """

    def __init__(self, fn, *args, **kwargs):
        """Store the callable and the arguments it will be invoked with."""
        super().__init__()
        self.signals = _WorkerSignals()
        self._fn, self._args, self._kwargs = fn, args, kwargs

    def run(self) -> None:
        """Invoke the stored callable and emit exactly one outcome signal."""
        try:
            result = self._fn(*self._args, **self._kwargs)
            errors, warnings, ok = result
            self.signals.finished.emit(errors, warnings, ok)
        except Exception:
            # Boundary of the worker: report the failure rather than let it
            # escape the runnable.
            self.signals.failed.emit(traceback.format_exc())
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
# Shared samplesheet loader
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
def _load_and_parse(path: str) -> tuple[pd.DataFrame, list[str], list[str], list[str]]:
    """
    Load an Excel samplesheet without a header row and parse it.

    Parameters
    ----------
    path:
        Location of the Excel file, passed to ``pd.read_excel``.

    Returns
    -------
    tuple
        ``(df, errors, warnings, ok)`` as produced by ``parse_samplesheet``
        (its fifth result, the column descriptions, is discarded).
        ``errors`` is always empty on return because a non-empty list
        raises instead.

    Raises
    ------
    ValueError
        If ``parse_samplesheet`` reports any errors; the message lists
        them one per line.
    """
    parsed = parse_samplesheet(pd.read_excel(path, header=None))
    df, errors, warnings, ok, _col_descriptions = parsed

    if not errors:
        return df, errors, warnings, ok

    bullet_lines = "\n".join(f" • {e}" for e in errors)
    raise ValueError("Samplesheet parse errors:\n" + bullet_lines)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# Command implementations (run inside the worker thread)
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def _run_check(
    samplesheet_path: str,
    folder_path: str,
    dry_run: bool,
    ignore_id: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Run the ``check`` command: parse the samplesheet, then check file order.

    Returns ``(errors, warnings, ok)`` where ``errors`` comes from
    ``check_raw_file_order`` only, and the warnings / ok lists are the
    samplesheet-parsing messages followed by the file-order messages.
    Parse errors surface as the ``ValueError`` raised by
    ``_load_and_parse``.
    """
    parsed_df, _errors, ss_warnings, ss_ok = _load_and_parse(samplesheet_path)

    order_result = check_raw_file_order(
        folder_path=folder_path,
        samplesheet=parsed_df,
        dry_run=dry_run,
        ignore_id=ignore_id,
    )
    # The rename map is not reported to the caller.
    _rename_map, errors, warnings, ok = order_result

    combined_warnings = ss_warnings + warnings
    combined_ok = ss_ok + ok
    return errors, combined_warnings, combined_ok
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _run_create(
    samplesheet_path: str,
    file_paths: list[str],
    output_folder: str,
    software: str,
    dry_run: bool,
    ignore_warnings: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Run the ``create`` command for every integration file.

    The samplesheet is parsed once, then ``check_samples`` is run for each
    entry of ``file_paths``. For a file without errors (and without
    blocking warnings) two files are written into ``output_folder``:
    ``<stem>_samplesheet.csv`` and ``<stem>_samplesheet_schema.json``,
    the latter mapping each column name to its dtype string.

    Parameters
    ----------
    samplesheet_path:
        Excel samplesheet to parse.
    file_paths:
        Integration files to validate against the samplesheet.
    output_folder:
        Directory receiving the generated files.
    software:
        Forwarded to ``check_samples`` as ``software=``.
    dry_run:
        When true, nothing is written; an ``[DRY-RUN]`` note is added to
        the ok list instead.
    ignore_warnings:
        When false, a file whose check produced warnings is skipped.

    Returns
    -------
    tuple
        ``(errors, warnings, ok)`` message lists accumulated over the
        samplesheet parse and all files.

    Raises
    ------
    ValueError
        Propagated from ``_load_and_parse`` on samplesheet parse errors.
    """
    # Function-scope imports (this module does not import them at the top);
    # one per line and ahead of any logic.
    import csv
    import json

    df, _errors, ss_warnings, ss_ok = _load_and_parse(samplesheet_path)

    all_errors: list[str] = []
    # Copy so the appends below do not mutate the lists handed back by the
    # parser.
    all_warnings: list[str] = list(ss_warnings)
    all_ok: list[str] = list(ss_ok)

    out_dir = Path(output_folder)

    for file_path in file_paths:
        logger.info("Processing '%s'", file_path)
        processed_sheet, message, errors, warnings, ok = check_samples(
            df,
            file_path,
            software=software,
        )

        all_errors += errors
        all_warnings += warnings
        all_ok += ok

        if errors:
            all_errors.append(f"Skipped output for '{file_path}' due to errors above.")
            continue

        if dry_run:
            all_ok.append(f"[DRY-RUN] Would write output for '{file_path}'")
            continue

        if warnings and not ignore_warnings:
            all_warnings.append(
                f"Skipped output for '{file_path}' — warnings present. "
                "Enable 'Ignore warnings' to force output."
            )
            continue

        stem = Path(file_path).stem
        csv_path = out_dir / f"{stem}_samplesheet.csv"
        schema_path = out_dir / f"{stem}_samplesheet_schema.json"

        # NOTE(review): the CLI's create path additionally sorts by
        # "sample_order" before writing; this path does not — confirm
        # whether the difference is intended.
        processed_sheet.replace("<NA>", "").to_csv(
            csv_path,
            quotechar='"',
            quoting=csv.QUOTE_STRINGS,
            na_rep="",
            index=False,
        )

        schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
        with open(schema_path, "w", encoding="utf-8") as fh:
            json.dump(schema, fh, indent=2)

        all_ok.append(f"Saved '{csv_path}'")
        all_ok.append(f"Saved schema '{schema_path}'")
        if message:
            all_warnings.append(message)

    return all_errors, all_warnings, all_ok
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
# QML-exposed backend object
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
class Backend(QObject):
    """
    Object registered with QML under the name ``backend``.

    Only one run may be in flight at a time; a request received while
    ``busy`` is true is logged and dropped.

    Signals
    -------
    runStarted()
        Emitted by ``_dispatch`` right before the worker is queued.
    runFinished(errors, warnings, ok)
        Emitted with the three result lists when the worker returns.
    runFailed(message)
        Emitted with the traceback string when the worker raised.
    busyChanged()
        Change notification for the ``busy`` property.
    """

    runStarted = Signal()
    runFinished = Signal(list, list, list)
    runFailed = Signal(str)
    busyChanged = Signal()

    def __init__(self, parent: QObject | None = None) -> None:
        """Create an idle backend bound to the global thread pool."""
        super().__init__(parent)
        self._thread_pool = QThreadPool.globalInstance()
        self._busy = False

    # ------------------------------------------------------------------
    # busy property
    # ------------------------------------------------------------------

    def _get_busy(self) -> bool:
        """Return whether a run is currently in progress."""
        return self._busy

    def _set_busy(self, value: bool) -> None:
        """Store the busy flag, notifying only on an actual change."""
        if self._busy == value:
            return
        self._busy = value
        self.busyChanged.emit()

    busy = Property(bool, _get_busy, _set_busy, notify=busyChanged)

    # ------------------------------------------------------------------
    # Slots called from QML
    # ------------------------------------------------------------------

    @Slot(str, str, bool, bool)
    def runCheck(
        self,
        samplesheet_path: str,
        folder_path: str,
        dry_run: bool,
        ignore_id: bool,
    ) -> None:
        """Queue ``_run_check`` with the given arguments on a worker thread."""
        check_args = (samplesheet_path, folder_path, dry_run, ignore_id)
        self._dispatch(_run_check, *check_args)

    @Slot(str, list, str, str, bool, bool)
    def runCreate(
        self,
        samplesheet_path: str,
        file_paths: list,
        output_folder: str,
        software: str,
        dry_run: bool,
        ignore_warnings: bool,
    ) -> None:
        """Queue ``_run_create`` with the given arguments on a worker thread."""
        # Convert the incoming sequence to a plain Python list first.
        paths = list(file_paths)
        self._dispatch(
            _run_create,
            samplesheet_path,
            paths,
            output_folder,
            software,
            dry_run,
            ignore_warnings,
        )

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _dispatch(self, fn, *args) -> None:
        """Run ``fn(*args)`` in the thread pool unless a run is in progress."""
        if self._busy:
            logger.warning("A run is already in progress — ignoring request")
            return

        self._set_busy(True)
        self.runStarted.emit()

        task = _Worker(fn, *args)
        task.signals.finished.connect(self._on_finished)
        task.signals.failed.connect(self._on_failed)
        self._thread_pool.start(task)

    def _on_finished(self, errors: list, warnings: list, ok: list) -> None:
        """Clear the busy flag and forward the worker's result lists."""
        self._set_busy(False)
        self.runFinished.emit(errors, warnings, ok)

    def _on_failed(self, message: str) -> None:
        """Clear the busy flag, log the traceback and forward it."""
        self._set_busy(False)
        logger.error("Worker failed:\n%s", message)
        self.runFailed.emit(message)
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Samplesheet CLI
|
|
4
|
+
===============
|
|
5
|
+
|
|
6
|
+
.. program:: samplesheet
|
|
7
|
+
|
|
8
|
+
Command-line interface for validating, parsing, and generating samplesheets.
|
|
9
|
+
|
|
10
|
+
This CLI provides two subcommands:
|
|
11
|
+
|
|
12
|
+
``reparse``
|
|
13
|
+
Reparse Excel input files into quoted CSV format.
|
|
14
|
+
|
|
15
|
+
``create``
|
|
16
|
+
Validate a samplesheet against integration files and generate derived
|
|
17
|
+
samplesheet CSV files.
|
|
18
|
+
|
|
19
|
+
Configuration is provided via a TOML file and merged with the default
|
|
20
|
+
:data:`base_config`.
|
|
21
|
+
|
|
22
|
+
Subcommands
|
|
23
|
+
-----------
|
|
24
|
+
|
|
25
|
+
reparse
|
|
26
|
+
^^^^^^^
|
|
27
|
+
Reparse Excel files into quoted CSV files.
|
|
28
|
+
|
|
29
|
+
.. option:: reparse INPUTS [INPUTS ...]
|
|
30
|
+
|
|
31
|
+
One or more Excel files to reparse.
|
|
32
|
+
|
|
33
|
+
.. option:: -c, --config CONFIG
|
|
34
|
+
|
|
35
|
+
Optional TOML configuration file.
|
|
36
|
+
|
|
37
|
+
create
|
|
38
|
+
^^^^^^
|
|
39
|
+
Validate a samplesheet and generate per-input CSV files.
|
|
40
|
+
|
|
41
|
+
.. option:: create FILES [FILES ...]
|
|
42
|
+
|
|
43
|
+
Integration files to process.
|
|
44
|
+
|
|
45
|
+
.. option:: -s, --samplesheet PATH
|
|
46
|
+
|
|
47
|
+
Path to the samplesheet Excel file.
|
|
48
|
+
|
|
49
|
+
.. option:: -o, --outdir DIR
|
|
50
|
+
|
|
51
|
+
Output directory for generated samplesheets.
|
|
52
|
+
|
|
53
|
+
.. option:: --dry-run
|
|
54
|
+
|
|
55
|
+
Do not write output files.
|
|
56
|
+
|
|
57
|
+
.. option:: --ignore-warnings
|
|
58
|
+
|
|
59
|
+
Save output even if warnings are present.
|
|
60
|
+
|
|
61
|
+
.. option:: -c, --config CONFIG
|
|
62
|
+
|
|
63
|
+
Optional TOML configuration file.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
from __future__ import annotations
|
|
67
|
+
|
|
68
|
+
import logging
|
|
69
|
+
from pathlib import Path
|
|
70
|
+
import json
|
|
71
|
+
import sys
|
|
72
|
+
from typing import List
|
|
73
|
+
import pandas as pd
|
|
74
|
+
|
|
75
|
+
import csv
|
|
76
|
+
from .core.utils import (
|
|
77
|
+
parse_samplesheet,
|
|
78
|
+
check_samples,
|
|
79
|
+
build_parser,
|
|
80
|
+
check_raw_file_order,
|
|
81
|
+
configure_logging
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main(argv: list[str] | None = None) -> None:
    """
    Entry point for the samplesheet command-line interface.

    Parses the command line with the parser from ``build_parser`` and
    dispatches on ``args.command``:

    ``check``
        Parse the samplesheet and run ``check_raw_file_order`` on the first
        entry of ``args.folder``.

    ``create``
        Parse the samplesheet, run ``check_samples`` for every input file
        and write ``<stem>_samplesheet.csv`` plus
        ``<stem>_samplesheet_schema.json`` into ``args.outdir``.

    Parameters
    ----------
    argv:
        Argument list to parse; ``None`` lets ``argparse`` read
        ``sys.argv``.

    Raises
    ------
    SystemExit
        With status 1 when no input files are given, when the samplesheet
        or an input file has errors, or when warnings are present and
        ``--ignore-warnings`` was not given.
    """

    configure_logging(level=logging.INFO)

    # This is necessary to build docs
    parser = build_parser()
    args = parser.parse_args(argv)

    # ---------------------
    # CHECK SAMPLE ORDER
    # ---------------------
    if args.command == "check":
        raw_sheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(raw_sheet)

        # Same policy as ``create``: do not continue with a samplesheet
        # that failed to parse.
        if errors:
            logging.error("Samplesheet parsing reported errors. Aborting.")
            sys.exit(1)

        check_raw_file_order(
            folder_path=args.folder[0],
            dry_run=args.dry_run,
            samplesheet=samplesheet,
            ignore_id=args.ignore_id,
        )

    # ----------
    # CREATE
    # ----------
    elif args.command == "create":
        files: list[str] = args.files

        if not files:
            logging.error("No input files provided. Use CLI arguments or config.")
            sys.exit(1)

        output_dir = Path(args.outdir)
        dry_run: bool = args.dry_run
        ignore_warnings: bool = args.ignore_warnings

        raw_sheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(raw_sheet)

        if errors:
            sys.exit(1)

        for file_path in files:
            processed_sheet, message, errors, warnings, _ok = check_samples(
                samplesheet,
                file_path,
                software=args.software,
            )

            # Errors from the per-file check must block the output; they
            # were previously computed but never inspected.
            if errors:
                logging.error("Errors were found for '%s'. No output written.", file_path)
                sys.exit(1)

            if dry_run:
                print("[DRY-RUN] File not saved.")
                continue

            if warnings and not ignore_warnings:
                logging.warning("Warnings were found. Use --ignore-warnings to proceed.")
                sys.exit(1)

            stem = Path(file_path).stem
            output_file: Path = output_dir / f"{stem}_samplesheet.csv"
            schema_file: Path = output_dir / f"{stem}_samplesheet_schema.json"

            processed_sheet = processed_sheet.replace("<NA>", "").sort_values("sample_order")
            processed_sheet.to_csv(
                output_file,
                quotechar='"',
                quoting=csv.QUOTE_STRINGS,
                na_rep="",
                index=False,
            )

            print(f"Samplesheet saved as {output_file}. {message}")

            # Record the dtype of every written column next to the CSV.
            schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
            with open(schema_file, "w", encoding="utf-8") as f:
                json.dump(schema, f, indent=2)
            print(f"Schema file saved as {schema_file}")
|
|
175
|
+
|
|
176
|
+
if __name__ == "__main__":
|
|
177
|
+
main()
|