parseet 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parseet-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.1
2
+ Name: parseet
3
+ Version: 0.2.0
4
+ Summary: This package is used to parse ms samplesheet for further analyses.
5
+ Keywords: samplesheet,parser,data analyses,mass spectrometry,msdial
6
+ Author-Email: Matteo Miotto <miotsdata@gmail.com>
7
+ License: MIT
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: <3.13,>=3.12
12
+ Requires-Dist: pandas>=2.3.3
13
+ Requires-Dist: openpyxl>=3.1.5
14
+ Provides-Extra: gui
15
+ Requires-Dist: pyside6>=6.11.1; extra == "gui"
16
+ Description-Content-Type: text/markdown
17
+
18
+ # BULK METABOLOMICS SAMPLESHEET
19
+
20
+ This folder contains the files necessary for the samplesheet of LC-MS and spatial metabolomics.
21
+ Samplesheet template (LC-MS_metabolomics_samplesheet.xlsx or spatial_metabolomics_samplesheet.xlsx) should be given to clients to be compiled. Once received back, it should be tested with the parser (LC-MS_parse_samplesheet.py or spatial_parse_samplesheet.py).
22
+
23
+ ## Samplesheet template
24
+ Samplesheet templates contain 3 sheets:
25
+
26
+ - input: the one the user should fill. It contains two sections, one with user info and the other with samples metadata.
27
+ - example_ok: an example of a good samplesheet
28
+ - example_fail: an example of a problematic samplesheet
29
+
30
+ ## Parser
31
+ The parser checks that the samplesheet has been compiled correctly and raises warnings and/or errors if it finds any problems.
32
+ If no errors are found, the samples metadata section is saved as a csv file (specified with -o). Warnings also prevent the file from being saved, but this can be overridden with --ignore-warnings.
33
+
34
+ ## Test
35
+ The test folder contains 3 test samplesheets for each technology:
36
+
37
+ - test_error.xlsx: a test samplesheet with errors
38
+ - test_ok.xlsx: a test samplesheet correctly compiled
39
+ - test_warning.xlsx: a test samplesheet with warnings
40
+
41
+ Several tests should be run after changing any file:
42
+
43
+ - `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
44
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
45
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
46
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
47
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
48
+ - `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
49
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
50
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok_timecourse.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
51
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
52
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
53
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
54
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but test.csv file should be created)
55
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but test.csv file should be created)
56
+
@@ -0,0 +1,39 @@
1
+ # BULK METABOLOMICS SAMPLESHEET
2
+
3
+ This folder contains the files necessary for the samplesheet of LC-MS and spatial metabolomics.
4
+ Samplesheet template (LC-MS_metabolomics_samplesheet.xlsx or spatial_metabolomics_samplesheet.xlsx) should be given to clients to be compiled. Once received back, it should be tested with the parser (LC-MS_parse_samplesheet.py or spatial_parse_samplesheet.py).
5
+
6
+ ## Samplesheet template
7
+ Samplesheet templates contain 3 sheets:
8
+
9
+ - input: the one the user should fill. It contains two sections, one with user info and the other with samples metadata.
10
+ - example_ok: an example of a good samplesheet
11
+ - example_fail: an example of a problematic samplesheet
12
+
13
+ ## Parser
14
+ The parser checks that the samplesheet has been compiled correctly and raises warnings and/or errors if it finds any problems.
15
+ If no errors are found, the samples metadata section is saved as a csv file (specified with -o). Warnings also prevent the file from being saved, but this can be overridden with --ignore-warnings.
16
+
17
+ ## Test
18
+ The test folder contains 3 test samplesheets for each technology:
19
+
20
+ - test_error.xlsx: a test samplesheet with errors
21
+ - test_ok.xlsx: a test samplesheet correctly compiled
22
+ - test_warning.xlsx: a test samplesheet with warnings
23
+
24
+ Several tests should be run after changing any file:
25
+
26
+ - `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
27
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
28
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
29
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
30
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
31
+ - `python LC-MS_parse_samplesheet.py` (this should result in an error for missing input and output)
32
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv --dry-run` (this should be ok, with no errors nor warnings, but no file should be saved due to --dry-run)
33
+ - `python LC-MS_parse_samplesheet.py -i test_files/LC-MS_test_ok_timecourse.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
34
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_ok.xlsx -o test.csv` (this should be ok, with also test.csv file saved)
35
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_error.xlsx -o test.csv` (this should raise errors and no test.csv file should be created)
36
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv` (this should raise warnings and no test.csv file should be created)
37
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but test.csv file should be created)
38
+ - `python spatial_parse_samplesheet.py -i test_files/spatial_test_warning.xlsx -o test.csv --ignore-warnings` (this should raise warnings, but test.csv file should be created)
39
+
@@ -0,0 +1,57 @@
1
+ [project]
2
+ name = "parseet"
3
+ version = "0.2.0"
4
+ description = "This package is used to parse ms samplesheet for further analyses."
5
+ authors = [
6
+ { name = "Matteo Miotto", email = "miotsdata@gmail.com" },
7
+ ]
8
+ dependencies = [
9
+ "pandas>=2.3.3",
10
+ "openpyxl>=3.1.5",
11
+ ]
12
+ requires-python = "<3.13,>=3.12"
13
+ readme = "README.md"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ keywords = [
20
+ "samplesheet",
21
+ "parser",
22
+ "data analyses",
23
+ "mass spectrometry",
24
+ "msdial",
25
+ ]
26
+
27
+ [project.license]
28
+ text = "MIT"
29
+
30
+ [project.optional-dependencies]
31
+ gui = [
32
+ "pyside6>=6.11.1",
33
+ ]
34
+
35
+ [project.scripts]
36
+ parseet = "parseet.cli:main"
37
+
38
+ [project.gui-scripts]
39
+ parseet-gui = "parseet.app:main"
40
+
41
+ [build-system]
42
+ requires = [
43
+ "pdm-backend",
44
+ ]
45
+ build-backend = "pdm.backend"
46
+
47
+ [tool.pdm]
48
+ distribution = true
49
+
50
+ [dependency-groups]
51
+ dev = [
52
+ "sphinx-rtd-theme>=0.5.1",
53
+ "sphinx-argparse>=0.5.2",
54
+ "sphinx<8,>=6",
55
+ "sphinx-autodoc-typehints<3.6",
56
+ "pyinstaller>=6.20.0",
57
+ ]
Binary file
@@ -0,0 +1,2 @@
1
+ from parseet import *
2
+ from .version import __version__
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ app.py
4
+ ======
5
+
6
+ Entry point: registers ``Backend`` with the QML engine and opens the window.
7
+
8
+ Usage
9
+ -----
10
+ ::
11
+
12
+ python -m parseet.app
13
+ # or
14
+ python app.py
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import sys
20
+ from pathlib import Path
21
+
22
+ from parseet.backend import Backend
23
+
24
+ def _resource(relative: str) -> Path:
25
+ """Resolve a path that works both in development and when bundled."""
26
+ if hasattr(sys, '_MEIPASS'):
27
+ return Path(sys._MEIPASS) / relative
28
+ return Path(__file__).parent / relative
29
+
30
def main() -> None:
    """Start the Qt Quick GUI and block until the window is closed.

    The function creates the ``QGuiApplication``, exposes a ``Backend``
    instance to QML under the context property name ``backend``, loads
    ``main.qml`` and runs the event loop.

    The process always terminates through ``sys.exit``:

    * status ``1`` when PySide6 cannot be imported,
    * status ``1`` when the QML file produces no root objects,
    * otherwise the return code of ``app.exec()``.
    """
    # NOTE(review): this module imports ``parseet.backend`` at top level, and
    # that module itself imports PySide6 at import time. When PySide6 is
    # missing the ImportError is therefore likely raised before ``main`` is
    # ever called, so the friendly message below may never be shown. Consider
    # importing ``Backend`` lazily.
    try:
        from PySide6.QtGui import QGuiApplication  # pylint: disable=no-name-in-module
        from PySide6.QtQml import QQmlApplicationEngine  # pylint: disable=no-name-in-module
        from PySide6.QtCore import QUrl  # pylint: disable=no-name-in-module
    except ImportError:
        print(
            "Error: the GUI requires PySide6.\n"
            "Install it with: pip install parseet[gui]",
            file=sys.stderr,
        )
        sys.exit(1)

    app = QGuiApplication(sys.argv)
    app.setApplicationName("Samplesheet Tools")
    app.setOrganizationName("YourOrg")

    engine = QQmlApplicationEngine()

    # Expose the backend singleton to QML. The local variable keeps the
    # Python object referenced for as long as the event loop runs.
    backend = Backend()
    engine.rootContext().setContextProperty("backend", backend)

    qml_file = _resource("main.qml")
    engine.load(QUrl.fromLocalFile(str(qml_file)))

    # An empty root-object list means the QML document failed to load;
    # report it instead of exiting silently.
    if not engine.rootObjects():
        print(f"Error: could not load QML file '{qml_file}'.", file=sys.stderr)
        sys.exit(1)

    sys.exit(app.exec())
60
+
61
+
62
+ if __name__ == "__main__":
63
+ main()
@@ -0,0 +1,283 @@
1
+ """
2
+ backend.py
3
+ ==========
4
+
5
+ QML-exposed backend that runs ``check_raw_file_order`` and ``check_samples``
6
+ in a worker thread and emits structured result signals back to the UI.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import traceback
13
+ from pathlib import Path
14
+
15
+ import pandas as pd
16
+ from PySide6.QtCore import ( #pylint: disable=no-name-in-module
17
+ Property,
18
+ QObject,
19
+ QRunnable,
20
+ QThreadPool,
21
+ Signal,
22
+ Slot,
23
+ ) #pylint: disable=no-name-in-module
24
+
25
+ from parseet.core.utils import (
26
+ check_raw_file_order,
27
+ check_samples,
28
+ configure_logging,
29
+ parse_samplesheet,
30
+ )
31
+
32
+ configure_logging(level=logging.WARNING)
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Worker infrastructure
38
+ # ---------------------------------------------------------------------------
39
+
40
class _WorkerSignals(QObject):
    """Signal holder through which a ``_Worker`` reports its outcome."""

    # Success payload, in order: errors, warnings, ok
    finished = Signal(list, list, list)
    # Failure payload: traceback / exception message
    failed = Signal(str)
44
+
45
+
46
class _Worker(QRunnable):
    """Thread-pool task that calls a function and reports through signals.

    The wrapped callable must return a 3-tuple ``(errors, warnings, ok)``;
    it is forwarded through ``signals.finished``. Any ``Exception`` raised
    while running is formatted as a traceback and sent through
    ``signals.failed`` instead.
    """

    def __init__(self, fn, *args, **kwargs):
        super().__init__()
        self._fn, self._args, self._kwargs = fn, args, kwargs
        self.signals = _WorkerSignals()

    def run(self) -> None:
        """Execute the stored callable and emit exactly one result signal."""
        try:
            outcome = self._fn(*self._args, **self._kwargs)
            errors, warnings, ok = outcome
            self.signals.finished.emit(errors, warnings, ok)
        except Exception:
            report = traceback.format_exc()
            self.signals.failed.emit(report)
62
+
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Shared samplesheet loader
66
+ # ---------------------------------------------------------------------------
67
+
68
def _load_and_parse(path: str) -> tuple[pd.DataFrame, list[str], list[str], list[str]]:
    """
    Load an Excel samplesheet without a header row and parse it.

    The sheet is passed to ``parse_samplesheet``; the column descriptions it
    returns are discarded.

    Returns ``(df, errors, warnings, ok)``. ``errors`` is empty whenever the
    function returns, because a non-empty error list is turned into a
    ``ValueError`` listing every error on its own bullet line.
    """
    sheet = pd.read_excel(path, header=None)
    parsed = parse_samplesheet(sheet)
    df, errors, warnings, ok, _col_descriptions = parsed

    if not errors:
        return df, errors, warnings, ok

    bullets = "\n".join(f" • {e}" for e in errors)
    raise ValueError("Samplesheet parse errors:\n" + bullets)
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Command implementations (run inside the worker thread)
86
+ # ---------------------------------------------------------------------------
87
+
88
def _run_check(
    samplesheet_path: str,
    folder_path: str,
    dry_run: bool,
    ignore_id: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Run the ``check`` command: parse the samplesheet, then check raw file order.

    Returns ``(errors, warnings, ok)``. Warnings and ok messages from the
    samplesheet parsing step come first, followed by those reported by
    ``check_raw_file_order``; errors come from ``check_raw_file_order`` only
    (parse errors raise inside ``_load_and_parse``).
    """
    df, _parse_errors, sheet_warnings, sheet_ok = _load_and_parse(samplesheet_path)

    order_result = check_raw_file_order(
        folder_path=folder_path,
        samplesheet=df,
        dry_run=dry_run,
        ignore_id=ignore_id,
    )
    _rename_map, errors, order_warnings, order_ok = order_result

    combined_warnings = sheet_warnings + order_warnings
    combined_ok = sheet_ok + order_ok
    return errors, combined_warnings, combined_ok
105
+
106
+
107
def _run_create(
    samplesheet_path: str,
    file_paths: list[str],
    output_folder: str,
    software: str,
    dry_run: bool,
    ignore_warnings: bool,
) -> tuple[list[str], list[str], list[str]]:
    """Worker body for the ``create`` command.

    Parses the samplesheet once, then runs ``check_samples`` for every entry
    of ``file_paths``. For each input that passes, a
    ``<stem>_samplesheet.csv`` and a ``<stem>_samplesheet_schema.json`` are
    written into ``output_folder``.

    An input produces no output when:

    * ``check_samples`` reports errors,
    * ``dry_run`` is true,
    * warnings were reported and ``ignore_warnings`` is false.

    Returns
    -------
    tuple of (errors, warnings, ok)
        Accumulated messages for the whole run, starting with the warnings
        and ok messages from the samplesheet parsing step.

    Raises
    ------
    ValueError
        Propagated from ``_load_and_parse`` when the samplesheet itself has
        parse errors.
    """
    import csv
    import json

    df, _errors, ss_warnings, ss_ok = _load_and_parse(samplesheet_path)

    all_errors: list[str] = []
    # Copy the parser's lists: they are extended below and must not be
    # mutated behind the parser's back.
    all_warnings: list[str] = list(ss_warnings)
    all_ok: list[str] = list(ss_ok)

    out_dir = Path(output_folder)

    for file_path in file_paths:
        logger.info("Processing '%s'", file_path)
        processed_sheet, message, errors, warnings, ok = check_samples(
            df,
            file_path,
            software=software,
        )

        all_errors += errors
        all_warnings += warnings
        all_ok += ok

        if errors:
            all_errors.append(f"Skipped output for '{file_path}' due to errors above.")
            continue

        if dry_run:
            all_ok.append(f"[DRY-RUN] Would write output for '{file_path}'")
            continue

        if warnings and not ignore_warnings:
            all_warnings.append(
                f"Skipped output for '{file_path}' — warnings present. "
                "Enable 'Ignore warnings' to force output."
            )
            continue

        stem = Path(file_path).stem
        csv_path = out_dir / f"{stem}_samplesheet.csv"
        schema_path = out_dir / f"{stem}_samplesheet_schema.json"

        # Make sure the destination exists before the first write.
        out_dir.mkdir(parents=True, exist_ok=True)

        cleaned = processed_sheet.replace("<NA>", "")
        # The CLI ``create`` command sorts rows by ``sample_order`` before
        # writing; do the same here so both entry points emit the same file.
        if "sample_order" in cleaned.columns:
            cleaned = cleaned.sort_values("sample_order")

        cleaned.to_csv(
            csv_path,
            quotechar='"',
            quoting=csv.QUOTE_STRINGS,
            na_rep="",
            index=False,
        )
        schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
        with open(schema_path, "w", encoding="utf-8") as fh:
            json.dump(schema, fh, indent=2)

        all_ok.append(f"Saved '{csv_path}'")
        all_ok.append(f"Saved schema '{schema_path}'")
        if message:
            all_warnings.append(message)

    return all_errors, all_warnings, all_ok
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # QML-exposed backend object
175
+ # ---------------------------------------------------------------------------
176
+
177
class Backend(QObject):
    """
    Object made available to QML under the name ``backend``.

    At most one run is in flight at a time; requests that arrive while
    ``busy`` is true are logged and dropped.

    Signals
    -------
    runStarted()
        Emitted right before a worker is handed to the thread pool.
    runFinished(errors, warnings, ok)
        Emitted with the three result lists when the worker returns.
    runFailed(message)
        Emitted with a traceback string when the worker raised.
    busyChanged()
        Change notification for the ``busy`` property.
    """

    runStarted = Signal()
    runFinished = Signal(list, list, list)
    runFailed = Signal(str)
    busyChanged = Signal()

    def __init__(self, parent: QObject | None = None) -> None:
        super().__init__(parent)
        self._thread_pool = QThreadPool.globalInstance()
        self._busy = False

    # ------------------------------------------------------------------
    # busy property
    # ------------------------------------------------------------------

    def _get_busy(self) -> bool:
        return self._busy

    def _set_busy(self, value: bool) -> None:
        # Only notify on an actual change of state.
        if self._busy == value:
            return
        self._busy = value
        self.busyChanged.emit()

    busy = Property(bool, _get_busy, _set_busy, notify=busyChanged)

    # ------------------------------------------------------------------
    # Slots called from QML
    # ------------------------------------------------------------------

    @Slot(str, str, bool, bool)
    def runCheck(
        self,
        samplesheet_path: str,
        folder_path: str,
        dry_run: bool,
        ignore_id: bool,
    ) -> None:
        """Queue ``_run_check`` (the raw-file-order check) on a worker thread."""
        check_args = (samplesheet_path, folder_path, dry_run, ignore_id)
        self._dispatch(_run_check, *check_args)

    @Slot(str, list, str, str, bool, bool)
    def runCreate(
        self,
        samplesheet_path: str,
        file_paths: list,
        output_folder: str,
        software: str,
        dry_run: bool,
        ignore_warnings: bool,
    ) -> None:
        """Queue ``_run_create`` (sample check and CSV writing) on a worker thread."""
        create_args = (
            samplesheet_path,
            list(file_paths),
            output_folder,
            software,
            dry_run,
            ignore_warnings,
        )
        self._dispatch(_run_create, *create_args)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _dispatch(self, fn, *args) -> None:
        """Start ``fn(*args)`` in the thread pool unless a run is already active."""
        if self._busy:
            logger.warning("A run is already in progress — ignoring request")
            return

        self._set_busy(True)
        self.runStarted.emit()

        task = _Worker(fn, *args)
        task.signals.finished.connect(self._on_finished)
        task.signals.failed.connect(self._on_failed)
        self._thread_pool.start(task)

    def _on_finished(self, errors: list, warnings: list, ok: list) -> None:
        """Clear the busy flag, then forward the worker's result lists."""
        self._set_busy(False)
        self.runFinished.emit(errors, warnings, ok)

    def _on_failed(self, message: str) -> None:
        """Clear the busy flag, log the traceback and forward it."""
        self._set_busy(False)
        logger.error("Worker failed:\n%s", message)
        self.runFailed.emit(message)
@@ -0,0 +1,177 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Samplesheet CLI
4
+ ===============
5
+
6
+ .. program:: samplesheet
7
+
8
+ Command-line interface for validating, parsing, and generating samplesheets.
9
+
10
+ This CLI provides two subcommands:
11
+
12
+ ``reparse``
13
+ Reparse Excel input files into quoted CSV format.
14
+
15
+ ``create``
16
+ Validate a samplesheet against integration files and generate derived
17
+ samplesheet CSV files.
18
+
19
+ Configuration is provided via a TOML file and merged with the default
20
+ :data:`base_config`.
21
+
22
+ Subcommands
23
+ -----------
24
+
25
+ reparse
26
+ ^^^^^^^
27
+ Reparse Excel files into quoted CSV files.
28
+
29
+ .. option:: reparse INPUTS [INPUTS ...]
30
+
31
+ One or more Excel files to reparse.
32
+
33
+ .. option:: -c, --config CONFIG
34
+
35
+ Optional TOML configuration file.
36
+
37
+ create
38
+ ^^^^^^
39
+ Validate a samplesheet and generate per-input CSV files.
40
+
41
+ .. option:: create FILES [FILES ...]
42
+
43
+ Integration files to process.
44
+
45
+ .. option:: -s, --samplesheet PATH
46
+
47
+ Path to the samplesheet Excel file.
48
+
49
+ .. option:: -o, --outdir DIR
50
+
51
+ Output directory for generated samplesheets.
52
+
53
+ .. option:: --dry-run
54
+
55
+ Do not write output files.
56
+
57
+ .. option:: --ignore-warnings
58
+
59
+ Save output even if warnings are present.
60
+
61
+ .. option:: -c, --config CONFIG
62
+
63
+ Optional TOML configuration file.
64
+ """
65
+
66
+ from __future__ import annotations
67
+
68
+ import logging
69
+ from pathlib import Path
70
+ import json
71
+ import sys
72
+ from typing import List
73
+ import pandas as pd
74
+
75
+ import csv
76
+ from .core.utils import (
77
+ parse_samplesheet,
78
+ check_samples,
79
+ build_parser,
80
+ check_raw_file_order,
81
+ configure_logging
82
+ )
83
+
84
+
85
def main(argv: list[str] | None = None) -> None:
    """
    Entry point for the samplesheet command-line interface.

    Parses the command line with the parser returned by ``build_parser`` and
    runs the selected subcommand:

    ``check``
        Parse the samplesheet and run ``check_raw_file_order`` on the first
        folder given on the command line.

    ``create``
        Parse the samplesheet, run ``check_samples`` for every input file and
        write ``<stem>_samplesheet.csv`` plus
        ``<stem>_samplesheet_schema.json`` into the output directory.

    Parameters
    ----------
    argv:
        Argument list to parse; ``None`` lets ``argparse`` read ``sys.argv``.

    Notes
    -----
    The function calls ``sys.exit(1)`` when:

    * the samplesheet has parse errors,
    * ``create`` is given no input files,
    * ``check_samples`` reports errors for an input file,
    * warnings are present and ``--ignore-warnings`` is not set (outside of
      ``--dry-run``).
    """

    configure_logging(level=logging.INFO)

    # This is necessary to build docs
    parser = build_parser()
    args = parser.parse_args(argv)

    # ---------------------
    # CHECK SAMPLE ORDER
    # ---------------------
    if args.command == "check":
        samplesheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(samplesheet)

        # Same policy as ``create``: never continue with a samplesheet that
        # failed to parse.
        if errors:
            logging.error("Samplesheet has errors; raw file order was not checked.")
            sys.exit(1)

        check_raw_file_order(
            folder_path=args.folder[0],
            dry_run=args.dry_run,
            samplesheet=samplesheet,
            ignore_id=args.ignore_id,
        )

    # ----------
    # CREATE
    # ----------
    if args.command == "create":

        files: list[str] = args.files

        if not files:
            logging.error("No input files provided. Use CLI arguments or config.")
            sys.exit(1)

        output_folder: str = args.outdir
        dry_run: bool = args.dry_run
        ignore_warnings: bool = args.ignore_warnings

        samplesheet = pd.read_excel(args.samplesheet, header=None)
        samplesheet, errors, _warnings, _ok, _col_description = parse_samplesheet(samplesheet)

        if errors:
            sys.exit(1)

        for file_path in files:
            processed_sheet, message, errors, warnings, _ok = check_samples(
                samplesheet,
                file_path,
                software=args.software,
            )

            stem = Path(file_path).stem
            output_file: Path = Path(output_folder) / f"{stem}_samplesheet.csv"
            schema_file: Path = Path(output_folder) / f"{stem}_samplesheet_schema.json"

            # Errors must never produce an output file, with or without
            # --dry-run.
            if errors:
                logging.error("Errors were found for '%s'. No output written.", file_path)
                sys.exit(1)

            if dry_run:
                print("[DRY-RUN] File not saved.")
                continue

            if warnings and not ignore_warnings:
                logging.warning("Warnings were found. Use --ignore-warnings to proceed.")
                sys.exit(1)

            # Make sure the destination exists before the first write.
            output_file.parent.mkdir(parents=True, exist_ok=True)

            processed_sheet = processed_sheet.replace("<NA>", "").sort_values("sample_order")
            processed_sheet.to_csv(
                output_file,
                quotechar='"',
                quoting=csv.QUOTE_STRINGS,
                na_rep="",
                index=False,
            )

            print(f"Samplesheet saved as {output_file}. {message}")

            schema = {col: str(dtype) for col, dtype in processed_sheet.dtypes.items()}
            with open(schema_file, "w", encoding="utf-8") as f:
                json.dump(schema, f, indent=2)
            print(f"Schema file saved as {schema_file}")
175
+
176
+ if __name__ == "__main__":
177
+ main()
@@ -0,0 +1,6 @@
1
+ import pathlib
2
+ import tomllib
3
+
4
# Default configuration file shipped in the same directory as this module.
path = pathlib.Path(__file__).with_name("config.toml")

# tomllib.load() requires a file object opened in binary mode.
with open(path, "rb") as fp:
    base_config = tomllib.load(fp)
@@ -0,0 +1,7 @@
1
+ [output]
2
+ dir = "."
3
+
4
+ [run]
5
+ dry_run = false
6
+ ignore_warnings = false
7
+