samplesheet-parser 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/.github/workflows/ci.yml +1 -1
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/PKG-INFO +153 -6
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/README.md +149 -5
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/pyproject.toml +6 -1
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/__init__.py +3 -0
- samplesheet_parser-0.3.0/samplesheet_parser/cli.py +427 -0
- samplesheet_parser-0.3.0/samplesheet_parser/merger.py +683 -0
- samplesheet_parser-0.3.0/tests/test_cli.py +631 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_converter.py +47 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_diff.py +35 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_factory.py +11 -0
- samplesheet_parser-0.3.0/tests/test_merger.py +1271 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_parsers/test_v1.py +64 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_parsers/test_v2.py +117 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_validators/test_validators.py +34 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_writer.py +110 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/.github/workflows/copilot-instructions.md +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/.gitignore +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/CHANGELOG.md +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/CONTRIBUTING.md +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/LICENSE +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/parse_examples.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/README.md +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v1_dual_index.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v1_multi_lane.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v1_single_index.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v1_with_lab_qc_settings.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v1_with_manifests.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v2_nextseq_single_index.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v2_novaseq_x_dual_index.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v2_with_cloud_settings.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v2_with_index_umi.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v2_with_pipeline_settings.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/examples/sample_sheets/v2_with_read_umi.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/images/samplesheet_parser_overview.png +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/converter.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/diff.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/enums.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/factory.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/parsers/__init__.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/parsers/v1.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/parsers/v2.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/validators.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/samplesheet_parser/writer.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/scripts/demo_converter.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/scripts/demo_diff.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/scripts/demo_writer.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/__init__.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/conftest.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/fixtures/SampleSheet_v1_dual_index.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/fixtures/SampleSheet_v2_dual_index.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/fixtures/SampleSheet_v2_modified.csv +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_parsers/__init__.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_validators/__init__.py +0 -0
- {samplesheet_parser-0.2.1 → samplesheet_parser-0.3.0}/tests/test_validators/test_hamming.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: samplesheet-parser
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2
|
|
5
5
|
Project-URL: Homepage, https://github.com/chaitanyakasaraneni/samplesheet-parser
|
|
6
6
|
Project-URL: Documentation, https://illumina-samplesheet.readthedocs.io
|
|
@@ -38,19 +38,22 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
|
38
38
|
Classifier: Typing :: Typed
|
|
39
39
|
Requires-Python: >=3.12
|
|
40
40
|
Requires-Dist: loguru>=0.7
|
|
41
|
+
Provides-Extra: cli
|
|
42
|
+
Requires-Dist: typer>=0.9; extra == 'cli'
|
|
41
43
|
Provides-Extra: dev
|
|
42
44
|
Requires-Dist: black>=24.0; extra == 'dev'
|
|
43
45
|
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
44
46
|
Requires-Dist: pytest-cov>=4.1; extra == 'dev'
|
|
45
47
|
Requires-Dist: pytest>=7.4; extra == 'dev'
|
|
46
48
|
Requires-Dist: ruff>=0.3; extra == 'dev'
|
|
49
|
+
Requires-Dist: typer>=0.9; extra == 'dev'
|
|
47
50
|
Description-Content-Type: text/markdown
|
|
48
51
|
|
|
49
52
|
# samplesheet-parser
|
|
50
53
|
|
|
51
54
|
**Format-agnostic parser for Illumina SampleSheet.csv files.**
|
|
52
55
|
|
|
53
|
-
Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConvert V2 format (NovaSeq X series) — with automatic format detection, bidirectional conversion, index validation, Hamming distance checking, diff comparison,
|
|
56
|
+
Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConvert V2 format (NovaSeq X series) — with automatic format detection, bidirectional conversion, index validation, Hamming distance checking, diff comparison, multi-sheet merging, programmatic sheet creation, and a full-featured CLI.
|
|
54
57
|
|
|
55
58
|
[](https://pypi.org/project/samplesheet-parser/)
|
|
56
59
|
[](https://www.python.org/downloads/)
|
|
@@ -60,7 +63,7 @@ Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConver
|
|
|
60
63
|
|
|
61
64
|

|
|
62
65
|
|
|
63
|
-
*`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, and `SampleSheetWriter` builds or edits sheets programmatically.*
|
|
66
|
+
*`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, `SampleSheetMerger` combines multiple per-project sheets into one, and `SampleSheetWriter` builds or edits sheets programmatically. The `samplesheet` CLI exposes all of this from the shell.*
|
|
64
67
|
|
|
65
68
|
---
|
|
66
69
|
|
|
@@ -75,10 +78,14 @@ Existing tools either hard-code one format or require the caller to know which f
|
|
|
75
78
|
## Installation
|
|
76
79
|
|
|
77
80
|
```bash
|
|
81
|
+
# Core library only
|
|
78
82
|
pip install samplesheet-parser
|
|
83
|
+
|
|
84
|
+
# With the CLI (adds typer)
|
|
85
|
+
pip install "samplesheet-parser[cli]"
|
|
79
86
|
```
|
|
80
87
|
|
|
81
|
-
Requires Python 3.10+. No mandatory dependencies beyond `loguru`.
|
|
88
|
+
Requires Python 3.10+. No mandatory runtime dependencies beyond `loguru`.
|
|
82
89
|
|
|
83
90
|
---
|
|
84
91
|
|
|
@@ -225,6 +232,106 @@ converts format while editing.
|
|
|
225
232
|
|
|
226
233
|
---
|
|
227
234
|
|
|
235
|
+
|
|
236
|
+
### Merge multiple sheets
|
|
237
|
+
|
|
238
|
+
Combine per-project sheets from a single run into one merged sheet.
|
|
239
|
+
Conflicts (index collisions, read-length mismatches, adapter disagreements)
|
|
240
|
+
are surfaced as structured results rather than silent failures.
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
from samplesheet_parser import SampleSheetMerger
|
|
244
|
+
from samplesheet_parser.enums import SampleSheetVersion
|
|
245
|
+
|
|
246
|
+
result = (
|
|
247
|
+
SampleSheetMerger(target_version=SampleSheetVersion.V2)
|
|
248
|
+
.add("ProjectA.csv")
|
|
249
|
+
.add("ProjectB.csv")
|
|
250
|
+
.add("ProjectC.csv")
|
|
251
|
+
.merge("SampleSheet_combined.csv")
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
print(result.summary())
|
|
255
|
+
# Merged 3 sheet(s) → SampleSheet_combined.csv (12 samples) — 0 conflict(s), 0 warning(s)
|
|
256
|
+
|
|
257
|
+
if result.has_conflicts:
|
|
258
|
+
for c in result.conflicts:
|
|
259
|
+
print(c)
|
|
260
|
+
# [CONFLICT] INDEX_COLLISION: Index 'ATTACTCG+TATAGCCT' in lane 1
|
|
261
|
+
# appears in both ProjectA.csv and ProjectB.csv
|
|
262
|
+
|
|
263
|
+
for w in result.warnings:
|
|
264
|
+
print(w)
|
|
265
|
+
# [WARNING] MIXED_FORMAT: Input sheets are a mix of V1 and V2 formats.
|
|
266
|
+
# All will be converted to V2 for output.
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Mixed V1/V2 inputs are automatically converted to the target format.
|
|
270
|
+
Pass `abort_on_conflicts=False` to write output even when conflicts exist.
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
## CLI
|
|
275
|
+
|
|
276
|
+
Install the CLI extra and use the `samplesheet` command directly from the shell:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
pip install "samplesheet-parser[cli]"
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### validate
|
|
283
|
+
|
|
284
|
+
```bash
|
|
285
|
+
# Text output — exit 0 if clean, exit 1 if errors
|
|
286
|
+
samplesheet validate SampleSheet.csv
|
|
287
|
+
|
|
288
|
+
# JSON output for CI pipelines
|
|
289
|
+
samplesheet validate SampleSheet.csv --format json
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### convert
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
samplesheet convert SampleSheet_v1.csv --to v2 --output SampleSheet_v2.csv
|
|
296
|
+
samplesheet convert SampleSheet_v2.csv --to v1 --output SampleSheet_v1.csv
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### diff
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
# Exit 0 if identical, exit 1 if any differences detected
|
|
303
|
+
samplesheet diff old/SampleSheet.csv new/SampleSheet.csv
|
|
304
|
+
|
|
305
|
+
# JSON output for scripting
|
|
306
|
+
samplesheet diff old/SampleSheet.csv new/SampleSheet.csv --format json
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
### merge
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
# Clean merge — exit 0
|
|
313
|
+
samplesheet merge ProjectA.csv ProjectB.csv --output combined.csv
|
|
314
|
+
|
|
315
|
+
# Merge three sheets to V1 format
|
|
316
|
+
samplesheet merge ProjectA.csv ProjectB.csv ProjectC.csv --to v1 --output combined.csv
|
|
317
|
+
|
|
318
|
+
# Write output even if conflicts are found
|
|
319
|
+
samplesheet merge ProjectA.csv ProjectB.csv --output combined.csv --force
|
|
320
|
+
|
|
321
|
+
# JSON output
|
|
322
|
+
samplesheet merge ProjectA.csv ProjectB.csv --output combined.csv --format json
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
**Exit codes** (all commands):
|
|
326
|
+
|
|
327
|
+
| Code | Meaning |
|
|
328
|
+
|---|---|
|
|
329
|
+
| `0` | Success / no issues |
|
|
330
|
+
| `1` | Errors found (invalid sheet, conflicts, differences detected) |
|
|
331
|
+
| `2` | Usage error (missing file, bad argument) |
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
228
335
|
## Format detection logic
|
|
229
336
|
|
|
230
337
|
The factory uses a three-step detection strategy — no format hints required from the caller:
|
|
@@ -272,6 +379,22 @@ result = ValidationResult()
|
|
|
272
379
|
SampleSheetValidator()._check_index_distances(samples, result, min_distance=4)
|
|
273
380
|
```
|
|
274
381
|
|
|
382
|
+
---
|
|
383
|
+
|
|
384
|
+
## Merger conflict and warning codes
|
|
385
|
+
|
|
386
|
+
| Code | Level | Description |
|
|
387
|
+
|---|---|---|
|
|
388
|
+
| `PARSE_ERROR` | conflict | An input sheet could not be parsed |
|
|
389
|
+
| `INDEX_COLLISION` | conflict | The same index appears in the same lane across two sheets |
|
|
390
|
+
| `READ_LENGTH_CONFLICT` | conflict | Sheets specify different read lengths or cycle counts |
|
|
391
|
+
| `MERGE_VALIDATION_ERROR` | conflict | Post-merge validation of the combined sheet failed |
|
|
392
|
+
| `MIXED_FORMAT` | warning | Input sheets are a mix of V1 and V2 formats |
|
|
393
|
+
| `INDEX_DISTANCE_TOO_LOW` | warning | Cross-sheet index pair has Hamming distance below threshold |
|
|
394
|
+
| `ADAPTER_CONFLICT` | warning | Adapter sequences differ between sheets (primary sheet adapters are used) |
|
|
395
|
+
| `INCOMPLETE_SAMPLE_RECORD` | warning | A sample row is missing `Sample_ID` or index and was skipped |
|
|
396
|
+
|
|
397
|
+
|
|
275
398
|
---
|
|
276
399
|
|
|
277
400
|
## Diff
|
|
@@ -409,12 +532,36 @@ sheet.get_read_structure() # → ReadStructure dataclass
|
|
|
409
532
|
|
|
410
533
|
---
|
|
411
534
|
|
|
535
|
+
---
|
|
536
|
+
|
|
537
|
+
### `SampleSheetMerger`
|
|
538
|
+
|
|
539
|
+
| Method / attribute | Returns | Description |
|
|
540
|
+
|---|---|---|
|
|
541
|
+
| `SampleSheetMerger(target_version=)` | — | Instantiate; default target is `SampleSheetVersion.V2` |
|
|
542
|
+
| `add(path)` | `self` | Register an input sheet path (fluent) |
|
|
543
|
+
| `merge(output_path, *, validate=True, abort_on_conflicts=True)` | `MergeResult` | Run the merge and write output |
|
|
544
|
+
|
|
545
|
+
### `MergeResult`
|
|
546
|
+
|
|
547
|
+
| Attribute / method | Type | Description |
|
|
548
|
+
|---|---|---|
|
|
549
|
+
| `has_conflicts` | `bool` | `True` if any conflict was recorded |
|
|
550
|
+
| `sample_count` | `int` | Number of samples in the merged output |
|
|
551
|
+
| `output_path` | `Path \| None` | Path written; `None` if write was aborted |
|
|
552
|
+
| `source_versions` | `dict[str, str]` | Per-input-file detected format version |
|
|
553
|
+
| `conflicts` | `list[MergeConflict]` | Structured conflict records |
|
|
554
|
+
| `warnings` | `list[MergeConflict]` | Structured warning records |
|
|
555
|
+
| `summary()` | `str` | Human-readable one-line summary |
|
|
556
|
+
|
|
557
|
+
---
|
|
558
|
+
|
|
412
559
|
## Contributing
|
|
413
560
|
|
|
414
561
|
```bash
|
|
415
562
|
git clone https://github.com/chaitanyakasaraneni/samplesheet-parser
|
|
416
563
|
cd samplesheet-parser
|
|
417
|
-
pip install -e ".[dev]"
|
|
564
|
+
pip install -e ".[dev,cli]"
|
|
418
565
|
|
|
419
566
|
# Run tests
|
|
420
567
|
pytest tests/ -v
|
|
@@ -437,7 +584,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
|
|
|
437
584
|
title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
|
|
438
585
|
year = {2026},
|
|
439
586
|
url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
|
|
440
|
-
version = {0.
|
|
587
|
+
version = {0.3.0}
|
|
441
588
|
}
|
|
442
589
|
```
|
|
443
590
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**Format-agnostic parser for Illumina SampleSheet.csv files.**
|
|
4
4
|
|
|
5
|
-
Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConvert V2 format (NovaSeq X series) — with automatic format detection, bidirectional conversion, index validation, Hamming distance checking, diff comparison,
|
|
5
|
+
Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConvert V2 format (NovaSeq X series) — with automatic format detection, bidirectional conversion, index validation, Hamming distance checking, diff comparison, multi-sheet merging, programmatic sheet creation, and a full-featured CLI.
|
|
6
6
|
|
|
7
7
|
[](https://pypi.org/project/samplesheet-parser/)
|
|
8
8
|
[](https://www.python.org/downloads/)
|
|
@@ -12,7 +12,7 @@ Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConver
|
|
|
12
12
|
|
|
13
13
|

|
|
14
14
|
|
|
15
|
-
*`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, and `SampleSheetWriter` builds or edits sheets programmatically.*
|
|
15
|
+
*`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, `SampleSheetMerger` combines multiple per-project sheets into one, and `SampleSheetWriter` builds or edits sheets programmatically. The `samplesheet` CLI exposes all of this from the shell.*
|
|
16
16
|
|
|
17
17
|
---
|
|
18
18
|
|
|
@@ -27,10 +27,14 @@ Existing tools either hard-code one format or require the caller to know which f
|
|
|
27
27
|
## Installation
|
|
28
28
|
|
|
29
29
|
```bash
|
|
30
|
+
# Core library only
|
|
30
31
|
pip install samplesheet-parser
|
|
32
|
+
|
|
33
|
+
# With the CLI (adds typer)
|
|
34
|
+
pip install "samplesheet-parser[cli]"
|
|
31
35
|
```
|
|
32
36
|
|
|
33
|
-
Requires Python 3.10+. No mandatory dependencies beyond `loguru`.
|
|
37
|
+
Requires Python 3.10+. No mandatory runtime dependencies beyond `loguru`.
|
|
34
38
|
|
|
35
39
|
---
|
|
36
40
|
|
|
@@ -177,6 +181,106 @@ converts format while editing.
|
|
|
177
181
|
|
|
178
182
|
---
|
|
179
183
|
|
|
184
|
+
|
|
185
|
+
### Merge multiple sheets
|
|
186
|
+
|
|
187
|
+
Combine per-project sheets from a single run into one merged sheet.
|
|
188
|
+
Conflicts (index collisions, read-length mismatches, adapter disagreements)
|
|
189
|
+
are surfaced as structured results rather than silent failures.
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from samplesheet_parser import SampleSheetMerger
|
|
193
|
+
from samplesheet_parser.enums import SampleSheetVersion
|
|
194
|
+
|
|
195
|
+
result = (
|
|
196
|
+
SampleSheetMerger(target_version=SampleSheetVersion.V2)
|
|
197
|
+
.add("ProjectA.csv")
|
|
198
|
+
.add("ProjectB.csv")
|
|
199
|
+
.add("ProjectC.csv")
|
|
200
|
+
.merge("SampleSheet_combined.csv")
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
print(result.summary())
|
|
204
|
+
# Merged 3 sheet(s) → SampleSheet_combined.csv (12 samples) — 0 conflict(s), 0 warning(s)
|
|
205
|
+
|
|
206
|
+
if result.has_conflicts:
|
|
207
|
+
for c in result.conflicts:
|
|
208
|
+
print(c)
|
|
209
|
+
# [CONFLICT] INDEX_COLLISION: Index 'ATTACTCG+TATAGCCT' in lane 1
|
|
210
|
+
# appears in both ProjectA.csv and ProjectB.csv
|
|
211
|
+
|
|
212
|
+
for w in result.warnings:
|
|
213
|
+
print(w)
|
|
214
|
+
# [WARNING] MIXED_FORMAT: Input sheets are a mix of V1 and V2 formats.
|
|
215
|
+
# All will be converted to V2 for output.
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Mixed V1/V2 inputs are automatically converted to the target format.
|
|
219
|
+
Pass `abort_on_conflicts=False` to write output even when conflicts exist.
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## CLI
|
|
224
|
+
|
|
225
|
+
Install the CLI extra and use the `samplesheet` command directly from the shell:
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
pip install "samplesheet-parser[cli]"
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### validate
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
# Text output — exit 0 if clean, exit 1 if errors
|
|
235
|
+
samplesheet validate SampleSheet.csv
|
|
236
|
+
|
|
237
|
+
# JSON output for CI pipelines
|
|
238
|
+
samplesheet validate SampleSheet.csv --format json
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### convert
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
samplesheet convert SampleSheet_v1.csv --to v2 --output SampleSheet_v2.csv
|
|
245
|
+
samplesheet convert SampleSheet_v2.csv --to v1 --output SampleSheet_v1.csv
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### diff
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
# Exit 0 if identical, exit 1 if any differences detected
|
|
252
|
+
samplesheet diff old/SampleSheet.csv new/SampleSheet.csv
|
|
253
|
+
|
|
254
|
+
# JSON output for scripting
|
|
255
|
+
samplesheet diff old/SampleSheet.csv new/SampleSheet.csv --format json
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### merge
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
# Clean merge — exit 0
|
|
262
|
+
samplesheet merge ProjectA.csv ProjectB.csv --output combined.csv
|
|
263
|
+
|
|
264
|
+
# Merge three sheets to V1 format
|
|
265
|
+
samplesheet merge ProjectA.csv ProjectB.csv ProjectC.csv --to v1 --output combined.csv
|
|
266
|
+
|
|
267
|
+
# Write output even if conflicts are found
|
|
268
|
+
samplesheet merge ProjectA.csv ProjectB.csv --output combined.csv --force
|
|
269
|
+
|
|
270
|
+
# JSON output
|
|
271
|
+
samplesheet merge ProjectA.csv ProjectB.csv --output combined.csv --format json
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
**Exit codes** (all commands):
|
|
275
|
+
|
|
276
|
+
| Code | Meaning |
|
|
277
|
+
|---|---|
|
|
278
|
+
| `0` | Success / no issues |
|
|
279
|
+
| `1` | Errors found (invalid sheet, conflicts, differences detected) |
|
|
280
|
+
| `2` | Usage error (missing file, bad argument) |
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
180
284
|
## Format detection logic
|
|
181
285
|
|
|
182
286
|
The factory uses a three-step detection strategy — no format hints required from the caller:
|
|
@@ -224,6 +328,22 @@ result = ValidationResult()
|
|
|
224
328
|
SampleSheetValidator()._check_index_distances(samples, result, min_distance=4)
|
|
225
329
|
```
|
|
226
330
|
|
|
331
|
+
---
|
|
332
|
+
|
|
333
|
+
## Merger conflict and warning codes
|
|
334
|
+
|
|
335
|
+
| Code | Level | Description |
|
|
336
|
+
|---|---|---|
|
|
337
|
+
| `PARSE_ERROR` | conflict | An input sheet could not be parsed |
|
|
338
|
+
| `INDEX_COLLISION` | conflict | The same index appears in the same lane across two sheets |
|
|
339
|
+
| `READ_LENGTH_CONFLICT` | conflict | Sheets specify different read lengths or cycle counts |
|
|
340
|
+
| `MERGE_VALIDATION_ERROR` | conflict | Post-merge validation of the combined sheet failed |
|
|
341
|
+
| `MIXED_FORMAT` | warning | Input sheets are a mix of V1 and V2 formats |
|
|
342
|
+
| `INDEX_DISTANCE_TOO_LOW` | warning | Cross-sheet index pair has Hamming distance below threshold |
|
|
343
|
+
| `ADAPTER_CONFLICT` | warning | Adapter sequences differ between sheets (primary sheet adapters are used) |
|
|
344
|
+
| `INCOMPLETE_SAMPLE_RECORD` | warning | A sample row is missing `Sample_ID` or index and was skipped |
|
|
345
|
+
|
|
346
|
+
|
|
227
347
|
---
|
|
228
348
|
|
|
229
349
|
## Diff
|
|
@@ -361,12 +481,36 @@ sheet.get_read_structure() # → ReadStructure dataclass
|
|
|
361
481
|
|
|
362
482
|
---
|
|
363
483
|
|
|
484
|
+
---
|
|
485
|
+
|
|
486
|
+
### `SampleSheetMerger`
|
|
487
|
+
|
|
488
|
+
| Method / attribute | Returns | Description |
|
|
489
|
+
|---|---|---|
|
|
490
|
+
| `SampleSheetMerger(target_version=)` | — | Instantiate; default target is `SampleSheetVersion.V2` |
|
|
491
|
+
| `add(path)` | `self` | Register an input sheet path (fluent) |
|
|
492
|
+
| `merge(output_path, *, validate=True, abort_on_conflicts=True)` | `MergeResult` | Run the merge and write output |
|
|
493
|
+
|
|
494
|
+
### `MergeResult`
|
|
495
|
+
|
|
496
|
+
| Attribute / method | Type | Description |
|
|
497
|
+
|---|---|---|
|
|
498
|
+
| `has_conflicts` | `bool` | `True` if any conflict was recorded |
|
|
499
|
+
| `sample_count` | `int` | Number of samples in the merged output |
|
|
500
|
+
| `output_path` | `Path \| None` | Path written; `None` if write was aborted |
|
|
501
|
+
| `source_versions` | `dict[str, str]` | Per-input-file detected format version |
|
|
502
|
+
| `conflicts` | `list[MergeConflict]` | Structured conflict records |
|
|
503
|
+
| `warnings` | `list[MergeConflict]` | Structured warning records |
|
|
504
|
+
| `summary()` | `str` | Human-readable one-line summary |
|
|
505
|
+
|
|
506
|
+
---
|
|
507
|
+
|
|
364
508
|
## Contributing
|
|
365
509
|
|
|
366
510
|
```bash
|
|
367
511
|
git clone https://github.com/chaitanyakasaraneni/samplesheet-parser
|
|
368
512
|
cd samplesheet-parser
|
|
369
|
-
pip install -e ".[dev]"
|
|
513
|
+
pip install -e ".[dev,cli]"
|
|
370
514
|
|
|
371
515
|
# Run tests
|
|
372
516
|
pytest tests/ -v
|
|
@@ -389,7 +533,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
|
|
|
389
533
|
title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
|
|
390
534
|
year = {2026},
|
|
391
535
|
url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
|
|
392
|
-
version = {0.
|
|
536
|
+
version = {0.3.0}
|
|
393
537
|
}
|
|
394
538
|
```
|
|
395
539
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "samplesheet-parser"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -29,14 +29,19 @@ dependencies = [
|
|
|
29
29
|
]
|
|
30
30
|
|
|
31
31
|
[project.optional-dependencies]
|
|
32
|
+
cli = ["typer>=0.9"]
|
|
32
33
|
dev = [
|
|
33
34
|
"pytest>=7.4",
|
|
34
35
|
"pytest-cov>=4.1",
|
|
35
36
|
"black>=24.0",
|
|
36
37
|
"ruff>=0.3",
|
|
37
38
|
"mypy>=1.8",
|
|
39
|
+
"typer>=0.9",
|
|
38
40
|
]
|
|
39
41
|
|
|
42
|
+
[project.scripts]
|
|
43
|
+
samplesheet = "samplesheet_parser.cli:main"
|
|
44
|
+
|
|
40
45
|
[project.urls]
|
|
41
46
|
Homepage = "https://github.com/chaitanyakasaraneni/samplesheet-parser"
|
|
42
47
|
Documentation = "https://illumina-samplesheet.readthedocs.io"
|
|
@@ -36,6 +36,7 @@ from samplesheet_parser.converter import SampleSheetConverter
|
|
|
36
36
|
from samplesheet_parser.diff import DiffResult, SampleSheetDiff
|
|
37
37
|
from samplesheet_parser.enums import IndexType, SampleSheetVersion
|
|
38
38
|
from samplesheet_parser.factory import SampleSheetFactory
|
|
39
|
+
from samplesheet_parser.merger import MergeResult, SampleSheetMerger
|
|
39
40
|
from samplesheet_parser.parsers.v1 import SampleSheetV1
|
|
40
41
|
from samplesheet_parser.parsers.v2 import SampleSheetV2
|
|
41
42
|
from samplesheet_parser.validators import SampleSheetValidator, ValidationResult
|
|
@@ -53,5 +54,7 @@ __all__ = [
|
|
|
53
54
|
"SampleSheetDiff",
|
|
54
55
|
"DiffResult",
|
|
55
56
|
"SampleSheetWriter",
|
|
57
|
+
"SampleSheetMerger",
|
|
58
|
+
"MergeResult",
|
|
56
59
|
"__version__",
|
|
57
60
|
]
|