samplesheet-parser 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- samplesheet_parser-0.3.2/.zenodo.json +30 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/CHANGELOG.md +76 -0
- samplesheet_parser-0.3.2/CITATION.cff +25 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/PKG-INFO +2 -2
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/README.md +1 -1
- samplesheet_parser-0.3.2/examples/demo_merger.py +130 -0
- samplesheet_parser-0.3.2/examples/sample_sheets/ProjectAlpha_SampleSheet.csv +25 -0
- samplesheet_parser-0.3.2/examples/sample_sheets/ProjectBeta_SampleSheet.csv +25 -0
- samplesheet_parser-0.3.2/examples/sample_sheets/ProjectDelta_SampleSheet_collision.csv +24 -0
- samplesheet_parser-0.3.2/examples/sample_sheets/ProjectGamma_SampleSheet.csv +21 -0
- samplesheet_parser-0.3.2/examples/sample_sheets/combined_clean.csv +20 -0
- samplesheet_parser-0.3.2/examples/sample_sheets/combined_collision_forced.csv +19 -0
- samplesheet_parser-0.3.2/examples/sample_sheets/combined_mixed_formats.csv +23 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/pyproject.toml +1 -1
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/merger.py +39 -1
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/.github/workflows/ci.yml +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/.github/workflows/copilot-instructions.md +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/.gitignore +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/CONTRIBUTING.md +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/LICENSE +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/parse_examples.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/README.md +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_dual_index.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_multi_lane.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_single_index.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_with_lab_qc_settings.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_with_manifests.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_nextseq_single_index.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_novaseq_x_dual_index.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_cloud_settings.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_index_umi.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_pipeline_settings.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_read_umi.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/images/samplesheet_parser_overview.png +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/__init__.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/cli.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/converter.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/diff.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/enums.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/factory.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/parsers/__init__.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/parsers/v1.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/parsers/v2.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/validators.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/writer.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/scripts/demo_converter.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/scripts/demo_diff.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/scripts/demo_writer.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/__init__.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/conftest.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v1_dual_index.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v2_dual_index.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v2_modified.csv +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_cli.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_converter.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_diff.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_factory.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_merger.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_parsers/__init__.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_parsers/test_v1.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_parsers/test_v2.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_validators/__init__.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_validators/test_hamming.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_validators/test_validators.py +0 -0
- {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_writer.py +0 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "samplesheet-parser: A Python library for parsing, validating, converting, and merging Illumina SampleSheet V1 and V2 files",
|
|
3
|
+
"description": "Open-source Python library providing format auto-detection, bidirectional V1/V2 conversion, structural and index validation, sheet diffing, and cross-project merging for Illumina SampleSheet files.",
|
|
4
|
+
"upload_type": "software",
|
|
5
|
+
"access_right": "open",
|
|
6
|
+
"license": "Apache-2.0",
|
|
7
|
+
"version": "0.3.2",
|
|
8
|
+
"keywords": [
|
|
9
|
+
"bioinformatics", "Illumina", "SampleSheet", "BCLConvert",
|
|
10
|
+
"demultiplexing", "genomics", "sequencing", "Python"
|
|
11
|
+
],
|
|
12
|
+
"creators": [
|
|
13
|
+
{
|
|
14
|
+
"name": "Kasaraneni, Chaitanya Krishna",
|
|
15
|
+
"orcid": "0000-0001-5792-1095"
|
|
16
|
+
}
|
|
17
|
+
],
|
|
18
|
+
"related_identifiers": [
|
|
19
|
+
{
|
|
20
|
+
"identifier": "https://github.com/chaitanyakasaraneni/samplesheet-parser",
|
|
21
|
+
"relation": "isSupplementTo",
|
|
22
|
+
"scheme": "url"
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"identifier": "https://pypi.org/project/samplesheet-parser/",
|
|
26
|
+
"relation": "isIdenticalTo",
|
|
27
|
+
"scheme": "url"
|
|
28
|
+
}
|
|
29
|
+
]
|
|
30
|
+
}
|
|
@@ -6,6 +6,82 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [0.3.2] - 2026-03-12
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- `.zenodo.json` metadata file for automatic Zenodo archival and DOI
|
|
13
|
+
minting on GitHub releases
|
|
14
|
+
- `CITATION.cff` file enabling GitHub's "Cite this repository" button
|
|
15
|
+
and standardized software citation for downstream users
|
|
16
|
+
|
|
17
|
+
## [0.3.1] - 2026-03-11
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
|
|
21
|
+
- **`SampleSheetMerger`** — `INDEX_DISTANCE_TOO_LOW` and `DUPLICATE_INDEX`
|
|
22
|
+
were reported twice in `--force` merges (once by the pre-merge cross-sheet
|
|
23
|
+
check, once by the post-merge validator). Duplicate codes are now suppressed
|
|
24
|
+
in `_validate_merged` — the more descriptive pre-merge message is always
|
|
25
|
+
preferred.
|
|
26
|
+
|
|
27
|
+
## [0.3.0] - 2026-03-10
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
|
|
31
|
+
- **`SampleSheetMerger`** — combines multiple per-project sample sheets into a
|
|
32
|
+
single sheet for a flow cell run.
|
|
33
|
+
- `add(path)` — register an input sheet (V1 or V2); mixed formats are
|
|
34
|
+
auto-converted to the target version before merging.
|
|
35
|
+
- `merge(output_path, validate=True, abort_on_conflicts=True)` — merges all
|
|
36
|
+
registered sheets, writes the combined output, and returns a `MergeResult`.
|
|
37
|
+
- **Index collision detection** — raises a conflict when two samples share
|
|
38
|
+
the same lane and index sequence across project boundaries.
|
|
39
|
+
- **Hamming distance check** — warns when the combined I7+I5 distance between
|
|
40
|
+
any two samples across sheets falls below 3.
|
|
41
|
+
- **Read-length conflict detection** — raises a conflict when registered
|
|
42
|
+
sheets specify incompatible `Read1Cycles`/`Read2Cycles` (V2) or `[Reads]`
|
|
43
|
+
lengths (V1).
|
|
44
|
+
- **Adapter conflict detection** — warns when adapter sequences differ across
|
|
45
|
+
sheets.
|
|
46
|
+
- **Mixed-format warning** — emits a warning when V1 and V2 sheets are
|
|
47
|
+
combined, with the auto-conversion strategy logged.
|
|
48
|
+
- `MergeResult` dataclass — exposes `conflicts`, `warnings`, `sample_count`,
|
|
49
|
+
`source_versions`, `output_path`, `has_conflicts`, and `summary()`;
|
|
50
|
+
consistent with `ValidationResult` and `DiffResult`.
|
|
51
|
+
- `abort_on_conflicts=True` (default) — skips writing the output file when
|
|
52
|
+
any conflict is present; set `False` (via `--force` in the CLI) to write
|
|
53
|
+
despite conflicts.
|
|
54
|
+
- `SampleSheetMerger` and `MergeResult` are exported from the top-level
|
|
55
|
+
package.
|
|
56
|
+
|
|
57
|
+
- **`samplesheet` CLI** — command-line interface exposing the four core
|
|
58
|
+
operations, available as an optional extra (`pip install
|
|
59
|
+
"samplesheet-parser[cli]"`; adds `typer` as a dependency).
|
|
60
|
+
- `samplesheet validate <file>` — exits 0 if clean, 1 if errors, 2 on
|
|
61
|
+
usage/parse errors. Supports `--format json` for machine-readable output.
|
|
62
|
+
- `samplesheet convert <file> --to <v1|v2> --output <path>` — converts
|
|
63
|
+
between formats; exits 0 on success, 1 on conversion error, 2 on bad
|
|
64
|
+
arguments.
|
|
65
|
+
- `samplesheet diff <old> <new>` — exits 0 if identical, 1 if differences
|
|
66
|
+
detected (useful in CI pre-run checks). Supports `--format json`.
|
|
67
|
+
- `samplesheet merge <files...> --output <path>` — merges two or more sheets;
|
|
68
|
+
exits 0 on clean merge, 1 on conflicts or warnings, 2 on bad arguments.
|
|
69
|
+
Supports `--force`, `--to <v1|v2>`, and `--format json`.
|
|
70
|
+
- All commands print errors to stderr and structured data to stdout.
|
|
71
|
+
- Entry point configured in `pyproject.toml`:
|
|
72
|
+
`samplesheet = "samplesheet_parser.cli:main"`.
|
|
73
|
+
- Module imports cleanly without `typer` installed — missing-extra error is
|
|
74
|
+
surfaced only at invocation time.
|
|
75
|
+
|
|
76
|
+
### Changed
|
|
77
|
+
|
|
78
|
+
- README updated to document `SampleSheetMerger`, the `samplesheet` CLI, all
|
|
79
|
+
new API reference tables, and installation instructions for the `[cli]` extra.
|
|
80
|
+
- `CONTRIBUTING.md` updated with CLI testing instructions and the new
|
|
81
|
+
`[dev,cli]` install target.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
9
85
|
## [0.2.0] - 2026-02-25
|
|
10
86
|
|
|
11
87
|
### Added
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use this software, please cite it as below."
|
|
3
|
+
type: software
|
|
4
|
+
title: "samplesheet-parser"
|
|
5
|
+
version: 0.3.2
|
|
6
|
+
date-released: 2026-03-12
|
|
7
|
+
license: Apache-2.0
|
|
8
|
+
url: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
|
|
9
|
+
repository-code: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
|
|
10
|
+
abstract: >
|
|
11
|
+
A Python library for parsing, validating, converting, and merging
|
|
12
|
+
Illumina SampleSheet V1 and V2 files for BCLConvert and bcl2fastq.
|
|
13
|
+
keywords:
|
|
14
|
+
- bioinformatics
|
|
15
|
+
- Illumina
|
|
16
|
+
- SampleSheet
|
|
17
|
+
- BCLConvert
|
|
18
|
+
- demultiplexing
|
|
19
|
+
- genomics
|
|
20
|
+
- Python
|
|
21
|
+
authors:
|
|
22
|
+
- family-names: Kasaraneni
|
|
23
|
+
given-names: Chaitanya Krishna
|
|
24
|
+
email: kc.kasaraneni@gmail.com
|
|
25
|
+
orcid: "https://orcid.org/0000-0001-5792-1095"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: samplesheet-parser
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2
|
|
5
5
|
Project-URL: Homepage, https://github.com/chaitanyakasaraneni/samplesheet-parser
|
|
6
6
|
Project-URL: Documentation, https://illumina-samplesheet.readthedocs.io
|
|
@@ -584,7 +584,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
|
|
|
584
584
|
title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
|
|
585
585
|
year = {2026},
|
|
586
586
|
url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
|
|
587
|
-
version = {0.3.0}
|
|
587
|
+
version = {0.3.2}
|
|
588
588
|
}
|
|
589
589
|
```
|
|
590
590
|
|
|
@@ -533,7 +533,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
|
|
|
533
533
|
title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
|
|
534
534
|
year = {2026},
|
|
535
535
|
url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
|
|
536
|
-
version = {0.3.0}
|
|
536
|
+
version = {0.3.2}
|
|
537
537
|
}
|
|
538
538
|
```
|
|
539
539
|
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
demo_merger.py — SampleSheetMerger usage examples.
|
|
3
|
+
|
|
4
|
+
Demonstrates three scenarios:
|
|
5
|
+
|
|
6
|
+
1. Clean merge — two V1 sheets, no conflicts, output written.
|
|
7
|
+
2. Mixed formats — V1 + V1 + V2, auto-converted to a target version.
|
|
8
|
+
3. Conflict handling — index collision detected; abort vs. force write.
|
|
9
|
+
|
|
10
|
+
Run from the repo root::
|
|
11
|
+
|
|
12
|
+
python examples/demo_merger.py
|
|
13
|
+
|
|
14
|
+
Sample sheets used are in examples/sample_sheets/.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from samplesheet_parser import MergeResult, SampleSheetMerger
|
|
22
|
+
from samplesheet_parser.enums import SampleSheetVersion
|
|
23
|
+
|
|
24
|
+
SHEETS = Path(__file__).parent / "sample_sheets"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def print_result(result: MergeResult) -> None:
|
|
28
|
+
print(f" Summary : {result.summary()}")
|
|
29
|
+
print(f" Sample count : {result.sample_count}")
|
|
30
|
+
print(f" Has conflicts: {result.has_conflicts}")
|
|
31
|
+
print(f" Output path : {result.output_path}")
|
|
32
|
+
if result.warnings:
|
|
33
|
+
print(" Warnings:")
|
|
34
|
+
for w in result.warnings:
|
|
35
|
+
print(f" [{w.code}] {w.message}")
|
|
36
|
+
if result.conflicts:
|
|
37
|
+
print(" Conflicts:")
|
|
38
|
+
for c in result.conflicts:
|
|
39
|
+
print(f" [{c.code}] {c.message}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Scenario 1: Clean merge — two V1 sheets → combined V2
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
print("=" * 60)
|
|
47
|
+
print("Scenario 1: Clean merge (V1 + V1 → V2)")
|
|
48
|
+
print("=" * 60)
|
|
49
|
+
|
|
50
|
+
merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
|
|
51
|
+
merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
|
|
52
|
+
merger.add(SHEETS / "ProjectBeta_SampleSheet.csv")
|
|
53
|
+
|
|
54
|
+
result = merger.merge(
|
|
55
|
+
SHEETS / "combined_clean.csv",
|
|
56
|
+
validate=True,
|
|
57
|
+
abort_on_conflicts=True,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
print_result(result)
|
|
61
|
+
print()
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Scenario 2: Mixed formats — V1 + V1 + V2 → combined V2
|
|
65
|
+
# ProjectGamma supplies a V2 sheet; merger auto-converts all inputs to V2.
|
|
66
|
+
# Mixed-format inputs produce a warning but are not a conflict.
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
print("=" * 60)
|
|
70
|
+
print("Scenario 2: Mixed formats (V1 + V1 + V2 → V2)")
|
|
71
|
+
print("=" * 60)
|
|
72
|
+
|
|
73
|
+
merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
|
|
74
|
+
merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
|
|
75
|
+
merger.add(SHEETS / "ProjectBeta_SampleSheet.csv")
|
|
76
|
+
merger.add(SHEETS / "ProjectGamma_SampleSheet.csv")
|
|
77
|
+
|
|
78
|
+
result = merger.merge(
|
|
79
|
+
SHEETS / "combined_mixed_formats.csv",
|
|
80
|
+
validate=True,
|
|
81
|
+
abort_on_conflicts=True,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
print_result(result)
|
|
85
|
+
print()
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# Scenario 3a: Index collision — abort_on_conflicts=True (default)
|
|
89
|
+
# ProjectDelta reuses an index from ProjectAlpha on the same lane.
|
|
90
|
+
# The output file is NOT written; result.output_path is None.
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
print("=" * 60)
|
|
94
|
+
print("Scenario 3a: Index collision — abort (default)")
|
|
95
|
+
print("=" * 60)
|
|
96
|
+
|
|
97
|
+
merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
|
|
98
|
+
merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
|
|
99
|
+
merger.add(SHEETS / "ProjectDelta_SampleSheet_collision.csv")
|
|
100
|
+
|
|
101
|
+
result = merger.merge(
|
|
102
|
+
SHEETS / "combined_collision_aborted.csv",
|
|
103
|
+
validate=True,
|
|
104
|
+
abort_on_conflicts=True, # default — file not written on conflict
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
print_result(result)
|
|
108
|
+
print()
|
|
109
|
+
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
# Scenario 3b: Index collision — abort_on_conflicts=False (force write)
|
|
112
|
+
# Same collision as above, but the file is written anyway.
|
|
113
|
+
# Equivalent to passing --force on the CLI.
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
print("=" * 60)
|
|
117
|
+
print("Scenario 3b: Index collision — force write")
|
|
118
|
+
print("=" * 60)
|
|
119
|
+
|
|
120
|
+
merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
|
|
121
|
+
merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
|
|
122
|
+
merger.add(SHEETS / "ProjectDelta_SampleSheet_collision.csv")
|
|
123
|
+
|
|
124
|
+
result = merger.merge(
|
|
125
|
+
SHEETS / "combined_collision_forced.csv",
|
|
126
|
+
validate=True,
|
|
127
|
+
abort_on_conflicts=False, # write despite conflicts
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
print_result(result)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
IEMFileVersion,5
|
|
3
|
+
Experiment Name,240115_A01234_0042_AHJLG7DRXX
|
|
4
|
+
Date,2024-01-15
|
|
5
|
+
Workflow,GenerateFASTQ
|
|
6
|
+
Application,FASTQ Only
|
|
7
|
+
Instrument Type,NovaSeq 6000
|
|
8
|
+
Assay,TruSeq DNA PCR-Free
|
|
9
|
+
Index Adapters,TruSeq DNA UD Indexes (96 Indexes)
|
|
10
|
+
Chemistry,Amplicon
|
|
11
|
+
|
|
12
|
+
[Reads]
|
|
13
|
+
151
|
|
14
|
+
151
|
|
15
|
+
|
|
16
|
+
[Settings]
|
|
17
|
+
ReverseComplement,0
|
|
18
|
+
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
19
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
20
|
+
|
|
21
|
+
[Data]
|
|
22
|
+
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
|
|
23
|
+
1,AlphaSample1,AlphaSample1,,A01,UDP0001,CAAGACAGAT,UDP0001,ACTATAGCCT,ProjectAlpha,WGS_batch1
|
|
24
|
+
1,AlphaSample2,AlphaSample2,,B01,UDP0002,TGAACCTGAT,UDP0002,TGATACGTCC,ProjectAlpha,WGS_batch1
|
|
25
|
+
1,AlphaSample3,AlphaSample3,,C01,UDP0003,GCACAACGTT,UDP0003,CATCTCACAG,ProjectAlpha,WGS_batch1
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
IEMFileVersion,5
|
|
3
|
+
Experiment Name,240115_A01234_0042_AHJLG7DRXX
|
|
4
|
+
Date,2024-01-15
|
|
5
|
+
Workflow,GenerateFASTQ
|
|
6
|
+
Application,FASTQ Only
|
|
7
|
+
Instrument Type,NovaSeq 6000
|
|
8
|
+
Assay,TruSeq DNA PCR-Free
|
|
9
|
+
Index Adapters,TruSeq DNA UD Indexes (96 Indexes)
|
|
10
|
+
Chemistry,Amplicon
|
|
11
|
+
|
|
12
|
+
[Reads]
|
|
13
|
+
151
|
|
14
|
+
151
|
|
15
|
+
|
|
16
|
+
[Settings]
|
|
17
|
+
ReverseComplement,0
|
|
18
|
+
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
19
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
20
|
+
|
|
21
|
+
[Data]
|
|
22
|
+
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
|
|
23
|
+
1,BetaSample1,BetaSample1,,D01,UDP0004,ATCGCCTGTT,UDP0004,GACTAGCATG,ProjectBeta,WGS_batch2
|
|
24
|
+
1,BetaSample2,BetaSample2,,E01,UDP0005,CTTGTAGCAA,UDP0005,TGCGTCAGCC,ProjectBeta,WGS_batch2
|
|
25
|
+
1,BetaSample3,BetaSample3,,F01,UDP0006,GCGCGATGTT,UDP0006,CTATGCCGGT,ProjectBeta,WGS_batch2
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
IEMFileVersion,5
|
|
3
|
+
Experiment Name,240115_A01234_0042_AHJLG7DRXX
|
|
4
|
+
Date,2024-01-15
|
|
5
|
+
Workflow,GenerateFASTQ
|
|
6
|
+
Application,FASTQ Only
|
|
7
|
+
Instrument Type,NovaSeq 6000
|
|
8
|
+
Assay,TruSeq DNA PCR-Free
|
|
9
|
+
Index Adapters,TruSeq DNA UD Indexes (96 Indexes)
|
|
10
|
+
Chemistry,Amplicon
|
|
11
|
+
|
|
12
|
+
[Reads]
|
|
13
|
+
151
|
|
14
|
+
151
|
|
15
|
+
|
|
16
|
+
[Settings]
|
|
17
|
+
ReverseComplement,0
|
|
18
|
+
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
19
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
20
|
+
|
|
21
|
+
[Data]
|
|
22
|
+
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
|
|
23
|
+
1,DeltaSample1,DeltaSample1,,G01,UDP0001,CAAGACAGAT,UDP0001,ACTATAGCCT,ProjectDelta,WGS_batch3
|
|
24
|
+
1,DeltaSample2,DeltaSample2,,H01,UDP0009,AGTAAGCCGT,UDP0009,TTCCTGCAGT,ProjectDelta,WGS_batch3
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
FileFormatVersion,2
|
|
3
|
+
RunName,240115_A01234_0042_AHJLG7DRXX
|
|
4
|
+
InstrumentPlatform,NovaSeqXSeries
|
|
5
|
+
|
|
6
|
+
[Reads]
|
|
7
|
+
Read1Cycles,151
|
|
8
|
+
Read2Cycles,151
|
|
9
|
+
Index1Cycles,10
|
|
10
|
+
Index2Cycles,10
|
|
11
|
+
|
|
12
|
+
[BCLConvert_Settings]
|
|
13
|
+
SoftwareVersion,3.9.3
|
|
14
|
+
AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
15
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
16
|
+
|
|
17
|
+
[BCLConvert_Data]
|
|
18
|
+
Lane,Sample_ID,Sample_Name,Index,Index2,Sample_Project
|
|
19
|
+
1,GammaSample1,GammaSample1,TCCGCGAAGT,TCAGCCTTGT,ProjectGamma
|
|
20
|
+
1,GammaSample2,GammaSample2,AATGTTGCGT,GCAGCCTATT,ProjectGamma
|
|
21
|
+
1,GammaSample3,GammaSample3,GTTCCGTGAT,AGTCCTAGGT,ProjectGamma
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
FileFormatVersion,2
|
|
3
|
+
RunName,240115_A01234_0042_AHJLG7DRXX
|
|
4
|
+
|
|
5
|
+
[Reads]
|
|
6
|
+
Read1Cycles,151
|
|
7
|
+
Read2Cycles,151
|
|
8
|
+
|
|
9
|
+
[BCLConvert_Settings]
|
|
10
|
+
AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
11
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
12
|
+
|
|
13
|
+
[BCLConvert_Data]
|
|
14
|
+
Lane,Sample_ID,Index,Index2,Sample_Project
|
|
15
|
+
1,AlphaSample1,CAAGACAGAT,ACTATAGCCT,ProjectAlpha
|
|
16
|
+
1,AlphaSample2,TGAACCTGAT,TGATACGTCC,ProjectAlpha
|
|
17
|
+
1,AlphaSample3,GCACAACGTT,CATCTCACAG,ProjectAlpha
|
|
18
|
+
1,BetaSample1,ATCGCCTGTT,GACTAGCATG,ProjectBeta
|
|
19
|
+
1,BetaSample2,CTTGTAGCAA,TGCGTCAGCC,ProjectBeta
|
|
20
|
+
1,BetaSample3,GCGCGATGTT,CTATGCCGGT,ProjectBeta
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
FileFormatVersion,2
|
|
3
|
+
RunName,240115_A01234_0042_AHJLG7DRXX
|
|
4
|
+
|
|
5
|
+
[Reads]
|
|
6
|
+
Read1Cycles,151
|
|
7
|
+
Read2Cycles,151
|
|
8
|
+
|
|
9
|
+
[BCLConvert_Settings]
|
|
10
|
+
AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
11
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
12
|
+
|
|
13
|
+
[BCLConvert_Data]
|
|
14
|
+
Lane,Sample_ID,Index,Index2,Sample_Project
|
|
15
|
+
1,AlphaSample1,CAAGACAGAT,ACTATAGCCT,ProjectAlpha
|
|
16
|
+
1,AlphaSample2,TGAACCTGAT,TGATACGTCC,ProjectAlpha
|
|
17
|
+
1,AlphaSample3,GCACAACGTT,CATCTCACAG,ProjectAlpha
|
|
18
|
+
1,DeltaSample1,CAAGACAGAT,ACTATAGCCT,ProjectDelta
|
|
19
|
+
1,DeltaSample2,AGTAAGCCGT,TTCCTGCAGT,ProjectDelta
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
FileFormatVersion,2
|
|
3
|
+
RunName,240115_A01234_0042_AHJLG7DRXX
|
|
4
|
+
|
|
5
|
+
[Reads]
|
|
6
|
+
Read1Cycles,151
|
|
7
|
+
Read2Cycles,151
|
|
8
|
+
|
|
9
|
+
[BCLConvert_Settings]
|
|
10
|
+
AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
11
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
12
|
+
|
|
13
|
+
[BCLConvert_Data]
|
|
14
|
+
Lane,Sample_ID,Index,Index2,Sample_Project
|
|
15
|
+
1,AlphaSample1,CAAGACAGAT,ACTATAGCCT,ProjectAlpha
|
|
16
|
+
1,AlphaSample2,TGAACCTGAT,TGATACGTCC,ProjectAlpha
|
|
17
|
+
1,AlphaSample3,GCACAACGTT,CATCTCACAG,ProjectAlpha
|
|
18
|
+
1,BetaSample1,ATCGCCTGTT,GACTAGCATG,ProjectBeta
|
|
19
|
+
1,BetaSample2,CTTGTAGCAA,TGCGTCAGCC,ProjectBeta
|
|
20
|
+
1,BetaSample3,GCGCGATGTT,CTATGCCGGT,ProjectBeta
|
|
21
|
+
1,GammaSample1,TCCGCGAAGT,TCAGCCTTGT,ProjectGamma
|
|
22
|
+
1,GammaSample2,AATGTTGCGT,GCAGCCTATT,ProjectGamma
|
|
23
|
+
1,GammaSample3,GTTCCGTGAT,AGTCCTAGGT,ProjectGamma
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "samplesheet-parser"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "0.3.2"
|
|
8
8
|
description = "Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -122,6 +122,24 @@ class MergeResult:
|
|
|
122
122
|
)
|
|
123
123
|
|
|
124
124
|
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# Codes emitted by the cross-sheet pre-merge checks.
|
|
127
|
+
# These are deduplicated against post-merge validator output in
|
|
128
|
+
# _validate_merged() so the same issue is never reported twice.
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
# Validator codes that are fully covered by a pre-merge cross-sheet check
|
|
132
|
+
# and must be suppressed in _validate_merged() to avoid duplicate reporting.
|
|
133
|
+
_PRE_MERGE_CONFLICT_CODES: frozenset[str] = frozenset({
|
|
134
|
+
"INDEX_COLLISION", # covered by _check_index_collisions
|
|
135
|
+
"DUPLICATE_INDEX", # same root cause — validator sees result of collision
|
|
136
|
+
})
|
|
137
|
+
|
|
138
|
+
_PRE_MERGE_WARNING_CODES: frozenset[str] = frozenset({
|
|
139
|
+
"INDEX_DISTANCE_TOO_LOW", # covered by _check_index_distances
|
|
140
|
+
})
|
|
141
|
+
|
|
142
|
+
|
|
125
143
|
# ---------------------------------------------------------------------------
|
|
126
144
|
# Merger
|
|
127
145
|
# ---------------------------------------------------------------------------
|
|
@@ -648,7 +666,15 @@ class SampleSheetMerger:
|
|
|
648
666
|
writer: Any,
|
|
649
667
|
result: MergeResult,
|
|
650
668
|
) -> None:
|
|
651
|
-
"""Run SampleSheetValidator on the merged writer content."""
|
|
669
|
+
"""Run SampleSheetValidator on the merged writer content.
|
|
670
|
+
|
|
671
|
+
Codes already reported by the pre-merge cross-sheet checks
|
|
672
|
+
(``_PRE_MERGE_CONFLICT_CODES`` / ``_PRE_MERGE_WARNING_CODES``) are
|
|
673
|
+
suppressed here to avoid duplicate reporting in the final
|
|
674
|
+
``MergeResult``. The pre-merge messages are more descriptive (they
|
|
675
|
+
include source file names and sample IDs) so they are always
|
|
676
|
+
preferred over the generic post-merge validator output.
|
|
677
|
+
"""
|
|
652
678
|
import tempfile
|
|
653
679
|
|
|
654
680
|
content = writer.to_string()
|
|
@@ -677,7 +703,19 @@ class SampleSheetMerger:
|
|
|
677
703
|
Path(tmp_path).unlink(missing_ok=True)
|
|
678
704
|
|
|
679
705
|
for w in vresult.warnings:
|
|
706
|
+
if w.code in _PRE_MERGE_WARNING_CODES:
|
|
707
|
+
logger.debug(
|
|
708
|
+
f"Suppressing duplicate post-merge warning {w.code!r} "
|
|
709
|
+
"(already reported by pre-merge cross-sheet check)."
|
|
710
|
+
)
|
|
711
|
+
continue
|
|
680
712
|
result.add_warning(w.code, w.message, **w.context)
|
|
681
713
|
|
|
682
714
|
for e in vresult.errors:
|
|
715
|
+
if e.code in _PRE_MERGE_CONFLICT_CODES:
|
|
716
|
+
logger.debug(
|
|
717
|
+
f"Suppressing duplicate post-merge conflict {e.code!r} "
|
|
718
|
+
"(already reported by pre-merge cross-sheet check)."
|
|
719
|
+
)
|
|
720
|
+
continue
|
|
683
721
|
result.add_conflict(e.code, e.message, **e.context)
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/.github/workflows/copilot-instructions.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_dual_index.csv
RENAMED
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_multi_lane.csv
RENAMED
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_single_index.csv
RENAMED
|
File without changes
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_with_manifests.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_index_umi.csv
RENAMED
|
File without changes
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_read_umi.csv
RENAMED
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/images/samplesheet_parser_overview.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/parsers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v1_dual_index.csv
RENAMED
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v2_dual_index.csv
RENAMED
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v2_modified.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_validators/test_validators.py
RENAMED
|
File without changes
|
|
File without changes
|