samplesheet-parser 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/.github/workflows/ci.yml +1 -1
  2. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/PKG-INFO +2 -4
  3. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/parse_examples.py +31 -13
  4. samplesheet_parser-0.2.1/examples/sample_sheets/README.md +195 -0
  5. samplesheet_parser-0.2.1/examples/sample_sheets/v1_with_lab_qc_settings.csv +35 -0
  6. samplesheet_parser-0.2.1/examples/sample_sheets/v1_with_manifests.csv +32 -0
  7. samplesheet_parser-0.2.1/examples/sample_sheets/v2_with_cloud_settings.csv +32 -0
  8. samplesheet_parser-0.2.1/examples/sample_sheets/v2_with_pipeline_settings.csv +32 -0
  9. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/pyproject.toml +5 -7
  10. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/enums.py +5 -5
  11. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/factory.py +2 -1
  12. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/parsers/v1.py +188 -56
  13. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/parsers/v2.py +130 -7
  14. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/validators.py +10 -9
  15. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/conftest.py +243 -1
  16. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_parsers/test_v1.py +173 -3
  17. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_parsers/test_v2.py +191 -0
  18. samplesheet_parser-0.2.0/examples/sample_sheets/README.md +0 -92
  19. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/.github/workflows/copilot-instructions.md +0 -0
  20. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/.gitignore +0 -0
  21. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/CHANGELOG.md +0 -0
  22. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/CONTRIBUTING.md +0 -0
  23. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/LICENSE +0 -0
  24. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/README.md +0 -0
  25. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/sample_sheets/v1_dual_index.csv +0 -0
  26. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/sample_sheets/v1_multi_lane.csv +0 -0
  27. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/sample_sheets/v1_single_index.csv +0 -0
  28. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/sample_sheets/v2_nextseq_single_index.csv +0 -0
  29. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/sample_sheets/v2_novaseq_x_dual_index.csv +0 -0
  30. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/sample_sheets/v2_with_index_umi.csv +0 -0
  31. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/examples/sample_sheets/v2_with_read_umi.csv +0 -0
  32. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/images/samplesheet_parser_overview.png +0 -0
  33. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/__init__.py +0 -0
  34. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/converter.py +0 -0
  35. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/diff.py +0 -0
  36. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/parsers/__init__.py +0 -0
  37. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/samplesheet_parser/writer.py +0 -0
  38. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/scripts/demo_converter.py +0 -0
  39. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/scripts/demo_diff.py +0 -0
  40. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/scripts/demo_writer.py +0 -0
  41. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/__init__.py +0 -0
  42. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/fixtures/SampleSheet_v1_dual_index.csv +0 -0
  43. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/fixtures/SampleSheet_v2_dual_index.csv +0 -0
  44. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/fixtures/SampleSheet_v2_modified.csv +0 -0
  45. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_converter.py +0 -0
  46. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_diff.py +0 -0
  47. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_factory.py +0 -0
  48. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_parsers/__init__.py +0 -0
  49. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_validators/__init__.py +0 -0
  50. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_validators/test_hamming.py +0 -0
  51. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_validators/test_validators.py +0 -0
  52. {samplesheet_parser-0.2.0 → samplesheet_parser-0.2.1}/tests/test_writer.py +0 -0
@@ -13,7 +13,7 @@ jobs:
13
13
  runs-on: ubuntu-latest
14
14
  strategy:
15
15
  matrix:
16
- python-version: ["3.10", "3.11", "3.12"]
16
+ python-version: ["3.12"]
17
17
 
18
18
  steps:
19
19
  - uses: actions/checkout@v4
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: samplesheet-parser
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2
5
5
  Project-URL: Homepage, https://github.com/chaitanyakasaraneni/samplesheet-parser
6
6
  Project-URL: Documentation, https://illumina-samplesheet.readthedocs.io
@@ -33,12 +33,10 @@ Classifier: Intended Audience :: Developers
33
33
  Classifier: Intended Audience :: Science/Research
34
34
  Classifier: License :: OSI Approved :: Apache Software License
35
35
  Classifier: Programming Language :: Python :: 3
36
- Classifier: Programming Language :: Python :: 3.10
37
- Classifier: Programming Language :: Python :: 3.11
38
36
  Classifier: Programming Language :: Python :: 3.12
39
37
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
40
38
  Classifier: Typing :: Typed
41
- Requires-Python: >=3.10
39
+ Requires-Python: >=3.12
42
40
  Requires-Dist: loguru>=0.7
43
41
  Provides-Extra: dev
44
42
  Requires-Dist: black>=24.0; extra == 'dev'
@@ -6,7 +6,8 @@ Run from the repo root:
6
6
  python examples/parse_examples.py
7
7
 
8
8
  Demonstrates auto-detection, samples(), index_type(), UMI extraction,
9
- and validation for every example sheet in examples/sample_sheets/.
9
+ validation, and custom section parsing for every example sheet in
10
+ examples/sample_sheets/.
10
11
  """
11
12
 
12
13
  from __future__ import annotations
@@ -22,18 +23,23 @@ from samplesheet_parser import SampleSheetFactory, SampleSheetValidator
22
23
  SHEETS_DIR = Path(__file__).parent / "sample_sheets"
23
24
 
24
25
  # Ordered for readability: V1 first, then V2
25
- EXAMPLE_FILES = [
26
- "v1_dual_index.csv",
27
- "v1_single_index.csv",
28
- "v1_multi_lane.csv",
29
- "v2_novaseq_x_dual_index.csv",
30
- "v2_with_index_umi.csv",
31
- "v2_with_read_umi.csv",
32
- "v2_nextseq_single_index.csv",
26
+ # Each entry is (filename, custom section names to demo); the list is [] when there are none
27
+ EXAMPLE_FILES: list[tuple[str, list[str]]] = [
28
+ ("v1_dual_index.csv", []),
29
+ ("v1_single_index.csv", []),
30
+ ("v1_multi_lane.csv", []),
31
+ ("v1_with_manifests.csv", ["Manifests"]),
32
+ ("v1_with_lab_qc_settings.csv", ["Lab_QC_Settings"]),
33
+ ("v2_novaseq_x_dual_index.csv", []),
34
+ ("v2_with_index_umi.csv", []),
35
+ ("v2_with_read_umi.csv", []),
36
+ ("v2_nextseq_single_index.csv", []),
37
+ ("v2_with_cloud_settings.csv", ["Cloud_Settings"]),
38
+ ("v2_with_pipeline_settings.csv", ["Pipeline_Settings"]),
33
39
  ]
34
40
 
35
41
 
36
- def parse_sheet(path: Path) -> None:
42
+ def parse_sheet(path: Path, custom_sections: list[str]) -> None:
37
43
  print(f"\n{'='*60}")
38
44
  print(f" {path.name}")
39
45
  print(f"{'='*60}")
@@ -70,6 +76,18 @@ def parse_sheet(path: Path) -> None:
70
76
  print(f" UMI location : {rs.umi_location}")
71
77
  print(f" Read structure : {rs.read_structure}")
72
78
 
79
+ # Custom sections
80
+ if custom_sections:
81
+ print("\n Custom sections:")
82
+ for section_name in custom_sections:
83
+ data = sheet.parse_custom_section(section_name)
84
+ if data:
85
+ print(f" [{section_name}]")
86
+ for key, value in data.items():
87
+ print(f" {key:<28} {value}")
88
+ else:
89
+ print(f" [{section_name}] — (empty or not present)")
90
+
73
91
  # Samples table
74
92
  samples = sheet.samples()
75
93
  print(f"\n Samples ({len(samples)} total):")
@@ -97,14 +115,14 @@ def main() -> None:
97
115
  print("samplesheet-parser — Example Sheet Demo")
98
116
  print(f"Parsing {len(EXAMPLE_FILES)} example sheets from {SHEETS_DIR}\n")
99
117
 
100
- missing = [f for f in EXAMPLE_FILES if not (SHEETS_DIR / f).exists()]
118
+ missing = [f for f, _ in EXAMPLE_FILES if not (SHEETS_DIR / f).exists()]
101
119
  if missing:
102
120
  print(f"Warning: missing files: {missing}")
103
121
 
104
- for filename in EXAMPLE_FILES:
122
+ for filename, custom_sections in EXAMPLE_FILES:
105
123
  path = SHEETS_DIR / filename
106
124
  if path.exists():
107
- parse_sheet(path)
125
+ parse_sheet(path, custom_sections)
108
126
 
109
127
  print(f"\n{'='*60}")
110
128
  print("Done.")
@@ -0,0 +1,195 @@
1
+ # Example Sample Sheets
2
+
3
+ Reference sample sheets covering the full range of supported formats.
4
+ Each file is a valid example that `samplesheet-parser` can parse as-is.
5
+
6
+ ---
7
+
8
+ ## V1 — IEM / bcl2fastq format
9
+
10
+ Used with: NovaSeq 6000, HiSeq, NextSeq 500/550, MiSeq
11
+ Identified by: `IEMFileVersion` in `[Header]`
12
+
13
+ | File | Instrument | Indexes | Key feature |
14
+ |---|---|---|---|
15
+ | `v1_dual_index.csv` | NovaSeq 6000 | Dual (10+10 bp) | Multi-lane, TruSeq UD adapters |
16
+ | `v1_single_index.csv` | NextSeq 500 | Single (6 bp) | Small RNA, TruSeq Small RNA adapters |
17
+ | `v1_multi_lane.csv` | NovaSeq 6000 | Dual (10+10 bp) | 4 lanes, 2 projects, mixed assays |
18
+ | `v1_with_manifests.csv` | NovaSeq 6000 | Dual (10+10 bp) | Custom `[Manifests]` section — HyperCapture WES |
19
+ | `v1_with_lab_qc_settings.csv` | NovaSeq 6000 | Dual (10+10 bp) | Custom `[Lab_QC_Settings]` section — QC thresholds |
20
+
21
+ ### V1 `[Settings]` adapter keys
22
+
23
+ The official IEM spec uses two separate keys, `Adapter` (Read 1) and `AdapterRead2` (Read 2) — there is no `AdapterRead1` key:
24
+
25
+ ```
26
+ [Settings]
27
+ ReverseComplement,0
28
+ Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA ← Read 1
29
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT ← Read 2
30
+ ```
31
+
32
+ `ReverseComplement,1` is only for Nextera Mate Pair libraries.
33
+ `Chemistry,Amplicon` indicates a dual-index run. `Chemistry,Default` indicates a single-index or unindexed run.
34
+
35
+ ---
36
+
37
+ ## V2 — BCLConvert format
38
+
39
+ Used with: NovaSeq X, NovaSeq X Plus, NextSeq 1000/2000
40
+ Identified by: `FileFormatVersion` in `[Header]`, or `[BCLConvert_Settings]` / `[BCLConvert_Data]` section names
41
+
42
+ | File | Instrument | Indexes | UMI | Key feature |
43
+ |---|---|---|---|---|
44
+ | `v2_novaseq_x_dual_index.csv` | NovaSeq X | Dual (10+10 bp) | No | Standard multi-lane |
45
+ | `v2_with_index_umi.csv` | NovaSeq X | Dual (10+10 bp) | Yes — Index1 UMI (9 bp) | cfDNA / liquid biopsy |
46
+ | `v2_with_read_umi.csv` | NovaSeq X | Dual (8+8 bp) | Yes — read-level UMI (5 bp) | Duplex sequencing |
47
+ | `v2_nextseq_single_index.csv` | NextSeq 1000/2000 | Single (8 bp) | No | Amplicon panel, no Lane column |
48
+ | `v2_with_cloud_settings.csv` | NovaSeq X | Dual (10+10 bp) | No | Custom `[Cloud_Settings]` — BaseSpace upload config |
49
+ | `v2_with_pipeline_settings.csv` | NextSeq 1000/2000 | Single (8 bp) | No | Custom `[Pipeline_Settings]` — downstream pipeline config |
50
+
51
+ ### V2 `OverrideCycles` format
52
+
53
+ ```
54
+ Y151;I10;I10;Y151 — 151bp PE, 10bp dual index, no UMI
55
+ Y151;I10U9;I10;Y151 — same, with 9bp UMI appended to Index1
56
+ U5Y146;I8;I8;U5Y146 — 5bp UMI on both reads (read-level UMI)
57
+ Y151;I8;Y151 — single index, no Index2 cycle
58
+ ```
59
+
60
+ Segment order: Read1 ; Index1 ; Index2 ; Read2
61
+
62
+ ---
63
+
64
+ ## Custom sections
65
+
66
+ Both V1 and V2 sheets support non-standard sections. These are preserved verbatim
67
+ during parsing and accessible via `sheet.parse_custom_section(name)`.
68
+
69
+ ### V1 — `[Manifests]`
70
+
71
+ Used by Illumina's HyperCapture and other enrichment workflows to specify the
72
+ target capture manifest files the demultiplexer or aligner should load.
73
+
74
+ ```
75
+ [Manifests]
76
+ MFGmanifest,HyperCapture_ExomeV2_manifest.txt
77
+ PoolingManifest,pooling_batch3_v1.txt
78
+ ```
79
+
80
+ ### V1 — `[Lab_QC_Settings]`
81
+
82
+ A lab-defined section for embedding QC thresholds and pipeline metadata
83
+ directly in the sample sheet, so downstream tools can read them without a
84
+ separate config file.
85
+
86
+ ```
87
+ [Lab_QC_Settings]
88
+ MinQ30,85
89
+ TargetCoverage,100x
90
+ MinMappingRate,90
91
+ LibraryKit,TruSeq_Stranded_mRNA
92
+ SequencingCore,GenomicsCoreFacility
93
+ ```
94
+
95
+ ### V2 — `[Cloud_Settings]`
96
+
97
+ Used by Illumina DRAGEN and BaseSpace to configure automated cloud upload
98
+ after demultiplexing. `UploadToBaseSpace,1` triggers the upload; `BaseSpaceProjectId`
99
+ routes the data to the correct project.
100
+
101
+ ```
102
+ [Cloud_Settings]
103
+ GeneratedVersion,3.9.14
104
+ UploadToBaseSpace,1
105
+ BaseSpaceProjectId,bs-proj-240715-wgs
106
+ ```
107
+
108
+ ### V2 — `[Pipeline_Settings]`
109
+
110
+ A lab-defined section for downstream pipeline configuration — reference genome,
111
+ variant caller, output format — bundled with the sample sheet so the compute
112
+ environment has everything it needs in one file.
113
+
114
+ ```
115
+ [Pipeline_Settings]
116
+ PipelineVersion,2.1.0
117
+ ReferenceGenome,hg38
118
+ OutputFormat,CRAM
119
+ VariantCaller,DeepVariant
120
+ MinBaseQuality,20
121
+ MinMappingQuality,30
122
+ ```
123
+
124
+ ### Accessing custom sections in code
125
+
126
+ ```python
127
+ from samplesheet_parser import SampleSheetFactory
128
+
129
+ sheet_with_manifests = SampleSheetFactory().create_parser(
130
+ "examples/sample_sheets/v1_with_manifests.csv", parse=True
131
+ )
132
+
133
+ # Returns {} if section is absent (default)
134
+ manifests = sheet_with_manifests.parse_custom_section("Manifests")
135
+ print(manifests)
136
+ # {'MFGmanifest': 'HyperCapture_ExomeV2_manifest.txt',
137
+ # 'PoolingManifest': 'pooling_batch3_v1.txt'}
138
+
139
+ # Raise if a section your pipeline depends on is missing
140
+ sheet_with_lab_qc_settings = SampleSheetFactory().create_parser(
141
+ "examples/sample_sheets/v1_with_lab_qc_settings.csv", parse=True
142
+ )
143
+ qc = sheet_with_lab_qc_settings.parse_custom_section("Lab_QC_Settings", required=True)
144
+
145
+ # Works identically on V2 sheets
146
+ sheet_v2 = SampleSheetFactory().create_parser(
147
+ "examples/sample_sheets/v2_with_cloud_settings.csv", parse=True
148
+ )
149
+ cloud = sheet_v2.parse_custom_section("Cloud_Settings")
150
+ print(cloud["UploadToBaseSpace"]) # '1'
151
+ ```
152
+
153
+ ### Asserting required sections at parse time
154
+
155
+ ```python
156
+ # parse() raises ValueError immediately if a required section is absent
157
+ sheet = SampleSheetFactory().create_parser("SampleSheet.csv", parse=False)
158
+ sheet.parse(required_sections=["Manifests", "Lab_QC_Settings"])
159
+ ```
160
+
161
+ ---
162
+
163
+ ## Parsing examples
164
+
165
+ ```python
166
+ from samplesheet_parser import SampleSheetFactory, SampleSheetValidator
167
+
168
+ # Works for any of the files above — format is auto-detected
169
+ factory = SampleSheetFactory()
170
+ sheet = factory.create_parser("examples/sample_sheets/v2_with_index_umi.csv", parse=True)
171
+
172
+ print(factory.version) # SampleSheetVersion.V2
173
+ print(sheet.index_type()) # "dual"
174
+ print(factory.get_umi_length()) # 9
175
+
176
+ for sample in sheet.samples():
177
+ print(sample["sample_id"], sample["index"])
178
+
179
+ result = SampleSheetValidator().validate(sheet)
180
+ print(result.summary()) # PASS — 0 error(s), 0 warning(s)
181
+ ```
182
+
183
+ ---
184
+
185
+ ## Notes on column capitalisation (V1)
186
+
187
+ From the Illumina IEM reference: **capitalisation in the `[Data]` header row matters.**
188
+
189
+ Standard capitalisation:
190
+ - `Sample_ID`, `Sample_Name`, `Sample_Plate`, `Sample_Well` — Title_Case with underscore
191
+ - `I7_Index_ID`, `I5_Index_ID` — uppercase `I` prefix and uppercase `ID` suffix, Title_Case `Index`
192
+ - `index`, `index2` — **all lowercase**
193
+ - `Sample_Project`, `Description` — Title_Case
194
+
195
+ `index` and `index2` being lowercase is deliberate and required by bcl2fastq.
@@ -0,0 +1,35 @@
1
+ [Header]
2
+ IEMFileVersion,5
3
+ Experiment Name,240610_A00123_0112_BHJNMMDRX3
4
+ Date,2024-06-10
5
+ Workflow,GenerateFASTQ
6
+ Application,FASTQ Only
7
+ Instrument Type,NovaSeq 6000
8
+ Assay,TruSeq Stranded mRNA
9
+ Index Adapters,TruSeq RNA UD Indexes (96 Indexes)
10
+ Chemistry,Amplicon
11
+
12
+ [Reads]
13
+ 151
14
+ 151
15
+
16
+ [Settings]
17
+ ReverseComplement,0
18
+ Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
19
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
20
+
21
+ [Lab_QC_Settings]
22
+ MinQ30,85
23
+ TargetCoverage,100x
24
+ MinMappingRate,90
25
+ LibraryKit,TruSeq_Stranded_mRNA
26
+ SequencingCore,GenomicsCoreFacility
27
+
28
+ [Data]
29
+ Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
30
+ 1,RNA_001,Control_Rep1,,A01,UDP0001,CAAGACAGAT,UDP0001,ACTATAGCCT,RNASeq_Project,mRNA_expression
31
+ 1,RNA_002,Control_Rep2,,B01,UDP0002,TGAACCTGAT,UDP0002,TGATACGTCC,RNASeq_Project,mRNA_expression
32
+ 1,RNA_003,Control_Rep3,,C01,UDP0003,GCACAACGTT,UDP0003,CATCTCACAG,RNASeq_Project,mRNA_expression
33
+ 1,RNA_004,Treatment_Rep1,,D01,UDP0004,ATCGCCTGTT,UDP0004,GACTAGCATG,RNASeq_Project,mRNA_expression
34
+ 1,RNA_005,Treatment_Rep2,,E01,UDP0005,CTTGTAGCAA,UDP0005,TGCGTCAGCC,RNASeq_Project,mRNA_expression
35
+ 1,RNA_006,Treatment_Rep3,,F01,UDP0006,GATCCTAAGT,UDP0006,CATGCGGTTG,RNASeq_Project,mRNA_expression
@@ -0,0 +1,32 @@
1
+ [Header]
2
+ IEMFileVersion,5
3
+ Experiment Name,240501_A01234_0088_AHJNYGDRX3
4
+ Date,2024-05-01
5
+ Workflow,GenerateFASTQ
6
+ Application,FASTQ Only
7
+ Instrument Type,NovaSeq 6000
8
+ Assay,Nextera DNA Flex for Enrichment
9
+ Index Adapters,IDT for Illumina DNA/RNA UD Indexes (96 Indexes)
10
+ Chemistry,Amplicon
11
+
12
+ [Reads]
13
+ 151
14
+ 151
15
+
16
+ [Settings]
17
+ ReverseComplement,0
18
+ Adapter,CTGTCTCTTATACACATCT
19
+ AdapterRead2,CTGTCTCTTATACACATCT
20
+
21
+ [Manifests]
22
+ MFGmanifest,HyperCapture_ExomeV2_manifest.txt
23
+ PoolingManifest,pooling_batch3_v1.txt
24
+
25
+ [Data]
26
+ Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
27
+ 1,WES_001,TumorA_WES,,A01,UDP0001,CAAGACAGAT,UDP0001,ACTATAGCCT,WES_Project,tumor_normal_pair
28
+ 1,WES_002,NormalA_WES,,B01,UDP0002,TGAACCTGAT,UDP0002,TGATACGTCC,WES_Project,tumor_normal_pair
29
+ 1,WES_003,TumorB_WES,,C01,UDP0003,GCACAACGTT,UDP0003,CATCTCACAG,WES_Project,tumor_normal_pair
30
+ 1,WES_004,NormalB_WES,,D01,UDP0004,ATCGCCTGTT,UDP0004,GACTAGCATG,WES_Project,tumor_normal_pair
31
+ 2,WES_005,TumorC_WES,,E01,UDP0005,CTTGTAGCAA,UDP0005,TGCGTCAGCC,WES_Project,tumor_normal_pair
32
+ 2,WES_006,NormalC_WES,,F01,UDP0006,GATCCTAAGT,UDP0006,CATGCGGTTG,WES_Project,tumor_normal_pair
@@ -0,0 +1,32 @@
1
+ [Header]
2
+ FileFormatVersion,2
3
+ RunName,240715_LH00336_0078_A22TNHKLT3
4
+ InstrumentPlatform,NovaSeqXSeries
5
+ ExperimentName,WGS_CloudUpload_Batch7
6
+
7
+ [Reads]
8
+ Read1Cycles,151
9
+ Read2Cycles,151
10
+ Index1Cycles,10
11
+ Index2Cycles,10
12
+
13
+ [BCLConvert_Settings]
14
+ SoftwareVersion,3.9.3
15
+ AdapterRead1,CTGTCTCTTATACACATCT
16
+ AdapterRead2,CTGTCTCTTATACACATCT
17
+ OverrideCycles,Y151;I10;I10;Y151
18
+ BarcodeMismatchesIndex1,1
19
+ BarcodeMismatchesIndex2,1
20
+
21
+ [BCLConvert_Data]
22
+ Lane,Sample_ID,Sample_Name,Index,Index2,Sample_Project
23
+ 1,WGS_001,SampleAlpha,ATTACTCGAT,TATAGCCTGT,WGS_Cloud
24
+ 1,WGS_002,SampleBeta,TCCGGAGACC,ATAGAGGCAC,WGS_Cloud
25
+ 1,WGS_003,SampleGamma,TAGGCATGCA,CCTATCCTAG,WGS_Cloud
26
+ 2,WGS_004,SampleDelta,CTCTCTACGC,GGCTCTGAGA,WGS_Cloud
27
+ 2,WGS_005,SampleEpsilon,CGGAGCCTAA,AGGCGAAGAG,WGS_Cloud
28
+
29
+ [Cloud_Settings]
30
+ GeneratedVersion,3.9.14
31
+ UploadToBaseSpace,1
32
+ BaseSpaceProjectId,bs-proj-240715-wgs
@@ -0,0 +1,32 @@
1
+ [Header]
2
+ FileFormatVersion,2
3
+ RunName,240820_VH00123_0041_AACNPJKHV
4
+ InstrumentPlatform,NextSeq1000/2000
5
+ ExperimentName,AmpliSeq_Pipeline_Config_Run
6
+
7
+ [Reads]
8
+ Read1Cycles,151
9
+ Read2Cycles,151
10
+ Index1Cycles,8
11
+
12
+ [BCLConvert_Settings]
13
+ SoftwareVersion,3.9.3
14
+ AdapterRead1,CTGTCTCTTATACACATCT
15
+ OverrideCycles,Y151;I8;Y151
16
+ BarcodeMismatchesIndex1,1
17
+
18
+ [BCLConvert_Data]
19
+ Sample_ID,Sample_Name,Index,Sample_Project
20
+ Panel_001,CancerHotspot_Rep1,ATTACTCG,AmpliconPanel
21
+ Panel_002,CancerHotspot_Rep2,TCCGGAGA,AmpliconPanel
22
+ Panel_003,CancerHotspot_Rep3,TAGGCATG,AmpliconPanel
23
+ Panel_004,NormalControl_Rep1,CTCTCTAC,AmpliconPanel
24
+ Panel_005,NormalControl_Rep2,TAATCTTA,AmpliconPanel
25
+
26
+ [Pipeline_Settings]
27
+ PipelineVersion,2.1.0
28
+ ReferenceGenome,hg38
29
+ OutputFormat,CRAM
30
+ VariantCaller,DeepVariant
31
+ MinBaseQuality,20
32
+ MinMappingQuality,30
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "samplesheet-parser"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -19,13 +19,11 @@ classifiers = [
19
19
  "Intended Audience :: Developers",
20
20
  "License :: OSI Approved :: Apache Software License",
21
21
  "Programming Language :: Python :: 3",
22
- "Programming Language :: Python :: 3.10",
23
- "Programming Language :: Python :: 3.11",
24
22
  "Programming Language :: Python :: 3.12",
25
23
  "Topic :: Scientific/Engineering :: Bio-Informatics",
26
24
  "Typing :: Typed",
27
25
  ]
28
- requires-python = ">=3.10"
26
+ requires-python = ">=3.12"
29
27
  dependencies = [
30
28
  "loguru>=0.7",
31
29
  ]
@@ -50,17 +48,17 @@ packages = ["samplesheet_parser"]
50
48
 
51
49
  [tool.black]
52
50
  line-length = 100
53
- target-version = ["py310", "py311", "py312"]
51
+ target-version = ["py312"]
54
52
 
55
53
  [tool.ruff]
56
54
  line-length = 100
57
- target-version = "py310"
55
+ target-version = "py312"
58
56
 
59
57
  [tool.ruff.lint]
60
58
  select = ["E", "F", "I", "W", "UP", "B"]
61
59
 
62
60
  [tool.mypy]
63
- python_version = "3.10"
61
+ python_version = "3.12"
64
62
  strict = true
65
63
  ignore_missing_imports = true
66
64
 
@@ -2,10 +2,10 @@
2
2
  Enumerations for samplesheet-parser.
3
3
  """
4
4
 
5
- from enum import Enum
5
+ from enum import StrEnum
6
6
 
7
7
 
8
- class SampleSheetVersion(str, Enum):
8
+ class SampleSheetVersion(StrEnum):
9
9
  """Illumina sample sheet format version.
10
10
 
11
11
  V1 — Illumina Experiment Manager (IEM) format, used with bcl2fastq.
@@ -21,7 +21,7 @@ class SampleSheetVersion(str, Enum):
21
21
  V2 = "V2"
22
22
 
23
23
 
24
- class IndexType(str, Enum):
24
+ class IndexType(StrEnum):
25
25
  """Sequencing index configuration.
26
26
 
27
27
  SINGLE — I7 index only (single-index libraries).
@@ -33,7 +33,7 @@ class IndexType(str, Enum):
33
33
  NONE = "none"
34
34
 
35
35
 
36
- class InstrumentPlatform(str, Enum):
36
+ class InstrumentPlatform(StrEnum):
37
37
  """Standard Illumina instrument platform identifiers used in V2 sample sheets."""
38
38
  NOVASEQ_6000 = "NovaSeq6000"
39
39
  NOVASEQ_X_SERIES = "NovaSeqXSeries"
@@ -43,7 +43,7 @@ class InstrumentPlatform(str, Enum):
43
43
  HISEQ_X = "HiSeqX"
44
44
 
45
45
 
46
- class UMILocation(str, Enum):
46
+ class UMILocation(StrEnum):
47
47
  """Where the UMI is encoded in the read structure (OverrideCycles string)."""
48
48
  READ1 = "read1"
49
49
  READ2 = "read2"
@@ -37,6 +37,7 @@ Examples
37
37
  from __future__ import annotations
38
38
 
39
39
  from pathlib import Path
40
+ from typing import Any
40
41
 
41
42
  from loguru import logger
42
43
 
@@ -121,7 +122,7 @@ class SampleSheetFactory:
121
122
  detected = self._detect_version(path)
122
123
 
123
124
  self.version = detected
124
- kwargs: dict = dict(clean=clean, experiment_id=experiment_id, parse=parse)
125
+ kwargs: dict[str, Any] = dict(clean=clean, experiment_id=experiment_id, parse=parse)
125
126
 
126
127
  if detected == SampleSheetVersion.V2:
127
128
  logger.info("Detected BCLConvert V2 format — using SampleSheetV2")