AdvancedAnalysisFileParser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AdvancedAnalysisFileParser/AdvancedAnalysisConstants.py +9 -0
- AdvancedAnalysisFileParser/AdvancedAnalysisParser.py +129 -0
- AdvancedAnalysisFileParser/Models/ConditionOperator.py +9 -0
- AdvancedAnalysisFileParser/Models/FieldCondition.py +29 -0
- AdvancedAnalysisFileParser/Models/FieldWarningConfig.py +4 -0
- AdvancedAnalysisFileParser/Models/JsonDict.py +2 -0
- AdvancedAnalysisFileParser/Models/SectionConfig.py +8 -0
- AdvancedAnalysisFileParser/Models/__init__.py +13 -0
- AdvancedAnalysisFileParser/Parsers/AdvancedAnalysisFileParserFactory.py +36 -0
- AdvancedAnalysisFileParser/Parsers/DragenTruSightOncology500TSVParser.py +102 -0
- AdvancedAnalysisFileParser/Parsers/IAdvancedAnalysisFileParser.py +86 -0
- AdvancedAnalysisFileParser/Parsers/JsonSectionParser.py +39 -0
- AdvancedAnalysisFileParser/Parsers/OneLineTsvParser.py +26 -0
- AdvancedAnalysisFileParser/Parsers/__init__.py +7 -0
- AdvancedAnalysisFileParser/README.md +570 -0
- AdvancedAnalysisFileParser/Test/TruSightOncology500.CombinedVariantOutput.tsv +1586 -0
- AdvancedAnalysisFileParser/Test/dragen424.targeted.json +202 -0
- AdvancedAnalysisFileParser/Test/dummy_gba_affected_nonrecomb_acn2.targeted.json +21 -0
- AdvancedAnalysisFileParser/Test/dummy_gba_carrier_one_recomb_only.targeted.json +14 -0
- AdvancedAnalysisFileParser/Test/dummy_gba_phase_unknown_one_recomb_plus_variant.targeted.json +21 -0
- AdvancedAnalysisFileParser/Test/dummy_warnset_1.targeted.json +132 -0
- AdvancedAnalysisFileParser/Test/dummy_warnset_2.targeted.json +145 -0
- AdvancedAnalysisFileParser/Test/dummy_warnset_3.targeted.json +146 -0
- AdvancedAnalysisFileParser/Test/gba.tsv +2 -0
- AdvancedAnalysisFileParser/Test/gba_carrier_1.json +96 -0
- AdvancedAnalysisFileParser/Test/gba_carrier_2.json +101 -0
- AdvancedAnalysisFileParser/Test/gba_multiple_phase_unknown_1.json +101 -0
- AdvancedAnalysisFileParser/Test/gba_multiple_phase_unknown_2.json +105 -0
- AdvancedAnalysisFileParser/Test/gba_positive_1.json +96 -0
- AdvancedAnalysisFileParser/Test/gba_positive_2.json +101 -0
- AdvancedAnalysisFileParser/Test/hba_carrier_1.json +96 -0
- AdvancedAnalysisFileParser/Test/hba_carrier_2.json +96 -0
- AdvancedAnalysisFileParser/Test/hba_carrier_3.json +96 -0
- AdvancedAnalysisFileParser/Test/hba_carrier_4.json +96 -0
- AdvancedAnalysisFileParser/Test/hba_hemoglobin_h_disease.json +96 -0
- AdvancedAnalysisFileParser/Test/hba_silent_carrier.json +96 -0
- AdvancedAnalysisFileParser/Test/smn.tsv +2 -0
- AdvancedAnalysisFileParser/Test/smn_carrier.json +96 -0
- AdvancedAnalysisFileParser/Test/smn_positive.json +96 -0
- AdvancedAnalysisFileParser/Test/smn_silent_carrier_risk.json +101 -0
- AdvancedAnalysisFileParser/Test_AdvancedAnalysisParser.py +342 -0
- AdvancedAnalysisFileParser/Warnings/CarrierPositiveWarning.py +18 -0
- AdvancedAnalysisFileParser/Warnings/ConditionWarning.py +20 -0
- AdvancedAnalysisFileParser/Warnings/GbaWarning.py +50 -0
- AdvancedAnalysisFileParser/Warnings/GenotypeWarning.py +22 -0
- AdvancedAnalysisFileParser/Warnings/IWarning.py +7 -0
- AdvancedAnalysisFileParser/Warnings/SmnWarning.py +29 -0
- AdvancedAnalysisFileParser/Warnings/WarningFactory.py +31 -0
- AdvancedAnalysisFileParser/Warnings/__init__.py +7 -0
- AdvancedAnalysisFileParser/__init__.py +18 -0
- AdvancedAnalysisFileParser/advConfig.json +96 -0
- AdvancedAnalysisFileParser/dragen_500_tsv_config.json +53 -0
- AdvancedAnalysisFileParser/gba_tsv_config.json +23 -0
- AdvancedAnalysisFileParser/run_test_parser.py +29 -0
- AdvancedAnalysisFileParser/smn_tsv_config.json +23 -0
- advancedanalysisfileparser-0.1.0.dist-info/METADATA +152 -0
- advancedanalysisfileparser-0.1.0.dist-info/RECORD +60 -0
- advancedanalysisfileparser-0.1.0.dist-info/WHEEL +5 -0
- advancedanalysisfileparser-0.1.0.dist-info/licenses/LICENSE +9 -0
- advancedanalysisfileparser-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
{
|
|
2
|
+
"softwareVersion": "dragen v05.121.779.4.4.4",
|
|
3
|
+
"sampleId": "22WVHMLT4_5_270UDI-idt-UMI",
|
|
4
|
+
"genomeBuild": "hg38",
|
|
5
|
+
"phenotypeDatabaseSources": [
|
|
6
|
+
"PharmCAT Phenotypes Version: Snapshot-2022.09.15"
|
|
7
|
+
],
|
|
8
|
+
"cyp2b6": {
|
|
9
|
+
"genotype": "*1/*29",
|
|
10
|
+
"genotypeFilter": "PASS",
|
|
11
|
+
"phenotypeDatabaseAnnotation": "Intermediate Metabolizer"
|
|
12
|
+
},
|
|
13
|
+
"lpa": {
|
|
14
|
+
"kiv2CopyNumber": 39.45213332616308,
|
|
15
|
+
"refMarkerAlleleCopyNumber": null,
|
|
16
|
+
"altMarkerAlleleCopyNumber": null,
|
|
17
|
+
"type": "Homozygous REF markers call",
|
|
18
|
+
"variants": [
|
|
19
|
+
{
|
|
20
|
+
"hgvs": "LPA:4925G>A",
|
|
21
|
+
"qual": 0.0,
|
|
22
|
+
"altCopyNumber": 0,
|
|
23
|
+
"altCopyNumberQuality": 150.0
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"hgvs": "LPA:4733G>A",
|
|
27
|
+
"qual": 0.0,
|
|
28
|
+
"altCopyNumber": 0,
|
|
29
|
+
"altCopyNumberQuality": 150.0
|
|
30
|
+
}
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
"rh": {
|
|
34
|
+
"totalCopyNumber": 4,
|
|
35
|
+
"rhdCopyNumber": 2,
|
|
36
|
+
"rhceCopyNumber": 2,
|
|
37
|
+
"variants": [
|
|
38
|
+
{
|
|
39
|
+
"hgvs": "NC_000001.11g.25405596_25409676con25283766_25287797",
|
|
40
|
+
"qual": 26.086525474041604,
|
|
41
|
+
"altCopyNumber": 1,
|
|
42
|
+
"altCopyNumberQuality": 150.0
|
|
43
|
+
}
|
|
44
|
+
]
|
|
45
|
+
},
|
|
46
|
+
"cyp21a2": {
|
|
47
|
+
"totalCopyNumber": 3,
|
|
48
|
+
"deletionBreakpointInGene": false,
|
|
49
|
+
"recombinantHaplotypes": [
|
|
50
|
+
"",
|
|
51
|
+
""
|
|
52
|
+
],
|
|
53
|
+
"variants": []
|
|
54
|
+
},
|
|
55
|
+
"cyp2d6": {
|
|
56
|
+
"totalCopyNumber": 3,
|
|
57
|
+
"genotype": "*1/*5",
|
|
58
|
+
"genotypeFilter": "PASS",
|
|
59
|
+
"genotypeQuality": 150,
|
|
60
|
+
"phenotypeDatabaseAnnotation": "Intermediate Metabolizer",
|
|
61
|
+
"variants": [
|
|
62
|
+
{
|
|
63
|
+
"alleleId": "g.42126938C>T",
|
|
64
|
+
"alleleCopyNumber": 2,
|
|
65
|
+
"genotypeQuality": 21,
|
|
66
|
+
"filter": "PASS"
|
|
67
|
+
}
|
|
68
|
+
]
|
|
69
|
+
},
|
|
70
|
+
"gba": {
|
|
71
|
+
"totalCopyNumber": 4,
|
|
72
|
+
"deletionBreakpointInGene": null,
|
|
73
|
+
"recombinantHaplotypes": [
|
|
74
|
+
"",
|
|
75
|
+
""
|
|
76
|
+
],
|
|
77
|
+
"variants": []
|
|
78
|
+
},
|
|
79
|
+
"hba": {
|
|
80
|
+
"totalCopyNumber": 4,
|
|
81
|
+
"genotype": "aa/aa",
|
|
82
|
+
"genotypeQuality": 56,
|
|
83
|
+
"genotypeFilter": "PASS",
|
|
84
|
+
"variants": []
|
|
85
|
+
},
|
|
86
|
+
"smn": {
|
|
87
|
+
"fullLengthCopyNumber": 3,
|
|
88
|
+
"totalCopyNumber": 4,
|
|
89
|
+
"smn1CopyNumber": 2,
|
|
90
|
+
"smn2CopyNumber": 1,
|
|
91
|
+
"smn2Delta78CopyNumber": 1,
|
|
92
|
+
"fullLengthCopyNumberFloat": "2.59",
|
|
93
|
+
"totalCopyNumberFloat": "3.56",
|
|
94
|
+
"variants": [
|
|
95
|
+
{
|
|
96
|
+
"alleleId": "NM_000344.4:c.*3+80T>G",
|
|
97
|
+
"alleleCopyNumber": 1
|
|
98
|
+
}
|
|
99
|
+
]
|
|
100
|
+
}
|
|
101
|
+
}
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import pytest
|
|
5
|
+
from AdvancedAnalysisFileParser import JsonDict
|
|
6
|
+
from AdvancedAnalysisFileParser import AdvancedAnalysisParser
|
|
7
|
+
|
|
8
|
+
# Always resolve the test data directory relative to this file
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "Test")

# Expected warning texts, named once so that each scenario below reads as
# "<input file> -> <expected classification>".
_HBA_HBH = "Based on HBA Special Caller, this sample is defined as <b>positive for hemoglobin H disease</b>"
_HBA_CARRIER = "Based on HBA Special Caller, this sample is defined as <b>carrier</b>"
_HBA_SILENT_CARRIER = "Based on HBA Special Caller, this sample is defined as <b>silent carrier</b>"
_SMN_POSITIVE = "Based on the SMN Caller, this sample is positive for Spinal Muscular Atrophy"
_SMN_CARRIER = "Based on the SMN Caller, this sample is carrier for Spinal Muscular Atrophy"
_SMN_SILENT_CARRIER = (
    "Based on the SMN Caller, this sample has increased risk of being a silent carrier (2+0) "
    "for Spinal Muscular Atrophy"
)
_GBA_POSITIVE = "Based on the GBA Caller, this sample is positive for Gaucher disease"
_GBA_CARRIER = "Based on the GBA Caller, this sample is carrier for Gaucher disease"
_GBA_PHASE_UNKNOWN = (
    "Multiple GBA variants detected; phase is unknown (could be in cis or in trans). "
    "Interpret cautiously."
)


def _warning(message):
    """Return the expected per-gene result fragment for a warning text."""
    return {"warning": message}


def _context(input_file, expected):
    """Build one parametrized scenario for a single input file.

    Every scenario reads from and writes to ``TEST_DATA_DIR`` and uses the
    same output file name; only the input file and the expected per-gene
    fields vary.
    """
    return {
        "request": {
            "output_json": "out.json",
            "input_dir": TEST_DATA_DIR,
            "output_dir": TEST_DATA_DIR,
            "input_files": [input_file],
        },
        "expected": expected,
    }


# Define test contexts for parametrization.
# Each input file appears exactly once; the list previously contained the
# gba_positive_1.json scenario twice with identical expectations.
TEST_CONTEXTS = [
    _context(
        "dummy_warnset_1.targeted.json",
        {
            "HBA": _warning(_HBA_HBH),
            "SMN": _warning(_SMN_POSITIVE),
            "GBA": _warning(_GBA_POSITIVE),
        },
    ),
    _context(
        "dummy_warnset_2.targeted.json",
        {
            "HBA": _warning(_HBA_CARRIER),
            "SMN": _warning(_SMN_CARRIER),
            "GBA": _warning(_GBA_PHASE_UNKNOWN),
        },
    ),
    _context(
        "dummy_warnset_3.targeted.json",
        {
            "HBA": _warning(_HBA_SILENT_CARRIER),
            "SMN": _warning(_SMN_SILENT_CARRIER),
            "GBA": _warning(_GBA_CARRIER),
        },
    ),
    _context(
        "dummy_gba_phase_unknown_one_recomb_plus_variant.targeted.json",
        {"GBA": _warning(_GBA_PHASE_UNKNOWN)},
    ),
    _context(
        "dummy_gba_carrier_one_recomb_only.targeted.json",
        {"GBA": _warning(_GBA_CARRIER)},
    ),
    _context("gba_carrier_1.json", {"GBA": _warning(_GBA_CARRIER)}),
    _context("gba_positive_1.json", {"GBA": _warning(_GBA_POSITIVE)}),
    _context("smn_carrier.json", {"SMN": _warning(_SMN_CARRIER)}),
    _context("smn_positive.json", {"SMN": _warning(_SMN_POSITIVE)}),
    _context("hba_carrier_1.json", {"HBA": _warning(_HBA_CARRIER)}),
    _context("hba_hemoglobin_h_disease.json", {"HBA": _warning(_HBA_HBH)}),
    _context("hba_silent_carrier.json", {"HBA": _warning(_HBA_SILENT_CARRIER)}),
    _context("gba_carrier_2.json", {"GBA": _warning(_GBA_CARRIER)}),
    _context("gba_multiple_phase_unknown_1.json", {"GBA": _warning(_GBA_PHASE_UNKNOWN)}),
    _context("gba_multiple_phase_unknown_2.json", {"GBA": _warning(_GBA_PHASE_UNKNOWN)}),
    _context("gba_positive_2.json", {"GBA": _warning(_GBA_POSITIVE)}),
    _context("hba_carrier_2.json", {"HBA": _warning(_HBA_CARRIER)}),
    _context("hba_carrier_3.json", {"HBA": _warning(_HBA_CARRIER)}),
    _context("hba_carrier_4.json", {"HBA": _warning(_HBA_CARRIER)}),
    _context("smn_silent_carrier_risk.json", {"SMN": _warning(_SMN_SILENT_CARRIER)}),
    # Only the presence of these sections is asserted; no field values are pinned.
    _context(
        "dragen424.targeted.json",
        {
            "CYP2D6": {},
            "CYP2B6": {},
            "CYP21A2": {},
            "LPA": {},
        },
    ),
    _context(
        "dummy_gba_affected_nonrecomb_acn2.targeted.json",
        {"GBA": _warning(_GBA_POSITIVE)},
    ),
    _context("gba.tsv", {"GBA": _warning(_GBA_POSITIVE)}),
    # The TSV scenario for SMN expects its result under the "SMN1" key.
    _context("smn.tsv", {"SMN1": _warning(_SMN_POSITIVE)}),
    _context(
        "TruSightOncology500.CombinedVariantOutput.tsv",
        {
            # TODO: Fill in expected output for this file
        },
    ),
]
|
|
325
|
+
|
|
326
|
+
@pytest.mark.parametrize("context", TEST_CONTEXTS)
def test_parser_in_memory(context: JsonDict) -> None:
    """Run the parser for one scenario and compare its result with the expectations.

    The parser is built from ``context["request"]`` and asked for an in-memory
    dict. Every gene listed in ``context["expected"]`` must be present in the
    result, and every expected field of that gene must match exactly.
    """
    result = AdvancedAnalysisParser(context["request"]).run(return_dict=True)

    # Dump the outcome first so a failing run shows what the parser produced.
    print(json.dumps(result, indent=2, default=str))
    if result is not None:
        print(f"[DEBUG] Result keys: {list(result.keys())}")
        for gene, section in result.items():
            print(f"[DEBUG] {gene} warning: {section.get('warning')}")

    assert result is not None

    # Validate all expected fields (not just warning)
    for gene, wanted_fields in context["expected"].items():
        assert gene in result, f"Missing result for {gene}"
        actual_fields = result[gene]
        for key, value in wanted_fields.items():
            result_value = actual_fields.get(key)
            assert result_value == value, f"Mismatch for {gene}.{key}: got {result_value}, expected {value}"
|
|
342
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from .IWarning import IWarning
|
|
2
|
+
from ..Models import *
|
|
3
|
+
class CarrierPositiveWarning(IWarning):
    """Warning formatter that reports a sample as positive when a configured condition matches."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Return the text of the first configured condition that ``data`` satisfies.

        Args:
            config: Warning configuration. Reads ``caller_name`` (falling back
                to ``name``), ``disease_name`` (falling back to ``disease``)
                and ``conditions``, an iterable of dicts with ``field``,
                ``operator`` and ``value`` entries. ``config`` is not modified.
            data: Parsed data handed unchanged to ``FieldCondition.check``.

        Returns:
            ``str()`` of the first ``FieldCondition`` whose ``check`` accepts
            ``data``; ``""`` when no condition matches or none are configured.

        Raises:
            KeyError: If a condition has no ``operator`` entry, or (presumably)
                when the operator name is unknown to ``ConditionOperator``.
        """
        name = config.get("caller_name") or config.get("name")
        disease = config.get("disease_name") or config.get("disease")
        message = f"Based on the {name}, this sample is <b>Positive</b> for {disease}"

        for condition in config.get("conditions") or []:
            # Resolve the operator into a local instead of writing it back into
            # the caller's config: the stored name must stay a string so the
            # same config can be formatted again. A non-string value is taken
            # to be an operator that was already resolved earlier.
            raw_operator = condition["operator"]
            if isinstance(raw_operator, str):
                operator = ConditionOperator[raw_operator]
            else:
                operator = raw_operator

            field_condition = FieldCondition(
                field=condition.get("field"),
                operator=operator,
                value=condition.get("value"),
                message=message,
            )
            if field_condition.check(data):
                return str(field_condition)
        return ""
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from .IWarning import IWarning
|
|
2
|
+
from ..Models import JsonDict, FieldCondition, ConditionOperator
|
|
3
|
+
|
|
4
|
+
class ConditionWarning(IWarning):
    """Warning formatter that joins the texts of all configured conditions that match."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Evaluate every configured condition and join the texts of those that hold.

        Args:
            config: Warning configuration. ``config["conditions"]`` is expected
                to be a list of dicts whose keys are the keyword arguments of
                ``FieldCondition`` (with ``operator`` given by name).
                ``config`` is not modified.
            data: Parsed data. When it is a dict that contains the condition's
                ``field``, the value stored under that field is what gets
                checked; otherwise ``data`` itself is checked.

        Returns:
            The distinct texts of the matching conditions joined with ``". "``,
            or ``""`` when nothing matches.

        Raises:
            KeyError: If a condition has no ``operator`` entry, or (presumably)
                when the operator name is unknown to ``ConditionOperator``.
        """
        msgs = []
        for cond_dict in config.get("conditions", []):
            # Resolve the operator without writing it back into the config, so
            # the same config can be formatted more than once. A non-string
            # value is taken to be an operator that was already resolved.
            raw_operator = cond_dict["operator"]
            if isinstance(raw_operator, str):
                operator = ConditionOperator[raw_operator]
            else:
                operator = raw_operator
            cond = FieldCondition(**{**cond_dict, "operator": operator})

            # Pick the value for THIS condition only. Rebinding ``data`` here
            # would make every later condition look inside the first matched
            # field's value instead of the original record.
            target = data
            if isinstance(data, dict) and cond_dict.get("field") in data:
                target = data[cond_dict["field"]]

            warning = str(cond)
            if warning in msgs:
                # Same text already reported; do not repeat it.
                continue
            if cond.check(target):
                msgs.append(warning)
                print(f"Warning triggered by condition: {warning}")
        return ". ".join(msgs)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from .IWarning import IWarning
|
|
2
|
+
from ..Models import JsonDict
|
|
3
|
+
|
|
4
|
+
class GbaWarning(IWarning):
    """Warning formatter that classifies a GBA section as positive, carrier or phase-unknown."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Return the GBA warning text for ``data``.

        The decision uses two inputs: the number of non-empty entries in
        ``data["recombinantHaplotypes"]`` and the copy number of each entry in
        ``data["variants"]`` (string entries count as one copy; dict entries
        are read from ``alleleCopyNumber``, ``allele_copy_number`` or
        ``altCopyNumber``; anything else is ignored).

        * positive: two or more recombinant haplotypes, or any variant with a
          copy number of at least 2
        * phase unknown: one recombinant plus at least one single-copy variant,
          or no recombinant and two or more single-copy variants
        * carrier: exactly one recombinant and no single-copy variant, or no
          recombinant and exactly one single-copy variant

        Returns ``""`` when none of the above applies.
        """
        caller = config.get("caller_name") or config.get("name") or "GBA Caller"
        condition_name = config.get("disease_name") or config.get("disease") or "Gaucher disease"

        recombinant_total = sum(1 for hap in (data.get("recombinantHaplotypes") or []) if hap)

        def copies_of(variant):
            # A bare string carries no copy number; count it as a single copy.
            if isinstance(variant, str):
                return 1
            if not isinstance(variant, dict):
                return 0
            raw = None
            # First truthy value wins; if none is truthy the last key's value is kept.
            for key in ("alleleCopyNumber", "allele_copy_number", "altCopyNumber"):
                raw = variant.get(key)
                if raw:
                    break
            return raw if isinstance(raw, (int, float)) else 0

        copy_numbers = [
            copies_of(variant)
            for variant in (data.get("variants", []) or [])
            if isinstance(variant, (dict, str))
        ]
        has_multi_copy_variant = any(count >= 2 for count in copy_numbers)
        single_copy_total = sum(1 for count in copy_numbers if count == 1)

        # Positive: evidence of two affected alleles
        if recombinant_total >= 2 or has_multi_copy_variant:
            return f"Based on the {caller}, this sample is positive for {condition_name}"

        phase_unknown = (
            "Multiple GBA variants detected; phase is unknown (could be in cis or in trans). Interpret cautiously."
        )
        carrier = f"Based on the {caller}, this sample is carrier for {condition_name}"

        if recombinant_total == 1:
            # One recombinant allele: any additional single-copy variant makes
            # the phase ambiguous, otherwise it is the only affected allele.
            return phase_unknown if single_copy_total >= 1 else carrier

        if recombinant_total == 0:
            if single_copy_total >= 2:
                return phase_unknown
            if single_copy_total == 1:
                return carrier

        return ""
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from .IWarning import IWarning
|
|
2
|
+
from ..Models import JsonDict
|
|
3
|
+
class GenotypeWarning(IWarning):
    """Warning formatter that translates a sample genotype into a phenotype label."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Return the phenotype warning for the genotype found in ``data``.

        The genotype is read from ``data`` under ``config["genotype_data_key"]``
        (default ``"genotype"``). The mapping comes from
        ``config["phenotypeGenotypeMapping"]`` or
        ``config["phenotype_genotype_mapping"]`` and may have either shape:

        * every value is a string: genotype -> phenotype
        * every value is a list: phenotype -> [genotypes]; the first phenotype
          whose list contains the genotype is used

        Returns ``""`` when there is no mapping, the mapping mixes both shapes,
        or the genotype is not listed.
        """
        caller = config.get("caller_name") or config.get("name")
        observed = data.get(config.get("genotype_data_key", "genotype"))
        table = config.get("phenotypeGenotypeMapping") or config.get("phenotype_genotype_mapping") or {}

        if not table:
            return ""

        # Old format: genotype→phenotype
        if all(isinstance(entry, str) for entry in table.values()):
            if observed in table:
                return f"Based on {caller}, this sample is defined as <b>{table[observed]}</b>"
            return ""

        # New format: phenotype→[genotypes]
        if all(isinstance(entry, list) for entry in table.values()):
            for label, members in table.items():
                if observed in members:
                    return f"Based on {caller}, this sample is defined as <b>{label}</b>"

        return ""
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from .IWarning import IWarning
|
|
2
|
+
from ..Models import JsonDict
|
|
3
|
+
|
|
4
|
+
class SmnWarning(IWarning):
    """Warning formatter that classifies an SMN section from its SMN1 copy number."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Return the SMN warning text for ``data``.

        ``data["smn1CopyNumber"]`` drives the result:

        * ``0`` -> positive
        * ``1`` -> carrier
        * ``2`` -> increased silent-carrier (2+0) risk, but only when
          ``data["variants"]`` holds at least one entry with a non-empty
          identifier and a copy number of at least 1

        Any other value, or a non-numeric copy number, yields ``""``.

        NOTE(review): any identified variant triggers the silent-carrier
        message; no particular variant id or configurable threshold is checked
        here. Confirm that this is the intended rule.
        """
        caller = config.get("caller_name") or config.get("name") or "SMN Caller"
        condition_name = config.get("disease_name") or config.get("disease") or "Spinal Muscular Atrophy"

        copies = data.get("smn1CopyNumber")
        if not isinstance(copies, (int, float)):
            return ""

        if copies == 0:
            return f"Based on the {caller}, this sample is positive for {condition_name}"
        if copies == 1:
            return f"Based on the {caller}, this sample is carrier for {condition_name}"
        if copies != 2:
            return ""

        for entry in data.get("variants", []) or []:
            # variants may be strings or dicts with keys like 'alleleId', 'hgvs', 'name', 'variantName', 'id'
            if isinstance(entry, str):
                identifier = entry
                allele_copies = 1  # presence implies >=1 when encoded as string only
            elif isinstance(entry, dict):
                identifier = (
                    entry.get("alleleId")
                    or entry.get("hgvs")
                    or entry.get("name")
                    or entry.get("variantName")
                    or entry.get("id")
                    or ""
                )
                allele_copies = (
                    entry.get("alleleCopyNumber")
                    or entry.get("allele_copy_number")
                    or entry.get("altCopyNumber")
                )
            else:
                continue

            if identifier == "":
                continue
            if isinstance(allele_copies, (int, float)) and allele_copies >= 1:
                return (f"Based on the {caller}, this sample has increased risk of being a silent carrier (2+0) for {condition_name}")

        return ""
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from typing import Dict, Optional
|
|
2
|
+
from .IWarning import IWarning
|
|
3
|
+
from .CarrierPositiveWarning import CarrierPositiveWarning
|
|
4
|
+
from .GenotypeWarning import GenotypeWarning
|
|
5
|
+
from .ConditionWarning import ConditionWarning
|
|
6
|
+
from .SmnWarning import SmnWarning
|
|
7
|
+
from .GbaWarning import GbaWarning
|
|
8
|
+
class WarningFactory:
    """Selects the warning formatter that matches a warning configuration."""

    @staticmethod
    def get_formatter(wtype: Optional[Dict]) -> IWarning:
        """Return a new formatter for the warning described by ``wtype``.

        Args:
            wtype: Warning configuration dict. Its ``"type"`` entry selects the
                formatter: ``"condition"``, ``"carrier_positive"``,
                ``"genotype"``, ``"smn"`` or ``"gba"``.

        Returns:
            A fresh instance of the matching formatter class.

        Raises:
            ValueError: If ``wtype`` is ``None``, is not a dict, has a missing
                or empty ``"type"``, or names an unknown type.
        """
        if wtype is None:
            raise ValueError("Warning type cannot be None")
        if not isinstance(wtype, dict):
            # This check used to sit after the "empty type" check and could
            # never run: non-dict input was reported as "cannot be empty".
            # It stays a ValueError because that is the exception class such
            # input has always produced, so existing handlers keep working.
            raise ValueError(f"Warning type must be a dict, got {type(wtype).__name__}")

        warning_type = wtype.get("type")
        if warning_type is None or warning_type == "":
            raise ValueError("Warning type cannot be empty")

        if warning_type == "condition":
            return ConditionWarning()
        if warning_type == "carrier_positive":
            return CarrierPositiveWarning()
        if warning_type == "genotype":
            return GenotypeWarning()
        if warning_type == "smn":
            return SmnWarning()
        if warning_type == "gba":
            return GbaWarning()

        # If the warning type is not found, raise an error
        raise ValueError(f"Unknown warning type: {warning_type}")
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from .AdvancedAnalysisParser import AdvancedAnalysisParser
|
|
2
|
+
from .Models import JsonDict, FieldCondition, ConditionOperator, FieldWarningConfig, SectionConfig
|
|
3
|
+
from .Warnings import IWarning, WarningFactory
|
|
4
|
+
from .Parsers import IAdvancedAnalysisFileParser, AdvancedAnalysisFileParserFactory
|
|
5
|
+
from .AdvancedAnalysisConstants import AdvancedAnalysisConstants
|
|
6
|
+
__all__ = [
|
|
7
|
+
"AdvancedAnalysisParser",
|
|
8
|
+
"JsonDict",
|
|
9
|
+
"FieldCondition",
|
|
10
|
+
"ConditionOperator",
|
|
11
|
+
"FieldWarningConfig",
|
|
12
|
+
"SectionConfig",
|
|
13
|
+
"IWarning",
|
|
14
|
+
"WarningFactory",
|
|
15
|
+
"IAdvancedAnalysisFileParser",
|
|
16
|
+
"AdvancedAnalysisFileParserFactory",
|
|
17
|
+
"AdvancedAnalysisConstants"
|
|
18
|
+
]
|