AdvancedAnalysisFileParser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. AdvancedAnalysisFileParser/AdvancedAnalysisConstants.py +9 -0
  2. AdvancedAnalysisFileParser/AdvancedAnalysisParser.py +129 -0
  3. AdvancedAnalysisFileParser/Models/ConditionOperator.py +9 -0
  4. AdvancedAnalysisFileParser/Models/FieldCondition.py +29 -0
  5. AdvancedAnalysisFileParser/Models/FieldWarningConfig.py +4 -0
  6. AdvancedAnalysisFileParser/Models/JsonDict.py +2 -0
  7. AdvancedAnalysisFileParser/Models/SectionConfig.py +8 -0
  8. AdvancedAnalysisFileParser/Models/__init__.py +13 -0
  9. AdvancedAnalysisFileParser/Parsers/AdvancedAnalysisFileParserFactory.py +36 -0
  10. AdvancedAnalysisFileParser/Parsers/DragenTruSightOncology500TSVParser.py +102 -0
  11. AdvancedAnalysisFileParser/Parsers/IAdvancedAnalysisFileParser.py +86 -0
  12. AdvancedAnalysisFileParser/Parsers/JsonSectionParser.py +39 -0
  13. AdvancedAnalysisFileParser/Parsers/OneLineTsvParser.py +26 -0
  14. AdvancedAnalysisFileParser/Parsers/__init__.py +7 -0
  15. AdvancedAnalysisFileParser/README.md +570 -0
  16. AdvancedAnalysisFileParser/Test/TruSightOncology500.CombinedVariantOutput.tsv +1586 -0
  17. AdvancedAnalysisFileParser/Test/dragen424.targeted.json +202 -0
  18. AdvancedAnalysisFileParser/Test/dummy_gba_affected_nonrecomb_acn2.targeted.json +21 -0
  19. AdvancedAnalysisFileParser/Test/dummy_gba_carrier_one_recomb_only.targeted.json +14 -0
  20. AdvancedAnalysisFileParser/Test/dummy_gba_phase_unknown_one_recomb_plus_variant.targeted.json +21 -0
  21. AdvancedAnalysisFileParser/Test/dummy_warnset_1.targeted.json +132 -0
  22. AdvancedAnalysisFileParser/Test/dummy_warnset_2.targeted.json +145 -0
  23. AdvancedAnalysisFileParser/Test/dummy_warnset_3.targeted.json +146 -0
  24. AdvancedAnalysisFileParser/Test/gba.tsv +2 -0
  25. AdvancedAnalysisFileParser/Test/gba_carrier_1.json +96 -0
  26. AdvancedAnalysisFileParser/Test/gba_carrier_2.json +101 -0
  27. AdvancedAnalysisFileParser/Test/gba_multiple_phase_unknown_1.json +101 -0
  28. AdvancedAnalysisFileParser/Test/gba_multiple_phase_unknown_2.json +105 -0
  29. AdvancedAnalysisFileParser/Test/gba_positive_1.json +96 -0
  30. AdvancedAnalysisFileParser/Test/gba_positive_2.json +101 -0
  31. AdvancedAnalysisFileParser/Test/hba_carrier_1.json +96 -0
  32. AdvancedAnalysisFileParser/Test/hba_carrier_2.json +96 -0
  33. AdvancedAnalysisFileParser/Test/hba_carrier_3.json +96 -0
  34. AdvancedAnalysisFileParser/Test/hba_carrier_4.json +96 -0
  35. AdvancedAnalysisFileParser/Test/hba_hemoglobin_h_disease.json +96 -0
  36. AdvancedAnalysisFileParser/Test/hba_silent_carrier.json +96 -0
  37. AdvancedAnalysisFileParser/Test/smn.tsv +2 -0
  38. AdvancedAnalysisFileParser/Test/smn_carrier.json +96 -0
  39. AdvancedAnalysisFileParser/Test/smn_positive.json +96 -0
  40. AdvancedAnalysisFileParser/Test/smn_silent_carrier_risk.json +101 -0
  41. AdvancedAnalysisFileParser/Test_AdvancedAnalysisParser.py +342 -0
  42. AdvancedAnalysisFileParser/Warnings/CarrierPositiveWarning.py +18 -0
  43. AdvancedAnalysisFileParser/Warnings/ConditionWarning.py +20 -0
  44. AdvancedAnalysisFileParser/Warnings/GbaWarning.py +50 -0
  45. AdvancedAnalysisFileParser/Warnings/GenotypeWarning.py +22 -0
  46. AdvancedAnalysisFileParser/Warnings/IWarning.py +7 -0
  47. AdvancedAnalysisFileParser/Warnings/SmnWarning.py +29 -0
  48. AdvancedAnalysisFileParser/Warnings/WarningFactory.py +31 -0
  49. AdvancedAnalysisFileParser/Warnings/__init__.py +7 -0
  50. AdvancedAnalysisFileParser/__init__.py +18 -0
  51. AdvancedAnalysisFileParser/advConfig.json +96 -0
  52. AdvancedAnalysisFileParser/dragen_500_tsv_config.json +53 -0
  53. AdvancedAnalysisFileParser/gba_tsv_config.json +23 -0
  54. AdvancedAnalysisFileParser/run_test_parser.py +29 -0
  55. AdvancedAnalysisFileParser/smn_tsv_config.json +23 -0
  56. advancedanalysisfileparser-0.1.0.dist-info/METADATA +152 -0
  57. advancedanalysisfileparser-0.1.0.dist-info/RECORD +60 -0
  58. advancedanalysisfileparser-0.1.0.dist-info/WHEEL +5 -0
  59. advancedanalysisfileparser-0.1.0.dist-info/licenses/LICENSE +9 -0
  60. advancedanalysisfileparser-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,101 @@
1
+ {
2
+ "softwareVersion": "dragen v05.121.779.4.4.4",
3
+ "sampleId": "22WVHMLT4_5_270UDI-idt-UMI",
4
+ "genomeBuild": "hg38",
5
+ "phenotypeDatabaseSources": [
6
+ "PharmCAT Phenotypes Version: Snapshot-2022.09.15"
7
+ ],
8
+ "cyp2b6": {
9
+ "genotype": "*1/*29",
10
+ "genotypeFilter": "PASS",
11
+ "phenotypeDatabaseAnnotation": "Intermediate Metabolizer"
12
+ },
13
+ "lpa": {
14
+ "kiv2CopyNumber": 39.45213332616308,
15
+ "refMarkerAlleleCopyNumber": null,
16
+ "altMarkerAlleleCopyNumber": null,
17
+ "type": "Homozygous REF markers call",
18
+ "variants": [
19
+ {
20
+ "hgvs": "LPA:4925G>A",
21
+ "qual": 0.0,
22
+ "altCopyNumber": 0,
23
+ "altCopyNumberQuality": 150.0
24
+ },
25
+ {
26
+ "hgvs": "LPA:4733G>A",
27
+ "qual": 0.0,
28
+ "altCopyNumber": 0,
29
+ "altCopyNumberQuality": 150.0
30
+ }
31
+ ]
32
+ },
33
+ "rh": {
34
+ "totalCopyNumber": 4,
35
+ "rhdCopyNumber": 2,
36
+ "rhceCopyNumber": 2,
37
+ "variants": [
38
+ {
39
+ "hgvs": "NC_000001.11g.25405596_25409676con25283766_25287797",
40
+ "qual": 26.086525474041604,
41
+ "altCopyNumber": 1,
42
+ "altCopyNumberQuality": 150.0
43
+ }
44
+ ]
45
+ },
46
+ "cyp21a2": {
47
+ "totalCopyNumber": 3,
48
+ "deletionBreakpointInGene": false,
49
+ "recombinantHaplotypes": [
50
+ "",
51
+ ""
52
+ ],
53
+ "variants": []
54
+ },
55
+ "cyp2d6": {
56
+ "totalCopyNumber": 3,
57
+ "genotype": "*1/*5",
58
+ "genotypeFilter": "PASS",
59
+ "genotypeQuality": 150,
60
+ "phenotypeDatabaseAnnotation": "Intermediate Metabolizer",
61
+ "variants": [
62
+ {
63
+ "alleleId": "g.42126938C>T",
64
+ "alleleCopyNumber": 2,
65
+ "genotypeQuality": 21,
66
+ "filter": "PASS"
67
+ }
68
+ ]
69
+ },
70
+ "gba": {
71
+ "totalCopyNumber": 4,
72
+ "deletionBreakpointInGene": null,
73
+ "recombinantHaplotypes": [
74
+ "",
75
+ ""
76
+ ],
77
+ "variants": []
78
+ },
79
+ "hba": {
80
+ "totalCopyNumber": 4,
81
+ "genotype": "aa/aa",
82
+ "genotypeQuality": 56,
83
+ "genotypeFilter": "PASS",
84
+ "variants": []
85
+ },
86
+ "smn": {
87
+ "fullLengthCopyNumber": 3,
88
+ "totalCopyNumber": 4,
89
+ "smn1CopyNumber": 2,
90
+ "smn2CopyNumber": 1,
91
+ "smn2Delta78CopyNumber": 1,
92
+ "fullLengthCopyNumberFloat": "2.59",
93
+ "totalCopyNumberFloat": "3.56",
94
+ "variants": [
95
+ {
96
+ "alleleId": "NM_000344.4:c.*3+80T>G",
97
+ "alleleCopyNumber": 1
98
+ }
99
+ ]
100
+ }
101
+ }
@@ -0,0 +1,342 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import pytest
5
+ from AdvancedAnalysisFileParser import JsonDict
6
+ from AdvancedAnalysisFileParser import AdvancedAnalysisParser
7
+
8
# Always resolve the test data directory relative to this file
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "Test")

# Expected warning texts, named once so every scenario below reads as
# "input file -> expected classification" instead of repeating long literals.
_HBA_HBH_DISEASE = "Based on HBA Special Caller, this sample is defined as <b>positive for hemoglobin H disease</b>"
_HBA_CARRIER = "Based on HBA Special Caller, this sample is defined as <b>carrier</b>"
_HBA_SILENT_CARRIER = "Based on HBA Special Caller, this sample is defined as <b>silent carrier</b>"
_SMN_POSITIVE = "Based on the SMN Caller, this sample is positive for Spinal Muscular Atrophy"
_SMN_CARRIER = "Based on the SMN Caller, this sample is carrier for Spinal Muscular Atrophy"
_SMN_SILENT_CARRIER_RISK = "Based on the SMN Caller, this sample has increased risk of being a silent carrier (2+0) for Spinal Muscular Atrophy"
_GBA_POSITIVE = "Based on the GBA Caller, this sample is positive for Gaucher disease"
_GBA_CARRIER = "Based on the GBA Caller, this sample is carrier for Gaucher disease"
_GBA_PHASE_UNKNOWN = "Multiple GBA variants detected; phase is unknown (could be in cis or in trans). Interpret cautiously."


def _warnings(**by_gene):
    """Build an ``expected`` mapping of ``{gene: {"warning": message}}``."""
    return {gene: {"warning": message} for gene, message in by_gene.items()}


def _make_context(input_file, expected):
    """Build one parametrized scenario for a single input file.

    Every scenario shares the same request shape: the file is read from
    TEST_DATA_DIR and the output name/directory are fixed. A fresh request
    dict is created per call so scenarios never share mutable state.
    """
    return {
        "request": {
            "output_json": "out.json",
            "input_dir": TEST_DATA_DIR,
            "output_dir": TEST_DATA_DIR,
            "input_files": [input_file],
        },
        "expected": expected,
    }


# Define test contexts for parametrization.
# NOTE: "gba_positive_1.json" used to be listed twice with identical
# expectations; the redundant second entry has been dropped.
TEST_CONTEXTS = [
    # Combined warning sets (HBA + SMN + GBA in one file)
    _make_context(
        "dummy_warnset_1.targeted.json",
        _warnings(HBA=_HBA_HBH_DISEASE, SMN=_SMN_POSITIVE, GBA=_GBA_POSITIVE),
    ),
    _make_context(
        "dummy_warnset_2.targeted.json",
        _warnings(HBA=_HBA_CARRIER, SMN=_SMN_CARRIER, GBA=_GBA_PHASE_UNKNOWN),
    ),
    _make_context(
        "dummy_warnset_3.targeted.json",
        _warnings(HBA=_HBA_SILENT_CARRIER, SMN=_SMN_SILENT_CARRIER_RISK, GBA=_GBA_CARRIER),
    ),
    # Single-caller scenarios
    _make_context(
        "dummy_gba_phase_unknown_one_recomb_plus_variant.targeted.json",
        _warnings(GBA=_GBA_PHASE_UNKNOWN),
    ),
    _make_context("dummy_gba_carrier_one_recomb_only.targeted.json", _warnings(GBA=_GBA_CARRIER)),
    _make_context("gba_carrier_1.json", _warnings(GBA=_GBA_CARRIER)),
    _make_context("gba_positive_1.json", _warnings(GBA=_GBA_POSITIVE)),
    _make_context("smn_carrier.json", _warnings(SMN=_SMN_CARRIER)),
    _make_context("smn_positive.json", _warnings(SMN=_SMN_POSITIVE)),
    _make_context("hba_carrier_1.json", _warnings(HBA=_HBA_CARRIER)),
    _make_context("hba_hemoglobin_h_disease.json", _warnings(HBA=_HBA_HBH_DISEASE)),
    _make_context("hba_silent_carrier.json", _warnings(HBA=_HBA_SILENT_CARRIER)),
    _make_context("gba_carrier_2.json", _warnings(GBA=_GBA_CARRIER)),
    _make_context("gba_multiple_phase_unknown_1.json", _warnings(GBA=_GBA_PHASE_UNKNOWN)),
    _make_context("gba_multiple_phase_unknown_2.json", _warnings(GBA=_GBA_PHASE_UNKNOWN)),
    _make_context("gba_positive_2.json", _warnings(GBA=_GBA_POSITIVE)),
    _make_context("hba_carrier_2.json", _warnings(HBA=_HBA_CARRIER)),
    _make_context("hba_carrier_3.json", _warnings(HBA=_HBA_CARRIER)),
    _make_context("hba_carrier_4.json", _warnings(HBA=_HBA_CARRIER)),
    _make_context("smn_silent_carrier_risk.json", _warnings(SMN=_SMN_SILENT_CARRIER_RISK)),
    # Only the presence of these sections is checked (no field expectations).
    _make_context(
        "dragen424.targeted.json",
        {"CYP2D6": {}, "CYP2B6": {}, "CYP21A2": {}, "LPA": {}},
    ),
    _make_context("dummy_gba_affected_nonrecomb_acn2.targeted.json", _warnings(GBA=_GBA_POSITIVE)),
    # TSV inputs
    _make_context("gba.tsv", _warnings(GBA=_GBA_POSITIVE)),
    _make_context("smn.tsv", _warnings(SMN1=_SMN_POSITIVE)),
    # TODO: Fill in expected output for this file
    _make_context("TruSightOncology500.CombinedVariantOutput.tsv", {}),
]
325
+
326
@pytest.mark.parametrize("context", TEST_CONTEXTS)
def test_parser_in_memory(context: JsonDict) -> None:
    """Run the parser on one scenario and compare it with the expected fields.

    The parser is built from ``context["request"]`` and asked to return its
    result as a dict. Every gene listed in ``context["expected"]`` must be
    present in the result, and every expected key must match exactly.
    """
    result = AdvancedAnalysisParser(context["request"]).run(return_dict=True)

    # Debug output, shown by pytest when a scenario fails.
    print(json.dumps(result, indent=2, default=str))
    if result is not None:
        print(f"[DEBUG] Result keys: {list(result.keys())}")
        for gene, section in result.items():
            print(f"[DEBUG] {gene} warning: {section.get('warning')}")

    assert result is not None

    # Validate all expected fields (not just warning)
    for gene, expected_fields in context["expected"].items():
        assert gene in result, f"Missing result for {gene}"
        actual_section = result[gene]
        for key, value in expected_fields.items():
            result_value = actual_section.get(key, None)
            assert result_value == value, f"Mismatch for {gene}.{key}: got {result_value}, expected {value}"
342
+
@@ -0,0 +1,18 @@
1
+ from .IWarning import IWarning
2
+ from ..Models import *
3
class CarrierPositiveWarning(IWarning):
    """Emit a "Positive" warning as soon as one configured condition holds."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Return the warning for the first condition that matches ``data``.

        Args:
            config: Warning configuration. Reads ``caller_name``/``name``,
                ``disease_name``/``disease`` and ``conditions`` (an iterable
                of dicts with ``field``, ``operator`` and ``value``).
            data: Section data handed to ``FieldCondition.check``.

        Returns:
            The string form of the first matching condition, or ``""`` when
            no condition matches or none are configured.

        Raises:
            KeyError: If a condition has no ``operator`` entry or names an
                operator that ``ConditionOperator`` does not define.
        """
        name = config.get("caller_name") or config.get("name")
        disease = config.get("disease_name") or config.get("disease")
        message = f"Based on the {name}, this sample is <b>Positive</b> for {disease}"

        for condition in config.get("conditions") or []:
            # Resolve the operator into a local instead of writing it back
            # into the caller's config: the write-back made a second call with
            # the same config fail, because the stored value was then no
            # longer an operator name.
            raw_operator = condition["operator"]
            operator = (
                ConditionOperator[raw_operator]
                if isinstance(raw_operator, str)
                else raw_operator  # already resolved by an earlier consumer
            )
            field_condition = FieldCondition(
                field=condition.get("field", None),
                operator=operator,
                value=condition.get("value"),
                message=message,
            )
            if field_condition.check(data):
                return str(field_condition)
        return ""
@@ -0,0 +1,20 @@
1
+ from .IWarning import IWarning
2
+ from ..Models import JsonDict, FieldCondition, ConditionOperator
3
+
4
class ConditionWarning(IWarning):
    """Collect the messages of every configured condition that holds."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Evaluate each configured condition and join the triggered messages.

        Args:
            config: Warning configuration; ``config["conditions"]`` is expected
                to be a list of FieldCondition-like dicts (keyword arguments
                for ``FieldCondition``, with ``operator`` given by name).
            data: Section data. When it is a dict containing the condition's
                ``field``, that field's value is what gets checked; otherwise
                ``data`` itself is checked.

        Returns:
            The distinct triggered messages joined with ``". "``, or ``""``
            when nothing triggers.

        Raises:
            KeyError: If a condition has no ``operator`` entry or names an
                operator that ``ConditionOperator`` does not define.
        """
        msgs = []
        for cond_dict in config.get("conditions", []):
            # Build the constructor arguments from a copy so the caller's
            # config is left untouched; writing the resolved operator back
            # made a second call with the same config fail.
            raw_operator = cond_dict["operator"]
            params = dict(cond_dict)
            params["operator"] = (
                ConditionOperator[raw_operator]
                if isinstance(raw_operator, str)
                else raw_operator  # already resolved by an earlier consumer
            )
            cond = FieldCondition(**params)

            # Narrow to the field's value for THIS condition only. The input
            # `data` must not be rebound, otherwise every later condition
            # would be checked against the first matched field's value.
            target = data
            field = cond_dict.get("field")
            if isinstance(data, dict) and field in data:
                target = data[field]

            warning = str(cond)
            if warning in msgs:
                continue  # identical message already reported once
            if cond.check(target):
                msgs.append(warning)
                print(f"Warning triggered by condition: {warning}")
        return ". ".join(msgs)
@@ -0,0 +1,50 @@
1
+ from .IWarning import IWarning
2
+ from ..Models import JsonDict
3
+
4
class GbaWarning(IWarning):
    """Classify a GBA section as positive, phase-unknown, or carrier."""

    @staticmethod
    def _allele_copy(variant):
        """Return the copy number used for counting one non-recombinant variant.

        A variant given as a plain string carries no copy number and is
        counted as 1. For a dict, the first truthy value among
        ``alleleCopyNumber``, ``allele_copy_number`` and ``altCopyNumber`` is
        used; anything that is not a real number counts as 0.
        """
        if isinstance(variant, str):
            return 1
        if not isinstance(variant, dict):
            return 0
        copy_number = (
            variant.get("alleleCopyNumber")
            or variant.get("allele_copy_number")
            or variant.get("altCopyNumber")
        )
        # bool is a subclass of int: without the explicit exclusion a JSON
        # `true` would be counted as one affected copy.
        if isinstance(copy_number, bool) or not isinstance(copy_number, (int, float)):
            return 0
        return copy_number

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Build the GBA warning for one sample.

        Args:
            config: Warning configuration. ``caller_name``/``name`` default to
                "GBA Caller" and ``disease_name``/``disease`` default to
                "Gaucher disease".
            data: GBA section; reads ``recombinantHaplotypes`` and
                ``variants`` (either may be missing or ``None``).

        Returns:
            * the "positive" message when there are two or more non-empty
              recombinant haplotypes, or any variant with copy number >= 2;
            * the "phase is unknown" message when two or more single-copy
              findings (recombinant haplotypes plus copy-number-1 variants)
              are present;
            * the "carrier" message when exactly one such finding is present;
            * ``""`` otherwise.
        """
        name = config.get("caller_name") or config.get("name") or "GBA Caller"
        disease = config.get("disease_name") or config.get("disease") or "Gaucher disease"

        # Empty strings are placeholders for "no recombinant haplotype".
        recombinant_count = sum(
            1 for haplotype in (data.get("recombinantHaplotypes") or []) if haplotype
        )

        # Only dict and string entries are treated as variants.
        copy_numbers = [
            GbaWarning._allele_copy(variant)
            for variant in (data.get("variants") or [])
            if isinstance(variant, (dict, str))
        ]

        # Positive: evidence of two affected alleles
        if recombinant_count >= 2 or any(cn >= 2 for cn in copy_numbers):
            return f"Based on the {name}, this sample is positive for {disease}"

        # From here on there is at most one recombinant haplotype and no
        # variant with copy number >= 2, so the outcome depends only on how
        # many single-copy findings exist in total.
        single_copy_findings = recombinant_count + sum(1 for cn in copy_numbers if cn == 1)

        # Phase unknown: mixed findings
        if single_copy_findings >= 2:
            return (
                "Multiple GBA variants detected; phase is unknown (could be in cis or in trans). Interpret cautiously."
            )

        # Carrier: exactly one affected allele
        if single_copy_findings == 1:
            return f"Based on the {name}, this sample is carrier for {disease}"

        return ""
@@ -0,0 +1,22 @@
1
+ from .IWarning import IWarning
2
+ from ..Models import JsonDict
3
class GenotypeWarning(IWarning):
    """Translate a sample's genotype into a phenotype warning via a mapping."""

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Return the phenotype warning for the sample's genotype.

        The genotype is read from ``data`` under ``config["genotype_data_key"]``
        (default ``"genotype"``). The mapping comes from
        ``phenotypeGenotypeMapping`` or ``phenotype_genotype_mapping`` and is
        supported in two shapes:

        * old format, all values are strings: genotype -> phenotype
        * new format, all values are lists: phenotype -> [genotypes]

        Returns ``""`` when the mapping is empty, mixes both shapes, or does
        not contain the sample's genotype.
        """
        name = config.get("caller_name") or config.get("name")
        sample = data.get(config.get("genotype_data_key", "genotype"))
        mapping = (
            config.get("phenotypeGenotypeMapping")
            or config.get("phenotype_genotype_mapping")
            or {}
        )
        if not mapping:
            return ""

        # Old format: genotype→phenotype
        if all(isinstance(entry, str) for entry in mapping.values()):
            if sample in mapping:
                return f"Based on {name}, this sample is defined as <b>{mapping[sample]}</b>"
            return ""

        # New format: phenotype→[genotypes]; the first phenotype listing the
        # genotype wins.
        if all(isinstance(entry, list) for entry in mapping.values()):
            for phenotype, genotypes in mapping.items():
                if sample in genotypes:
                    return f"Based on {name}, this sample is defined as <b>{phenotype}</b>"

        return ""
@@ -0,0 +1,7 @@
1
+ from abc import ABC, abstractmethod
2
+ from ..Models import JsonDict
3
+
4
class IWarning(ABC):
    """Abstract interface implemented by every warning formatter."""

    @abstractmethod
    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Build the warning text for ``data`` according to ``config``.

        Subclasses must override this method; the base implementation does
        nothing.
        """
@@ -0,0 +1,29 @@
1
+ from .IWarning import IWarning
2
+ from ..Models import JsonDict
3
+
4
class SmnWarning(IWarning):
    """Classify an SMN section as positive, carrier, or silent-carrier risk."""

    @staticmethod
    def _is_number(value) -> bool:
        """Return True for int/float values, excluding bool.

        bool is a subclass of int, so without the exclusion a JSON ``false``
        would compare equal to 0 and be reported as a positive sample.
        """
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def format(self, config: JsonDict, data: JsonDict) -> str:
        """Build the SMN warning for one sample.

        Args:
            config: Warning configuration. ``caller_name``/``name`` default to
                "SMN Caller" and ``disease_name``/``disease`` default to
                "Spinal Muscular Atrophy".
            data: SMN section; reads ``smn1CopyNumber`` and ``variants``.

        Returns:
            * the "positive" message when ``smn1CopyNumber`` is 0;
            * the "carrier" message when it is 1;
            * the "silent carrier (2+0)" message when it is 2 and at least one
              variant has a non-empty identifier and a copy number >= 1;
            * ``""`` otherwise, including when ``smn1CopyNumber`` is missing
              or not numeric.
        """
        name = config.get("caller_name") or config.get("name") or "SMN Caller"
        disease = config.get("disease_name") or config.get("disease") or "Spinal Muscular Atrophy"

        smn1_cn = data.get("smn1CopyNumber")
        if not SmnWarning._is_number(smn1_cn):
            return ""

        # Positive if smn1CopyNumber == 0
        if smn1_cn == 0:
            return f"Based on the {name}, this sample is positive for {disease}"
        # Carrier if smn1CopyNumber == 1
        if smn1_cn == 1:
            return f"Based on the {name}, this sample is carrier for {disease}"
        if smn1_cn != 2:
            return ""

        # Silent carrier risk if smn1CopyNumber == 2 and any identified variant
        # is present with a copy number of at least 1.
        # NOTE(review): the variant identifier is only checked for being
        # non-empty, not compared with a specific marker - confirm that any
        # reported variant is meant to trigger this warning.
        for variant in data.get("variants") or []:
            # variants may be strings or dicts with keys like 'alleleId', 'hgvs', 'name', 'variantName', 'id'
            if isinstance(variant, str):
                variant_id = variant
                copy_number = 1  # presence implies >=1 when encoded as string only
            elif isinstance(variant, dict):
                variant_id = (
                    variant.get("alleleId")
                    or variant.get("hgvs")
                    or variant.get("name")
                    or variant.get("variantName")
                    or variant.get("id")
                    or ""
                )
                copy_number = (
                    variant.get("alleleCopyNumber")
                    or variant.get("allele_copy_number")
                    or variant.get("altCopyNumber")
                )
            else:
                continue
            if variant_id != "" and SmnWarning._is_number(copy_number) and copy_number >= 1:
                return (f"Based on the {name}, this sample has increased risk of being a silent carrier (2+0) for {disease}")
        return ""
@@ -0,0 +1,31 @@
1
+ from typing import Dict, Optional
2
+ from .IWarning import IWarning
3
+ from .CarrierPositiveWarning import CarrierPositiveWarning
4
+ from .GenotypeWarning import GenotypeWarning
5
+ from .ConditionWarning import ConditionWarning
6
+ from .SmnWarning import SmnWarning
7
+ from .GbaWarning import GbaWarning
8
class WarningFactory:
    """Create the warning formatter named by a warning configuration."""

    # Maps the "type" value of a warning configuration to its formatter class.
    _FORMATTERS = {
        "condition": ConditionWarning,
        "carrier_positive": CarrierPositiveWarning,
        "genotype": GenotypeWarning,
        "smn": SmnWarning,
        "gba": GbaWarning,
    }

    @staticmethod
    def get_formatter(wtype: Optional[Dict]) -> IWarning:
        """Return a new formatter instance for ``wtype["type"]``.

        Args:
            wtype: Warning configuration dict; its ``"type"`` entry selects the
                formatter ("condition", "carrier_positive", "genotype", "smn"
                or "gba").

        Returns:
            A fresh instance of the matching ``IWarning`` implementation.

        Raises:
            ValueError: If ``wtype`` is ``None`` or not a dict, if its type is
                missing or empty, or if the type is not a known one.
        """
        if wtype is None:
            raise ValueError("Warning type cannot be None")
        if not isinstance(wtype, dict):
            # This input was already rejected with a ValueError before the old
            # TypeError branch could run, so that branch was dead code. The
            # exception type callers see is kept; only the message now names
            # the actual problem.
            raise ValueError(f"Warning type must be a dict, got {type(wtype).__name__}")

        warning_type = wtype.get("type")
        if warning_type is None or warning_type == "":
            raise ValueError("Warning type cannot be empty")

        # Only strings can name a formatter; the guard also keeps unhashable
        # values from breaking the table lookup.
        formatter_cls = (
            WarningFactory._FORMATTERS.get(warning_type)
            if isinstance(warning_type, str)
            else None
        )
        if formatter_cls is None:
            # If the warning type is not found, raise an error
            raise ValueError(f"Unknown warning type: {warning_type}")
        return formatter_cls()
@@ -0,0 +1,7 @@
1
+ from .IWarning import IWarning
2
+ from .WarningFactory import WarningFactory
3
+
4
+ __all__ = [
5
+ "IWarning",
6
+ "WarningFactory",
7
+ ]
@@ -0,0 +1,18 @@
1
+ from .AdvancedAnalysisParser import AdvancedAnalysisParser
2
+ from .Models import JsonDict, FieldCondition, ConditionOperator, FieldWarningConfig, SectionConfig
3
+ from .Warnings import IWarning, WarningFactory
4
+ from .Parsers import IAdvancedAnalysisFileParser, AdvancedAnalysisFileParserFactory
5
+ from .AdvancedAnalysisConstants import AdvancedAnalysisConstants
6
+ __all__ = [
7
+ "AdvancedAnalysisParser",
8
+ "JsonDict",
9
+ "FieldCondition",
10
+ "ConditionOperator",
11
+ "FieldWarningConfig",
12
+ "SectionConfig",
13
+ "IWarning",
14
+ "WarningFactory",
15
+ "IAdvancedAnalysisFileParser",
16
+ "AdvancedAnalysisFileParserFactory",
17
+ "AdvancedAnalysisConstants"
18
+ ]