AdvancedAnalysisFileParser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/AdvancedAnalysisConstants.py +9 -0
  2. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/AdvancedAnalysisParser.py +129 -0
  3. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Models/ConditionOperator.py +9 -0
  4. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Models/FieldCondition.py +29 -0
  5. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Models/FieldWarningConfig.py +4 -0
  6. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Models/JsonDict.py +2 -0
  7. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Models/SectionConfig.py +8 -0
  8. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Models/__init__.py +13 -0
  9. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Parsers/AdvancedAnalysisFileParserFactory.py +36 -0
  10. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Parsers/DragenTruSightOncology500TSVParser.py +102 -0
  11. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Parsers/IAdvancedAnalysisFileParser.py +86 -0
  12. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Parsers/JsonSectionParser.py +39 -0
  13. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Parsers/OneLineTsvParser.py +26 -0
  14. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Parsers/__init__.py +7 -0
  15. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/README.md +570 -0
  16. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/TruSightOncology500.CombinedVariantOutput.tsv +1586 -0
  17. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/dragen424.targeted.json +202 -0
  18. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/dummy_gba_affected_nonrecomb_acn2.targeted.json +21 -0
  19. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/dummy_gba_carrier_one_recomb_only.targeted.json +14 -0
  20. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/dummy_gba_phase_unknown_one_recomb_plus_variant.targeted.json +21 -0
  21. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/dummy_warnset_1.targeted.json +132 -0
  22. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/dummy_warnset_2.targeted.json +145 -0
  23. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/dummy_warnset_3.targeted.json +146 -0
  24. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/gba.tsv +2 -0
  25. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/gba_carrier_1.json +96 -0
  26. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/gba_carrier_2.json +101 -0
  27. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/gba_multiple_phase_unknown_1.json +101 -0
  28. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/gba_multiple_phase_unknown_2.json +105 -0
  29. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/gba_positive_1.json +96 -0
  30. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/gba_positive_2.json +101 -0
  31. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/hba_carrier_1.json +96 -0
  32. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/hba_carrier_2.json +96 -0
  33. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/hba_carrier_3.json +96 -0
  34. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/hba_carrier_4.json +96 -0
  35. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/hba_hemoglobin_h_disease.json +96 -0
  36. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/hba_silent_carrier.json +96 -0
  37. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/smn.tsv +2 -0
  38. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/smn_carrier.json +96 -0
  39. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/smn_positive.json +96 -0
  40. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test/smn_silent_carrier_risk.json +101 -0
  41. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Test_AdvancedAnalysisParser.py +342 -0
  42. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/CarrierPositiveWarning.py +18 -0
  43. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/ConditionWarning.py +20 -0
  44. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/GbaWarning.py +50 -0
  45. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/GenotypeWarning.py +22 -0
  46. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/IWarning.py +7 -0
  47. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/SmnWarning.py +29 -0
  48. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/WarningFactory.py +31 -0
  49. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/Warnings/__init__.py +7 -0
  50. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/__init__.py +18 -0
  51. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/advConfig.json +96 -0
  52. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/dragen_500_tsv_config.json +53 -0
  53. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/gba_tsv_config.json +23 -0
  54. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/run_test_parser.py +29 -0
  55. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser/smn_tsv_config.json +23 -0
  56. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser.egg-info/PKG-INFO +152 -0
  57. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser.egg-info/SOURCES.txt +64 -0
  58. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser.egg-info/dependency_links.txt +1 -0
  59. advancedanalysisfileparser-0.1.0/AdvancedAnalysisFileParser.egg-info/top_level.txt +1 -0
  60. advancedanalysisfileparser-0.1.0/LICENSE +9 -0
  61. advancedanalysisfileparser-0.1.0/MANIFEST.in +3 -0
  62. advancedanalysisfileparser-0.1.0/PKG-INFO +152 -0
  63. advancedanalysisfileparser-0.1.0/README.md +132 -0
  64. advancedanalysisfileparser-0.1.0/pyproject.toml +15 -0
  65. advancedanalysisfileparser-0.1.0/setup.cfg +26 -0
@@ -0,0 +1,9 @@
1
+ from typing import Final
2
class AdvancedAnalysisConstants:
    """Namespace of string keys shared across the Advanced Analysis package.

    The class is used purely as a constants container; ``__slots__ = ()``
    keeps accidental instances stateless.
    """

    __slots__ = ()

    # Keys read from the configuration / request dictionary.
    OUTPUT_DIR: Final[str] = "output_dir"
    OUTPUT_JSON: Final[str] = "output_json"
    INPUT_DIR: Final[str] = "input_dir"
    MAP_FILES: Final[str] = "map_files"

    # Keys used when building results.
    WARNING_KEY: Final[str] = "Warning"
    SEQUENCE_ID: Final[str] = "sequence_id"
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import json
4
+ import logging
5
+ import os
6
+ import re
7
+ from typing import Optional
8
+ from pathlib import Path
9
+ from .Models import *
10
+ from .Warnings import *
11
+ from .Parsers import *
12
+ from .AdvancedAnalysisConstants import AdvancedAnalysisConstants
13
+
14
class AdvancedAnalysisParser:
    """
    Parse special callers outputs into unified JSON format.

    The input files are specified in a configuration JSON file.
    The output is a JSON file with the parsed data and warnings.
    """

    def __init__(self, json_request: "JsonDict") -> None:
        """Initialise directories and the input-file list from *json_request* with defaults."""
        self.output_dir: str = json_request.get(AdvancedAnalysisConstants.OUTPUT_DIR, '.')
        self.output_json: str = json_request.get(AdvancedAnalysisConstants.OUTPUT_JSON, 'adv_analysis_output.json')
        self.input_dir: str = json_request.get(AdvancedAnalysisConstants.INPUT_DIR, '.')
        self.input_files: list[str] = json_request.get('input_files', [])

    def run(self, return_dict: bool = False) -> "Optional[JsonDict]":
        """
        Parse all configured inputs.

        When *return_dict* is True the unified result is returned to the
        caller; otherwise it is written as JSON to
        ``<output_dir>/<output_json>`` and None is returned.
        """
        result = self.parse_files()
        if return_dict:
            return result
        out_path = os.path.join(self.output_dir, self.output_json)
        with open(out_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=4)
        logging.info(f"Wrote unified JSON to {out_path}")
        return None

    def parse_files(self) -> "JsonDict":
        """
        Parse every input file and merge the per-file results into one dict.

        If no explicit input files are configured, ``input_dir`` is scanned
        for ``.json``/``.tsv`` files.

        Raises:
            ValueError: when no input files can be found at all.
        """
        logging.info("Started parsing Advanced Analysis files")
        # BUG FIX: the original condition was
        #     not self.input_files or self.input_files == [] and self.input_dir and ...
        # which, by and/or precedence, entered this branch whenever the file
        # list was empty -- even when input_dir did not exist -- and then
        # crashed inside os.listdir().
        if not self.input_files and self.input_dir and os.path.exists(self.input_dir):
            folder_files = [
                os.path.join(self.input_dir, f)
                for f in os.listdir(self.input_dir)
                if os.path.isfile(os.path.join(self.input_dir, f))
                and f.lower().endswith(('.json', '.tsv'))
            ]
            self.input_files.extend(folder_files)
        if not self.input_files:
            raise ValueError("No input files found in configuration or input directory.")
        result: "JsonDict" = {}
        for fname in self.input_files:
            if not os.path.exists(fname):
                # Relative names may be given; retry inside input_dir.
                fname_path = os.path.join(self.input_dir, fname)
                if os.path.exists(fname_path):
                    fname = fname_path
                else:
                    logging.warning(f"File {fname} not found. Skipping.")
                    continue
            parser: "IAdvancedAnalysisFileParser" = AdvancedAnalysisFileParserFactory.get_parser(fname)
            file_parsed_data = self.parse_by_config(parser)
            result.update(file_parsed_data)
        return result

    def parse_by_config(self, parser: "IAdvancedAnalysisFileParser") -> "JsonDict":
        """Run *parser* on its file and shape the raw output via its config."""
        caller_data: "JsonDict" = parser.parse()
        return parser.build_result(caller_data)

    @staticmethod
    def _threshold_warning(w: "JsonDict", parsed: "JsonDict", value_key: str, threshold_key: str, label: str) -> str:
        """
        Build a warning string when the value named by ``w[value_key]`` in
        *parsed* is >= the threshold stored at ``w[threshold_key]``.

        Returns "" when the value is missing, non-numeric, or below threshold.
        """
        name = w.get("caller_name")
        key = w.get(value_key)
        threshold = w.get(threshold_key)
        val = parsed.get(key or "")
        if isinstance(val, (int, float)) and threshold is not None and val >= threshold:
            return f"Based on {name}, sample {label} {val} ≥ {threshold}"
        return ""

    @staticmethod
    def parse_args() -> argparse.Namespace:
        """Define and parse the command-line interface."""
        parser = argparse.ArgumentParser(
            description="Parse special callers outputs into unified JSON"
        )
        parser.add_argument("-c", "--config", required=False, help="Path to config JSON file")
        parser.add_argument("--input-dir", help="Input directory")
        parser.add_argument("--output-dir", help="Output directory")
        parser.add_argument("--input-files", nargs='+', help="Input file(s)")
        parser.add_argument("--output-json", help="Output JSON filename")
        return parser.parse_args()

    @classmethod
    def from_cli(cls) -> None:
        """Build a parser from CLI args (merged over an optional config file) and run it."""
        args = cls.parse_args()
        config = {}
        # Load config file if provided.
        if args.config:
            with open(args.config, 'r') as f:
                config = json.load(f)
        # CLI arguments take precedence over the config file.
        if args.input_dir:
            config[AdvancedAnalysisConstants.INPUT_DIR] = args.input_dir
        if args.output_dir:
            config[AdvancedAnalysisConstants.OUTPUT_DIR] = args.output_dir
        if args.input_files:
            # Build the map_files structure for the explicitly listed files,
            # preserving any per-file settings coming from the config file.
            map_files = {
                fname: config.get('map_files', {}).get(fname, {})
                for fname in args.input_files
            }
            config[AdvancedAnalysisConstants.MAP_FILES] = map_files
            config['input_files'] = args.input_files
        if args.output_json:
            config[AdvancedAnalysisConstants.OUTPUT_JSON] = args.output_json
        cls(config).run()
111
+
112
if __name__ == "__main__":
    import sys
    from argparse import ArgumentParser

    # Peek at --output-dir before the full CLI parse so the log file can be
    # placed in the requested output directory from the very first message.
    bootstrap = ArgumentParser(add_help=False)
    bootstrap.add_argument("--output-dir", default=".")
    known, _unused = bootstrap.parse_known_args()

    log_path = os.path.join(known.output_dir, "advanced_analysis.log")
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s: %(message)s",
        handlers=[
            logging.StreamHandler(sys.stdout),
            logging.FileHandler(log_path, mode="a", encoding="utf-8"),
        ],
    )
    logging.info(f"Logging to {log_path}")
    AdvancedAnalysisParser.from_cli()
@@ -0,0 +1,9 @@
1
+ from enum import Enum
2
class ConditionOperator(Enum):
    """Comparison operators usable in field-condition configuration.

    The enum value is the textual operator as it appears in config files.
    """

    EQ = "=="        # equality
    NE = "!="        # not equal
    GT = ">"         # strictly greater than
    LT = "<"         # strictly less than
    GE = ">="        # greater than or equal
    LE = "<="        # less than or equal
    CONTAINS = "in"  # substring or collection membership
@@ -0,0 +1,29 @@
1
+ from typing import Any, Callable,Dict,Optional
2
+ from .ConditionOperator import ConditionOperator
3
+
4
class FieldCondition:
    """A single predicate applied to a parsed field value.

    ``check(value)`` evaluates ``value <operator> self.value``; ``message`` is
    the human-readable warning text emitted when the condition matches.
    """

    def __init__(self, operator: ConditionOperator, value: Any, message: str, field: Optional[str] = None):
        self.operator = operator
        self.value = value
        self.message = message
        # BUG FIX: `field` was accepted but silently dropped; store it so
        # callers can identify which field the condition targets.
        self.field = field

    def check(self, value: Any) -> bool:
        """Return True when *value* satisfies this condition."""
        func = OPERATOR_FUNCS[self.operator]
        return func(value, self.value)

    def __str__(self) -> str:
        return self.message


# Map each operator to a function that compares (field_value, target_value) -> bool.
# Numeric comparisons guard on type so non-numeric values simply fail the
# check instead of raising TypeError.
OPERATOR_FUNCS: Dict[ConditionOperator, Callable[[Any, Any], bool]] = {
    ConditionOperator.EQ: lambda a, b: a == b,
    ConditionOperator.NE: lambda a, b: a != b,
    ConditionOperator.GT: lambda a, b: isinstance(a, (int, float)) and a > b,
    ConditionOperator.LT: lambda a, b: isinstance(a, (int, float)) and a < b,
    ConditionOperator.GE: lambda a, b: isinstance(a, (int, float)) and a >= b,
    ConditionOperator.LE: lambda a, b: isinstance(a, (int, float)) and a <= b,
    ConditionOperator.CONTAINS: lambda a, b: (isinstance(a, str) and b in a)
    or (isinstance(a, (list, set)) and b in a),
}
28
+
29
+
@@ -0,0 +1,4 @@
1
+ from typing import Optional
2
+ from .FieldCondition import FieldCondition
3
class FieldWarningConfig:
    """Holder for an optional warning condition attached to a field.

    NOTE(review): `warning` is a class-level attribute shared by all
    instances until shadowed; presumably each instance assigns its own
    condition — confirm against callers.
    """
    # Condition that, when satisfied, produces a warning for the field.
    warning: Optional[FieldCondition] = None
@@ -0,0 +1,2 @@
1
+ from typing import Any, Dict
2
# Alias for JSON-like dictionaries (string keys, arbitrary values) used
# throughout the package.
JsonDict = Dict[str, Any]
@@ -0,0 +1,8 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional,Dict
3
+ from .FieldCondition import FieldCondition
4
+
5
@dataclass
class SectionConfig:
    """Per-section parsing configuration.

    Attributes:
        include_all_fields: when True, extract every field of the section
            regardless of `fields`.
        fields: optional mapping of field name to its condition; when set
            (and `include_all_fields` is False) only these fields are kept.
    """
    # Extract all fields of the section when True.
    include_all_fields: bool = False
    # Field-name -> condition mapping; None means no per-field filter is configured.
    fields: Optional[Dict[str, FieldCondition]] = None
@@ -0,0 +1,13 @@
1
+ from .JsonDict import JsonDict
2
+ from .FieldCondition import FieldCondition
3
+ from .ConditionOperator import ConditionOperator
4
+ from .FieldWarningConfig import FieldWarningConfig
5
+ from .SectionConfig import SectionConfig
6
+
7
+ __all__ = [
8
+ "JsonDict",
9
+ "ConditionOperator",
10
+ "SectionConfig",
11
+ "FieldCondition",
12
+ "FieldWarningConfig"
13
+ ]
@@ -0,0 +1,36 @@
1
+ from .IAdvancedAnalysisFileParser import IAdvancedAnalysisFileParser
2
+ from .DragenTruSightOncology500TSVParser import DragenTruSightOncology500TSVParser
3
+ from .OneLineTsvParser import OneLineTsvParser
4
+ from .JsonSectionParser import JsonSectionParser
5
+ from ..Models import JsonDict
6
class AdvancedAnalysisFileParserFactory:
    """
    Returns the correct IParser implementation based on file path.
    """

    @staticmethod
    def _load_bundled_config(resource_name: str) -> dict:
        """Load a JSON config file shipped inside this package."""
        # Imported lazily, as in the original, to keep module import cheap.
        import importlib.resources
        import json
        with importlib.resources.files('AdvancedAnalysisFileParser').joinpath(resource_name).open('r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def get_parser(file_path: str) -> "IAdvancedAnalysisFileParser":
        """
        Select and construct the parser matching *file_path*.

        Raises:
            RuntimeError: when the bundled config cannot be loaded or the
                parser cannot be constructed.
            ValueError: for unsupported file types.
        """
        lower = file_path.lower()
        load = AdvancedAnalysisFileParserFactory._load_bundled_config
        try:
            if lower.endswith(".tsv"):
                # The CombinedVariantOutput check must precede the generic
                # per-caller TSV checks.
                if lower.endswith("combinedvariantoutput.tsv"):
                    return DragenTruSightOncology500TSVParser(load('dragen_500_tsv_config.json'), file_path)
                if lower.endswith("smn.tsv"):
                    return OneLineTsvParser(load('smn_tsv_config.json'), file_path)
                if lower.endswith("gba.tsv"):
                    return OneLineTsvParser(load('gba_tsv_config.json'), file_path)
            if lower.endswith(".json"):
                return JsonSectionParser(load('advConfig.json'), file_path)
        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise RuntimeError(f"Failed to load config for {file_path}: {e}") from e
        raise ValueError(f"Unsupported file type: {file_path}")
@@ -0,0 +1,102 @@
1
+ import csv
2
+ import logging
3
+ from typing import Dict, Any, List, Optional, Union, Tuple
4
+ from .IAdvancedAnalysisFileParser import IAdvancedAnalysisFileParser
5
+ from ..Models import JsonDict, SectionConfig
6
+
7
+
8
class DragenTruSightOncology500TSVParser(IAdvancedAnalysisFileParser):
    """
    Parser for CombinedVariantOutput TSV.

    The file is a sequence of ``[Section]`` blocks. A section holds either
    key/value pairs (two columns) or a table (header row then data rows).
    Only sections present in the configuration are read; within them only the
    configured fields are kept unless ``include_all_fields`` is set.
    Parsing is streamed: the file is read once, line by line.
    """

    def parse(self) -> JsonDict:
        """Prepare section configs and delegate to the streaming loader."""
        sections = self._prepare_configs(self.section_config)
        return self._load_and_parse_sections(sections)

    def _load_and_parse_sections(self, sections: Dict[str, Union[SectionConfig, dict]]) -> JsonDict:
        """
        Open the TSV and extract the requested sections on the fly.

        KV sections become ``{key: value}`` dicts; tabular sections become
        lists of ``{column: value}`` dicts, one per data row.
        """
        result: JsonDict = {}
        headers: Dict[str, List[str]] = {}             # full header row per tabular section
        state: Dict[str, Tuple[JsonDict, bool]] = {}   # (configured fields, include_all) per section
        current_section: Optional[str] = None

        with open(self.file_path, 'r', newline='', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='\t')
            for raw in reader:
                # Skip blank and comment lines.
                if not raw or raw[0].startswith('#'):
                    continue
                row = [c.strip() for c in raw if c.strip()]
                if not row:
                    continue

                # Section header, e.g. "[Analysis Details]".
                if row[0].startswith('[') and row[0].endswith(']'):
                    sec = row[0].strip('[]')
                    if sec in sections:
                        current_section = sec
                        state[sec] = self._extract_fields(sections[sec])
                        result[sec] = {}
                        headers.pop(sec, None)
                    else:
                        current_section = None
                    continue

                if current_section is None:
                    continue

                fields, include_all = state[current_section]

                if current_section not in headers:
                    # Two-column line before any header: key/value pair.
                    if len(row) == 2:
                        key, val = row
                        if include_all or key in fields:
                            result[current_section][key] = IAdvancedAnalysisFileParser._format_value(val)
                        continue
                    # First wider line: table header. Keep the FULL header so
                    # column positions stay aligned with data rows; the
                    # original pre-filtered the header, misaligning indices.
                    headers[current_section] = row
                    # BUG FIX: tabular rows are accumulated in a list. The
                    # original kept the dict created at section start and
                    # called .append() on it, which raised AttributeError on
                    # the first data row of every tabular section.
                    if not result[current_section]:
                        result[current_section] = []
                    continue

                # Data row of a tabular section.
                hdr = headers[current_section]
                record = {
                    h: IAdvancedAnalysisFileParser._format_value(v)
                    for h, v in zip(hdr, row)
                    if include_all or not fields or h in fields
                }
                if isinstance(result[current_section], list):
                    result[current_section].append(record)
                else:
                    # Section mixed KV pairs with a table: unexpected for this
                    # format; keep the KV data and log the dropped row rather
                    # than crashing.
                    logging.warning(
                        f"Section [{current_section}] mixes key/value pairs and table rows; dropping row {row}"
                    )
        return result

    def _prepare_configs(self, config: JsonDict) -> Dict[str, Union[SectionConfig, dict]]:
        """Return a shallow copy of the raw section configuration."""
        return dict(config)

    def _extract_fields(
        self,
        cfg: Union[SectionConfig, dict]
    ) -> Tuple[JsonDict, bool]:
        """
        Normalise a section config into ``(fields, include_all)``.

        ``fields`` maps each requested field name to None — only the keys are
        used for filtering.
        """
        if isinstance(cfg, SectionConfig):
            return cfg.fields or {}, cfg.include_all_fields
        raw_fields = {k: None for k in (cfg.get('fields') or {})}
        return raw_fields, cfg.get('include_all_fields', False)
@@ -0,0 +1,86 @@
1
from abc import ABC, abstractmethod
import os
from pathlib import Path
from typing import Any

from ..AdvancedAnalysisConstants import AdvancedAnalysisConstants
from ..Models import JsonDict
from ..Warnings import IWarning
from ..Warnings.WarningFactory import WarningFactory
11
class IAdvancedAnalysisFileParser(ABC):
    """
    Abstract base for all Advanced Analysis file parsers.

    Subclasses implement ``parse()``; ``build_result()`` then shapes the
    parsed data (and any configured warnings) into the unified result dict.
    """

    # Per-section parsing configuration supplied at construction time.
    section_config: "JsonDict"
    # NOTE(review): declared but never assigned anywhere in this class —
    # presumably used by subclasses; confirm before removing.
    file_config: "JsonDict"

    def __init__(self, section_config: "JsonDict", file_path: str) -> None:
        """
        Validate *file_path* and store the section configuration.

        Raises:
            ValueError: empty path or unsupported extension.
            TypeError: non-string path.
            FileNotFoundError: path missing both as given and relative to the
                package directory.
        """
        self.section_config = section_config
        if not file_path:
            raise ValueError("Filename cannot be empty")
        if not isinstance(file_path, str):
            # BUG FIX: the original referenced the undefined name `filename`
            # here, raising NameError instead of the intended TypeError.
            raise TypeError(f"Filename must be a string, got {type(file_path).__name__}")
        if not file_path.lower().endswith(('.json', '.tsv')):
            # BUG FIX: the original message wrongly claimed a lowercase
            # problem; this check is about the file extension.
            raise ValueError("Filename must end with .json or .tsv")
        # Fall back to a path relative to the package root when the file is
        # not found as given.
        if not os.path.exists(file_path):
            base = Path(__file__).resolve().parent.parent
            file_path = os.path.join(base, file_path)
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File not found: {file_path}")
        self.file_path: str = file_path

    @abstractmethod
    def parse(self) -> "JsonDict":
        """Parse the file and return raw per-caller data."""
        pass

    def build_result(self, data: "JsonDict") -> "JsonDict":
        """
        Shape parsed *data* into the unified result dict, keyed by the
        upper-cased caller name. Callers with a ``fields`` config always get
        an entry; callers without ``fields`` get one only when a warning is
        configured.
        """
        result: "JsonDict" = {}
        for caller_name, caller_config in self.section_config.items():
            caller_title = caller_config.get("caller_name", None)
            fields = caller_config.get("fields", None)
            caller_data = data.get(caller_name, {})
            key = caller_name.upper()
            if fields is None:
                caller_warning = caller_config.get(AdvancedAnalysisConstants.WARNING_KEY, None)
                if not caller_warning:
                    # No fields and no warning configured: nothing to report.
                    continue
                warning_formatter: "IWarning" = WarningFactory.get_formatter(caller_warning)
                warning_text = warning_formatter.format(caller_warning, caller_data)
                entry = result.setdefault(key, {"data": {}, "warning": {}})
                entry["caller_name"] = caller_title
                entry["data"] = caller_data
                entry["warning"] = warning_text
            else:
                entry = result.setdefault(key, {"data": {}})
                entry["caller_name"] = caller_title
                entry["data"] = caller_data
        return result

    @staticmethod
    def _format_value(v: Any) -> Any:
        """
        Normalise a raw parsed value.

        Dicts and lists recurse (empty-string/None list items are dropped,
        and an empty result list becomes None); strings are coerced to
        bool/None/int/rounded float when possible; floats are rounded to
        2 decimals; anything else passes through unchanged.
        """
        if isinstance(v, dict):
            # Renamed loop variables: the original shadowed `v` inside the
            # comprehension, which worked but obscured intent.
            return {key: IAdvancedAnalysisFileParser._format_value(val) for key, val in v.items()}
        if isinstance(v, list):
            clean = [IAdvancedAnalysisFileParser._format_value(x) for x in v if x not in ("", None)]
            return clean or None
        if isinstance(v, str):
            low = v.lower()
            if low == "true":
                return True
            if low == "false":
                return False
            if low in ("none", "null", ""):
                return None
            try:
                return int(v)
            except ValueError:
                pass
            try:
                return round(float(v), 2)
            except ValueError:
                return v
        if isinstance(v, float):
            return round(v, 2)
        return v
@@ -0,0 +1,39 @@
1
+ import json
2
+ from .IAdvancedAnalysisFileParser import IAdvancedAnalysisFileParser
3
+ from ..Models import JsonDict
4
class JsonSectionParser(IAdvancedAnalysisFileParser):
    """
    Parse JSON files with a specific section into a unified format.
    The section is specified by the 'json_key' or 'caller_key' in the config.
    """

    def parse(self) -> JsonDict:
        """
        Extract each configured section from the JSON file, matching section
        names case-insensitively, and format every value. Missing sections
        are logged and yield an empty dict.
        """
        import logging
        data = JsonSectionParser._load_json(self.file_path)
        result = {}
        # Case-insensitive lookup: map lowercased input keys to actual keys.
        data_key_map = {k.lower(): k for k in data.keys()}
        for key in self.section_config.keys():
            caller_data = {}
            actual_key = data_key_map.get(key.lower())
            if actual_key is None:
                logging.warning(f"Key '{key}' from config not found in input data. Skipping.")
            else:
                section = data.get(actual_key, {})
                if section == {}:
                    logging.warning(f"Section '{key}' is empty in the input data.")
                for k, value in section.items():
                    if k == 'variants':
                        # Format each variant individually so an empty variant
                        # list stays a list instead of collapsing to None
                        # (as _format_value does for lists).
                        caller_data[k] = [
                            IAdvancedAnalysisFileParser._format_value(var) for var in value
                        ]
                    else:
                        caller_data[k] = IAdvancedAnalysisFileParser._format_value(value)
            result[key] = caller_data
        return result

    @staticmethod
    def _load_json(path: str) -> JsonDict:
        """Load and return the JSON document at *path*."""
        with open(path, 'r') as f:
            return json.load(f)
@@ -0,0 +1,26 @@
1
+ from typing import Any
2
+ from .IAdvancedAnalysisFileParser import IAdvancedAnalysisFileParser
3
+ from ..Models import JsonDict
4
+ # --- Parser Implementations ---
5
class OneLineTsvParser(IAdvancedAnalysisFileParser):
    """
    Parser for two-line TSV files: a header line followed by a single value
    line (e.g. gba.tsv / smn.tsv caller outputs).
    """

    is_tsv_parser = True

    def parse(self) -> JsonDict:
        """
        Read the header/value pair, format each value, and return the result
        keyed by the caller name resolved from the configuration.

        Raises:
            ValueError: when the section configuration is empty, so no caller
                name can be resolved.
        """
        import os

        with open(self.file_path, 'r') as f:
            headers = f.readline().strip().split('\t')
            values = f.readline().strip().split('\t')
        parsed = {h: IAdvancedAnalysisFileParser._format_value(v) for h, v in zip(headers, values)}

        # BUG FIX(robustness): an empty config previously escaped as a bare
        # StopIteration from next(); fail with a clear error instead.
        if not self.section_config:
            raise ValueError(f"No caller configured for {self.file_path}")

        # Match the caller name against the file basename (e.g. 'gba' ->
        # 'GBA'); fall back to the first configured caller.
        filename_base = os.path.splitext(os.path.basename(self.file_path))[0].lower()
        caller_name = next(
            (key for key in self.section_config if key.lower() == filename_base),
            next(iter(self.section_config)),
        )

        # Only return parsed data under the caller name.
        return {caller_name: parsed}
@@ -0,0 +1,7 @@
1
+ from .IAdvancedAnalysisFileParser import IAdvancedAnalysisFileParser
2
+ from .AdvancedAnalysisFileParserFactory import AdvancedAnalysisFileParserFactory
3
+
4
+ __all__ = [
5
+ "IAdvancedAnalysisFileParser",
6
+ "AdvancedAnalysisFileParserFactory",
7
+ ]