paraphrase 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. paraphrase-0.1.0/.github/CODEOWNERS +5 -0
  2. paraphrase-0.1.0/.github/pull_request_template.md +13 -0
  3. paraphrase-0.1.0/.github/workflows/keep_a_changelog.yml +15 -0
  4. paraphrase-0.1.0/.github/workflows/linting.yml +13 -0
  5. paraphrase-0.1.0/.github/workflows/release.yml +21 -0
  6. paraphrase-0.1.0/.github/workflows/test.yml +38 -0
  7. paraphrase-0.1.0/.gitignore +11 -0
  8. paraphrase-0.1.0/.pre-commit-config.yaml +18 -0
  9. paraphrase-0.1.0/.prettierignore +1 -0
  10. paraphrase-0.1.0/.python-version +1 -0
  11. paraphrase-0.1.0/CHANGELOG.md +25 -0
  12. paraphrase-0.1.0/LICENCE +21 -0
  13. paraphrase-0.1.0/PKG-INFO +74 -0
  14. paraphrase-0.1.0/README.md +61 -0
  15. paraphrase-0.1.0/pyproject.toml +42 -0
  16. paraphrase-0.1.0/src/__init__.py +3 -0
  17. paraphrase-0.1.0/src/paraphrase/__init__.py +1 -0
  18. paraphrase-0.1.0/src/paraphrase/config.py +17 -0
  19. paraphrase-0.1.0/src/paraphrase/constants.py +34 -0
  20. paraphrase-0.1.0/src/paraphrase/exceptions.py +28 -0
  21. paraphrase-0.1.0/src/paraphrase/io.py +108 -0
  22. paraphrase-0.1.0/src/paraphrase/main.py +92 -0
  23. paraphrase-0.1.0/src/paraphrase/pipeline.py +26 -0
  24. paraphrase-0.1.0/src/paraphrase/processors.py +145 -0
  25. paraphrase-0.1.0/src/paraphrase/rules_engine.py +251 -0
  26. paraphrase-0.1.0/src/paraphrase/utils.py +0 -0
  27. paraphrase-0.1.0/test-data/HG002.paraphase.json +139513 -0
  28. paraphrase-0.1.0/test-data/HG003.paraphase.json +135075 -0
  29. paraphrase-0.1.0/test-data/HG004.paraphase.json +140297 -0
  30. paraphrase-0.1.0/test-data/rules.yaml +64 -0
  31. paraphrase-0.1.0/tests/test_io_tsv.py +31 -0
  32. paraphrase-0.1.0/tests/test_processors.py +137 -0
  33. paraphrase-0.1.0/tests/test_rules_engine.py +63 -0
  34. paraphrase-0.1.0/tests/test_rules_not_empty.py +18 -0
  35. paraphrase-0.1.0/uv.lock +335 -0
@@ -0,0 +1,5 @@
1
+ # These owners will be the default owners for everything in
2
+ # the repo. Unless a later match takes precedence,
3
+ # @Clinical-Genomics/rare-disease and @dnil will be requested for
4
+ # review when someone opens a pull request.
5
+ * @Clinical-Genomics/rare-disease @dnil
@@ -0,0 +1,13 @@
1
+ ## Description
2
+
3
+ ### Added
4
+
5
+ -
6
+
7
+ ### Changed
8
+
9
+ -
10
+
11
+ ### Fixed
12
+
13
+ -
@@ -0,0 +1,15 @@
1
+ name: "Changelog Reminder"
2
+ on:
3
+ pull_request:
4
+ types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]
5
+
6
+ jobs:
7
+ # Enforces the update of a changelog file on every pull request
8
+ changelog:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v6
12
+ - uses: dangoslen/changelog-enforcer@v3
13
+ with:
14
+ changeLogPath: "CHANGELOG.md"
15
+ skipLabels: "Skip-Changelog"
@@ -0,0 +1,13 @@
1
+ name: linting
2
+
3
+ on:
4
+ - pull_request
5
+
6
+ jobs:
7
+ lint:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - uses: actions/checkout@v6
11
+ - name: Run prek
12
+ id: prek
13
+ uses: j178/prek-action@v1
@@ -0,0 +1,21 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - v*
7
+
8
+ jobs:
9
+ pypi:
10
+ name: Build and publish to PyPI
11
+ runs-on: ubuntu-latest
12
+ # Environment and permissions required for PyPI trusted publishing.
13
+ environment:
14
+ name: release
15
+ permissions:
16
+ id-token: write
17
+ steps:
18
+ - uses: actions/checkout@v6
19
+ - uses: astral-sh/setup-uv@v7
20
+ - run: uv build
21
+ - run: uv publish --trusted-publishing always
@@ -0,0 +1,38 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+
15
+ steps:
16
+ - uses: actions/checkout@v6
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v7
20
+ with:
21
+ version: "latest"
22
+
23
+ - name: Set up Python
24
+ uses: actions/setup-python@v6
25
+ with:
26
+ python-version-file: ".python-version"
27
+
28
+ - name: Install dependencies
29
+ run: uv sync
30
+
31
+ - name: Run tests with coverage
32
+ run: uv run pytest --cov=src --cov-report=xml --cov-report=term
33
+
34
+ - name: Upload coverage to Codecov
35
+ uses: codecov/codecov-action@v4
36
+ with:
37
+ file: ./coverage.xml
38
+ fail_ci_if_error: false
@@ -0,0 +1,11 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ data/
6
+ dist/
7
+ wheels/
8
+ *.egg-info
9
+
10
+ # Virtual environments
11
+ .venv
@@ -0,0 +1,18 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/mirrors-prettier
3
+ rev: "v3.1.0"
4
+ hooks:
5
+ - id: prettier
6
+ additional_dependencies:
7
+ - prettier
8
+ - repo: https://github.com/pre-commit/pre-commit-hooks
9
+ rev: v6.0.0
10
+ hooks:
11
+ - id: trailing-whitespace
12
+ args: [--markdown-linebreak-ext=md]
13
+ - id: end-of-file-fixer
14
+ - repo: https://github.com/astral-sh/ruff-pre-commit
15
+ rev: v0.15.2
16
+ hooks:
17
+ - id: ruff-check
18
+ - id: ruff-format
@@ -0,0 +1 @@
1
+ test-data/HG*
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,25 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## v0.1.0 [2026-02-24]
9
+
10
+ ### Added
11
+
12
+ - [#2](https://github.com/Clinical-Genomics/paraphrase/pull/2) - LICENCE
13
+ - [#2](https://github.com/Clinical-Genomics/paraphrase/pull/2) - Paraphase JSON parsing
14
+ - [#2](https://github.com/Clinical-Genomics/paraphrase/pull/2) - Testing framework
15
+ - [#3](https://github.com/Clinical-Genomics/paraphrase/pull/3) - CHANGELOG and reminder workflow
16
+ - [#3](https://github.com/Clinical-Genomics/paraphrase/pull/3) - CODEOWNERS and PR template
17
+ - [#4](https://github.com/Clinical-Genomics/paraphrase/pull/4) - Paraphase test data
18
+ - [#7](https://github.com/Clinical-Genomics/paraphrase/pull/7) - Release workflow
19
+ - [#11](https://github.com/Clinical-Genomics/paraphrase/pull/11) - Pre-commit hooks and GitHub actions workflow for prettier and ruff
20
+
21
+ ### Changed
22
+
23
+ - [#8](https://github.com/Clinical-Genomics/paraphrase/pull/8) - Phase regions to output
24
+ - [#9](https://github.com/Clinical-Genomics/paraphrase/pull/9) - Rework nested separators for TSV output
25
+ - [#10](https://github.com/Clinical-Genomics/paraphrase/pull/10) - Updated CODEOWNERS
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Felix Lenner
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: paraphrase
3
+ Version: 0.1.0
4
+ Summary: Parse paraphase JSONs
5
+ Author-email: Felix Lenner <felix.lenner@scilifelab.se>
6
+ License: MIT License
7
+ License-File: LICENCE
8
+ Requires-Python: >=3.12
9
+ Requires-Dist: coloredlogs>=15.0.1
10
+ Requires-Dist: pyyaml>=6.0.3
11
+ Requires-Dist: typer>=0.20.0
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Paraphrase
15
+
16
+ Parse paraphase JSONs.
17
+
18
+ ## Usage
19
+
20
+ ```
21
+ Usage: paraphrase [OPTIONS]
22
+
23
+ Parse paraphase JSONs.
24
+
25
+ ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
26
+ │ * --input -f FILE Input JSON files (can be multiple) [required] │
27
+ │ * --sample -s TEXT Sample names corresponding to input JSON files [required] │
28
+ │ --rules -r FILE Optional YAML file with per-gene classification rules (adds 'status' fields) │
29
+ │ --skip-keys TEXT Comma-separated keys to skip (e.g. region_depth,final_haplotypes) │
30
+ │ --genes TEXT Optional comma-separated list of gene names to process │
31
+ │ --output-format -o TEXT Output format: 'json' (default) or 'tsv' [default: json] │
32
+ │ --help Show this message and exit. │
33
+ ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
34
+ ```
35
+
36
+ Example command:
37
+
38
+ ```
39
+ uv run paraphrase \
40
+ --input test-data/HG002.paraphase.json \
41
+ --input test-data/HG003.paraphase.json \
42
+ --input test-data/HG004.paraphase.json \
43
+ --sample HG002 \
44
+ --sample HG003 \
45
+ --sample HG004 \
46
+ --rules test-data/rules.yaml \
47
+ --genes CFH,CFHR3,f8,GBA,hba,ikbkg,ncf1,neb,opn1lw,pms2,rccx,smn1,strc
48
+ ```
49
+
50
+ ## Rules YAML (per-gene status classification)
51
+
52
+ Rules are evaluated per gene. Conditions within a single `when` mapping are
53
+ combined with logical AND (all must be true). If multiple rules match, you can
54
+ provide a `status_order` to pick the most severe (last wins by order).
55
+
56
+ Example:
57
+
58
+ ```yaml
59
+ smn1:
60
+ # Optional; defaults to "normal" if omitted
61
+ default_status: normal
62
+ # Optional; defaults to [normal, intermediate, pathological] if omitted
63
+ status_order: [normal, intermediate, pathological]
64
+ rules:
65
+ - status: intermediate
66
+ when:
67
+ smn1_cn: 0
68
+ smn2_cn: { ">=": 4 }
69
+ reason: "SMN1 deleted but SMN2 high"
70
+ - status: pathological
71
+ when:
72
+ smn1_cn: { "<": 2 }
73
+ reason: "SMN1 copy number low"
74
+ ```
@@ -0,0 +1,61 @@
1
+ # Paraphrase
2
+
3
+ Parse paraphase JSONs.
4
+
5
+ ## Usage
6
+
7
+ ```
8
+ Usage: paraphrase [OPTIONS]
9
+
10
+ Parse paraphase JSONs.
11
+
12
+ ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
13
+ │ * --input -f FILE Input JSON files (can be multiple) [required] │
14
+ │ * --sample -s TEXT Sample names corresponding to input JSON files [required] │
15
+ │ --rules -r FILE Optional YAML file with per-gene classification rules (adds 'status' fields) │
16
+ │ --skip-keys TEXT Comma-separated keys to skip (e.g. region_depth,final_haplotypes) │
17
+ │ --genes TEXT Optional comma-separated list of gene names to process │
18
+ │ --output-format -o TEXT Output format: 'json' (default) or 'tsv' [default: json] │
19
+ │ --help Show this message and exit. │
20
+ ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
21
+ ```
22
+
23
+ Example command:
24
+
25
+ ```
26
+ uv run paraphrase \
27
+ --input test-data/HG002.paraphase.json \
28
+ --input test-data/HG003.paraphase.json \
29
+ --input test-data/HG004.paraphase.json \
30
+ --sample HG002 \
31
+ --sample HG003 \
32
+ --sample HG004 \
33
+ --rules test-data/rules.yaml \
34
+ --genes CFH,CFHR3,f8,GBA,hba,ikbkg,ncf1,neb,opn1lw,pms2,rccx,smn1,strc
35
+ ```
36
+
37
+ ## Rules YAML (per-gene status classification)
38
+
39
+ Rules are evaluated per gene. Conditions within a single `when` mapping are
40
+ combined with logical AND (all must be true). If multiple rules match, you can
41
+ provide a `status_order` to pick the most severe (last wins by order).
42
+
43
+ Example:
44
+
45
+ ```yaml
46
+ smn1:
47
+ # Optional; defaults to "normal" if omitted
48
+ default_status: normal
49
+ # Optional; defaults to [normal, intermediate, pathological] if omitted
50
+ status_order: [normal, intermediate, pathological]
51
+ rules:
52
+ - status: intermediate
53
+ when:
54
+ smn1_cn: 0
55
+ smn2_cn: { ">=": 4 }
56
+ reason: "SMN1 deleted but SMN2 high"
57
+ - status: pathological
58
+ when:
59
+ smn1_cn: { "<": 2 }
60
+ reason: "SMN1 copy number low"
61
+ ```
@@ -0,0 +1,42 @@
1
+ [project]
2
+ name = "paraphrase"
3
+ version = "0.1.0"
4
+ description = "Parse paraphase JSONs"
5
+ authors = [{name="Felix Lenner", email="felix.lenner@scilifelab.se"}]
6
+ license = {text = "MIT License"}
7
+ readme = "README.md"
8
+ requires-python = ">=3.12"
9
+ dependencies = [
10
+ "coloredlogs>=15.0.1",
11
+ "pyyaml>=6.0.3",
12
+ "typer>=0.20.0",
13
+ ]
14
+
15
+ [project.scripts]
16
+ paraphrase = "paraphrase.main:app"
17
+
18
+ [build-system]
19
+ requires = ["hatchling"]
20
+ build-backend = "hatchling.build"
21
+
22
+ [dependency-groups]
23
+ dev = [
24
+ "pytest-cov>=7.0.0",
25
+ ]
26
+
27
+ [tool.pytest.ini_options]
28
+ testpaths = ["tests"]
29
+ addopts = "--cov=src --cov-report=term-missing --cov-report=html"
30
+
31
+ [tool.coverage.run]
32
+ source = ["src"]
33
+ omit = ["*/tests/*", "*/__pycache__/*"]
34
+
35
+ [tool.coverage.report]
36
+ exclude_lines = [
37
+ "pragma: no cover",
38
+ "def __repr__",
39
+ "raise AssertionError",
40
+ "raise NotImplementedError",
41
+ "if __name__ == .__main__.:",
42
+ ]
@@ -0,0 +1,3 @@
1
+ from importlib.metadata import version
2
+
3
+ __version__ = version("paraphrase")
@@ -0,0 +1 @@
1
+ from .main import main as main
@@ -0,0 +1,17 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Optional, Set, Dict, Any
3
+
4
+
5
@dataclass
class ProcessingConfig:
    """
    Settings that control how paraphase JSONs are processed.

    The fields are positional constructor arguments, so their order is part
    of the interface.
    """

    # Keys under each gene that are ignored entirely.
    skip_keys: Set[str]
    # Genes to keep; None means all genes are kept.
    genes_list: Optional[List[str]] = None
    # Per-gene classification rules as loaded from the rules YAML, if any.
    rules: Optional[Dict[str, Any]] = None
@@ -0,0 +1,34 @@
1
# Keys skipped under each gene when the user does not pass --skip-keys.
# Kept in alphabetical order; the order has no effect on the frozenset.
DEFAULT_SKIP_KEYS: frozenset[str] = frozenset(
    {
        "alleles_all_haplotypes",
        "assembled_haplotypes",
        "del_read_number",
        "directional_links",
        "final_haplotypes",
        "first_copies",
        "flanking_summary",
        "gene_reads",
        "haplotype_details",
        "haplotype_links",
        "het_sites_not_used_in_phasing",
        "heterozygous_sites",
        "highest_total_cn",
        "homozygous_sites",
        "intergenic_depth",
        "last_copies",
        "linked_haplotypes",
        "links_loose",
        "middle_copies",
        "nonunique_supporting_reads",
        "phasing_success",
        "pseudo_reads",
        "raw_alleles",
        "read_details",
        "sample_sex",
        "sites_for_phasing",
        "smn1_read_number",
        "smn2_read_number",
        "smn_del78_read_number",
        "unique_supporting_reads",
    }
)
@@ -0,0 +1,28 @@
1
class InputMismatchError(Exception):
    """The number of input files differs from the number of sample names."""


class YAMLLoadError(Exception):
    """A YAML file could not be loaded."""


class JSONLoadError(Exception):
    """A JSON file could not be loaded."""


class InvalidOperatorError(Exception):
    """An invalid operator was encountered in normal value processing."""


class ListOpNotSupportedError(Exception):
    """An unsupported operation was attempted on a list."""
@@ -0,0 +1,108 @@
1
+ from pathlib import Path
2
+ import coloredlogs
3
+ import yaml
4
+ import json
5
+ import logging
6
+ from .exceptions import JSONLoadError, YAMLLoadError
7
+ from typing import Dict
8
+
9
+ coloredlogs.install(level="INFO")
10
+ logging.basicConfig(level=logging.DEBUG)
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def load_yaml(file: Path):
    """
    Parse a YAML file with ``yaml.safe_load`` and return the result.

    Args:
        file: Path of the YAML file; it is opened as UTF-8 text.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file contents.

    Raises:
        YAMLLoadError: If opening, decoding or parsing the file fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        with file.open("r", encoding="utf-8") as f:
            return yaml.safe_load(f)
    except Exception as e:
        # Chain explicitly so the traceback shows the real cause instead of
        # "during handling of the above exception, another exception occurred".
        raise YAMLLoadError(f"Failed to read YAML file {file}: {e}") from e
20
+
21
+
22
def load_json(file: Path):
    """
    Parse a JSON file and return the decoded data.

    Args:
        file: Path of the JSON file; it is opened as UTF-8 text.

    Returns:
        The object produced by ``json.load`` for the file contents.

    Raises:
        JSONLoadError: If opening, decoding or parsing the file fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        with file.open("r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Chain explicitly so the traceback shows the real cause instead of
        # "during handling of the above exception, another exception occurred".
        raise JSONLoadError(f"Failed to read JSON file {file}: {e}") from e
29
+
30
+
31
def print_tsv(json_data: Dict) -> None:
    """
    Write the merged results to stdout as tab-separated rows.

    A header line is printed first, followed by one row per sample, locus and
    metric. The locus "status" is repeated on every row of that locus
    ("unknown" when it is missing or not a string); the "status" and
    "status_matches" entries never get a row of their own.
    """
    non_metric_keys = frozenset(("status", "status_matches"))
    print("sample\tlocus\tstatus\tmetric\tvalue")

    for sample, loci in json_data.items():
        for locus, locus_info in loci.items():
            raw_status = locus_info.get("status")
            locus_status = raw_status if isinstance(raw_status, str) else "unknown"

            for metric, metric_value in locus_info.items():
                if metric in non_metric_keys:
                    continue
                # NOTE(review): stringify_value returns None for empty values,
                # which the f-string below renders as the literal text "None".
                value = stringify_value(metric_value)
                print(f"{sample}\t{locus}\t{locus_status}\t{metric}\t{value}")
54
+
55
+
56
+ def stringify_value(content) -> str | None:
57
+ """
58
+ Flatten nested dicts/lists for TSV output.
59
+
60
+ Rules:
61
+ - Skip empty or None values.
62
+ - Dicts: key:value for top-level, key=subkey=subvalue for nested dicts
63
+ - Lists:
64
+ * Lists of simple values (str, int, float) are joined by commas.
65
+ * Lists of lists -> join inner list by '|', then outer by ','
66
+ """
67
+ if content in (None, [], {}):
68
+ return None
69
+
70
+ if isinstance(content, (int, float, str)):
71
+ return str(content)
72
+
73
+ if isinstance(content, list):
74
+ # If it's a list of simple values, join by commas
75
+ if all(not isinstance(item, list) for item in content):
76
+ flat = [str(element) for element in content if element is not None]
77
+ return ",".join(flat) if flat else None
78
+ else:
79
+ # Lists with sublists
80
+ flat = []
81
+ for item in content:
82
+ if isinstance(item, list):
83
+ if inner := [
84
+ str(element) for element in item if element is not None
85
+ ]:
86
+ flat.append("|".join(inner))
87
+ elif item is not None:
88
+ flat.append(str(item))
89
+ return ",".join(flat) if flat else None
90
+
91
+ if isinstance(content, dict):
92
+ flat = []
93
+ for key, value in content.items():
94
+ if value in (None, [], {}):
95
+ continue
96
+ if isinstance(value, dict):
97
+ sub_items = []
98
+ for subkey, subvalue in value.items():
99
+ if prettified_subvalue := stringify_value(subvalue):
100
+ sub_items.append(f"{subkey}={prettified_subvalue}")
101
+ if sub_items:
102
+ flat.append(f"{key}:{'|'.join(sub_items)}")
103
+ else:
104
+ if prettified_string := stringify_value(value):
105
+ flat.append(f"{key}:{prettified_string}")
106
+ return ",".join(flat) if flat else None
107
+
108
+ return str(content)
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python3
2
+ import json
3
+ from pathlib import Path
4
+ from typing import List, Optional
5
+ import typer
6
+ from .constants import DEFAULT_SKIP_KEYS
7
+ from .pipeline import merge_and_process, assert_equal_inputs_and_samples
8
+ from .io import load_json, load_yaml, print_tsv
9
+ from .exceptions import InputMismatchError
10
+ from .config import ProcessingConfig
11
+
12
# Typer application object for the command-line interface. The help text
# describes this tool (it previously carried text from an unrelated project).
app = typer.Typer(
    rich_markup_mode="rich",
    invoke_without_command=True,
    pretty_exceptions_show_locals=False,
    add_completion=False,
    help="Parse paraphase JSONs.",
)
19
+
20
+
21
@app.command()
def main(
    input: List[Path] = typer.Option(
        ...,
        "--input",
        "-f",
        exists=True,
        file_okay=True,
        dir_okay=False,
        help="Input JSON files (can be multiple)",
    ),
    sample: List[str] = typer.Option(
        ...,
        "--sample",
        "-s",
        help="Sample names corresponding to input JSON files",
    ),
    rules_yaml: Optional[Path] = typer.Option(
        None,
        "--rules",
        "-r",
        exists=True,
        file_okay=True,
        dir_okay=False,
        help="Optional YAML file with per-gene classification rules (adds 'status' fields)",
    ),
    skip_keys: Optional[str] = typer.Option(
        None, help="Comma-separated keys to skip (e.g. region_depth,final_haplotypes)"
    ),
    genes: Optional[str] = typer.Option(
        None, help="Optional comma-separated list of gene names to process"
    ),
    output_format: str = typer.Option(
        "json", "--output-format", "-o", help="Output format: 'json' (default) or 'tsv'"
    ),
):
    """
    Parse paraphase JSONs.
    """
    # The docstring above is shown to users as the command help, so developer
    # notes live in comments instead.
    #
    # Flow: parse the comma-separated options, check that inputs and samples
    # pair up, load the rules/JSON files, run merge_and_process and print the
    # result as TSV or JSON. Known input errors are reported on stderr and end
    # the command with exit code 1.

    # Local import: these two exceptions are raised by load_json/load_yaml and
    # are not part of this module's top-level imports.
    from .exceptions import JSONLoadError, YAMLLoadError

    try:
        # Parse and validate parameters
        skip_keys_list = (
            [k.strip() for k in skip_keys.split(",")]
            if skip_keys
            else list(DEFAULT_SKIP_KEYS)
        )
        # Gene names are lower-cased before being handed to the pipeline.
        genes_list = [g.strip().lower() for g in genes.split(",")] if genes else None
        assert_equal_inputs_and_samples(input, sample)

        # Get input files
        rules = load_yaml(rules_yaml) if rules_yaml else None
        json_data_list = [load_json(f) for f in input]

        config = ProcessingConfig(
            skip_keys=set(skip_keys_list),
            genes_list=genes_list,
            rules=rules,
        )

        merged_data = merge_and_process(json_data_list, sample, config)

        # Any value other than "tsv" (case-insensitive) falls back to JSON.
        if output_format.lower() == "tsv":
            print_tsv(merged_data)
        else:
            typer.echo(json.dumps(merged_data, indent=2))
    except (InputMismatchError, JSONLoadError, YAMLLoadError) as e:
        # Unreadable input files get the same clean error and exit code as a
        # sample/input count mismatch, rather than a raw traceback.
        typer.echo(f"[error] {e}", err=True)
        raise typer.Exit(code=1)
89
+
90
+
91
+ if __name__ == "__main__":
92
+ app()