cg 76.0.0__py3-none-any.whl → 83.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cg/__init__.py +1 -1
- cg/apps/housekeeper/hk.py +18 -1
- cg/apps/tb/api.py +42 -5
- cg/cli/transfer.py +13 -2
- cg/cli/upload/mutacc.py +16 -3
- cg/cli/upload/scout.py +2 -2
- cg/cli/upload/utils.py +10 -1
- cg/cli/workflow/balsamic/base.py +86 -172
- cg/cli/workflow/balsamic/options.py +3 -48
- cg/cli/workflow/balsamic/umi.py +210 -15
- cg/cli/workflow/microsalt/base.py +4 -2
- cg/cli/workflow/mip_dna/base.py +1 -1
- cg/cli/workflow/nallo/base.py +73 -23
- cg/cli/workflow/nf_analysis.py +5 -207
- cg/cli/workflow/raredisease/base.py +41 -54
- cg/cli/workflow/rnafusion/base.py +38 -8
- cg/cli/workflow/taxprofiler/base.py +31 -18
- cg/cli/workflow/tomte/base.py +83 -10
- cg/constants/constants.py +25 -30
- cg/constants/devices.py +6 -1
- cg/constants/gene_panel.py +3 -1
- cg/constants/housekeeper_tags.py +28 -28
- cg/constants/lims.py +4 -0
- cg/constants/nf_analysis.py +0 -1
- cg/constants/observations.py +21 -5
- cg/constants/orderforms.py +3 -3
- cg/constants/pacbio.py +1 -0
- cg/constants/priority.py +1 -1
- cg/constants/report.py +1 -0
- cg/constants/scout.py +12 -9
- cg/constants/sequencing.py +2 -2
- cg/constants/tb.py +5 -5
- cg/exc.py +27 -5
- cg/meta/compress/compress.py +7 -2
- cg/meta/delivery_report/balsamic.py +3 -1
- cg/meta/delivery_report/delivery_report_api.py +4 -3
- cg/meta/delivery_report/nallo.py +11 -11
- cg/meta/delivery_report/raredisease.py +7 -3
- cg/meta/delivery_report/templates/macros/data_analysis/qc_metrics/balsamic_qc_metrics.html +1 -0
- cg/meta/delivery_report/templates/macros/ticket_system.html +1 -1
- cg/meta/observations/balsamic_observations_api.py +110 -14
- cg/meta/observations/mip_dna_observations_api.py +1 -1
- cg/meta/observations/nallo_observations_api.py +1 -1
- cg/meta/observations/observations_api.py +23 -32
- cg/meta/observations/raredisease_observations_api.py +1 -1
- cg/meta/tar/tar.py +5 -2
- cg/meta/transfer/lims.py +32 -3
- cg/meta/upload/balsamic/balsamic.py +1 -8
- cg/meta/upload/coverage.py +5 -5
- cg/meta/upload/raredisease/raredisease.py +3 -0
- cg/meta/upload/scout/hk_tags.py +1 -0
- cg/meta/upload/scout/nallo_config_builder.py +31 -7
- cg/meta/workflow/balsamic.py +70 -36
- cg/meta/workflow/fastq.py +8 -0
- cg/meta/workflow/microsalt/quality_controller/models.py +0 -2
- cg/meta/workflow/microsalt/quality_controller/quality_controller.py +8 -16
- cg/meta/workflow/microsalt/quality_controller/result_logger.py +3 -6
- cg/meta/workflow/microsalt/quality_controller/utils.py +2 -45
- cg/meta/workflow/nallo.py +21 -99
- cg/meta/workflow/nf_analysis.py +12 -263
- cg/meta/workflow/raredisease.py +3 -112
- cg/meta/workflow/rnafusion.py +2 -34
- cg/meta/workflow/taxprofiler.py +2 -38
- cg/meta/workflow/tomte.py +2 -42
- cg/models/balsamic/config.py +0 -24
- cg/models/balsamic/metrics.py +5 -3
- cg/models/cg_config.py +39 -16
- cg/models/deliverables/metric_deliverables.py +1 -1
- cg/models/delivery_report/metadata.py +2 -1
- cg/models/nallo/nallo.py +14 -64
- cg/models/nf_analysis.py +1 -41
- cg/models/raredisease/raredisease.py +1 -63
- cg/models/rnafusion/rnafusion.py +0 -26
- cg/models/scout/scout_load_config.py +5 -2
- cg/models/taxprofiler/taxprofiler.py +0 -42
- cg/models/tomte/tomte.py +0 -69
- cg/resources/nallo_bundle_filenames.yaml +292 -22
- cg/resources/raredisease_bundle_filenames.yaml +11 -1
- cg/resources/taxprofiler_bundle_filenames.yaml +20 -0
- cg/server/admin.py +106 -25
- cg/server/app.py +15 -4
- cg/server/endpoints/sequencing_run/dtos.py +21 -3
- cg/server/endpoints/sequencing_run/pacbio_sequencing_run.py +29 -10
- cg/server/endpoints/sequencing_run/pacbio_smrt_cell_metrics.py +20 -0
- cg/services/analysis_starter/{service.py → analysis_starter.py} +11 -9
- cg/services/analysis_starter/configurator/abstract_model.py +8 -0
- cg/services/analysis_starter/configurator/configurator.py +1 -1
- cg/services/analysis_starter/configurator/extensions/nallo.py +27 -0
- cg/services/analysis_starter/configurator/extensions/{abstract.py → pipeline_extension.py} +1 -1
- cg/services/analysis_starter/configurator/extensions/raredisease.py +3 -1
- cg/services/analysis_starter/configurator/extensions/tomte_extension.py +28 -0
- cg/services/analysis_starter/configurator/file_creators/balsamic_config.py +240 -0
- cg/services/analysis_starter/configurator/file_creators/gene_panel.py +10 -5
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/abstract.py +2 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/models.py +40 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/nallo.py +37 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/raredisease.py +8 -5
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/tomte_params_file_creator.py +64 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/creator.py +1 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/nallo_sample_sheet_creator.py +65 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/protocol.py +12 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{raredisease.py → raredisease_sample_sheet_creator.py} +2 -2
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{rnafusion.py → rnafusion_sample_sheet_creator.py} +2 -2
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{taxprofiler.py → taxprofiler_sample_sheet_creator.py} +2 -2
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/tomte_sample_sheet_creator.py +36 -0
- cg/services/analysis_starter/configurator/implementations/balsamic.py +68 -0
- cg/services/analysis_starter/configurator/implementations/nextflow.py +22 -5
- cg/services/analysis_starter/configurator/models/balsamic.py +152 -0
- cg/services/analysis_starter/configurator/models/mip_dna.py +6 -8
- cg/services/analysis_starter/configurator/models/nextflow.py +9 -0
- cg/services/analysis_starter/constants.py +2 -0
- cg/services/analysis_starter/factories/configurator_factory.py +131 -51
- cg/services/analysis_starter/factories/starter_factory.py +36 -7
- cg/services/analysis_starter/input_fetcher/implementations/bam_fetcher.py +57 -0
- cg/services/analysis_starter/input_fetcher/implementations/fastq_fetcher.py +3 -3
- cg/services/analysis_starter/submitters/seqera_platform/{client.py → seqera_platform_client.py} +19 -3
- cg/services/analysis_starter/submitters/seqera_platform/seqera_platform_submitter.py +73 -0
- cg/services/analysis_starter/submitters/submitter.py +1 -1
- cg/services/analysis_starter/submitters/subprocess/submitter.py +2 -1
- cg/services/analysis_starter/tracker/implementations/balsamic.py +22 -0
- cg/services/analysis_starter/tracker/implementations/microsalt.py +4 -4
- cg/services/analysis_starter/tracker/implementations/mip_dna.py +4 -1
- cg/services/analysis_starter/tracker/implementations/{nextflow.py → nextflow_tracker.py} +6 -4
- cg/services/analysis_starter/tracker/tracker.py +19 -15
- cg/services/deliver_files/factory.py +1 -1
- cg/services/delivery_message/messages/__init__.py +24 -14
- cg/services/delivery_message/messages/{microsalt_mwr_message.py → microsalt_message.py} +1 -1
- cg/services/delivery_message/utils.py +4 -40
- cg/services/illumina/backup/backup_service.py +29 -7
- cg/services/orders/validation/constants.py +3 -0
- cg/services/orders/validation/index_sequences.py +558 -0
- cg/services/orders/validation/order_types/microsalt/models/sample.py +2 -3
- cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +39 -18
- cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py +8 -2
- cg/services/run_devices/pacbio/data_transfer_service/dto.py +9 -3
- cg/services/run_devices/pacbio/data_transfer_service/utils.py +14 -7
- cg/services/run_devices/pacbio/metrics_parser/models.py +1 -0
- cg/services/run_devices/pacbio/sequencing_runs_service.py +35 -7
- cg/services/sequencing_qc_service/quality_checks/checks.py +18 -16
- cg/services/sequencing_qc_service/quality_checks/utils.py +82 -18
- cg/services/sequencing_qc_service/sequencing_qc_service.py +12 -10
- cg/store/crud/create.py +73 -42
- cg/store/crud/read.py +73 -7
- cg/store/crud/update.py +14 -3
- cg/store/models.py +98 -35
- cg/store/store.py +8 -1
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/METADATA +1 -1
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/RECORD +150 -138
- cg/services/analysis_starter/submitters/seqera_platform/submitter.py +0 -39
- cg/services/delivery_message/messages/microsalt_mwx_message.py +0 -18
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/WHEEL +0 -0
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/entry_points.txt +0 -0
cg/meta/workflow/tomte.py
CHANGED
|
@@ -4,20 +4,14 @@ import logging
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
from cg.constants import Workflow
|
|
7
|
-
from cg.constants.constants import GenomeVersion
|
|
7
|
+
from cg.constants.constants import GenomeVersion
|
|
8
8
|
from cg.constants.nf_analysis import TOMTE_METRIC_CONDITIONS
|
|
9
9
|
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
|
|
10
10
|
from cg.models.analysis import NextflowAnalysis
|
|
11
11
|
from cg.models.cg_config import CGConfig
|
|
12
12
|
from cg.models.deliverables.metric_deliverables import MetricsBase
|
|
13
|
-
from cg.models.tomte.tomte import
|
|
14
|
-
TomteParameters,
|
|
15
|
-
TomteQCMetrics,
|
|
16
|
-
TomteSampleSheetEntry,
|
|
17
|
-
TomteSampleSheetHeaders,
|
|
18
|
-
)
|
|
13
|
+
from cg.models.tomte.tomte import TomteQCMetrics
|
|
19
14
|
from cg.resources import TOMTE_BUNDLE_FILENAMES_PATH
|
|
20
|
-
from cg.store.models import CaseSample
|
|
21
15
|
|
|
22
16
|
LOG = logging.getLogger(__name__)
|
|
23
17
|
|
|
@@ -48,45 +42,11 @@ class TomteAnalysisAPI(NfAnalysisAPI):
|
|
|
48
42
|
self.revision: str = config.tomte.revision
|
|
49
43
|
self.nextflow_binary_path: str = config.tomte.binary_path
|
|
50
44
|
|
|
51
|
-
@property
|
|
52
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
53
|
-
"""Headers for sample sheet."""
|
|
54
|
-
return TomteSampleSheetHeaders.list()
|
|
55
|
-
|
|
56
|
-
@property
|
|
57
|
-
def is_gene_panel_required(self) -> bool:
|
|
58
|
-
"""Return True if a gene panel is needs to be created using the information in StatusDB and exporting it from Scout."""
|
|
59
|
-
return True
|
|
60
|
-
|
|
61
45
|
@staticmethod
|
|
62
46
|
def get_bundle_filenames_path() -> Path:
|
|
63
47
|
"""Return path to bundle template."""
|
|
64
48
|
return TOMTE_BUNDLE_FILENAMES_PATH
|
|
65
49
|
|
|
66
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
67
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
68
|
-
fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
|
|
69
|
-
sample=case_sample.sample
|
|
70
|
-
)
|
|
71
|
-
sample_sheet_entry = TomteSampleSheetEntry(
|
|
72
|
-
case_id=case_sample.case.internal_id,
|
|
73
|
-
name=case_sample.sample.internal_id,
|
|
74
|
-
fastq_forward_read_paths=fastq_forward_read_paths,
|
|
75
|
-
fastq_reverse_read_paths=fastq_reverse_read_paths,
|
|
76
|
-
strandedness=Strandedness.REVERSE,
|
|
77
|
-
)
|
|
78
|
-
return sample_sheet_entry.reformat_sample_content
|
|
79
|
-
|
|
80
|
-
def get_built_workflow_parameters(self, case_id: str, dry_run: bool = False) -> TomteParameters:
|
|
81
|
-
"""Return parameters."""
|
|
82
|
-
return TomteParameters(
|
|
83
|
-
input=self.get_sample_sheet_path(case_id=case_id),
|
|
84
|
-
outdir=self.get_case_path(case_id=case_id),
|
|
85
|
-
gene_panel_clinical_filter=self.get_gene_panels_path(case_id=case_id),
|
|
86
|
-
tissue=self.get_case_source_type(case_id=case_id),
|
|
87
|
-
genome=self.get_genome_build(case_id=case_id),
|
|
88
|
-
)
|
|
89
|
-
|
|
90
50
|
def get_genome_build(self, case_id: str) -> str:
|
|
91
51
|
return GenomeVersion.HG38
|
|
92
52
|
|
cg/models/balsamic/config.py
CHANGED
|
@@ -38,28 +38,6 @@ class BalsamicConfigSample(BaseModel):
|
|
|
38
38
|
fastq_info: dict[str, dict[str, Path]]
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
class BalsamicConfigReference(BaseModel):
|
|
42
|
-
"""Metadata of reference files.
|
|
43
|
-
|
|
44
|
-
Attributes:
|
|
45
|
-
reference_genome: reference genome fasta file
|
|
46
|
-
reference_genome_version: reference genome build version
|
|
47
|
-
"""
|
|
48
|
-
|
|
49
|
-
reference_genome: Path
|
|
50
|
-
reference_genome_version: str | None = Field(default=None, validate_default=True)
|
|
51
|
-
|
|
52
|
-
@field_validator("reference_genome_version")
|
|
53
|
-
@classmethod
|
|
54
|
-
def extract_genome_version_from_path(cls, _, info: ValidationInfo) -> str:
|
|
55
|
-
"""
|
|
56
|
-
Return the genome version from the reference path:
|
|
57
|
-
/home/proj/stage/cancer/balsamic_cache/X.X.X/hg19/genome/human_g1k_v37.fasta
|
|
58
|
-
"""
|
|
59
|
-
|
|
60
|
-
return str(info.data.get("reference_genome")).split("/")[-3]
|
|
61
|
-
|
|
62
|
-
|
|
63
41
|
class BalsamicConfigPanel(BaseModel):
|
|
64
42
|
"""Balsamic attributes of a panel BED file.
|
|
65
43
|
|
|
@@ -134,13 +112,11 @@ class BalsamicConfigJSON(BaseModel):
|
|
|
134
112
|
Attributes:
|
|
135
113
|
analysis: config analysis attributes
|
|
136
114
|
samples: sample attributes associated to a specific case
|
|
137
|
-
reference: BALSAMIC build reference
|
|
138
115
|
panel: panel attributes (targeted analysis exclusively)
|
|
139
116
|
"""
|
|
140
117
|
|
|
141
118
|
analysis: BalsamicConfigAnalysis
|
|
142
119
|
samples: list[BalsamicConfigSample]
|
|
143
|
-
reference: BalsamicConfigReference
|
|
144
120
|
panel: BalsamicConfigPanel | None = None
|
|
145
121
|
QC: BalsamicConfigQC
|
|
146
122
|
vcf: dict[str, BalsamicVarCaller]
|
cg/models/balsamic/metrics.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
from pydantic import field_validator
|
|
1
|
+
from pydantic import AfterValidator, field_validator
|
|
2
|
+
from typing_extensions import Annotated
|
|
2
3
|
|
|
3
4
|
from cg.models.deliverables.metric_deliverables import MetricCondition, MetricsBase
|
|
5
|
+
from cg.models.delivery_report.validators import get_sex_as_string
|
|
4
6
|
from cg.models.qc_metrics import QCMetrics
|
|
5
7
|
|
|
6
8
|
|
|
@@ -25,7 +27,9 @@ class BalsamicQCMetrics(QCMetrics):
|
|
|
25
27
|
|
|
26
28
|
fold_80_base_penalty: float | None = None
|
|
27
29
|
mean_insert_size: float | None = None
|
|
30
|
+
median_target_coverage: float | None = None
|
|
28
31
|
percent_duplication: float | None = None
|
|
32
|
+
compare_predicted_to_given_sex: Annotated[str | None, AfterValidator(get_sex_as_string)] = None
|
|
29
33
|
|
|
30
34
|
_percent_duplication: float = field_validator("percent_duplication")(percent_value_validation)
|
|
31
35
|
|
|
@@ -34,7 +38,6 @@ class BalsamicTargetedQCMetrics(BalsamicQCMetrics):
|
|
|
34
38
|
"""BALSAMIC targeted QC metrics"""
|
|
35
39
|
|
|
36
40
|
mean_target_coverage: float | None = None
|
|
37
|
-
median_target_coverage: float | None = None
|
|
38
41
|
pct_target_bases_50x: float | None = None
|
|
39
42
|
pct_target_bases_100x: float | None = None
|
|
40
43
|
pct_target_bases_250x: float | None = None
|
|
@@ -56,7 +59,6 @@ class BalsamicTargetedQCMetrics(BalsamicQCMetrics):
|
|
|
56
59
|
class BalsamicWGSQCMetrics(BalsamicQCMetrics):
|
|
57
60
|
"""BALSAMIC WHOLE_GENOME_SEQUENCING QC metrics"""
|
|
58
61
|
|
|
59
|
-
median_coverage: float | None = None
|
|
60
62
|
pct_15x: float | None = None
|
|
61
63
|
pct_30x: float | None = None
|
|
62
64
|
pct_60x: float | None = None
|
cg/models/cg_config.py
CHANGED
|
@@ -22,7 +22,7 @@ from cg.apps.tb import TrailblazerAPI
|
|
|
22
22
|
from cg.clients.arnold.api import ArnoldAPIClient
|
|
23
23
|
from cg.clients.chanjo2.client import Chanjo2APIClient
|
|
24
24
|
from cg.clients.janus.api import JanusAPIClient
|
|
25
|
-
from cg.constants.observations import LoqusdbInstance
|
|
25
|
+
from cg.constants.observations import BalsamicObservationPanel, LoqusdbInstance
|
|
26
26
|
from cg.constants.priority import SlurmQos
|
|
27
27
|
from cg.meta.delivery.delivery import DeliveryAPI
|
|
28
28
|
from cg.services.analysis_service.analysis_service import AnalysisService
|
|
@@ -178,23 +178,40 @@ class MutaccAutoConfig(CommonAppConfig):
|
|
|
178
178
|
padding: int = 300
|
|
179
179
|
|
|
180
180
|
|
|
181
|
+
class LoqusDBDumpFiles(BaseModel):
|
|
182
|
+
artefact_sv: Path # WGS
|
|
183
|
+
artefact_snv: Path
|
|
184
|
+
cancer_germline_snv: Path
|
|
185
|
+
cancer_somatic_snv: Path
|
|
186
|
+
cancer_somatic_sv: Path
|
|
187
|
+
clinical_snv: Path
|
|
188
|
+
clinical_sv: Path
|
|
189
|
+
cancer_somatic_snv_panels: dict[BalsamicObservationPanel, Path] # Panel
|
|
190
|
+
|
|
191
|
+
|
|
181
192
|
class BalsamicConfig(CommonAppConfig):
|
|
182
|
-
balsamic_cache:
|
|
183
|
-
bed_path:
|
|
184
|
-
binary_path:
|
|
185
|
-
cadd_path:
|
|
186
|
-
conda_binary:
|
|
193
|
+
balsamic_cache: Path
|
|
194
|
+
bed_path: Path
|
|
195
|
+
binary_path: Path
|
|
196
|
+
cadd_path: Path
|
|
197
|
+
conda_binary: Path
|
|
187
198
|
conda_env: str
|
|
188
|
-
genome_interval_path:
|
|
189
|
-
gens_coverage_female_path:
|
|
190
|
-
gens_coverage_male_path:
|
|
191
|
-
gnomad_af5_path:
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
199
|
+
genome_interval_path: Path
|
|
200
|
+
gens_coverage_female_path: Path
|
|
201
|
+
gens_coverage_male_path: Path
|
|
202
|
+
gnomad_af5_path: Path
|
|
203
|
+
head_job_partition: str
|
|
204
|
+
loqusdb_path: Path
|
|
205
|
+
loqusdb_dump_files: LoqusDBDumpFiles
|
|
206
|
+
panel_of_normals: dict[str, Path] # For TGS and Exome
|
|
207
|
+
pon_path: Path
|
|
208
|
+
root: Path
|
|
209
|
+
sentieon_licence_path: Path
|
|
210
|
+
sentieon_licence_server: str
|
|
196
211
|
slurm: SlurmConfig
|
|
197
|
-
swegen_path:
|
|
212
|
+
swegen_path: Path
|
|
213
|
+
swegen_snv: Path
|
|
214
|
+
swegen_sv: Path
|
|
198
215
|
|
|
199
216
|
|
|
200
217
|
class MutantConfig(BaseModel):
|
|
@@ -415,7 +432,6 @@ class CGConfig(BaseModel):
|
|
|
415
432
|
max_flowcells: int | None = None
|
|
416
433
|
nanopore_data_directory: str
|
|
417
434
|
run_instruments: RunInstruments
|
|
418
|
-
sentieon_licence_server: str
|
|
419
435
|
tower_binary_path: str
|
|
420
436
|
|
|
421
437
|
# Base APIs that always should exist
|
|
@@ -458,6 +474,13 @@ class CGConfig(BaseModel):
|
|
|
458
474
|
loqusdb_somatic: CommonAppConfig = Field(None, alias=LoqusdbInstance.SOMATIC.value)
|
|
459
475
|
loqusdb_tumor: CommonAppConfig = Field(None, alias=LoqusdbInstance.TUMOR.value)
|
|
460
476
|
loqusdb_wes: CommonAppConfig = Field(None, alias=LoqusdbInstance.WES.value)
|
|
477
|
+
loqusdb_somatic_lymphoid: CommonAppConfig = Field(
|
|
478
|
+
None, alias=LoqusdbInstance.SOMATIC_LYMPHOID.value
|
|
479
|
+
)
|
|
480
|
+
loqusdb_somatic_myeloid: CommonAppConfig = Field(
|
|
481
|
+
None, alias=LoqusdbInstance.SOMATIC_MYELOID.value
|
|
482
|
+
)
|
|
483
|
+
loqusdb_somatic_exome: CommonAppConfig = Field(None, alias=LoqusdbInstance.SOMATIC_EXOME.value)
|
|
461
484
|
madeline_api_: MadelineAPI = None
|
|
462
485
|
mutacc_auto: MutaccAutoConfig = Field(None, alias="mutacc-auto")
|
|
463
486
|
mutacc_auto_api_: MutaccAutoAPI = None
|
|
@@ -164,6 +164,6 @@ class MetricsDeliverablesCondition(BaseModel):
|
|
|
164
164
|
class MultiqcDataJson(BaseModel):
|
|
165
165
|
"""Multiqc data json model."""
|
|
166
166
|
|
|
167
|
-
report_general_stats_data: list[dict] | None = None
|
|
167
|
+
report_general_stats_data: list[dict[str, Any]] | None = None
|
|
168
168
|
report_data_sources: dict | None = None
|
|
169
169
|
report_saved_raw_data: dict[str, dict] | None = None
|
|
@@ -92,6 +92,7 @@ class BalsamicSampleMetadataModel(SampleMetadataModel):
|
|
|
92
92
|
|
|
93
93
|
mean_insert_size: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
|
|
94
94
|
fold_80: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
|
|
95
|
+
predicted_sex: str = NA_FIELD
|
|
95
96
|
|
|
96
97
|
|
|
97
98
|
class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel):
|
|
@@ -166,7 +167,7 @@ class WTSSampleMetadataModel(SequencingSampleMetadataModel):
|
|
|
166
167
|
pct_surviving: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
|
|
167
168
|
q20_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
|
|
168
169
|
q30_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
|
|
169
|
-
ribosomal_bases: Annotated[str, BeforeValidator(
|
|
170
|
+
ribosomal_bases: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
|
|
170
171
|
rin: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
|
|
171
172
|
uniquely_mapped_reads: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
|
|
172
173
|
|
cg/models/nallo/nallo.py
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
|
-
from
|
|
2
|
-
from pathlib import Path
|
|
1
|
+
from typing import Annotated
|
|
3
2
|
|
|
4
|
-
from pydantic import
|
|
3
|
+
from pydantic import BeforeValidator, Field
|
|
5
4
|
|
|
6
5
|
from cg.constants import SexOptions
|
|
7
|
-
from cg.exc import NfSampleSheetError
|
|
8
|
-
from cg.models.nf_analysis import WorkflowParameters
|
|
9
6
|
from cg.models.qc_metrics import QCMetrics
|
|
10
7
|
|
|
11
8
|
|
|
9
|
+
def convert_sex(plink_sex: float) -> SexOptions:
|
|
10
|
+
if plink_sex == 2:
|
|
11
|
+
return SexOptions.FEMALE
|
|
12
|
+
elif plink_sex == 1:
|
|
13
|
+
return SexOptions.MALE
|
|
14
|
+
elif plink_sex == 0:
|
|
15
|
+
return SexOptions.UNKNOWN
|
|
16
|
+
else:
|
|
17
|
+
raise NotImplementedError
|
|
18
|
+
|
|
19
|
+
|
|
12
20
|
class NalloQCMetrics(QCMetrics):
|
|
13
21
|
"""Nallo QC metrics."""
|
|
14
22
|
|
|
@@ -16,62 +24,4 @@ class NalloQCMetrics(QCMetrics):
|
|
|
16
24
|
coverage_bases: float | None
|
|
17
25
|
median_coverage: float | None
|
|
18
26
|
percent_duplicates: float | None
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class NalloSampleSheetEntry(BaseModel):
|
|
23
|
-
"""Nallo sample model is used when building the sample sheet."""
|
|
24
|
-
|
|
25
|
-
project: str
|
|
26
|
-
sample: str
|
|
27
|
-
read_file: Path
|
|
28
|
-
family_id: str
|
|
29
|
-
paternal_id: str
|
|
30
|
-
maternal_id: str
|
|
31
|
-
sex: int
|
|
32
|
-
phenotype: int
|
|
33
|
-
|
|
34
|
-
@property
|
|
35
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
36
|
-
"""Reformat sample sheet content as a list of lists, where each list represents a line in the final file."""
|
|
37
|
-
return [
|
|
38
|
-
[
|
|
39
|
-
self.project,
|
|
40
|
-
self.sample,
|
|
41
|
-
self.read_file,
|
|
42
|
-
self.family_id,
|
|
43
|
-
self.paternal_id,
|
|
44
|
-
self.maternal_id,
|
|
45
|
-
self.sex,
|
|
46
|
-
self.phenotype,
|
|
47
|
-
]
|
|
48
|
-
]
|
|
49
|
-
|
|
50
|
-
@field_validator("read_file")
|
|
51
|
-
@classmethod
|
|
52
|
-
def read_file_exists(cls, bam_path: Path) -> Path:
|
|
53
|
-
"""Verify that bam files exist."""
|
|
54
|
-
if not bam_path.is_file():
|
|
55
|
-
raise NfSampleSheetError(f"Bam file does not exist: {str(bam_path)}")
|
|
56
|
-
return bam_path
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class NalloSampleSheetHeaders(StrEnum):
|
|
60
|
-
project: str = "project"
|
|
61
|
-
sample: str = "sample"
|
|
62
|
-
file: str = "file"
|
|
63
|
-
family_id: str = "family_id"
|
|
64
|
-
paternal_id: str = "paternal_id"
|
|
65
|
-
maternal_id: str = "maternal_id"
|
|
66
|
-
sex: str = "sex"
|
|
67
|
-
phenotype: str = "phenotype"
|
|
68
|
-
|
|
69
|
-
@classmethod
|
|
70
|
-
def list(cls) -> list[str]:
|
|
71
|
-
return list(map(lambda header: header.value, cls))
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class NalloParameters(WorkflowParameters):
|
|
75
|
-
"""Model for Nallo parameters."""
|
|
76
|
-
|
|
77
|
-
filter_variants_hgnc_ids: str
|
|
27
|
+
predicted_sex: Annotated[SexOptions, BeforeValidator(convert_sex)] = Field(alias="somalier_sex")
|
cg/models/nf_analysis.py
CHANGED
|
@@ -1,13 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
-
from pydantic import BaseModel,
|
|
4
|
-
|
|
5
|
-
from cg.exc import NfSampleSheetError
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class WorkflowParameters(BaseModel):
|
|
9
|
-
input: Path
|
|
10
|
-
outdir: Path
|
|
3
|
+
from pydantic import BaseModel, field_validator
|
|
11
4
|
|
|
12
5
|
|
|
13
6
|
class NfCommandArgs(BaseModel):
|
|
@@ -29,39 +22,6 @@ class NfCommandArgs(BaseModel):
|
|
|
29
22
|
params_file: str | Path | None = None
|
|
30
23
|
|
|
31
24
|
|
|
32
|
-
class NextflowSampleSheetEntry(BaseModel):
|
|
33
|
-
"""Nextflow sample sheet model.
|
|
34
|
-
|
|
35
|
-
Attributes:
|
|
36
|
-
name: sample name, or case id
|
|
37
|
-
fastq_forward_read_paths: list of all fastq read1 file paths corresponding to sample
|
|
38
|
-
fastq_reverse_read_paths: list of all fastq read2 file paths corresponding to sample
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
name: str
|
|
42
|
-
fastq_forward_read_paths: conlist(Path, min_length=1)
|
|
43
|
-
fastq_reverse_read_paths: conlist(Path, min_length=1)
|
|
44
|
-
|
|
45
|
-
@field_validator("fastq_reverse_read_paths")
|
|
46
|
-
@classmethod
|
|
47
|
-
def validate_complete_fastq_file_pairs(
|
|
48
|
-
cls, fastq_reverse: list[str], info: ValidationInfo
|
|
49
|
-
) -> list[str]:
|
|
50
|
-
"""Verify that the number of fastq forward files is the same as for the reverse."""
|
|
51
|
-
if len(fastq_reverse) != len(info.data.get("fastq_forward_read_paths")):
|
|
52
|
-
raise NfSampleSheetError("Fastq file length for forward and reverse do not match")
|
|
53
|
-
return fastq_reverse
|
|
54
|
-
|
|
55
|
-
@field_validator("fastq_forward_read_paths", "fastq_reverse_read_paths")
|
|
56
|
-
@classmethod
|
|
57
|
-
def fastq_files_exist(cls, fastq_paths: list[str]) -> list[str]:
|
|
58
|
-
"""Verify that fastq files exist."""
|
|
59
|
-
for fastq_path in fastq_paths:
|
|
60
|
-
if not fastq_path.is_file():
|
|
61
|
-
raise NfSampleSheetError(f"Fastq file does not exist: {str(fastq_path)}")
|
|
62
|
-
return fastq_paths
|
|
63
|
-
|
|
64
|
-
|
|
65
25
|
class FileDeliverable(BaseModel):
|
|
66
26
|
"""Specification for a general deliverables file."""
|
|
67
27
|
|
|
@@ -1,8 +1,4 @@
|
|
|
1
|
-
from enum import StrEnum
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
1
|
from cg.constants.constants import SexOptions
|
|
5
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
6
2
|
from cg.models.qc_metrics import QCMetrics
|
|
7
3
|
|
|
8
4
|
|
|
@@ -10,64 +6,6 @@ class RarediseaseQCMetrics(QCMetrics):
|
|
|
10
6
|
"""Raredisease QC metrics."""
|
|
11
7
|
|
|
12
8
|
mapped_reads: int
|
|
13
|
-
|
|
9
|
+
percent_duplication: float
|
|
14
10
|
predicted_sex_sex_check: SexOptions
|
|
15
11
|
total_reads: int
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry):
|
|
19
|
-
"""Raredisease sample model is used when building the sample sheet."""
|
|
20
|
-
|
|
21
|
-
sex: str
|
|
22
|
-
phenotype: int
|
|
23
|
-
sex: int
|
|
24
|
-
paternal_id: str
|
|
25
|
-
maternal_id: str
|
|
26
|
-
case_id: str
|
|
27
|
-
|
|
28
|
-
@property
|
|
29
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
30
|
-
"""Reformat sample sheet content as a list of lists, where each list represents a line in the final file."""
|
|
31
|
-
return [
|
|
32
|
-
[
|
|
33
|
-
self.name,
|
|
34
|
-
lane + 1,
|
|
35
|
-
self.fastq_forward_read_paths,
|
|
36
|
-
self.fastq_reverse_read_paths,
|
|
37
|
-
self.sex,
|
|
38
|
-
self.phenotype,
|
|
39
|
-
self.paternal_id,
|
|
40
|
-
self.maternal_id,
|
|
41
|
-
self.case_id,
|
|
42
|
-
]
|
|
43
|
-
for lane, (self.fastq_forward_read_paths, self.fastq_reverse_read_paths) in enumerate(
|
|
44
|
-
zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths)
|
|
45
|
-
)
|
|
46
|
-
]
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class RarediseaseSampleSheetHeaders(StrEnum):
|
|
50
|
-
sample: str = "sample"
|
|
51
|
-
lane: str = "lane"
|
|
52
|
-
fastq_1: str = "fastq_1"
|
|
53
|
-
fastq_2: str = "fastq_2"
|
|
54
|
-
sex: str = "sex"
|
|
55
|
-
phenotype: str = "phenotype"
|
|
56
|
-
paternal_id: str = "paternal_id"
|
|
57
|
-
maternal_id: str = "maternal_id"
|
|
58
|
-
case_id: str = "case_id"
|
|
59
|
-
|
|
60
|
-
@classmethod
|
|
61
|
-
def list(cls) -> list[str]:
|
|
62
|
-
return list(map(lambda header: header.value, cls))
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class RarediseaseParameters(WorkflowParameters):
|
|
66
|
-
"""Model for Raredisease parameters."""
|
|
67
|
-
|
|
68
|
-
target_bed_file: str
|
|
69
|
-
analysis_type: str
|
|
70
|
-
save_mapped_as_cram: bool
|
|
71
|
-
vcfanno_extra_resources: str
|
|
72
|
-
vep_filters_scout_fmt: str
|
|
73
|
-
sample_id_map: Path
|
cg/models/rnafusion/rnafusion.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from cg.constants.constants import Strandedness
|
|
2
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
3
1
|
from cg.models.qc_metrics import QCMetrics
|
|
4
2
|
|
|
5
3
|
|
|
@@ -19,27 +17,3 @@ class RnafusionQCMetrics(QCMetrics):
|
|
|
19
17
|
pct_duplication: float
|
|
20
18
|
read_pairs_examined: float
|
|
21
19
|
uniquely_mapped_percent: float
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class RnafusionParameters(WorkflowParameters):
|
|
25
|
-
"""Rnafusion parameters."""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class RnafusionSampleSheetEntry(NextflowSampleSheetEntry):
|
|
29
|
-
"""Rnafusion sample sheet model."""
|
|
30
|
-
|
|
31
|
-
strandedness: Strandedness
|
|
32
|
-
|
|
33
|
-
@staticmethod
|
|
34
|
-
def headers() -> list[str]:
|
|
35
|
-
"""Return sample sheet headers."""
|
|
36
|
-
return ["sample", "fastq_1", "fastq_2", "strandedness"]
|
|
37
|
-
|
|
38
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
39
|
-
"""Reformat sample sheet content as a list of list, where each list represents a line in the final file."""
|
|
40
|
-
return [
|
|
41
|
-
[self.name, fastq_forward_read_path, fastq_reverse_read_path, str(self.strandedness)]
|
|
42
|
-
for fastq_forward_read_path, fastq_reverse_read_path in zip(
|
|
43
|
-
self.fastq_forward_read_paths, self.fastq_reverse_read_paths
|
|
44
|
-
)
|
|
45
|
-
]
|
|
@@ -84,12 +84,15 @@ class ScoutMipIndividual(ScoutIndividual):
|
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
class ScoutNalloIndividual(ScoutIndividual):
|
|
87
|
+
assembly_alignment_path: str | None = None
|
|
88
|
+
chromograph_images: ChromographImages = ChromographImages()
|
|
87
89
|
d4_file: str | None = None
|
|
90
|
+
minor_allele_frequency_wig: str | None = None
|
|
91
|
+
mt_bam: str | None = None
|
|
88
92
|
paraphase_alignment_path: str | None = None
|
|
93
|
+
phase_blocks: str | None = None
|
|
89
94
|
reviewer: Reviewer = Reviewer()
|
|
90
95
|
tiddit_coverage_wig: str | None = None
|
|
91
|
-
minor_allele_frequency_wig: str | None = None
|
|
92
|
-
assembly_alignment_path: str | None = None
|
|
93
96
|
|
|
94
97
|
|
|
95
98
|
class ScoutRarediseaseIndividual(ScoutIndividual):
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from cg.constants.sequencing import SequencingPlatform
|
|
2
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
3
1
|
from cg.models.qc_metrics import QCMetrics
|
|
4
2
|
|
|
5
3
|
|
|
@@ -14,43 +12,3 @@ class TaxprofilerQCMetrics(QCMetrics):
|
|
|
14
12
|
pct_duplication: float
|
|
15
13
|
raw_total_sequences: float
|
|
16
14
|
reads_mapped: float
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class TaxprofilerParameters(WorkflowParameters):
|
|
20
|
-
"""Taxprofiler parameters."""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class TaxprofilerSampleSheetEntry(NextflowSampleSheetEntry):
|
|
24
|
-
"""Taxprofiler sample model is used when building the sample sheet."""
|
|
25
|
-
|
|
26
|
-
instrument_platform: SequencingPlatform
|
|
27
|
-
fasta: str
|
|
28
|
-
|
|
29
|
-
@staticmethod
|
|
30
|
-
def headers() -> list[str]:
|
|
31
|
-
"""Return sample sheet headers."""
|
|
32
|
-
return [
|
|
33
|
-
"sample",
|
|
34
|
-
"run_accession",
|
|
35
|
-
"instrument_platform",
|
|
36
|
-
"fastq_1",
|
|
37
|
-
"fastq_2",
|
|
38
|
-
"fasta",
|
|
39
|
-
]
|
|
40
|
-
|
|
41
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
42
|
-
"""Reformat sample sheet content as a list of list, where each list represents a line in the final file."""
|
|
43
|
-
reformatted_content = []
|
|
44
|
-
for run_accession, (forward_path, reverse_path) in enumerate(
|
|
45
|
-
zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths), 1
|
|
46
|
-
):
|
|
47
|
-
line = [
|
|
48
|
-
self.name,
|
|
49
|
-
run_accession,
|
|
50
|
-
self.instrument_platform,
|
|
51
|
-
forward_path,
|
|
52
|
-
reverse_path,
|
|
53
|
-
self.fasta,
|
|
54
|
-
]
|
|
55
|
-
reformatted_content.append(line)
|
|
56
|
-
return reformatted_content
|
cg/models/tomte/tomte.py
CHANGED
|
@@ -1,73 +1,4 @@
|
|
|
1
|
-
from enum import StrEnum
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from pydantic import field_validator
|
|
5
|
-
|
|
6
|
-
from cg.constants.constants import GenomeVersion, Strandedness
|
|
7
|
-
from cg.constants.sample_sources import SourceType
|
|
8
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
9
1
|
from cg.models.qc_metrics import QCMetrics
|
|
10
|
-
from cg.utils.utils import replace_non_alphanumeric
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class TomteSampleSheetEntry(NextflowSampleSheetEntry):
|
|
14
|
-
"""Tomte sample model is used when building the sample sheet."""
|
|
15
|
-
|
|
16
|
-
case_id: str
|
|
17
|
-
strandedness: Strandedness
|
|
18
|
-
|
|
19
|
-
@property
|
|
20
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
21
|
-
"""Reformat sample sheet content as a list of lists, where
|
|
22
|
-
each list represents a line in the final file."""
|
|
23
|
-
return [
|
|
24
|
-
[
|
|
25
|
-
self.case_id,
|
|
26
|
-
self.name,
|
|
27
|
-
fastq_forward_read_path,
|
|
28
|
-
fastq_reverse_read_path,
|
|
29
|
-
str(self.strandedness),
|
|
30
|
-
]
|
|
31
|
-
for fastq_forward_read_path, fastq_reverse_read_path in zip(
|
|
32
|
-
self.fastq_forward_read_paths, self.fastq_reverse_read_paths
|
|
33
|
-
)
|
|
34
|
-
]
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class TomteSampleSheetHeaders(StrEnum):
|
|
38
|
-
case_id: str = "case"
|
|
39
|
-
name: str = "sample"
|
|
40
|
-
fastq_1: str = "fastq_1"
|
|
41
|
-
fastq_2: str = "fastq_2"
|
|
42
|
-
strandedness: str = "strandedness"
|
|
43
|
-
|
|
44
|
-
@classmethod
|
|
45
|
-
def list(cls) -> list[str]:
|
|
46
|
-
return list(map(lambda header: header.value, cls))
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class TomteParameters(WorkflowParameters):
|
|
50
|
-
"""Model for Tomte parameters."""
|
|
51
|
-
|
|
52
|
-
gene_panel_clinical_filter: Path
|
|
53
|
-
tissue: str
|
|
54
|
-
genome: str = GenomeVersion.HG38
|
|
55
|
-
|
|
56
|
-
@field_validator("tissue", mode="before")
|
|
57
|
-
@classmethod
|
|
58
|
-
def restrict_tissue_values(cls, tissue: str | None) -> str:
|
|
59
|
-
if tissue:
|
|
60
|
-
return replace_non_alphanumeric(string=tissue)
|
|
61
|
-
else:
|
|
62
|
-
return SourceType.UNKNOWN
|
|
63
|
-
|
|
64
|
-
@field_validator("genome", mode="before")
|
|
65
|
-
@classmethod
|
|
66
|
-
def restrict_genome_values(cls, genome: str) -> str:
|
|
67
|
-
if genome == GenomeVersion.HG38:
|
|
68
|
-
return GenomeVersion.GRCh38.value
|
|
69
|
-
elif genome == GenomeVersion.HG19:
|
|
70
|
-
return GenomeVersion.GRCh37.value
|
|
71
2
|
|
|
72
3
|
|
|
73
4
|
class TomteQCMetrics(QCMetrics):
|