cg 80.1.0__py3-none-any.whl → 83.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cg/__init__.py +1 -1
- cg/apps/housekeeper/hk.py +1 -1
- cg/apps/tb/api.py +1 -1
- cg/cli/upload/mutacc.py +16 -3
- cg/cli/upload/scout.py +2 -2
- cg/cli/upload/utils.py +10 -1
- cg/cli/workflow/balsamic/base.py +29 -4
- cg/cli/workflow/microsalt/base.py +3 -1
- cg/cli/workflow/nallo/base.py +18 -38
- cg/cli/workflow/nf_analysis.py +2 -203
- cg/cli/workflow/raredisease/base.py +33 -51
- cg/cli/workflow/rnafusion/base.py +28 -3
- cg/cli/workflow/taxprofiler/base.py +21 -13
- cg/cli/workflow/tomte/base.py +17 -19
- cg/constants/constants.py +3 -3
- cg/constants/devices.py +6 -1
- cg/constants/gene_panel.py +3 -1
- cg/constants/lims.py +4 -0
- cg/constants/orderforms.py +1 -1
- cg/constants/pacbio.py +1 -0
- cg/constants/scout.py +6 -4
- cg/exc.py +12 -4
- cg/meta/compress/compress.py +7 -2
- cg/meta/delivery_report/nallo.py +1 -1
- cg/meta/delivery_report/templates/macros/ticket_system.html +1 -1
- cg/meta/observations/balsamic_observations_api.py +1 -1
- cg/meta/observations/mip_dna_observations_api.py +1 -1
- cg/meta/observations/nallo_observations_api.py +1 -1
- cg/meta/observations/observations_api.py +1 -1
- cg/meta/observations/raredisease_observations_api.py +1 -1
- cg/meta/tar/tar.py +5 -2
- cg/meta/upload/coverage.py +5 -5
- cg/meta/upload/raredisease/raredisease.py +3 -0
- cg/meta/upload/scout/nallo_config_builder.py +14 -0
- cg/meta/workflow/nallo.py +22 -95
- cg/meta/workflow/nf_analysis.py +11 -262
- cg/meta/workflow/raredisease.py +3 -112
- cg/meta/workflow/rnafusion.py +2 -34
- cg/meta/workflow/taxprofiler.py +2 -38
- cg/meta/workflow/tomte.py +2 -42
- cg/models/deliverables/metric_deliverables.py +1 -1
- cg/models/nallo/nallo.py +14 -64
- cg/models/nf_analysis.py +1 -41
- cg/models/raredisease/raredisease.py +0 -62
- cg/models/rnafusion/rnafusion.py +0 -26
- cg/models/scout/scout_load_config.py +1 -0
- cg/models/taxprofiler/taxprofiler.py +0 -42
- cg/models/tomte/tomte.py +0 -69
- cg/resources/nallo_bundle_filenames.yaml +282 -22
- cg/resources/raredisease_bundle_filenames.yaml +11 -1
- cg/resources/taxprofiler_bundle_filenames.yaml +20 -0
- cg/server/admin.py +51 -24
- cg/server/app.py +15 -4
- cg/server/endpoints/sequencing_run/dtos.py +21 -3
- cg/server/endpoints/sequencing_run/pacbio_sequencing_run.py +29 -10
- cg/server/endpoints/sequencing_run/pacbio_smrt_cell_metrics.py +20 -0
- cg/services/analysis_starter/configurator/configurator.py +1 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/models.py +40 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/nallo.py +3 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/tomte_params_file_creator.py +3 -1
- cg/services/analysis_starter/factories/configurator_factory.py +4 -4
- cg/services/analysis_starter/tracker/implementations/balsamic.py +4 -1
- cg/services/analysis_starter/tracker/implementations/microsalt.py +4 -4
- cg/services/analysis_starter/tracker/implementations/mip_dna.py +4 -1
- cg/services/analysis_starter/tracker/implementations/nextflow_tracker.py +6 -4
- cg/services/analysis_starter/tracker/tracker.py +10 -6
- cg/services/illumina/backup/backup_service.py +29 -7
- cg/services/orders/validation/constants.py +3 -0
- cg/services/orders/validation/index_sequences.py +558 -0
- cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +39 -18
- cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py +8 -2
- cg/services/run_devices/pacbio/data_transfer_service/dto.py +9 -3
- cg/services/run_devices/pacbio/data_transfer_service/utils.py +14 -7
- cg/services/run_devices/pacbio/metrics_parser/models.py +1 -0
- cg/services/run_devices/pacbio/sequencing_runs_service.py +35 -7
- cg/services/sequencing_qc_service/quality_checks/checks.py +18 -16
- cg/services/sequencing_qc_service/quality_checks/utils.py +82 -18
- cg/services/sequencing_qc_service/sequencing_qc_service.py +12 -10
- cg/store/crud/create.py +73 -42
- cg/store/crud/read.py +50 -2
- cg/store/crud/update.py +14 -3
- cg/store/models.py +88 -31
- cg/store/store.py +8 -1
- {cg-80.1.0.dist-info → cg-83.14.0.dist-info}/METADATA +1 -1
- {cg-80.1.0.dist-info → cg-83.14.0.dist-info}/RECORD +91 -90
- /cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{nallo.py → nallo_sample_sheet_creator.py} +0 -0
- /cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{raredisease.py → raredisease_sample_sheet_creator.py} +0 -0
- /cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{rnafusion.py → rnafusion_sample_sheet_creator.py} +0 -0
- /cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{taxprofiler.py → taxprofiler_sample_sheet_creator.py} +0 -0
- {cg-80.1.0.dist-info → cg-83.14.0.dist-info}/WHEEL +0 -0
- {cg-80.1.0.dist-info → cg-83.14.0.dist-info}/entry_points.txt +0 -0
cg/meta/workflow/rnafusion.py
CHANGED
|
@@ -4,7 +4,7 @@ import logging
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
from cg.constants import Workflow
|
|
7
|
-
from cg.constants.constants import GenomeVersion, Strandedness
|
|
7
|
+
from cg.constants.constants import GenomeVersion
|
|
8
8
|
from cg.constants.nf_analysis import RNAFUSION_METRIC_CONDITIONS
|
|
9
9
|
from cg.constants.scout import RNAFUSION_CASE_TAGS
|
|
10
10
|
from cg.exc import MissingMetrics
|
|
@@ -12,13 +12,8 @@ from cg.meta.workflow.nf_analysis import NfAnalysisAPI
|
|
|
12
12
|
from cg.models.analysis import NextflowAnalysis
|
|
13
13
|
from cg.models.cg_config import CGConfig
|
|
14
14
|
from cg.models.deliverables.metric_deliverables import MetricsBase
|
|
15
|
-
from cg.models.rnafusion.rnafusion import (
|
|
16
|
-
RnafusionParameters,
|
|
17
|
-
RnafusionQCMetrics,
|
|
18
|
-
RnafusionSampleSheetEntry,
|
|
19
|
-
)
|
|
15
|
+
from cg.models.rnafusion.rnafusion import RnafusionQCMetrics
|
|
20
16
|
from cg.resources import RNAFUSION_BUNDLE_FILENAMES_PATH
|
|
21
|
-
from cg.store.models import CaseSample
|
|
22
17
|
|
|
23
18
|
LOG = logging.getLogger(__name__)
|
|
24
19
|
|
|
@@ -50,11 +45,6 @@ class RnafusionAnalysisAPI(NfAnalysisAPI):
|
|
|
50
45
|
self.revision: str = config.rnafusion.revision
|
|
51
46
|
self.nextflow_binary_path: str = config.rnafusion.binary_path
|
|
52
47
|
|
|
53
|
-
@property
|
|
54
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
55
|
-
"""Headers for sample sheet."""
|
|
56
|
-
return RnafusionSampleSheetEntry.headers()
|
|
57
|
-
|
|
58
48
|
@property
|
|
59
49
|
def is_multiple_samples_allowed(self) -> bool:
|
|
60
50
|
"""Return whether the analysis supports multiple samples to be linked to the case."""
|
|
@@ -69,28 +59,6 @@ class RnafusionAnalysisAPI(NfAnalysisAPI):
|
|
|
69
59
|
"""Return Rnafusion bundle filenames path."""
|
|
70
60
|
return RNAFUSION_BUNDLE_FILENAMES_PATH
|
|
71
61
|
|
|
72
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
73
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
74
|
-
fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
|
|
75
|
-
sample=case_sample.sample
|
|
76
|
-
)
|
|
77
|
-
sample_sheet_entry = RnafusionSampleSheetEntry(
|
|
78
|
-
name=case_sample.sample.internal_id,
|
|
79
|
-
fastq_forward_read_paths=fastq_forward_read_paths,
|
|
80
|
-
fastq_reverse_read_paths=fastq_reverse_read_paths,
|
|
81
|
-
strandedness=Strandedness.REVERSE,
|
|
82
|
-
)
|
|
83
|
-
return sample_sheet_entry.reformat_sample_content()
|
|
84
|
-
|
|
85
|
-
def get_built_workflow_parameters(
|
|
86
|
-
self, case_id: str, dry_run: bool = False
|
|
87
|
-
) -> RnafusionParameters:
|
|
88
|
-
"""Get Rnafusion parameters."""
|
|
89
|
-
return RnafusionParameters(
|
|
90
|
-
input=self.get_sample_sheet_path(case_id=case_id),
|
|
91
|
-
outdir=self.get_case_path(case_id=case_id),
|
|
92
|
-
)
|
|
93
|
-
|
|
94
62
|
@staticmethod
|
|
95
63
|
def ensure_mandatory_metrics_present(metrics: list[MetricsBase]) -> None:
|
|
96
64
|
"""Check that all mandatory metrics are present.
|
cg/meta/workflow/taxprofiler.py
CHANGED
|
@@ -5,19 +5,13 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
from cg.constants import Workflow
|
|
7
7
|
from cg.constants.constants import GenomeVersion
|
|
8
|
-
from cg.constants.sequencing import SequencingPlatform
|
|
9
|
-
from cg.constants.symbols import EMPTY_STRING
|
|
10
8
|
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
|
|
11
9
|
from cg.models.analysis import NextflowAnalysis
|
|
12
10
|
from cg.models.cg_config import CGConfig
|
|
13
11
|
from cg.models.deliverables.metric_deliverables import MetricsBase
|
|
14
|
-
from cg.models.taxprofiler.taxprofiler import (
|
|
15
|
-
TaxprofilerParameters,
|
|
16
|
-
TaxprofilerQCMetrics,
|
|
17
|
-
TaxprofilerSampleSheetEntry,
|
|
18
|
-
)
|
|
12
|
+
from cg.models.taxprofiler.taxprofiler import TaxprofilerQCMetrics
|
|
19
13
|
from cg.resources import TAXPROFILER_BUNDLE_FILENAMES_PATH
|
|
20
|
-
from cg.store.models import CaseSample, Sample
|
|
14
|
+
from cg.store.models import Sample
|
|
21
15
|
|
|
22
16
|
LOG = logging.getLogger(__name__)
|
|
23
17
|
|
|
@@ -49,11 +43,6 @@ class TaxprofilerAnalysisAPI(NfAnalysisAPI):
|
|
|
49
43
|
self.nextflow_binary_path: str = config.taxprofiler.binary_path
|
|
50
44
|
self.compute_env_base: str = config.taxprofiler.compute_env
|
|
51
45
|
|
|
52
|
-
@property
|
|
53
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
54
|
-
"""Headers for sample sheet."""
|
|
55
|
-
return TaxprofilerSampleSheetEntry.headers()
|
|
56
|
-
|
|
57
46
|
@property
|
|
58
47
|
def is_multiqc_pattern_search_exact(self) -> bool:
|
|
59
48
|
"""Only exact pattern search is allowed to collect metrics information from multiqc file."""
|
|
@@ -64,31 +53,6 @@ class TaxprofilerAnalysisAPI(NfAnalysisAPI):
|
|
|
64
53
|
"""Return Taxprofiler bundle filenames path."""
|
|
65
54
|
return TAXPROFILER_BUNDLE_FILENAMES_PATH
|
|
66
55
|
|
|
67
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
68
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
69
|
-
sample_name: str = case_sample.sample.name
|
|
70
|
-
fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
|
|
71
|
-
sample=case_sample.sample
|
|
72
|
-
)
|
|
73
|
-
sample_sheet_entry = TaxprofilerSampleSheetEntry(
|
|
74
|
-
name=sample_name,
|
|
75
|
-
run_accession=sample_name,
|
|
76
|
-
instrument_platform=SequencingPlatform.ILLUMINA,
|
|
77
|
-
fastq_forward_read_paths=fastq_forward_read_paths,
|
|
78
|
-
fastq_reverse_read_paths=fastq_reverse_read_paths,
|
|
79
|
-
fasta=EMPTY_STRING,
|
|
80
|
-
)
|
|
81
|
-
return sample_sheet_entry.reformat_sample_content()
|
|
82
|
-
|
|
83
|
-
def get_built_workflow_parameters(
|
|
84
|
-
self, case_id: str, dry_run: bool = False
|
|
85
|
-
) -> TaxprofilerParameters:
|
|
86
|
-
"""Return Taxprofiler parameters."""
|
|
87
|
-
return TaxprofilerParameters(
|
|
88
|
-
input=self.get_sample_sheet_path(case_id=case_id),
|
|
89
|
-
outdir=self.get_case_path(case_id=case_id),
|
|
90
|
-
)
|
|
91
|
-
|
|
92
56
|
def get_multiqc_search_patterns(self, case_id: str) -> dict:
|
|
93
57
|
"""Return search patterns for MultiQC for Taxprofiler."""
|
|
94
58
|
samples: list[Sample] = self.status_db.get_samples_by_case_id(case_id=case_id)
|
cg/meta/workflow/tomte.py
CHANGED
|
@@ -4,20 +4,14 @@ import logging
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
from cg.constants import Workflow
|
|
7
|
-
from cg.constants.constants import GenomeVersion, Strandedness
|
|
7
|
+
from cg.constants.constants import GenomeVersion
|
|
8
8
|
from cg.constants.nf_analysis import TOMTE_METRIC_CONDITIONS
|
|
9
9
|
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
|
|
10
10
|
from cg.models.analysis import NextflowAnalysis
|
|
11
11
|
from cg.models.cg_config import CGConfig
|
|
12
12
|
from cg.models.deliverables.metric_deliverables import MetricsBase
|
|
13
|
-
from cg.models.tomte.tomte import (
|
|
14
|
-
TomteParameters,
|
|
15
|
-
TomteQCMetrics,
|
|
16
|
-
TomteSampleSheetEntry,
|
|
17
|
-
TomteSampleSheetHeaders,
|
|
18
|
-
)
|
|
13
|
+
from cg.models.tomte.tomte import TomteQCMetrics
|
|
19
14
|
from cg.resources import TOMTE_BUNDLE_FILENAMES_PATH
|
|
20
|
-
from cg.store.models import CaseSample
|
|
21
15
|
|
|
22
16
|
LOG = logging.getLogger(__name__)
|
|
23
17
|
|
|
@@ -48,45 +42,11 @@ class TomteAnalysisAPI(NfAnalysisAPI):
|
|
|
48
42
|
self.revision: str = config.tomte.revision
|
|
49
43
|
self.nextflow_binary_path: str = config.tomte.binary_path
|
|
50
44
|
|
|
51
|
-
@property
|
|
52
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
53
|
-
"""Headers for sample sheet."""
|
|
54
|
-
return TomteSampleSheetHeaders.list()
|
|
55
|
-
|
|
56
|
-
@property
|
|
57
|
-
def is_gene_panel_required(self) -> bool:
|
|
58
|
-
"""Return True if a gene panel is needs to be created using the information in StatusDB and exporting it from Scout."""
|
|
59
|
-
return True
|
|
60
|
-
|
|
61
45
|
@staticmethod
|
|
62
46
|
def get_bundle_filenames_path() -> Path:
|
|
63
47
|
"""Return path to bundle template."""
|
|
64
48
|
return TOMTE_BUNDLE_FILENAMES_PATH
|
|
65
49
|
|
|
66
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
67
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
68
|
-
fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
|
|
69
|
-
sample=case_sample.sample
|
|
70
|
-
)
|
|
71
|
-
sample_sheet_entry = TomteSampleSheetEntry(
|
|
72
|
-
case_id=case_sample.case.internal_id,
|
|
73
|
-
name=case_sample.sample.internal_id,
|
|
74
|
-
fastq_forward_read_paths=fastq_forward_read_paths,
|
|
75
|
-
fastq_reverse_read_paths=fastq_reverse_read_paths,
|
|
76
|
-
strandedness=Strandedness.REVERSE,
|
|
77
|
-
)
|
|
78
|
-
return sample_sheet_entry.reformat_sample_content
|
|
79
|
-
|
|
80
|
-
def get_built_workflow_parameters(self, case_id: str, dry_run: bool = False) -> TomteParameters:
|
|
81
|
-
"""Return parameters."""
|
|
82
|
-
return TomteParameters(
|
|
83
|
-
input=self.get_sample_sheet_path(case_id=case_id),
|
|
84
|
-
outdir=self.get_case_path(case_id=case_id),
|
|
85
|
-
gene_panel_clinical_filter=self.get_gene_panels_path(case_id=case_id),
|
|
86
|
-
tissue=self.get_case_source_type(case_id=case_id),
|
|
87
|
-
genome=self.get_genome_build(case_id=case_id),
|
|
88
|
-
)
|
|
89
|
-
|
|
90
50
|
def get_genome_build(self, case_id: str) -> str:
|
|
91
51
|
return GenomeVersion.HG38
|
|
92
52
|
|
|
cg/models/deliverables/metric_deliverables.py
CHANGED
|
@@ -164,6 +164,6 @@ class MetricsDeliverablesCondition(BaseModel):
|
|
|
164
164
|
class MultiqcDataJson(BaseModel):
|
|
165
165
|
"""Multiqc data json model."""
|
|
166
166
|
|
|
167
|
-
report_general_stats_data: list[dict] | None = None
|
|
167
|
+
report_general_stats_data: list[dict[str, Any]] | None = None
|
|
168
168
|
report_data_sources: dict | None = None
|
|
169
169
|
report_saved_raw_data: dict[str, dict] | None = None
|
cg/models/nallo/nallo.py
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
|
-
from enum import StrEnum
|
|
2
|
-
from pathlib import Path
|
|
1
|
+
from typing import Annotated
|
|
3
2
|
|
|
4
|
-
from pydantic import BaseModel, field_validator
|
|
3
|
+
from pydantic import BeforeValidator, Field
|
|
5
4
|
|
|
6
5
|
from cg.constants import SexOptions
|
|
7
|
-
from cg.exc import NfSampleSheetError
|
|
8
|
-
from cg.models.nf_analysis import WorkflowParameters
|
|
9
6
|
from cg.models.qc_metrics import QCMetrics
|
|
10
7
|
|
|
11
8
|
|
|
9
|
+
def convert_sex(plink_sex: float) -> SexOptions:
|
|
10
|
+
if plink_sex == 2:
|
|
11
|
+
return SexOptions.FEMALE
|
|
12
|
+
elif plink_sex == 1:
|
|
13
|
+
return SexOptions.MALE
|
|
14
|
+
elif plink_sex == 0:
|
|
15
|
+
return SexOptions.UNKNOWN
|
|
16
|
+
else:
|
|
17
|
+
raise NotImplementedError
|
|
18
|
+
|
|
19
|
+
|
|
12
20
|
class NalloQCMetrics(QCMetrics):
|
|
13
21
|
"""Nallo QC metrics."""
|
|
14
22
|
|
|
@@ -16,62 +24,4 @@ class NalloQCMetrics(QCMetrics):
|
|
|
16
24
|
coverage_bases: float | None
|
|
17
25
|
median_coverage: float | None
|
|
18
26
|
percent_duplicates: float | None
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class NalloSampleSheetEntry(BaseModel):
|
|
23
|
-
"""Nallo sample model is used when building the sample sheet."""
|
|
24
|
-
|
|
25
|
-
project: str
|
|
26
|
-
sample: str
|
|
27
|
-
read_file: Path
|
|
28
|
-
family_id: str
|
|
29
|
-
paternal_id: str
|
|
30
|
-
maternal_id: str
|
|
31
|
-
sex: int
|
|
32
|
-
phenotype: int
|
|
33
|
-
|
|
34
|
-
@property
|
|
35
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
36
|
-
"""Reformat sample sheet content as a list of lists, where each list represents a line in the final file."""
|
|
37
|
-
return [
|
|
38
|
-
[
|
|
39
|
-
self.project,
|
|
40
|
-
self.sample,
|
|
41
|
-
self.read_file,
|
|
42
|
-
self.family_id,
|
|
43
|
-
self.paternal_id,
|
|
44
|
-
self.maternal_id,
|
|
45
|
-
self.sex,
|
|
46
|
-
self.phenotype,
|
|
47
|
-
]
|
|
48
|
-
]
|
|
49
|
-
|
|
50
|
-
@field_validator("read_file")
|
|
51
|
-
@classmethod
|
|
52
|
-
def read_file_exists(cls, bam_path: Path) -> Path:
|
|
53
|
-
"""Verify that bam files exist."""
|
|
54
|
-
if not bam_path.is_file():
|
|
55
|
-
raise NfSampleSheetError(f"Bam file does not exist: {str(bam_path)}")
|
|
56
|
-
return bam_path
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class NalloSampleSheetHeaders(StrEnum):
|
|
60
|
-
project: str = "project"
|
|
61
|
-
sample: str = "sample"
|
|
62
|
-
file: str = "file"
|
|
63
|
-
family_id: str = "family_id"
|
|
64
|
-
paternal_id: str = "paternal_id"
|
|
65
|
-
maternal_id: str = "maternal_id"
|
|
66
|
-
sex: str = "sex"
|
|
67
|
-
phenotype: str = "phenotype"
|
|
68
|
-
|
|
69
|
-
@classmethod
|
|
70
|
-
def list(cls) -> list[str]:
|
|
71
|
-
return list(map(lambda header: header.value, cls))
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class NalloParameters(WorkflowParameters):
|
|
75
|
-
"""Model for Nallo parameters."""
|
|
76
|
-
|
|
77
|
-
filter_variants_hgnc_ids: str
|
|
27
|
+
predicted_sex: Annotated[SexOptions, BeforeValidator(convert_sex)] = Field(alias="somalier_sex")
|
cg/models/nf_analysis.py
CHANGED
|
@@ -1,13 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
-
from pydantic import BaseModel, ValidationInfo, conlist, field_validator
|
|
4
|
-
|
|
5
|
-
from cg.exc import NfSampleSheetError
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class WorkflowParameters(BaseModel):
|
|
9
|
-
input: Path
|
|
10
|
-
outdir: Path
|
|
3
|
+
from pydantic import BaseModel, field_validator
|
|
11
4
|
|
|
12
5
|
|
|
13
6
|
class NfCommandArgs(BaseModel):
|
|
@@ -29,39 +22,6 @@ class NfCommandArgs(BaseModel):
|
|
|
29
22
|
params_file: str | Path | None = None
|
|
30
23
|
|
|
31
24
|
|
|
32
|
-
class NextflowSampleSheetEntry(BaseModel):
|
|
33
|
-
"""Nextflow sample sheet model.
|
|
34
|
-
|
|
35
|
-
Attributes:
|
|
36
|
-
name: sample name, or case id
|
|
37
|
-
fastq_forward_read_paths: list of all fastq read1 file paths corresponding to sample
|
|
38
|
-
fastq_reverse_read_paths: list of all fastq read2 file paths corresponding to sample
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
name: str
|
|
42
|
-
fastq_forward_read_paths: conlist(Path, min_length=1)
|
|
43
|
-
fastq_reverse_read_paths: conlist(Path, min_length=1)
|
|
44
|
-
|
|
45
|
-
@field_validator("fastq_reverse_read_paths")
|
|
46
|
-
@classmethod
|
|
47
|
-
def validate_complete_fastq_file_pairs(
|
|
48
|
-
cls, fastq_reverse: list[str], info: ValidationInfo
|
|
49
|
-
) -> list[str]:
|
|
50
|
-
"""Verify that the number of fastq forward files is the same as for the reverse."""
|
|
51
|
-
if len(fastq_reverse) != len(info.data.get("fastq_forward_read_paths")):
|
|
52
|
-
raise NfSampleSheetError("Fastq file length for forward and reverse do not match")
|
|
53
|
-
return fastq_reverse
|
|
54
|
-
|
|
55
|
-
@field_validator("fastq_forward_read_paths", "fastq_reverse_read_paths")
|
|
56
|
-
@classmethod
|
|
57
|
-
def fastq_files_exist(cls, fastq_paths: list[str]) -> list[str]:
|
|
58
|
-
"""Verify that fastq files exist."""
|
|
59
|
-
for fastq_path in fastq_paths:
|
|
60
|
-
if not fastq_path.is_file():
|
|
61
|
-
raise NfSampleSheetError(f"Fastq file does not exist: {str(fastq_path)}")
|
|
62
|
-
return fastq_paths
|
|
63
|
-
|
|
64
|
-
|
|
65
25
|
class FileDeliverable(BaseModel):
|
|
66
26
|
"""Specification for a general deliverables file."""
|
|
67
27
|
|
|
cg/models/raredisease/raredisease.py
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
|
-
from enum import StrEnum
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
1
|
from cg.constants.constants import SexOptions
|
|
5
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
6
2
|
from cg.models.qc_metrics import QCMetrics
|
|
7
3
|
|
|
8
4
|
|
|
@@ -13,61 +9,3 @@ class RarediseaseQCMetrics(QCMetrics):
|
|
|
13
9
|
percent_duplication: float
|
|
14
10
|
predicted_sex_sex_check: SexOptions
|
|
15
11
|
total_reads: int
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry):
|
|
19
|
-
"""Raredisease sample model is used when building the sample sheet."""
|
|
20
|
-
|
|
21
|
-
sex: str
|
|
22
|
-
phenotype: int
|
|
23
|
-
sex: int
|
|
24
|
-
paternal_id: str
|
|
25
|
-
maternal_id: str
|
|
26
|
-
case_id: str
|
|
27
|
-
|
|
28
|
-
@property
|
|
29
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
30
|
-
"""Reformat sample sheet content as a list of lists, where each list represents a line in the final file."""
|
|
31
|
-
return [
|
|
32
|
-
[
|
|
33
|
-
self.name,
|
|
34
|
-
lane + 1,
|
|
35
|
-
self.fastq_forward_read_paths,
|
|
36
|
-
self.fastq_reverse_read_paths,
|
|
37
|
-
self.sex,
|
|
38
|
-
self.phenotype,
|
|
39
|
-
self.paternal_id,
|
|
40
|
-
self.maternal_id,
|
|
41
|
-
self.case_id,
|
|
42
|
-
]
|
|
43
|
-
for lane, (self.fastq_forward_read_paths, self.fastq_reverse_read_paths) in enumerate(
|
|
44
|
-
zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths)
|
|
45
|
-
)
|
|
46
|
-
]
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class RarediseaseSampleSheetHeaders(StrEnum):
|
|
50
|
-
sample: str = "sample"
|
|
51
|
-
lane: str = "lane"
|
|
52
|
-
fastq_1: str = "fastq_1"
|
|
53
|
-
fastq_2: str = "fastq_2"
|
|
54
|
-
sex: str = "sex"
|
|
55
|
-
phenotype: str = "phenotype"
|
|
56
|
-
paternal_id: str = "paternal_id"
|
|
57
|
-
maternal_id: str = "maternal_id"
|
|
58
|
-
case_id: str = "case_id"
|
|
59
|
-
|
|
60
|
-
@classmethod
|
|
61
|
-
def list(cls) -> list[str]:
|
|
62
|
-
return list(map(lambda header: header.value, cls))
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class RarediseaseParameters(WorkflowParameters):
|
|
66
|
-
"""Model for Raredisease parameters."""
|
|
67
|
-
|
|
68
|
-
target_bed_file: str
|
|
69
|
-
analysis_type: str
|
|
70
|
-
save_mapped_as_cram: bool
|
|
71
|
-
vcfanno_extra_resources: str
|
|
72
|
-
vep_filters_scout_fmt: str
|
|
73
|
-
sample_id_map: Path
|
cg/models/rnafusion/rnafusion.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from cg.constants.constants import Strandedness
|
|
2
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
3
1
|
from cg.models.qc_metrics import QCMetrics
|
|
4
2
|
|
|
5
3
|
|
|
@@ -19,27 +17,3 @@ class RnafusionQCMetrics(QCMetrics):
|
|
|
19
17
|
pct_duplication: float
|
|
20
18
|
read_pairs_examined: float
|
|
21
19
|
uniquely_mapped_percent: float
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class RnafusionParameters(WorkflowParameters):
|
|
25
|
-
"""Rnafusion parameters."""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class RnafusionSampleSheetEntry(NextflowSampleSheetEntry):
|
|
29
|
-
"""Rnafusion sample sheet model."""
|
|
30
|
-
|
|
31
|
-
strandedness: Strandedness
|
|
32
|
-
|
|
33
|
-
@staticmethod
|
|
34
|
-
def headers() -> list[str]:
|
|
35
|
-
"""Return sample sheet headers."""
|
|
36
|
-
return ["sample", "fastq_1", "fastq_2", "strandedness"]
|
|
37
|
-
|
|
38
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
39
|
-
"""Reformat sample sheet content as a list of list, where each list represents a line in the final file."""
|
|
40
|
-
return [
|
|
41
|
-
[self.name, fastq_forward_read_path, fastq_reverse_read_path, str(self.strandedness)]
|
|
42
|
-
for fastq_forward_read_path, fastq_reverse_read_path in zip(
|
|
43
|
-
self.fastq_forward_read_paths, self.fastq_reverse_read_paths
|
|
44
|
-
)
|
|
45
|
-
]
|
|
cg/models/scout/scout_load_config.py
CHANGED
|
@@ -85,6 +85,7 @@ class ScoutMipIndividual(ScoutIndividual):
|
|
|
85
85
|
|
|
86
86
|
class ScoutNalloIndividual(ScoutIndividual):
|
|
87
87
|
assembly_alignment_path: str | None = None
|
|
88
|
+
chromograph_images: ChromographImages = ChromographImages()
|
|
88
89
|
d4_file: str | None = None
|
|
89
90
|
minor_allele_frequency_wig: str | None = None
|
|
90
91
|
mt_bam: str | None = None
|
|
cg/models/taxprofiler/taxprofiler.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from cg.constants.sequencing import SequencingPlatform
|
|
2
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
3
1
|
from cg.models.qc_metrics import QCMetrics
|
|
4
2
|
|
|
5
3
|
|
|
@@ -14,43 +12,3 @@ class TaxprofilerQCMetrics(QCMetrics):
|
|
|
14
12
|
pct_duplication: float
|
|
15
13
|
raw_total_sequences: float
|
|
16
14
|
reads_mapped: float
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class TaxprofilerParameters(WorkflowParameters):
|
|
20
|
-
"""Taxprofiler parameters."""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class TaxprofilerSampleSheetEntry(NextflowSampleSheetEntry):
|
|
24
|
-
"""Taxprofiler sample model is used when building the sample sheet."""
|
|
25
|
-
|
|
26
|
-
instrument_platform: SequencingPlatform
|
|
27
|
-
fasta: str
|
|
28
|
-
|
|
29
|
-
@staticmethod
|
|
30
|
-
def headers() -> list[str]:
|
|
31
|
-
"""Return sample sheet headers."""
|
|
32
|
-
return [
|
|
33
|
-
"sample",
|
|
34
|
-
"run_accession",
|
|
35
|
-
"instrument_platform",
|
|
36
|
-
"fastq_1",
|
|
37
|
-
"fastq_2",
|
|
38
|
-
"fasta",
|
|
39
|
-
]
|
|
40
|
-
|
|
41
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
42
|
-
"""Reformat sample sheet content as a list of list, where each list represents a line in the final file."""
|
|
43
|
-
reformatted_content = []
|
|
44
|
-
for run_accession, (forward_path, reverse_path) in enumerate(
|
|
45
|
-
zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths), 1
|
|
46
|
-
):
|
|
47
|
-
line = [
|
|
48
|
-
self.name,
|
|
49
|
-
run_accession,
|
|
50
|
-
self.instrument_platform,
|
|
51
|
-
forward_path,
|
|
52
|
-
reverse_path,
|
|
53
|
-
self.fasta,
|
|
54
|
-
]
|
|
55
|
-
reformatted_content.append(line)
|
|
56
|
-
return reformatted_content
|
cg/models/tomte/tomte.py
CHANGED
|
@@ -1,73 +1,4 @@
|
|
|
1
|
-
from enum import StrEnum
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from pydantic import field_validator
|
|
5
|
-
|
|
6
|
-
from cg.constants.constants import GenomeVersion, Strandedness
|
|
7
|
-
from cg.constants.sample_sources import SourceType
|
|
8
|
-
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
|
|
9
1
|
from cg.models.qc_metrics import QCMetrics
|
|
10
|
-
from cg.utils.utils import replace_non_alphanumeric
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class TomteSampleSheetEntry(NextflowSampleSheetEntry):
|
|
14
|
-
"""Tomte sample model is used when building the sample sheet."""
|
|
15
|
-
|
|
16
|
-
case_id: str
|
|
17
|
-
strandedness: Strandedness
|
|
18
|
-
|
|
19
|
-
@property
|
|
20
|
-
def reformat_sample_content(self) -> list[list[str]]:
|
|
21
|
-
"""Reformat sample sheet content as a list of lists, where
|
|
22
|
-
each list represents a line in the final file."""
|
|
23
|
-
return [
|
|
24
|
-
[
|
|
25
|
-
self.case_id,
|
|
26
|
-
self.name,
|
|
27
|
-
fastq_forward_read_path,
|
|
28
|
-
fastq_reverse_read_path,
|
|
29
|
-
str(self.strandedness),
|
|
30
|
-
]
|
|
31
|
-
for fastq_forward_read_path, fastq_reverse_read_path in zip(
|
|
32
|
-
self.fastq_forward_read_paths, self.fastq_reverse_read_paths
|
|
33
|
-
)
|
|
34
|
-
]
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class TomteSampleSheetHeaders(StrEnum):
|
|
38
|
-
case_id: str = "case"
|
|
39
|
-
name: str = "sample"
|
|
40
|
-
fastq_1: str = "fastq_1"
|
|
41
|
-
fastq_2: str = "fastq_2"
|
|
42
|
-
strandedness: str = "strandedness"
|
|
43
|
-
|
|
44
|
-
@classmethod
|
|
45
|
-
def list(cls) -> list[str]:
|
|
46
|
-
return list(map(lambda header: header.value, cls))
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class TomteParameters(WorkflowParameters):
|
|
50
|
-
"""Model for Tomte parameters."""
|
|
51
|
-
|
|
52
|
-
gene_panel_clinical_filter: Path
|
|
53
|
-
tissue: str
|
|
54
|
-
genome: str = GenomeVersion.HG38
|
|
55
|
-
|
|
56
|
-
@field_validator("tissue", mode="before")
|
|
57
|
-
@classmethod
|
|
58
|
-
def restrict_tissue_values(cls, tissue: str | None) -> str:
|
|
59
|
-
if tissue:
|
|
60
|
-
return replace_non_alphanumeric(string=tissue)
|
|
61
|
-
else:
|
|
62
|
-
return SourceType.UNKNOWN
|
|
63
|
-
|
|
64
|
-
@field_validator("genome", mode="before")
|
|
65
|
-
@classmethod
|
|
66
|
-
def restrict_genome_values(cls, genome: str) -> str:
|
|
67
|
-
if genome == GenomeVersion.HG38:
|
|
68
|
-
return GenomeVersion.GRCh38.value
|
|
69
|
-
elif genome == GenomeVersion.HG19:
|
|
70
|
-
return GenomeVersion.GRCh37.value
|
|
71
2
|
|
|
72
3
|
|
|
73
4
|
class TomteQCMetrics(QCMetrics):
|