cg 76.0.0__py3-none-any.whl → 83.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cg/__init__.py +1 -1
- cg/apps/housekeeper/hk.py +18 -1
- cg/apps/tb/api.py +42 -5
- cg/cli/transfer.py +13 -2
- cg/cli/upload/mutacc.py +16 -3
- cg/cli/upload/scout.py +2 -2
- cg/cli/upload/utils.py +10 -1
- cg/cli/workflow/balsamic/base.py +86 -172
- cg/cli/workflow/balsamic/options.py +3 -48
- cg/cli/workflow/balsamic/umi.py +210 -15
- cg/cli/workflow/microsalt/base.py +4 -2
- cg/cli/workflow/mip_dna/base.py +1 -1
- cg/cli/workflow/nallo/base.py +73 -23
- cg/cli/workflow/nf_analysis.py +5 -207
- cg/cli/workflow/raredisease/base.py +41 -54
- cg/cli/workflow/rnafusion/base.py +38 -8
- cg/cli/workflow/taxprofiler/base.py +31 -18
- cg/cli/workflow/tomte/base.py +83 -10
- cg/constants/constants.py +25 -30
- cg/constants/devices.py +6 -1
- cg/constants/gene_panel.py +3 -1
- cg/constants/housekeeper_tags.py +28 -28
- cg/constants/lims.py +4 -0
- cg/constants/nf_analysis.py +0 -1
- cg/constants/observations.py +21 -5
- cg/constants/orderforms.py +3 -3
- cg/constants/pacbio.py +1 -0
- cg/constants/priority.py +1 -1
- cg/constants/report.py +1 -0
- cg/constants/scout.py +12 -9
- cg/constants/sequencing.py +2 -2
- cg/constants/tb.py +5 -5
- cg/exc.py +27 -5
- cg/meta/compress/compress.py +7 -2
- cg/meta/delivery_report/balsamic.py +3 -1
- cg/meta/delivery_report/delivery_report_api.py +4 -3
- cg/meta/delivery_report/nallo.py +11 -11
- cg/meta/delivery_report/raredisease.py +7 -3
- cg/meta/delivery_report/templates/macros/data_analysis/qc_metrics/balsamic_qc_metrics.html +1 -0
- cg/meta/delivery_report/templates/macros/ticket_system.html +1 -1
- cg/meta/observations/balsamic_observations_api.py +110 -14
- cg/meta/observations/mip_dna_observations_api.py +1 -1
- cg/meta/observations/nallo_observations_api.py +1 -1
- cg/meta/observations/observations_api.py +23 -32
- cg/meta/observations/raredisease_observations_api.py +1 -1
- cg/meta/tar/tar.py +5 -2
- cg/meta/transfer/lims.py +32 -3
- cg/meta/upload/balsamic/balsamic.py +1 -8
- cg/meta/upload/coverage.py +5 -5
- cg/meta/upload/raredisease/raredisease.py +3 -0
- cg/meta/upload/scout/hk_tags.py +1 -0
- cg/meta/upload/scout/nallo_config_builder.py +31 -7
- cg/meta/workflow/balsamic.py +70 -36
- cg/meta/workflow/fastq.py +8 -0
- cg/meta/workflow/microsalt/quality_controller/models.py +0 -2
- cg/meta/workflow/microsalt/quality_controller/quality_controller.py +8 -16
- cg/meta/workflow/microsalt/quality_controller/result_logger.py +3 -6
- cg/meta/workflow/microsalt/quality_controller/utils.py +2 -45
- cg/meta/workflow/nallo.py +21 -99
- cg/meta/workflow/nf_analysis.py +12 -263
- cg/meta/workflow/raredisease.py +3 -112
- cg/meta/workflow/rnafusion.py +2 -34
- cg/meta/workflow/taxprofiler.py +2 -38
- cg/meta/workflow/tomte.py +2 -42
- cg/models/balsamic/config.py +0 -24
- cg/models/balsamic/metrics.py +5 -3
- cg/models/cg_config.py +39 -16
- cg/models/deliverables/metric_deliverables.py +1 -1
- cg/models/delivery_report/metadata.py +2 -1
- cg/models/nallo/nallo.py +14 -64
- cg/models/nf_analysis.py +1 -41
- cg/models/raredisease/raredisease.py +1 -63
- cg/models/rnafusion/rnafusion.py +0 -26
- cg/models/scout/scout_load_config.py +5 -2
- cg/models/taxprofiler/taxprofiler.py +0 -42
- cg/models/tomte/tomte.py +0 -69
- cg/resources/nallo_bundle_filenames.yaml +292 -22
- cg/resources/raredisease_bundle_filenames.yaml +11 -1
- cg/resources/taxprofiler_bundle_filenames.yaml +20 -0
- cg/server/admin.py +106 -25
- cg/server/app.py +15 -4
- cg/server/endpoints/sequencing_run/dtos.py +21 -3
- cg/server/endpoints/sequencing_run/pacbio_sequencing_run.py +29 -10
- cg/server/endpoints/sequencing_run/pacbio_smrt_cell_metrics.py +20 -0
- cg/services/analysis_starter/{service.py → analysis_starter.py} +11 -9
- cg/services/analysis_starter/configurator/abstract_model.py +8 -0
- cg/services/analysis_starter/configurator/configurator.py +1 -1
- cg/services/analysis_starter/configurator/extensions/nallo.py +27 -0
- cg/services/analysis_starter/configurator/extensions/{abstract.py → pipeline_extension.py} +1 -1
- cg/services/analysis_starter/configurator/extensions/raredisease.py +3 -1
- cg/services/analysis_starter/configurator/extensions/tomte_extension.py +28 -0
- cg/services/analysis_starter/configurator/file_creators/balsamic_config.py +240 -0
- cg/services/analysis_starter/configurator/file_creators/gene_panel.py +10 -5
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/abstract.py +2 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/models.py +40 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/nallo.py +37 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/raredisease.py +8 -5
- cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/tomte_params_file_creator.py +64 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/creator.py +1 -1
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/nallo_sample_sheet_creator.py +65 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/protocol.py +12 -0
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{raredisease.py → raredisease_sample_sheet_creator.py} +2 -2
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{rnafusion.py → rnafusion_sample_sheet_creator.py} +2 -2
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{taxprofiler.py → taxprofiler_sample_sheet_creator.py} +2 -2
- cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/tomte_sample_sheet_creator.py +36 -0
- cg/services/analysis_starter/configurator/implementations/balsamic.py +68 -0
- cg/services/analysis_starter/configurator/implementations/nextflow.py +22 -5
- cg/services/analysis_starter/configurator/models/balsamic.py +152 -0
- cg/services/analysis_starter/configurator/models/mip_dna.py +6 -8
- cg/services/analysis_starter/configurator/models/nextflow.py +9 -0
- cg/services/analysis_starter/constants.py +2 -0
- cg/services/analysis_starter/factories/configurator_factory.py +131 -51
- cg/services/analysis_starter/factories/starter_factory.py +36 -7
- cg/services/analysis_starter/input_fetcher/implementations/bam_fetcher.py +57 -0
- cg/services/analysis_starter/input_fetcher/implementations/fastq_fetcher.py +3 -3
- cg/services/analysis_starter/submitters/seqera_platform/{client.py → seqera_platform_client.py} +19 -3
- cg/services/analysis_starter/submitters/seqera_platform/seqera_platform_submitter.py +73 -0
- cg/services/analysis_starter/submitters/submitter.py +1 -1
- cg/services/analysis_starter/submitters/subprocess/submitter.py +2 -1
- cg/services/analysis_starter/tracker/implementations/balsamic.py +22 -0
- cg/services/analysis_starter/tracker/implementations/microsalt.py +4 -4
- cg/services/analysis_starter/tracker/implementations/mip_dna.py +4 -1
- cg/services/analysis_starter/tracker/implementations/{nextflow.py → nextflow_tracker.py} +6 -4
- cg/services/analysis_starter/tracker/tracker.py +19 -15
- cg/services/deliver_files/factory.py +1 -1
- cg/services/delivery_message/messages/__init__.py +24 -14
- cg/services/delivery_message/messages/{microsalt_mwr_message.py → microsalt_message.py} +1 -1
- cg/services/delivery_message/utils.py +4 -40
- cg/services/illumina/backup/backup_service.py +29 -7
- cg/services/orders/validation/constants.py +3 -0
- cg/services/orders/validation/index_sequences.py +558 -0
- cg/services/orders/validation/order_types/microsalt/models/sample.py +2 -3
- cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +39 -18
- cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py +8 -2
- cg/services/run_devices/pacbio/data_transfer_service/dto.py +9 -3
- cg/services/run_devices/pacbio/data_transfer_service/utils.py +14 -7
- cg/services/run_devices/pacbio/metrics_parser/models.py +1 -0
- cg/services/run_devices/pacbio/sequencing_runs_service.py +35 -7
- cg/services/sequencing_qc_service/quality_checks/checks.py +18 -16
- cg/services/sequencing_qc_service/quality_checks/utils.py +82 -18
- cg/services/sequencing_qc_service/sequencing_qc_service.py +12 -10
- cg/store/crud/create.py +73 -42
- cg/store/crud/read.py +73 -7
- cg/store/crud/update.py +14 -3
- cg/store/models.py +98 -35
- cg/store/store.py +8 -1
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/METADATA +1 -1
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/RECORD +150 -138
- cg/services/analysis_starter/submitters/seqera_platform/submitter.py +0 -39
- cg/services/delivery_message/messages/microsalt_mwx_message.py +0 -18
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/WHEEL +0 -0
- {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/entry_points.txt +0 -0
cg/meta/workflow/nf_analysis.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
import copy
|
|
2
1
|
import logging
|
|
3
|
-
import re
|
|
4
2
|
from datetime import datetime
|
|
5
3
|
from pathlib import Path
|
|
6
4
|
from typing import Any, Iterator, Type
|
|
@@ -17,16 +15,12 @@ from cg.constants.constants import (
|
|
|
17
15
|
MultiQC,
|
|
18
16
|
WorkflowManager,
|
|
19
17
|
)
|
|
20
|
-
from cg.constants.gene_panel import GenePanelGenomeBuild
|
|
21
|
-
from cg.constants.housekeeper_tags import AlignmentFileTag
|
|
22
18
|
from cg.constants.nextflow import NFX_WORK_DIR
|
|
23
19
|
from cg.constants.nf_analysis import NfTowerStatus
|
|
24
20
|
from cg.constants.tb import AnalysisStatus
|
|
25
21
|
from cg.exc import CgError, HousekeeperStoreError, MetricsQCError
|
|
26
22
|
from cg.io.controller import ReadFile, WriteFile
|
|
27
23
|
from cg.io.json import read_json
|
|
28
|
-
from cg.io.txt import concat_txt, write_txt
|
|
29
|
-
from cg.io.yaml import read_yaml, write_yaml_nextflow_style
|
|
30
24
|
from cg.meta.workflow.analysis import AnalysisAPI
|
|
31
25
|
from cg.meta.workflow.nf_handlers import NextflowHandler, NfTowerHandler
|
|
32
26
|
from cg.models.analysis import NextflowAnalysis
|
|
@@ -36,15 +30,9 @@ from cg.models.deliverables.metric_deliverables import (
|
|
|
36
30
|
MetricsDeliverablesCondition,
|
|
37
31
|
MultiqcDataJson,
|
|
38
32
|
)
|
|
39
|
-
from cg.models.
|
|
40
|
-
from cg.models.nf_analysis import (
|
|
41
|
-
FileDeliverable,
|
|
42
|
-
NfCommandArgs,
|
|
43
|
-
WorkflowDeliverables,
|
|
44
|
-
WorkflowParameters,
|
|
45
|
-
)
|
|
33
|
+
from cg.models.nf_analysis import FileDeliverable, NfCommandArgs, WorkflowDeliverables
|
|
46
34
|
from cg.models.qc_metrics import QCMetrics
|
|
47
|
-
from cg.store.models import Analysis, Case,
|
|
35
|
+
from cg.store.models import Analysis, Case, Sample
|
|
48
36
|
from cg.utils import Process
|
|
49
37
|
|
|
50
38
|
LOG = logging.getLogger(__name__)
|
|
@@ -90,27 +78,12 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
90
78
|
def process(self, process: Process):
|
|
91
79
|
self._process = process
|
|
92
80
|
|
|
93
|
-
@property
|
|
94
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
95
|
-
"""Headers for sample sheet."""
|
|
96
|
-
raise NotImplementedError
|
|
97
|
-
|
|
98
81
|
@property
|
|
99
82
|
def is_multiqc_pattern_search_exact(self) -> bool:
|
|
100
83
|
"""Return True if only exact pattern search is allowed to collect metrics information from MultiQC file.
|
|
101
84
|
If false, pattern must be present but does not need to be exact."""
|
|
102
85
|
return False
|
|
103
86
|
|
|
104
|
-
@property
|
|
105
|
-
def is_gene_panel_required(self) -> bool:
|
|
106
|
-
"""Return True if a gene panel needs to be created using the information in StatusDB and exporting it from Scout."""
|
|
107
|
-
return False
|
|
108
|
-
|
|
109
|
-
@property
|
|
110
|
-
def is_managed_variants_required(self) -> bool:
|
|
111
|
-
"""Return True if a managed variant export needs to be exported it from Scout."""
|
|
112
|
-
return False
|
|
113
|
-
|
|
114
87
|
def get_profile(self, profile: str | None = None) -> str:
|
|
115
88
|
"""Get NF profiles."""
|
|
116
89
|
return profile or self.profile
|
|
@@ -123,27 +96,6 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
123
96
|
"""Get workflow version from config."""
|
|
124
97
|
return self.revision
|
|
125
98
|
|
|
126
|
-
def get_built_workflow_parameters(
|
|
127
|
-
self, case_id: str, dry_run: bool = False
|
|
128
|
-
) -> WorkflowParameters:
|
|
129
|
-
"""Return workflow parameters."""
|
|
130
|
-
raise NotImplementedError
|
|
131
|
-
|
|
132
|
-
def get_nextflow_config_content(self, case_id: str) -> str:
|
|
133
|
-
"""Return nextflow config content."""
|
|
134
|
-
config_files_list: list[str] = [
|
|
135
|
-
self.platform,
|
|
136
|
-
self.workflow_config_path,
|
|
137
|
-
self.resources,
|
|
138
|
-
]
|
|
139
|
-
extra_parameters_str: list[str] = [
|
|
140
|
-
self.set_cluster_options(case_id=case_id),
|
|
141
|
-
]
|
|
142
|
-
return concat_txt(
|
|
143
|
-
file_paths=config_files_list,
|
|
144
|
-
str_content=extra_parameters_str,
|
|
145
|
-
)
|
|
146
|
-
|
|
147
99
|
def get_case_path(self, case_id: str) -> Path:
|
|
148
100
|
"""Path to case working directory."""
|
|
149
101
|
return Path(self.root_dir, case_id)
|
|
@@ -192,11 +144,6 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
192
144
|
FileExtensions.YAML
|
|
193
145
|
)
|
|
194
146
|
|
|
195
|
-
def create_case_directory(self, case_id: str, dry_run: bool = False) -> None:
|
|
196
|
-
"""Create case directory."""
|
|
197
|
-
if not dry_run:
|
|
198
|
-
Path(self.get_case_path(case_id=case_id)).mkdir(parents=True, exist_ok=True)
|
|
199
|
-
|
|
200
147
|
def get_log_path(self, case_id: str, workflow: str) -> Path:
|
|
201
148
|
"""Path to NF log."""
|
|
202
149
|
launch_time: str = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
|
|
@@ -211,68 +158,6 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
211
158
|
return work_dir.absolute()
|
|
212
159
|
return Path(self.get_case_path(case_id), NFX_WORK_DIR)
|
|
213
160
|
|
|
214
|
-
def get_gene_panels_path(self, case_id: str) -> Path:
|
|
215
|
-
"""Path to gene panels bed file exported from Scout."""
|
|
216
|
-
return Path(self.get_case_path(case_id=case_id), "gene_panels").with_suffix(
|
|
217
|
-
FileExtensions.BED
|
|
218
|
-
)
|
|
219
|
-
|
|
220
|
-
def set_cluster_options(self, case_id: str) -> str:
|
|
221
|
-
return f'process.clusterOptions = "-A {self.account} --qos={self.get_slurm_qos_for_case(case_id=case_id)}"\n'
|
|
222
|
-
|
|
223
|
-
@staticmethod
|
|
224
|
-
def extract_read_files(
|
|
225
|
-
metadata: list[FastqFileMeta], forward_read: bool = False, reverse_read: bool = False
|
|
226
|
-
) -> list[str]:
|
|
227
|
-
"""Extract a list of fastq file paths for either forward or reverse reads."""
|
|
228
|
-
if forward_read and not reverse_read:
|
|
229
|
-
read_direction = 1
|
|
230
|
-
elif reverse_read and not forward_read:
|
|
231
|
-
read_direction = 2
|
|
232
|
-
else:
|
|
233
|
-
raise ValueError("Either forward or reverse needs to be specified")
|
|
234
|
-
sorted_metadata: list = sorted(metadata, key=lambda k: k.path)
|
|
235
|
-
return [
|
|
236
|
-
fastq_file.path
|
|
237
|
-
for fastq_file in sorted_metadata
|
|
238
|
-
if fastq_file.read_direction == read_direction
|
|
239
|
-
]
|
|
240
|
-
|
|
241
|
-
def get_paired_read_paths(self, sample: Sample) -> tuple[list[str], list[str]]:
|
|
242
|
-
"""Returns a tuple of paired fastq file paths for the forward and reverse read."""
|
|
243
|
-
sample_metadata: list[FastqFileMeta] = self.gather_file_metadata_for_sample(sample=sample)
|
|
244
|
-
fastq_forward_read_paths: list[str] = self.extract_read_files(
|
|
245
|
-
metadata=sample_metadata, forward_read=True
|
|
246
|
-
)
|
|
247
|
-
fastq_reverse_read_paths: list[str] = self.extract_read_files(
|
|
248
|
-
metadata=sample_metadata, reverse_read=True
|
|
249
|
-
)
|
|
250
|
-
return fastq_forward_read_paths, fastq_reverse_read_paths
|
|
251
|
-
|
|
252
|
-
def get_bam_read_file_paths(self, sample: Sample) -> list[Path]:
|
|
253
|
-
"""Gather BAM file path for a sample based on the BAM tag."""
|
|
254
|
-
return [
|
|
255
|
-
Path(hk_file.full_path)
|
|
256
|
-
for hk_file in self.housekeeper_api.files(
|
|
257
|
-
bundle=sample.internal_id, tags={AlignmentFileTag.BAM}
|
|
258
|
-
)
|
|
259
|
-
]
|
|
260
|
-
|
|
261
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
262
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
263
|
-
raise NotImplementedError
|
|
264
|
-
|
|
265
|
-
def get_sample_sheet_content(self, case_id: str) -> list[list[Any]]:
|
|
266
|
-
"""Return formatted information required to build a sample sheet for a case.
|
|
267
|
-
This contains information for all samples linked to the case."""
|
|
268
|
-
sample_sheet_content: list = []
|
|
269
|
-
case: Case = self.get_validated_case(case_id)
|
|
270
|
-
LOG.info(f"Samples linked to case {case_id}: {len(case.links)}")
|
|
271
|
-
LOG.debug("Getting sample sheet information")
|
|
272
|
-
for link in case.links:
|
|
273
|
-
sample_sheet_content.extend(self.get_sample_sheet_content_per_sample(case_sample=link))
|
|
274
|
-
return sample_sheet_content
|
|
275
|
-
|
|
276
161
|
def verify_sample_sheet_exists(self, case_id: str, dry_run: bool = False) -> None:
|
|
277
162
|
"""Raise an error if sample sheet file is not found."""
|
|
278
163
|
if not dry_run and not Path(self.get_sample_sheet_path(case_id=case_id)).exists():
|
|
@@ -283,33 +168,6 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
283
168
|
if not Path(self.get_deliverables_file_path(case_id=case_id)).exists():
|
|
284
169
|
raise CgError(f"No deliverables file found for case {case_id}")
|
|
285
170
|
|
|
286
|
-
def write_params_file(self, case_id: str, replaced_workflow_parameters: dict = None) -> None:
|
|
287
|
-
"""Write params-file for analysis."""
|
|
288
|
-
LOG.debug("Writing parameters file")
|
|
289
|
-
if replaced_workflow_parameters:
|
|
290
|
-
write_yaml_nextflow_style(
|
|
291
|
-
content=replaced_workflow_parameters,
|
|
292
|
-
file_path=self.get_params_file_path(case_id=case_id),
|
|
293
|
-
)
|
|
294
|
-
else:
|
|
295
|
-
self.get_params_file_path(case_id=case_id).touch()
|
|
296
|
-
|
|
297
|
-
@staticmethod
|
|
298
|
-
def write_sample_sheet(
|
|
299
|
-
content: list[list[Any]],
|
|
300
|
-
file_path: Path,
|
|
301
|
-
header: list[str],
|
|
302
|
-
) -> None:
|
|
303
|
-
"""Write sample sheet CSV file."""
|
|
304
|
-
LOG.debug("Writing sample sheet")
|
|
305
|
-
if header:
|
|
306
|
-
content.insert(0, header)
|
|
307
|
-
WriteFile.write_file_from_content(
|
|
308
|
-
content=content,
|
|
309
|
-
file_format=FileFormat.CSV,
|
|
310
|
-
file_path=file_path,
|
|
311
|
-
)
|
|
312
|
-
|
|
313
171
|
@staticmethod
|
|
314
172
|
def write_deliverables_file(
|
|
315
173
|
deliverables_content: dict, file_path: Path, file_format=FileFormat.YAML
|
|
@@ -329,105 +187,6 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
329
187
|
file_path=config_path,
|
|
330
188
|
)
|
|
331
189
|
|
|
332
|
-
def create_sample_sheet(self, case_id: str, dry_run: bool) -> None:
|
|
333
|
-
"""Create sample sheet for a case."""
|
|
334
|
-
sample_sheet_content: list[list[Any]] = self.get_sample_sheet_content(case_id=case_id)
|
|
335
|
-
if not dry_run:
|
|
336
|
-
self.write_sample_sheet(
|
|
337
|
-
content=sample_sheet_content,
|
|
338
|
-
file_path=self.get_sample_sheet_path(case_id=case_id),
|
|
339
|
-
header=self.sample_sheet_headers,
|
|
340
|
-
)
|
|
341
|
-
|
|
342
|
-
def create_params_file(self, case_id: str, dry_run: bool) -> None:
|
|
343
|
-
"""Create parameters file for a case."""
|
|
344
|
-
LOG.debug("Getting parameters information built on-the-fly")
|
|
345
|
-
built_workflow_parameters: dict | None = self.get_built_workflow_parameters(
|
|
346
|
-
case_id=case_id, dry_run=dry_run
|
|
347
|
-
).model_dump()
|
|
348
|
-
LOG.debug("Adding parameters from the pipeline config file if it exist")
|
|
349
|
-
|
|
350
|
-
yaml_params: dict = (
|
|
351
|
-
read_yaml(self.params) if hasattr(self, "params") and self.params else {}
|
|
352
|
-
)
|
|
353
|
-
|
|
354
|
-
# Check for duplicate keys
|
|
355
|
-
duplicate_keys = set(built_workflow_parameters.keys()) & set(yaml_params.keys())
|
|
356
|
-
if duplicate_keys:
|
|
357
|
-
raise ValueError(f"Duplicate parameter keys found: {duplicate_keys}")
|
|
358
|
-
workflow_parameters: dict = built_workflow_parameters | (yaml_params)
|
|
359
|
-
replaced_workflow_parameters: dict = self.replace_values_in_params_file(
|
|
360
|
-
workflow_parameters=workflow_parameters
|
|
361
|
-
)
|
|
362
|
-
if not dry_run:
|
|
363
|
-
self.write_params_file(
|
|
364
|
-
case_id=case_id, replaced_workflow_parameters=replaced_workflow_parameters
|
|
365
|
-
)
|
|
366
|
-
|
|
367
|
-
def replace_values_in_params_file(self, workflow_parameters: dict) -> dict:
|
|
368
|
-
replaced_workflow_parameters = copy.deepcopy(workflow_parameters)
|
|
369
|
-
"""Iterate through the dictionary until all placeholders are replaced with the corresponding value from the dictionary"""
|
|
370
|
-
while True:
|
|
371
|
-
resolved: bool = True
|
|
372
|
-
for key, value in replaced_workflow_parameters.items():
|
|
373
|
-
new_value: str | int = self.replace_params_placeholders(value, workflow_parameters)
|
|
374
|
-
if new_value != value:
|
|
375
|
-
resolved = False
|
|
376
|
-
replaced_workflow_parameters[key] = new_value
|
|
377
|
-
if resolved:
|
|
378
|
-
break
|
|
379
|
-
return replaced_workflow_parameters
|
|
380
|
-
|
|
381
|
-
def replace_params_placeholders(self, value: str | int, workflow_parameters: dict) -> str:
|
|
382
|
-
"""Replace values marked as placeholders with values from the given dictionary"""
|
|
383
|
-
if isinstance(value, str):
|
|
384
|
-
placeholders: list[str] = re.findall(r"{{\s*([^{}\s]+)\s*}}", value)
|
|
385
|
-
for placeholder in placeholders:
|
|
386
|
-
if placeholder in workflow_parameters:
|
|
387
|
-
value = value.replace(
|
|
388
|
-
f"{{{{{placeholder}}}}}", str(workflow_parameters[placeholder])
|
|
389
|
-
)
|
|
390
|
-
return value
|
|
391
|
-
|
|
392
|
-
def create_nextflow_config(self, case_id: str, dry_run: bool = False) -> None:
|
|
393
|
-
"""Create nextflow config file."""
|
|
394
|
-
if content := self.get_nextflow_config_content(case_id=case_id):
|
|
395
|
-
LOG.debug("Writing nextflow config file")
|
|
396
|
-
if not dry_run:
|
|
397
|
-
write_txt(
|
|
398
|
-
content=content,
|
|
399
|
-
file_path=self.get_nextflow_config_path(case_id=case_id),
|
|
400
|
-
)
|
|
401
|
-
|
|
402
|
-
def create_gene_panel(self, case_id: str, dry_run: bool) -> None:
|
|
403
|
-
"""Create and write an aggregated gene panel file exported from Scout."""
|
|
404
|
-
LOG.info("Creating gene panel file")
|
|
405
|
-
bed_lines: list[str] = self.get_gene_panel(case_id=case_id, dry_run=dry_run)
|
|
406
|
-
if dry_run:
|
|
407
|
-
bed_lines: str = "\n".join(bed_lines)
|
|
408
|
-
LOG.debug(f"{bed_lines}")
|
|
409
|
-
return
|
|
410
|
-
self.write_panel(case_id=case_id, content=bed_lines)
|
|
411
|
-
|
|
412
|
-
def config_case(self, case_id: str, dry_run: bool):
|
|
413
|
-
"""Create directory and config files required by a workflow for a case."""
|
|
414
|
-
if dry_run:
|
|
415
|
-
LOG.info("Dry run: Config files will not be written")
|
|
416
|
-
self.status_db.verify_case_exists(case_internal_id=case_id)
|
|
417
|
-
self.create_case_directory(case_id=case_id, dry_run=dry_run)
|
|
418
|
-
self.create_sample_sheet(case_id=case_id, dry_run=dry_run)
|
|
419
|
-
self.create_params_file(case_id=case_id, dry_run=dry_run)
|
|
420
|
-
self.create_nextflow_config(case_id=case_id, dry_run=dry_run)
|
|
421
|
-
if self.is_gene_panel_required:
|
|
422
|
-
self.create_gene_panel(case_id=case_id, dry_run=dry_run)
|
|
423
|
-
if self.is_managed_variants_required:
|
|
424
|
-
vcf_lines: list[str] = self.get_managed_variants(case_id=case_id)
|
|
425
|
-
if dry_run:
|
|
426
|
-
for line in vcf_lines:
|
|
427
|
-
LOG.debug(line)
|
|
428
|
-
else:
|
|
429
|
-
self.write_managed_variants(case_id=case_id, content=vcf_lines)
|
|
430
|
-
|
|
431
190
|
def _run_analysis_with_nextflow(
|
|
432
191
|
self, case_id: str, command_args: NfCommandArgs, dry_run: bool
|
|
433
192
|
) -> None:
|
|
@@ -733,7 +492,9 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
733
492
|
) -> list[MetricsBase]:
|
|
734
493
|
"""Parse a MultiqcDataJson and returns a list of metrics."""
|
|
735
494
|
metrics: list[MetricsBase] = []
|
|
736
|
-
|
|
495
|
+
list_of_metric_dicts: list[dict[str, Any]] = self._get_list_of_metric_dicts(multiqc_json)
|
|
496
|
+
|
|
497
|
+
for section in list_of_metric_dicts:
|
|
737
498
|
for subsection, metrics_dict in section.items():
|
|
738
499
|
if self._is_pattern_found(
|
|
739
500
|
pattern=search_pattern, text=subsection, exact_match=exact_match
|
|
@@ -745,6 +506,12 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
745
506
|
metrics.append(metric)
|
|
746
507
|
return metrics
|
|
747
508
|
|
|
509
|
+
def _get_list_of_metric_dicts(self, multiqc_json: MultiqcDataJson) -> list[dict[str, Any]]:
|
|
510
|
+
if metric_dicts := multiqc_json.report_general_stats_data:
|
|
511
|
+
return metric_dicts
|
|
512
|
+
else:
|
|
513
|
+
raise ValueError("No report_general_stats_data found in MultiqcDataJson")
|
|
514
|
+
|
|
748
515
|
def get_multiqc_metric(
|
|
749
516
|
self, metric_name: str, metric_value: str | int | float, metric_id: str
|
|
750
517
|
) -> MetricsBase:
|
|
@@ -882,7 +649,7 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
882
649
|
)
|
|
883
650
|
if not is_latest_analysis_qc and not is_latest_analysis_completed and not force:
|
|
884
651
|
LOG.error(
|
|
885
|
-
"Case not stored. Trailblazer status must be either QC or
|
|
652
|
+
"Case not stored. Trailblazer status must be either QC or COMPLETED to be able to store"
|
|
886
653
|
)
|
|
887
654
|
raise ValueError
|
|
888
655
|
|
|
@@ -912,24 +679,6 @@ class NfAnalysisAPI(AnalysisAPI):
|
|
|
912
679
|
def get_genome_build(self, case_id: str) -> GenomeVersion:
|
|
913
680
|
raise NotImplementedError
|
|
914
681
|
|
|
915
|
-
def get_gene_panel_genome_build(self, case_id: str) -> GenePanelGenomeBuild:
|
|
916
|
-
"""Return build version of the gene panel for a case."""
|
|
917
|
-
reference_genome: GenomeVersion = self.get_genome_build(case_id=case_id)
|
|
918
|
-
try:
|
|
919
|
-
return getattr(GenePanelGenomeBuild, reference_genome)
|
|
920
|
-
except AttributeError as error:
|
|
921
|
-
raise CgError(
|
|
922
|
-
f"Reference {reference_genome} has no associated genome build for panels: {error}"
|
|
923
|
-
) from error
|
|
924
|
-
|
|
925
|
-
def get_gene_panel(self, case_id: str, dry_run: bool = False) -> list[str]:
|
|
926
|
-
"""Create and return the aggregated gene panel file."""
|
|
927
|
-
return self._get_gene_panel(
|
|
928
|
-
case_id=case_id,
|
|
929
|
-
genome_build=self.get_gene_panel_genome_build(case_id=case_id),
|
|
930
|
-
dry_run=dry_run,
|
|
931
|
-
)
|
|
932
|
-
|
|
933
682
|
def parse_analysis(
|
|
934
683
|
self, qc_metrics_raw: list[MetricsBase], qc_metrics_model: Type[QCMetrics], **kwargs
|
|
935
684
|
) -> NextflowAnalysis:
|
cg/meta/workflow/raredisease.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Module for Raredisease Analysis API."""
|
|
2
2
|
|
|
3
|
-
import csv
|
|
4
3
|
import logging
|
|
5
4
|
from itertools import permutations
|
|
6
5
|
from pathlib import Path
|
|
@@ -24,22 +23,15 @@ from cg.constants.nf_analysis import (
|
|
|
24
23
|
RAREDISEASE_METRIC_CONDITIONS_WGS,
|
|
25
24
|
RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION,
|
|
26
25
|
)
|
|
27
|
-
from cg.constants.scout import RAREDISEASE_CASE_TAGS
|
|
26
|
+
from cg.constants.scout import RAREDISEASE_CASE_TAGS
|
|
28
27
|
from cg.constants.sequencing import SeqLibraryPrepCategory
|
|
29
|
-
from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
|
|
30
|
-
from cg.constants.tb import AnalysisType
|
|
31
28
|
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
|
|
32
29
|
from cg.models.analysis import NextflowAnalysis
|
|
33
30
|
from cg.models.cg_config import CGConfig
|
|
34
31
|
from cg.models.deliverables.metric_deliverables import MetricsBase, MultiqcDataJson
|
|
35
|
-
from cg.models.raredisease.raredisease import
|
|
36
|
-
RarediseaseParameters,
|
|
37
|
-
RarediseaseQCMetrics,
|
|
38
|
-
RarediseaseSampleSheetEntry,
|
|
39
|
-
RarediseaseSampleSheetHeaders,
|
|
40
|
-
)
|
|
32
|
+
from cg.models.raredisease.raredisease import RarediseaseQCMetrics
|
|
41
33
|
from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH
|
|
42
|
-
from cg.store.models import
|
|
34
|
+
from cg.store.models import Sample
|
|
43
35
|
|
|
44
36
|
LOG = logging.getLogger(__name__)
|
|
45
37
|
|
|
@@ -71,95 +63,11 @@ class RarediseaseAnalysisAPI(NfAnalysisAPI):
|
|
|
71
63
|
self.revision: str = config.raredisease.revision
|
|
72
64
|
self.nextflow_binary_path: str = config.raredisease.binary_path
|
|
73
65
|
|
|
74
|
-
@property
|
|
75
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
76
|
-
"""Headers for sample sheet."""
|
|
77
|
-
return RarediseaseSampleSheetHeaders.list()
|
|
78
|
-
|
|
79
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
80
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
81
|
-
fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
|
|
82
|
-
sample=case_sample.sample
|
|
83
|
-
)
|
|
84
|
-
sample_sheet_entry = RarediseaseSampleSheetEntry(
|
|
85
|
-
name=case_sample.sample.internal_id,
|
|
86
|
-
fastq_forward_read_paths=fastq_forward_read_paths,
|
|
87
|
-
fastq_reverse_read_paths=fastq_reverse_read_paths,
|
|
88
|
-
sex=self.get_sex_code(case_sample.sample.sex),
|
|
89
|
-
phenotype=self.get_phenotype_code(case_sample.status),
|
|
90
|
-
paternal_id=case_sample.get_paternal_sample_id,
|
|
91
|
-
maternal_id=case_sample.get_maternal_sample_id,
|
|
92
|
-
case_id=case_sample.case.internal_id,
|
|
93
|
-
)
|
|
94
|
-
return sample_sheet_entry.reformat_sample_content
|
|
95
|
-
|
|
96
|
-
@property
|
|
97
|
-
def is_gene_panel_required(self) -> bool:
|
|
98
|
-
"""Return True if a gene panel needs to be created using the information in StatusDB and exporting it from Scout."""
|
|
99
|
-
return True
|
|
100
|
-
|
|
101
|
-
def get_built_workflow_parameters(
|
|
102
|
-
self, case_id: str, dry_run: bool = False
|
|
103
|
-
) -> RarediseaseParameters:
|
|
104
|
-
"""Return parameters."""
|
|
105
|
-
analysis_type: AnalysisType = self.get_data_analysis_type(case_id=case_id)
|
|
106
|
-
target_bed_file: str = self.get_target_bed_from_lims(case_id=case_id) or ""
|
|
107
|
-
outdir = self.get_case_path(case_id=case_id)
|
|
108
|
-
sample_id_map: Path = self.get_sample_name_mapping_csv_path(case=case_id)
|
|
109
|
-
# Build the sample_id_map path
|
|
110
|
-
if not dry_run:
|
|
111
|
-
self.export_customer_internal_mapping_csv(case=case_id, output_path=sample_id_map)
|
|
112
|
-
|
|
113
|
-
return RarediseaseParameters(
|
|
114
|
-
input=self.get_sample_sheet_path(case_id=case_id),
|
|
115
|
-
outdir=outdir,
|
|
116
|
-
analysis_type=analysis_type,
|
|
117
|
-
target_bed_file=target_bed_file,
|
|
118
|
-
save_mapped_as_cram=True,
|
|
119
|
-
vcfanno_extra_resources=f"{outdir}/{ScoutExportFileName.MANAGED_VARIANTS}",
|
|
120
|
-
vep_filters_scout_fmt=f"{outdir}/{ScoutExportFileName.PANELS}",
|
|
121
|
-
sample_id_map=sample_id_map,
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
@staticmethod
|
|
125
|
-
def get_phenotype_code(phenotype: str) -> int:
|
|
126
|
-
"""Return Raredisease phenotype code."""
|
|
127
|
-
LOG.debug("Translate phenotype to integer code")
|
|
128
|
-
try:
|
|
129
|
-
code = PlinkPhenotypeStatus[phenotype.upper()]
|
|
130
|
-
except KeyError:
|
|
131
|
-
raise ValueError(f"{phenotype} is not a valid phenotype")
|
|
132
|
-
return code
|
|
133
|
-
|
|
134
|
-
@staticmethod
|
|
135
|
-
def get_sex_code(sex: str) -> int:
|
|
136
|
-
"""Return Raredisease sex code."""
|
|
137
|
-
LOG.debug("Translate sex to integer code")
|
|
138
|
-
try:
|
|
139
|
-
code = PlinkSex[sex.upper()]
|
|
140
|
-
except KeyError:
|
|
141
|
-
raise ValueError(f"{sex} is not a valid sex")
|
|
142
|
-
return code
|
|
143
|
-
|
|
144
66
|
@staticmethod
|
|
145
67
|
def get_bundle_filenames_path() -> Path:
|
|
146
68
|
"""Return Raredisease bundle filenames path."""
|
|
147
69
|
return RAREDISEASE_BUNDLE_FILENAMES_PATH
|
|
148
70
|
|
|
149
|
-
@property
|
|
150
|
-
def is_managed_variants_required(self) -> bool:
|
|
151
|
-
"""Return True if a managed variants needs to be exported from Scout."""
|
|
152
|
-
return True
|
|
153
|
-
|
|
154
|
-
def write_managed_variants(self, case_id: str, content: list[str]) -> None:
|
|
155
|
-
self._write_managed_variants(out_dir=Path(self.root, case_id), content=content)
|
|
156
|
-
|
|
157
|
-
def get_managed_variants(self, case_id: str) -> list[str]:
|
|
158
|
-
"""Create and return the managed variants."""
|
|
159
|
-
return self._get_managed_variants(
|
|
160
|
-
genome_build=self.get_gene_panel_genome_build(case_id=case_id)
|
|
161
|
-
)
|
|
162
|
-
|
|
163
71
|
def get_workflow_metrics(self, sample_id: str) -> dict:
|
|
164
72
|
"""Return Raredisease workflow metric conditions for a sample."""
|
|
165
73
|
sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id)
|
|
@@ -282,20 +190,3 @@ class RarediseaseAnalysisAPI(NfAnalysisAPI):
|
|
|
282
190
|
return super().parse_analysis(
|
|
283
191
|
qc_metrics_raw=qc_metrics_raw, qc_metrics_model=qc_metrics_model, **kwargs
|
|
284
192
|
)
|
|
285
|
-
|
|
286
|
-
def get_sample_name_mapping_csv_path(self, case: str) -> Path:
|
|
287
|
-
"""Return the path to the CSV file containing the mapping between sample names and internal ids."""
|
|
288
|
-
return Path(self.get_case_path(case), f"{case}_customer_internal_mapping.csv")
|
|
289
|
-
|
|
290
|
-
def export_customer_internal_mapping_csv(self, case: str, output_path: Path):
|
|
291
|
-
"""Export a CSV file mapping customer sample names to internal sample IDs."""
|
|
292
|
-
LOG.info(f"Exporting customer internal mapping CSV for case {case} to {output_path}")
|
|
293
|
-
with output_path.open("w", newline="") as csvfile:
|
|
294
|
-
writer = csv.writer(csvfile)
|
|
295
|
-
writer.writerow(
|
|
296
|
-
["customer_id", "internal_id"]
|
|
297
|
-
) # this is the header expected by the pipeline
|
|
298
|
-
for link in self.status_db.get_case_by_internal_id(case).links:
|
|
299
|
-
customer_sample_name = link.sample.name
|
|
300
|
-
internal_id = link.sample.internal_id
|
|
301
|
-
writer.writerow([customer_sample_name, internal_id])
|
cg/meta/workflow/rnafusion.py
CHANGED
|
@@ -4,7 +4,7 @@ import logging
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
from cg.constants import Workflow
|
|
7
|
-
from cg.constants.constants import GenomeVersion, Strandedness
|
|
7
|
+
from cg.constants.constants import GenomeVersion
|
|
8
8
|
from cg.constants.nf_analysis import RNAFUSION_METRIC_CONDITIONS
|
|
9
9
|
from cg.constants.scout import RNAFUSION_CASE_TAGS
|
|
10
10
|
from cg.exc import MissingMetrics
|
|
@@ -12,13 +12,8 @@ from cg.meta.workflow.nf_analysis import NfAnalysisAPI
|
|
|
12
12
|
from cg.models.analysis import NextflowAnalysis
|
|
13
13
|
from cg.models.cg_config import CGConfig
|
|
14
14
|
from cg.models.deliverables.metric_deliverables import MetricsBase
|
|
15
|
-
from cg.models.rnafusion.rnafusion import (
|
|
16
|
-
RnafusionParameters,
|
|
17
|
-
RnafusionQCMetrics,
|
|
18
|
-
RnafusionSampleSheetEntry,
|
|
19
|
-
)
|
|
15
|
+
from cg.models.rnafusion.rnafusion import RnafusionQCMetrics
|
|
20
16
|
from cg.resources import RNAFUSION_BUNDLE_FILENAMES_PATH
|
|
21
|
-
from cg.store.models import CaseSample
|
|
22
17
|
|
|
23
18
|
LOG = logging.getLogger(__name__)
|
|
24
19
|
|
|
@@ -50,11 +45,6 @@ class RnafusionAnalysisAPI(NfAnalysisAPI):
|
|
|
50
45
|
self.revision: str = config.rnafusion.revision
|
|
51
46
|
self.nextflow_binary_path: str = config.rnafusion.binary_path
|
|
52
47
|
|
|
53
|
-
@property
|
|
54
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
55
|
-
"""Headers for sample sheet."""
|
|
56
|
-
return RnafusionSampleSheetEntry.headers()
|
|
57
|
-
|
|
58
48
|
@property
|
|
59
49
|
def is_multiple_samples_allowed(self) -> bool:
|
|
60
50
|
"""Return whether the analysis supports multiple samples to be linked to the case."""
|
|
@@ -69,28 +59,6 @@ class RnafusionAnalysisAPI(NfAnalysisAPI):
|
|
|
69
59
|
"""Return Rnafusion bundle filenames path."""
|
|
70
60
|
return RNAFUSION_BUNDLE_FILENAMES_PATH
|
|
71
61
|
|
|
72
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
73
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
74
|
-
fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
|
|
75
|
-
sample=case_sample.sample
|
|
76
|
-
)
|
|
77
|
-
sample_sheet_entry = RnafusionSampleSheetEntry(
|
|
78
|
-
name=case_sample.sample.internal_id,
|
|
79
|
-
fastq_forward_read_paths=fastq_forward_read_paths,
|
|
80
|
-
fastq_reverse_read_paths=fastq_reverse_read_paths,
|
|
81
|
-
strandedness=Strandedness.REVERSE,
|
|
82
|
-
)
|
|
83
|
-
return sample_sheet_entry.reformat_sample_content()
|
|
84
|
-
|
|
85
|
-
def get_built_workflow_parameters(
|
|
86
|
-
self, case_id: str, dry_run: bool = False
|
|
87
|
-
) -> RnafusionParameters:
|
|
88
|
-
"""Get Rnafusion parameters."""
|
|
89
|
-
return RnafusionParameters(
|
|
90
|
-
input=self.get_sample_sheet_path(case_id=case_id),
|
|
91
|
-
outdir=self.get_case_path(case_id=case_id),
|
|
92
|
-
)
|
|
93
|
-
|
|
94
62
|
@staticmethod
|
|
95
63
|
def ensure_mandatory_metrics_present(metrics: list[MetricsBase]) -> None:
|
|
96
64
|
"""Check that all mandatory metrics are present.
|
cg/meta/workflow/taxprofiler.py
CHANGED
|
@@ -5,19 +5,13 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
from cg.constants import Workflow
|
|
7
7
|
from cg.constants.constants import GenomeVersion
|
|
8
|
-
from cg.constants.sequencing import SequencingPlatform
|
|
9
|
-
from cg.constants.symbols import EMPTY_STRING
|
|
10
8
|
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
|
|
11
9
|
from cg.models.analysis import NextflowAnalysis
|
|
12
10
|
from cg.models.cg_config import CGConfig
|
|
13
11
|
from cg.models.deliverables.metric_deliverables import MetricsBase
|
|
14
|
-
from cg.models.taxprofiler.taxprofiler import (
|
|
15
|
-
TaxprofilerParameters,
|
|
16
|
-
TaxprofilerQCMetrics,
|
|
17
|
-
TaxprofilerSampleSheetEntry,
|
|
18
|
-
)
|
|
12
|
+
from cg.models.taxprofiler.taxprofiler import TaxprofilerQCMetrics
|
|
19
13
|
from cg.resources import TAXPROFILER_BUNDLE_FILENAMES_PATH
|
|
20
|
-
from cg.store.models import CaseSample, Sample
|
|
14
|
+
from cg.store.models import Sample
|
|
21
15
|
|
|
22
16
|
LOG = logging.getLogger(__name__)
|
|
23
17
|
|
|
@@ -49,11 +43,6 @@ class TaxprofilerAnalysisAPI(NfAnalysisAPI):
|
|
|
49
43
|
self.nextflow_binary_path: str = config.taxprofiler.binary_path
|
|
50
44
|
self.compute_env_base: str = config.taxprofiler.compute_env
|
|
51
45
|
|
|
52
|
-
@property
|
|
53
|
-
def sample_sheet_headers(self) -> list[str]:
|
|
54
|
-
"""Headers for sample sheet."""
|
|
55
|
-
return TaxprofilerSampleSheetEntry.headers()
|
|
56
|
-
|
|
57
46
|
@property
|
|
58
47
|
def is_multiqc_pattern_search_exact(self) -> bool:
|
|
59
48
|
"""Only exact pattern search is allowed to collect metrics information from multiqc file."""
|
|
@@ -64,31 +53,6 @@ class TaxprofilerAnalysisAPI(NfAnalysisAPI):
|
|
|
64
53
|
"""Return Taxprofiler bundle filenames path."""
|
|
65
54
|
return TAXPROFILER_BUNDLE_FILENAMES_PATH
|
|
66
55
|
|
|
67
|
-
def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
|
|
68
|
-
"""Collect and format information required to build a sample sheet for a single sample."""
|
|
69
|
-
sample_name: str = case_sample.sample.name
|
|
70
|
-
fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
|
|
71
|
-
sample=case_sample.sample
|
|
72
|
-
)
|
|
73
|
-
sample_sheet_entry = TaxprofilerSampleSheetEntry(
|
|
74
|
-
name=sample_name,
|
|
75
|
-
run_accession=sample_name,
|
|
76
|
-
instrument_platform=SequencingPlatform.ILLUMINA,
|
|
77
|
-
fastq_forward_read_paths=fastq_forward_read_paths,
|
|
78
|
-
fastq_reverse_read_paths=fastq_reverse_read_paths,
|
|
79
|
-
fasta=EMPTY_STRING,
|
|
80
|
-
)
|
|
81
|
-
return sample_sheet_entry.reformat_sample_content()
|
|
82
|
-
|
|
83
|
-
def get_built_workflow_parameters(
|
|
84
|
-
self, case_id: str, dry_run: bool = False
|
|
85
|
-
) -> TaxprofilerParameters:
|
|
86
|
-
"""Return Taxprofiler parameters."""
|
|
87
|
-
return TaxprofilerParameters(
|
|
88
|
-
input=self.get_sample_sheet_path(case_id=case_id),
|
|
89
|
-
outdir=self.get_case_path(case_id=case_id),
|
|
90
|
-
)
|
|
91
|
-
|
|
92
56
|
def get_multiqc_search_patterns(self, case_id: str) -> dict:
|
|
93
57
|
"""Return search patterns for MultiQC for Taxprofiler."""
|
|
94
58
|
samples: list[Sample] = self.status_db.get_samples_by_case_id(case_id=case_id)
|