cg 76.0.0__py3-none-any.whl → 83.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. cg/__init__.py +1 -1
  2. cg/apps/housekeeper/hk.py +18 -1
  3. cg/apps/tb/api.py +42 -5
  4. cg/cli/transfer.py +13 -2
  5. cg/cli/upload/mutacc.py +16 -3
  6. cg/cli/upload/scout.py +2 -2
  7. cg/cli/upload/utils.py +10 -1
  8. cg/cli/workflow/balsamic/base.py +86 -172
  9. cg/cli/workflow/balsamic/options.py +3 -48
  10. cg/cli/workflow/balsamic/umi.py +210 -15
  11. cg/cli/workflow/microsalt/base.py +4 -2
  12. cg/cli/workflow/mip_dna/base.py +1 -1
  13. cg/cli/workflow/nallo/base.py +73 -23
  14. cg/cli/workflow/nf_analysis.py +5 -207
  15. cg/cli/workflow/raredisease/base.py +41 -54
  16. cg/cli/workflow/rnafusion/base.py +38 -8
  17. cg/cli/workflow/taxprofiler/base.py +31 -18
  18. cg/cli/workflow/tomte/base.py +83 -10
  19. cg/constants/constants.py +25 -30
  20. cg/constants/devices.py +6 -1
  21. cg/constants/gene_panel.py +3 -1
  22. cg/constants/housekeeper_tags.py +28 -28
  23. cg/constants/lims.py +4 -0
  24. cg/constants/nf_analysis.py +0 -1
  25. cg/constants/observations.py +21 -5
  26. cg/constants/orderforms.py +3 -3
  27. cg/constants/pacbio.py +1 -0
  28. cg/constants/priority.py +1 -1
  29. cg/constants/report.py +1 -0
  30. cg/constants/scout.py +12 -9
  31. cg/constants/sequencing.py +2 -2
  32. cg/constants/tb.py +5 -5
  33. cg/exc.py +27 -5
  34. cg/meta/compress/compress.py +7 -2
  35. cg/meta/delivery_report/balsamic.py +3 -1
  36. cg/meta/delivery_report/delivery_report_api.py +4 -3
  37. cg/meta/delivery_report/nallo.py +11 -11
  38. cg/meta/delivery_report/raredisease.py +7 -3
  39. cg/meta/delivery_report/templates/macros/data_analysis/qc_metrics/balsamic_qc_metrics.html +1 -0
  40. cg/meta/delivery_report/templates/macros/ticket_system.html +1 -1
  41. cg/meta/observations/balsamic_observations_api.py +110 -14
  42. cg/meta/observations/mip_dna_observations_api.py +1 -1
  43. cg/meta/observations/nallo_observations_api.py +1 -1
  44. cg/meta/observations/observations_api.py +23 -32
  45. cg/meta/observations/raredisease_observations_api.py +1 -1
  46. cg/meta/tar/tar.py +5 -2
  47. cg/meta/transfer/lims.py +32 -3
  48. cg/meta/upload/balsamic/balsamic.py +1 -8
  49. cg/meta/upload/coverage.py +5 -5
  50. cg/meta/upload/raredisease/raredisease.py +3 -0
  51. cg/meta/upload/scout/hk_tags.py +1 -0
  52. cg/meta/upload/scout/nallo_config_builder.py +31 -7
  53. cg/meta/workflow/balsamic.py +70 -36
  54. cg/meta/workflow/fastq.py +8 -0
  55. cg/meta/workflow/microsalt/quality_controller/models.py +0 -2
  56. cg/meta/workflow/microsalt/quality_controller/quality_controller.py +8 -16
  57. cg/meta/workflow/microsalt/quality_controller/result_logger.py +3 -6
  58. cg/meta/workflow/microsalt/quality_controller/utils.py +2 -45
  59. cg/meta/workflow/nallo.py +21 -99
  60. cg/meta/workflow/nf_analysis.py +12 -263
  61. cg/meta/workflow/raredisease.py +3 -112
  62. cg/meta/workflow/rnafusion.py +2 -34
  63. cg/meta/workflow/taxprofiler.py +2 -38
  64. cg/meta/workflow/tomte.py +2 -42
  65. cg/models/balsamic/config.py +0 -24
  66. cg/models/balsamic/metrics.py +5 -3
  67. cg/models/cg_config.py +39 -16
  68. cg/models/deliverables/metric_deliverables.py +1 -1
  69. cg/models/delivery_report/metadata.py +2 -1
  70. cg/models/nallo/nallo.py +14 -64
  71. cg/models/nf_analysis.py +1 -41
  72. cg/models/raredisease/raredisease.py +1 -63
  73. cg/models/rnafusion/rnafusion.py +0 -26
  74. cg/models/scout/scout_load_config.py +5 -2
  75. cg/models/taxprofiler/taxprofiler.py +0 -42
  76. cg/models/tomte/tomte.py +0 -69
  77. cg/resources/nallo_bundle_filenames.yaml +292 -22
  78. cg/resources/raredisease_bundle_filenames.yaml +11 -1
  79. cg/resources/taxprofiler_bundle_filenames.yaml +20 -0
  80. cg/server/admin.py +106 -25
  81. cg/server/app.py +15 -4
  82. cg/server/endpoints/sequencing_run/dtos.py +21 -3
  83. cg/server/endpoints/sequencing_run/pacbio_sequencing_run.py +29 -10
  84. cg/server/endpoints/sequencing_run/pacbio_smrt_cell_metrics.py +20 -0
  85. cg/services/analysis_starter/{service.py → analysis_starter.py} +11 -9
  86. cg/services/analysis_starter/configurator/abstract_model.py +8 -0
  87. cg/services/analysis_starter/configurator/configurator.py +1 -1
  88. cg/services/analysis_starter/configurator/extensions/nallo.py +27 -0
  89. cg/services/analysis_starter/configurator/extensions/{abstract.py → pipeline_extension.py} +1 -1
  90. cg/services/analysis_starter/configurator/extensions/raredisease.py +3 -1
  91. cg/services/analysis_starter/configurator/extensions/tomte_extension.py +28 -0
  92. cg/services/analysis_starter/configurator/file_creators/balsamic_config.py +240 -0
  93. cg/services/analysis_starter/configurator/file_creators/gene_panel.py +10 -5
  94. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/abstract.py +2 -1
  95. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/models.py +40 -1
  96. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/nallo.py +37 -0
  97. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/raredisease.py +8 -5
  98. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/tomte_params_file_creator.py +64 -0
  99. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/creator.py +1 -1
  100. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/nallo_sample_sheet_creator.py +65 -0
  101. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/protocol.py +12 -0
  102. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{raredisease.py → raredisease_sample_sheet_creator.py} +2 -2
  103. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{rnafusion.py → rnafusion_sample_sheet_creator.py} +2 -2
  104. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{taxprofiler.py → taxprofiler_sample_sheet_creator.py} +2 -2
  105. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/tomte_sample_sheet_creator.py +36 -0
  106. cg/services/analysis_starter/configurator/implementations/balsamic.py +68 -0
  107. cg/services/analysis_starter/configurator/implementations/nextflow.py +22 -5
  108. cg/services/analysis_starter/configurator/models/balsamic.py +152 -0
  109. cg/services/analysis_starter/configurator/models/mip_dna.py +6 -8
  110. cg/services/analysis_starter/configurator/models/nextflow.py +9 -0
  111. cg/services/analysis_starter/constants.py +2 -0
  112. cg/services/analysis_starter/factories/configurator_factory.py +131 -51
  113. cg/services/analysis_starter/factories/starter_factory.py +36 -7
  114. cg/services/analysis_starter/input_fetcher/implementations/bam_fetcher.py +57 -0
  115. cg/services/analysis_starter/input_fetcher/implementations/fastq_fetcher.py +3 -3
  116. cg/services/analysis_starter/submitters/seqera_platform/{client.py → seqera_platform_client.py} +19 -3
  117. cg/services/analysis_starter/submitters/seqera_platform/seqera_platform_submitter.py +73 -0
  118. cg/services/analysis_starter/submitters/submitter.py +1 -1
  119. cg/services/analysis_starter/submitters/subprocess/submitter.py +2 -1
  120. cg/services/analysis_starter/tracker/implementations/balsamic.py +22 -0
  121. cg/services/analysis_starter/tracker/implementations/microsalt.py +4 -4
  122. cg/services/analysis_starter/tracker/implementations/mip_dna.py +4 -1
  123. cg/services/analysis_starter/tracker/implementations/{nextflow.py → nextflow_tracker.py} +6 -4
  124. cg/services/analysis_starter/tracker/tracker.py +19 -15
  125. cg/services/deliver_files/factory.py +1 -1
  126. cg/services/delivery_message/messages/__init__.py +24 -14
  127. cg/services/delivery_message/messages/{microsalt_mwr_message.py → microsalt_message.py} +1 -1
  128. cg/services/delivery_message/utils.py +4 -40
  129. cg/services/illumina/backup/backup_service.py +29 -7
  130. cg/services/orders/validation/constants.py +3 -0
  131. cg/services/orders/validation/index_sequences.py +558 -0
  132. cg/services/orders/validation/order_types/microsalt/models/sample.py +2 -3
  133. cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +39 -18
  134. cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py +8 -2
  135. cg/services/run_devices/pacbio/data_transfer_service/dto.py +9 -3
  136. cg/services/run_devices/pacbio/data_transfer_service/utils.py +14 -7
  137. cg/services/run_devices/pacbio/metrics_parser/models.py +1 -0
  138. cg/services/run_devices/pacbio/sequencing_runs_service.py +35 -7
  139. cg/services/sequencing_qc_service/quality_checks/checks.py +18 -16
  140. cg/services/sequencing_qc_service/quality_checks/utils.py +82 -18
  141. cg/services/sequencing_qc_service/sequencing_qc_service.py +12 -10
  142. cg/store/crud/create.py +73 -42
  143. cg/store/crud/read.py +73 -7
  144. cg/store/crud/update.py +14 -3
  145. cg/store/models.py +98 -35
  146. cg/store/store.py +8 -1
  147. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/METADATA +1 -1
  148. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/RECORD +150 -138
  149. cg/services/analysis_starter/submitters/seqera_platform/submitter.py +0 -39
  150. cg/services/delivery_message/messages/microsalt_mwx_message.py +0 -18
  151. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/WHEEL +0 -0
  152. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,4 @@
1
- import copy
2
1
  import logging
3
- import re
4
2
  from datetime import datetime
5
3
  from pathlib import Path
6
4
  from typing import Any, Iterator, Type
@@ -17,16 +15,12 @@ from cg.constants.constants import (
17
15
  MultiQC,
18
16
  WorkflowManager,
19
17
  )
20
- from cg.constants.gene_panel import GenePanelGenomeBuild
21
- from cg.constants.housekeeper_tags import AlignmentFileTag
22
18
  from cg.constants.nextflow import NFX_WORK_DIR
23
19
  from cg.constants.nf_analysis import NfTowerStatus
24
20
  from cg.constants.tb import AnalysisStatus
25
21
  from cg.exc import CgError, HousekeeperStoreError, MetricsQCError
26
22
  from cg.io.controller import ReadFile, WriteFile
27
23
  from cg.io.json import read_json
28
- from cg.io.txt import concat_txt, write_txt
29
- from cg.io.yaml import read_yaml, write_yaml_nextflow_style
30
24
  from cg.meta.workflow.analysis import AnalysisAPI
31
25
  from cg.meta.workflow.nf_handlers import NextflowHandler, NfTowerHandler
32
26
  from cg.models.analysis import NextflowAnalysis
@@ -36,15 +30,9 @@ from cg.models.deliverables.metric_deliverables import (
36
30
  MetricsDeliverablesCondition,
37
31
  MultiqcDataJson,
38
32
  )
39
- from cg.models.fastq import FastqFileMeta
40
- from cg.models.nf_analysis import (
41
- FileDeliverable,
42
- NfCommandArgs,
43
- WorkflowDeliverables,
44
- WorkflowParameters,
45
- )
33
+ from cg.models.nf_analysis import FileDeliverable, NfCommandArgs, WorkflowDeliverables
46
34
  from cg.models.qc_metrics import QCMetrics
47
- from cg.store.models import Analysis, Case, CaseSample, Sample
35
+ from cg.store.models import Analysis, Case, Sample
48
36
  from cg.utils import Process
49
37
 
50
38
  LOG = logging.getLogger(__name__)
@@ -90,27 +78,12 @@ class NfAnalysisAPI(AnalysisAPI):
90
78
  def process(self, process: Process):
91
79
  self._process = process
92
80
 
93
- @property
94
- def sample_sheet_headers(self) -> list[str]:
95
- """Headers for sample sheet."""
96
- raise NotImplementedError
97
-
98
81
  @property
99
82
  def is_multiqc_pattern_search_exact(self) -> bool:
100
83
  """Return True if only exact pattern search is allowed to collect metrics information from MultiQC file.
101
84
  If false, pattern must be present but does not need to be exact."""
102
85
  return False
103
86
 
104
- @property
105
- def is_gene_panel_required(self) -> bool:
106
- """Return True if a gene panel needs to be created using the information in StatusDB and exporting it from Scout."""
107
- return False
108
-
109
- @property
110
- def is_managed_variants_required(self) -> bool:
111
- """Return True if a managed variant export needs to be exported it from Scout."""
112
- return False
113
-
114
87
  def get_profile(self, profile: str | None = None) -> str:
115
88
  """Get NF profiles."""
116
89
  return profile or self.profile
@@ -123,27 +96,6 @@ class NfAnalysisAPI(AnalysisAPI):
123
96
  """Get workflow version from config."""
124
97
  return self.revision
125
98
 
126
- def get_built_workflow_parameters(
127
- self, case_id: str, dry_run: bool = False
128
- ) -> WorkflowParameters:
129
- """Return workflow parameters."""
130
- raise NotImplementedError
131
-
132
- def get_nextflow_config_content(self, case_id: str) -> str:
133
- """Return nextflow config content."""
134
- config_files_list: list[str] = [
135
- self.platform,
136
- self.workflow_config_path,
137
- self.resources,
138
- ]
139
- extra_parameters_str: list[str] = [
140
- self.set_cluster_options(case_id=case_id),
141
- ]
142
- return concat_txt(
143
- file_paths=config_files_list,
144
- str_content=extra_parameters_str,
145
- )
146
-
147
99
  def get_case_path(self, case_id: str) -> Path:
148
100
  """Path to case working directory."""
149
101
  return Path(self.root_dir, case_id)
@@ -192,11 +144,6 @@ class NfAnalysisAPI(AnalysisAPI):
192
144
  FileExtensions.YAML
193
145
  )
194
146
 
195
- def create_case_directory(self, case_id: str, dry_run: bool = False) -> None:
196
- """Create case directory."""
197
- if not dry_run:
198
- Path(self.get_case_path(case_id=case_id)).mkdir(parents=True, exist_ok=True)
199
-
200
147
  def get_log_path(self, case_id: str, workflow: str) -> Path:
201
148
  """Path to NF log."""
202
149
  launch_time: str = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
@@ -211,68 +158,6 @@ class NfAnalysisAPI(AnalysisAPI):
211
158
  return work_dir.absolute()
212
159
  return Path(self.get_case_path(case_id), NFX_WORK_DIR)
213
160
 
214
- def get_gene_panels_path(self, case_id: str) -> Path:
215
- """Path to gene panels bed file exported from Scout."""
216
- return Path(self.get_case_path(case_id=case_id), "gene_panels").with_suffix(
217
- FileExtensions.BED
218
- )
219
-
220
- def set_cluster_options(self, case_id: str) -> str:
221
- return f'process.clusterOptions = "-A {self.account} --qos={self.get_slurm_qos_for_case(case_id=case_id)}"\n'
222
-
223
- @staticmethod
224
- def extract_read_files(
225
- metadata: list[FastqFileMeta], forward_read: bool = False, reverse_read: bool = False
226
- ) -> list[str]:
227
- """Extract a list of fastq file paths for either forward or reverse reads."""
228
- if forward_read and not reverse_read:
229
- read_direction = 1
230
- elif reverse_read and not forward_read:
231
- read_direction = 2
232
- else:
233
- raise ValueError("Either forward or reverse needs to be specified")
234
- sorted_metadata: list = sorted(metadata, key=lambda k: k.path)
235
- return [
236
- fastq_file.path
237
- for fastq_file in sorted_metadata
238
- if fastq_file.read_direction == read_direction
239
- ]
240
-
241
- def get_paired_read_paths(self, sample: Sample) -> tuple[list[str], list[str]]:
242
- """Returns a tuple of paired fastq file paths for the forward and reverse read."""
243
- sample_metadata: list[FastqFileMeta] = self.gather_file_metadata_for_sample(sample=sample)
244
- fastq_forward_read_paths: list[str] = self.extract_read_files(
245
- metadata=sample_metadata, forward_read=True
246
- )
247
- fastq_reverse_read_paths: list[str] = self.extract_read_files(
248
- metadata=sample_metadata, reverse_read=True
249
- )
250
- return fastq_forward_read_paths, fastq_reverse_read_paths
251
-
252
- def get_bam_read_file_paths(self, sample: Sample) -> list[Path]:
253
- """Gather BAM file path for a sample based on the BAM tag."""
254
- return [
255
- Path(hk_file.full_path)
256
- for hk_file in self.housekeeper_api.files(
257
- bundle=sample.internal_id, tags={AlignmentFileTag.BAM}
258
- )
259
- ]
260
-
261
- def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
262
- """Collect and format information required to build a sample sheet for a single sample."""
263
- raise NotImplementedError
264
-
265
- def get_sample_sheet_content(self, case_id: str) -> list[list[Any]]:
266
- """Return formatted information required to build a sample sheet for a case.
267
- This contains information for all samples linked to the case."""
268
- sample_sheet_content: list = []
269
- case: Case = self.get_validated_case(case_id)
270
- LOG.info(f"Samples linked to case {case_id}: {len(case.links)}")
271
- LOG.debug("Getting sample sheet information")
272
- for link in case.links:
273
- sample_sheet_content.extend(self.get_sample_sheet_content_per_sample(case_sample=link))
274
- return sample_sheet_content
275
-
276
161
  def verify_sample_sheet_exists(self, case_id: str, dry_run: bool = False) -> None:
277
162
  """Raise an error if sample sheet file is not found."""
278
163
  if not dry_run and not Path(self.get_sample_sheet_path(case_id=case_id)).exists():
@@ -283,33 +168,6 @@ class NfAnalysisAPI(AnalysisAPI):
283
168
  if not Path(self.get_deliverables_file_path(case_id=case_id)).exists():
284
169
  raise CgError(f"No deliverables file found for case {case_id}")
285
170
 
286
- def write_params_file(self, case_id: str, replaced_workflow_parameters: dict = None) -> None:
287
- """Write params-file for analysis."""
288
- LOG.debug("Writing parameters file")
289
- if replaced_workflow_parameters:
290
- write_yaml_nextflow_style(
291
- content=replaced_workflow_parameters,
292
- file_path=self.get_params_file_path(case_id=case_id),
293
- )
294
- else:
295
- self.get_params_file_path(case_id=case_id).touch()
296
-
297
- @staticmethod
298
- def write_sample_sheet(
299
- content: list[list[Any]],
300
- file_path: Path,
301
- header: list[str],
302
- ) -> None:
303
- """Write sample sheet CSV file."""
304
- LOG.debug("Writing sample sheet")
305
- if header:
306
- content.insert(0, header)
307
- WriteFile.write_file_from_content(
308
- content=content,
309
- file_format=FileFormat.CSV,
310
- file_path=file_path,
311
- )
312
-
313
171
  @staticmethod
314
172
  def write_deliverables_file(
315
173
  deliverables_content: dict, file_path: Path, file_format=FileFormat.YAML
@@ -329,105 +187,6 @@ class NfAnalysisAPI(AnalysisAPI):
329
187
  file_path=config_path,
330
188
  )
331
189
 
332
- def create_sample_sheet(self, case_id: str, dry_run: bool) -> None:
333
- """Create sample sheet for a case."""
334
- sample_sheet_content: list[list[Any]] = self.get_sample_sheet_content(case_id=case_id)
335
- if not dry_run:
336
- self.write_sample_sheet(
337
- content=sample_sheet_content,
338
- file_path=self.get_sample_sheet_path(case_id=case_id),
339
- header=self.sample_sheet_headers,
340
- )
341
-
342
- def create_params_file(self, case_id: str, dry_run: bool) -> None:
343
- """Create parameters file for a case."""
344
- LOG.debug("Getting parameters information built on-the-fly")
345
- built_workflow_parameters: dict | None = self.get_built_workflow_parameters(
346
- case_id=case_id, dry_run=dry_run
347
- ).model_dump()
348
- LOG.debug("Adding parameters from the pipeline config file if it exist")
349
-
350
- yaml_params: dict = (
351
- read_yaml(self.params) if hasattr(self, "params") and self.params else {}
352
- )
353
-
354
- # Check for duplicate keys
355
- duplicate_keys = set(built_workflow_parameters.keys()) & set(yaml_params.keys())
356
- if duplicate_keys:
357
- raise ValueError(f"Duplicate parameter keys found: {duplicate_keys}")
358
- workflow_parameters: dict = built_workflow_parameters | (yaml_params)
359
- replaced_workflow_parameters: dict = self.replace_values_in_params_file(
360
- workflow_parameters=workflow_parameters
361
- )
362
- if not dry_run:
363
- self.write_params_file(
364
- case_id=case_id, replaced_workflow_parameters=replaced_workflow_parameters
365
- )
366
-
367
- def replace_values_in_params_file(self, workflow_parameters: dict) -> dict:
368
- replaced_workflow_parameters = copy.deepcopy(workflow_parameters)
369
- """Iterate through the dictionary until all placeholders are replaced with the corresponding value from the dictionary"""
370
- while True:
371
- resolved: bool = True
372
- for key, value in replaced_workflow_parameters.items():
373
- new_value: str | int = self.replace_params_placeholders(value, workflow_parameters)
374
- if new_value != value:
375
- resolved = False
376
- replaced_workflow_parameters[key] = new_value
377
- if resolved:
378
- break
379
- return replaced_workflow_parameters
380
-
381
- def replace_params_placeholders(self, value: str | int, workflow_parameters: dict) -> str:
382
- """Replace values marked as placeholders with values from the given dictionary"""
383
- if isinstance(value, str):
384
- placeholders: list[str] = re.findall(r"{{\s*([^{}\s]+)\s*}}", value)
385
- for placeholder in placeholders:
386
- if placeholder in workflow_parameters:
387
- value = value.replace(
388
- f"{{{{{placeholder}}}}}", str(workflow_parameters[placeholder])
389
- )
390
- return value
391
-
392
- def create_nextflow_config(self, case_id: str, dry_run: bool = False) -> None:
393
- """Create nextflow config file."""
394
- if content := self.get_nextflow_config_content(case_id=case_id):
395
- LOG.debug("Writing nextflow config file")
396
- if not dry_run:
397
- write_txt(
398
- content=content,
399
- file_path=self.get_nextflow_config_path(case_id=case_id),
400
- )
401
-
402
- def create_gene_panel(self, case_id: str, dry_run: bool) -> None:
403
- """Create and write an aggregated gene panel file exported from Scout."""
404
- LOG.info("Creating gene panel file")
405
- bed_lines: list[str] = self.get_gene_panel(case_id=case_id, dry_run=dry_run)
406
- if dry_run:
407
- bed_lines: str = "\n".join(bed_lines)
408
- LOG.debug(f"{bed_lines}")
409
- return
410
- self.write_panel(case_id=case_id, content=bed_lines)
411
-
412
- def config_case(self, case_id: str, dry_run: bool):
413
- """Create directory and config files required by a workflow for a case."""
414
- if dry_run:
415
- LOG.info("Dry run: Config files will not be written")
416
- self.status_db.verify_case_exists(case_internal_id=case_id)
417
- self.create_case_directory(case_id=case_id, dry_run=dry_run)
418
- self.create_sample_sheet(case_id=case_id, dry_run=dry_run)
419
- self.create_params_file(case_id=case_id, dry_run=dry_run)
420
- self.create_nextflow_config(case_id=case_id, dry_run=dry_run)
421
- if self.is_gene_panel_required:
422
- self.create_gene_panel(case_id=case_id, dry_run=dry_run)
423
- if self.is_managed_variants_required:
424
- vcf_lines: list[str] = self.get_managed_variants(case_id=case_id)
425
- if dry_run:
426
- for line in vcf_lines:
427
- LOG.debug(line)
428
- else:
429
- self.write_managed_variants(case_id=case_id, content=vcf_lines)
430
-
431
190
  def _run_analysis_with_nextflow(
432
191
  self, case_id: str, command_args: NfCommandArgs, dry_run: bool
433
192
  ) -> None:
@@ -733,7 +492,9 @@ class NfAnalysisAPI(AnalysisAPI):
733
492
  ) -> list[MetricsBase]:
734
493
  """Parse a MultiqcDataJson and returns a list of metrics."""
735
494
  metrics: list[MetricsBase] = []
736
- for section in multiqc_json.report_general_stats_data:
495
+ list_of_metric_dicts: list[dict[str, Any]] = self._get_list_of_metric_dicts(multiqc_json)
496
+
497
+ for section in list_of_metric_dicts:
737
498
  for subsection, metrics_dict in section.items():
738
499
  if self._is_pattern_found(
739
500
  pattern=search_pattern, text=subsection, exact_match=exact_match
@@ -745,6 +506,12 @@ class NfAnalysisAPI(AnalysisAPI):
745
506
  metrics.append(metric)
746
507
  return metrics
747
508
 
509
+ def _get_list_of_metric_dicts(self, multiqc_json: MultiqcDataJson) -> list[dict[str, Any]]:
510
+ if metric_dicts := multiqc_json.report_general_stats_data:
511
+ return metric_dicts
512
+ else:
513
+ raise ValueError("No report_general_stats_data found in MultiqcDataJson")
514
+
748
515
  def get_multiqc_metric(
749
516
  self, metric_name: str, metric_value: str | int | float, metric_id: str
750
517
  ) -> MetricsBase:
@@ -882,7 +649,7 @@ class NfAnalysisAPI(AnalysisAPI):
882
649
  )
883
650
  if not is_latest_analysis_qc and not is_latest_analysis_completed and not force:
884
651
  LOG.error(
885
- "Case not stored. Trailblazer status must be either QC or COMPLETE to be able to store"
652
+ "Case not stored. Trailblazer status must be either QC or COMPLETED to be able to store"
886
653
  )
887
654
  raise ValueError
888
655
 
@@ -912,24 +679,6 @@ class NfAnalysisAPI(AnalysisAPI):
912
679
  def get_genome_build(self, case_id: str) -> GenomeVersion:
913
680
  raise NotImplementedError
914
681
 
915
- def get_gene_panel_genome_build(self, case_id: str) -> GenePanelGenomeBuild:
916
- """Return build version of the gene panel for a case."""
917
- reference_genome: GenomeVersion = self.get_genome_build(case_id=case_id)
918
- try:
919
- return getattr(GenePanelGenomeBuild, reference_genome)
920
- except AttributeError as error:
921
- raise CgError(
922
- f"Reference {reference_genome} has no associated genome build for panels: {error}"
923
- ) from error
924
-
925
- def get_gene_panel(self, case_id: str, dry_run: bool = False) -> list[str]:
926
- """Create and return the aggregated gene panel file."""
927
- return self._get_gene_panel(
928
- case_id=case_id,
929
- genome_build=self.get_gene_panel_genome_build(case_id=case_id),
930
- dry_run=dry_run,
931
- )
932
-
933
682
  def parse_analysis(
934
683
  self, qc_metrics_raw: list[MetricsBase], qc_metrics_model: Type[QCMetrics], **kwargs
935
684
  ) -> NextflowAnalysis:
@@ -1,6 +1,5 @@
1
1
  """Module for Raredisease Analysis API."""
2
2
 
3
- import csv
4
3
  import logging
5
4
  from itertools import permutations
6
5
  from pathlib import Path
@@ -24,22 +23,15 @@ from cg.constants.nf_analysis import (
24
23
  RAREDISEASE_METRIC_CONDITIONS_WGS,
25
24
  RAREDISEASE_PARENT_PEDDY_METRIC_CONDITION,
26
25
  )
27
- from cg.constants.scout import RAREDISEASE_CASE_TAGS, ScoutExportFileName
26
+ from cg.constants.scout import RAREDISEASE_CASE_TAGS
28
27
  from cg.constants.sequencing import SeqLibraryPrepCategory
29
- from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
30
- from cg.constants.tb import AnalysisType
31
28
  from cg.meta.workflow.nf_analysis import NfAnalysisAPI
32
29
  from cg.models.analysis import NextflowAnalysis
33
30
  from cg.models.cg_config import CGConfig
34
31
  from cg.models.deliverables.metric_deliverables import MetricsBase, MultiqcDataJson
35
- from cg.models.raredisease.raredisease import (
36
- RarediseaseParameters,
37
- RarediseaseQCMetrics,
38
- RarediseaseSampleSheetEntry,
39
- RarediseaseSampleSheetHeaders,
40
- )
32
+ from cg.models.raredisease.raredisease import RarediseaseQCMetrics
41
33
  from cg.resources import RAREDISEASE_BUNDLE_FILENAMES_PATH
42
- from cg.store.models import CaseSample, Sample
34
+ from cg.store.models import Sample
43
35
 
44
36
  LOG = logging.getLogger(__name__)
45
37
 
@@ -71,95 +63,11 @@ class RarediseaseAnalysisAPI(NfAnalysisAPI):
71
63
  self.revision: str = config.raredisease.revision
72
64
  self.nextflow_binary_path: str = config.raredisease.binary_path
73
65
 
74
- @property
75
- def sample_sheet_headers(self) -> list[str]:
76
- """Headers for sample sheet."""
77
- return RarediseaseSampleSheetHeaders.list()
78
-
79
- def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
80
- """Collect and format information required to build a sample sheet for a single sample."""
81
- fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
82
- sample=case_sample.sample
83
- )
84
- sample_sheet_entry = RarediseaseSampleSheetEntry(
85
- name=case_sample.sample.internal_id,
86
- fastq_forward_read_paths=fastq_forward_read_paths,
87
- fastq_reverse_read_paths=fastq_reverse_read_paths,
88
- sex=self.get_sex_code(case_sample.sample.sex),
89
- phenotype=self.get_phenotype_code(case_sample.status),
90
- paternal_id=case_sample.get_paternal_sample_id,
91
- maternal_id=case_sample.get_maternal_sample_id,
92
- case_id=case_sample.case.internal_id,
93
- )
94
- return sample_sheet_entry.reformat_sample_content
95
-
96
- @property
97
- def is_gene_panel_required(self) -> bool:
98
- """Return True if a gene panel needs to be created using the information in StatusDB and exporting it from Scout."""
99
- return True
100
-
101
- def get_built_workflow_parameters(
102
- self, case_id: str, dry_run: bool = False
103
- ) -> RarediseaseParameters:
104
- """Return parameters."""
105
- analysis_type: AnalysisType = self.get_data_analysis_type(case_id=case_id)
106
- target_bed_file: str = self.get_target_bed_from_lims(case_id=case_id) or ""
107
- outdir = self.get_case_path(case_id=case_id)
108
- sample_id_map: Path = self.get_sample_name_mapping_csv_path(case=case_id)
109
- # Build the sample_id_map path
110
- if not dry_run:
111
- self.export_customer_internal_mapping_csv(case=case_id, output_path=sample_id_map)
112
-
113
- return RarediseaseParameters(
114
- input=self.get_sample_sheet_path(case_id=case_id),
115
- outdir=outdir,
116
- analysis_type=analysis_type,
117
- target_bed_file=target_bed_file,
118
- save_mapped_as_cram=True,
119
- vcfanno_extra_resources=f"{outdir}/{ScoutExportFileName.MANAGED_VARIANTS}",
120
- vep_filters_scout_fmt=f"{outdir}/{ScoutExportFileName.PANELS}",
121
- sample_id_map=sample_id_map,
122
- )
123
-
124
- @staticmethod
125
- def get_phenotype_code(phenotype: str) -> int:
126
- """Return Raredisease phenotype code."""
127
- LOG.debug("Translate phenotype to integer code")
128
- try:
129
- code = PlinkPhenotypeStatus[phenotype.upper()]
130
- except KeyError:
131
- raise ValueError(f"{phenotype} is not a valid phenotype")
132
- return code
133
-
134
- @staticmethod
135
- def get_sex_code(sex: str) -> int:
136
- """Return Raredisease sex code."""
137
- LOG.debug("Translate sex to integer code")
138
- try:
139
- code = PlinkSex[sex.upper()]
140
- except KeyError:
141
- raise ValueError(f"{sex} is not a valid sex")
142
- return code
143
-
144
66
  @staticmethod
145
67
  def get_bundle_filenames_path() -> Path:
146
68
  """Return Raredisease bundle filenames path."""
147
69
  return RAREDISEASE_BUNDLE_FILENAMES_PATH
148
70
 
149
- @property
150
- def is_managed_variants_required(self) -> bool:
151
- """Return True if a managed variants needs to be exported from Scout."""
152
- return True
153
-
154
- def write_managed_variants(self, case_id: str, content: list[str]) -> None:
155
- self._write_managed_variants(out_dir=Path(self.root, case_id), content=content)
156
-
157
- def get_managed_variants(self, case_id: str) -> list[str]:
158
- """Create and return the managed variants."""
159
- return self._get_managed_variants(
160
- genome_build=self.get_gene_panel_genome_build(case_id=case_id)
161
- )
162
-
163
71
  def get_workflow_metrics(self, sample_id: str) -> dict:
164
72
  """Return Raredisease workflow metric conditions for a sample."""
165
73
  sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id)
@@ -282,20 +190,3 @@ class RarediseaseAnalysisAPI(NfAnalysisAPI):
282
190
  return super().parse_analysis(
283
191
  qc_metrics_raw=qc_metrics_raw, qc_metrics_model=qc_metrics_model, **kwargs
284
192
  )
285
-
286
- def get_sample_name_mapping_csv_path(self, case: str) -> Path:
287
- """Return the path to the CSV file containing the mapping between sample names and internal ids."""
288
- return Path(self.get_case_path(case), f"{case}_customer_internal_mapping.csv")
289
-
290
- def export_customer_internal_mapping_csv(self, case: str, output_path: Path):
291
- """Export a CSV file mapping customer sample names to internal sample IDs."""
292
- LOG.info(f"Exporting customer internal mapping CSV for case {case} to {output_path}")
293
- with output_path.open("w", newline="") as csvfile:
294
- writer = csv.writer(csvfile)
295
- writer.writerow(
296
- ["customer_id", "internal_id"]
297
- ) # this is the header expected by the pipeline
298
- for link in self.status_db.get_case_by_internal_id(case).links:
299
- customer_sample_name = link.sample.name
300
- internal_id = link.sample.internal_id
301
- writer.writerow([customer_sample_name, internal_id])
@@ -4,7 +4,7 @@ import logging
4
4
  from pathlib import Path
5
5
 
6
6
  from cg.constants import Workflow
7
- from cg.constants.constants import GenomeVersion, Strandedness
7
+ from cg.constants.constants import GenomeVersion
8
8
  from cg.constants.nf_analysis import RNAFUSION_METRIC_CONDITIONS
9
9
  from cg.constants.scout import RNAFUSION_CASE_TAGS
10
10
  from cg.exc import MissingMetrics
@@ -12,13 +12,8 @@ from cg.meta.workflow.nf_analysis import NfAnalysisAPI
12
12
  from cg.models.analysis import NextflowAnalysis
13
13
  from cg.models.cg_config import CGConfig
14
14
  from cg.models.deliverables.metric_deliverables import MetricsBase
15
- from cg.models.rnafusion.rnafusion import (
16
- RnafusionParameters,
17
- RnafusionQCMetrics,
18
- RnafusionSampleSheetEntry,
19
- )
15
+ from cg.models.rnafusion.rnafusion import RnafusionQCMetrics
20
16
  from cg.resources import RNAFUSION_BUNDLE_FILENAMES_PATH
21
- from cg.store.models import CaseSample
22
17
 
23
18
  LOG = logging.getLogger(__name__)
24
19
 
@@ -50,11 +45,6 @@ class RnafusionAnalysisAPI(NfAnalysisAPI):
50
45
  self.revision: str = config.rnafusion.revision
51
46
  self.nextflow_binary_path: str = config.rnafusion.binary_path
52
47
 
53
- @property
54
- def sample_sheet_headers(self) -> list[str]:
55
- """Headers for sample sheet."""
56
- return RnafusionSampleSheetEntry.headers()
57
-
58
48
  @property
59
49
  def is_multiple_samples_allowed(self) -> bool:
60
50
  """Return whether the analysis supports multiple samples to be linked to the case."""
@@ -69,28 +59,6 @@ class RnafusionAnalysisAPI(NfAnalysisAPI):
69
59
  """Return Rnafusion bundle filenames path."""
70
60
  return RNAFUSION_BUNDLE_FILENAMES_PATH
71
61
 
72
- def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
73
- """Collect and format information required to build a sample sheet for a single sample."""
74
- fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
75
- sample=case_sample.sample
76
- )
77
- sample_sheet_entry = RnafusionSampleSheetEntry(
78
- name=case_sample.sample.internal_id,
79
- fastq_forward_read_paths=fastq_forward_read_paths,
80
- fastq_reverse_read_paths=fastq_reverse_read_paths,
81
- strandedness=Strandedness.REVERSE,
82
- )
83
- return sample_sheet_entry.reformat_sample_content()
84
-
85
- def get_built_workflow_parameters(
86
- self, case_id: str, dry_run: bool = False
87
- ) -> RnafusionParameters:
88
- """Get Rnafusion parameters."""
89
- return RnafusionParameters(
90
- input=self.get_sample_sheet_path(case_id=case_id),
91
- outdir=self.get_case_path(case_id=case_id),
92
- )
93
-
94
62
  @staticmethod
95
63
  def ensure_mandatory_metrics_present(metrics: list[MetricsBase]) -> None:
96
64
  """Check that all mandatory metrics are present.
@@ -5,19 +5,13 @@ from pathlib import Path
5
5
 
6
6
  from cg.constants import Workflow
7
7
  from cg.constants.constants import GenomeVersion
8
- from cg.constants.sequencing import SequencingPlatform
9
- from cg.constants.symbols import EMPTY_STRING
10
8
  from cg.meta.workflow.nf_analysis import NfAnalysisAPI
11
9
  from cg.models.analysis import NextflowAnalysis
12
10
  from cg.models.cg_config import CGConfig
13
11
  from cg.models.deliverables.metric_deliverables import MetricsBase
14
- from cg.models.taxprofiler.taxprofiler import (
15
- TaxprofilerParameters,
16
- TaxprofilerQCMetrics,
17
- TaxprofilerSampleSheetEntry,
18
- )
12
+ from cg.models.taxprofiler.taxprofiler import TaxprofilerQCMetrics
19
13
  from cg.resources import TAXPROFILER_BUNDLE_FILENAMES_PATH
20
- from cg.store.models import CaseSample, Sample
14
+ from cg.store.models import Sample
21
15
 
22
16
  LOG = logging.getLogger(__name__)
23
17
 
@@ -49,11 +43,6 @@ class TaxprofilerAnalysisAPI(NfAnalysisAPI):
49
43
  self.nextflow_binary_path: str = config.taxprofiler.binary_path
50
44
  self.compute_env_base: str = config.taxprofiler.compute_env
51
45
 
52
- @property
53
- def sample_sheet_headers(self) -> list[str]:
54
- """Headers for sample sheet."""
55
- return TaxprofilerSampleSheetEntry.headers()
56
-
57
46
  @property
58
47
  def is_multiqc_pattern_search_exact(self) -> bool:
59
48
  """Only exact pattern search is allowed to collect metrics information from multiqc file."""
@@ -64,31 +53,6 @@ class TaxprofilerAnalysisAPI(NfAnalysisAPI):
64
53
  """Return Taxprofiler bundle filenames path."""
65
54
  return TAXPROFILER_BUNDLE_FILENAMES_PATH
66
55
 
67
- def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
68
- """Collect and format information required to build a sample sheet for a single sample."""
69
- sample_name: str = case_sample.sample.name
70
- fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
71
- sample=case_sample.sample
72
- )
73
- sample_sheet_entry = TaxprofilerSampleSheetEntry(
74
- name=sample_name,
75
- run_accession=sample_name,
76
- instrument_platform=SequencingPlatform.ILLUMINA,
77
- fastq_forward_read_paths=fastq_forward_read_paths,
78
- fastq_reverse_read_paths=fastq_reverse_read_paths,
79
- fasta=EMPTY_STRING,
80
- )
81
- return sample_sheet_entry.reformat_sample_content()
82
-
83
- def get_built_workflow_parameters(
84
- self, case_id: str, dry_run: bool = False
85
- ) -> TaxprofilerParameters:
86
- """Return Taxprofiler parameters."""
87
- return TaxprofilerParameters(
88
- input=self.get_sample_sheet_path(case_id=case_id),
89
- outdir=self.get_case_path(case_id=case_id),
90
- )
91
-
92
56
  def get_multiqc_search_patterns(self, case_id: str) -> dict:
93
57
  """Return search patterns for MultiQC for Taxprofiler."""
94
58
  samples: list[Sample] = self.status_db.get_samples_by_case_id(case_id=case_id)