cg 76.0.0__py3-none-any.whl → 83.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. cg/__init__.py +1 -1
  2. cg/apps/housekeeper/hk.py +18 -1
  3. cg/apps/tb/api.py +42 -5
  4. cg/cli/transfer.py +13 -2
  5. cg/cli/upload/mutacc.py +16 -3
  6. cg/cli/upload/scout.py +2 -2
  7. cg/cli/upload/utils.py +10 -1
  8. cg/cli/workflow/balsamic/base.py +86 -172
  9. cg/cli/workflow/balsamic/options.py +3 -48
  10. cg/cli/workflow/balsamic/umi.py +210 -15
  11. cg/cli/workflow/microsalt/base.py +4 -2
  12. cg/cli/workflow/mip_dna/base.py +1 -1
  13. cg/cli/workflow/nallo/base.py +73 -23
  14. cg/cli/workflow/nf_analysis.py +5 -207
  15. cg/cli/workflow/raredisease/base.py +41 -54
  16. cg/cli/workflow/rnafusion/base.py +38 -8
  17. cg/cli/workflow/taxprofiler/base.py +31 -18
  18. cg/cli/workflow/tomte/base.py +83 -10
  19. cg/constants/constants.py +25 -30
  20. cg/constants/devices.py +6 -1
  21. cg/constants/gene_panel.py +3 -1
  22. cg/constants/housekeeper_tags.py +28 -28
  23. cg/constants/lims.py +4 -0
  24. cg/constants/nf_analysis.py +0 -1
  25. cg/constants/observations.py +21 -5
  26. cg/constants/orderforms.py +3 -3
  27. cg/constants/pacbio.py +1 -0
  28. cg/constants/priority.py +1 -1
  29. cg/constants/report.py +1 -0
  30. cg/constants/scout.py +12 -9
  31. cg/constants/sequencing.py +2 -2
  32. cg/constants/tb.py +5 -5
  33. cg/exc.py +27 -5
  34. cg/meta/compress/compress.py +7 -2
  35. cg/meta/delivery_report/balsamic.py +3 -1
  36. cg/meta/delivery_report/delivery_report_api.py +4 -3
  37. cg/meta/delivery_report/nallo.py +11 -11
  38. cg/meta/delivery_report/raredisease.py +7 -3
  39. cg/meta/delivery_report/templates/macros/data_analysis/qc_metrics/balsamic_qc_metrics.html +1 -0
  40. cg/meta/delivery_report/templates/macros/ticket_system.html +1 -1
  41. cg/meta/observations/balsamic_observations_api.py +110 -14
  42. cg/meta/observations/mip_dna_observations_api.py +1 -1
  43. cg/meta/observations/nallo_observations_api.py +1 -1
  44. cg/meta/observations/observations_api.py +23 -32
  45. cg/meta/observations/raredisease_observations_api.py +1 -1
  46. cg/meta/tar/tar.py +5 -2
  47. cg/meta/transfer/lims.py +32 -3
  48. cg/meta/upload/balsamic/balsamic.py +1 -8
  49. cg/meta/upload/coverage.py +5 -5
  50. cg/meta/upload/raredisease/raredisease.py +3 -0
  51. cg/meta/upload/scout/hk_tags.py +1 -0
  52. cg/meta/upload/scout/nallo_config_builder.py +31 -7
  53. cg/meta/workflow/balsamic.py +70 -36
  54. cg/meta/workflow/fastq.py +8 -0
  55. cg/meta/workflow/microsalt/quality_controller/models.py +0 -2
  56. cg/meta/workflow/microsalt/quality_controller/quality_controller.py +8 -16
  57. cg/meta/workflow/microsalt/quality_controller/result_logger.py +3 -6
  58. cg/meta/workflow/microsalt/quality_controller/utils.py +2 -45
  59. cg/meta/workflow/nallo.py +21 -99
  60. cg/meta/workflow/nf_analysis.py +12 -263
  61. cg/meta/workflow/raredisease.py +3 -112
  62. cg/meta/workflow/rnafusion.py +2 -34
  63. cg/meta/workflow/taxprofiler.py +2 -38
  64. cg/meta/workflow/tomte.py +2 -42
  65. cg/models/balsamic/config.py +0 -24
  66. cg/models/balsamic/metrics.py +5 -3
  67. cg/models/cg_config.py +39 -16
  68. cg/models/deliverables/metric_deliverables.py +1 -1
  69. cg/models/delivery_report/metadata.py +2 -1
  70. cg/models/nallo/nallo.py +14 -64
  71. cg/models/nf_analysis.py +1 -41
  72. cg/models/raredisease/raredisease.py +1 -63
  73. cg/models/rnafusion/rnafusion.py +0 -26
  74. cg/models/scout/scout_load_config.py +5 -2
  75. cg/models/taxprofiler/taxprofiler.py +0 -42
  76. cg/models/tomte/tomte.py +0 -69
  77. cg/resources/nallo_bundle_filenames.yaml +292 -22
  78. cg/resources/raredisease_bundle_filenames.yaml +11 -1
  79. cg/resources/taxprofiler_bundle_filenames.yaml +20 -0
  80. cg/server/admin.py +106 -25
  81. cg/server/app.py +15 -4
  82. cg/server/endpoints/sequencing_run/dtos.py +21 -3
  83. cg/server/endpoints/sequencing_run/pacbio_sequencing_run.py +29 -10
  84. cg/server/endpoints/sequencing_run/pacbio_smrt_cell_metrics.py +20 -0
  85. cg/services/analysis_starter/{service.py → analysis_starter.py} +11 -9
  86. cg/services/analysis_starter/configurator/abstract_model.py +8 -0
  87. cg/services/analysis_starter/configurator/configurator.py +1 -1
  88. cg/services/analysis_starter/configurator/extensions/nallo.py +27 -0
  89. cg/services/analysis_starter/configurator/extensions/{abstract.py → pipeline_extension.py} +1 -1
  90. cg/services/analysis_starter/configurator/extensions/raredisease.py +3 -1
  91. cg/services/analysis_starter/configurator/extensions/tomte_extension.py +28 -0
  92. cg/services/analysis_starter/configurator/file_creators/balsamic_config.py +240 -0
  93. cg/services/analysis_starter/configurator/file_creators/gene_panel.py +10 -5
  94. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/abstract.py +2 -1
  95. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/models.py +40 -1
  96. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/nallo.py +37 -0
  97. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/raredisease.py +8 -5
  98. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/tomte_params_file_creator.py +64 -0
  99. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/creator.py +1 -1
  100. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/nallo_sample_sheet_creator.py +65 -0
  101. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/protocol.py +12 -0
  102. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{raredisease.py → raredisease_sample_sheet_creator.py} +2 -2
  103. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{rnafusion.py → rnafusion_sample_sheet_creator.py} +2 -2
  104. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{taxprofiler.py → taxprofiler_sample_sheet_creator.py} +2 -2
  105. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/tomte_sample_sheet_creator.py +36 -0
  106. cg/services/analysis_starter/configurator/implementations/balsamic.py +68 -0
  107. cg/services/analysis_starter/configurator/implementations/nextflow.py +22 -5
  108. cg/services/analysis_starter/configurator/models/balsamic.py +152 -0
  109. cg/services/analysis_starter/configurator/models/mip_dna.py +6 -8
  110. cg/services/analysis_starter/configurator/models/nextflow.py +9 -0
  111. cg/services/analysis_starter/constants.py +2 -0
  112. cg/services/analysis_starter/factories/configurator_factory.py +131 -51
  113. cg/services/analysis_starter/factories/starter_factory.py +36 -7
  114. cg/services/analysis_starter/input_fetcher/implementations/bam_fetcher.py +57 -0
  115. cg/services/analysis_starter/input_fetcher/implementations/fastq_fetcher.py +3 -3
  116. cg/services/analysis_starter/submitters/seqera_platform/{client.py → seqera_platform_client.py} +19 -3
  117. cg/services/analysis_starter/submitters/seqera_platform/seqera_platform_submitter.py +73 -0
  118. cg/services/analysis_starter/submitters/submitter.py +1 -1
  119. cg/services/analysis_starter/submitters/subprocess/submitter.py +2 -1
  120. cg/services/analysis_starter/tracker/implementations/balsamic.py +22 -0
  121. cg/services/analysis_starter/tracker/implementations/microsalt.py +4 -4
  122. cg/services/analysis_starter/tracker/implementations/mip_dna.py +4 -1
  123. cg/services/analysis_starter/tracker/implementations/{nextflow.py → nextflow_tracker.py} +6 -4
  124. cg/services/analysis_starter/tracker/tracker.py +19 -15
  125. cg/services/deliver_files/factory.py +1 -1
  126. cg/services/delivery_message/messages/__init__.py +24 -14
  127. cg/services/delivery_message/messages/{microsalt_mwr_message.py → microsalt_message.py} +1 -1
  128. cg/services/delivery_message/utils.py +4 -40
  129. cg/services/illumina/backup/backup_service.py +29 -7
  130. cg/services/orders/validation/constants.py +3 -0
  131. cg/services/orders/validation/index_sequences.py +558 -0
  132. cg/services/orders/validation/order_types/microsalt/models/sample.py +2 -3
  133. cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +39 -18
  134. cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py +8 -2
  135. cg/services/run_devices/pacbio/data_transfer_service/dto.py +9 -3
  136. cg/services/run_devices/pacbio/data_transfer_service/utils.py +14 -7
  137. cg/services/run_devices/pacbio/metrics_parser/models.py +1 -0
  138. cg/services/run_devices/pacbio/sequencing_runs_service.py +35 -7
  139. cg/services/sequencing_qc_service/quality_checks/checks.py +18 -16
  140. cg/services/sequencing_qc_service/quality_checks/utils.py +82 -18
  141. cg/services/sequencing_qc_service/sequencing_qc_service.py +12 -10
  142. cg/store/crud/create.py +73 -42
  143. cg/store/crud/read.py +73 -7
  144. cg/store/crud/update.py +14 -3
  145. cg/store/models.py +98 -35
  146. cg/store/store.py +8 -1
  147. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/METADATA +1 -1
  148. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/RECORD +150 -138
  149. cg/services/analysis_starter/submitters/seqera_platform/submitter.py +0 -39
  150. cg/services/delivery_message/messages/microsalt_mwx_message.py +0 -18
  151. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/WHEEL +0 -0
  152. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/entry_points.txt +0 -0
@@ -17,10 +17,10 @@ from cg.models.cg_config import CGConfig, CommonAppConfig
17
17
  from cg.models.observations.input_files import (
18
18
  BalsamicObservationsInputFiles,
19
19
  MipDNAObservationsInputFiles,
20
- RarediseaseObservationsInputFiles,
21
20
  NalloObservationsInputFiles,
21
+ RarediseaseObservationsInputFiles,
22
22
  )
23
- from cg.store.models import Analysis, Case
23
+ from cg.store.models import Analysis, Case, Sample
24
24
  from cg.store.store import Store
25
25
 
26
26
  LOG = logging.getLogger(__name__)
@@ -38,6 +38,9 @@ class ObservationsAPI:
38
38
  self.loqusdb_wes_config: CommonAppConfig = config.loqusdb_wes
39
39
  self.loqusdb_somatic_config: CommonAppConfig = config.loqusdb_somatic
40
40
  self.loqusdb_tumor_config: CommonAppConfig = config.loqusdb_tumor
41
+ self.loqusdb_somatic_lymphoid_config: CommonAppConfig = config.loqusdb_somatic_lymphoid
42
+ self.loqusdb_somatic_myeloid_config: CommonAppConfig = config.loqusdb_somatic_myeloid
43
+ self.loqusdb_somatic_exome_config: CommonAppConfig = config.loqusdb_somatic_exome
41
44
 
42
45
  def upload(self, case_id: str) -> None:
43
46
  """
@@ -47,10 +50,7 @@ class ObservationsAPI:
47
50
  LoqusdbUploadCaseError: If case is not eligible for Loqusdb uploads
48
51
  """
49
52
  case: Case = self.store.get_case_by_internal_id(internal_id=case_id)
50
- is_case_eligible_for_observations_upload: bool = (
51
- self.is_case_eligible_for_observations_upload(case)
52
- )
53
- if is_case_eligible_for_observations_upload:
53
+ if self.is_case_eligible_for_observations_upload(case):
54
54
  self.load_observations(case=case)
55
55
  else:
56
56
  LOG.error(f"Case {case.internal_id} is not eligible for observations upload")
@@ -75,29 +75,21 @@ class ObservationsAPI:
75
75
 
76
76
  def get_loqusdb_api(self, loqusdb_instance: LoqusdbInstance) -> LoqusdbAPI:
77
77
  """Returns a Loqusdb API for the given Loqusdb instance."""
78
- loqusdb_apis = {
79
- LoqusdbInstance.LWP: LoqusdbAPI(
80
- binary_path=self.loqusdb_rd_lwp_config.binary_path,
81
- config_path=self.loqusdb_rd_lwp_config.config_path,
82
- ),
83
- LoqusdbInstance.WGS: LoqusdbAPI(
84
- binary_path=self.loqusdb_config.binary_path,
85
- config_path=self.loqusdb_config.config_path,
86
- ),
87
- LoqusdbInstance.WES: LoqusdbAPI(
88
- binary_path=self.loqusdb_wes_config.binary_path,
89
- config_path=self.loqusdb_wes_config.config_path,
90
- ),
91
- LoqusdbInstance.SOMATIC: LoqusdbAPI(
92
- binary_path=self.loqusdb_somatic_config.binary_path,
93
- config_path=self.loqusdb_somatic_config.config_path,
94
- ),
95
- LoqusdbInstance.TUMOR: LoqusdbAPI(
96
- binary_path=self.loqusdb_tumor_config.binary_path,
97
- config_path=self.loqusdb_tumor_config.config_path,
98
- ),
78
+ loqusdb_config_map: dict = {
79
+ LoqusdbInstance.LWP: self.loqusdb_rd_lwp_config,
80
+ LoqusdbInstance.WGS: self.loqusdb_config,
81
+ LoqusdbInstance.WES: self.loqusdb_wes_config,
82
+ LoqusdbInstance.SOMATIC: self.loqusdb_somatic_config,
83
+ LoqusdbInstance.TUMOR: self.loqusdb_tumor_config,
84
+ LoqusdbInstance.SOMATIC_LYMPHOID: self.loqusdb_somatic_lymphoid_config,
85
+ LoqusdbInstance.SOMATIC_MYELOID: self.loqusdb_somatic_myeloid_config,
86
+ LoqusdbInstance.SOMATIC_EXOME: self.loqusdb_somatic_exome_config,
99
87
  }
100
- return loqusdb_apis[loqusdb_instance]
88
+ loqusdb_config = loqusdb_config_map[loqusdb_instance]
89
+ return LoqusdbAPI(
90
+ binary_path=loqusdb_config.binary_path,
91
+ config_path=loqusdb_config.config_path,
92
+ )
101
93
 
102
94
  @staticmethod
103
95
  def is_duplicate(
@@ -117,11 +109,11 @@ class ObservationsAPI:
117
109
  )
118
110
  return bool(loqusdb_case or duplicate or case.loqusdb_uploaded_samples)
119
111
 
120
- def update_statusdb_loqusdb_id(self, samples: list[Case], loqusdb_id: str | None) -> None:
112
+ def update_statusdb_loqusdb_id(self, samples: list[Sample], loqusdb_id: str | None) -> None:
121
113
  """Update Loqusdb ID field in StatusDB for each of the provided samples."""
122
114
  for sample in samples:
123
115
  sample.loqusdb_id = loqusdb_id
124
- self.store.session.commit()
116
+ self.store.commit_to_store()
125
117
 
126
118
  def is_customer_eligible_for_observations_upload(self, customer_id: str) -> bool:
127
119
  """Return whether the customer has been whitelisted for uploading observations."""
@@ -140,8 +132,7 @@ class ObservationsAPI:
140
132
  return False
141
133
  return True
142
134
 
143
- def is_sample_source_eligible_for_observations_upload(self, case_id: str) -> bool:
144
- """Check if the sample source is FFPE."""
135
+ def is_sample_source_type_not_ffpe(self, case_id: str) -> bool:
145
136
  source_type: str | None = self.analysis_api.get_case_source_type(case_id)
146
137
  if source_type and SourceType.FFPE.lower() not in source_type.lower():
147
138
  return True
@@ -58,7 +58,7 @@ class RarediseaseObservationsAPI(ObservationsAPI):
58
58
  self.is_customer_eligible_for_observations_upload(case.customer.internal_id),
59
59
  self.is_sequencing_method_eligible_for_observations_upload(case.internal_id),
60
60
  self.is_sample_type_eligible_for_observations_upload(case),
61
- self.is_sample_source_eligible_for_observations_upload(case.internal_id),
61
+ self.is_sample_source_type_not_ffpe(case.internal_id),
62
62
  ]
63
63
  )
64
64
 
cg/meta/tar/tar.py CHANGED
@@ -24,8 +24,9 @@ class TarAPI:
24
24
  self.process.run_command(command, dry_run=self.dry_run)
25
25
 
26
26
  @staticmethod
27
- def get_extract_file_command(input_file: Path, output_dir: Path) -> list[str]:
28
- """Generates the Tar command for flow cel run directory extraction"""
27
+ def get_extract_file_command(input_file: Path, output_dir: Path, is_current: bool) -> list[str]:
28
+ """Generates the Tar command for flow cel run directory extraction. If the is_current flag
29
+ is set, the command will strip the first 6 components of the file path when extracting."""
29
30
  extraction_parameters: list = FlowCellExtractionParameters.EXTRACT_FILE.copy()
30
31
  extraction_parameters.append(str(input_file))
31
32
  exclude_files: list = FlowCellExtractionParameters.EXCLUDE_FILES.copy()
@@ -33,6 +34,8 @@ class TarAPI:
33
34
  target_directory_parameters: list = FlowCellExtractionParameters.CHANGE_TO_DIR.copy()
34
35
  extraction_parameters.extend(target_directory_parameters)
35
36
  extraction_parameters.append(str(output_dir))
37
+ if is_current:
38
+ extraction_parameters.append("--strip-components=6")
36
39
  return extraction_parameters
37
40
 
38
41
  def get_compress_cmd(self, input_path: Path) -> str:
cg/meta/transfer/lims.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from datetime import datetime
2
3
  from enum import Enum
3
4
 
4
5
  import genologics.entities
@@ -6,6 +7,7 @@ import genologics.entities
6
7
  from cg.apps.lims import LimsAPI
7
8
  from cg.store.models import Pool, Sample
8
9
  from cg.store.store import Store
10
+ from cg.utils.date import get_date_days_ago
9
11
 
10
12
  LOG = logging.getLogger(__name__)
11
13
 
@@ -52,14 +54,27 @@ class TransferLims(object):
52
54
  }
53
55
 
54
56
  def transfer_samples(
55
- self, status_type: SampleState, include: str = "unset", sample_id: str = None
57
+ self,
58
+ status_type: SampleState,
59
+ include: str = "unset",
60
+ max_order_age: int | None = None,
61
+ sample_id: str = None,
56
62
  ):
57
63
  """Transfer information about samples."""
58
64
 
59
65
  if sample_id:
60
66
  samples: list[Sample] = self.status.get_samples_by_internal_id(internal_id=sample_id)
61
67
  else:
62
- samples: list[Sample] = self._get_samples_to_include(include, status_type)
68
+ samples: list[Sample] | None = self._get_samples_to_include(
69
+ include=include, status_type=status_type
70
+ )
71
+
72
+ if max_order_age:
73
+ order_date_cutoff: datetime = get_date_days_ago(max_order_age * 365)
74
+ LOG.info(f"Remove samples ordered before {str(order_date_cutoff)}")
75
+ samples: list[Sample] = self._filter_out_older_orders(
76
+ samples=samples, order_date_cutoff=order_date_cutoff
77
+ )
63
78
 
64
79
  if samples is None:
65
80
  LOG.info(f"No samples to process found with {include} {status_type.value}")
@@ -80,10 +95,20 @@ class TransferLims(object):
80
95
  )
81
96
 
82
97
  setattr(sample_obj, f"{status_type.value}_at", lims_date)
83
- self.status.session.commit()
98
+ self.status.commit_to_store()
84
99
  else:
85
100
  LOG.debug(f"no {status_type.value} date found for {sample_obj.internal_id}")
86
101
 
102
+ def _filter_out_older_orders(
103
+ self, samples: list[Sample], order_date_cutoff: datetime
104
+ ) -> list[Sample]:
105
+ sample_within_time_window: list[Sample] = []
106
+ for sample in samples:
107
+ if not self._is_sample_too_old(sample=sample, order_date_cutoff=order_date_cutoff):
108
+ sample_within_time_window.append(sample)
109
+ LOG.debug(f"Filter out {len(samples)-len(sample_within_time_window)} samples")
110
+ return sample_within_time_window
111
+
87
112
  def _get_samples_to_include(self, include, status_type):
88
113
  samples = None
89
114
  if include == IncludeOptions.UNSET.value:
@@ -151,3 +176,7 @@ class TransferLims(object):
151
176
  )
152
177
  return False
153
178
  return True
179
+
180
+ @staticmethod
181
+ def _is_sample_too_old(sample: Sample, order_date_cutoff: datetime) -> bool:
182
+ return sample.ordered_at < order_date_cutoff
@@ -5,7 +5,6 @@ import logging
5
5
 
6
6
  import rich_click as click
7
7
 
8
- from cg.apps.gens import GensAPI
9
8
  from cg.cli.generate.delivery_report.base import generate_delivery_report
10
9
  from cg.cli.upload.genotype import upload_genotypes
11
10
  from cg.cli.upload.gens import upload_to_gens
@@ -59,13 +58,7 @@ class BalsamicUploadAPI(UploadAPI):
59
58
  LOG.info(f"Balsamic case {case.internal_id} is not compatible for Genotype upload")
60
59
 
61
60
  # Observations upload
62
- if (
63
- self.analysis_api.get_case_application_type(case_id=case.internal_id)
64
- == SeqLibraryPrepCategory.WHOLE_GENOME_SEQUENCING
65
- ):
66
- ctx.invoke(upload_observations_to_loqusdb, case_id=case.internal_id)
67
- else:
68
- LOG.info(f"Balsamic case {case.internal_id} is not compatible for Observations upload")
61
+ ctx.invoke(upload_observations_to_loqusdb, case_id=case.internal_id)
69
62
  LOG.info(
70
63
  f"Upload of case {case.internal_id} was successful. Setting uploaded at to {dt.datetime.now()}"
71
64
  )
@@ -25,15 +25,15 @@ class UploadCoverageApi:
25
25
  """Get data for uploading coverage."""
26
26
  case_id = analysis.case.internal_id
27
27
  data = {"family": case_id, "family_name": analysis.case.name, "samples": []}
28
- for link_obj in analysis.case.links:
28
+ for sample in analysis.case.samples:
29
29
  hk_coverage: File = self.hk_api.files(
30
30
  version=analysis.housekeeper_version_id,
31
- tags=[link_obj.sample.internal_id, "coverage"],
32
- ).first()
31
+ tags={sample.internal_id, "coverage", "sambamba-depth", "chanjo"},
32
+ ).one()
33
33
  data["samples"].append(
34
34
  {
35
- "sample": link_obj.sample.internal_id,
36
- "sample_name": link_obj.sample.name,
35
+ "sample": sample.internal_id,
36
+ "sample_name": sample.name,
37
37
  "coverage": hk_coverage.full_path,
38
38
  }
39
39
  )
@@ -6,6 +6,7 @@ import logging
6
6
  import rich_click as click
7
7
 
8
8
  from cg.cli.generate.delivery_report.base import generate_delivery_report
9
+ from cg.cli.upload.coverage import upload_coverage
9
10
  from cg.cli.upload.genotype import upload_genotypes
10
11
  from cg.cli.upload.gens import upload_to_gens
11
12
  from cg.cli.upload.observations import upload_observations_to_loqusdb
@@ -31,6 +32,8 @@ class RarediseaseUploadAPI(UploadAPI):
31
32
  analysis: Analysis = self.status_db.get_latest_completed_analysis_for_case(case.internal_id)
32
33
  self.update_upload_started_at(analysis=analysis)
33
34
 
35
+ ctx.invoke(upload_coverage, family_id=case.internal_id)
36
+
34
37
  # Delivery report generation
35
38
  if case.data_delivery in REPORT_SUPPORTED_DATA_DELIVERY:
36
39
  ctx.invoke(generate_delivery_report, case_id=case.internal_id)
@@ -85,6 +85,7 @@ class SampleTags(BaseModel):
85
85
  chromograph_sites: set[str] | None = None
86
86
  hificnv_coverage: set[str] | None = None
87
87
  minor_allele_frequency_wig: set[str] | None = None
88
+ phase_blocks: set[str] | None = None
88
89
  reviewer_alignment: set[str] | None = None
89
90
  reviewer_alignment_index: set[str] | None = None
90
91
  reviewer_catalog: set[str] | None = None
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from typing import cast
2
3
 
3
4
  from housekeeper.store.models import Version
4
5
 
@@ -88,27 +89,50 @@ class NalloConfigBuilder(ScoutConfigBuilder):
88
89
  ) -> None:
89
90
  """Include sample level files that are optional."""
90
91
  LOG.info("Including NALLO specific sample level files")
91
- sample_id: str = config_sample.sample_id
92
+ sample_id: str = cast(str, config_sample.sample_id)
93
+ config_sample.chromograph_images.autozygous = self.remove_chromosome_substring(
94
+ self.get_sample_file(
95
+ hk_tags=self.sample_tags.chromograph_autozyg,
96
+ sample_id=sample_id,
97
+ hk_version=hk_version,
98
+ )
99
+ )
100
+ config_sample.chromograph_images.coverage = self.remove_chromosome_substring(
101
+ self.get_sample_file(
102
+ hk_tags=self.sample_tags.chromograph_coverage,
103
+ sample_id=sample_id,
104
+ hk_version=hk_version,
105
+ )
106
+ )
92
107
  config_sample.d4_file = self.get_sample_file(
93
- hk_tags=self.sample_tags.d4_file, sample_id=sample_id, hk_version=hk_version
108
+ hk_tags=cast(set[str], self.sample_tags.d4_file),
109
+ sample_id=sample_id,
110
+ hk_version=hk_version,
94
111
  )
95
112
  config_sample.paraphase_alignment_path = self.get_sample_file(
96
- hk_tags=self.sample_tags.paraphase_alignment_path,
113
+ hk_tags=cast(set[str], self.sample_tags.paraphase_alignment_path),
97
114
  sample_id=sample_id,
98
115
  hk_version=hk_version,
99
116
  )
100
117
  config_sample.tiddit_coverage_wig = self.get_sample_file(
101
- hk_tags=self.sample_tags.hificnv_coverage,
118
+ hk_tags=cast(set[str], self.sample_tags.hificnv_coverage),
102
119
  sample_id=sample_id,
103
120
  hk_version=hk_version,
104
121
  )
105
122
  config_sample.minor_allele_frequency_wig = self.get_sample_file(
106
- hk_tags=self.sample_tags.minor_allele_frequency_wig,
123
+ hk_tags=cast(set[str], self.sample_tags.minor_allele_frequency_wig),
124
+ sample_id=sample_id,
125
+ hk_version=hk_version,
126
+ )
127
+ alignment_and_mt_bam_path: str | None = self.get_sample_file(
128
+ hk_tags=cast(set[str], self.sample_tags.alignment_path),
107
129
  sample_id=sample_id,
108
130
  hk_version=hk_version,
109
131
  )
110
- config_sample.alignment_path = self.get_sample_file(
111
- hk_tags=self.sample_tags.alignment_path,
132
+ config_sample.alignment_path = alignment_and_mt_bam_path
133
+ config_sample.mt_bam = alignment_and_mt_bam_path
134
+ config_sample.phase_blocks = self.get_sample_file(
135
+ hk_tags=cast(set[str], self.sample_tags.phase_blocks),
112
136
  sample_id=sample_id,
113
137
  hk_version=hk_version,
114
138
  )
@@ -4,15 +4,16 @@ import logging
4
4
  from pathlib import Path
5
5
 
6
6
  from housekeeper.store.models import File, Version
7
- from pydantic.v1 import EmailStr, ValidationError
7
+ from pydantic import EmailStr
8
+ from pydantic.v1 import ValidationError
8
9
 
9
10
  from cg.constants import Workflow
10
11
  from cg.constants.constants import FileFormat, GenomeVersion, SampleType
11
12
  from cg.constants.housekeeper_tags import BalsamicAnalysisTag
12
- from cg.constants.observations import ObservationsFileWildcards
13
+ from cg.constants.observations import BalsamicObservationPanel, ObservationsFileWildcards
13
14
  from cg.constants.priority import SlurmQos
14
15
  from cg.constants.scout import BALSAMIC_CASE_TAGS
15
- from cg.constants.sequencing import Variants
16
+ from cg.constants.sequencing import SeqLibraryPrepCategory, Variants
16
17
  from cg.constants.subject import Sex
17
18
  from cg.exc import BalsamicStartError, CgError
18
19
  from cg.io.controller import ReadFile
@@ -33,6 +34,15 @@ from cg.utils.utils import build_command_from_dict, get_string_from_list_by_patt
33
34
  LOG = logging.getLogger(__name__)
34
35
 
35
36
 
37
+ PANELS_WITH_LOQUSDB_DUMP_FILES_MAP: dict[str, str] = {
38
+ BalsamicObservationPanel.MYELOID: "loqusdb_cancer_somatic_myeloid_snv_variants_export-20250920-.vcf.gz",
39
+ BalsamicObservationPanel.LYMPHOID: "loqusdb_cancer_somatic_lymphoid_snv_variants_export-20250920-.vcf.gz",
40
+ BalsamicObservationPanel.EXOME: "loqusdb_cancer_somatic_exome_snv_variants_export-20250920-.vcf.gz",
41
+ }
42
+
43
+ LOQUSDB_WGS_DUMP_FILE = "loqusdb_artefact_somatic_sv_variants_export-20250920-.vcf.gz"
44
+
45
+
36
46
  class BalsamicAnalysisAPI(AnalysisAPI):
37
47
  """Handles communication between BALSAMIC processes and the rest of CG infrastructure."""
38
48
 
@@ -46,23 +56,24 @@ class BalsamicAnalysisAPI(AnalysisAPI):
46
56
  ):
47
57
  super().__init__(workflow=workflow, config=config)
48
58
  self.account: str = config.balsamic.slurm.account
49
- self.balsamic_cache: str = config.balsamic.balsamic_cache
50
- self.bed_path: str = config.balsamic.bed_path
51
- self.binary_path: str = config.balsamic.binary_path
52
- self.cadd_path: str = config.balsamic.cadd_path
53
- self.conda_binary: str = config.balsamic.conda_binary
59
+ self.balsamic_cache: str = str(config.balsamic.balsamic_cache)
60
+ self.bed_path: str = str(config.balsamic.bed_path)
61
+ self.binary_path: str = str(config.balsamic.binary_path)
62
+ self.cadd_path: str = str(config.balsamic.cadd_path)
63
+ self.conda_binary: str = str(config.balsamic.conda_binary)
54
64
  self.conda_env: str = config.balsamic.conda_env
55
65
  self.email: EmailStr = config.balsamic.slurm.mail_user
56
- self.genome_interval_path: str = config.balsamic.genome_interval_path
57
- self.gens_coverage_female_path: str = config.balsamic.gens_coverage_female_path
58
- self.gens_coverage_male_path: str = config.balsamic.gens_coverage_male_path
59
- self.gnomad_af5_path: str = config.balsamic.gnomad_af5_path
60
- self.loqusdb_path: str = config.balsamic.loqusdb_path
61
- self.pon_path: str = config.balsamic.pon_path
66
+ self.genome_interval_path: str = str(config.balsamic.genome_interval_path)
67
+ self.gens_coverage_female_path: str = str(config.balsamic.gens_coverage_female_path)
68
+ self.gens_coverage_male_path: str = str(config.balsamic.gens_coverage_male_path)
69
+ self.gnomad_af5_path: str = str(config.balsamic.gnomad_af5_path)
70
+ self.head_job_partition: str = config.balsamic.head_job_partition
71
+ self.loqusdb_path: str = str(config.balsamic.loqusdb_path)
72
+ self.pon_path: str = str(config.balsamic.pon_path)
62
73
  self.qos: SlurmQos = config.balsamic.slurm.qos
63
- self.root_dir: str = config.balsamic.root
64
- self.sentieon_licence_path: str = config.balsamic.sentieon_licence_path
65
- self.sentieon_licence_server: str = config.sentieon_licence_server
74
+ self.root_dir: str = str(config.balsamic.root)
75
+ self.sentieon_licence_path: str = str(config.balsamic.sentieon_licence_path)
76
+ self.sentieon_licence_server: str = config.balsamic.sentieon_licence_server
66
77
  self.swegen_path: str = config.balsamic.swegen_path
67
78
 
68
79
  @property
@@ -253,10 +264,6 @@ class BalsamicAnalysisAPI(AnalysisAPI):
253
264
  if all(val["sex"] == sex for val in sample_data.values()) and sex in set(
254
265
  value for value in Sex
255
266
  ):
256
- if sex not in [Sex.FEMALE, Sex.MALE]:
257
- LOG.warning(f"The provided sex is unknown, setting {Sex.FEMALE} as the default")
258
- sex = Sex.FEMALE
259
-
260
267
  return sex
261
268
  else:
262
269
  LOG.error(f"Unable to retrieve a valid sex from samples: {sample_data.keys()}")
@@ -308,8 +315,12 @@ class BalsamicAnalysisAPI(AnalysisAPI):
308
315
  def get_latest_metadata(self, case_id: str) -> BalsamicAnalysis:
309
316
  """Return the latest metadata of a specific BALSAMIC case."""
310
317
 
311
- config_raw_data = self.get_latest_raw_file_data(case_id, BalsamicAnalysisTag.CONFIG)
312
- metrics_raw_data = self.get_latest_raw_file_data(case_id, BalsamicAnalysisTag.QC_METRICS)
318
+ config_raw_data: dict = self.get_latest_raw_file_data(
319
+ case_id=case_id, tags=BalsamicAnalysisTag.CONFIG
320
+ )
321
+ metrics_raw_data: list[dict] = self.get_latest_raw_file_data(
322
+ case_id=case_id, tags=BalsamicAnalysisTag.QC_METRICS
323
+ )
313
324
 
314
325
  if config_raw_data and metrics_raw_data:
315
326
  try:
@@ -324,7 +335,9 @@ class BalsamicAnalysisAPI(AnalysisAPI):
324
335
  LOG.error(f"Unable to retrieve the latest metadata for {case_id}")
325
336
  raise CgError
326
337
 
327
- def parse_analysis(self, config_raw: dict, qc_metrics_raw: dict, **kwargs) -> BalsamicAnalysis:
338
+ def parse_analysis(
339
+ self, config_raw: dict, qc_metrics_raw: list[dict], **kwargs
340
+ ) -> BalsamicAnalysis:
328
341
  """Returns a formatted BalsamicAnalysis object"""
329
342
 
330
343
  sequencing_type = config_raw["analysis"]["sequencing_type"]
@@ -399,9 +412,7 @@ class BalsamicAnalysisAPI(AnalysisAPI):
399
412
  "genome_interval": self.genome_interval_path,
400
413
  "gnomad_min_af5": self.gnomad_af5_path,
401
414
  "gens_coverage_pon": (
402
- self.gens_coverage_female_path
403
- if sex == Sex.FEMALE
404
- else self.gens_coverage_male_path
415
+ self.gens_coverage_male_path if sex == Sex.MALE else self.gens_coverage_female_path
405
416
  ),
406
417
  }
407
418
 
@@ -443,6 +454,7 @@ class BalsamicAnalysisAPI(AnalysisAPI):
443
454
  raise BalsamicStartError(f"{case_id} has no samples tagged for BALSAMIC analysis!")
444
455
 
445
456
  verified_panel_bed = self.get_verified_bed(panel_bed=panel_bed, sample_data=sample_data)
457
+ loqusdb_panel_dump_file: str | None = self.get_panel_loqusdb_dump(verified_panel_bed)
446
458
  verified_pon = (
447
459
  self.get_verified_pon(pon_cnn=pon_cnn, panel_bed=verified_panel_bed)
448
460
  if verified_panel_bed
@@ -452,10 +464,16 @@ class BalsamicAnalysisAPI(AnalysisAPI):
452
464
 
453
465
  verified_exome_argument: bool = self.has_case_only_exome_samples(case_id=case_id)
454
466
 
467
+ is_wgs_case: bool = self.has_case_only_wgs_samples(case_id=case_id)
468
+
455
469
  config_case: dict[str, str] = {
456
470
  "case_id": case_id,
457
471
  "analysis_workflow": self.workflow,
458
472
  "genome_version": genome_version,
473
+ "loqusdb_panel_dump_file": loqusdb_panel_dump_file,
474
+ "loqusdb_wgs_dump_file": (
475
+ f"{self.loqusdb_path}/{LOQUSDB_WGS_DUMP_FILE}" if is_wgs_case else None
476
+ ),
459
477
  "sex": verified_sex,
460
478
  "panel_bed": verified_panel_bed,
461
479
  "pon_cnn": verified_pon,
@@ -480,6 +498,22 @@ class BalsamicAnalysisAPI(AnalysisAPI):
480
498
 
481
499
  return config_case
482
500
 
501
+ def has_case_only_wgs_samples(self, case_id: str) -> bool:
502
+ case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
503
+ return all(
504
+ sample.prep_category == SeqLibraryPrepCategory.WHOLE_GENOME_SEQUENCING
505
+ for sample in case.samples
506
+ )
507
+
508
+ def get_panel_loqusdb_dump(self, bed_file: str | None) -> str | None:
509
+ if not bed_file:
510
+ return None
511
+ bed_file_name: str = Path(bed_file).name
512
+ bed_name: str = self.status_db.get_bed_version_by_file_name(bed_file_name).bed.name
513
+
514
+ if file_name := PANELS_WITH_LOQUSDB_DUMP_FILES_MAP.get(bed_name):
515
+ return f"{self.loqusdb_path}/{file_name}"
516
+
483
517
  @staticmethod
484
518
  def print_sample_params(case_id: str, sample_data: dict) -> None:
485
519
  """Outputs a table of samples to be displayed in log"""
@@ -557,13 +591,15 @@ class BalsamicAnalysisAPI(AnalysisAPI):
557
591
  {
558
592
  "--analysis-dir": self.root_dir,
559
593
  "--analysis-workflow": arguments.get("analysis_workflow"),
594
+ "--artefact-snv-observations": arguments.get("artefact_somatic_snv"),
595
+ "--artefact-sv-observations": arguments.get("loqusdb_wgs_dump_file"),
560
596
  "--balsamic-cache": self.balsamic_cache,
561
597
  "--cache-version": cache_version,
562
598
  "--cadd-annotations": self.cadd_path,
563
- "--artefact-snv-observations": arguments.get("artefact_somatic_snv"),
564
599
  "--cancer-germline-snv-observations": arguments.get("cancer_germline_snv"),
565
600
  "--cancer-germline-sv-observations": arguments.get("cancer_germline_sv"),
566
601
  "--cancer-somatic-snv-observations": arguments.get("cancer_somatic_snv"),
602
+ "--cancer-somatic-snv-panel-observations": arguments.get("loqusdb_panel_dump_file"),
567
603
  "--cancer-somatic-sv-observations": arguments.get("cancer_somatic_sv"),
568
604
  "--case-id": arguments.get("case_id"),
569
605
  "--clinical-snv-observations": arguments.get("clinical_snv"),
@@ -578,8 +614,8 @@ class BalsamicAnalysisAPI(AnalysisAPI):
578
614
  "--gnomad-min-af5": arguments.get("gnomad_min_af5"),
579
615
  "--normal-sample-name": arguments.get("normal_sample_name"),
580
616
  "--panel-bed": arguments.get("panel_bed"),
617
+ "--exome": arguments.get("exome"), # MUST be after panel bed
581
618
  "--pon-cnn": arguments.get("pon_cnn"),
582
- "--exome": arguments.get("exome"),
583
619
  "--sentieon-install-dir": self.sentieon_licence_path,
584
620
  "--sentieon-license": self.sentieon_licence_server,
585
621
  "--soft-filter-normal": arguments.get("soft_filter_normal"),
@@ -595,7 +631,7 @@ class BalsamicAnalysisAPI(AnalysisAPI):
595
631
  def run_analysis(
596
632
  self,
597
633
  case_id: str,
598
- cluster_config: Path | None = None,
634
+ workflow_profile: Path | None = None,
599
635
  slurm_quality_of_service: str | None = None,
600
636
  dry_run: bool = False,
601
637
  ) -> None:
@@ -603,17 +639,16 @@ class BalsamicAnalysisAPI(AnalysisAPI):
603
639
 
604
640
  command = ["run", "analysis"]
605
641
  run_analysis = ["--run-analysis"] if not dry_run else []
606
- benchmark = ["--benchmark"]
607
642
  options = build_command_from_dict(
608
643
  {
609
644
  "--account": self.account,
610
- "--mail-user": self.email,
611
645
  "--qos": slurm_quality_of_service or self.get_slurm_qos_for_case(case_id=case_id),
612
646
  "--sample-config": self.get_case_config_path(case_id=case_id),
613
- "--cluster-config": cluster_config,
647
+ "--workflow-profile": workflow_profile,
648
+ "--headjob-partition": self.head_job_partition,
614
649
  }
615
650
  )
616
- parameters = command + options + run_analysis + benchmark
651
+ parameters = command + options + run_analysis
617
652
  self.process.run_command(parameters=parameters, dry_run=dry_run)
618
653
 
619
654
  def report_deliver(self, case_id: str, dry_run: bool = False) -> None:
@@ -630,8 +665,7 @@ class BalsamicAnalysisAPI(AnalysisAPI):
630
665
 
631
666
  def get_genome_build(self, case_id: str) -> str:
632
667
  """Returns the reference genome build version of a Balsamic analysis."""
633
- analysis_metadata: BalsamicAnalysis = self.get_latest_metadata(case_id)
634
- return analysis_metadata.balsamic_config.reference.reference_genome_version
668
+ return GenomeVersion.HG19
635
669
 
636
670
  @staticmethod
637
671
  def get_variant_caller_version(var_caller_name: str, var_caller_versions: dict) -> str | None:
cg/meta/workflow/fastq.py CHANGED
@@ -212,6 +212,14 @@ class BalsamicFastqHandler(FastqHandler):
212
212
  date: str = date if isinstance(date, str) else date.strftime("%y%m%d")
213
213
  return f"{lane}_{date}_{flow_cell}_{sample}_{index}_R_{read_direction}{FileExtensions.FASTQ}{FileExtensions.GZIP}"
214
214
 
215
+ def get_sample_fastq_destination_dir(self, case: Case, sample: Sample) -> Path:
216
+ """Get FASTQ file paths for a Balsamic case (sample-independent)."""
217
+ return self.get_fastq_dir(case_id=case.internal_id)
218
+
219
+ def get_fastq_dir(self, case_id: str) -> Path:
220
+ """Get the FASTQ directory for a Balsamic case."""
221
+ return Path(self.root_dir, case_id, "fastq")
222
+
215
223
 
216
224
  class MipFastqHandler(FastqHandler):
217
225
  @staticmethod
@@ -17,8 +17,6 @@ class SampleQualityResult(BaseModel):
17
17
  class CaseQualityResult(BaseModel):
18
18
  passes_qc: bool
19
19
  control_passes_qc: bool
20
- urgent_passes_qc: bool
21
- non_urgent_passes_qc: bool
22
20
 
23
21
 
24
22
  class QualityResult(BaseModel):