cg 76.0.0__py3-none-any.whl → 83.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. cg/__init__.py +1 -1
  2. cg/apps/housekeeper/hk.py +18 -1
  3. cg/apps/tb/api.py +42 -5
  4. cg/cli/transfer.py +13 -2
  5. cg/cli/upload/mutacc.py +16 -3
  6. cg/cli/upload/scout.py +2 -2
  7. cg/cli/upload/utils.py +10 -1
  8. cg/cli/workflow/balsamic/base.py +86 -172
  9. cg/cli/workflow/balsamic/options.py +3 -48
  10. cg/cli/workflow/balsamic/umi.py +210 -15
  11. cg/cli/workflow/microsalt/base.py +4 -2
  12. cg/cli/workflow/mip_dna/base.py +1 -1
  13. cg/cli/workflow/nallo/base.py +73 -23
  14. cg/cli/workflow/nf_analysis.py +5 -207
  15. cg/cli/workflow/raredisease/base.py +41 -54
  16. cg/cli/workflow/rnafusion/base.py +38 -8
  17. cg/cli/workflow/taxprofiler/base.py +31 -18
  18. cg/cli/workflow/tomte/base.py +83 -10
  19. cg/constants/constants.py +25 -30
  20. cg/constants/devices.py +6 -1
  21. cg/constants/gene_panel.py +3 -1
  22. cg/constants/housekeeper_tags.py +28 -28
  23. cg/constants/lims.py +4 -0
  24. cg/constants/nf_analysis.py +0 -1
  25. cg/constants/observations.py +21 -5
  26. cg/constants/orderforms.py +3 -3
  27. cg/constants/pacbio.py +1 -0
  28. cg/constants/priority.py +1 -1
  29. cg/constants/report.py +1 -0
  30. cg/constants/scout.py +12 -9
  31. cg/constants/sequencing.py +2 -2
  32. cg/constants/tb.py +5 -5
  33. cg/exc.py +27 -5
  34. cg/meta/compress/compress.py +7 -2
  35. cg/meta/delivery_report/balsamic.py +3 -1
  36. cg/meta/delivery_report/delivery_report_api.py +4 -3
  37. cg/meta/delivery_report/nallo.py +11 -11
  38. cg/meta/delivery_report/raredisease.py +7 -3
  39. cg/meta/delivery_report/templates/macros/data_analysis/qc_metrics/balsamic_qc_metrics.html +1 -0
  40. cg/meta/delivery_report/templates/macros/ticket_system.html +1 -1
  41. cg/meta/observations/balsamic_observations_api.py +110 -14
  42. cg/meta/observations/mip_dna_observations_api.py +1 -1
  43. cg/meta/observations/nallo_observations_api.py +1 -1
  44. cg/meta/observations/observations_api.py +23 -32
  45. cg/meta/observations/raredisease_observations_api.py +1 -1
  46. cg/meta/tar/tar.py +5 -2
  47. cg/meta/transfer/lims.py +32 -3
  48. cg/meta/upload/balsamic/balsamic.py +1 -8
  49. cg/meta/upload/coverage.py +5 -5
  50. cg/meta/upload/raredisease/raredisease.py +3 -0
  51. cg/meta/upload/scout/hk_tags.py +1 -0
  52. cg/meta/upload/scout/nallo_config_builder.py +31 -7
  53. cg/meta/workflow/balsamic.py +70 -36
  54. cg/meta/workflow/fastq.py +8 -0
  55. cg/meta/workflow/microsalt/quality_controller/models.py +0 -2
  56. cg/meta/workflow/microsalt/quality_controller/quality_controller.py +8 -16
  57. cg/meta/workflow/microsalt/quality_controller/result_logger.py +3 -6
  58. cg/meta/workflow/microsalt/quality_controller/utils.py +2 -45
  59. cg/meta/workflow/nallo.py +21 -99
  60. cg/meta/workflow/nf_analysis.py +12 -263
  61. cg/meta/workflow/raredisease.py +3 -112
  62. cg/meta/workflow/rnafusion.py +2 -34
  63. cg/meta/workflow/taxprofiler.py +2 -38
  64. cg/meta/workflow/tomte.py +2 -42
  65. cg/models/balsamic/config.py +0 -24
  66. cg/models/balsamic/metrics.py +5 -3
  67. cg/models/cg_config.py +39 -16
  68. cg/models/deliverables/metric_deliverables.py +1 -1
  69. cg/models/delivery_report/metadata.py +2 -1
  70. cg/models/nallo/nallo.py +14 -64
  71. cg/models/nf_analysis.py +1 -41
  72. cg/models/raredisease/raredisease.py +1 -63
  73. cg/models/rnafusion/rnafusion.py +0 -26
  74. cg/models/scout/scout_load_config.py +5 -2
  75. cg/models/taxprofiler/taxprofiler.py +0 -42
  76. cg/models/tomte/tomte.py +0 -69
  77. cg/resources/nallo_bundle_filenames.yaml +292 -22
  78. cg/resources/raredisease_bundle_filenames.yaml +11 -1
  79. cg/resources/taxprofiler_bundle_filenames.yaml +20 -0
  80. cg/server/admin.py +106 -25
  81. cg/server/app.py +15 -4
  82. cg/server/endpoints/sequencing_run/dtos.py +21 -3
  83. cg/server/endpoints/sequencing_run/pacbio_sequencing_run.py +29 -10
  84. cg/server/endpoints/sequencing_run/pacbio_smrt_cell_metrics.py +20 -0
  85. cg/services/analysis_starter/{service.py → analysis_starter.py} +11 -9
  86. cg/services/analysis_starter/configurator/abstract_model.py +8 -0
  87. cg/services/analysis_starter/configurator/configurator.py +1 -1
  88. cg/services/analysis_starter/configurator/extensions/nallo.py +27 -0
  89. cg/services/analysis_starter/configurator/extensions/{abstract.py → pipeline_extension.py} +1 -1
  90. cg/services/analysis_starter/configurator/extensions/raredisease.py +3 -1
  91. cg/services/analysis_starter/configurator/extensions/tomte_extension.py +28 -0
  92. cg/services/analysis_starter/configurator/file_creators/balsamic_config.py +240 -0
  93. cg/services/analysis_starter/configurator/file_creators/gene_panel.py +10 -5
  94. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/abstract.py +2 -1
  95. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/models.py +40 -1
  96. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/nallo.py +37 -0
  97. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/raredisease.py +8 -5
  98. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/tomte_params_file_creator.py +64 -0
  99. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/creator.py +1 -1
  100. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/nallo_sample_sheet_creator.py +65 -0
  101. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/protocol.py +12 -0
  102. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{raredisease.py → raredisease_sample_sheet_creator.py} +2 -2
  103. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{rnafusion.py → rnafusion_sample_sheet_creator.py} +2 -2
  104. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{taxprofiler.py → taxprofiler_sample_sheet_creator.py} +2 -2
  105. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/tomte_sample_sheet_creator.py +36 -0
  106. cg/services/analysis_starter/configurator/implementations/balsamic.py +68 -0
  107. cg/services/analysis_starter/configurator/implementations/nextflow.py +22 -5
  108. cg/services/analysis_starter/configurator/models/balsamic.py +152 -0
  109. cg/services/analysis_starter/configurator/models/mip_dna.py +6 -8
  110. cg/services/analysis_starter/configurator/models/nextflow.py +9 -0
  111. cg/services/analysis_starter/constants.py +2 -0
  112. cg/services/analysis_starter/factories/configurator_factory.py +131 -51
  113. cg/services/analysis_starter/factories/starter_factory.py +36 -7
  114. cg/services/analysis_starter/input_fetcher/implementations/bam_fetcher.py +57 -0
  115. cg/services/analysis_starter/input_fetcher/implementations/fastq_fetcher.py +3 -3
  116. cg/services/analysis_starter/submitters/seqera_platform/{client.py → seqera_platform_client.py} +19 -3
  117. cg/services/analysis_starter/submitters/seqera_platform/seqera_platform_submitter.py +73 -0
  118. cg/services/analysis_starter/submitters/submitter.py +1 -1
  119. cg/services/analysis_starter/submitters/subprocess/submitter.py +2 -1
  120. cg/services/analysis_starter/tracker/implementations/balsamic.py +22 -0
  121. cg/services/analysis_starter/tracker/implementations/microsalt.py +4 -4
  122. cg/services/analysis_starter/tracker/implementations/mip_dna.py +4 -1
  123. cg/services/analysis_starter/tracker/implementations/{nextflow.py → nextflow_tracker.py} +6 -4
  124. cg/services/analysis_starter/tracker/tracker.py +19 -15
  125. cg/services/deliver_files/factory.py +1 -1
  126. cg/services/delivery_message/messages/__init__.py +24 -14
  127. cg/services/delivery_message/messages/{microsalt_mwr_message.py → microsalt_message.py} +1 -1
  128. cg/services/delivery_message/utils.py +4 -40
  129. cg/services/illumina/backup/backup_service.py +29 -7
  130. cg/services/orders/validation/constants.py +3 -0
  131. cg/services/orders/validation/index_sequences.py +558 -0
  132. cg/services/orders/validation/order_types/microsalt/models/sample.py +2 -3
  133. cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +39 -18
  134. cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py +8 -2
  135. cg/services/run_devices/pacbio/data_transfer_service/dto.py +9 -3
  136. cg/services/run_devices/pacbio/data_transfer_service/utils.py +14 -7
  137. cg/services/run_devices/pacbio/metrics_parser/models.py +1 -0
  138. cg/services/run_devices/pacbio/sequencing_runs_service.py +35 -7
  139. cg/services/sequencing_qc_service/quality_checks/checks.py +18 -16
  140. cg/services/sequencing_qc_service/quality_checks/utils.py +82 -18
  141. cg/services/sequencing_qc_service/sequencing_qc_service.py +12 -10
  142. cg/store/crud/create.py +73 -42
  143. cg/store/crud/read.py +73 -7
  144. cg/store/crud/update.py +14 -3
  145. cg/store/models.py +98 -35
  146. cg/store/store.py +8 -1
  147. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/METADATA +1 -1
  148. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/RECORD +150 -138
  149. cg/services/analysis_starter/submitters/seqera_platform/submitter.py +0 -39
  150. cg/services/delivery_message/messages/microsalt_mwx_message.py +0 -18
  151. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/WHEEL +0 -0
  152. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,240 @@
1
+ import logging
2
+ import subprocess
3
+ from pathlib import Path
4
+ from subprocess import CalledProcessError
5
+ from typing import cast
6
+
7
+ from pydantic import EmailStr
8
+
9
+ from cg.apps.lims.api import LimsAPI
10
+ from cg.constants import SexOptions
11
+ from cg.constants.constants import GenomeVersion, Workflow
12
+ from cg.constants.process import EXIT_SUCCESS
13
+ from cg.constants.sequencing import SeqLibraryPrepCategory
14
+ from cg.models.cg_config import BalsamicConfig
15
+ from cg.services.analysis_starter.configurator.models.balsamic import (
16
+ BalsamicConfigInput,
17
+ BalsamicConfigInputPanel,
18
+ BalsamicConfigInputWGS,
19
+ )
20
+ from cg.store.models import BedVersion, Case, Sample
21
+ from cg.store.store import Store
22
+
23
+ LOG = logging.getLogger(__name__)
24
+
25
+
26
class BalsamicConfigFileCreator:
    """Build the BALSAMIC config CLI input for a case and run it to create the config file.

    Paths and settings are copied from the BALSAMIC section of the cg config when the creator
    is constructed. A case in which every sample is WGS gets a WGS config input; every other
    case gets a panel (targeted) config input.
    """

    def __init__(self, cg_balsamic_config: BalsamicConfig, lims_api: LimsAPI, status_db: Store):
        """Store the collaborators and copy the needed values from the BALSAMIC config."""
        self.status_db = status_db
        self.lims_api: LimsAPI = lims_api
        self.conda_binary: Path = cg_balsamic_config.conda_binary
        self.conda_env: str = cg_balsamic_config.conda_env
        self.balsamic_binary: Path = cg_balsamic_config.binary_path
        self.root_dir: Path = cg_balsamic_config.root
        self.bed_directory: Path = cg_balsamic_config.bed_path
        self.cache_dir: Path = cg_balsamic_config.balsamic_cache
        self.cadd_path: Path = cg_balsamic_config.cadd_path
        self.genome_interval_path: Path = cg_balsamic_config.genome_interval_path
        self.gens_coverage_female_path: Path = cg_balsamic_config.gens_coverage_female_path
        self.gens_coverage_male_path: Path = cg_balsamic_config.gens_coverage_male_path
        self.gnomad_af5_path: Path = cg_balsamic_config.gnomad_af5_path
        # Holds the same value as conda_env; kept as its own attribute for backward compatibility.
        self.environment: str = cg_balsamic_config.conda_env
        self.sentieon_licence_path: Path = cg_balsamic_config.sentieon_licence_path
        self.sentieon_licence_server: str = cg_balsamic_config.sentieon_licence_server
        self.loqusdb_artefact_snv: Path = cg_balsamic_config.loqusdb_dump_files.artefact_snv
        self.artefact_sv_observations: Path = cg_balsamic_config.loqusdb_dump_files.artefact_sv
        self.loqusdb_cancer_germline_snv: Path = (
            cg_balsamic_config.loqusdb_dump_files.cancer_germline_snv
        )
        self.loqusdb_cancer_somatic_snv: Path = (
            cg_balsamic_config.loqusdb_dump_files.cancer_somatic_snv
        )
        self.loqusdb_cancer_somatic_snv_panels: dict = (
            cg_balsamic_config.loqusdb_dump_files.cancer_somatic_snv_panels
        )
        self.loqusdb_cancer_somatic_sv: Path = (
            cg_balsamic_config.loqusdb_dump_files.cancer_somatic_sv
        )
        self.loqusdb_clinical_snv: Path = cg_balsamic_config.loqusdb_dump_files.clinical_snv
        self.loqusdb_clinical_sv: Path = cg_balsamic_config.loqusdb_dump_files.clinical_sv
        self.panel_of_normals: dict = cg_balsamic_config.panel_of_normals
        self.slurm_account: str = cg_balsamic_config.slurm.account
        self.slurm_mail_user: EmailStr = cg_balsamic_config.slurm.mail_user
        self.swegen_snv: Path = cg_balsamic_config.swegen_snv
        self.swegen_sv: Path = cg_balsamic_config.swegen_sv

    def create(self, case_id: str, fastq_path: Path, **flags) -> None:
        """Build the config input for the case and run the resulting command.

        Only the "panel_bed" flag is read, and only for cases that are not all-WGS.

        Raises:
            CalledProcessError: If the config command exits with a non-success code.
        """
        config_cli_input: BalsamicConfigInput = self._build_config_input(
            case_id=case_id, fastq_path=fastq_path, **flags
        )
        self._create_config_file(config_cli_input)

    @staticmethod
    def _create_config_file(config_cli_input: BalsamicConfigInput) -> None:
        """Run the command produced by the config input and fail loudly if it does not succeed.

        Raises:
            CalledProcessError: If the command exits with a non-success code. The captured
                stdout and stderr are attached to the exception.
        """
        final_command: str = config_cli_input.dump_to_cli()
        LOG.debug(f"Running: {final_command}")
        # NOTE(review): the command is one string executed through the shell; confirm that
        # every value rendered by dump_to_cli is trusted or quoted.
        result = subprocess.run(
            args=final_command,
            shell=True,
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != EXIT_SUCCESS:
            # Replace undecodable bytes so a decoding error cannot hide the real failure.
            LOG.critical(result.stderr.decode("utf-8", errors="replace").rstrip())
            raise CalledProcessError(
                result.returncode, final_command, output=result.stdout, stderr=result.stderr
            )

    def _build_config_input(self, case_id: str, fastq_path: Path, **flags) -> BalsamicConfigInput:
        """Return the WGS config input if all samples are WGS, otherwise the panel config input."""
        case: Case = self.status_db.get_case_by_internal_id_strict(case_id)
        if self._all_samples_are_wgs(case):
            return self._build_wgs_config(case=case, fastq_path=fastq_path)
        return self._build_targeted_config(
            case=case, fastq_path=fastq_path, override_panel_bed=flags.get("panel_bed")
        )

    def _build_wgs_config(self, case: Case, fastq_path: Path) -> BalsamicConfigInput:
        """Return the config input for a case in which all samples are WGS."""
        patient_sex: SexOptions = self._get_patient_sex(case)
        return BalsamicConfigInputWGS(
            analysis_dir=self.root_dir,
            analysis_workflow=cast(Workflow, case.data_analysis),
            artefact_snv_observations=self.loqusdb_artefact_snv,
            artefact_sv_observations=self.artefact_sv_observations,
            balsamic_binary=self.balsamic_binary,
            balsamic_cache=self.cache_dir,
            cadd_annotations=self.cadd_path,
            cancer_germline_snv_observations=self.loqusdb_cancer_germline_snv,
            cancer_somatic_snv_observations=self.loqusdb_cancer_somatic_snv,
            cancer_somatic_sv_observations=self.loqusdb_cancer_somatic_sv,
            case_id=case.internal_id,
            clinical_snv_observations=self.loqusdb_clinical_snv,
            clinical_sv_observations=self.loqusdb_clinical_sv,
            conda_binary=self.conda_binary,
            conda_env=self.conda_env,
            fastq_path=fastq_path,
            gender=patient_sex,
            genome_interval=self.genome_interval_path,
            genome_version=GenomeVersion.HG19,
            gens_coverage_pon=self._get_gens_coverage_pon_file(patient_sex),
            gnomad_min_af5=self.gnomad_af5_path,
            normal_sample_name=self._get_normal_sample_id_from_paired_analysis(case),
            sentieon_install_dir=self.sentieon_licence_path,
            sentieon_license=self.sentieon_licence_server,
            swegen_snv=self.swegen_snv,
            swegen_sv=self.swegen_sv,
            tumor_sample_name=self._get_tumor_or_single_sample_id(case),
        )

    def _build_targeted_config(
        self, case: Case, fastq_path: Path, override_panel_bed: str | None
    ) -> BalsamicConfigInput:
        """Return the panel config input for a case that is not all-WGS.

        The bed version is resolved from override_panel_bed when given, otherwise from the
        capture kit that LIMS reports for the first sample of the case.
        """
        bed_version: BedVersion = self._get_bed_version(
            case=case, override_panel_bed=override_panel_bed
        )
        bed_file: Path = Path(self.bed_directory, bed_version.filename)
        patient_sex: SexOptions = self._get_patient_sex(case)
        return BalsamicConfigInputPanel(
            analysis_dir=self.root_dir,
            analysis_workflow=cast(Workflow, case.data_analysis),
            artefact_snv_observations=self.loqusdb_artefact_snv,
            balsamic_binary=self.balsamic_binary,
            balsamic_cache=self.cache_dir,
            cadd_annotations=self.cadd_path,
            cancer_germline_snv_observations=self.loqusdb_cancer_germline_snv,
            cancer_somatic_snv_observations=self.loqusdb_cancer_somatic_snv,
            # None when no panel-specific dump file is configured for this bed name.
            cancer_somatic_snv_panel_observations=self.loqusdb_cancer_somatic_snv_panels.get(
                bed_version.bed_name
            ),
            cancer_somatic_sv_observations=self.loqusdb_cancer_somatic_sv,
            case_id=case.internal_id,
            clinical_snv_observations=self.loqusdb_clinical_snv,
            clinical_sv_observations=self.loqusdb_clinical_sv,
            conda_binary=self.conda_binary,
            conda_env=self.conda_env,
            fastq_path=fastq_path,
            gender=patient_sex,
            genome_version=GenomeVersion.HG19,
            gnomad_min_af5=self.gnomad_af5_path,
            normal_sample_name=self._get_normal_sample_id_from_paired_analysis(case),
            panel_bed=bed_file,
            pon_cnn=self._get_pon_file(bed_version.shortname),
            exome=self._all_samples_are_exome(case),
            sentieon_install_dir=self.sentieon_licence_path,
            sentieon_license=self.sentieon_licence_server,
            soft_filter_normal=self._is_case_paired_analysis(case),
            swegen_snv=self.swegen_snv,
            swegen_sv=self.swegen_sv,
            tumor_sample_name=self._get_tumor_or_single_sample_id(case),
        )

    @staticmethod
    def _all_samples_are_wgs(case: Case) -> bool:
        """Check if all samples in the case are WGS."""
        return all(
            sample.prep_category == SeqLibraryPrepCategory.WHOLE_GENOME_SEQUENCING
            for sample in case.samples
        )

    @staticmethod
    def _all_samples_are_exome(case: Case) -> bool:
        """Check if all samples in the case are exome."""
        return all(
            sample.prep_category == SeqLibraryPrepCategory.WHOLE_EXOME_SEQUENCING
            for sample in case.samples
        )

    @staticmethod
    def _get_patient_sex(case) -> SexOptions:
        """Return the sex registered on the samples of the case.

        NOTE(review): if the samples disagree, an arbitrary one of the values is returned, and
        a case without samples raises KeyError - confirm that both are ruled out upstream.
        """
        sample_sex: set[SexOptions] = {sample.sex for sample in case.samples}
        return sample_sex.pop()

    @staticmethod
    def _get_normal_sample_id_from_paired_analysis(case) -> str | None:
        """Return the internal id of the normal sample if the case is a paired analysis, otherwise None."""
        if len(case.samples) == 2:
            for sample in case.samples:
                if not sample.is_tumour:
                    return sample.internal_id
        return None

    @staticmethod
    def _get_tumor_or_single_sample_id(case) -> str:
        """
        Return the internal id of the tumour sample if the case is a paired analysis,
        otherwise return the internal id of the single sample.
        """
        if len(case.samples) == 1:
            return case.samples[0].internal_id
        for sample in case.samples:
            if sample.is_tumour:
                return sample.internal_id
        # NOTE(review): falls through and returns None when a multi-sample case has no tumour
        # sample, despite the str annotation - confirm that this is validated downstream.

    @staticmethod
    def _is_case_paired_analysis(case: Case) -> bool:
        """Return True if the case has exactly two samples."""
        return len(case.samples) == 2

    def _get_bed_version(self, case: Case, override_panel_bed: str | None) -> BedVersion:
        """Return the bed version for the override short name or for the LIMS capture kit.

        LIMS is only queried, for the first sample of the case, when no override is given.
        """
        first_sample: Sample = case.samples[0]
        short_name: str = override_panel_bed or self.lims_api.get_capture_kit_strict(
            first_sample.internal_id
        )
        return self.status_db.get_bed_version_by_short_name_strict(short_name)

    def _get_pon_file(self, bed_short_name: str | None) -> Path | None:
        """Return the panel of normals configured for the bed short name, or None if there is none."""
        if pon_file := self.panel_of_normals.get(bed_short_name):
            return pon_file
        # Log the bed short name that was looked up; pon_file is always falsy at this point.
        LOG.info(f"No PON file found for bed file {bed_short_name}. Configuring without PON.")
        return None

    def _get_sample_config_path(self, case_id: str) -> Path:
        """Return <root_dir>/<case_id>/<case_id>.json."""
        return Path(self.root_dir, case_id, f"{case_id}.json")

    def _get_gens_coverage_pon_file(self, patient_sex: SexOptions) -> Path:
        """Return the corresponding PON file for WGS cases based on the patient's sex.

        The female file is used for every sex other than male.
        """
        return (
            self.gens_coverage_male_path
            if patient_sex == SexOptions.MALE
            else self.gens_coverage_female_path
        )
@@ -17,18 +17,23 @@ class GenePanelFileCreator:
17
17
  self.store = store
18
18
  self.scout_api = scout_api
19
19
 
20
- def create(self, case_id: str, file_path: Path) -> None:
21
- content: list[str] = self._get_content(case_id)
20
+ def create(self, case_id: str, file_path: Path, double_hashtag_filtering: bool = False) -> None:
21
+ content: list[str] = self._get_content(
22
+ case_id=case_id, double_hashtag_filtering=double_hashtag_filtering
23
+ )
22
24
  write_txt_with_newlines(file_path=file_path, content=content)
23
25
  LOG.info(f"Created gene panel file for case {case_id} at {file_path}")
24
26
 
25
- def _get_content(self, case_id: str) -> list[str]:
26
- case: Case = self.store.get_case_by_internal_id(internal_id=case_id)
27
+ def _get_content(self, case_id: str, double_hashtag_filtering: bool) -> list[str]:
28
+ case: Case = self.store.get_case_by_internal_id_strict(internal_id=case_id)
27
29
  genome_build: GenePanelGenomeBuild = get_genome_build(workflow=case.data_analysis)
28
30
  all_panels: list[str] = self._get_aggregated_panels(
29
31
  customer_id=case.customer.internal_id, default_panels=set(case.panels)
30
32
  )
31
- return self.scout_api.export_panels(build=genome_build, panels=all_panels)
33
+ panels: list[str] = self.scout_api.export_panels(build=genome_build, panels=all_panels)
34
+ if double_hashtag_filtering:
35
+ panels = [panel for panel in panels if not panel.startswith("##")]
36
+ return panels
32
37
 
33
38
  def _get_aggregated_panels(self, customer_id: str, default_panels: set[str]) -> list[str]:
34
39
  """Check if the customer is collaborator for gene panel master list
@@ -1,5 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from pathlib import Path
3
+ from typing import Any
3
4
 
4
5
 
5
6
  class ParamsFileCreator(ABC):
@@ -8,5 +9,5 @@ class ParamsFileCreator(ABC):
8
9
  self.params = Path(params)
9
10
 
10
11
  @abstractmethod
11
- def create(self, case_id: str, file_path: Path, sample_sheet_path: Path) -> any:
12
+ def create(self, case_id: str, file_path: Path, sample_sheet_path: Path) -> Any:
12
13
  pass
@@ -1,6 +1,21 @@
1
1
  from pathlib import Path
2
2
 
3
- from cg.models.nf_analysis import WorkflowParameters
3
+ from pydantic import BaseModel, field_validator
4
+
5
+ from cg.constants.constants import GenomeVersion
6
+ from cg.constants.sample_sources import SourceType
7
+ from cg.utils.utils import replace_non_alphanumeric
8
+
9
+
10
class WorkflowParameters(BaseModel):
    """Base model for the parameters written to a workflow params file."""

    # The Nallo and Tomte params file creators pass the sample sheet path here.
    input: Path
    # The Nallo and Tomte params file creators pass the case run directory here.
    outdir: Path
13
+
14
+
15
class NalloParameters(WorkflowParameters):
    """Model for Nallo parameters."""

    # Kept as a string rather than a Path.
    # NOTE(review): presumably the path of the exported gene panel TSV file - confirm.
    filter_variants_hgnc_ids: str
4
19
 
5
20
 
6
21
  class RarediseaseParameters(WorkflowParameters):
@@ -20,3 +35,27 @@ class RNAFusionParameters(WorkflowParameters):
20
35
 
21
36
  class TaxprofilerParameters(WorkflowParameters):
22
37
  """Taxprofiler parameters."""
38
+
39
+
40
class TomteParameters(WorkflowParameters):
    """Model for Tomte parameters."""

    gene_panel_clinical_filter: Path
    tissue: str
    # NOTE(review): pydantic does not run validators on defaults unless validate_default is
    # enabled, so omitting genome would presumably leave this value untranslated - confirm.
    genome: str = GenomeVersion.HG38

    @field_validator("tissue", mode="before")
    @classmethod
    def restrict_tissue_values(cls, tissue: str | None) -> str:
        """Return the tissue passed through replace_non_alphanumeric, or UNKNOWN when it is empty or None."""
        if tissue:
            return replace_non_alphanumeric(string=tissue)
        return SourceType.UNKNOWN

    @field_validator("genome", mode="before")
    @classmethod
    def restrict_genome_values(cls, genome: str) -> str:
        """Translate the genome version to the GRCh name.

        Raises:
            ValueError: If the genome is neither HG38 nor HG19. Such values were rejected
                before as well, but through an implicit None that made pydantic report a
                string type error instead of the actual cause.
        """
        if genome == GenomeVersion.HG38:
            return GenomeVersion.GRCh38.value
        if genome == GenomeVersion.HG19:
            return GenomeVersion.GRCh37.value
        raise ValueError(
            f"Unsupported genome version: {genome!r}. "
            f"Expected {GenomeVersion.HG38!r} or {GenomeVersion.HG19!r}."
        )
@@ -0,0 +1,37 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+ from cg.constants.scout import ScoutExportFileName
6
+ from cg.io.yaml import read_yaml, write_yaml_nextflow_style
7
+ from cg.services.analysis_starter.configurator.file_creators.nextflow.params_file.abstract import (
8
+ ParamsFileCreator,
9
+ )
10
+ from cg.services.analysis_starter.configurator.file_creators.nextflow.params_file.models import (
11
+ NalloParameters,
12
+ )
13
+ from cg.services.analysis_starter.configurator.file_creators.nextflow.params_file.utils import (
14
+ replace_values_in_params_file,
15
+ )
16
+
17
+ LOG = logging.getLogger(__name__)
18
+
19
+
20
class NalloParamsFileCreator(ParamsFileCreator):
    """Create the Nextflow params file for a Nallo case."""

    def create(self, case_id: str, file_path: Path, sample_sheet_path: Path) -> Any:
        """Write the params file for the case to file_path.

        The directory containing file_path is used as the case run directory.
        """
        LOG.debug(f"Creating params file for case {case_id}")
        run_directory: Path = file_path.parent
        params_content: dict = self._get_content(
            case_run_directory=run_directory, sample_sheet_path=sample_sheet_path
        )
        write_yaml_nextflow_style(content=params_content, file_path=file_path)

    def _get_content(self, case_run_directory: Path, sample_sheet_path: Path) -> dict:
        """Return the case parameters merged with the workflow parameters read from self.params.

        On a key clash the value from the workflow params file wins.
        """
        hgnc_ids_filter: str = f"{case_run_directory}/{ScoutExportFileName.PANELS_TSV}"
        case_specific = NalloParameters(
            input=sample_sheet_path,
            outdir=case_run_directory,
            filter_variants_hgnc_ids=hgnc_ids_filter,
        )
        from_params_file: dict = read_yaml(self.params)
        # Later entries override earlier ones, so the params file takes precedence.
        merged: dict = {**case_specific.model_dump(), **from_params_file}
        return replace_values_in_params_file(merged)
@@ -84,13 +84,16 @@ class RarediseaseParamsFileCreator(ParamsFileCreator):
84
84
  """
85
85
  case: Case = self.store.get_case_by_internal_id_strict(internal_id=case_id)
86
86
  sample: Sample = case.samples[0]
87
- target_bed_shortname: str = self.lims.get_capture_kit_strict(
87
+ target_bed_shortname: str | None = self.lims.capture_kit(
88
88
  sample.from_sample or sample.internal_id
89
89
  )
90
- bed_version: BedVersion = self.store.get_bed_version_by_short_name_strict(
91
- target_bed_shortname
92
- )
93
- return bed_version.filename
90
+ if target_bed_shortname:
91
+ bed_version: BedVersion = self.store.get_bed_version_by_short_name_strict(
92
+ target_bed_shortname
93
+ )
94
+ return bed_version.filename
95
+ else:
96
+ return ""
94
97
 
95
98
  def _create_sample_mapping_file(self, case_id: str, case_path: Path) -> Path:
96
99
  """Create a sample mapping file for the case and returns its path."""
@@ -0,0 +1,64 @@
1
+ from pathlib import Path
2
+ from typing import Iterator
3
+
4
+ from cg.apps.lims import LimsAPI
5
+ from cg.constants.constants import GenomeVersion
6
+ from cg.constants.scout import ScoutExportFileName
7
+ from cg.exc import CgError
8
+ from cg.io.yaml import read_yaml, write_yaml_nextflow_style
9
+ from cg.services.analysis_starter.configurator.file_creators.nextflow.params_file.abstract import (
10
+ ParamsFileCreator,
11
+ )
12
+ from cg.services.analysis_starter.configurator.file_creators.nextflow.params_file.models import (
13
+ TomteParameters,
14
+ )
15
+ from cg.services.analysis_starter.configurator.file_creators.nextflow.params_file.utils import (
16
+ replace_values_in_params_file,
17
+ )
18
+ from cg.store.store import Store
19
+
20
+
21
class TomteParamsFileCreator(ParamsFileCreator):
    """Create the Nextflow params file for a Tomte case."""

    def __init__(self, params: str, lims_api: LimsAPI, status_db: Store):
        super().__init__(params)
        self.lims_api = lims_api
        self.status_db = status_db

    def create(self, case_id: str, file_path: Path, sample_sheet_path: Path) -> None:
        """Write the params file for the case to file_path.

        The directory containing file_path is used as the case run directory.
        """
        content: dict = self._get_content(
            case_id=case_id,
            case_run_directory=file_path.parent,
            sample_sheet_path=sample_sheet_path,
        )
        write_yaml_nextflow_style(content=content, file_path=file_path)

    def _get_content(
        self, case_id: str, case_run_directory: Path, sample_sheet_path: Path
    ) -> dict:
        """Return the workflow parameters merged with the case parameters.

        On a key clash the case-specific value wins.
        """
        case_parameters: dict = TomteParameters(
            input=sample_sheet_path,
            outdir=case_run_directory,
            gene_panel_clinical_filter=Path(case_run_directory, ScoutExportFileName.PANELS),
            # May be None; the TomteParameters tissue validator handles that case.
            tissue=self._get_case_source_type(case_id),  # type:ignore
            genome=GenomeVersion.HG38,
        ).model_dump()

        workflow_params: dict = self._get_workflow_params()
        workflow_parameters: dict = workflow_params | case_parameters
        return replace_values_in_params_file(workflow_parameters=workflow_parameters)

    def _get_workflow_params(self) -> dict:
        """Return the content of the workflow params file at self.params."""
        return read_yaml(self.params)

    def _get_case_source_type(self, case_id: str) -> str | None:
        """
        Return the sample source type of a case.

        Raises:
            CgError: If no samples are linked to the case, or if different sources are set
                for the samples linked to the case.
        """
        sample_ids: Iterator[str] = self.status_db.get_sample_ids_by_case_id(case_id=case_id)
        source_types: set[str | None] = {
            self.lims_api.get_source(sample_id) for sample_id in sample_ids
        }
        if not source_types:
            # Without this guard, pop() on the empty set raises an opaque KeyError.
            raise CgError(f"No samples found for case: {case_id}")
        if len(source_types) > 1:
            raise CgError(f"Different source types found for case: {case_id} ({source_types})")
        return source_types.pop()
@@ -16,7 +16,7 @@ from cg.store.store import Store
16
16
  LOG = logging.getLogger(__name__)
17
17
 
18
18
 
19
- class NextflowSampleSheetCreator(ABC):
19
+ class NextflowFastqSampleSheetCreator(ABC):
20
20
 
21
21
  def __init__(self, housekeeper_api: HousekeeperAPI, store: Store):
22
22
  self.housekeeper_api = housekeeper_api
@@ -0,0 +1,65 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from cg.apps.housekeeper.hk import HousekeeperAPI
5
+ from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
6
+ from cg.io.csv import write_csv
7
+ from cg.store.models import Case, CaseSample, Sample
8
+ from cg.store.store import Store
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+ HEADERS: list[str] = [
13
+ "project",
14
+ "sample",
15
+ "file",
16
+ "family_id",
17
+ "paternal_id",
18
+ "maternal_id",
19
+ "sex",
20
+ "phenotype",
21
+ ]
22
+
23
+
24
class NalloSampleSheetCreator:
    """Write the Nallo sample sheet of a case as CSV, one row per BAM file of each sample."""

    def __init__(self, housekeeper_api: HousekeeperAPI, status_db: Store) -> None:
        self.status_db = status_db
        self.housekeeper_api = housekeeper_api

    def create(self, case_id: str, file_path: Path) -> None:
        """Collect the sample sheet rows for the case and write them to file_path."""
        LOG.debug(f"Creating sample sheet for case {case_id}")
        rows: list[list[str]] = self._get_content(case_id)
        write_csv(content=rows, file_path=file_path)

    def _get_content(self, case_id: str) -> list[list[str]]:
        """Return the header row followed by the rows of every sample linked to the case."""
        case: Case = self.status_db.get_case_by_internal_id_strict(case_id)
        rows: list[list[str]] = [HEADERS]
        for case_sample in case.links:
            rows += self._get_sample_sheet_content_per_sample(case_sample=case_sample)
        return rows

    def _get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
        """Return one sample sheet row per BAM file of the sample.

        The case internal id is used for two columns, and "0" stands in for a missing
        paternal or maternal sample id.
        """
        return [
            [
                case_sample.case.internal_id,
                case_sample.sample.internal_id,
                bam_file_path,
                case_sample.case.internal_id,
                case_sample.get_paternal_sample_id or "0",
                case_sample.get_maternal_sample_id or "0",
                PlinkSex[case_sample.sample.sex.upper()].value,
                str(PlinkPhenotypeStatus[case_sample.status.upper()]),
            ]
            for bam_file_path in self._get_bam_read_file_paths(sample=case_sample.sample)
        ]

    def _get_bam_read_file_paths(self, sample: Sample) -> list[str]:
        """Return the full paths of the files tagged "bam" in the sample's Housekeeper bundle."""
        bam_files = self.housekeeper_api.files(bundle=sample.internal_id, tags={"bam"})
        return [bam_file.full_path for bam_file in bam_files]
@@ -0,0 +1,12 @@
1
+ from abc import abstractmethod
2
+ from pathlib import Path
3
+ from typing import Protocol, runtime_checkable
4
+
5
+
6
@runtime_checkable
class SampleSheetCreator(Protocol):
    """Structural interface for objects that can write the sample sheet of a case."""

    @abstractmethod
    def create(self, case_id: str, file_path: Path) -> None:
        """Create the sample sheet for the case at file_path.

        Raises:
            NotImplementedError: Always, in this default body; implementers must override it.
        """
        # The message names this protocol; it previously referred to a
        # "SampleSheetFileCreator" protocol that does not exist.
        raise NotImplementedError(
            "Please implement create to conform to the SampleSheetCreator protocol"
        )
@@ -2,7 +2,7 @@ from typing import Iterator
2
2
 
3
3
  from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
4
4
  from cg.services.analysis_starter.configurator.file_creators.nextflow.sample_sheet.creator import (
5
- NextflowSampleSheetCreator,
5
+ NextflowFastqSampleSheetCreator,
6
6
  )
7
7
  from cg.store.models import Case, CaseSample
8
8
 
@@ -19,7 +19,7 @@ HEADERS: list[str] = [
19
19
  ]
20
20
 
21
21
 
22
- class RarediseaseSampleSheetCreator(NextflowSampleSheetCreator):
22
+ class RarediseaseSampleSheetCreator(NextflowFastqSampleSheetCreator):
23
23
 
24
24
  def _get_content(self, case_id: str) -> list[list[str]]:
25
25
  """Return formatted information required to build a sample sheet for a raredisease case.
@@ -2,14 +2,14 @@ from collections.abc import Iterator
2
2
 
3
3
  from cg.constants.constants import Strandedness
4
4
  from cg.services.analysis_starter.configurator.file_creators.nextflow.sample_sheet.creator import (
5
- NextflowSampleSheetCreator,
5
+ NextflowFastqSampleSheetCreator,
6
6
  )
7
7
  from cg.store.models import Case, Sample
8
8
 
9
9
  HEADERS: list[str] = ["sample", "fastq_1", "fastq_2", "strandedness"]
10
10
 
11
11
 
12
- class RNAFusionSampleSheetCreator(NextflowSampleSheetCreator):
12
+ class RNAFusionSampleSheetCreator(NextflowFastqSampleSheetCreator):
13
13
 
14
14
  def _get_content(self, case_id: str) -> list[list[str]]:
15
15
  content: list[list[str]] = [HEADERS]
@@ -3,7 +3,7 @@ from collections.abc import Iterator
3
3
  from cg.constants.sequencing import SequencingPlatform
4
4
  from cg.constants.symbols import EMPTY_STRING
5
5
  from cg.services.analysis_starter.configurator.file_creators.nextflow.sample_sheet.creator import (
6
- NextflowSampleSheetCreator,
6
+ NextflowFastqSampleSheetCreator,
7
7
  )
8
8
  from cg.store.models import Case, Sample
9
9
 
@@ -17,7 +17,7 @@ HEADERS: list[str] = [
17
17
  ]
18
18
 
19
19
 
20
- class TaxprofilerSampleSheetCreator(NextflowSampleSheetCreator):
20
+ class TaxprofilerSampleSheetCreator(NextflowFastqSampleSheetCreator):
21
21
 
22
22
  def _get_content(self, case_id: str) -> list[list[str]]:
23
23
  """Return formatted information required to build a sample sheet for a case.
@@ -0,0 +1,36 @@
1
+ from typing import Iterator
2
+
3
+ from cg.constants.constants import Strandedness
4
+ from cg.services.analysis_starter.configurator.file_creators.nextflow.sample_sheet.creator import (
5
+ NextflowFastqSampleSheetCreator,
6
+ )
7
+ from cg.store.models import Case, Sample
8
+
9
+ HEADERS: list[str] = ["case", "sample", "fastq_1", "fastq_2", "strandedness"]
10
+
11
+
12
class TomteSampleSheetCreator(NextflowFastqSampleSheetCreator):
    """Build the Tomte sample sheet content, one row per FASTQ read pair of each sample."""

    def _get_content(self, case_id: str) -> list[list[str]]:
        """Return the header row followed by the rows of every sample in the case."""
        case: Case = self.store.get_case_by_internal_id_strict(case_id)
        rows: list[list[str]] = [HEADERS]
        for sample in case.samples:
            rows += self._get_sample_sheet_content_per_sample(case_id=case_id, sample=sample)
        return rows

    def _get_sample_sheet_content_per_sample(self, case_id: str, sample: Sample) -> list[list[str]]:
        """Return one sample sheet row per forward/reverse FASTQ pair of the sample.

        Every row is given the reverse strandedness.
        """
        read_pairs: Iterator[tuple[str, str]] = self._get_paired_read_paths(sample)
        return [
            [case_id, sample.internal_id, forward_path, reverse_path, Strandedness.REVERSE]
            for forward_path, reverse_path in read_pairs
        ]