cg 76.0.0__py3-none-any.whl → 83.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. cg/__init__.py +1 -1
  2. cg/apps/housekeeper/hk.py +18 -1
  3. cg/apps/tb/api.py +42 -5
  4. cg/cli/transfer.py +13 -2
  5. cg/cli/upload/mutacc.py +16 -3
  6. cg/cli/upload/scout.py +2 -2
  7. cg/cli/upload/utils.py +10 -1
  8. cg/cli/workflow/balsamic/base.py +86 -172
  9. cg/cli/workflow/balsamic/options.py +3 -48
  10. cg/cli/workflow/balsamic/umi.py +210 -15
  11. cg/cli/workflow/microsalt/base.py +4 -2
  12. cg/cli/workflow/mip_dna/base.py +1 -1
  13. cg/cli/workflow/nallo/base.py +73 -23
  14. cg/cli/workflow/nf_analysis.py +5 -207
  15. cg/cli/workflow/raredisease/base.py +41 -54
  16. cg/cli/workflow/rnafusion/base.py +38 -8
  17. cg/cli/workflow/taxprofiler/base.py +31 -18
  18. cg/cli/workflow/tomte/base.py +83 -10
  19. cg/constants/constants.py +25 -30
  20. cg/constants/devices.py +6 -1
  21. cg/constants/gene_panel.py +3 -1
  22. cg/constants/housekeeper_tags.py +28 -28
  23. cg/constants/lims.py +4 -0
  24. cg/constants/nf_analysis.py +0 -1
  25. cg/constants/observations.py +21 -5
  26. cg/constants/orderforms.py +3 -3
  27. cg/constants/pacbio.py +1 -0
  28. cg/constants/priority.py +1 -1
  29. cg/constants/report.py +1 -0
  30. cg/constants/scout.py +12 -9
  31. cg/constants/sequencing.py +2 -2
  32. cg/constants/tb.py +5 -5
  33. cg/exc.py +27 -5
  34. cg/meta/compress/compress.py +7 -2
  35. cg/meta/delivery_report/balsamic.py +3 -1
  36. cg/meta/delivery_report/delivery_report_api.py +4 -3
  37. cg/meta/delivery_report/nallo.py +11 -11
  38. cg/meta/delivery_report/raredisease.py +7 -3
  39. cg/meta/delivery_report/templates/macros/data_analysis/qc_metrics/balsamic_qc_metrics.html +1 -0
  40. cg/meta/delivery_report/templates/macros/ticket_system.html +1 -1
  41. cg/meta/observations/balsamic_observations_api.py +110 -14
  42. cg/meta/observations/mip_dna_observations_api.py +1 -1
  43. cg/meta/observations/nallo_observations_api.py +1 -1
  44. cg/meta/observations/observations_api.py +23 -32
  45. cg/meta/observations/raredisease_observations_api.py +1 -1
  46. cg/meta/tar/tar.py +5 -2
  47. cg/meta/transfer/lims.py +32 -3
  48. cg/meta/upload/balsamic/balsamic.py +1 -8
  49. cg/meta/upload/coverage.py +5 -5
  50. cg/meta/upload/raredisease/raredisease.py +3 -0
  51. cg/meta/upload/scout/hk_tags.py +1 -0
  52. cg/meta/upload/scout/nallo_config_builder.py +31 -7
  53. cg/meta/workflow/balsamic.py +70 -36
  54. cg/meta/workflow/fastq.py +8 -0
  55. cg/meta/workflow/microsalt/quality_controller/models.py +0 -2
  56. cg/meta/workflow/microsalt/quality_controller/quality_controller.py +8 -16
  57. cg/meta/workflow/microsalt/quality_controller/result_logger.py +3 -6
  58. cg/meta/workflow/microsalt/quality_controller/utils.py +2 -45
  59. cg/meta/workflow/nallo.py +21 -99
  60. cg/meta/workflow/nf_analysis.py +12 -263
  61. cg/meta/workflow/raredisease.py +3 -112
  62. cg/meta/workflow/rnafusion.py +2 -34
  63. cg/meta/workflow/taxprofiler.py +2 -38
  64. cg/meta/workflow/tomte.py +2 -42
  65. cg/models/balsamic/config.py +0 -24
  66. cg/models/balsamic/metrics.py +5 -3
  67. cg/models/cg_config.py +39 -16
  68. cg/models/deliverables/metric_deliverables.py +1 -1
  69. cg/models/delivery_report/metadata.py +2 -1
  70. cg/models/nallo/nallo.py +14 -64
  71. cg/models/nf_analysis.py +1 -41
  72. cg/models/raredisease/raredisease.py +1 -63
  73. cg/models/rnafusion/rnafusion.py +0 -26
  74. cg/models/scout/scout_load_config.py +5 -2
  75. cg/models/taxprofiler/taxprofiler.py +0 -42
  76. cg/models/tomte/tomte.py +0 -69
  77. cg/resources/nallo_bundle_filenames.yaml +292 -22
  78. cg/resources/raredisease_bundle_filenames.yaml +11 -1
  79. cg/resources/taxprofiler_bundle_filenames.yaml +20 -0
  80. cg/server/admin.py +106 -25
  81. cg/server/app.py +15 -4
  82. cg/server/endpoints/sequencing_run/dtos.py +21 -3
  83. cg/server/endpoints/sequencing_run/pacbio_sequencing_run.py +29 -10
  84. cg/server/endpoints/sequencing_run/pacbio_smrt_cell_metrics.py +20 -0
  85. cg/services/analysis_starter/{service.py → analysis_starter.py} +11 -9
  86. cg/services/analysis_starter/configurator/abstract_model.py +8 -0
  87. cg/services/analysis_starter/configurator/configurator.py +1 -1
  88. cg/services/analysis_starter/configurator/extensions/nallo.py +27 -0
  89. cg/services/analysis_starter/configurator/extensions/{abstract.py → pipeline_extension.py} +1 -1
  90. cg/services/analysis_starter/configurator/extensions/raredisease.py +3 -1
  91. cg/services/analysis_starter/configurator/extensions/tomte_extension.py +28 -0
  92. cg/services/analysis_starter/configurator/file_creators/balsamic_config.py +240 -0
  93. cg/services/analysis_starter/configurator/file_creators/gene_panel.py +10 -5
  94. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/abstract.py +2 -1
  95. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/models.py +40 -1
  96. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/nallo.py +37 -0
  97. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/raredisease.py +8 -5
  98. cg/services/analysis_starter/configurator/file_creators/nextflow/params_file/tomte_params_file_creator.py +64 -0
  99. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/creator.py +1 -1
  100. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/nallo_sample_sheet_creator.py +65 -0
  101. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/protocol.py +12 -0
  102. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{raredisease.py → raredisease_sample_sheet_creator.py} +2 -2
  103. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{rnafusion.py → rnafusion_sample_sheet_creator.py} +2 -2
  104. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/{taxprofiler.py → taxprofiler_sample_sheet_creator.py} +2 -2
  105. cg/services/analysis_starter/configurator/file_creators/nextflow/sample_sheet/tomte_sample_sheet_creator.py +36 -0
  106. cg/services/analysis_starter/configurator/implementations/balsamic.py +68 -0
  107. cg/services/analysis_starter/configurator/implementations/nextflow.py +22 -5
  108. cg/services/analysis_starter/configurator/models/balsamic.py +152 -0
  109. cg/services/analysis_starter/configurator/models/mip_dna.py +6 -8
  110. cg/services/analysis_starter/configurator/models/nextflow.py +9 -0
  111. cg/services/analysis_starter/constants.py +2 -0
  112. cg/services/analysis_starter/factories/configurator_factory.py +131 -51
  113. cg/services/analysis_starter/factories/starter_factory.py +36 -7
  114. cg/services/analysis_starter/input_fetcher/implementations/bam_fetcher.py +57 -0
  115. cg/services/analysis_starter/input_fetcher/implementations/fastq_fetcher.py +3 -3
  116. cg/services/analysis_starter/submitters/seqera_platform/{client.py → seqera_platform_client.py} +19 -3
  117. cg/services/analysis_starter/submitters/seqera_platform/seqera_platform_submitter.py +73 -0
  118. cg/services/analysis_starter/submitters/submitter.py +1 -1
  119. cg/services/analysis_starter/submitters/subprocess/submitter.py +2 -1
  120. cg/services/analysis_starter/tracker/implementations/balsamic.py +22 -0
  121. cg/services/analysis_starter/tracker/implementations/microsalt.py +4 -4
  122. cg/services/analysis_starter/tracker/implementations/mip_dna.py +4 -1
  123. cg/services/analysis_starter/tracker/implementations/{nextflow.py → nextflow_tracker.py} +6 -4
  124. cg/services/analysis_starter/tracker/tracker.py +19 -15
  125. cg/services/deliver_files/factory.py +1 -1
  126. cg/services/delivery_message/messages/__init__.py +24 -14
  127. cg/services/delivery_message/messages/{microsalt_mwr_message.py → microsalt_message.py} +1 -1
  128. cg/services/delivery_message/utils.py +4 -40
  129. cg/services/illumina/backup/backup_service.py +29 -7
  130. cg/services/orders/validation/constants.py +3 -0
  131. cg/services/orders/validation/index_sequences.py +558 -0
  132. cg/services/orders/validation/order_types/microsalt/models/sample.py +2 -3
  133. cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py +39 -18
  134. cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py +8 -2
  135. cg/services/run_devices/pacbio/data_transfer_service/dto.py +9 -3
  136. cg/services/run_devices/pacbio/data_transfer_service/utils.py +14 -7
  137. cg/services/run_devices/pacbio/metrics_parser/models.py +1 -0
  138. cg/services/run_devices/pacbio/sequencing_runs_service.py +35 -7
  139. cg/services/sequencing_qc_service/quality_checks/checks.py +18 -16
  140. cg/services/sequencing_qc_service/quality_checks/utils.py +82 -18
  141. cg/services/sequencing_qc_service/sequencing_qc_service.py +12 -10
  142. cg/store/crud/create.py +73 -42
  143. cg/store/crud/read.py +73 -7
  144. cg/store/crud/update.py +14 -3
  145. cg/store/models.py +98 -35
  146. cg/store/store.py +8 -1
  147. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/METADATA +1 -1
  148. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/RECORD +150 -138
  149. cg/services/analysis_starter/submitters/seqera_platform/submitter.py +0 -39
  150. cg/services/delivery_message/messages/microsalt_mwx_message.py +0 -18
  151. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/WHEEL +0 -0
  152. {cg-76.0.0.dist-info → cg-83.14.0.dist-info}/entry_points.txt +0 -0
cg/meta/workflow/tomte.py CHANGED
@@ -4,20 +4,14 @@ import logging
4
4
  from pathlib import Path
5
5
 
6
6
  from cg.constants import Workflow
7
- from cg.constants.constants import GenomeVersion, Strandedness
7
+ from cg.constants.constants import GenomeVersion
8
8
  from cg.constants.nf_analysis import TOMTE_METRIC_CONDITIONS
9
9
  from cg.meta.workflow.nf_analysis import NfAnalysisAPI
10
10
  from cg.models.analysis import NextflowAnalysis
11
11
  from cg.models.cg_config import CGConfig
12
12
  from cg.models.deliverables.metric_deliverables import MetricsBase
13
- from cg.models.tomte.tomte import (
14
- TomteParameters,
15
- TomteQCMetrics,
16
- TomteSampleSheetEntry,
17
- TomteSampleSheetHeaders,
18
- )
13
+ from cg.models.tomte.tomte import TomteQCMetrics
19
14
  from cg.resources import TOMTE_BUNDLE_FILENAMES_PATH
20
- from cg.store.models import CaseSample
21
15
 
22
16
  LOG = logging.getLogger(__name__)
23
17
 
@@ -48,45 +42,11 @@ class TomteAnalysisAPI(NfAnalysisAPI):
48
42
  self.revision: str = config.tomte.revision
49
43
  self.nextflow_binary_path: str = config.tomte.binary_path
50
44
 
51
- @property
52
- def sample_sheet_headers(self) -> list[str]:
53
- """Headers for sample sheet."""
54
- return TomteSampleSheetHeaders.list()
55
-
56
- @property
57
- def is_gene_panel_required(self) -> bool:
58
- """Return True if a gene panel is needs to be created using the information in StatusDB and exporting it from Scout."""
59
- return True
60
-
61
45
  @staticmethod
62
46
  def get_bundle_filenames_path() -> Path:
63
47
  """Return path to bundle template."""
64
48
  return TOMTE_BUNDLE_FILENAMES_PATH
65
49
 
66
- def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]:
67
- """Collect and format information required to build a sample sheet for a single sample."""
68
- fastq_forward_read_paths, fastq_reverse_read_paths = self.get_paired_read_paths(
69
- sample=case_sample.sample
70
- )
71
- sample_sheet_entry = TomteSampleSheetEntry(
72
- case_id=case_sample.case.internal_id,
73
- name=case_sample.sample.internal_id,
74
- fastq_forward_read_paths=fastq_forward_read_paths,
75
- fastq_reverse_read_paths=fastq_reverse_read_paths,
76
- strandedness=Strandedness.REVERSE,
77
- )
78
- return sample_sheet_entry.reformat_sample_content
79
-
80
- def get_built_workflow_parameters(self, case_id: str, dry_run: bool = False) -> TomteParameters:
81
- """Return parameters."""
82
- return TomteParameters(
83
- input=self.get_sample_sheet_path(case_id=case_id),
84
- outdir=self.get_case_path(case_id=case_id),
85
- gene_panel_clinical_filter=self.get_gene_panels_path(case_id=case_id),
86
- tissue=self.get_case_source_type(case_id=case_id),
87
- genome=self.get_genome_build(case_id=case_id),
88
- )
89
-
90
50
  def get_genome_build(self, case_id: str) -> str:
91
51
  return GenomeVersion.HG38
92
52
 
@@ -38,28 +38,6 @@ class BalsamicConfigSample(BaseModel):
38
38
  fastq_info: dict[str, dict[str, Path]]
39
39
 
40
40
 
41
- class BalsamicConfigReference(BaseModel):
42
- """Metadata of reference files.
43
-
44
- Attributes:
45
- reference_genome: reference genome fasta file
46
- reference_genome_version: reference genome build version
47
- """
48
-
49
- reference_genome: Path
50
- reference_genome_version: str | None = Field(default=None, validate_default=True)
51
-
52
- @field_validator("reference_genome_version")
53
- @classmethod
54
- def extract_genome_version_from_path(cls, _, info: ValidationInfo) -> str:
55
- """
56
- Return the genome version from the reference path:
57
- /home/proj/stage/cancer/balsamic_cache/X.X.X/hg19/genome/human_g1k_v37.fasta
58
- """
59
-
60
- return str(info.data.get("reference_genome")).split("/")[-3]
61
-
62
-
63
41
  class BalsamicConfigPanel(BaseModel):
64
42
  """Balsamic attributes of a panel BED file.
65
43
 
@@ -134,13 +112,11 @@ class BalsamicConfigJSON(BaseModel):
134
112
  Attributes:
135
113
  analysis: config analysis attributes
136
114
  samples: sample attributes associated to a specific case
137
- reference: BALSAMIC build reference
138
115
  panel: panel attributes (targeted analysis exclusively)
139
116
  """
140
117
 
141
118
  analysis: BalsamicConfigAnalysis
142
119
  samples: list[BalsamicConfigSample]
143
- reference: BalsamicConfigReference
144
120
  panel: BalsamicConfigPanel | None = None
145
121
  QC: BalsamicConfigQC
146
122
  vcf: dict[str, BalsamicVarCaller]
@@ -1,6 +1,8 @@
1
- from pydantic import field_validator
1
+ from pydantic import AfterValidator, field_validator
2
+ from typing_extensions import Annotated
2
3
 
3
4
  from cg.models.deliverables.metric_deliverables import MetricCondition, MetricsBase
5
+ from cg.models.delivery_report.validators import get_sex_as_string
4
6
  from cg.models.qc_metrics import QCMetrics
5
7
 
6
8
 
@@ -25,7 +27,9 @@ class BalsamicQCMetrics(QCMetrics):
25
27
 
26
28
  fold_80_base_penalty: float | None = None
27
29
  mean_insert_size: float | None = None
30
+ median_target_coverage: float | None = None
28
31
  percent_duplication: float | None = None
32
+ compare_predicted_to_given_sex: Annotated[str | None, AfterValidator(get_sex_as_string)] = None
29
33
 
30
34
  _percent_duplication: float = field_validator("percent_duplication")(percent_value_validation)
31
35
 
@@ -34,7 +38,6 @@ class BalsamicTargetedQCMetrics(BalsamicQCMetrics):
34
38
  """BALSAMIC targeted QC metrics"""
35
39
 
36
40
  mean_target_coverage: float | None = None
37
- median_target_coverage: float | None = None
38
41
  pct_target_bases_50x: float | None = None
39
42
  pct_target_bases_100x: float | None = None
40
43
  pct_target_bases_250x: float | None = None
@@ -56,7 +59,6 @@ class BalsamicTargetedQCMetrics(BalsamicQCMetrics):
56
59
  class BalsamicWGSQCMetrics(BalsamicQCMetrics):
57
60
  """BALSAMIC WHOLE_GENOME_SEQUENCING QC metrics"""
58
61
 
59
- median_coverage: float | None = None
60
62
  pct_15x: float | None = None
61
63
  pct_30x: float | None = None
62
64
  pct_60x: float | None = None
cg/models/cg_config.py CHANGED
@@ -22,7 +22,7 @@ from cg.apps.tb import TrailblazerAPI
22
22
  from cg.clients.arnold.api import ArnoldAPIClient
23
23
  from cg.clients.chanjo2.client import Chanjo2APIClient
24
24
  from cg.clients.janus.api import JanusAPIClient
25
- from cg.constants.observations import LoqusdbInstance
25
+ from cg.constants.observations import BalsamicObservationPanel, LoqusdbInstance
26
26
  from cg.constants.priority import SlurmQos
27
27
  from cg.meta.delivery.delivery import DeliveryAPI
28
28
  from cg.services.analysis_service.analysis_service import AnalysisService
@@ -178,23 +178,40 @@ class MutaccAutoConfig(CommonAppConfig):
178
178
  padding: int = 300
179
179
 
180
180
 
181
+ class LoqusDBDumpFiles(BaseModel):
182
+ artefact_sv: Path # WGS
183
+ artefact_snv: Path
184
+ cancer_germline_snv: Path
185
+ cancer_somatic_snv: Path
186
+ cancer_somatic_sv: Path
187
+ clinical_snv: Path
188
+ clinical_sv: Path
189
+ cancer_somatic_snv_panels: dict[BalsamicObservationPanel, Path] # Panel
190
+
191
+
181
192
  class BalsamicConfig(CommonAppConfig):
182
- balsamic_cache: str
183
- bed_path: str
184
- binary_path: str
185
- cadd_path: str
186
- conda_binary: str
193
+ balsamic_cache: Path
194
+ bed_path: Path
195
+ binary_path: Path
196
+ cadd_path: Path
197
+ conda_binary: Path
187
198
  conda_env: str
188
- genome_interval_path: str
189
- gens_coverage_female_path: str
190
- gens_coverage_male_path: str
191
- gnomad_af5_path: str
192
- loqusdb_path: str
193
- pon_path: str
194
- root: str
195
- sentieon_licence_path: str
199
+ genome_interval_path: Path
200
+ gens_coverage_female_path: Path
201
+ gens_coverage_male_path: Path
202
+ gnomad_af5_path: Path
203
+ head_job_partition: str
204
+ loqusdb_path: Path
205
+ loqusdb_dump_files: LoqusDBDumpFiles
206
+ panel_of_normals: dict[str, Path] # For TGS and Exome
207
+ pon_path: Path
208
+ root: Path
209
+ sentieon_licence_path: Path
210
+ sentieon_licence_server: str
196
211
  slurm: SlurmConfig
197
- swegen_path: str
212
+ swegen_path: Path
213
+ swegen_snv: Path
214
+ swegen_sv: Path
198
215
 
199
216
 
200
217
  class MutantConfig(BaseModel):
@@ -415,7 +432,6 @@ class CGConfig(BaseModel):
415
432
  max_flowcells: int | None = None
416
433
  nanopore_data_directory: str
417
434
  run_instruments: RunInstruments
418
- sentieon_licence_server: str
419
435
  tower_binary_path: str
420
436
 
421
437
  # Base APIs that always should exist
@@ -458,6 +474,13 @@ class CGConfig(BaseModel):
458
474
  loqusdb_somatic: CommonAppConfig = Field(None, alias=LoqusdbInstance.SOMATIC.value)
459
475
  loqusdb_tumor: CommonAppConfig = Field(None, alias=LoqusdbInstance.TUMOR.value)
460
476
  loqusdb_wes: CommonAppConfig = Field(None, alias=LoqusdbInstance.WES.value)
477
+ loqusdb_somatic_lymphoid: CommonAppConfig = Field(
478
+ None, alias=LoqusdbInstance.SOMATIC_LYMPHOID.value
479
+ )
480
+ loqusdb_somatic_myeloid: CommonAppConfig = Field(
481
+ None, alias=LoqusdbInstance.SOMATIC_MYELOID.value
482
+ )
483
+ loqusdb_somatic_exome: CommonAppConfig = Field(None, alias=LoqusdbInstance.SOMATIC_EXOME.value)
461
484
  madeline_api_: MadelineAPI = None
462
485
  mutacc_auto: MutaccAutoConfig = Field(None, alias="mutacc-auto")
463
486
  mutacc_auto_api_: MutaccAutoAPI = None
@@ -164,6 +164,6 @@ class MetricsDeliverablesCondition(BaseModel):
164
164
  class MultiqcDataJson(BaseModel):
165
165
  """Multiqc data json model."""
166
166
 
167
- report_general_stats_data: list[dict] | None = None
167
+ report_general_stats_data: list[dict[str, Any]] | None = None
168
168
  report_data_sources: dict | None = None
169
169
  report_saved_raw_data: dict[str, dict] | None = None
@@ -92,6 +92,7 @@ class BalsamicSampleMetadataModel(SampleMetadataModel):
92
92
 
93
93
  mean_insert_size: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
94
94
  fold_80: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
95
+ predicted_sex: str = NA_FIELD
95
96
 
96
97
 
97
98
  class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel):
@@ -166,7 +167,7 @@ class WTSSampleMetadataModel(SequencingSampleMetadataModel):
166
167
  pct_surviving: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
167
168
  q20_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
168
169
  q30_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
169
- ribosomal_bases: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD
170
+ ribosomal_bases: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
170
171
  rin: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
171
172
  uniquely_mapped_reads: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD
172
173
 
cg/models/nallo/nallo.py CHANGED
@@ -1,14 +1,22 @@
1
- from enum import StrEnum
2
- from pathlib import Path
1
+ from typing import Annotated
3
2
 
4
- from pydantic import BaseModel, field_validator
3
+ from pydantic import BeforeValidator, Field
5
4
 
6
5
  from cg.constants import SexOptions
7
- from cg.exc import NfSampleSheetError
8
- from cg.models.nf_analysis import WorkflowParameters
9
6
  from cg.models.qc_metrics import QCMetrics
10
7
 
11
8
 
9
+ def convert_sex(plink_sex: float) -> SexOptions:
10
+ if plink_sex == 2:
11
+ return SexOptions.FEMALE
12
+ elif plink_sex == 1:
13
+ return SexOptions.MALE
14
+ elif plink_sex == 0:
15
+ return SexOptions.UNKNOWN
16
+ else:
17
+ raise NotImplementedError
18
+
19
+
12
20
  class NalloQCMetrics(QCMetrics):
13
21
  """Nallo QC metrics."""
14
22
 
@@ -16,62 +24,4 @@ class NalloQCMetrics(QCMetrics):
16
24
  coverage_bases: float | None
17
25
  median_coverage: float | None
18
26
  percent_duplicates: float | None
19
- predicted_sex_sex_check: SexOptions
20
-
21
-
22
- class NalloSampleSheetEntry(BaseModel):
23
- """Nallo sample model is used when building the sample sheet."""
24
-
25
- project: str
26
- sample: str
27
- read_file: Path
28
- family_id: str
29
- paternal_id: str
30
- maternal_id: str
31
- sex: int
32
- phenotype: int
33
-
34
- @property
35
- def reformat_sample_content(self) -> list[list[str]]:
36
- """Reformat sample sheet content as a list of lists, where each list represents a line in the final file."""
37
- return [
38
- [
39
- self.project,
40
- self.sample,
41
- self.read_file,
42
- self.family_id,
43
- self.paternal_id,
44
- self.maternal_id,
45
- self.sex,
46
- self.phenotype,
47
- ]
48
- ]
49
-
50
- @field_validator("read_file")
51
- @classmethod
52
- def read_file_exists(cls, bam_path: Path) -> Path:
53
- """Verify that bam files exist."""
54
- if not bam_path.is_file():
55
- raise NfSampleSheetError(f"Bam file does not exist: {str(bam_path)}")
56
- return bam_path
57
-
58
-
59
- class NalloSampleSheetHeaders(StrEnum):
60
- project: str = "project"
61
- sample: str = "sample"
62
- file: str = "file"
63
- family_id: str = "family_id"
64
- paternal_id: str = "paternal_id"
65
- maternal_id: str = "maternal_id"
66
- sex: str = "sex"
67
- phenotype: str = "phenotype"
68
-
69
- @classmethod
70
- def list(cls) -> list[str]:
71
- return list(map(lambda header: header.value, cls))
72
-
73
-
74
- class NalloParameters(WorkflowParameters):
75
- """Model for Nallo parameters."""
76
-
77
- filter_variants_hgnc_ids: str
27
+ predicted_sex: Annotated[SexOptions, BeforeValidator(convert_sex)] = Field(alias="somalier_sex")
cg/models/nf_analysis.py CHANGED
@@ -1,13 +1,6 @@
1
1
  from pathlib import Path
2
2
 
3
- from pydantic import BaseModel, ValidationInfo, conlist, field_validator
4
-
5
- from cg.exc import NfSampleSheetError
6
-
7
-
8
- class WorkflowParameters(BaseModel):
9
- input: Path
10
- outdir: Path
3
+ from pydantic import BaseModel, field_validator
11
4
 
12
5
 
13
6
  class NfCommandArgs(BaseModel):
@@ -29,39 +22,6 @@ class NfCommandArgs(BaseModel):
29
22
  params_file: str | Path | None = None
30
23
 
31
24
 
32
- class NextflowSampleSheetEntry(BaseModel):
33
- """Nextflow sample sheet model.
34
-
35
- Attributes:
36
- name: sample name, or case id
37
- fastq_forward_read_paths: list of all fastq read1 file paths corresponding to sample
38
- fastq_reverse_read_paths: list of all fastq read2 file paths corresponding to sample
39
- """
40
-
41
- name: str
42
- fastq_forward_read_paths: conlist(Path, min_length=1)
43
- fastq_reverse_read_paths: conlist(Path, min_length=1)
44
-
45
- @field_validator("fastq_reverse_read_paths")
46
- @classmethod
47
- def validate_complete_fastq_file_pairs(
48
- cls, fastq_reverse: list[str], info: ValidationInfo
49
- ) -> list[str]:
50
- """Verify that the number of fastq forward files is the same as for the reverse."""
51
- if len(fastq_reverse) != len(info.data.get("fastq_forward_read_paths")):
52
- raise NfSampleSheetError("Fastq file length for forward and reverse do not match")
53
- return fastq_reverse
54
-
55
- @field_validator("fastq_forward_read_paths", "fastq_reverse_read_paths")
56
- @classmethod
57
- def fastq_files_exist(cls, fastq_paths: list[str]) -> list[str]:
58
- """Verify that fastq files exist."""
59
- for fastq_path in fastq_paths:
60
- if not fastq_path.is_file():
61
- raise NfSampleSheetError(f"Fastq file does not exist: {str(fastq_path)}")
62
- return fastq_paths
63
-
64
-
65
25
  class FileDeliverable(BaseModel):
66
26
  """Specification for a general deliverables file."""
67
27
 
@@ -1,8 +1,4 @@
1
- from enum import StrEnum
2
- from pathlib import Path
3
-
4
1
  from cg.constants.constants import SexOptions
5
- from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
6
2
  from cg.models.qc_metrics import QCMetrics
7
3
 
8
4
 
@@ -10,64 +6,6 @@ class RarediseaseQCMetrics(QCMetrics):
10
6
  """Raredisease QC metrics."""
11
7
 
12
8
  mapped_reads: int
13
- percent_duplicates: float
9
+ percent_duplication: float
14
10
  predicted_sex_sex_check: SexOptions
15
11
  total_reads: int
16
-
17
-
18
- class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry):
19
- """Raredisease sample model is used when building the sample sheet."""
20
-
21
- sex: str
22
- phenotype: int
23
- sex: int
24
- paternal_id: str
25
- maternal_id: str
26
- case_id: str
27
-
28
- @property
29
- def reformat_sample_content(self) -> list[list[str]]:
30
- """Reformat sample sheet content as a list of lists, where each list represents a line in the final file."""
31
- return [
32
- [
33
- self.name,
34
- lane + 1,
35
- self.fastq_forward_read_paths,
36
- self.fastq_reverse_read_paths,
37
- self.sex,
38
- self.phenotype,
39
- self.paternal_id,
40
- self.maternal_id,
41
- self.case_id,
42
- ]
43
- for lane, (self.fastq_forward_read_paths, self.fastq_reverse_read_paths) in enumerate(
44
- zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths)
45
- )
46
- ]
47
-
48
-
49
- class RarediseaseSampleSheetHeaders(StrEnum):
50
- sample: str = "sample"
51
- lane: str = "lane"
52
- fastq_1: str = "fastq_1"
53
- fastq_2: str = "fastq_2"
54
- sex: str = "sex"
55
- phenotype: str = "phenotype"
56
- paternal_id: str = "paternal_id"
57
- maternal_id: str = "maternal_id"
58
- case_id: str = "case_id"
59
-
60
- @classmethod
61
- def list(cls) -> list[str]:
62
- return list(map(lambda header: header.value, cls))
63
-
64
-
65
- class RarediseaseParameters(WorkflowParameters):
66
- """Model for Raredisease parameters."""
67
-
68
- target_bed_file: str
69
- analysis_type: str
70
- save_mapped_as_cram: bool
71
- vcfanno_extra_resources: str
72
- vep_filters_scout_fmt: str
73
- sample_id_map: Path
@@ -1,5 +1,3 @@
1
- from cg.constants.constants import Strandedness
2
- from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
3
1
  from cg.models.qc_metrics import QCMetrics
4
2
 
5
3
 
@@ -19,27 +17,3 @@ class RnafusionQCMetrics(QCMetrics):
19
17
  pct_duplication: float
20
18
  read_pairs_examined: float
21
19
  uniquely_mapped_percent: float
22
-
23
-
24
- class RnafusionParameters(WorkflowParameters):
25
- """Rnafusion parameters."""
26
-
27
-
28
- class RnafusionSampleSheetEntry(NextflowSampleSheetEntry):
29
- """Rnafusion sample sheet model."""
30
-
31
- strandedness: Strandedness
32
-
33
- @staticmethod
34
- def headers() -> list[str]:
35
- """Return sample sheet headers."""
36
- return ["sample", "fastq_1", "fastq_2", "strandedness"]
37
-
38
- def reformat_sample_content(self) -> list[list[str]]:
39
- """Reformat sample sheet content as a list of list, where each list represents a line in the final file."""
40
- return [
41
- [self.name, fastq_forward_read_path, fastq_reverse_read_path, str(self.strandedness)]
42
- for fastq_forward_read_path, fastq_reverse_read_path in zip(
43
- self.fastq_forward_read_paths, self.fastq_reverse_read_paths
44
- )
45
- ]
@@ -84,12 +84,15 @@ class ScoutMipIndividual(ScoutIndividual):
84
84
 
85
85
 
86
86
  class ScoutNalloIndividual(ScoutIndividual):
87
+ assembly_alignment_path: str | None = None
88
+ chromograph_images: ChromographImages = ChromographImages()
87
89
  d4_file: str | None = None
90
+ minor_allele_frequency_wig: str | None = None
91
+ mt_bam: str | None = None
88
92
  paraphase_alignment_path: str | None = None
93
+ phase_blocks: str | None = None
89
94
  reviewer: Reviewer = Reviewer()
90
95
  tiddit_coverage_wig: str | None = None
91
- minor_allele_frequency_wig: str | None = None
92
- assembly_alignment_path: str | None = None
93
96
 
94
97
 
95
98
  class ScoutRarediseaseIndividual(ScoutIndividual):
@@ -1,5 +1,3 @@
1
- from cg.constants.sequencing import SequencingPlatform
2
- from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
3
1
  from cg.models.qc_metrics import QCMetrics
4
2
 
5
3
 
@@ -14,43 +12,3 @@ class TaxprofilerQCMetrics(QCMetrics):
14
12
  pct_duplication: float
15
13
  raw_total_sequences: float
16
14
  reads_mapped: float
17
-
18
-
19
- class TaxprofilerParameters(WorkflowParameters):
20
- """Taxprofiler parameters."""
21
-
22
-
23
- class TaxprofilerSampleSheetEntry(NextflowSampleSheetEntry):
24
- """Taxprofiler sample model is used when building the sample sheet."""
25
-
26
- instrument_platform: SequencingPlatform
27
- fasta: str
28
-
29
- @staticmethod
30
- def headers() -> list[str]:
31
- """Return sample sheet headers."""
32
- return [
33
- "sample",
34
- "run_accession",
35
- "instrument_platform",
36
- "fastq_1",
37
- "fastq_2",
38
- "fasta",
39
- ]
40
-
41
- def reformat_sample_content(self) -> list[list[str]]:
42
- """Reformat sample sheet content as a list of list, where each list represents a line in the final file."""
43
- reformatted_content = []
44
- for run_accession, (forward_path, reverse_path) in enumerate(
45
- zip(self.fastq_forward_read_paths, self.fastq_reverse_read_paths), 1
46
- ):
47
- line = [
48
- self.name,
49
- run_accession,
50
- self.instrument_platform,
51
- forward_path,
52
- reverse_path,
53
- self.fasta,
54
- ]
55
- reformatted_content.append(line)
56
- return reformatted_content
cg/models/tomte/tomte.py CHANGED
@@ -1,73 +1,4 @@
1
- from enum import StrEnum
2
- from pathlib import Path
3
-
4
- from pydantic import field_validator
5
-
6
- from cg.constants.constants import GenomeVersion, Strandedness
7
- from cg.constants.sample_sources import SourceType
8
- from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
9
1
  from cg.models.qc_metrics import QCMetrics
10
- from cg.utils.utils import replace_non_alphanumeric
11
-
12
-
13
- class TomteSampleSheetEntry(NextflowSampleSheetEntry):
14
- """Tomte sample model is used when building the sample sheet."""
15
-
16
- case_id: str
17
- strandedness: Strandedness
18
-
19
- @property
20
- def reformat_sample_content(self) -> list[list[str]]:
21
- """Reformat sample sheet content as a list of lists, where
22
- each list represents a line in the final file."""
23
- return [
24
- [
25
- self.case_id,
26
- self.name,
27
- fastq_forward_read_path,
28
- fastq_reverse_read_path,
29
- str(self.strandedness),
30
- ]
31
- for fastq_forward_read_path, fastq_reverse_read_path in zip(
32
- self.fastq_forward_read_paths, self.fastq_reverse_read_paths
33
- )
34
- ]
35
-
36
-
37
- class TomteSampleSheetHeaders(StrEnum):
38
- case_id: str = "case"
39
- name: str = "sample"
40
- fastq_1: str = "fastq_1"
41
- fastq_2: str = "fastq_2"
42
- strandedness: str = "strandedness"
43
-
44
- @classmethod
45
- def list(cls) -> list[str]:
46
- return list(map(lambda header: header.value, cls))
47
-
48
-
49
- class TomteParameters(WorkflowParameters):
50
- """Model for Tomte parameters."""
51
-
52
- gene_panel_clinical_filter: Path
53
- tissue: str
54
- genome: str = GenomeVersion.HG38
55
-
56
- @field_validator("tissue", mode="before")
57
- @classmethod
58
- def restrict_tissue_values(cls, tissue: str | None) -> str:
59
- if tissue:
60
- return replace_non_alphanumeric(string=tissue)
61
- else:
62
- return SourceType.UNKNOWN
63
-
64
- @field_validator("genome", mode="before")
65
- @classmethod
66
- def restrict_genome_values(cls, genome: str) -> str:
67
- if genome == GenomeVersion.HG38:
68
- return GenomeVersion.GRCh38.value
69
- elif genome == GenomeVersion.HG19:
70
- return GenomeVersion.GRCh37.value
71
2
 
72
3
 
73
4
  class TomteQCMetrics(QCMetrics):