smftools 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. smftools/__init__.py +43 -13
  2. smftools/_settings.py +6 -6
  3. smftools/_version.py +3 -1
  4. smftools/cli/__init__.py +1 -0
  5. smftools/cli/archived/cli_flows.py +2 -0
  6. smftools/cli/helpers.py +9 -1
  7. smftools/cli/hmm_adata.py +905 -242
  8. smftools/cli/load_adata.py +432 -280
  9. smftools/cli/preprocess_adata.py +287 -171
  10. smftools/cli/spatial_adata.py +141 -53
  11. smftools/cli_entry.py +119 -178
  12. smftools/config/__init__.py +3 -1
  13. smftools/config/conversion.yaml +5 -1
  14. smftools/config/deaminase.yaml +1 -1
  15. smftools/config/default.yaml +26 -18
  16. smftools/config/direct.yaml +8 -3
  17. smftools/config/discover_input_files.py +19 -5
  18. smftools/config/experiment_config.py +511 -276
  19. smftools/constants.py +37 -0
  20. smftools/datasets/__init__.py +4 -8
  21. smftools/datasets/datasets.py +32 -18
  22. smftools/hmm/HMM.py +2133 -1428
  23. smftools/hmm/__init__.py +24 -14
  24. smftools/hmm/archived/apply_hmm_batched.py +2 -0
  25. smftools/hmm/archived/calculate_distances.py +2 -0
  26. smftools/hmm/archived/call_hmm_peaks.py +18 -1
  27. smftools/hmm/archived/train_hmm.py +2 -0
  28. smftools/hmm/call_hmm_peaks.py +176 -193
  29. smftools/hmm/display_hmm.py +23 -7
  30. smftools/hmm/hmm_readwrite.py +20 -6
  31. smftools/hmm/nucleosome_hmm_refinement.py +104 -14
  32. smftools/informatics/__init__.py +55 -13
  33. smftools/informatics/archived/bam_conversion.py +2 -0
  34. smftools/informatics/archived/bam_direct.py +2 -0
  35. smftools/informatics/archived/basecall_pod5s.py +2 -0
  36. smftools/informatics/archived/basecalls_to_adata.py +2 -0
  37. smftools/informatics/archived/conversion_smf.py +2 -0
  38. smftools/informatics/archived/deaminase_smf.py +1 -0
  39. smftools/informatics/archived/direct_smf.py +2 -0
  40. smftools/informatics/archived/fast5_to_pod5.py +2 -0
  41. smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  42. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +16 -1
  43. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  44. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  45. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  46. smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  47. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  48. smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  49. smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  50. smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  51. smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  52. smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  53. smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  54. smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  55. smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  56. smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  57. smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  58. smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  59. smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  60. smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  61. smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  62. smftools/informatics/archived/helpers/archived/load_adata.py +5 -3
  63. smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  64. smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  65. smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  66. smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  67. smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  68. smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  69. smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  70. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +5 -1
  71. smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  72. smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  73. smftools/informatics/archived/print_bam_query_seq.py +9 -1
  74. smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  75. smftools/informatics/archived/subsample_pod5.py +2 -0
  76. smftools/informatics/bam_functions.py +1059 -269
  77. smftools/informatics/basecalling.py +53 -9
  78. smftools/informatics/bed_functions.py +357 -114
  79. smftools/informatics/binarize_converted_base_identities.py +21 -7
  80. smftools/informatics/complement_base_list.py +9 -6
  81. smftools/informatics/converted_BAM_to_adata.py +324 -137
  82. smftools/informatics/fasta_functions.py +251 -89
  83. smftools/informatics/h5ad_functions.py +202 -30
  84. smftools/informatics/modkit_extract_to_adata.py +623 -274
  85. smftools/informatics/modkit_functions.py +87 -44
  86. smftools/informatics/ohe.py +46 -21
  87. smftools/informatics/pod5_functions.py +114 -74
  88. smftools/informatics/run_multiqc.py +20 -14
  89. smftools/logging_utils.py +51 -0
  90. smftools/machine_learning/__init__.py +23 -12
  91. smftools/machine_learning/data/__init__.py +2 -0
  92. smftools/machine_learning/data/anndata_data_module.py +157 -50
  93. smftools/machine_learning/data/preprocessing.py +4 -1
  94. smftools/machine_learning/evaluation/__init__.py +3 -1
  95. smftools/machine_learning/evaluation/eval_utils.py +13 -14
  96. smftools/machine_learning/evaluation/evaluators.py +52 -34
  97. smftools/machine_learning/inference/__init__.py +3 -1
  98. smftools/machine_learning/inference/inference_utils.py +9 -4
  99. smftools/machine_learning/inference/lightning_inference.py +14 -13
  100. smftools/machine_learning/inference/sklearn_inference.py +8 -8
  101. smftools/machine_learning/inference/sliding_window_inference.py +37 -25
  102. smftools/machine_learning/models/__init__.py +12 -5
  103. smftools/machine_learning/models/base.py +34 -43
  104. smftools/machine_learning/models/cnn.py +22 -13
  105. smftools/machine_learning/models/lightning_base.py +78 -42
  106. smftools/machine_learning/models/mlp.py +18 -5
  107. smftools/machine_learning/models/positional.py +10 -4
  108. smftools/machine_learning/models/rnn.py +8 -3
  109. smftools/machine_learning/models/sklearn_models.py +46 -24
  110. smftools/machine_learning/models/transformer.py +75 -55
  111. smftools/machine_learning/models/wrappers.py +8 -3
  112. smftools/machine_learning/training/__init__.py +4 -2
  113. smftools/machine_learning/training/train_lightning_model.py +42 -23
  114. smftools/machine_learning/training/train_sklearn_model.py +11 -15
  115. smftools/machine_learning/utils/__init__.py +3 -1
  116. smftools/machine_learning/utils/device.py +12 -5
  117. smftools/machine_learning/utils/grl.py +8 -2
  118. smftools/metadata.py +443 -0
  119. smftools/optional_imports.py +31 -0
  120. smftools/plotting/__init__.py +32 -17
  121. smftools/plotting/autocorrelation_plotting.py +153 -48
  122. smftools/plotting/classifiers.py +175 -73
  123. smftools/plotting/general_plotting.py +350 -168
  124. smftools/plotting/hmm_plotting.py +53 -14
  125. smftools/plotting/position_stats.py +155 -87
  126. smftools/plotting/qc_plotting.py +25 -12
  127. smftools/preprocessing/__init__.py +35 -37
  128. smftools/preprocessing/append_base_context.py +105 -79
  129. smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
  130. smftools/preprocessing/{archives → archived}/add_read_length_and_mapping_qc.py +2 -0
  131. smftools/preprocessing/{archives → archived}/calculate_complexity.py +5 -1
  132. smftools/preprocessing/{archives → archived}/mark_duplicates.py +2 -0
  133. smftools/preprocessing/{archives → archived}/preprocessing.py +10 -6
  134. smftools/preprocessing/{archives → archived}/remove_duplicates.py +2 -0
  135. smftools/preprocessing/binarize.py +21 -4
  136. smftools/preprocessing/binarize_on_Youden.py +127 -31
  137. smftools/preprocessing/binary_layers_to_ohe.py +18 -11
  138. smftools/preprocessing/calculate_complexity_II.py +89 -59
  139. smftools/preprocessing/calculate_consensus.py +28 -19
  140. smftools/preprocessing/calculate_coverage.py +44 -22
  141. smftools/preprocessing/calculate_pairwise_differences.py +4 -1
  142. smftools/preprocessing/calculate_pairwise_hamming_distances.py +7 -3
  143. smftools/preprocessing/calculate_position_Youden.py +110 -55
  144. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  145. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  146. smftools/preprocessing/clean_NaN.py +38 -28
  147. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  148. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +72 -37
  149. smftools/preprocessing/filter_reads_on_modification_thresholds.py +183 -73
  150. smftools/preprocessing/flag_duplicate_reads.py +708 -303
  151. smftools/preprocessing/invert_adata.py +26 -11
  152. smftools/preprocessing/load_sample_sheet.py +40 -22
  153. smftools/preprocessing/make_dirs.py +9 -3
  154. smftools/preprocessing/min_non_diagonal.py +4 -1
  155. smftools/preprocessing/recipes.py +58 -23
  156. smftools/preprocessing/reindex_references_adata.py +93 -27
  157. smftools/preprocessing/subsample_adata.py +33 -16
  158. smftools/readwrite.py +264 -109
  159. smftools/schema/__init__.py +11 -0
  160. smftools/schema/anndata_schema_v1.yaml +227 -0
  161. smftools/tools/__init__.py +25 -18
  162. smftools/tools/archived/apply_hmm.py +2 -0
  163. smftools/tools/archived/classifiers.py +165 -0
  164. smftools/tools/archived/classify_methylated_features.py +2 -0
  165. smftools/tools/archived/classify_non_methylated_features.py +2 -0
  166. smftools/tools/archived/subset_adata_v1.py +12 -1
  167. smftools/tools/archived/subset_adata_v2.py +14 -1
  168. smftools/tools/calculate_umap.py +56 -15
  169. smftools/tools/cluster_adata_on_methylation.py +122 -47
  170. smftools/tools/general_tools.py +70 -25
  171. smftools/tools/position_stats.py +220 -99
  172. smftools/tools/read_stats.py +50 -29
  173. smftools/tools/spatial_autocorrelation.py +365 -192
  174. smftools/tools/subset_adata.py +23 -21
  175. smftools-0.3.0.dist-info/METADATA +147 -0
  176. smftools-0.3.0.dist-info/RECORD +182 -0
  177. smftools-0.2.4.dist-info/METADATA +0 -141
  178. smftools-0.2.4.dist-info/RECORD +0 -176
  179. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
  180. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
  181. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
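The most pervasive change in this release is the switch from bare `print` calls to the new `smftools/logging_utils.py` module, plus lazy optional imports via `smftools/optional_imports.py` (both new files in the list above). As a point of reference, here is a minimal sketch of that pattern using only the calls visible in the diff below (`setup_logging`, `get_logger`, `require`); the log-file path is a placeholder, not something taken from the package.

```python
import logging
from pathlib import Path

from smftools.logging_utils import get_logger, setup_logging
from smftools.optional_imports import require

# Configure the package-wide "smftools" logger once at the entry point.
# The level/log_file keywords mirror what the worker helpers in the diff
# forward to subprocesses; the path here is purely illustrative.
setup_logging(level=logging.INFO, log_file=Path("smf_run.log"))

# Module-level logger, as used throughout the refactored modules.
logger = get_logger(__name__)

# Optional dependency resolved lazily; raises a helpful error if torch is missing.
torch = require("torch", extra="torch", purpose="converted BAM processing")

logger.info("smftools logging and optional imports initialised")
```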
smftools/informatics/converted_BAM_to_adata.py

@@ -1,61 +1,70 @@
- import numpy as np
- import time
- import os
- import gc
- import pandas as pd
- import anndata as ad
- from tqdm import tqdm
- import multiprocessing
- from multiprocessing import Manager, Lock, current_process, Pool
- import traceback
- import gzip
- import torch
+ from __future__ import annotations

+ import gc
+ import logging
  import shutil
+ import time
+ import traceback
+ from multiprocessing import Manager, Pool, current_process
  from pathlib import Path
- from typing import Union, Iterable, Optional
+ from typing import TYPE_CHECKING, Iterable, Optional, Union
+
+ import anndata as ad
+ import numpy as np
+ import pandas as pd

- from ..readwrite import make_dirs, safe_write_h5ad
+ from smftools.logging_utils import get_logger, setup_logging
+ from smftools.optional_imports import require
+
+ from ..readwrite import make_dirs
+ from .bam_functions import count_aligned_reads, extract_base_identities
  from .binarize_converted_base_identities import binarize_converted_base_identities
  from .fasta_functions import find_conversion_sites
- from .bam_functions import count_aligned_reads, extract_base_identities
  from .ohe import ohe_batching

- if __name__ == "__main__":
- multiprocessing.set_start_method("forkserver", force=True)
-
- def converted_BAM_to_adata(converted_FASTA,
- split_dir,
- output_dir,
- input_already_demuxed,
- mapping_threshold,
- experiment_name,
- conversions,
- bam_suffix,
- device='cpu',
- num_threads=8,
- deaminase_footprinting=False,
- delete_intermediates=True,
- double_barcoded_path = None,
- ):
- """
- Converts BAM files into an AnnData object by binarizing modified base identities.
-
- Parameters:
- converted_FASTA (Path): Path to the converted FASTA reference.
- split_dir (Path): Directory containing converted BAM files.
- output_dir (Path): Directory of the output dir
- input_already_demuxed (bool): Whether input reads were originally demuxed
- mapping_threshold (float): Minimum fraction of aligned reads required for inclusion.
- experiment_name (str): Name for the output AnnData object.
- conversions (list): List of modification types (e.g., ['unconverted', '5mC', '6mA']).
- bam_suffix (str): File suffix for BAM files.
- num_threads (int): Number of parallel processing threads.
- deaminase_footprinting (bool): Whether the footprinting was done with a direct deamination chemistry.
- double_barcoded_path (Path): Path to dorado demux summary file of double ended barcodes
+ logger = get_logger(__name__)
+
+ if TYPE_CHECKING:
+ import torch
+
+ torch = require("torch", extra="torch", purpose="converted BAM processing")
+
+
+ def converted_BAM_to_adata(
+ converted_FASTA: str | Path,
+ split_dir: Path,
+ output_dir: Path,
+ input_already_demuxed: bool,
+ mapping_threshold: float,
+ experiment_name: str,
+ conversions: list[str],
+ bam_suffix: str,
+ device: str | torch.device = "cpu",
+ num_threads: int = 8,
+ deaminase_footprinting: bool = False,
+ delete_intermediates: bool = True,
+ double_barcoded_path: Path | None = None,
+ samtools_backend: str | None = "auto",
+ ) -> tuple[ad.AnnData | None, Path]:
+ """Convert BAM files into an AnnData object by binarizing modified base identities.
+
+ Args:
+ converted_FASTA: Path to the converted FASTA reference.
+ split_dir: Directory containing converted BAM files.
+ output_dir: Output directory for intermediate and final files.
+ input_already_demuxed: Whether input reads were originally demultiplexed.
+ mapping_threshold: Minimum fraction of aligned reads required for inclusion.
+ experiment_name: Name for the output AnnData object.
+ conversions: List of modification types (e.g., ``["unconverted", "5mC", "6mA"]``).
+ bam_suffix: File suffix for BAM files.
+ device: Torch device or device string.
+ num_threads: Number of parallel processing threads.
+ deaminase_footprinting: Whether the footprinting used direct deamination chemistry.
+ delete_intermediates: Whether to remove intermediate files after processing.
+ double_barcoded_path: Path to dorado demux summary file of double-ended barcodes.

  Returns:
- str: Path to the final AnnData object.
+ tuple[anndata.AnnData | None, Path]: The AnnData object (if generated) and its path.
  """
  if torch.cuda.is_available():
  device = torch.device("cuda")
@@ -64,69 +73,91 @@ def converted_BAM_to_adata(converted_FASTA,
  else:
  device = torch.device("cpu")

- print(f"Using device: {device}")
+ logger.debug(f"Using device: {device}")

  ## Set Up Directories and File Paths
- h5_dir = output_dir / 'h5ads'
- tmp_dir = output_dir / 'tmp'
+ h5_dir = output_dir / "h5ads"
+ tmp_dir = output_dir / "tmp"
  final_adata = None
- final_adata_path = h5_dir / f'{experiment_name}.h5ad.gz'
+ final_adata_path = h5_dir / f"{experiment_name}.h5ad.gz"

  if final_adata_path.exists():
- print(f"{final_adata_path} already exists. Using existing AnnData object.")
+ logger.debug(f"{final_adata_path} already exists. Using existing AnnData object.")
  return final_adata, final_adata_path

  make_dirs([h5_dir, tmp_dir])

  bam_files = sorted(
- p for p in split_dir.iterdir()
- if p.is_file()
- and p.suffix == ".bam"
- and "unclassified" not in p.name
+ p
+ for p in split_dir.iterdir()
+ if p.is_file() and p.suffix == ".bam" and "unclassified" not in p.name
  )

- bam_path_list = [split_dir / f for f in bam_files]
- print(f"Found {len(bam_files)} BAM files: {bam_files}")
+ bam_path_list = bam_files
+
+ bam_names = [bam.name for bam in bam_files]
+ logger.info(f"Found {len(bam_files)} BAM files within {split_dir}: {bam_names}")

  ## Process Conversion Sites
- max_reference_length, record_FASTA_dict, chromosome_FASTA_dict = process_conversion_sites(converted_FASTA, conversions, deaminase_footprinting)
+ max_reference_length, record_FASTA_dict, chromosome_FASTA_dict = process_conversion_sites(
+ converted_FASTA, conversions, deaminase_footprinting
+ )

  ## Filter BAM Files by Mapping Threshold
- records_to_analyze = filter_bams_by_mapping_threshold(bam_path_list, bam_files, mapping_threshold)
+ records_to_analyze = filter_bams_by_mapping_threshold(
+ bam_path_list, bam_files, mapping_threshold, samtools_backend
+ )

  ## Process BAMs in Parallel
- final_adata = process_bams_parallel(bam_path_list, records_to_analyze, record_FASTA_dict, chromosome_FASTA_dict, tmp_dir, h5_dir, num_threads, max_reference_length, device, deaminase_footprinting)
+ final_adata = process_bams_parallel(
+ bam_path_list,
+ records_to_analyze,
+ record_FASTA_dict,
+ chromosome_FASTA_dict,
+ tmp_dir,
+ h5_dir,
+ num_threads,
+ max_reference_length,
+ device,
+ deaminase_footprinting,
+ samtools_backend,
+ )

- final_adata.uns['References'] = {}
+ final_adata.uns["References"] = {}
  for chromosome, [seq, comp] in chromosome_FASTA_dict.items():
- final_adata.var[f'{chromosome}_top_strand_FASTA_base'] = list(seq)
- final_adata.var[f'{chromosome}_bottom_strand_FASTA_base'] = list(comp)
- final_adata.uns[f'{chromosome}_FASTA_sequence'] = seq
- final_adata.uns['References'][f'{chromosome}_FASTA_sequence'] = seq
+ final_adata.var[f"{chromosome}_top_strand_FASTA_base"] = list(seq)
+ final_adata.var[f"{chromosome}_bottom_strand_FASTA_base"] = list(comp)
+ final_adata.uns[f"{chromosome}_FASTA_sequence"] = seq
+ final_adata.uns["References"][f"{chromosome}_FASTA_sequence"] = seq

  final_adata.obs_names_make_unique()
  cols = final_adata.obs.columns

  # Make obs cols categorical
  for col in cols:
- final_adata.obs[col] = final_adata.obs[col].astype('category')
+ final_adata.obs[col] = final_adata.obs[col].astype("category")

  if input_already_demuxed:
  final_adata.obs["demux_type"] = ["already"] * final_adata.shape[0]
  final_adata.obs["demux_type"] = final_adata.obs["demux_type"].astype("category")
  else:
  from .h5ad_functions import add_demux_type_annotation
+
  double_barcoded_reads = double_barcoded_path / "barcoding_summary.txt"
+ logger.info("Adding demux type to each read")
  add_demux_type_annotation(final_adata, double_barcoded_reads)

  ## Delete intermediate h5ad files and temp directories
  if delete_intermediates:
+ logger.info("Deleting intermediate h5ad files")
  delete_intermediate_h5ads_and_tmpdir(h5_dir, tmp_dir)
-
+
  return final_adata, final_adata_path


- def process_conversion_sites(converted_FASTA, conversions=['unconverted', '5mC'], deaminase_footprinting=False):
+ def process_conversion_sites(
+ converted_FASTA, conversions=["unconverted", "5mC"], deaminase_footprinting=False
+ ):
  """
  Extracts conversion sites and determines the max reference length.

@@ -147,7 +178,9 @@ def process_conversion_sites(converted_FASTA, conversions=['unconverted', '5mC']
  conversion_types = conversions[1:]

  # Process the unconverted sequence once
- modification_dict[unconverted] = find_conversion_sites(converted_FASTA, unconverted, conversions, deaminase_footprinting)
+ modification_dict[unconverted] = find_conversion_sites(
+ converted_FASTA, unconverted, conversions, deaminase_footprinting
+ )
  # Above points to record_dict[record.id] = [sequence_length, [], [], sequence, complement] with only unconverted record.id keys

  # Get **max sequence length** from unconverted records
@@ -166,15 +199,25 @@ def process_conversion_sites(converted_FASTA, conversions=['unconverted', '5mC']
  record_FASTA_dict[record] = [
  sequence + "N" * (max_reference_length - sequence_length),
  complement + "N" * (max_reference_length - sequence_length),
- chromosome, record, sequence_length, max_reference_length - sequence_length, unconverted, "top"
+ chromosome,
+ record,
+ sequence_length,
+ max_reference_length - sequence_length,
+ unconverted,
+ "top",
  ]

  if chromosome not in chromosome_FASTA_dict:
- chromosome_FASTA_dict[chromosome] = [sequence + "N" * (max_reference_length - sequence_length), complement + "N" * (max_reference_length - sequence_length)]
+ chromosome_FASTA_dict[chromosome] = [
+ sequence + "N" * (max_reference_length - sequence_length),
+ complement + "N" * (max_reference_length - sequence_length),
+ ]

  # Process converted records
  for conversion in conversion_types:
- modification_dict[conversion] = find_conversion_sites(converted_FASTA, conversion, conversions, deaminase_footprinting)
+ modification_dict[conversion] = find_conversion_sites(
+ converted_FASTA, conversion, conversions, deaminase_footprinting
+ )
  # Above points to record_dict[record.id] = [sequence_length, top_strand_coordinates, bottom_strand_coordinates, sequence, complement] with only unconverted record.id keys

  for record, values in modification_dict[conversion].items():
@@ -193,32 +236,47 @@ def process_conversion_sites(converted_FASTA, conversions=['unconverted', '5mC']
  record_FASTA_dict[converted_name] = [
  sequence + "N" * (max_reference_length - sequence_length),
  complement + "N" * (max_reference_length - sequence_length),
- chromosome, unconverted_name, sequence_length,
- max_reference_length - sequence_length, conversion, strand
+ chromosome,
+ unconverted_name,
+ sequence_length,
+ max_reference_length - sequence_length,
+ conversion,
+ strand,
  ]

- print("Updated record_FASTA_dict Keys:", list(record_FASTA_dict.keys()))
+ logger.debug("Updated record_FASTA_dict Keys:", list(record_FASTA_dict.keys()))
  return max_reference_length, record_FASTA_dict, chromosome_FASTA_dict


- def filter_bams_by_mapping_threshold(bam_path_list, bam_files, mapping_threshold):
+ def filter_bams_by_mapping_threshold(bam_path_list, bam_files, mapping_threshold, samtools_backend):
  """Filters BAM files based on mapping threshold."""
  records_to_analyze = set()

  for i, bam in enumerate(bam_path_list):
- aligned_reads, unaligned_reads, record_counts = count_aligned_reads(bam)
+ aligned_reads, unaligned_reads, record_counts = count_aligned_reads(bam, samtools_backend)
  aligned_percent = aligned_reads * 100 / (aligned_reads + unaligned_reads)
- print(f"{aligned_percent:.2f}% of reads in {bam_files[i]} aligned successfully.")
+ logger.info(f"{aligned_percent:.2f}% of reads in {bam_files[i].name} aligned successfully.")

  for record, (count, percent) in record_counts.items():
  if percent >= mapping_threshold:
  records_to_analyze.add(record)

- print(f"Analyzing the following FASTA records: {records_to_analyze}")
+ logger.info(f"Analyzing the following FASTA records: {records_to_analyze}")
  return records_to_analyze


- def process_single_bam(bam_index, bam, records_to_analyze, record_FASTA_dict, chromosome_FASTA_dict, tmp_dir, max_reference_length, device, deaminase_footprinting):
+ def process_single_bam(
+ bam_index,
+ bam,
+ records_to_analyze,
+ record_FASTA_dict,
+ chromosome_FASTA_dict,
+ tmp_dir,
+ max_reference_length,
+ device,
+ deaminase_footprinting,
+ samtools_backend,
+ ):
  """Worker function to process a single BAM file (must be at top-level for multiprocessing)."""
  adata_list = []

@@ -230,34 +288,58 @@ def process_single_bam(bam_index, bam, records_to_analyze, record_FASTA_dict, ch
  sequence = chromosome_FASTA_dict[chromosome][0]

  # Extract Base Identities
- fwd_bases, rev_bases, mismatch_counts_per_read, mismatch_trend_per_read = extract_base_identities(bam, record, range(current_length), max_reference_length, sequence)
+ fwd_bases, rev_bases, mismatch_counts_per_read, mismatch_trend_per_read = (
+ extract_base_identities(
+ bam, record, range(current_length), max_reference_length, sequence, samtools_backend
+ )
+ )
  mismatch_trend_series = pd.Series(mismatch_trend_per_read)

  # Skip processing if both forward and reverse base identities are empty
  if not fwd_bases and not rev_bases:
- print(f"{timestamp()} [Worker {current_process().pid}] Skipping {sample} - No valid base identities for {record}.")
+ logger.debug(
+ f"[Worker {current_process().pid}] Skipping {sample} - No valid base identities for {record}."
+ )
  continue

  merged_bin = {}

  # Binarize the Base Identities if they exist
  if fwd_bases:
- fwd_bin = binarize_converted_base_identities(fwd_bases, strand, mod_type, bam, device,deaminase_footprinting, mismatch_trend_per_read)
+ fwd_bin = binarize_converted_base_identities(
+ fwd_bases,
+ strand,
+ mod_type,
+ bam,
+ device,
+ deaminase_footprinting,
+ mismatch_trend_per_read,
+ )
  merged_bin.update(fwd_bin)

  if rev_bases:
- rev_bin = binarize_converted_base_identities(rev_bases, strand, mod_type, bam, device, deaminase_footprinting, mismatch_trend_per_read)
+ rev_bin = binarize_converted_base_identities(
+ rev_bases,
+ strand,
+ mod_type,
+ bam,
+ device,
+ deaminase_footprinting,
+ mismatch_trend_per_read,
+ )
  merged_bin.update(rev_bin)

  # Skip if merged_bin is empty (no valid binarized data)
  if not merged_bin:
- print(f"{timestamp()} [Worker {current_process().pid}] Skipping {sample} - No valid binarized data for {record}.")
+ logger.debug(
+ f"[Worker {current_process().pid}] Skipping {sample} - No valid binarized data for {record}."
+ )
  continue

  # Convert to DataFrame
  # for key in merged_bin:
  # merged_bin[key] = merged_bin[key].cpu().numpy() # Move to CPU & convert to NumPy
- bin_df = pd.DataFrame.from_dict(merged_bin, orient='index')
+ bin_df = pd.DataFrame.from_dict(merged_bin, orient="index")
  sorted_index = sorted(bin_df.index)
  bin_df = bin_df.reindex(sorted_index)

@@ -265,14 +347,18 @@ def process_single_bam(bam_index, bam, records_to_analyze, record_FASTA_dict, ch
  one_hot_reads = {}

  if fwd_bases:
- fwd_ohe_files = ohe_batching(fwd_bases, tmp_dir, record, f"{bam_index}_fwd", batch_size=100000)
+ fwd_ohe_files = ohe_batching(
+ fwd_bases, tmp_dir, record, f"{bam_index}_fwd", batch_size=100000
+ )
  for ohe_file in fwd_ohe_files:
  tmp_ohe_dict = ad.read_h5ad(ohe_file).uns
  one_hot_reads.update(tmp_ohe_dict)
  del tmp_ohe_dict

  if rev_bases:
- rev_ohe_files = ohe_batching(rev_bases, tmp_dir, record, f"{bam_index}_rev", batch_size=100000)
+ rev_ohe_files = ohe_batching(
+ rev_bases, tmp_dir, record, f"{bam_index}_rev", batch_size=100000
+ )
  for ohe_file in rev_ohe_files:
  tmp_ohe_dict = ad.read_h5ad(ohe_file).uns
  one_hot_reads.update(tmp_ohe_dict)
@@ -280,7 +366,9 @@ def process_single_bam(bam_index, bam, records_to_analyze, record_FASTA_dict, ch

  # Skip if one_hot_reads is empty
  if not one_hot_reads:
- print(f"{timestamp()} [Worker {current_process().pid}] Skipping {sample} - No valid one-hot encoded data for {record}.")
+ logger.debug(
+ f"[Worker {current_process().pid}] Skipping {sample} - No valid one-hot encoded data for {record}."
+ )
  continue

  gc.collect()
@@ -291,11 +379,15 @@ def process_single_bam(bam_index, bam, records_to_analyze, record_FASTA_dict, ch

  # Skip if no read names exist
  if not read_names:
- print(f"{timestamp()} [Worker {current_process().pid}] Skipping {sample} - No reads found in one-hot encoded data for {record}.")
+ logger.debug(
+ f"[Worker {current_process().pid}] Skipping {sample} - No reads found in one-hot encoded data for {record}."
+ )
  continue

  sequence_length = one_hot_reads[read_names[0]].reshape(n_rows_OHE, -1).shape[1]
- df_A, df_C, df_G, df_T, df_N = [np.zeros((len(sorted_index), sequence_length), dtype=int) for _ in range(5)]
+ df_A, df_C, df_G, df_T, df_N = [
+ np.zeros((len(sorted_index), sequence_length), dtype=int) for _ in range(5)
+ ]

  # Populate One-Hot Arrays
  for j, read_name in enumerate(sorted_index):
@@ -310,8 +402,8 @@ def process_single_bam(bam_index, bam, records_to_analyze, record_FASTA_dict, ch
  adata.var_names = bin_df.columns.astype(str)
  adata.obs["Sample"] = [sample] * len(adata)
  try:
- barcode = sample.split('barcode')[1]
- except:
+ barcode = sample.split("barcode")[1]
+ except Exception:
  barcode = np.nan
  adata.obs["Barcode"] = [int(barcode)] * len(adata)
  adata.obs["Barcode"] = adata.obs["Barcode"].astype(str)
@@ -323,49 +415,81 @@ def process_single_bam(bam_index, bam, records_to_analyze, record_FASTA_dict, ch
  adata.obs["Read_mismatch_trend"] = adata.obs_names.map(mismatch_trend_series)

  # Attach One-Hot Encodings to Layers
- adata.layers["A_binary_encoding"] = df_A
- adata.layers["C_binary_encoding"] = df_C
- adata.layers["G_binary_encoding"] = df_G
- adata.layers["T_binary_encoding"] = df_T
- adata.layers["N_binary_encoding"] = df_N
+ adata.layers["A_binary_sequence_encoding"] = df_A
+ adata.layers["C_binary_sequence_encoding"] = df_C
+ adata.layers["G_binary_sequence_encoding"] = df_G
+ adata.layers["T_binary_sequence_encoding"] = df_T
+ adata.layers["N_binary_sequence_encoding"] = df_N

  adata_list.append(adata)

  return ad.concat(adata_list, join="outer") if adata_list else None

+
  def timestamp():
  """Returns a formatted timestamp for logging."""
  return time.strftime("[%Y-%m-%d %H:%M:%S]")


- def worker_function(bam_index, bam, records_to_analyze, shared_record_FASTA_dict, chromosome_FASTA_dict, tmp_dir, h5_dir, max_reference_length, device, deaminase_footprinting, progress_queue):
+ def worker_function(
+ bam_index,
+ bam,
+ records_to_analyze,
+ shared_record_FASTA_dict,
+ chromosome_FASTA_dict,
+ tmp_dir,
+ h5_dir,
+ max_reference_length,
+ device,
+ deaminase_footprinting,
+ samtools_backend,
+ progress_queue,
+ log_level,
+ log_file,
+ ):
  """Worker function that processes a single BAM and writes the output to an H5AD file."""
+ _ensure_worker_logging(log_level, log_file)
  worker_id = current_process().pid # Get worker process ID
  sample = bam.stem

  try:
- print(f"{timestamp()} [Worker {worker_id}] Processing BAM: {sample}")
+ logger.info(f"[Worker {worker_id}] Processing BAM: {sample}")

  h5ad_path = h5_dir / bam.with_suffix(".h5ad").name
  if h5ad_path.exists():
- print(f"{timestamp()} [Worker {worker_id}] Skipping {sample}: Already processed.")
+ logger.debug(f"[Worker {worker_id}] Skipping {sample}: Already processed.")
  progress_queue.put(sample)
  return

  # Filter records specific to this BAM
- bam_records_to_analyze = {record for record in records_to_analyze if record in shared_record_FASTA_dict}
+ bam_records_to_analyze = {
+ record for record in records_to_analyze if record in shared_record_FASTA_dict
+ }

  if not bam_records_to_analyze:
- print(f"{timestamp()} [Worker {worker_id}] No valid records to analyze for {sample}. Skipping.")
+ logger.debug(
+ f"[Worker {worker_id}] No valid records to analyze for {sample}. Skipping."
+ )
  progress_queue.put(sample)
  return

  # Process BAM
- adata = process_single_bam(bam_index, bam, bam_records_to_analyze, shared_record_FASTA_dict, chromosome_FASTA_dict, tmp_dir, max_reference_length, device, deaminase_footprinting)
+ adata = process_single_bam(
+ bam_index,
+ bam,
+ bam_records_to_analyze,
+ shared_record_FASTA_dict,
+ chromosome_FASTA_dict,
+ tmp_dir,
+ max_reference_length,
+ device,
+ deaminase_footprinting,
+ samtools_backend,
+ )

  if adata is not None:
  adata.write_h5ad(str(h5ad_path))
- print(f"{timestamp()} [Worker {worker_id}] Completed processing for BAM: {sample}")
+ logger.info(f"[Worker {worker_id}] Completed processing for BAM: {sample}")

  # Free memory
  del adata
@@ -373,22 +497,31 @@ def worker_function(bam_index, bam, records_to_analyze, shared_record_FASTA_dict

  progress_queue.put(sample)

- except Exception as e:
- print(f"{timestamp()} [Worker {worker_id}] ERROR while processing {sample}:\n{traceback.format_exc()}")
+ except Exception:
+ logger.warning(
+ f"[Worker {worker_id}] ERROR while processing {sample}:\n{traceback.format_exc()}"
+ )
  progress_queue.put(sample) # Still signal completion to prevent deadlock

- def process_bams_parallel(bam_path_list, records_to_analyze, record_FASTA_dict, chromosome_FASTA_dict, tmp_dir, h5_dir, num_threads, max_reference_length, device, deaminase_footprinting):
+
+ def process_bams_parallel(
+ bam_path_list,
+ records_to_analyze,
+ record_FASTA_dict,
+ chromosome_FASTA_dict,
+ tmp_dir,
+ h5_dir,
+ num_threads,
+ max_reference_length,
+ device,
+ deaminase_footprinting,
+ samtools_backend,
+ ):
  """Processes BAM files in parallel, writes each H5AD to disk, and concatenates them at the end."""
  make_dirs(h5_dir) # Ensure h5_dir exists

- print(f"{timestamp()} Starting parallel BAM processing with {num_threads} threads...")
-
- # Ensure macOS uses forkserver to avoid spawning issues
- try:
- import multiprocessing
- multiprocessing.set_start_method("forkserver", force=True)
- except RuntimeError:
- print(f"{timestamp()} [WARNING] Multiprocessing context already set. Skipping set_start_method.")
+ logger.info(f"Starting parallel BAM processing with {num_threads} threads...")
+ log_level, log_file = _get_logger_config()

  with Manager() as manager:
  progress_queue = manager.Queue()
@@ -396,11 +529,29 @@ def process_bams_parallel(bam_path_list, records_to_analyze, record_FASTA_dict,

  with Pool(processes=num_threads) as pool:
  results = [
- pool.apply_async(worker_function, (i, bam, records_to_analyze, shared_record_FASTA_dict, chromosome_FASTA_dict, tmp_dir, h5_dir, max_reference_length, device, deaminase_footprinting, progress_queue))
+ pool.apply_async(
+ worker_function,
+ (
+ i,
+ bam,
+ records_to_analyze,
+ shared_record_FASTA_dict,
+ chromosome_FASTA_dict,
+ tmp_dir,
+ h5_dir,
+ max_reference_length,
+ device,
+ deaminase_footprinting,
+ samtools_backend,
+ progress_queue,
+ log_level,
+ log_file,
+ ),
+ )
  for i, bam in enumerate(bam_path_list)
  ]

- print(f"{timestamp()} Submitted {len(bam_path_list)} BAMs for processing.")
+ logger.info(f"Submitting {len(results)} BAMs for processing.")

  # Track completed BAMs
  completed_bams = set()
@@ -409,24 +560,58 @@ def process_bams_parallel(bam_path_list, records_to_analyze, record_FASTA_dict,
  processed_bam = progress_queue.get(timeout=2400) # Wait for a finished BAM
  completed_bams.add(processed_bam)
  except Exception as e:
- print(f"{timestamp()} [ERROR] Timeout waiting for worker process. Possible crash? {e}")
+ logger.error(f"Timeout waiting for worker process. Possible crash? {e}")
+ _log_async_result_errors(results, bam_path_list)

  pool.close()
  pool.join() # Ensure all workers finish

+ _log_async_result_errors(results, bam_path_list)
+
  # Final Concatenation Step
- h5ad_files = [h5_dir / f for f in h5_dir.iterdir() if f.suffix == ".h5ad"]
+ h5ad_files = [f for f in h5_dir.iterdir() if f.suffix == ".h5ad"]

  if not h5ad_files:
- print(f"{timestamp()} No valid H5AD files generated. Exiting.")
+ logger.warning(f"No valid H5AD files generated. Exiting.")
  return None

- print(f"{timestamp()} Concatenating {len(h5ad_files)} H5AD files into final output...")
+ logger.info(f"Concatenating {len(h5ad_files)} H5AD files into final output...")
  final_adata = ad.concat([ad.read_h5ad(f) for f in h5ad_files], join="outer")

- print(f"{timestamp()} Successfully generated final AnnData object.")
+ logger.info(f"Successfully generated final AnnData object.")
  return final_adata

+
+ def _log_async_result_errors(results, bam_path_list):
+ """Log worker failures captured by multiprocessing AsyncResult objects."""
+ for bam, result in zip(bam_path_list, results):
+ if not result.ready():
+ continue
+ try:
+ result.get()
+ except Exception as exc:
+ logger.error("Worker process failed for %s: %s", bam, exc)
+
+
+ def _get_logger_config() -> tuple[int, Path | None]:
+ smftools_logger = logging.getLogger("smftools")
+ level = smftools_logger.level
+ if level == logging.NOTSET:
+ level = logging.INFO
+ log_file: Path | None = None
+ for handler in smftools_logger.handlers:
+ if isinstance(handler, logging.FileHandler):
+ log_file = Path(handler.baseFilename)
+ break
+ return level, log_file
+
+
+ def _ensure_worker_logging(log_level: int, log_file: Path | None) -> None:
+ smftools_logger = logging.getLogger("smftools")
+ if not smftools_logger.handlers:
+ setup_logging(level=log_level, log_file=log_file)
+
+
  def delete_intermediate_h5ads_and_tmpdir(
  h5_dir: Union[str, Path, Iterable[str], None],
  tmp_dir: Optional[Union[str, Path]] = None,
@@ -450,25 +635,27 @@ def delete_intermediate_h5ads_and_tmpdir(
  verbose : bool
  Print progress / warnings.
  """
+
  # Helper: remove a single file path (Path-like or string)
  def _maybe_unlink(p: Path):
+ """Remove a file path if it exists and is a file."""
  if not p.exists():
  if verbose:
- print(f"[skip] not found: {p}")
+ logger.debug(f"[skip] not found: {p}")
  return
  if not p.is_file():
  if verbose:
- print(f"[skip] not a file: {p}")
+ logger.debug(f"[skip] not a file: {p}")
  return
  if dry_run:
- print(f"[dry-run] would remove file: {p}")
+ logger.debug(f"[dry-run] would remove file: {p}")
  return
  try:
  p.unlink()
  if verbose:
- print(f"Removed file: {p}")
+ logger.info(f"Removed file: {p}")
  except Exception as e:
- print(f"[error] failed to remove file {p}: {e}")
+ logger.warning(f"[error] failed to remove file {p}: {e}")

  # Handle h5_dir input (directory OR iterable of file paths)
  if h5_dir is not None:
@@ -483,7 +670,7 @@ def delete_intermediate_h5ads_and_tmpdir(
  else:
  if verbose:
  # optional: comment this out if too noisy
- print(f"[skip] not matching pattern: {p.name}")
+ logger.debug(f"[skip] not matching pattern: {p.name}")
  else:
  # treat as iterable of file paths
  for f in h5_dir:
@@ -493,25 +680,25 @@ def delete_intermediate_h5ads_and_tmpdir(
  _maybe_unlink(p)
  else:
  if verbose:
- print(f"[skip] not matching pattern or not a file: {p}")
+ logger.debug(f"[skip] not matching pattern or not a file: {p}")

  # Remove tmp_dir recursively (if provided)
  if tmp_dir is not None:
  td = Path(tmp_dir)
  if not td.exists():
  if verbose:
- print(f"[skip] tmp_dir not found: {td}")
+ logger.debug(f"[skip] tmp_dir not found: {td}")
  else:
  if not td.is_dir():
  if verbose:
- print(f"[skip] tmp_dir is not a directory: {td}")
+ logger.debug(f"[skip] tmp_dir is not a directory: {td}")
  else:
  if dry_run:
- print(f"[dry-run] would remove directory tree: {td}")
+ logger.debug(f"[dry-run] would remove directory tree: {td}")
  else:
  try:
  shutil.rmtree(td)
  if verbose:
- print(f"Removed directory tree: {td}")
+ logger.info(f"Removed directory tree: {td}")
  except Exception as e:
- print(f"[error] failed to remove tmp dir {td}: {e}")
+ logger.warning(f"[error] failed to remove tmp dir {td}: {e}")