smftools 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (96)
  1. smftools/__init__.py +2 -6
  2. smftools/_version.py +1 -1
  3. smftools/cli/__init__.py +0 -0
  4. smftools/cli/cli_flows.py +94 -0
  5. smftools/cli/hmm_adata.py +338 -0
  6. smftools/cli/load_adata.py +577 -0
  7. smftools/cli/preprocess_adata.py +363 -0
  8. smftools/cli/spatial_adata.py +564 -0
  9. smftools/cli_entry.py +435 -0
  10. smftools/config/conversion.yaml +11 -6
  11. smftools/config/deaminase.yaml +12 -7
  12. smftools/config/default.yaml +36 -25
  13. smftools/config/direct.yaml +25 -1
  14. smftools/config/discover_input_files.py +115 -0
  15. smftools/config/experiment_config.py +109 -12
  16. smftools/informatics/__init__.py +13 -7
  17. smftools/informatics/archived/fast5_to_pod5.py +43 -0
  18. smftools/informatics/archived/helpers/archived/__init__.py +71 -0
  19. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
  20. smftools/informatics/{helpers → archived/helpers/archived}/aligned_BAM_to_bed.py +6 -4
  21. smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
  22. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
  23. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
  24. smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
  25. smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
  26. smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +1 -1
  27. smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
  28. smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
  29. smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
  30. smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
  31. smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
  32. smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
  33. smftools/informatics/{helpers → archived/helpers/archived}/plot_bed_histograms.py +0 -19
  34. smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +6 -5
  35. smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +7 -7
  36. smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
  37. smftools/informatics/bam_functions.py +812 -0
  38. smftools/informatics/basecalling.py +67 -0
  39. smftools/informatics/bed_functions.py +366 -0
  40. smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +42 -30
  41. smftools/informatics/fasta_functions.py +255 -0
  42. smftools/informatics/h5ad_functions.py +197 -0
  43. smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +142 -59
  44. smftools/informatics/modkit_functions.py +129 -0
  45. smftools/informatics/ohe.py +160 -0
  46. smftools/informatics/pod5_functions.py +224 -0
  47. smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
  48. smftools/plotting/autocorrelation_plotting.py +1 -3
  49. smftools/plotting/general_plotting.py +1037 -362
  50. smftools/preprocessing/__init__.py +2 -0
  51. smftools/preprocessing/append_base_context.py +3 -3
  52. smftools/preprocessing/append_binary_layer_by_base_context.py +4 -4
  53. smftools/preprocessing/binarize.py +17 -0
  54. smftools/preprocessing/binarize_on_Youden.py +2 -2
  55. smftools/preprocessing/calculate_position_Youden.py +1 -1
  56. smftools/preprocessing/calculate_read_modification_stats.py +1 -1
  57. smftools/preprocessing/filter_reads_on_modification_thresholds.py +19 -19
  58. smftools/preprocessing/flag_duplicate_reads.py +1 -1
  59. smftools/readwrite.py +266 -140
  60. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/METADATA +10 -9
  61. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/RECORD +82 -70
  62. smftools-0.2.3.dist-info/entry_points.txt +2 -0
  63. smftools/cli.py +0 -184
  64. smftools/informatics/fast5_to_pod5.py +0 -24
  65. smftools/informatics/helpers/__init__.py +0 -73
  66. smftools/informatics/helpers/align_and_sort_BAM.py +0 -86
  67. smftools/informatics/helpers/bam_qc.py +0 -66
  68. smftools/informatics/helpers/bed_to_bigwig.py +0 -39
  69. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -378
  70. smftools/informatics/helpers/discover_input_files.py +0 -100
  71. smftools/informatics/helpers/index_fasta.py +0 -12
  72. smftools/informatics/helpers/make_dirs.py +0 -21
  73. smftools/informatics/readwrite.py +0 -106
  74. smftools/informatics/subsample_fasta_from_bed.py +0 -47
  75. smftools/load_adata.py +0 -1346
  76. smftools-0.2.1.dist-info/entry_points.txt +0 -2
  77. /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
  78. /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
  79. /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
  80. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +0 -0
  81. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
  82. /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
  83. /smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +0 -0
  84. /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
  85. /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
  86. /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
  87. /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
  88. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
  89. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
  90. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
  91. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
  92. /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
  93. /smftools/informatics/{helpers/binarize_converted_base_identities.py → binarize_converted_base_identities.py} +0 -0
  94. /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
  95. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/WHEEL +0 -0
  96. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -2,6 +2,7 @@ from .add_read_length_and_mapping_qc import add_read_length_and_mapping_qc
2
2
  from .append_base_context import append_base_context
3
3
  from .append_binary_layer_by_base_context import append_binary_layer_by_base_context
4
4
  from .binarize_on_Youden import binarize_on_Youden
5
+ from .binarize import binarize_adata
5
6
  from .calculate_complexity import calculate_complexity
6
7
  from .calculate_complexity_II import calculate_complexity_II
7
8
  from .calculate_read_modification_stats import calculate_read_modification_stats
@@ -22,6 +23,7 @@ __all__ = [
22
23
  "append_base_context",
23
24
  "append_binary_layer_by_base_context",
24
25
  "binarize_on_Youden",
26
+ "binarize_adata",
25
27
  "calculate_complexity",
26
28
  "calculate_read_modification_stats",
27
29
  "calculate_coverage",
@@ -34,7 +34,7 @@ def append_base_context(adata,
34
34
  site_types = []
35
35
 
36
36
  if any(base in mod_target_bases for base in ['GpC', 'CpG', 'C']):
37
- site_types += ['GpC_site', 'CpG_site', 'ambiguous_GpC_CpG_site', 'other_C_site', 'any_C_site']
37
+ site_types += ['GpC_site', 'CpG_site', 'ambiguous_GpC_CpG_site', 'other_C_site', 'C_site']
38
38
 
39
39
  if 'A' in mod_target_bases:
40
40
  site_types += ['A_site']
@@ -70,7 +70,7 @@ def append_base_context(adata,
70
70
  # Iterate through the sequence and apply the criteria
71
71
  for i in range(1, len(sequence) - 1):
72
72
  if sequence[i] == 'C':
73
- boolean_dict[f'{cat}_any_C_site'][i] = True
73
+ boolean_dict[f'{cat}_C_site'][i] = True
74
74
  if sequence[i - 1] == 'G' and sequence[i + 1] != 'G':
75
75
  boolean_dict[f'{cat}_GpC_site'][i] = True
76
76
  elif sequence[i - 1] == 'G' and sequence[i + 1] == 'G':
@@ -83,7 +83,7 @@ def append_base_context(adata,
83
83
  # Iterate through the sequence and apply the criteria
84
84
  for i in range(1, len(sequence) - 1):
85
85
  if sequence[i] == 'G':
86
- boolean_dict[f'{cat}_any_C_site'][i] = True
86
+ boolean_dict[f'{cat}_C_site'][i] = True
87
87
  if sequence[i + 1] == 'C' and sequence[i - 1] != 'C':
88
88
  boolean_dict[f'{cat}_GpC_site'][i] = True
89
89
  elif sequence[i - 1] == 'C' and sequence[i + 1] == 'C':
@@ -15,7 +15,7 @@ def append_binary_layer_by_base_context(
15
15
  - GpC_site_binary
16
16
  - CpG_site_binary
17
17
  - GpC_CpG_combined_site_binary (numeric sum where present; NaN where neither present)
18
- - any_C_site_binary
18
+ - C_site_binary
19
19
  - other_C_site_binary
20
20
 
21
21
  Behavior:
@@ -48,7 +48,7 @@ def append_binary_layer_by_base_context(
48
48
  references = adata.obs[reference_column].astype("category").cat.categories
49
49
  reference_to_gpc_column = {ref: f"{ref}_GpC_site" for ref in references}
50
50
  reference_to_cpg_column = {ref: f"{ref}_CpG_site" for ref in references}
51
- reference_to_c_column = {ref: f"{ref}_any_C_site" for ref in references}
51
+ reference_to_c_column = {ref: f"{ref}_C_site" for ref in references}
52
52
  reference_to_other_c_column = {ref: f"{ref}_other_C_site" for ref in references}
53
53
 
54
54
  # verify var columns exist and build boolean masks per ref (len = n_vars)
@@ -124,7 +124,7 @@ def append_binary_layer_by_base_context(
124
124
  adata.layers['GpC_site_binary'] = masked_gpc
125
125
  adata.layers['CpG_site_binary'] = masked_cpg
126
126
  adata.layers['GpC_CpG_combined_site_binary'] = combined_sum
127
- adata.layers['any_C_site_binary'] = masked_any_c
127
+ adata.layers['C_site_binary'] = masked_any_c
128
128
  adata.layers['other_C_site_binary'] = masked_other_c
129
129
 
130
130
  if verbose:
@@ -134,7 +134,7 @@ def append_binary_layer_by_base_context(
134
134
  print(f" GpC: {_filled_positions(masked_gpc)}")
135
135
  print(f" CpG: {_filled_positions(masked_cpg)}")
136
136
  print(f" GpC+CpG combined: {_filled_positions(combined_sum)}")
137
- print(f" any_C: {_filled_positions(masked_any_c)}")
137
+ print(f" C: {_filled_positions(masked_any_c)}")
138
138
  print(f" other_C: {_filled_positions(masked_other_c)}")
139
139
 
140
140
  # mark as done
@@ -0,0 +1,17 @@
1
+ import numpy as np
2
+
3
+ def binarize_adata(adata, source="X", target_layer="binary", threshold=0.8):
4
+ """
5
+ Binarize a dense matrix and preserve NaN.
6
+ source: "X" or layer name
7
+ """
8
+ X = adata.X if source == "X" else adata.layers[source]
9
+
10
+ # Copy to avoid modifying original in-place
11
+ X_bin = X.copy()
12
+
13
+ # Where not NaN: apply threshold
14
+ mask = ~np.isnan(X_bin)
15
+ X_bin[mask] = (X_bin[mask] > threshold).astype(np.int8)
16
+
17
+ adata.layers[target_layer] = X_bin
@@ -1,4 +1,4 @@
1
- def binarize_on_Youden(adata, obs_column='Reference'):
1
+ def binarize_on_Youden(adata, obs_column='Reference', output_layer_name='binarized_methylation'):
2
2
  """
3
3
  Binarize SMF values based on position thresholds determined by calculate_position_Youden.
4
4
 
@@ -42,4 +42,4 @@ def binarize_on_Youden(adata, obs_column='Reference'):
42
42
  binarized_methylation[cat_mask, :] = binarized_matrix
43
43
 
44
44
  # Store the binarized matrix in a new layer
45
- adata.layers['binarized_methylation'] = binarized_methylation
45
+ adata.layers[output_layer_name] = binarized_methylation
@@ -103,7 +103,7 @@ def calculate_position_Youden(adata, positive_control_sample='positive', negativ
103
103
  probability_thresholding_list[position] = (0.8, np.nan)
104
104
  title = f'ROC Curve for {n_passed_positions} positions with J-stat greater than {J_threshold}\n out of {n_total_positions} total positions on {cat}'
105
105
  plt.title(title)
106
- save_name = output_directory + f'/{title}'
106
+ save_name = output_directory / f"{title}.png"
107
107
  if save:
108
108
  plt.savefig(save_name)
109
109
  plt.close()
@@ -36,7 +36,7 @@ def calculate_read_modification_stats(adata,
36
36
  site_types = []
37
37
 
38
38
  if any(base in mod_target_bases for base in ['GpC', 'CpG', 'C']):
39
- site_types += ['GpC_site', 'CpG_site', 'ambiguous_GpC_CpG_site', 'other_C_site', 'any_C_site']
39
+ site_types += ['GpC_site', 'CpG_site', 'ambiguous_GpC_CpG_site', 'other_C_site', 'C_site']
40
40
 
41
41
  if 'A' in mod_target_bases:
42
42
  site_types += ['A_site']
@@ -31,9 +31,9 @@ def filter_reads_on_modification_thresholds(
31
31
  - Otherwise, computes the relevant per-read metrics per-reference in batches
32
32
  and writes them into adata.obs before filtering.
33
33
 
34
- Parameters of interest (same semantics as your original function):
34
+ Parameters of interest :
35
35
  - gpc_thresholds, cpg_thresholds, any_c_thresholds, a_thresholds:
36
- each should be [min, max] (floats 0..1) or None.
36
+ each should be [min, max] (floats 0..1) or None. Thresholds are inclusive.
37
37
  - use_other_c_as_background: require GpC/CpG > other_C background (if present).
38
38
  - min_valid_fraction_positions_in_read_vs_ref: minimum fraction of valid sites
39
39
  in the read vs reference (0..1). If None, this check is skipped.
@@ -53,7 +53,7 @@ def filter_reads_on_modification_thresholds(
53
53
  col_pref = {
54
54
  "GpC": ("Fraction_GpC_site_modified", f"Valid_GpC_site_in_read_vs_reference"),
55
55
  "CpG": ("Fraction_CpG_site_modified", f"Valid_CpG_site_in_read_vs_reference"),
56
- "C": ("Fraction_any_C_site_modified", f"Valid_any_C_site_in_read_vs_reference"),
56
+ "C": ("Fraction_C_site_modified", f"Valid_C_site_in_read_vs_reference"),
57
57
  "A": ("Fraction_A_site_modified", f"Valid_A_site_in_read_vs_reference"),
58
58
  }.get(mod_type, (None, None))
59
59
  return (col_pref[0] in adata.obs.columns) and (col_pref[1] in adata.obs.columns)
@@ -99,8 +99,8 @@ def filter_reads_on_modification_thresholds(
99
99
  create_cols["Valid_CpG_site_in_read_vs_reference"] = np.full((n_obs,), np.nan)
100
100
  create_cols["CpG_to_other_C_mod_ratio"] = np.full((n_obs,), np.nan)
101
101
  if "C" in mod_target_bases:
102
- create_cols["Fraction_any_C_site_modified"] = np.full((n_obs,), np.nan)
103
- create_cols["Valid_any_C_site_in_read_vs_reference"] = np.full((n_obs,), np.nan)
102
+ create_cols["Fraction_C_site_modified"] = np.full((n_obs,), np.nan)
103
+ create_cols["Valid_C_site_in_read_vs_reference"] = np.full((n_obs,), np.nan)
104
104
  if "A" in mod_target_bases:
105
105
  create_cols["Fraction_A_site_modified"] = np.full((n_obs,), np.nan)
106
106
  create_cols["Valid_A_site_in_read_vs_reference"] = np.full((n_obs,), np.nan)
@@ -227,7 +227,7 @@ def filter_reads_on_modification_thresholds(
227
227
  # any C
228
228
  if "C" in mod_target_bases:
229
229
  for ref in refs:
230
- _compute_for_ref_and_suffix(ref, "any_C_site", create_cols["Fraction_any_C_site_modified"], create_cols["Valid_any_C_site_in_read_vs_reference"])
230
+ _compute_for_ref_and_suffix(ref, "C_site", create_cols["Fraction_C_site_modified"], create_cols["Valid_C_site_in_read_vs_reference"])
231
231
 
232
232
  # A
233
233
  if "A" in mod_target_bases:
@@ -283,15 +283,15 @@ def filter_reads_on_modification_thresholds(
283
283
  filtered = filtered[filtered.obs["GpC_to_other_C_mod_ratio"].astype(float) > 1]
284
284
  if lo is not None:
285
285
  s0 = filtered.n_obs
286
- filtered = filtered[filtered.obs["Fraction_GpC_site_modified"].astype(float) > lo]
286
+ filtered = filtered[filtered.obs["Fraction_GpC_site_modified"].astype(float) >= lo]
287
287
  print(f"Removed {s0 - filtered.n_obs} reads below min GpC fraction {lo}")
288
288
  if hi is not None:
289
289
  s0 = filtered.n_obs
290
- filtered = filtered[filtered.obs["Fraction_GpC_site_modified"].astype(float) < hi]
290
+ filtered = filtered[filtered.obs["Fraction_GpC_site_modified"].astype(float) <= hi]
291
291
  print(f"Removed {s0 - filtered.n_obs} reads above max GpC fraction {hi}")
292
292
  if (min_valid_fraction_positions_in_read_vs_ref is not None) and ("Valid_GpC_site_in_read_vs_reference" in filtered.obs.columns):
293
293
  s0 = filtered.n_obs
294
- filtered = filtered[filtered.obs["Valid_GpC_site_in_read_vs_reference"].astype(float) > float(min_valid_fraction_positions_in_read_vs_ref)]
294
+ filtered = filtered[filtered.obs["Valid_GpC_site_in_read_vs_reference"].astype(float) >= float(min_valid_fraction_positions_in_read_vs_ref)]
295
295
  print(f"Removed {s0 - filtered.n_obs} reads with insufficient valid GpC site fraction vs ref")
296
296
 
297
297
  # CpG thresholds
@@ -301,15 +301,15 @@ def filter_reads_on_modification_thresholds(
301
301
  filtered = filtered[filtered.obs["CpG_to_other_C_mod_ratio"].astype(float) > 1]
302
302
  if lo is not None:
303
303
  s0 = filtered.n_obs
304
- filtered = filtered[filtered.obs["Fraction_CpG_site_modified"].astype(float) > lo]
304
+ filtered = filtered[filtered.obs["Fraction_CpG_site_modified"].astype(float) >= lo]
305
305
  print(f"Removed {s0 - filtered.n_obs} reads below min CpG fraction {lo}")
306
306
  if hi is not None:
307
307
  s0 = filtered.n_obs
308
- filtered = filtered[filtered.obs["Fraction_CpG_site_modified"].astype(float) < hi]
308
+ filtered = filtered[filtered.obs["Fraction_CpG_site_modified"].astype(float) <= hi]
309
309
  print(f"Removed {s0 - filtered.n_obs} reads above max CpG fraction {hi}")
310
310
  if (min_valid_fraction_positions_in_read_vs_ref is not None) and ("Valid_CpG_site_in_read_vs_reference" in filtered.obs.columns):
311
311
  s0 = filtered.n_obs
312
- filtered = filtered[filtered.obs["Valid_CpG_site_in_read_vs_reference"].astype(float) > float(min_valid_fraction_positions_in_read_vs_ref)]
312
+ filtered = filtered[filtered.obs["Valid_CpG_site_in_read_vs_reference"].astype(float) >= float(min_valid_fraction_positions_in_read_vs_ref)]
313
313
  print(f"Removed {s0 - filtered.n_obs} reads with insufficient valid CpG site fraction vs ref")
314
314
 
315
315
  # any C thresholds
@@ -317,15 +317,15 @@ def filter_reads_on_modification_thresholds(
317
317
  lo, hi = _unpack_minmax(any_c_thresholds)
318
318
  if lo is not None:
319
319
  s0 = filtered.n_obs
320
- filtered = filtered[filtered.obs["Fraction_any_C_site_modified"].astype(float) > lo]
320
+ filtered = filtered[filtered.obs["Fraction_C_site_modified"].astype(float) >= lo]
321
321
  print(f"Removed {s0 - filtered.n_obs} reads below min any-C fraction {lo}")
322
322
  if hi is not None:
323
323
  s0 = filtered.n_obs
324
- filtered = filtered[filtered.obs["Fraction_any_C_site_modified"].astype(float) < hi]
324
+ filtered = filtered[filtered.obs["Fraction_C_site_modified"].astype(float) <= hi]
325
325
  print(f"Removed {s0 - filtered.n_obs} reads above max any-C fraction {hi}")
326
- if (min_valid_fraction_positions_in_read_vs_ref is not None) and ("Valid_any_C_site_in_read_vs_reference" in filtered.obs.columns):
326
+ if (min_valid_fraction_positions_in_read_vs_ref is not None) and ("Valid_C_site_in_read_vs_reference" in filtered.obs.columns):
327
327
  s0 = filtered.n_obs
328
- filtered = filtered[filtered.obs["Valid_any_C_site_in_read_vs_reference"].astype(float) > float(min_valid_fraction_positions_in_read_vs_ref)]
328
+ filtered = filtered[filtered.obs["Valid_C_site_in_read_vs_reference"].astype(float) >= float(min_valid_fraction_positions_in_read_vs_ref)]
329
329
  print(f"Removed {s0 - filtered.n_obs} reads with insufficient valid any-C site fraction vs ref")
330
330
 
331
331
  # A thresholds
@@ -333,15 +333,15 @@ def filter_reads_on_modification_thresholds(
333
333
  lo, hi = _unpack_minmax(a_thresholds)
334
334
  if lo is not None:
335
335
  s0 = filtered.n_obs
336
- filtered = filtered[filtered.obs["Fraction_A_site_modified"].astype(float) > lo]
336
+ filtered = filtered[filtered.obs["Fraction_A_site_modified"].astype(float) >= lo]
337
337
  print(f"Removed {s0 - filtered.n_obs} reads below min A fraction {lo}")
338
338
  if hi is not None:
339
339
  s0 = filtered.n_obs
340
- filtered = filtered[filtered.obs["Fraction_A_site_modified"].astype(float) < hi]
340
+ filtered = filtered[filtered.obs["Fraction_A_site_modified"].astype(float) <= hi]
341
341
  print(f"Removed {s0 - filtered.n_obs} reads above max A fraction {hi}")
342
342
  if (min_valid_fraction_positions_in_read_vs_ref is not None) and ("Valid_A_site_in_read_vs_reference" in filtered.obs.columns):
343
343
  s0 = filtered.n_obs
344
- filtered = filtered[filtered.obs["Valid_A_site_in_read_vs_reference"].astype(float) > float(min_valid_fraction_positions_in_read_vs_ref)]
344
+ filtered = filtered[filtered.obs["Valid_A_site_in_read_vs_reference"].astype(float) >= float(min_valid_fraction_positions_in_read_vs_ref)]
345
345
  print(f"Removed {s0 - filtered.n_obs} reads with insufficient valid A site fraction vs ref")
346
346
 
347
347
  filtered = filtered.copy()
@@ -13,7 +13,7 @@ import pandas as pd
13
13
  import matplotlib.pyplot as plt
14
14
  from tqdm import tqdm
15
15
 
16
- from ..informatics.helpers import make_dirs
16
+ from ..readwrite import make_dirs
17
17
 
18
18
  # optional imports for clustering / PCA / KDE
19
19
  try: