smftools-0.2.3-py3-none-any.whl → smftools-0.2.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. smftools/_version.py +1 -1
  2. smftools/cli/helpers.py +48 -0
  3. smftools/cli/hmm_adata.py +168 -145
  4. smftools/cli/load_adata.py +155 -95
  5. smftools/cli/preprocess_adata.py +222 -130
  6. smftools/cli/spatial_adata.py +441 -308
  7. smftools/cli_entry.py +4 -5
  8. smftools/config/conversion.yaml +12 -5
  9. smftools/config/deaminase.yaml +11 -9
  10. smftools/config/default.yaml +123 -19
  11. smftools/config/direct.yaml +3 -0
  12. smftools/config/experiment_config.py +120 -19
  13. smftools/hmm/HMM.py +12 -1
  14. smftools/hmm/__init__.py +0 -6
  15. smftools/hmm/archived/call_hmm_peaks.py +106 -0
  16. smftools/hmm/call_hmm_peaks.py +318 -90
  17. smftools/informatics/bam_functions.py +28 -29
  18. smftools/informatics/h5ad_functions.py +1 -1
  19. smftools/plotting/general_plotting.py +97 -51
  20. smftools/plotting/position_stats.py +3 -3
  21. smftools/preprocessing/__init__.py +2 -4
  22. smftools/preprocessing/append_base_context.py +34 -25
  23. smftools/preprocessing/append_binary_layer_by_base_context.py +2 -2
  24. smftools/preprocessing/binarize_on_Youden.py +10 -8
  25. smftools/preprocessing/calculate_complexity_II.py +1 -1
  26. smftools/preprocessing/calculate_coverage.py +16 -13
  27. smftools/preprocessing/calculate_position_Youden.py +41 -25
  28. smftools/preprocessing/calculate_read_modification_stats.py +1 -1
  29. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +1 -1
  30. smftools/preprocessing/filter_reads_on_modification_thresholds.py +1 -1
  31. smftools/preprocessing/flag_duplicate_reads.py +1 -1
  32. smftools/preprocessing/invert_adata.py +1 -1
  33. smftools/preprocessing/load_sample_sheet.py +1 -1
  34. smftools/preprocessing/reindex_references_adata.py +37 -0
  35. smftools/readwrite.py +94 -0
  36. {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/METADATA +18 -12
  37. {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/RECORD +46 -43
  38. /smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
  39. /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
  40. /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
  41. /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
  42. /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archives/add_read_length_and_mapping_qc.py} +0 -0
  43. /smftools/preprocessing/{calculate_complexity.py → archives/calculate_complexity.py} +0 -0
  44. {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/WHEEL +0 -0
  45. {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/entry_points.txt +0 -0
  46. {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/licenses/LICENSE +0 -0
smftools/preprocessing/binarize_on_Youden.py CHANGED
@@ -1,4 +1,6 @@
-def binarize_on_Youden(adata, obs_column='Reference', output_layer_name='binarized_methylation'):
+def binarize_on_Youden(adata,
+                       ref_column='Reference_strand',
+                       output_layer_name='binarized_methylation'):
     """
     Binarize SMF values based on position thresholds determined by calculate_position_Youden.

@@ -16,18 +18,18 @@ def binarize_on_Youden(adata, obs_column='Reference', output_layer_name='binariz
     binarized_methylation = np.full_like(adata.X, np.nan, dtype=float) # Keeps same shape as adata.X

     # Get unique categories
-    categories = adata.obs[obs_column].cat.categories
+    references = adata.obs[ref_column].cat.categories

-    for cat in categories:
+    for ref in references:
         # Select subset for this category
-        cat_mask = adata.obs[obs_column] == cat
-        cat_subset = adata[cat_mask]
+        ref_mask = adata.obs[ref_column] == ref
+        ref_subset = adata[ref_mask]

         # Extract the probability matrix
-        original_matrix = cat_subset.X.copy()
+        original_matrix = ref_subset.X.copy()

         # Extract the thresholds for each position efficiently
-        thresholds = np.array(cat_subset.var[f'{cat}_position_methylation_thresholding_Youden_stats'].apply(lambda x: x[0]))
+        thresholds = np.array(ref_subset.var[f'{ref}_position_methylation_thresholding_Youden_stats'].apply(lambda x: x[0]))

         # Identify NaN values
         nan_mask = np.isnan(original_matrix)
@@ -39,7 +41,7 @@ def binarize_on_Youden(adata, obs_column='Reference', output_layer_name='binariz
         binarized_matrix[nan_mask] = np.nan

         # Assign the binarized values back into the preallocated storage
-        binarized_methylation[cat_mask, :] = binarized_matrix
+        binarized_methylation[ref_subset, :] = binarized_matrix

     # Store the binarized matrix in a new layer
     adata.layers[output_layer_name] = binarized_methylation
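The hunk above renames the grouping argument from `obs_column` to `ref_column` (defaulting to `'Reference_strand'`). A minimal usage sketch under the new signature, assuming these helpers are re-exported from `smftools.preprocessing` and that `adata` holds per-read methylation probabilities in `adata.X`:

```python
# Hedged usage sketch; sample values are illustrative, not taken from the package.
import smftools.preprocessing as pp

# Per-position thresholds must exist first; calculate_position_Youden writes them to
# adata.var['<ref>_position_methylation_thresholding_Youden_stats'].
pp.calculate_position_Youden(adata, ref_column='Reference_strand', infer_on_percentile=5)

# Threshold adata.X position-by-position and store the result as a new layer.
pp.binarize_on_Youden(adata, ref_column='Reference_strand',
                      output_layer_name='binarized_methylation')

binary = adata.layers['binarized_methylation']  # same shape as adata.X; NaNs preserved
```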
smftools/preprocessing/calculate_complexity_II.py CHANGED
@@ -11,7 +11,7 @@ def calculate_complexity_II(
     n_depths=12,
     random_state=0,
     csv_summary=True,
-    uns_flag='complexity_analysis_complete',
+    uns_flag='calculate_complexity_II_performed',
     force_redo=False,
     bypass=False
 ):
smftools/preprocessing/calculate_coverage.py CHANGED
@@ -1,4 +1,7 @@
-def calculate_coverage(adata, obs_column='Reference_strand', position_nan_threshold=0.00001, uns_flag='positional_coverage_calculated'):
+def calculate_coverage(adata,
+                       ref_column='Reference_strand',
+                       position_nan_threshold=0.01,
+                       uns_flag='calculate_coverage_performed'):
     """
     Append position-level metadata regarding whether the position is informative within the given observation category.

@@ -20,32 +23,32 @@ def calculate_coverage(adata, obs_column='Reference_strand', position_nan_thresh
         # QC already performed; nothing to do
         return

-    categories = adata.obs[obs_column].cat.categories
+    references = adata.obs[ref_column].cat.categories
    n_categories_with_position = np.zeros(adata.shape[1])

-    # Loop over categories
-    for cat in categories:
-        print(f'Assessing positional coverage across samples for {cat} reference')
+    # Loop over references
+    for ref in references:
+        print(f'Assessing positional coverage across samples for {ref} reference')

         # Subset to current category
-        cat_mask = adata.obs[obs_column] == cat
-        temp_cat_adata = adata[cat_mask]
+        ref_mask = adata.obs[ref_column] == ref
+        temp_ref_adata = adata[ref_mask]

         # Compute fraction of valid coverage
-        cat_valid_coverage = np.sum(~np.isnan(temp_cat_adata.X), axis=0)
-        cat_valid_fraction = cat_valid_coverage / temp_cat_adata.shape[0] # Avoid extra computation
+        ref_valid_coverage = np.sum(~np.isnan(temp_ref_adata.X), axis=0)
+        ref_valid_fraction = ref_valid_coverage / temp_ref_adata.shape[0] # Avoid extra computation

         # Store coverage stats
-        adata.var[f'{cat}_valid_fraction'] = pd.Series(cat_valid_fraction, index=adata.var.index)
+        adata.var[f'{ref}_valid_fraction'] = pd.Series(ref_valid_fraction, index=adata.var.index)

         # Assign whether the position is covered based on threshold
-        adata.var[f'position_in_{cat}'] = cat_valid_fraction >= position_nan_threshold
+        adata.var[f'position_in_{ref}'] = ref_valid_fraction >= position_nan_threshold

         # Sum the number of categories covering each position
-        n_categories_with_position += adata.var[f'position_in_{cat}'].values
+        n_categories_with_position += adata.var[f'position_in_{ref}'].values

     # Store final category count
-    adata.var[f'N_{obs_column}_with_position'] = n_categories_with_position.astype(int)
+    adata.var[f'N_{ref_column}_with_position'] = n_categories_with_position.astype(int)

     # mark as done
     adata.uns[uns_flag] = True
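After the hunk above runs, each reference gets a `<ref>_valid_fraction` column and a boolean `position_in_<ref>` column in `adata.var`, plus a cross-reference count. A short sketch of how those columns might be consumed, assuming the function is re-exported from `smftools.preprocessing` and using `'chrX_fwd'` as a purely hypothetical reference name:

```python
# Illustrative only; 'chrX_fwd' is a made-up Reference_strand category.
import smftools.preprocessing as pp

pp.calculate_coverage(adata, ref_column='Reference_strand', position_nan_threshold=0.01)

covered = adata.var['position_in_chrX_fwd']              # bool: position passes the coverage threshold
frac    = adata.var['chrX_fwd_valid_fraction']           # fraction of reads with a non-NaN call
shared  = adata.var['N_Reference_strand_with_position']  # how many references cover each position

# Keep only positions covered in at least one reference
adata_covered = adata[:, shared > 0].copy()
```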
smftools/preprocessing/calculate_position_Youden.py CHANGED
@@ -1,7 +1,15 @@
 ## calculate_position_Youden
-
 ## Calculating and applying position level thresholds for methylation calls to binarize the SMF data
-def calculate_position_Youden(adata, positive_control_sample='positive', negative_control_sample='negative', J_threshold=0.5, obs_column='Reference', infer_on_percentile=False, inference_variable='', save=False, output_directory=''):
+def calculate_position_Youden(adata,
+                              positive_control_sample=None,
+                              negative_control_sample=None,
+                              J_threshold=0.5,
+                              ref_column='Reference_strand',
+                              sample_column='Sample_names',
+                              infer_on_percentile=True,
+                              inference_variable='Raw_modification_signal',
+                              save=False,
+                              output_directory=''):
     """
     Adds new variable metadata to each position indicating whether the position provides reliable SMF methylation calls. Also outputs plots of the positional ROC curves.

@@ -26,28 +34,36 @@ def calculate_position_Youden(adata, positive_control_sample='positive', negativ
     from sklearn.metrics import roc_curve, roc_auc_score

     control_samples = [positive_control_sample, negative_control_sample]
-    categories = adata.obs[obs_column].cat.categories
+    references = adata.obs[ref_column].cat.categories
     # Iterate over each category in the specified obs_column
-    for cat in categories:
-        print(f"Calculating position Youden statistics for {cat}")
+    for ref in references:
+        print(f"Calculating position Youden statistics for {ref}")
         # Subset to keep only reads associated with the category
-        cat_subset = adata[adata.obs[obs_column] == cat]
+        ref_subset = adata[adata.obs[ref_column] == ref]
         # Iterate over positive and negative control samples
-        for control in control_samples:
+        for i, control in enumerate(control_samples):
            # Initialize a dictionary for the given control sample. This will be keyed by dataset and position to point to a tuple of coordinate position and an array of methylation probabilities
-            adata.uns[f'{cat}_position_methylation_dict_{control}'] = {}
-            if infer_on_percentile:
-                sorted_column = cat_subset.obs[inference_variable].sort_values(ascending=False)
-                if control == "positive":
+            adata.uns[f'{ref}_position_methylation_dict_{control}'] = {}
+            # If controls are not passed and infer on percentile is True, infer thresholds based on top and bottom percentile windows for a given obs column metric.
+            if infer_on_percentile and not control:
+                sorted_column = ref_subset.obs[inference_variable].sort_values(ascending=False)
+                if i == 0:
+                    control == 'positive'
+                    positive_control_sample = control
                     threshold = np.percentile(sorted_column, 100 - infer_on_percentile)
-                    control_subset = cat_subset[cat_subset.obs[inference_variable] >= threshold, :]
+                    control_subset = ref_subset[ref_subset.obs[inference_variable] >= threshold, :]
                 else:
+                    control == 'negative'
+                    negative_control_sample = control
                     threshold = np.percentile(sorted_column, infer_on_percentile)
-                    control_subset = cat_subset[cat_subset.obs[inference_variable] <= threshold, :]
+                    control_subset = ref_subset[ref_subset.obs[inference_variable] <= threshold, :]
+            elif not infer_on_percentile and not control:
+                print("Can not threshold Anndata on Youden threshold. Need to either provide control samples or set infer_on_percentile to True")
+                return
             else:
                 # get the current control subset on the given category
-                filtered_obs = cat_subset.obs[cat_subset.obs['Sample_names'].str.contains(control, na=False, regex=True)]
-                control_subset = cat_subset[filtered_obs.index]
+                filtered_obs = ref_subset.obs[ref_subset.obs[sample_column] == control]
+                control_subset = ref_subset[filtered_obs.index]
             # Iterate through every position in the control subset
             for position in range(control_subset.shape[1]):
                 # Get the coordinate name associated with that position
@@ -63,9 +79,9 @@ def calculate_position_Youden(adata, positive_control_sample='positive', negativ
                 # Get fraction coverage
                 fraction_coverage = position_coverage / control_subset.shape[0]
                 # Save the position and the position methylation data for the control subset
-                adata.uns[f'{cat}_position_methylation_dict_{control}'][f'{position}'] = (position, position_data, fraction_coverage)
+                adata.uns[f'{ref}_position_methylation_dict_{control}'][f'{position}'] = (position, position_data, fraction_coverage)

-    for cat in categories:
+    for ref in references:
         fig, ax = plt.subplots(figsize=(6, 4))
         plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
         plt.xlabel('False Positive Rate')
@@ -76,13 +92,13 @@ def calculate_position_Youden(adata, positive_control_sample='positive', negativ
         n_total_positions = 0
         # Initialize a list that will hold the positional thresholds for the category
         probability_thresholding_list = [(np.nan, np.nan)] * adata.shape[1]
-        for i, key in enumerate(adata.uns[f'{cat}_position_methylation_dict_{positive_control_sample}'].keys()):
-            position = int(adata.uns[f'{cat}_position_methylation_dict_{positive_control_sample}'][key][0])
-            positive_position_array = adata.uns[f'{cat}_position_methylation_dict_{positive_control_sample}'][key][1]
-            fraction_coverage = adata.uns[f'{cat}_position_methylation_dict_{positive_control_sample}'][key][2]
+        for i, key in enumerate(adata.uns[f'{ref}_position_methylation_dict_{positive_control_sample}'].keys()):
+            position = int(adata.uns[f'{ref}_position_methylation_dict_{positive_control_sample}'][key][0])
+            positive_position_array = adata.uns[f'{ref}_position_methylation_dict_{positive_control_sample}'][key][1]
+            fraction_coverage = adata.uns[f'{ref}_position_methylation_dict_{positive_control_sample}'][key][2]
             if fraction_coverage > 0.2:
                 try:
-                    negative_position_array = adata.uns[f'{cat}_position_methylation_dict_{negative_control_sample}'][key][1]
+                    negative_position_array = adata.uns[f'{ref}_position_methylation_dict_{negative_control_sample}'][key][1]
                     # Combine the negative and positive control data
                     data = np.concatenate([negative_position_array, positive_position_array])
                     labels = np.array([0] * len(negative_position_array) + [1] * len(positive_position_array))
@@ -101,7 +117,7 @@ def calculate_position_Youden(adata, positive_control_sample='positive', negativ
                     plt.plot(fpr, tpr, label='ROC curve')
                 except:
                     probability_thresholding_list[position] = (0.8, np.nan)
-        title = f'ROC Curve for {n_passed_positions} positions with J-stat greater than {J_threshold}\n out of {n_total_positions} total positions on {cat}'
+        title = f'ROC Curve for {n_passed_positions} positions with J-stat greater than {J_threshold}\n out of {n_total_positions} total positions on {ref}'
         plt.title(title)
         save_name = output_directory / f"{title}.png"
         if save:
@@ -110,6 +126,6 @@ def calculate_position_Youden(adata, positive_control_sample='positive', negativ
         else:
             plt.show()

-    adata.var[f'{cat}_position_methylation_thresholding_Youden_stats'] = probability_thresholding_list
+    adata.var[f'{ref}_position_methylation_thresholding_Youden_stats'] = probability_thresholding_list
     J_max_list = [probability_thresholding_list[i][1] for i in range(adata.shape[1])]
-    adata.var[f'{cat}_position_passed_QC'] = [True if i > J_threshold else False for i in J_max_list]
+    adata.var[f'{ref}_position_passed_QC'] = [True if i > J_threshold else False for i in J_max_list]
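With the new defaults, explicit control samples are optional: when `positive_control_sample`/`negative_control_sample` are left as `None` and `infer_on_percentile` is set, the top and bottom percentile windows of `inference_variable` stand in for the controls. Two hedged call patterns, assuming the function is re-exported from `smftools.preprocessing` and that the quoted sample names actually exist in `adata.obs['Sample_names']`:

```python
# Hypothetical sample names; substitute values present in adata.obs['Sample_names'].
import smftools.preprocessing as pp

# 1) Explicit positive/negative control samples
pp.calculate_position_Youden(adata,
                             positive_control_sample='fully_methylated_control',
                             negative_control_sample='unmethylated_control',
                             ref_column='Reference_strand',
                             sample_column='Sample_names',
                             infer_on_percentile=False,
                             J_threshold=0.5)

# 2) No controls: infer them from the top/bottom 5% of an obs metric
pp.calculate_position_Youden(adata,
                             infer_on_percentile=5,
                             inference_variable='Raw_modification_signal',
                             ref_column='Reference_strand')

# Per-reference QC flags end up in adata.var['<ref>_position_passed_QC']
```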
smftools/preprocessing/calculate_read_modification_stats.py CHANGED
@@ -2,7 +2,7 @@ def calculate_read_modification_stats(adata,
     reference_column,
     sample_names_col,
     mod_target_bases,
-    uns_flag="read_modification_stats_calculated",
+    uns_flag="calculate_read_modification_stats_performed",
     bypass=False,
     force_redo=False
 ):
smftools/preprocessing/filter_reads_on_length_quality_mapping.py CHANGED
@@ -11,7 +11,7 @@ def filter_reads_on_length_quality_mapping(
     length_ratio: Optional[Sequence[float]] = None,    # e.g. [min, max]
     read_quality: Optional[Sequence[float]] = None,    # e.g. [min, max] (commonly min only)
     mapping_quality: Optional[Sequence[float]] = None,  # e.g. [min, max] (commonly min only)
-    uns_flag: str = "reads_removed_failing_length_quality_mapping_qc",
+    uns_flag: str = "filter_reads_on_length_quality_mapping_performed",
     bypass: bool = False,
     force_redo: bool = True
 ) -> ad.AnnData:
smftools/preprocessing/filter_reads_on_modification_thresholds.py CHANGED
@@ -15,7 +15,7 @@ def filter_reads_on_modification_thresholds(
     a_thresholds: Optional[Sequence[float]] = None,
     use_other_c_as_background: bool = False,
     min_valid_fraction_positions_in_read_vs_ref: Optional[float] = None,
-    uns_flag: str = 'reads_filtered_on_modification_thresholds',
+    uns_flag: str = 'filter_reads_on_modification_thresholds_performed',
     bypass: bool = False,
     force_redo: bool = False,
     reference_column: str = 'Reference_strand',
smftools/preprocessing/flag_duplicate_reads.py CHANGED
@@ -77,7 +77,7 @@ def flag_duplicate_reads(
     sample_col: str = "Barcode",
     output_directory: Optional[str] = None,
     metric_keys: Union[str, List[str]] = ("Fraction_any_C_site_modified",),
-    uns_flag: str = "read_duplicate_detection_performed",
+    uns_flag: str = "flag_duplicate_reads_performed",
     uns_filtered_flag: str = "read_duplicates_removed",
     bypass: bool = False,
     force_redo: bool = False,
smftools/preprocessing/invert_adata.py CHANGED
@@ -1,6 +1,6 @@
 ## invert_adata

-def invert_adata(adata, uns_flag='adata_positions_inverted', force_redo=False):
+def invert_adata(adata, uns_flag='invert_adata_performed', force_redo=False):
     """
     Inverts the AnnData object along the column (variable) axis.

smftools/preprocessing/load_sample_sheet.py CHANGED
@@ -2,7 +2,7 @@ def load_sample_sheet(adata,
     sample_sheet_path,
     mapping_key_column='obs_names',
     as_category=True,
-    uns_flag='sample_sheet_loaded',
+    uns_flag='load_sample_sheet_performed',
     force_reload=True
 ):
     """
smftools/preprocessing/reindex_references_adata.py ADDED
@@ -0,0 +1,37 @@
+## reindex_references_adata
+
+def reindex_references_adata(adata,
+                             reference_col="Reference_strand",
+                             offsets=None,
+                             new_col="reindexed",
+                             uns_flag='reindex_references_adata_performed',
+                             force_redo=False):
+
+    # Only run if not already performed
+    already = bool(adata.uns.get(uns_flag, False))
+    if (already and not force_redo):
+        return None
+
+    if offsets is None:
+        pass
+    else:
+        # Ensure var_names are numeric
+        var_coords = adata.var_names.astype(int)
+
+        for ref in adata.obs[reference_col].unique():
+            if ref not in offsets:
+                pass
+            else:
+                offset_value = offsets[ref]
+
+                # Create a new var column for this reference
+                colname = f"{ref}_{new_col}"
+
+                # Add offset to all var positions
+                adata.var[colname] = var_coords + offset_value
+
+    # mark as done
+    adata.uns[uns_flag] = True
+
+    print("Reindexing complete!")
+    return None
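A usage sketch for the new helper, assuming it is re-exported from `smftools.preprocessing`, that `adata.var_names` are integer-like position strings, and using a made-up offsets mapping keyed by reference name:

```python
# Offsets below are illustrative genomic start coordinates, keyed by Reference_strand value.
from smftools.preprocessing import reindex_references_adata

offsets = {'promoter_fwd': 48_500_000, 'promoter_rev': 48_500_000}
reindex_references_adata(adata,
                         reference_col='Reference_strand',
                         offsets=offsets,
                         new_col='reindexed')

# For each reference present in offsets, a var column such as 'promoter_fwd_reindexed'
# now holds int(var_names) + offset for that reference.
```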
smftools/readwrite.py CHANGED
@@ -722,6 +722,100 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
             print(" -", e)

     print("=== end report ===\n")
+
+    # ---------- create CSV output directory ----------
+    try:
+        csv_dir = path.parent / "csvs"
+        csv_dir.mkdir(exist_ok=True)
+        if verbose:
+            print(f"CSV outputs will be written to: {csv_dir}")
+    except Exception as e:
+        msg = f"Failed to create CSV output directory: {e}"
+        report['errors'].append(msg)
+        if verbose:
+            print(msg)
+        csv_dir = path.parent  # fallback just in case
+
+    # ---------- write keys summary CSV ----------
+    try:
+        meta_rows = []
+
+        # obs columns
+        for col in adata_copy.obs.columns:
+            meta_rows.append({
+                "kind": "obs",
+                "name": col,
+                "dtype": str(adata_copy.obs[col].dtype),
+            })
+
+        # var columns
+        for col in adata_copy.var.columns:
+            meta_rows.append({
+                "kind": "var",
+                "name": col,
+                "dtype": str(adata_copy.var[col].dtype),
+            })
+
+        # layers
+        for k, v in adata_copy.layers.items():
+            meta_rows.append({
+                "kind": "layer",
+                "name": k,
+                "dtype": str(np.asarray(v).dtype),
+            })
+
+        # obsm
+        for k, v in adata_copy.obsm.items():
+            meta_rows.append({
+                "kind": "obsm",
+                "name": k,
+                "dtype": str(np.asarray(v).dtype),
+            })
+
+        # uns
+        for k, v in adata_copy.uns.items():
+            meta_rows.append({
+                "kind": "uns",
+                "name": k,
+                "dtype": type(v).__name__,
+            })
+
+        meta_df = pd.DataFrame(meta_rows)
+
+        # same base name, inside csvs/
+        base = path.stem  # removes .h5ad
+        meta_path = csv_dir / f"{base}.keys.csv"
+
+        meta_df.to_csv(meta_path, index=False)
+        if verbose:
+            print(f"Wrote keys summary CSV to {meta_path}")
+
+    except Exception as e:
+        msg = f"Failed to write keys CSV: {e}"
+        report["errors"].append(msg)
+        if verbose:
+            print(msg)
+
+    # ---------- write full obs and var dataframes ----------
+    try:
+        base = path.stem
+
+        obs_path = csv_dir / f"{base}.obs.csv"
+        var_path = csv_dir / f"{base}.var.csv"
+
+        adata_copy.obs.to_csv(obs_path, index=True)
+        adata_copy.var.to_csv(var_path, index=True)
+
+        if verbose:
+            print(f"Wrote obs DataFrame to {obs_path}")
+            print(f"Wrote var DataFrame to {var_path}")
+
+    except Exception as e:
+        msg = f"Failed to write obs/var CSVs: {e}"
+        report["errors"].append(msg)
+        if verbose:
+            print(msg)
+
     return report

 def safe_read_h5ad(path, backup_dir=None, restore_backups=True, re_categorize=True, categorical_threshold=100, verbose=True):
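The block appended to `safe_write_h5ad` writes three CSV sidecars next to the `.h5ad`. A short sketch of the resulting layout for a hypothetical output path, assuming the function accepts a `pathlib.Path` as shown in the hunk:

```python
# Hypothetical paths; the csvs/ layout and file names follow the hunk above.
from pathlib import Path
from smftools.readwrite import safe_write_h5ad

report = safe_write_h5ad(adata, Path("results/experiment1.h5ad"))

# Written alongside the .h5ad:
#   results/csvs/experiment1.keys.csv  -> kind,name,dtype rows for obs, var, layers, obsm, uns
#   results/csvs/experiment1.obs.csv   -> full obs DataFrame
#   results/csvs/experiment1.var.csv   -> full var DataFrame
# Failures are appended to report['errors'] rather than raised.
```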
{smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: smftools
-Version: 0.2.3
+Version: 0.2.4
 Summary: Single Molecule Footprinting Analysis in Python.
 Project-URL: Source, https://github.com/jkmckenna/smftools
 Project-URL: Documentation, https://smftools.readthedocs.io/
@@ -96,30 +96,36 @@ Description-Content-Type: text/markdown
 [![Docs](https://readthedocs.org/projects/smftools/badge/?version=latest)](https://smftools.readthedocs.io/en/latest/?badge=latest)

 # smftools
-A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. Additional functionality for preprocessing, analysis, and visualization.
+A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. Additional functionality for preprocessing, spatial analyses, and HMM based feature annotation.

 ## Philosophy
-While most genomic data structures handle low-coverage data (<100X) along large references, smftools prioritizes high-coverage data (scalable to >1,000,000X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
+While genomic data structures (SAM/BAM) were built to handle low-coverage data (<1000X) along large references, smftools prioritizes high-coverage data (scalable to >1,000,000X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.

 ## Dependencies
 The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
 1) [Dorado](https://github.com/nanoporetech/dorado) -> Basecalling, alignment, demultiplexing.
 2) [Minimap2](https://github.com/lh3/minimap2) -> Alignment if not using dorado.
-3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read level methylation metrics from modified BAM files.
+3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read level methylation metrics from modified BAM files. Only required for direct modification detection (ie methylation).

-## Modules
-### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
+## Main Commands
+### smftools load: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
 ![](docs/source/_static/smftools_informatics_diagram.png)
-### Preprocessing: Appends QC metrics to the AnnData object and performs filtering.
+### smftools preprocess: Appends QC metrics to the AnnData object and performs filtering.
 ![](docs/source/_static/smftools_preprocessing_diagram.png)
-### Tools: Appends analyses to the AnnData object.
-- Currently Includes: Position X Position correlation matrices, Hidden Markov Model feature detection, clustering, dimensionality reduction, peak calling, train/test workflows for various ML classifiers.
-- To do: Additional ML methods for learning predictive single molecule features on condition labels: Autoencoders, Variational Autoencoders, Transformers.
-### Plotting: Visualization of analyses stored within the AnnData object.
-- Most analyses appended to the adata object by a tools method have, or will have, an accompanying plotting method.
+### smftools spatial: Appends spatial analyses to the AnnData object.
+- Currently Includes: Position X Position correlation matrices, clustering, dimensionality reduction, spatial autocorrelation.
+### smftools hmm: Fits a basic HMM to each sample and appends HMM feature layers
+- Main outputs wills be stored in adata.layers
+### smftools batch <command>: Performs batch processing on a csv of config file pathes for any of the above commands.
+- Nice when analyzing multiple experiments
+### smftools concatenate: Concatenates a list or directory of anndata objects.
+- Mainly used for combining multiple experiments into a single anndata object.

 ## Announcements

+### 12/02/25 - Version 0.2.3 is available through PyPI
+Version 0.2.3 provides the core smftools functionality through several command line commands (load, preprocess, spatial, hmm).
+
 ### 11/05/25 - Version 0.2.1 is available through PyPI
 Version 0.2.1 makes the core workflow (smftools load) a command line tool that takes in an experiment_config.csv file for input/output and parameter management.

{smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/RECORD CHANGED
@@ -1,44 +1,46 @@
 smftools/__init__.py,sha256=aZlrZBVexf_nEnzQeZu7NU_Kp6OnxcYpLo1KPImi7sI,599
 smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
-smftools/_version.py,sha256=X0PliCRFAeVnSTceUeHX1eM0j1HFhGFDWCRxLdde2Bs,21
-smftools/cli_entry.py,sha256=_QdtEKcVK5o-e5s9ETB9sOIdftPVlrDxvvjBKcP6YNk,14680
-smftools/readwrite.py,sha256=ExKZHNZ0QB-PtSck08drXfHTqbPeSUTHiYhv951SH1s,45994
+smftools/_version.py,sha256=k2uKAAzDEmm1BIVWeztFlHrCh9fq64H6szFcsXW7tvs,21
+smftools/cli_entry.py,sha256=LvobMVtEb_jrLZScoWCB-OBjUMue9JQBXJZW1oMbHnw,14618
+smftools/readwrite.py,sha256=mbuCKj7LfEKp4bDBxxxMiaTddMwblwURpcCKpgmU6Sw,48678
 smftools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-smftools/cli/cli_flows.py,sha256=xRiFUThoAL3LX1xdXaHVg4LjyJI4uNpGsc9aQ_wVCto,4941
-smftools/cli/hmm_adata.py,sha256=PApUJW0lO4kcLjsiqqQopXgL3Dg-AascIqJrgvSY1Rg,15916
-smftools/cli/load_adata.py,sha256=Qt1ej-osyJ47fpBkGaSDgR1F8E4aBNAdcXeBAGM-Lqg,29100
-smftools/cli/preprocess_adata.py,sha256=EKGbSTli7qvL44OQUmMalYJjsH9vn3w4Rx7U7BL0ybs,20991
-smftools/cli/spatial_adata.py,sha256=AX6iyBfbXud9actteTvDuaQUU_SE3SyBIeknR317g34,30212
+smftools/cli/helpers.py,sha256=tgjxUlOIhFGCLGD2ON7zlD45UPx93vENM82mM_BpLFk,1281
+smftools/cli/hmm_adata.py,sha256=2ria8u6cCBQnzX_GjUUO3wBVOd7a4m3Al-vzwk0OasQ,17728
+smftools/cli/load_adata.py,sha256=W4NgbM28wOzQHkLnZNILJyblRgee-O4oLnNZcyPDCXc,30486
+smftools/cli/preprocess_adata.py,sha256=g9aHQ1DSScb4zx8RfpCjcEmam6APWHiu8Ow0sza6D2Y,22203
+smftools/cli/spatial_adata.py,sha256=pp0KLK8d-MYjl_hF1ziDVKc6uOJGDDDbKNQELQcRUa8,28980
+smftools/cli/archived/cli_flows.py,sha256=xRiFUThoAL3LX1xdXaHVg4LjyJI4uNpGsc9aQ_wVCto,4941
 smftools/config/__init__.py,sha256=ObUnnR7aRSoD_uvpmsxA_BUFt4NOOfWNopDVCqjp7tg,69
-smftools/config/conversion.yaml,sha256=HrFz2f9QRe1RuhmgU6ZtMHaM4ZzY61_aLcugsmpV40Q,969
-smftools/config/deaminase.yaml,sha256=mw2aY222y2xg08Rs5CWvjlrXo3vaEim7JwBThA80y4o,1349
-smftools/config/default.yaml,sha256=3IrX0OrUyjhVc3CqTjM8uiprKWrrBdVtil4YhtVzKdQ,10233
-smftools/config/direct.yaml,sha256=SBhdtG7PKm-z5xxQmA7JV3NQsGnUJ4p58fGH8BnoMrM,2137
+smftools/config/conversion.yaml,sha256=07dKEXykQeP5VoVxa4xst-tcbSX4B6ErqyqtWJ5RCKk,1177
+smftools/config/deaminase.yaml,sha256=okXdMFAghUAsDyx6P5Kru7ydF2bcbrhMPOaMpXlZPGM,1359
+smftools/config/default.yaml,sha256=cKUUxVkH42kkHQM82mNJC8bfcak6lY063AnIif5o-1g,13071
+smftools/config/direct.yaml,sha256=s30JbOTOOdIiBIefPSEi72YABHnfcCyFXj9WwZ7duJQ,2173
 smftools/config/discover_input_files.py,sha256=G9vyAmK_n_8Ur5dOnumevVLG3ydHchMy_JQrJdiuuz0,3892
-smftools/config/experiment_config.py,sha256=d_6f_Uv3CY-1orHbxpHtAZDsY2gwxw079_pNgR9wDUg,58837
+smftools/config/experiment_config.py,sha256=f7hVIc9ShUZk852Ypp6Dfelus8iKFHrSbThiyhpuQsE,63259
 smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
 smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
 smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
 smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
 smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
-smftools/hmm/HMM.py,sha256=K8rt-EHn3ylIHpQ3dHf_OZCXxCBVSS2UWTgSGOatwHw,71046
-smftools/hmm/__init__.py,sha256=BkX145eGVy-kFOtyqOcu-Hzv9ZJLDQ3cfDe51eKBTwY,585
-smftools/hmm/apply_hmm_batched.py,sha256=BBeJ8DiIuuMWzLwtDdk2DO2vvrfLCrVe4JtRYPFItIU,10648
-smftools/hmm/calculate_distances.py,sha256=KDWimQ6u-coyxCKrbTm42Fh_Alf_gURBZ0vfFaem848,644
-smftools/hmm/call_hmm_peaks.py,sha256=T-3Ld8H4t3Mgg2whBTYP9s2QL7rY-9RIzVCgB6avKhE,4625
+smftools/hmm/HMM.py,sha256=Y7YB-45HoLN--JloajoLBgC0rIYmHuWHDfmKRXfFuFk,71458
+smftools/hmm/__init__.py,sha256=_-plMbL5xq8d0szNIYgUrgUwdb8oybuyTn6jned8eSU,382
+smftools/hmm/call_hmm_peaks.py,sha256=BMlwDh-_k8bzqRn4LSYuTk3dCcUoNYHp8eohvWYNn7A,14573
 smftools/hmm/display_hmm.py,sha256=3WuQCPvM3wPfzAdgbhfiBTd0g5mQdx9HTUdqAxs2aj4,825
 smftools/hmm/hmm_readwrite.py,sha256=DjJ3hunpBQ7N0GVvxL7-0QUas_SkA88LVgL72mVK2cI,359
 smftools/hmm/nucleosome_hmm_refinement.py,sha256=nQWimvse6dclcXhbU707rGbRVMKHM0mU_ZhH9g2yCMA,4641
-smftools/hmm/train_hmm.py,sha256=srzRcB9LEmNuHyBM0R5Z0VEnxecifQt-MoaJhADxGT8,2477
+smftools/hmm/archived/apply_hmm_batched.py,sha256=BBeJ8DiIuuMWzLwtDdk2DO2vvrfLCrVe4JtRYPFItIU,10648
+smftools/hmm/archived/calculate_distances.py,sha256=KDWimQ6u-coyxCKrbTm42Fh_Alf_gURBZ0vfFaem848,644
+smftools/hmm/archived/call_hmm_peaks.py,sha256=T-3Ld8H4t3Mgg2whBTYP9s2QL7rY-9RIzVCgB6avKhE,4625
+smftools/hmm/archived/train_hmm.py,sha256=srzRcB9LEmNuHyBM0R5Z0VEnxecifQt-MoaJhADxGT8,2477
 smftools/informatics/__init__.py,sha256=vLvSrCtCVYRUCCNLW7fL3ltPr3h_w8FhT--V6el3ZkQ,1191
-smftools/informatics/bam_functions.py,sha256=otgl3TRPLn5Fnsx1jXX75du90k3XB3RHGzlfamvETsU,32670
+smftools/informatics/bam_functions.py,sha256=SCtOQWgF7Nqbk7-22fAq9J8kRYrd2V5chmM0x1lLJh0,32261
 smftools/informatics/basecalling.py,sha256=jc39jneaa8Gt1azutHgBGWHqCoPeTVSGBu3kyQwP7xM,3460
 smftools/informatics/bed_functions.py,sha256=uETVxT5mRWDNn7t0OqhDi8kDiq7uDakeHB1L2JsP4PA,13377
 smftools/informatics/binarize_converted_base_identities.py,sha256=yOepGaNBGfZJEsMiLRwKauvsmaHn_JRrxaGp8LmKAXs,7778
 smftools/informatics/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
 smftools/informatics/converted_BAM_to_adata.py,sha256=Y2kQNWly0WjjGN9El9zL1nLfjVxmPLWONvX5VNgZUh0,22554
 smftools/informatics/fasta_functions.py,sha256=5IfTkX_GIj5gRJB9PjL_WjyEktpBHwGsmS_nnO1ETjI,9790
-smftools/informatics/h5ad_functions.py,sha256=iAOxJjhaDslTUC78kjUHlCELigDl73sWo0fvXcKuFoI,7824
+smftools/informatics/h5ad_functions.py,sha256=9zUKuARwjjt0J-i_kBqo2jxLtD6Gud1VxKT0pV-ACeA,7829
 smftools/informatics/modkit_extract_to_adata.py,sha256=TrgrL_IgfqzNJ9qZ_2EvF_B38_Syw8mP38Sl7v0Riwo,55278
 smftools/informatics/modkit_functions.py,sha256=lywjeqAJ7Cdd7k-0P3YaL_9cAZvEDTDLh91rIRcSMWE,5604
 smftools/informatics/ohe.py,sha256=MEmh3ps-ZSSyXuIrr5LMzQvCsDJRCYiy7JS-WD4TlYs,5805
@@ -118,37 +120,38 @@ smftools/machine_learning/utils/grl.py,sha256=BWBDp_kQBigrUzQpRbZzgpfr_WOcd2K2V3
 smftools/plotting/__init__.py,sha256=7T3-hZFgTY0nfQgV4J6Vn9ogwkNMlY315kguZR7V1AI,866
 smftools/plotting/autocorrelation_plotting.py,sha256=cF9X3CgKiwzL79mgMUFO1tSqdybDoPN1COQQ567InCY,27455
 smftools/plotting/classifiers.py,sha256=8_zabh4NNB1_yVxLD22lfrfl5yfzbEoG3XWqlIqdtrQ,13786
-smftools/plotting/general_plotting.py,sha256=2JzE7agm_tILpQ67BHs5pdyPRsHBwcENZe7n4gfMWgM,61350
+smftools/plotting/general_plotting.py,sha256=o4aPXm_2JRj69XyHINKSTAJGaw9VA-csDgX1pyirso0,63151
 smftools/plotting/hmm_plotting.py,sha256=3Eq82gty_0b8GkSMCQgUlbKfzR9h2fJ5rZkB8yYGX-M,10934
-smftools/plotting/position_stats.py,sha256=4XukYIWeWZ_aGSZg1K0t37KA2aknjNNKT5kcKFfuz8Q,17428
+smftools/plotting/position_stats.py,sha256=Ia15EuYq5r3Ckz3jVjYMHON6IHZboatAVqJdb2WrUA4,17415
 smftools/plotting/qc_plotting.py,sha256=q5Ri0q89udvNUFUNxHzgk9atvQYqUkqkS5-JFq9EqoI,10045
-smftools/preprocessing/__init__.py,sha256=GAQBULUH7fGVabzK5Cq5Wj-0ew0vNA-jWQtR5LAowvs,1746
-smftools/preprocessing/add_read_length_and_mapping_qc.py,sha256=zD_Kxw3DvyOypfuSMGv0ESyt-02w4XlAAMqQxb7yDNQ,5700
-smftools/preprocessing/append_base_context.py,sha256=wGBAADePnys8DLUR15MpRe2BUcfCMDJWaCDDNyjn6AU,6209
-smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=s-7t-VKCs9Y67pX7kH6DNCEkC-RW4nM-UPsBQV2ZwtE,6186
+smftools/preprocessing/__init__.py,sha256=mcmovdFq6jt1kWIe0sVW6MwCXs4tUVTy3Qak7RDts74,1644
+smftools/preprocessing/append_base_context.py,sha256=VnxKf8sI4uWale215FEFFoE2me6uJszXvswl-dFQmUY,6702
+smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=qgjeDyfOghuqWZAzCjd4eE5riCWAgra6CIZ9UCyUgTs,6207
 smftools/preprocessing/binarize.py,sha256=6Vr7Z8zgtJ5rS_uPAx1n3EnQR670V33DlZ_95JmOeWc,484
-smftools/preprocessing/binarize_on_Youden.py,sha256=HGs4p7XiOSYU3_z8QswNHIA9HlrI-7Pp1Kggrn6yUnI,1834
+smftools/preprocessing/binarize_on_Youden.py,sha256=OwI0JwKBsSPVdPr61D31dR9XhnF0N4e5PnbboTpk8xI,1891
 smftools/preprocessing/binary_layers_to_ohe.py,sha256=Lxd8knelNTaUozfGMFNMlnrOb6uP28Laj3Ymw6cRHL0,1826
-smftools/preprocessing/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
-smftools/preprocessing/calculate_complexity_II.py,sha256=DGfl0jkuBPUpzhKVItN0W7EPzh-QYuR4IxRObPE6gAQ,9301
+smftools/preprocessing/calculate_complexity_II.py,sha256=oh5y0jbM1-k29ujRUfvXoL3ir4E6bVXLE9bWxlD5efc,9306
 smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
-smftools/preprocessing/calculate_coverage.py,sha256=4WTILzKLzxGLSsQrZkshXP-IRQpoVu3Fkqc0QTpux3Y,2132
+smftools/preprocessing/calculate_coverage.py,sha256=L417_XWAadMH3vxVDGEEAqxIGOiV48nfzVzD7HYyhus,2199
 smftools/preprocessing/calculate_pairwise_differences.py,sha256=5zJbNNaFld5qgKRoPyplCmMHflbvAQ9eKWCXPXPpJ60,1774
 smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
-smftools/preprocessing/calculate_position_Youden.py,sha256=yaSd6UDXPCddoN1UR6LgTqE5teJ79Ldw0BAlemc9fB4,7453
+smftools/preprocessing/calculate_position_Youden.py,sha256=JJLvU62zpBcvWm5QnsQ3FeRgIv5TMQbz5zTHa3z_Y1s,8342
 smftools/preprocessing/calculate_read_length_stats.py,sha256=gNNePwMqYZJidzGgT1ZkfSlvc5Y3I3bi5KNYpP6wQQc,4584
-smftools/preprocessing/calculate_read_modification_stats.py,sha256=mIlLBqNflVIkuoLxhbyujq3JEKyPl8iebhUlikB9brM,4775
+smftools/preprocessing/calculate_read_modification_stats.py,sha256=hZzoEe1Acc1TQV3crkjyGZBWTMkMMcqXymJb3vJMHks,4784
 smftools/preprocessing/clean_NaN.py,sha256=IOcnN5YF05gpPQc3cc3IS83petCnhCpkYiyT6bXEyx0,1937
 smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=GZcvr2JCsthX8EMw34S9-W3fc6JElw6ka99Jy6f2JvA,1292
-smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=93LgTy_vsPnOZgoiXhZ1-w_pix2oFdBk-dsBUoz33Go,7379
-smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=4TUvChkSH8R4p_0TpRCh7TounkdUgQHh71TGNmsZ29A,19355
-smftools/preprocessing/flag_duplicate_reads.py,sha256=MySI9En6xVp0FqL7hfiLw0EP3JnGVJWM_yZfkvN-m1U,65585
-smftools/preprocessing/invert_adata.py,sha256=HYMJ1sR3Ui8j6bDjY8OcVQOETzZV-_rrpIYaWLZL6S4,1049
-smftools/preprocessing/load_sample_sheet.py,sha256=AjJf2MrqGHJJ2rNjYi09zV1QkLTq8qGaHGVklXHnPuU,1908
+smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=UhMXpM_qxbhTCorjpKAePRk1qQVls8DP6Z51aFVnr3k,7380
+smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=LK3u0mIwD-T_qwqIH8v7BP1ZRL88HtRXPkDJwchsCjk,19363
+smftools/preprocessing/flag_duplicate_reads.py,sha256=8Z3sVQr8gmci3ZtYfQGDAHI7GpKGhzmAFHoZVyL6nK4,65581
+smftools/preprocessing/invert_adata.py,sha256=TmvwRGlkJKnMajOADAzpE_C2kYEtDVYDYtQKv3IthKs,1047
+smftools/preprocessing/load_sample_sheet.py,sha256=WXAKfIhbnptnkbIpI5hEe6p02HhpQ3eRX1EDGEEvH-8,1916
 smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
 smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
 smftools/preprocessing/recipes.py,sha256=cfKEpKW8TtQLe1CMdSHyPuIgKiWOPn7uP6uMIoRlnaQ,7063
+smftools/preprocessing/reindex_references_adata.py,sha256=4oViEcWWSi7bnX3Yyf-DdSZBSocvuiqr4LC-jDFHwu0,1137
 smftools/preprocessing/subsample_adata.py,sha256=ivJvJIOvEtyvAjqZ7cwEeVedm4QgJxCJEI7sFaTuI3w,2360
+smftools/preprocessing/archives/add_read_length_and_mapping_qc.py,sha256=zD_Kxw3DvyOypfuSMGv0ESyt-02w4XlAAMqQxb7yDNQ,5700
+smftools/preprocessing/archives/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
 smftools/preprocessing/archives/mark_duplicates.py,sha256=kwfstcWb7KkqeNB321dB-NLe8yd9_hZsSmpL8pCVBQg,8747
 smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
 smftools/preprocessing/archives/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
@@ -166,8 +169,8 @@ smftools/tools/archived/classify_methylated_features.py,sha256=Z0N2UKw3luD3CTQ8w
 smftools/tools/archived/classify_non_methylated_features.py,sha256=IJERTozEs7IPL7K-VIjq2q2K36wRCW9iiNSYLAXasrA,3256
 smftools/tools/archived/subset_adata_v1.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
 smftools/tools/archived/subset_adata_v2.py,sha256=OKZoUpvdURPtckIQxGTWmOI5jLa-_EU62Xs3LyyehnA,1880
-smftools-0.2.3.dist-info/METADATA,sha256=w_PRsBPndPoTQZviW9WTuiZV1Pk3ukeJ155OvC4E57M,8787
-smftools-0.2.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-smftools-0.2.3.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
-smftools-0.2.3.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
-smftools-0.2.3.dist-info/RECORD,,
+smftools-0.2.4.dist-info/METADATA,sha256=BVgWPtWTeDoNF6d1IOpvXyV0IE4fI5X_fLIs4nmVvJ4,9138
+smftools-0.2.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+smftools-0.2.4.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
+smftools-0.2.4.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
+smftools-0.2.4.dist-info/RECORD,,