smftools-0.3.1-py3-none-any.whl → smftools-0.3.2-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (56)
  1. smftools/_version.py +1 -1
  2. smftools/cli/chimeric_adata.py +1563 -0
  3. smftools/cli/helpers.py +18 -2
  4. smftools/cli/hmm_adata.py +18 -1
  5. smftools/cli/latent_adata.py +522 -67
  6. smftools/cli/load_adata.py +2 -2
  7. smftools/cli/preprocess_adata.py +32 -93
  8. smftools/cli/recipes.py +26 -0
  9. smftools/cli/spatial_adata.py +23 -109
  10. smftools/cli/variant_adata.py +423 -0
  11. smftools/cli_entry.py +41 -5
  12. smftools/config/conversion.yaml +0 -10
  13. smftools/config/deaminase.yaml +3 -0
  14. smftools/config/default.yaml +49 -13
  15. smftools/config/experiment_config.py +96 -3
  16. smftools/constants.py +4 -0
  17. smftools/hmm/call_hmm_peaks.py +1 -1
  18. smftools/informatics/binarize_converted_base_identities.py +2 -89
  19. smftools/informatics/converted_BAM_to_adata.py +53 -13
  20. smftools/informatics/h5ad_functions.py +83 -0
  21. smftools/informatics/modkit_extract_to_adata.py +4 -0
  22. smftools/plotting/__init__.py +26 -12
  23. smftools/plotting/autocorrelation_plotting.py +22 -4
  24. smftools/plotting/chimeric_plotting.py +1893 -0
  25. smftools/plotting/classifiers.py +28 -14
  26. smftools/plotting/general_plotting.py +58 -3362
  27. smftools/plotting/hmm_plotting.py +1586 -2
  28. smftools/plotting/latent_plotting.py +804 -0
  29. smftools/plotting/plotting_utils.py +243 -0
  30. smftools/plotting/position_stats.py +16 -8
  31. smftools/plotting/preprocess_plotting.py +281 -0
  32. smftools/plotting/qc_plotting.py +8 -3
  33. smftools/plotting/spatial_plotting.py +1134 -0
  34. smftools/plotting/variant_plotting.py +1231 -0
  35. smftools/preprocessing/__init__.py +3 -0
  36. smftools/preprocessing/append_base_context.py +1 -1
  37. smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
  38. smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
  39. smftools/preprocessing/append_variant_call_layer.py +480 -0
  40. smftools/preprocessing/flag_duplicate_reads.py +4 -4
  41. smftools/preprocessing/invert_adata.py +1 -0
  42. smftools/readwrite.py +109 -85
  43. smftools/tools/__init__.py +6 -0
  44. smftools/tools/calculate_knn.py +121 -0
  45. smftools/tools/calculate_nmf.py +18 -7
  46. smftools/tools/calculate_pca.py +180 -0
  47. smftools/tools/calculate_umap.py +70 -154
  48. smftools/tools/position_stats.py +4 -4
  49. smftools/tools/rolling_nn_distance.py +640 -3
  50. smftools/tools/sequence_alignment.py +140 -0
  51. smftools/tools/tensor_factorization.py +52 -4
  52. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/METADATA +3 -1
  53. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/RECORD +56 -42
  54. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
  55. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
  56. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
smftools/readwrite.py CHANGED
@@ -9,6 +9,10 @@ import anndata as ad
 import pandas as pd
 from Bio import SeqIO
 
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
 
 ######################################################################################################
 ## Datetime functionality
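The new module-level logger is imported from smftools.logging_utils, whose implementation is not part of this diff. As a rough mental model only, such a helper usually wraps logging.getLogger so every module shares consistent formatting; a minimal sketch under that assumption:

    import logging

    def get_logger(name: str) -> logging.Logger:
        # Hypothetical stand-in for smftools.logging_utils.get_logger;
        # the real helper is not shown in this diff.
        logger = logging.getLogger(name)
        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter("%(levelname)s %(name)s: %(message)s")
            )
            logger.addHandler(handler)
        return logger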
@@ -464,7 +468,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 ser = df[col]
 # categorical handling
 try:
-    is_cat = pd.api.types.is_categorical_dtype(ser.dtype)
+    is_cat = isinstance(ser.dtype, pd.CategoricalDtype)
 except Exception:
     is_cat = False
 
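This swap tracks a pandas deprecation: pd.api.types.is_categorical_dtype emits a FutureWarning as of pandas 2.1, and the documented replacement is an isinstance check against pd.CategoricalDtype, which is what the new code does. A quick demonstration of the equivalent behavior:

    import pandas as pd

    ser = pd.Series(["a", "b", "a"], dtype="category")

    # Old check (warns on pandas >= 2.1):
    #   pd.api.types.is_categorical_dtype(ser.dtype)

    # New check, as used above:
    assert isinstance(ser.dtype, pd.CategoricalDtype)
    assert not isinstance(pd.Series([1, 2]).dtype, pd.CategoricalDtype)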
@@ -474,7 +478,9 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 cats_str = cats.astype(str)
 df[col] = pd.Categorical(ser.astype(str), categories=cats_str)
 if verbose:
-    print(f" coerced categorical column '{which}.{col}' -> string categories")
+    logger.debug(
+        f" coerced categorical column '{which}.{col}' -> string categories"
+    )
 if which == "obs":
     report["obs_converted_columns"].append(col)
 else:
@@ -489,7 +495,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 report["var_backed_up_columns"].append(col)
 df[col] = ser.astype(str)
 if verbose:
-    print(
+    logger.debug(
         f" coerced categorical column '{which}.{col}' -> strings (backup={backup})"
     )
 continue
@@ -512,7 +518,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 report["var_backed_up_columns"].append(col)
 df[col] = ser.values.astype(str)
 if verbose:
-    print(
+    logger.debug(
         f" converted object column '{which}.{col}' -> strings (backup={backup})"
     )
 if which == "obs":
@@ -537,7 +543,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 report["var_backed_up_columns"].append(col)
 df[col] = [json.dumps(v, default=str) for v in ser.values]
 if verbose:
-    print(
+    logger.debug(
         f" json-stringified object column '{which}.{col}' (backup={backup})"
     )
 if which == "obs":
@@ -554,7 +560,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 report["var_backed_up_columns"].append(col)
 df[col] = ser.astype(str)
 if verbose:
-    print(
+    logger.debug(
         f" WARNING: column '{which}.{col}' was complex; coerced via str() (backed up)."
     )
 if which == "obs":
@@ -583,7 +589,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 _backup(v, f"uns_{k}_backup")
 backed_up.append(k)
 if verbose:
-    print(
+    logger.debug(
         f" uns['{k}'] non-JSON -> stored '{k}_json' and backed up (backup={backup})"
     )
 report["uns_json_keys"].append(k)
@@ -594,15 +600,17 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
     clean[k + "_str"] = str(v)
     backed_up.append(k)
     if verbose:
-        print(f" uns['{k}'] stored as string under '{k}_str' (backed up).")
+        logger.debug(
+            f" uns['{k}'] stored as string under '{k}_str' (backed up)."
+        )
     report["uns_backed_up_keys"].append(k)
 except Exception as e:
     msg = f"uns['{k}'] could not be preserved: {e}"
     report["errors"].append(msg)
     if verbose:
-        print(" " + msg)
+        logger.debug(" " + msg)
 if backed_up and verbose:
-    print(f"Sanitized .uns keys (backed up): {backed_up}")
+    logger.debug(f"Sanitized .uns keys (backed up): {backed_up}")
 return clean
 
 def _sanitize_layers_obsm(src_dict, which: str):
@@ -628,7 +636,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
     report_key = f"{which}.{k}"
     report[converted_key].append(report_key)
     if verbose:
-        print(f" {which}.{k} object array coerced to float.")
+        logger.debug(f" {which}.{k} object array coerced to float.")
 except Exception:
     try:
         arr_i = arr.astype(int)
@@ -636,13 +644,13 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
     report_key = f"{which}.{k}"
     report[converted_key].append(report_key)
     if verbose:
-        print(f" {which}.{k} object array coerced to int.")
+        logger.debug(f" {which}.{k} object array coerced to int.")
 except Exception:
     if backup:
         _backup(v, f"{which}_{k}_backup")
     report[skipped_key].append(k)
     if verbose:
-        print(
+        logger.debug(
             f" SKIPPING {which}.{k} (object dtype not numeric). Backed up: {backup}"
         )
     continue
@@ -655,7 +663,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
     msg = f" SKIPPING {which}.{k} due to conversion error: {e}"
     report["errors"].append(msg)
     if verbose:
-        print(msg)
+        logger.debug(msg)
     continue
 return cleaned
 
@@ -666,7 +674,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 msg = f"Failed to sanitize obs: {e}"
 report["errors"].append(msg)
 if verbose:
-    print(msg)
+    logger.debug(msg)
 obs_clean = adata.obs.copy()
 
 try:
@@ -675,7 +683,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 msg = f"Failed to sanitize var: {e}"
 report["errors"].append(msg)
 if verbose:
-    print(msg)
+    logger.debug(msg)
 var_clean = adata.var.copy()
 
 # ---------- sanitize uns ----------
@@ -685,7 +693,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 msg = f"Failed to sanitize uns: {e}"
 report["errors"].append(msg)
 if verbose:
-    print(msg)
+    logger.debug(msg)
 uns_clean = {}
 
 # ---------- sanitize layers and obsm ----------
@@ -699,7 +707,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 msg = f"Failed to sanitize layers: {e}"
 report["errors"].append(msg)
 if verbose:
-    print(msg)
+    logger.debug(msg)
 layers_clean = {}
 
 try:
@@ -708,7 +716,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 msg = f"Failed to sanitize obsm: {e}"
 report["errors"].append(msg)
 if verbose:
-    print(msg)
+    logger.debug(msg)
 obsm_clean = {}
 
 try:
@@ -717,7 +725,7 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 msg = f"Failed to sanitize varm: {e}"
 report["errors"].append(msg)
 if verbose:
-    print(msg)
+    logger.debug(msg)
 varm_clean = {}
 
 # ---------- handle X ----------
@@ -729,21 +737,21 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
         X_to_use = X_arr.astype(float)
         report["X_replaced_or_converted"] = "converted_to_float"
         if verbose:
-            print("Converted adata.X object-dtype -> float")
+            logger.debug("Converted adata.X object-dtype -> float")
     except Exception:
         if backup:
             _backup(adata.X, "X_backup")
         X_to_use = np.zeros_like(X_arr, dtype=float)
         report["X_replaced_or_converted"] = "replaced_with_zeros_backup"
         if verbose:
-            print(
+            logger.debug(
                 "adata.X had object dtype and couldn't be converted; replaced with zeros (backup set)."
             )
 except Exception as e:
     msg = f"Error handling adata.X: {e}"
     report["errors"].append(msg)
     if verbose:
-        print(msg)
+        logger.debug(msg)
     X_to_use = adata.X
 
 # ---------- build lightweight AnnData copy ----------
@@ -769,71 +777,71 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
     # --- write
     adata_copy.write_h5ad(path, compression=compression)
     if verbose:
-        print(f"Saved safely to {path}")
+        logger.debug(f"Saved safely to {path}")
 except Exception as e:
     msg = f"Failed to write h5ad: {e}"
     report["errors"].append(msg)
     if verbose:
-        print(msg)
+        logger.error(msg)
     raise
 
 # Print a concise interactive report
-print("\n=== safe_write_h5ad REPORT ===")
-print(f"Saved file: {path}")
-print(f"Adata shape: {adata.shape}")
+logger.info("\n=== safe_write_h5ad REPORT ===")
+logger.info(f"Saved file: {path}")
+logger.info(f"Adata shape: {adata.shape}")
 if report["obs_converted_columns"] or report["obs_backed_up_columns"]:
-    print(f"obs: converted columns -> {report['obs_converted_columns']}")
-    print(f"obs: backed-up columns -> {report['obs_backed_up_columns']}")
+    logger.debug(f"obs: converted columns -> {report['obs_converted_columns']}")
+    logger.debug(f"obs: backed-up columns -> {report['obs_backed_up_columns']}")
 else:
-    print("obs: no problematic columns found.")
+    logger.debug("obs: no problematic columns found.")
 
 if report["var_converted_columns"] or report["var_backed_up_columns"]:
-    print(f"var: converted columns -> {report['var_converted_columns']}")
-    print(f"var: backed-up columns -> {report['var_backed_up_columns']}")
+    logger.debug(f"var: converted columns -> {report['var_converted_columns']}")
+    logger.debug(f"var: backed-up columns -> {report['var_backed_up_columns']}")
 else:
-    print("var: no problematic columns found.")
+    logger.debug("var: no problematic columns found.")
 
 if report["uns_json_keys"] or report["uns_backed_up_keys"]:
-    print(f".uns: jsonified keys -> {report['uns_json_keys']}")
-    print(f".uns: backed-up keys -> {report['uns_backed_up_keys']}")
+    logger.debug(f".uns: jsonified keys -> {report['uns_json_keys']}")
+    logger.debug(f".uns: backed-up keys -> {report['uns_backed_up_keys']}")
 else:
-    print(".uns: no problematic keys found.")
+    logger.debug(".uns: no problematic keys found.")
 
 if report["layers_converted"] or report["layers_skipped"]:
-    print(f"layers: converted -> {report['layers_converted']}")
-    print(f"layers: skipped -> {report['layers_skipped']}")
+    logger.debug(f"layers: converted -> {report['layers_converted']}")
+    logger.debug(f"layers: skipped -> {report['layers_skipped']}")
 else:
-    print("layers: no problematic entries found.")
+    logger.debug("layers: no problematic entries found.")
 
 if report["obsm_converted"] or report["obsm_skipped"]:
-    print(f"obsm: converted -> {report['obsm_converted']}")
-    print(f"obsm: skipped -> {report['obsm_skipped']}")
+    logger.debug(f"obsm: converted -> {report['obsm_converted']}")
+    logger.debug(f"obsm: skipped -> {report['obsm_skipped']}")
 else:
-    print("obsm: no problematic entries found.")
+    logger.debug("obsm: no problematic entries found.")
 
 if report["X_replaced_or_converted"]:
-    print(f"adata.X handled: {report['X_replaced_or_converted']}")
+    logger.debug(f"adata.X handled: {report['X_replaced_or_converted']}")
 else:
-    print("adata.X: no changes.")
+    logger.debug("adata.X: no changes.")
 
 if report["errors"]:
-    print("\nWarnings / errors encountered:")
+    logger.error("\nWarnings / errors encountered:")
     for e in report["errors"]:
-        print(" -", e)
+        logger.error(" -", e)
 
-print("=== end report ===\n")
+logger.info("=== end report ===\n")
 
 # ---------- create CSV output directory ----------
 try:
     csv_dir = path.parent / "csvs"
     csv_dir.mkdir(exist_ok=True)
     if verbose:
-        print(f"CSV outputs will be written to: {csv_dir}")
+        logger.info(f"CSV outputs will be written to: {csv_dir}")
 except Exception as e:
     msg = f"Failed to create CSV output directory: {e}"
     report["errors"].append(msg)
     if verbose:
-        print(msg)
+        logger.error(msg)
     csv_dir = path.parent # fallback just in case
 
 # ---------- write keys summary CSV ----------
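Because the report now goes through logger.info/logger.debug instead of print, it is invisible until the application configures logging, and the per-field lines sit at DEBUG level. A minimal way to surface everything, assuming the standard library's default root-logger setup:

    import logging

    # Show the summary (INFO) plus the per-field report lines (DEBUG).
    logging.basicConfig(level=logging.DEBUG)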
@@ -890,6 +898,16 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
         }
     )
 
+# obsp
+for k, v in adata_copy.obsp.items():
+    meta_rows.append(
+        {
+            "kind": "obsp",
+            "name": k,
+            "dtype": str(np.asarray(v).dtype),
+        }
+    )
+
 # uns
 for k, v in adata_copy.uns.items():
     meta_rows.append(
@@ -908,13 +926,13 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
 
     meta_df.to_csv(meta_path, index=False)
     if verbose:
-        print(f"Wrote keys summary CSV to {meta_path}")
+        logger.info(f"Wrote keys summary CSV to {meta_path}")
 
 except Exception as e:
     msg = f"Failed to write keys CSV: {e}"
     report["errors"].append(msg)
     if verbose:
-        print(msg)
+        logger.error(msg)
 
 # ---------- write full obs and var dataframes ----------
 try:
@@ -927,14 +945,14 @@ def safe_write_h5ad(adata, path, compression="gzip", backup=False, backup_dir=No
     adata_copy.var.to_csv(var_path, index=True)
 
     if verbose:
-        print(f"Wrote obs DataFrame to {obs_path}")
-        print(f"Wrote var DataFrame to {var_path}")
+        logger.info(f"Wrote obs DataFrame to {obs_path}")
+        logger.info(f"Wrote var DataFrame to {var_path}")
 
 except Exception as e:
     msg = f"Failed to write obs/var CSVs: {e}"
     report["errors"].append(msg)
     if verbose:
-        print(msg)
+        logger.error(msg)
 
 return report
 
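safe_write_h5ad returns the report dict, and its counterpart safe_read_h5ad below returns the AnnData plus its own report. A sketch of the round trip, assuming an existing adata; keyword names beyond those visible in the hunk headers (compression, backup, backup_dir) and the verbose flag used throughout are not guaranteed by this diff:

    from pathlib import Path

    from smftools.readwrite import safe_read_h5ad, safe_write_h5ad

    # Write with backups so unconvertible columns/keys can be restored later.
    write_report = safe_write_h5ad(
        adata, Path("experiment.h5ad"), backup=True, verbose=True
    )

    # Read back; backed-up obs/var columns, uns keys, layers, obsm and varm
    # are restored from the pickle backups when present.
    adata_restored, read_report = safe_read_h5ad("experiment.h5ad", verbose=True)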
@@ -1003,7 +1021,7 @@ def safe_read_h5ad(
 }
 
 if verbose:
-    print(f"[safe_read_h5ad] loading {path}")
+    logger.info(f"[safe_read_h5ad] loading {path}")
 
 # 1) load the cleaned h5ad
 try:
@@ -1013,7 +1031,7 @@ def safe_read_h5ad(
 
 # Ensure backup_dir exists (may be relative to cwd)
 if verbose:
-    print(f"[safe_read_h5ad] looking for backups in {backup_dir}")
+    logger.debug(f"[safe_read_h5ad] looking for backups in {backup_dir}")
 
 def _load_pickle_if_exists(fname):
     if os.path.exists(fname):
@@ -1024,7 +1042,7 @@ def safe_read_h5ad(
 except Exception as e:
     report["errors"].append(f"Failed to load pickle {fname}: {e}")
     if verbose:
-        print(f" error loading {fname}: {e}")
+        logger.error(f" error loading {fname}: {e}")
     return None
 return None
 
@@ -1049,7 +1067,7 @@ def safe_read_h5ad(
     report["restored_obs_columns"].append((col, bname2))
     restored = True
     if verbose:
-        print(f"[safe_read_h5ad] restored obs.{col} from {bname2}")
+        logger.debug(f"[safe_read_h5ad] restored obs.{col} from {bname2}")
 except Exception as e:
     report["errors"].append(f"Failed to restore obs.{col} from {bname2}: {e}")
     restored = False
@@ -1067,7 +1085,7 @@ def safe_read_h5ad(
     report["restored_obs_columns"].append((col, bname1))
     restored = True
     if verbose:
-        print(f"[safe_read_h5ad] restored obs.{col} from {bname1}")
+        logger.debug(f"[safe_read_h5ad] restored obs.{col} from {bname1}")
 except Exception as e:
     report["errors"].append(f"Failed to restore obs.{col} from {bname1}: {e}")
     restored = False
@@ -1098,7 +1116,7 @@ def safe_read_h5ad(
 report["restored_obs_columns"].append((col, "parsed_json"))
 restored = True
 if verbose:
-    print(
+    logger.debug(
         f"[safe_read_h5ad] parsed obs.{col} JSON strings back to Python objects"
     )
 
@@ -1111,7 +1129,7 @@ def safe_read_h5ad(
     adata.obs[col] = adata.obs[col].astype(str).astype("category")
     report["recategorized_obs"].append(col)
     if verbose:
-        print(
+        logger.debug(
            f"[safe_read_h5ad] recast obs.{col} -> categorical (n_unique={nunique})"
         )
 except Exception as e:
@@ -1134,7 +1152,7 @@ def safe_read_h5ad(
     report["restored_var_columns"].append((col, bname2))
     restored = True
     if verbose:
-        print(f"[safe_read_h5ad] restored var.{col} from {bname2}")
+        logger.debug(f"[safe_read_h5ad] restored var.{col} from {bname2}")
 except Exception as e:
     report["errors"].append(f"Failed to restore var.{col} from {bname2}: {e}")
 
@@ -1151,7 +1169,7 @@ def safe_read_h5ad(
     report["restored_var_columns"].append((col, bname1))
     restored = True
     if verbose:
-        print(f"[safe_read_h5ad] restored var.{col} from {bname1}")
+        logger.debug(f"[safe_read_h5ad] restored var.{col} from {bname1}")
 except Exception as e:
     report["errors"].append(f"Failed to restore var.{col} from {bname1}: {e}")
 
@@ -1179,7 +1197,7 @@ def safe_read_h5ad(
 adata.var[col] = pd.Series(parsed, index=adata.var.index)
 report["restored_var_columns"].append((col, "parsed_json"))
 if verbose:
-    print(
+    logger.debug(
         f"[safe_read_h5ad] parsed var.{col} JSON strings back to Python objects"
     )
 
@@ -1190,7 +1208,7 @@ def safe_read_h5ad(
     adata.var[col] = adata.var[col].astype(str).astype("category")
     report["recategorized_var"].append(col)
     if verbose:
-        print(
+        logger.debug(
             f"[safe_read_h5ad] recast var.{col} -> categorical (n_unique={nunique})"
         )
 except Exception as e:
@@ -1208,7 +1226,7 @@ def safe_read_h5ad(
 adata.uns[base] = parsed
 report["parsed_uns_json_keys"].append(base)
 if verbose:
-    print(f"[safe_read_h5ad] parsed adata.uns['{k}'] -> adata.uns['{base}']")
+    logger.debug(f"[safe_read_h5ad] parsed adata.uns['{k}'] -> adata.uns['{base}']")
 # remove the _json entry
 try:
     del adata.uns[k]
@@ -1231,7 +1249,7 @@ def safe_read_h5ad(
 adata.uns[key] = val
 report["restored_uns_keys"].append((key, full))
 if verbose:
-    print(f"[safe_read_h5ad] restored adata.uns['{key}'] from {full}")
+    logger.debug(f"[safe_read_h5ad] restored adata.uns['{key}'] from {full}")
 
 # 5) Restore layers and obsm from backups if present
 # expected backup names: layers_<name>_backup.pkl, obsm_<name>_backup.pkl, varm_<name>_backup.pkl
@@ -1246,7 +1264,9 @@ def safe_read_h5ad(
     adata.layers[layer_name] = np.asarray(val)
     report["restored_layers"].append((layer_name, full))
     if verbose:
-        print(f"[safe_read_h5ad] restored layers['{layer_name}'] from {full}")
+        logger.debug(
+            f"[safe_read_h5ad] restored layers['{layer_name}'] from {full}"
+        )
 except Exception as e:
     report["errors"].append(
         f"Failed to restore layers['{layer_name}'] from {full}: {e}"
@@ -1261,7 +1281,9 @@ def safe_read_h5ad(
     adata.obsm[obsm_name] = np.asarray(val)
     report["restored_obsm"].append((obsm_name, full))
     if verbose:
-        print(f"[safe_read_h5ad] restored obsm['{obsm_name}'] from {full}")
+        logger.debug(
+            f"[safe_read_h5ad] restored obsm['{obsm_name}'] from {full}"
+        )
 except Exception as e:
     report["errors"].append(
         f"Failed to restore obsm['{obsm_name}'] from {full}: {e}"
@@ -1276,7 +1298,9 @@ def safe_read_h5ad(
     adata.varm[varm_name] = np.asarray(val)
     report["restored_varm"].append((varm_name, full))
     if verbose:
-        print(f"[safe_read_h5ad] restored varm['{varm_name}'] from {full}")
+        logger.debug(
+            f"[safe_read_h5ad] restored varm['{varm_name}'] from {full}"
+        )
 except Exception as e:
     report["errors"].append(
         f"Failed to restore varm['{varm_name}'] from {full}: {e}"
@@ -1310,7 +1334,7 @@ def safe_read_h5ad(
 if expected_missing and verbose:
     n = len(expected_missing)
     if verbose:
-        print(
+        logger.warning(
             f"[safe_read_h5ad] note: {n} obs/var object columns may not have backups; check if their content is acceptable."
         )
 # add to report
@@ -1318,37 +1342,37 @@ def safe_read_h5ad(
 
 # final summary print
 if verbose:
-    print("\n=== safe_read_h5ad summary ===")
+    logger.info("\n=== safe_read_h5ad summary ===")
     if report["restored_obs_columns"]:
-        print("Restored obs columns:", report["restored_obs_columns"])
+        logger.info("Restored obs columns:", report["restored_obs_columns"])
     if report["restored_var_columns"]:
-        print("Restored var columns:", report["restored_var_columns"])
+        logger.info("Restored var columns:", report["restored_var_columns"])
    if report["restored_uns_keys"]:
-        print("Restored uns keys:", report["restored_uns_keys"])
+        logger.info("Restored uns keys:", report["restored_uns_keys"])
     if report["parsed_uns_json_keys"]:
-        print("Parsed uns JSON keys:", report["parsed_uns_json_keys"])
+        logger.info("Parsed uns JSON keys:", report["parsed_uns_json_keys"])
     if report["restored_layers"]:
-        print("Restored layers:", report["restored_layers"])
+        logger.info("Restored layers:", report["restored_layers"])
     if report["restored_obsm"]:
-        print("Restored obsm:", report["restored_obsm"])
+        logger.info("Restored obsm:", report["restored_obsm"])
     if report["restored_varm"]:
-        print("Restored varm:", report["restored_varm"])
+        logger.info("Restored varm:", report["restored_varm"])
     if report["recategorized_obs"] or report["recategorized_var"]:
-        print(
+        logger.info(
             "Recategorized columns (obs/var):",
             report["recategorized_obs"],
             report["recategorized_var"],
         )
     if report["missing_backups"]:
-        print(
+        logger.info(
             "Missing backups or object columns without backups (investigate):",
             report["missing_backups"],
         )
     if report["errors"]:
-        print("Errors encountered (see report['errors']):")
+        logger.error("Errors encountered (see report['errors']):")
         for e in report["errors"]:
-            print(" -", e)
-    print("=== end summary ===\n")
+            logger.error(" -", e)
+    logger.info("=== end summary ===\n")
 
 return adata, report
 
smftools/tools/__init__.py CHANGED
@@ -6,6 +6,8 @@ _LAZY_ATTRS = {
     "calculate_leiden": "smftools.tools.calculate_leiden",
     "calculate_nmf": "smftools.tools.calculate_nmf",
     "calculate_sequence_cp_decomposition": "smftools.tools.tensor_factorization",
+    "calculate_pca": "smftools.tools.calculate_pca",
+    "calculate_knn": "smftools.tools.calculate_knn",
     "calculate_umap": "smftools.tools.calculate_umap",
     "cluster_adata_on_methylation": "smftools.tools.cluster_adata_on_methylation",
     "combine_layers": "smftools.tools.general_tools",
@@ -14,7 +16,11 @@ _LAZY_ATTRS = {
     "calculate_relative_risk_on_activity": "smftools.tools.position_stats",
     "compute_positionwise_statistics": "smftools.tools.position_stats",
     "calculate_row_entropy": "smftools.tools.read_stats",
+    "align_sequences_with_mismatches": "smftools.tools.sequence_alignment",
     "rolling_window_nn_distance": "smftools.tools.rolling_nn_distance",
+    "annotate_zero_hamming_segments": "smftools.tools.rolling_nn_distance",
+    "assign_per_read_segments_layer": "smftools.tools.rolling_nn_distance",
+    "select_top_segments_per_read": "smftools.tools.rolling_nn_distance",
     "subset_adata": "smftools.tools.subset_adata",
 }
 
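_LAZY_ATTRS maps public attribute names to the modules that define them; the __getattr__ hook that consumes the table sits outside this hunk. A minimal sketch of the PEP 562 lazy-import pattern such a table typically drives (the hook shown here is an assumption, not smftools' actual code):

    import importlib

    _LAZY_ATTRS = {
        "calculate_pca": "smftools.tools.calculate_pca",
        "calculate_knn": "smftools.tools.calculate_knn",
    }

    def __getattr__(name):
        # Import the defining module on first attribute access (PEP 562),
        # keeping `import smftools.tools` itself cheap.
        if name in _LAZY_ATTRS:
            module = importlib.import_module(_LAZY_ATTRS[name])
            return getattr(module, name)
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")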