sampledisco 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. sampledisco/__init__.py +27 -0
  2. sampledisco/cli.py +70 -0
  3. sampledisco/gene_activity/ATAC_ArchR.py +745 -0
  4. sampledisco/gene_activity/ATAC_RNA_harmony.py +1107 -0
  5. sampledisco/gene_activity/ATAC_gene_activity.py +547 -0
  6. sampledisco/gene_activity/ATAC_peak_annotation.py +441 -0
  7. sampledisco/gene_activity/RNA_name_convertor.py +330 -0
  8. sampledisco/gene_activity/__init__.py +0 -0
  9. sampledisco/gene_activity/pseudo_correlation.py +1112 -0
  10. sampledisco/gene_activity/test_pseudo_correlation.py +694 -0
  11. sampledisco/gene_activity/validation.py +913 -0
  12. sampledisco/parameter_selection/__init__.py +0 -0
  13. sampledisco/parameter_selection/autotune.py +877 -0
  14. sampledisco/preparation/ATAC_cell_type.py +493 -0
  15. sampledisco/preparation/ATAC_cell_type_gpu.py +425 -0
  16. sampledisco/preparation/__init__.py +0 -0
  17. sampledisco/preparation/atac_preprocess_cpu.py +316 -0
  18. sampledisco/preparation/atac_preprocess_gpu.py +306 -0
  19. sampledisco/preparation/cell_type_cpu.py +189 -0
  20. sampledisco/preparation/cell_type_gpu.py +202 -0
  21. sampledisco/preparation/multi_omics_batch_correction.py +111 -0
  22. sampledisco/preparation/multi_omics_cell_type_cpu.py +447 -0
  23. sampledisco/preparation/multi_omics_cell_type_gpu.py +482 -0
  24. sampledisco/preparation/multi_omics_glue.py +1045 -0
  25. sampledisco/preparation/multi_omics_merge.py +404 -0
  26. sampledisco/preparation/rna_preprocess_cpu.py +303 -0
  27. sampledisco/preparation/rna_preprocess_gpu.py +292 -0
  28. sampledisco/sample_association/__init__.py +0 -0
  29. sampledisco/sample_association/association.py +649 -0
  30. sampledisco/sample_clustering/HRA_VEC.py +15 -0
  31. sampledisco/sample_clustering/HRC_VEC.py +15 -0
  32. sampledisco/sample_clustering/NN.py +108 -0
  33. sampledisco/sample_clustering/RAISIN.py +847 -0
  34. sampledisco/sample_clustering/RAISIN_TEST.py +787 -0
  35. sampledisco/sample_clustering/UPGMA.py +84 -0
  36. sampledisco/sample_clustering/__init__.py +0 -0
  37. sampledisco/sample_clustering/cluster.py +117 -0
  38. sampledisco/sample_clustering/cluster_helper.py +136 -0
  39. sampledisco/sample_clustering/cluster_severity_reconcile.py +130 -0
  40. sampledisco/sample_clustering/consensus.py +222 -0
  41. sampledisco/sample_clustering/proportion_test.py +624 -0
  42. sampledisco/sample_clustering/tree_cut.py +90 -0
  43. sampledisco/sample_distance/ChiSquare.py +143 -0
  44. sampledisco/sample_distance/__init__.py +0 -0
  45. sampledisco/sample_distance/distance_test.py +147 -0
  46. sampledisco/sample_distance/jensenshannon.py +142 -0
  47. sampledisco/sample_distance/sample_distance.py +609 -0
  48. sampledisco/sample_embedding/__init__.py +73 -0
  49. sampledisco/sample_embedding/blocks.py +528 -0
  50. sampledisco/sample_embedding/sample_embedding.py +346 -0
  51. sampledisco/sample_embedding/sample_embedding_gpu.py +359 -0
  52. sampledisco/sample_trajectory/CCA.py +482 -0
  53. sampledisco/sample_trajectory/CCA_test.py +611 -0
  54. sampledisco/sample_trajectory/TSCAN.py +998 -0
  55. sampledisco/sample_trajectory/__init__.py +0 -0
  56. sampledisco/sample_trajectory/trajectory_DGE_visualization.py +1600 -0
  57. sampledisco/sample_trajectory/trajectory_diff_gene.py +1184 -0
  58. sampledisco/utils/Grouping.py +112 -0
  59. sampledisco/utils/__init__.py +0 -0
  60. sampledisco/utils/batch_regress.py +49 -0
  61. sampledisco/utils/imbalance_cell_type_handler.py +79 -0
  62. sampledisco/utils/inspector.py +391 -0
  63. sampledisco/utils/limma.py +54 -0
  64. sampledisco/utils/merge_sample_meta.py +102 -0
  65. sampledisco/utils/random_seed.py +36 -0
  66. sampledisco/utils/safe_save.py +132 -0
  67. sampledisco/utils/slim_adata.py +24 -0
  68. sampledisco/utils/subsample.py +117 -0
  69. sampledisco/utils/subsample_generator.py +418 -0
  70. sampledisco/utils/tf_idf.py +299 -0
  71. sampledisco/utils/unify_optimal.py +339 -0
  72. sampledisco/visualization/ATAC_visualization.py +359 -0
  73. sampledisco/visualization/DEG_visualization.py +359 -0
  74. sampledisco/visualization/__init__.py +0 -0
  75. sampledisco/visualization/multi_omics_visualization.py +1266 -0
  76. sampledisco/visualization/visualization_embedding.py +603 -0
  77. sampledisco/visualization/visualization_helper.py +439 -0
  78. sampledisco/visualization/visualization_other.py +121 -0
  79. sampledisco/wrapper/__init__.py +0 -0
  80. sampledisco/wrapper/atac_wrapper.py +234 -0
  81. sampledisco/wrapper/multiomics_wrapper.py +483 -0
  82. sampledisco/wrapper/rna_wrapper.py +233 -0
  83. sampledisco/wrapper/wrapper.py +1646 -0
  84. sampledisco-0.1.0.dist-info/METADATA +254 -0
  85. sampledisco-0.1.0.dist-info/RECORD +89 -0
  86. sampledisco-0.1.0.dist-info/WHEEL +5 -0
  87. sampledisco-0.1.0.dist-info/entry_points.txt +2 -0
  88. sampledisco-0.1.0.dist-info/licenses/LICENSE +21 -0
  89. sampledisco-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,27 @@
1
+ """SampleDisco — cross-omics, cross-condition sample embedding for single-cell data.
2
+
3
+ Public API (imported lazily so ``import sampledisco`` stays light and does not
4
+ pull scanpy / torch / scGLUE until you actually call into the pipeline):
5
+
6
+ import sampledisco
7
+ sampledisco.wrapper(...) # full pipeline (RNA / ATAC / multi-omics)
8
+ sampledisco.compute_sample_embedding(...) # the core method only
9
+
10
+ The CLI entry point is ``sampledisco --mode complex --config <yaml>`` (see ``sampledisco.cli``).
11
+ """
12
+ from __future__ import annotations
13
+
14
+ __version__ = "0.1.0"
15
+
16
+ __all__ = ["wrapper", "compute_sample_embedding"]
17
+
18
+
19
def __getattr__(name: str):
    """Resolve the lazily imported public API (PEP 562 module ``__getattr__``).

    Heavy dependencies are imported only when one of the public names is
    first accessed.

    Args:
        name: Attribute requested on the package.

    Returns:
        The ``wrapper`` or ``compute_sample_embedding`` callable.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name == "wrapper":
        from sampledisco.wrapper.wrapper import wrapper as resolved
    elif name == "compute_sample_embedding":
        from sampledisco.sample_embedding import compute_sample_embedding as resolved
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Importing the ``sampledisco.wrapper`` subpackage makes the import system
    # bind that subpackage as the ``wrapper`` attribute of this module. Because
    # this hook only runs when normal attribute lookup fails, every access
    # after the first would then return the subpackage instead of the
    # function. Rebinding the name to the resolved callable keeps
    # ``sampledisco.wrapper(...)`` working on repeated access and also skips
    # this hook next time. Submodules stay reachable through
    # ``from sampledisco.wrapper.<module> import ...``.
    globals()[name] = resolved
    return resolved
sampledisco/cli.py ADDED
@@ -0,0 +1,70 @@
1
+ import argparse
2
+ import sys
3
+ import traceback
4
+ import yaml
5
+ import os
6
+ import inspect
7
+ from sampledisco.wrapper.wrapper import wrapper
8
+
9
def parse_args(argv=None):
    """Parse the command-line arguments of the CLI.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``mode``, ``count_data``, ``sample_meta_data``,
        ``output_directory`` and ``config``. ``mode`` is always ``"simple"``
        or ``"complex"``: when ``-m/--mode`` is omitted it is inferred as
        ``"complex"`` if ``--config`` was given and ``"simple"`` otherwise.
    """
    parser = argparse.ArgumentParser(description="Run the data processing wrapper.")

    # Not required: a bare ``--config <yaml>`` invocation selects complex mode.
    parser.add_argument(
        "-m", "--mode", type=str, default=None, choices=["simple", "complex"],
        help="Run mode. Choose 'simple' or 'complex'. If omitted, 'complex' is "
             "assumed when --config is given and 'simple' otherwise.",
    )

    # Simple mode args
    parser.add_argument("-c", "--count_data", type=str, help="Path to count data file")
    parser.add_argument("-s", "--sample_meta_data", type=str, help="(Optional) Path to sample metadata file")
    parser.add_argument("-o", "--output_directory", type=str, help="Path to output directory")

    # Complex mode args
    parser.add_argument("--config", type=str, help="Path to YAML config file")

    args = parser.parse_args(argv)
    if args.mode is None:
        args.mode = "complex" if args.config else "simple"
    return args
24
+
25
def load_config(config_path):
    """Load the YAML config file at ``config_path``.

    Prints an error to stderr and exits with status 1 when the path does not
    exist or is not a regular file.

    Args:
        config_path: Path to the YAML file.

    Returns:
        The document parsed by ``yaml.safe_load``. An empty file yields an
        empty dict instead of ``None`` so callers can iterate the result.
    """
    if not os.path.exists(config_path):
        print(f"Error: Config file '{config_path}' does not exist.", file=sys.stderr)
        sys.exit(1)
    if not os.path.isfile(config_path):
        # A directory passes the existence check but cannot be opened.
        print(f"Error: Config path '{config_path}' is not a file.", file=sys.stderr)
        sys.exit(1)

    # Read as UTF-8 explicitly rather than with the platform locale encoding.
    with open(config_path, "r", encoding="utf-8") as f:
        loaded = yaml.safe_load(f)
    return {} if loaded is None else loaded
31
+
32
def validate_config(config, func):
    """Check that ``config`` can be passed to ``func`` as keyword arguments.

    Args:
        config: Mapping of parameter name to value.
        func: Callable whose signature the config is validated against.

    Raises:
        ValueError: If ``config`` is not a mapping, contains a key ``func``
            does not accept, or omits a parameter of ``func`` that has no
            default value.
    """
    from collections.abc import Mapping

    if not isinstance(config, Mapping):
        raise ValueError(
            "Config must be a mapping of parameter names to values, "
            f"got {type(config).__name__}"
        )

    params = inspect.signature(func).parameters
    # Only these parameter kinds can be supplied by name via ``func(**config)``.
    by_name = {
        pname: param
        for pname, param in params.items()
        if param.kind in (param.POSITIONAL_OR_KEYWORD, param.KEYWORD_ONLY)
    }
    # A ``**kwargs`` parameter means unknown keys are accepted by ``func``.
    takes_extra = any(param.kind is param.VAR_KEYWORD for param in params.values())

    for key in config:
        if key not in by_name and not takes_extra:
            raise ValueError(f"Unexpected parameter in config: '{key}'")

    # Parameters with a default are optional; only default-less ones are required.
    for pname, param in by_name.items():
        if param.default is param.empty and pname not in config:
            raise ValueError(f"Missing required parameter in config: '{pname}'")
40
+
41
def main():
    """Command-line entry point: dispatch to ``wrapper`` based on the run mode.

    In 'complex' mode the YAML file given by ``--config`` is loaded, validated
    against the signature of ``wrapper`` and passed to it as keyword
    arguments. In 'simple' mode ``wrapper`` is called with the count data,
    the optional sample metadata and the output directory. Missing arguments
    and errors raised in complex mode are reported on stderr and end the
    process with exit status 1.
    """
    cli = parse_args()

    if cli.mode == "complex":
        config_path = cli.config
        if not config_path:
            print("Error: In 'complex' mode, --config must be provided.", file=sys.stderr)
            sys.exit(1)

        settings = load_config(config_path)

        try:
            validate_config(settings, wrapper)
            wrapper(**settings)
        except Exception as exc:
            # Print the short message first, then the full traceback.
            print(f"Error: {exc}", file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)
            sys.exit(1)
        return

    if cli.mode != "simple":
        return

    counts = cli.count_data
    out_dir = cli.output_directory
    if not (counts and out_dir):
        print("Error: In 'simple' mode, -c and -o must be provided.", file=sys.stderr)
        sys.exit(1)

    meta = cli.sample_meta_data
    if meta:
        wrapper(counts, meta, out_dir)
    else:
        wrapper(counts, output_directory=out_dir)


if __name__ == "__main__":
    main()