sampledisco 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sampledisco/__init__.py +27 -0
- sampledisco/cli.py +70 -0
- sampledisco/gene_activity/ATAC_ArchR.py +745 -0
- sampledisco/gene_activity/ATAC_RNA_harmony.py +1107 -0
- sampledisco/gene_activity/ATAC_gene_activity.py +547 -0
- sampledisco/gene_activity/ATAC_peak_annotation.py +441 -0
- sampledisco/gene_activity/RNA_name_convertor.py +330 -0
- sampledisco/gene_activity/__init__.py +0 -0
- sampledisco/gene_activity/pseudo_correlation.py +1112 -0
- sampledisco/gene_activity/test_pseudo_correlation.py +694 -0
- sampledisco/gene_activity/validation.py +913 -0
- sampledisco/parameter_selection/__init__.py +0 -0
- sampledisco/parameter_selection/autotune.py +877 -0
- sampledisco/preparation/ATAC_cell_type.py +493 -0
- sampledisco/preparation/ATAC_cell_type_gpu.py +425 -0
- sampledisco/preparation/__init__.py +0 -0
- sampledisco/preparation/atac_preprocess_cpu.py +316 -0
- sampledisco/preparation/atac_preprocess_gpu.py +306 -0
- sampledisco/preparation/cell_type_cpu.py +189 -0
- sampledisco/preparation/cell_type_gpu.py +202 -0
- sampledisco/preparation/multi_omics_batch_correction.py +111 -0
- sampledisco/preparation/multi_omics_cell_type_cpu.py +447 -0
- sampledisco/preparation/multi_omics_cell_type_gpu.py +482 -0
- sampledisco/preparation/multi_omics_glue.py +1045 -0
- sampledisco/preparation/multi_omics_merge.py +404 -0
- sampledisco/preparation/rna_preprocess_cpu.py +303 -0
- sampledisco/preparation/rna_preprocess_gpu.py +292 -0
- sampledisco/sample_association/__init__.py +0 -0
- sampledisco/sample_association/association.py +649 -0
- sampledisco/sample_clustering/HRA_VEC.py +15 -0
- sampledisco/sample_clustering/HRC_VEC.py +15 -0
- sampledisco/sample_clustering/NN.py +108 -0
- sampledisco/sample_clustering/RAISIN.py +847 -0
- sampledisco/sample_clustering/RAISIN_TEST.py +787 -0
- sampledisco/sample_clustering/UPGMA.py +84 -0
- sampledisco/sample_clustering/__init__.py +0 -0
- sampledisco/sample_clustering/cluster.py +117 -0
- sampledisco/sample_clustering/cluster_helper.py +136 -0
- sampledisco/sample_clustering/cluster_severity_reconcile.py +130 -0
- sampledisco/sample_clustering/consensus.py +222 -0
- sampledisco/sample_clustering/proportion_test.py +624 -0
- sampledisco/sample_clustering/tree_cut.py +90 -0
- sampledisco/sample_distance/ChiSquare.py +143 -0
- sampledisco/sample_distance/__init__.py +0 -0
- sampledisco/sample_distance/distance_test.py +147 -0
- sampledisco/sample_distance/jensenshannon.py +142 -0
- sampledisco/sample_distance/sample_distance.py +609 -0
- sampledisco/sample_embedding/__init__.py +73 -0
- sampledisco/sample_embedding/blocks.py +528 -0
- sampledisco/sample_embedding/sample_embedding.py +346 -0
- sampledisco/sample_embedding/sample_embedding_gpu.py +359 -0
- sampledisco/sample_trajectory/CCA.py +482 -0
- sampledisco/sample_trajectory/CCA_test.py +611 -0
- sampledisco/sample_trajectory/TSCAN.py +998 -0
- sampledisco/sample_trajectory/__init__.py +0 -0
- sampledisco/sample_trajectory/trajectory_DGE_visualization.py +1600 -0
- sampledisco/sample_trajectory/trajectory_diff_gene.py +1184 -0
- sampledisco/utils/Grouping.py +112 -0
- sampledisco/utils/__init__.py +0 -0
- sampledisco/utils/batch_regress.py +49 -0
- sampledisco/utils/imbalance_cell_type_handler.py +79 -0
- sampledisco/utils/inspector.py +391 -0
- sampledisco/utils/limma.py +54 -0
- sampledisco/utils/merge_sample_meta.py +102 -0
- sampledisco/utils/random_seed.py +36 -0
- sampledisco/utils/safe_save.py +132 -0
- sampledisco/utils/slim_adata.py +24 -0
- sampledisco/utils/subsample.py +117 -0
- sampledisco/utils/subsample_generator.py +418 -0
- sampledisco/utils/tf_idf.py +299 -0
- sampledisco/utils/unify_optimal.py +339 -0
- sampledisco/visualization/ATAC_visualization.py +359 -0
- sampledisco/visualization/DEG_visualization.py +359 -0
- sampledisco/visualization/__init__.py +0 -0
- sampledisco/visualization/multi_omics_visualization.py +1266 -0
- sampledisco/visualization/visualization_embedding.py +603 -0
- sampledisco/visualization/visualization_helper.py +439 -0
- sampledisco/visualization/visualization_other.py +121 -0
- sampledisco/wrapper/__init__.py +0 -0
- sampledisco/wrapper/atac_wrapper.py +234 -0
- sampledisco/wrapper/multiomics_wrapper.py +483 -0
- sampledisco/wrapper/rna_wrapper.py +233 -0
- sampledisco/wrapper/wrapper.py +1646 -0
- sampledisco-0.1.0.dist-info/METADATA +254 -0
- sampledisco-0.1.0.dist-info/RECORD +89 -0
- sampledisco-0.1.0.dist-info/WHEEL +5 -0
- sampledisco-0.1.0.dist-info/entry_points.txt +2 -0
- sampledisco-0.1.0.dist-info/licenses/LICENSE +21 -0
- sampledisco-0.1.0.dist-info/top_level.txt +1 -0
sampledisco/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""SampleDisco — cross-omics, cross-condition sample embedding for single-cell data.
|
|
2
|
+
|
|
3
|
+
Public API (imported lazily so ``import sampledisco`` stays light and does not
|
|
4
|
+
pull scanpy / torch / scGLUE until you actually call into the pipeline):
|
|
5
|
+
|
|
6
|
+
import sampledisco
|
|
7
|
+
sampledisco.wrapper(...) # full pipeline (RNA / ATAC / multi-omics)
|
|
8
|
+
sampledisco.compute_sample_embedding(...) # the core method only
|
|
9
|
+
|
|
10
|
+
The CLI entry point is ``sampledisco --mode complex --config <yaml>`` (see ``sampledisco.cli``).
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
__version__ = "0.1.0"
|
|
15
|
+
|
|
16
|
+
__all__ = ["wrapper", "compute_sample_embedding"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def __getattr__(name: str):
    """Resolve the lazily exported public names on first access (PEP 562).

    Args:
        name: Attribute requested on the package that was not found in the
            module namespace.

    Returns:
        The ``wrapper`` function or the ``compute_sample_embedding`` function,
        imported on demand.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    # PEP 562 lazy attribute access — defer heavy imports to first use.
    if name == "wrapper":
        from sampledisco.wrapper.wrapper import wrapper as resolved
    elif name == "compute_sample_embedding":
        from sampledisco.sample_embedding import compute_sample_embedding as resolved
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Importing ``sampledisco.wrapper.wrapper`` makes the import system bind the
    # *subpackage* ``sampledisco.wrapper`` as the attribute ``wrapper`` on this
    # package. Without rebinding, the first ``sampledisco.wrapper`` access
    # returns the function but every later access finds the subpackage module
    # in the namespace (so ``sampledisco.wrapper(...)`` fails with "module is
    # not callable"). Caching the callable here keeps the documented API stable
    # and also avoids re-entering this hook on later lookups.
    # NOTE(review): if the subpackage was imported before this hook ever ran
    # (e.g. via ``sampledisco.cli``), the attribute is already the module and
    # this hook is not consulted; fixing that needs a rename outside this block.
    globals()[name] = resolved
    return resolved
|
sampledisco/cli.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
import traceback
|
|
4
|
+
import yaml
|
|
5
|
+
import os
|
|
6
|
+
import inspect
|
|
7
|
+
from sampledisco.wrapper.wrapper import wrapper
|
|
8
|
+
|
|
9
|
+
def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace with ``mode`` (required, ``"simple"`` or
        ``"complex"``) plus the optional string attributes ``count_data``,
        ``sample_meta_data``, ``output_directory`` and ``config``.
    """
    cli = argparse.ArgumentParser(description="Run the data processing wrapper.")

    cli.add_argument(
        "-m", "--mode",
        type=str,
        required=True,
        choices=["simple", "complex"],
        help="Run mode. Choose 'simple' or 'complex'.",
    )

    # Remaining options are all optional string paths; the first three are
    # used by simple mode, ``--config`` by complex mode. Registration order is
    # kept so the generated ``--help`` output is unchanged.
    path_options = (
        (("-c", "--count_data"), "Path to count data file"),
        (("-s", "--sample_meta_data"), "(Optional) Path to sample metadata file"),
        (("-o", "--output_directory"), "Path to output directory"),
        (("--config",), "Path to YAML config file"),
    )
    for flags, help_text in path_options:
        cli.add_argument(*flags, type=str, help=help_text)

    return cli.parse_args()
|
|
24
|
+
|
|
25
|
+
def load_config(config_path):
    """Read a YAML configuration file and return its parsed content.

    Args:
        config_path: Path to the YAML file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.

    Exits the process with status 1, after printing a message to stderr, when
    the path does not exist or is not a regular file.
    """
    if not os.path.exists(config_path):
        print(f"Error: Config file '{config_path}' does not exist.", file=sys.stderr)
        sys.exit(1)
    # A directory passes the existence check but cannot be opened; report it
    # the same way instead of surfacing a raw OSError traceback.
    if not os.path.isfile(config_path):
        print(f"Error: Config path '{config_path}' is not a file.", file=sys.stderr)
        sys.exit(1)
    # Open in binary mode so PyYAML performs its own encoding detection
    # (BOM check, UTF-8 otherwise) rather than depending on the locale's
    # default text encoding.
    with open(config_path, "rb") as f:
        return yaml.safe_load(f)
|
|
31
|
+
|
|
32
|
+
def validate_config(config, func):
    """Check that ``config`` can be passed to ``func`` as keyword arguments.

    Args:
        config: Mapping of parameter names to values (e.g. a parsed YAML file).
        func: Callable whose signature the config is validated against.

    Raises:
        ValueError: If ``config`` is not a mapping, contains a key that
            ``func`` does not accept, or omits a parameter of ``func`` that
            has no default value.
    """
    from collections.abc import Mapping

    # An empty YAML document parses to None; report that clearly instead of
    # failing later with "NoneType is not iterable".
    if not isinstance(config, Mapping):
        raise ValueError(
            "Config must be a mapping of parameter names to values, "
            f"got {type(config).__name__}"
        )

    signature_params = inspect.signature(func).parameters
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    # ``*args`` / ``**kwargs`` are not names a config can (or must) supply.
    named_params = {
        param_name: param
        for param_name, param in signature_params.items()
        if param.kind not in variadic_kinds
    }
    accepts_extra_keywords = any(
        param.kind is inspect.Parameter.VAR_KEYWORD
        for param in signature_params.values()
    )

    for key in config:
        if key not in named_params and not accepts_extra_keywords:
            raise ValueError(f"Unexpected parameter in config: '{key}'")

    # Only parameters without a default are truly required; the rest may be
    # omitted and fall back to the callable's own defaults.
    for param_name, param in named_params.items():
        if param.default is inspect.Parameter.empty and param_name not in config:
            raise ValueError(f"Missing required parameter in config: '{param_name}'")
|
|
40
|
+
|
|
41
|
+
def main():
    """Command-line entry point: dispatch to ``wrapper`` according to ``--mode``.

    In ``simple`` mode the count data path and output directory are mandatory;
    the sample metadata path is forwarded only when given. In ``complex`` mode
    a YAML config is loaded, validated against ``wrapper``'s signature and
    expanded into keyword arguments. Usage errors, and any exception raised
    while validating or running in complex mode, are reported on stderr and
    end the process with exit status 1.
    """
    cli = parse_args()

    if cli.mode == "simple":
        if not (cli.count_data and cli.output_directory):
            print("Error: In 'simple' mode, -c and -o must be provided.", file=sys.stderr)
            sys.exit(1)

        if not cli.sample_meta_data:
            wrapper(cli.count_data, output_directory=cli.output_directory)
        else:
            # All three paths are passed positionally, in this order.
            wrapper(cli.count_data, cli.sample_meta_data, cli.output_directory)
        return

    if cli.mode == "complex":
        if not cli.config:
            print("Error: In 'complex' mode, --config must be provided.", file=sys.stderr)
            sys.exit(1)

        settings = load_config(cli.config)

        try:
            validate_config(settings, wrapper)
            wrapper(**settings)
        except Exception as exc:
            # Top-level boundary: show the message and the full traceback,
            # then signal failure to the shell.
            print(f"Error: {exc}", file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)
            sys.exit(1)


if __name__ == "__main__":
    main()
|