ONTraC 0.0.4b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. ontrac-0.0.4b4/.gitignore +45 -0
  2. ontrac-0.0.4b4/CHANGELOG.md +49 -0
  3. ontrac-0.0.4b4/LICENSE +21 -0
  4. ontrac-0.0.4b4/ONTraC/__init__.py +0 -0
  5. ontrac-0.0.4b4/ONTraC/__pycache__/__init__.cpython-311.pyc +0 -0
  6. ontrac-0.0.4b4/ONTraC/__pycache__/__init__.cpython-312.pyc +0 -0
  7. ontrac-0.0.4b4/ONTraC/bin/GP.py +92 -0
  8. ontrac-0.0.4b4/ONTraC/bin/NTScore.py +46 -0
  9. ontrac-0.0.4b4/ONTraC/bin/ONTraC.py +109 -0
  10. ontrac-0.0.4b4/ONTraC/bin/__init__.py +0 -0
  11. ontrac-0.0.4b4/ONTraC/bin/createDataSet.py +40 -0
  12. ontrac-0.0.4b4/ONTraC/data.py +102 -0
  13. ontrac-0.0.4b4/ONTraC/log.py +41 -0
  14. ontrac-0.0.4b4/ONTraC/model/__init__.py +1 -0
  15. ontrac-0.0.4b4/ONTraC/model/_model.py +152 -0
  16. ontrac-0.0.4b4/ONTraC/model/dmon_exp_pool.py +168 -0
  17. ontrac-0.0.4b4/ONTraC/model/norm_dense_gcn_conv.py +89 -0
  18. ontrac-0.0.4b4/ONTraC/optparser/_GP.py +63 -0
  19. ontrac-0.0.4b4/ONTraC/optparser/_IO.py +104 -0
  20. ontrac-0.0.4b4/ONTraC/optparser/_NT.py +49 -0
  21. ontrac-0.0.4b4/ONTraC/optparser/_ONTraC.py +81 -0
  22. ontrac-0.0.4b4/ONTraC/optparser/__init__.py +4 -0
  23. ontrac-0.0.4b4/ONTraC/optparser/_create_dataset.py +88 -0
  24. ontrac-0.0.4b4/ONTraC/optparser/_train.py +235 -0
  25. ontrac-0.0.4b4/ONTraC/run/processes.py +212 -0
  26. ontrac-0.0.4b4/ONTraC/train/__init__.py +1 -0
  27. ontrac-0.0.4b4/ONTraC/train/_batch_train.py +254 -0
  28. ontrac-0.0.4b4/ONTraC/train/inspect_funcs.py +180 -0
  29. ontrac-0.0.4b4/ONTraC/train/loss_funs.py +178 -0
  30. ontrac-0.0.4b4/ONTraC/utils/NTScore.py +120 -0
  31. ontrac-0.0.4b4/ONTraC/utils/__init__.py +1 -0
  32. ontrac-0.0.4b4/ONTraC/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  33. ontrac-0.0.4b4/ONTraC/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  34. ontrac-0.0.4b4/ONTraC/utils/__pycache__/_utils.cpython-311.pyc +0 -0
  35. ontrac-0.0.4b4/ONTraC/utils/__pycache__/_utils.cpython-312.pyc +0 -0
  36. ontrac-0.0.4b4/ONTraC/utils/_utils.py +85 -0
  37. ontrac-0.0.4b4/ONTraC/utils/decorators.py +90 -0
  38. ontrac-0.0.4b4/ONTraC/utils/niche_net_constr.py +176 -0
  39. ontrac-0.0.4b4/ONTraC/version.py +1 -0
  40. ontrac-0.0.4b4/ONTraC.egg-info/PKG-INFO +166 -0
  41. ontrac-0.0.4b4/ONTraC.egg-info/SOURCES.txt +127 -0
  42. ontrac-0.0.4b4/ONTraC.egg-info/dependency_links.txt +1 -0
  43. ontrac-0.0.4b4/ONTraC.egg-info/entry_points.txt +5 -0
  44. ontrac-0.0.4b4/ONTraC.egg-info/requires.txt +8 -0
  45. ontrac-0.0.4b4/ONTraC.egg-info/top_level.txt +1 -0
  46. ontrac-0.0.4b4/PKG-INFO +166 -0
  47. ontrac-0.0.4b4/README.md +138 -0
  48. ontrac-0.0.4b4/docs/source/_static/images/ONTraC_structure.png +0 -0
  49. ontrac-0.0.4b4/docs/source/_static/images/examples/simulation/simulation_dataset_introduction.png +0 -0
  50. ontrac-0.0.4b4/docs/source/_static/images/tutorials/niche_cluster/Spatial_niche_cluster.png +0 -0
  51. ontrac-0.0.4b4/docs/source/_static/images/tutorials/niche_cluster/spatial_cluster_prob_E14.png +0 -0
  52. ontrac-0.0.4b4/docs/source/_static/images/tutorials/post_analysis/Spatial_cell_type.png +0 -0
  53. ontrac-0.0.4b4/docs/source/_static/images/tutorials/post_analysis/cell_level_NT_score.png +0 -0
  54. ontrac-0.0.4b4/docs/source/_static/images/tutorials/post_analysis/cell_level_NT_score_distribution_for_each_cell_type.png +0 -0
  55. ontrac-0.0.4b4/docs/source/_static/images/tutorials/post_analysis/cell_type_compostion.png +0 -0
  56. ontrac-0.0.4b4/docs/source/_static/images/tutorials/post_analysis/niche_level_NT_score.png +0 -0
  57. ontrac-0.0.4b4/examples/simulation/README.md +18 -0
  58. ontrac-0.0.4b4/examples/simulation/original_data.csv +1001 -0
  59. ontrac-0.0.4b4/examples/simulation/pseudotime_output.csv +1001 -0
  60. ontrac-0.0.4b4/examples/simulation/simulation_dataset_sce.rds +0 -0
  61. ontrac-0.0.4b4/examples/stereo_seq_brain/Stereo_seq_example.md +55 -0
  62. ontrac-0.0.4b4/examples/stereo_seq_brain/original_data.csv +26622 -0
  63. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final.log +1038 -0
  64. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E12_E1S3_out.csv.gz +0 -0
  65. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E12_E1S3_out_adj.csv.gz +0 -0
  66. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E12_E1S3_s.csv.gz +0 -0
  67. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E12_E1S3_z.csv.gz +0 -0
  68. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E14_E1S3_out.csv.gz +0 -0
  69. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E14_E1S3_out_adj.csv.gz +0 -0
  70. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E14_E1S3_s.csv.gz +0 -0
  71. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E14_E1S3_z.csv.gz +0 -0
  72. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E1S3_out.csv.gz +0 -0
  73. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E1S3_out_adj.csv.gz +0 -0
  74. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E1S3_s.csv.gz +0 -0
  75. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E1S3_z.csv.gz +0 -0
  76. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S6_out.csv.gz +0 -0
  77. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S6_out_adj.csv.gz +0 -0
  78. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S6_s.csv.gz +0 -0
  79. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S6_z.csv.gz +0 -0
  80. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S7_out.csv.gz +0 -0
  81. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S7_out_adj.csv.gz +0 -0
  82. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S7_s.csv.gz +0 -0
  83. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/E16_E2S7_z.csv.gz +0 -0
  84. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/consolidate_out.csv.gz +0 -0
  85. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/consolidate_out_adj.csv.gz +0 -0
  86. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/consolidate_s.csv.gz +0 -0
  87. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/epoch_0.pt +0 -0
  88. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_GNN/model_state_dict.pt +0 -0
  89. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/E12_E1S3_NTScore.csv.gz +0 -0
  90. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/E14_E1S3_NTScore.csv.gz +0 -0
  91. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/E16_E1S3_NTScore.csv.gz +0 -0
  92. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/E16_E2S6_NTScore.csv.gz +0 -0
  93. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/E16_E2S7_NTScore.csv.gz +0 -0
  94. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/NTScore.csv.gz +0 -0
  95. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/cell_NTScore.csv.gz +0 -0
  96. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/niche_NTScore.csv.gz +0 -0
  97. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_NTScore/niche_cluster_score.csv.gz +0 -0
  98. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E12_E1S3_CellTypeComposition.csv.gz +0 -0
  99. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E12_E1S3_Coordinates.csv +3674 -0
  100. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E12_E1S3_EdgeIndex.csv.gz +0 -0
  101. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E12_E1S3_NeighborIndicesMatrix.csv.gz +0 -0
  102. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E12_E1S3_NicheWeightMatrix.npz +0 -0
  103. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E14_E1S3_CellTypeComposition.csv.gz +0 -0
  104. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E14_E1S3_Coordinates.csv +3655 -0
  105. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E14_E1S3_EdgeIndex.csv.gz +0 -0
  106. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E14_E1S3_NeighborIndicesMatrix.csv.gz +0 -0
  107. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E14_E1S3_NicheWeightMatrix.npz +0 -0
  108. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E1S3_CellTypeComposition.csv.gz +0 -0
  109. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E1S3_Coordinates.csv +6626 -0
  110. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E1S3_EdgeIndex.csv.gz +0 -0
  111. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E1S3_NeighborIndicesMatrix.csv.gz +0 -0
  112. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E1S3_NicheWeightMatrix.npz +0 -0
  113. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S6_CellTypeComposition.csv.gz +0 -0
  114. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S6_Coordinates.csv +5458 -0
  115. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S6_EdgeIndex.csv.gz +0 -0
  116. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S6_NeighborIndicesMatrix.csv.gz +0 -0
  117. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S6_NicheWeightMatrix.npz +0 -0
  118. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S7_CellTypeComposition.csv.gz +0 -0
  119. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S7_Coordinates.csv +7213 -0
  120. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S7_EdgeIndex.csv.gz +0 -0
  121. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S7_NeighborIndicesMatrix.csv.gz +0 -0
  122. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/E16_E2S7_NicheWeightMatrix.npz +0 -0
  123. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/cell_type_code.csv +11 -0
  124. ontrac-0.0.4b4/examples/stereo_seq_brain/stereo_seq_final_preprocessing_dir/samples.yaml +31 -0
  125. ontrac-0.0.4b4/pyproject.toml +50 -0
  126. ontrac-0.0.4b4/setup.cfg +4 -0
  127. ontrac-0.0.4b4/tutorials/IO_files.md +147 -0
  128. ontrac-0.0.4b4/tutorials/niche_cluster.md +181 -0
  129. ontrac-0.0.4b4/tutorials/post_analysis.md +188 -0
@@ -0,0 +1,45 @@
1
+ # MacOSX temp
2
+ .DS_Store
3
+
4
+ # Distribution / packaging
5
+ .Python
6
+ build/
7
+ develop-eggs/
8
+ bld-dir/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ parts/
14
+ sdist/
15
+ var/
16
+ wheels/
17
+ *.egg-info/
18
+ .installed.cfg
19
+ *.egg
20
+ MANIFEST
21
+
22
+ # Jupyter Notebook
23
+ .ipynb_checkpoints
24
+
25
+ # pyenv
26
+ .python-version
27
+
28
+ # Environments
29
+ .env
30
+ .venv
31
+ env/
32
+ venv/
33
+ ENV/
34
+ env.bak/
35
+ venv.bak/
36
+
37
+ # mypy
38
+ .mypy_cache/
39
+ *.ipynb
40
+
41
+ # cache
42
+ __pycache__/
43
+
44
+ # debug
45
+ debug/
@@ -0,0 +1,49 @@
1
+ # Change log
2
+
3
+ ## [0.0.4] - 2024-Apr-16
4
+
5
+ Added:
6
+
7
+ - Added `simulation` data and tutorial
8
+ - Added `niche cluster` information output
9
+ - Added `niche cluster` tutorial
10
+ - Added handling of duplicate `Cell_ID` values
11
+
12
+ Changed:
13
+
14
+ - Make this repository public
15
+
16
+ Fixed:
17
+
18
+ - Fixed errors when there is only 1 sample for `post-analysis` tutorial
19
+
20
+ ## [0.0.3] - 2024-Apr-2
21
+
22
+ Added:
23
+
24
+ - Added `post-analysis` tutorial
25
+
26
+ Changed:
27
+
28
+ - Updated dependent packages information
29
+ - Updated installation tutorial
30
+ - Updated `stereo-seq` example
31
+
32
+ ## [0.0.2] - 2024-Mar-12
33
+
34
+ Added:
35
+
36
+ - Added package description
37
+ - Added `.gitignore` to remove unnecessary files
38
+ - Added `ONTraC` to run all steps together
39
+ - Added `NTScore` to generate NTScore from GNN output
40
+ - Added pip installation support
41
+
42
+ Changed:
43
+
44
+ - New environment constructions
45
+ - Running process
46
+ - Uniform parameters control
47
+ - Output directories
48
+ - `createDataSet` generates cell type composition and GNN input from raw input
49
+ - Input data format
ontrac-0.0.4b4/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Wen Wang, Shiwei Zheng, Sujung Crystal Shin, Guo-Cheng Yuan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
File without changes
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python
2
+
3
+ import random
4
+ import sys
5
+
6
+ import numpy as np
7
+ import torch
8
+
9
+ from ONTraC.model import GraphPooling
10
+ from ONTraC.optparser import opt_GP_validate, prepare_GP_optparser
11
+ from ONTraC.run.processes import *
12
+ from ONTraC.train import GPBatchTrain, SubBatchTrainProtocol
13
+ from ONTraC.train.inspect_funcs import loss_record
14
+ from ONTraC.utils import device_validate
15
+
16
+ # ------------------------------------
17
+ # Classes
18
+ # ------------------------------------
19
+
20
+
21
+ # ------------------------------------
22
+ # Functions
23
+ # ------------------------------------
24
def get_inspect_funcs() -> Optional[list[Callable]]:
    """
    Build the list of inspect functions that is handed to the training step.
    :return: list of inspect functions; it currently holds only ``loss_record``
    """
    inspect_funcs = [loss_record]
    return inspect_funcs
32
+
33
+
34
+ # ------------------------------------
35
+ # Main Function
36
+ # ------------------------------------
37
def main() -> None:
    """
    Run the graph pooling workflow: load options and data, train, evaluate,
    predict, and finally compute the NT score when predictions are available.
    :return: None
    """

    # ----- prepare -----
    options = load_parameters(opt_validate_func=opt_GP_validate, prepare_optparser_func=prepare_GP_optparser)
    device: torch.device = device_validate(device_name=options.device)
    dataset, sample_loader = load_data(options=options)
    # the same user-supplied seed drives the random, torch and numpy generators
    seed = options.seed
    random.seed(a=seed)
    torch.manual_seed(seed=seed)
    np.random.seed(seed=seed)

    # ----- train -----
    batch_train: SubBatchTrainProtocol = train(
        nn_model=GraphPooling,
        options=options,
        BatchTrain=GPBatchTrain,
        device=device,
        dataset=dataset,
        sample_loader=sample_loader,
        inspect_funcs=get_inspect_funcs(),
        model_name='GraphPooling',
    )

    # --- evaluate ---
    evaluate(batch_train=batch_train, model_name='GraphPooling')

    # ----- predict -----
    s_array, out_adj_array = predict(
        output_dir=options.GNN_dir,
        batch_train=batch_train,
        dataset=dataset,
        model_name='GraphPooling',
    )

    # ----- Pseudotime -----
    # the NT score needs both consolidated arrays; stop here if either is missing
    if s_array is None or out_adj_array is None:
        return
    NTScore(options=options,
            dataset=dataset,
            consolidate_s_array=s_array,
            consolidate_out_adj_array=out_adj_array)


# ------------------------------------
# Program running
# ------------------------------------
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
        sys.exit(0)
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env python
2
+
3
+ import sys
4
+
5
+ import numpy as np
6
+
7
+ from ONTraC.data import load_dataset
8
+ from ONTraC.optparser import opt_NT_validate, prepare_NT_optparser
9
+ from ONTraC.run.processes import *
10
+
11
+
12
+ # ------------------------------------
13
+ # Main Function
14
+ # ------------------------------------
15
def main() -> None:
    """
    Compute the NT score from consolidated GNN output stored in ``options.GNN_dir``.
    :return: None
    """

    # ----- prepare -----
    options = load_parameters(opt_validate_func=opt_NT_validate, prepare_optparser_func=prepare_NT_optparser)
    # only the dataset is needed here; the second returned value is discarded
    dataset, _ = load_dataset(options=options)
    # consolidated s_array and out_adj_array written to the GNN directory
    s_array = np.loadtxt(fname=f'{options.GNN_dir}/consolidate_s.csv.gz', delimiter=',')
    out_adj_array = np.loadtxt(fname=f'{options.GNN_dir}/consolidate_out_adj.csv.gz', delimiter=',')

    # ----- Pseudotime -----
    if s_array is None or out_adj_array is None:
        return
    NTScore(options=options,
            dataset=dataset,
            consolidate_s_array=s_array,
            consolidate_out_adj_array=out_adj_array)


# ------------------------------------
# Program running
# ------------------------------------
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
        sys.exit(0)
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env python
2
+
3
+ import random
4
+ import sys
5
+ from typing import Optional
6
+
7
+ import numpy as np
8
+ import torch
9
+
10
+ from ONTraC.log import *
11
+ from ONTraC.model import GraphPooling
12
+ from ONTraC.optparser import opt_ontrac_validate, prepare_ontrac_optparser
13
+ from ONTraC.run.processes import *
14
+ from ONTraC.train import GPBatchTrain, SubBatchTrainProtocol
15
+ from ONTraC.train.inspect_funcs import loss_record
16
+ from ONTraC.utils import device_validate
17
+ from ONTraC.utils.niche_net_constr import (construct_niche_network,
18
+ gen_samples_yaml,
19
+ load_original_data)
20
+
21
+
22
+ # ------------------------------------
23
+ # Functions
24
+ # ------------------------------------
25
def get_inspect_funcs() -> Optional[list[Callable]]:
    """
    Build the list of inspect functions that is handed to the training step.
    :return: list of inspect functions; it currently holds only ``loss_record``
    """
    inspect_funcs = [loss_record]
    return inspect_funcs
33
+
34
+
35
+ # ------------------------------------
36
+ # Main Function
37
+ # ------------------------------------
38
def main() -> None:
    """
    Run every step in sequence: niche network construction, graph pooling
    (train, evaluate, predict), niche cluster output and NT score.
    Input data files information should be stored in a YAML file.
    :return: None
    """

    # prepare options
    options = load_parameters(opt_validate_func=opt_ontrac_validate, prepare_optparser_func=prepare_ontrac_optparser)

    # ----- Niche Network Construct -----
    ori_data_df = load_original_data(options=options)
    # define edges for each sample
    construct_niche_network(options=options, ori_data_df=ori_data_df)
    # save samples.yaml
    gen_samples_yaml(options=options, ori_data_df=ori_data_df)

    # ----- Graph Pooling -----
    device: torch.device = device_validate(device_name=options.device)
    dataset, sample_loader = load_data(options=options)
    # the same user-supplied seed drives the random, torch and numpy generators
    seed = options.seed
    random.seed(a=seed)
    torch.manual_seed(seed=seed)
    np.random.seed(seed=seed)
    # train
    batch_train: SubBatchTrainProtocol = train(
        nn_model=GraphPooling,
        options=options,
        BatchTrain=GPBatchTrain,
        device=device,
        dataset=dataset,
        sample_loader=sample_loader,
        inspect_funcs=get_inspect_funcs(),
        model_name='GraphPooling',
    )
    # evaluate
    evaluate(batch_train=batch_train, model_name='GraphPooling')
    # predict
    s_array, out_adj_array = predict(
        output_dir=options.GNN_dir,
        batch_train=batch_train,
        dataset=dataset,
        model_name='GraphPooling',
    )

    # niche cluster: both remaining steps need consolidate_s_array
    if s_array is None:
        return
    samples_params = read_yaml_file(f'{options.preprocessing_dir}/samples.yaml')
    rel_params = get_rel_params(options=options, params=samples_params)
    graph_pooling_output(ori_data_df=ori_data_df,
                         dataset=dataset,
                         rel_params=rel_params,
                         consolidate_s_array=s_array,
                         output_dir=options.GNN_dir)

    # ----- NT score -----
    if out_adj_array is None:
        return
    NTScore(options=options,
            dataset=dataset,
            consolidate_s_array=s_array,
            consolidate_out_adj_array=out_adj_array)


# ------------------------------------
# Program running
# ------------------------------------
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
        sys.exit(0)
File without changes
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env python
2
+
3
+ import sys
4
+
5
+ from ONTraC.log import *
6
+ from ONTraC.optparser import (opt_create_ds_validate, prepare_create_ds_optparser)
7
+ from ONTraC.utils.niche_net_constr import load_original_data, construct_niche_network, gen_samples_yaml
8
+
9
+
10
+ # ------------------------------------
11
+ # Main Function
12
+ # ------------------------------------
13
def main() -> None:
    """
    Build the niche network files and ``samples.yaml`` from the original data.
    Input data files information should be stored in a YAML file.
    :return: None
    """

    # prepare options
    optparser = prepare_create_ds_optparser()
    options = opt_create_ds_validate(optparser)

    # load original data
    ori_data_df = load_original_data(options=options, data_file=options.dataset)

    # first define edges for each sample, then save samples.yaml
    for step in (construct_niche_network, gen_samples_yaml):
        step(options=options, ori_data_df=ori_data_df)


# ------------------------------------
# Program running
# ------------------------------------
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
        sys.exit(0)
@@ -0,0 +1,102 @@
1
+ from optparse import Values
2
+ from typing import Dict, List, Tuple
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import torch
7
+ import torch_geometric.transforms as T
8
+ from torch_geometric.data import Data, InMemoryDataset
9
+ from torch_geometric.loader import DenseDataLoader
10
+
11
+ from .log import *
12
+ from .utils import count_lines, device_validate, get_rel_params, read_yaml_file
13
+
14
+
15
+ # ------------------------------------
16
+ # Classes
17
+ # ------------------------------------
18
class SpatailOmicsDataset(InMemoryDataset):
    """
    In-memory dataset assembled from the samples listed under ``params['Data']``.
    Each sample contributes one ``Data`` object built from its ``Features``,
    ``EdgeIndex`` and ``Coordinates`` files.
    """

    def __init__(self, root, params: Dict, transform=None, pre_transform=None):
        # NOTE(review): params is stored before the parent constructor runs,
        # presumably because that constructor may call process() - confirm
        self.params = params
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        # no raw files are declared
        return []

    @property
    def processed_file_names(self):
        return ['data.pt']

    def download(self):
        # nothing to download
        pass

    def process(self):
        """
        Read every sample's files, build one ``Data`` object per sample and
        save the collated result to the first processed path.
        """
        graphs = []
        for index, sample in enumerate(self.params['Data']):
            info(f'Processing sample {index + 1} of {len(self.params["Data"])}')
            features = torch.from_numpy(np.loadtxt(sample['Features'], dtype=np.float32, delimiter=','))
            # transpose the loaded edge table before handing it to Data
            edge_table = torch.from_numpy(np.loadtxt(sample['EdgeIndex'], dtype=np.int64, delimiter=','))
            edge_index = edge_table.t().contiguous()
            # TODO: support 3D coordinates
            coordinates = pd.read_csv(sample['Coordinates'])[['x', 'y']].values
            position = torch.from_numpy(coordinates)
            graphs.append(Data(x=features, edge_index=edge_index, pos=position, name=sample['Name']))
        merged, slices = self.collate(graphs)
        torch.save((merged, slices), self.processed_paths[0])
52
+
53
+
54
+ # ------------------------------------
55
+ # Misc functions
56
+ # ------------------------------------
57
def max_nodes(samples: List[Dict[str, str]]) -> int:
    """
    Get the maximum number of nodes in a dataset
    :param samples: List[Dict[str, str]], list of samples; the line count of
        each sample's ``'Coordinates'`` file (via ``count_lines``) is used as
        its node count
    :return: int, maximum number of nodes (0 when ``samples`` is empty)
    """
    # the leading 0 keeps the result at 0 for an empty sample list
    return max([0] + [count_lines(sample['Coordinates']) for sample in samples])
67
+
68
+
69
def load_dataset(options: Values) -> Tuple[SpatailOmicsDataset, Data]:
    """
    Load the dataset described by ``samples.yaml`` together with one batch
    that holds every sample.
    :param options: Values, parsed options; ``preprocessing_dir`` is read here
    :return: (dataset, data), where data is the first batch of a
        DenseDataLoader whose batch size equals the dataset length, moved to
        the validated device
    """
    # NOTE(review): device_validate is called without arguments, so no device
    # setting from options is forwarded here - confirm this is intended
    target_device = device_validate()
    samples_yaml = f'{options.preprocessing_dir}/samples.yaml'
    rel_params = get_rel_params(options, read_yaml_file(samples_yaml))
    dataset = create_torch_dataset(options, rel_params)
    # a batch size equal to the dataset length puts all samples in one batch
    loader = DenseDataLoader(dataset, batch_size=len(dataset))
    first_batch = next(iter(loader))
    return dataset, first_batch.to(target_device)
77
+
78
+
79
+ # ------------------------------------
80
+ # Flow control functions
81
+ # ------------------------------------
82
def create_torch_dataset(options: Values, params: Dict) -> SpatailOmicsDataset:
    """
    Create torch dataset
    :param options: Values, parsed options; ``preprocessing_dir`` is used as the dataset root
    :param params: Dict, input samples; its ``'Data'`` entry lists the samples
    :return: SpatailOmicsDataset, dataset created with a ``ToDense`` transform
    """

    # Step 1: get the maximum number of nodes, rounded up to the nearest 100
    largest_sample = max_nodes(params['Data'])
    m_nodes = 100 * int(np.ceil(largest_sample / 100.0))
    info(f'Maximum number of nodes: {m_nodes}')

    # Step 2: create the torch dataset
    to_dense = T.ToDense(m_nodes)  # transform edge_index to adj matrix
    return SpatailOmicsDataset(root=options.preprocessing_dir, params=params, transform=to_dense)
@@ -0,0 +1,41 @@
1
+ import sys
2
+ import time
3
+
4
+
5
def get_current_time() -> str:
    """Return the current local time formatted as ``HH:MM:SS``."""
    # strftime falls back to the current local time when no time tuple is given
    return time.strftime('%H:%M:%S')


def _emit(stream, message: str) -> None:
    """Write one time-stamped line to *stream* and flush it immediately."""
    stream.write(f'{get_current_time()} --- {message}\n')
    stream.flush()


def write_direct_message(message: str):
    """Write a time-stamped message to standard output."""
    # sys.stdout is looked up at call time so later redirection is honoured
    _emit(sys.stdout, message)


def debug(message: str):
    """Write a DEBUG line to standard output."""
    write_direct_message(f'DEBUG: {message}')


def info(message: str):
    """Write an INFO line to standard output."""
    write_direct_message(f'INFO: {message}')


def write_direct_message_err(message: str):
    """Write a time-stamped message to standard error."""
    _emit(sys.stderr, message)


def warning(message: str):
    """Write a WARNING line to standard error."""
    write_direct_message_err(f'WARNING: {message}')


def error(message: str):
    """Write an ERROR line to standard error."""
    write_direct_message_err(f'ERROR: {message}')


def critical(message: str):
    """Write a CRITICAL line to standard error."""
    write_direct_message_err(f'CRITICAL: {message}')


__all__ = ['debug', 'info', 'warning', 'error', 'critical']
@@ -0,0 +1 @@
1
+ from ._model import *