PyPI - ONTraC - Versions diffs - 0.0.4b4__py3-none-any.whl - Mend

ONTraC 0.0.4b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

ONTraC/__init__.py +0 -0
ONTraC/__pycache__/__init__.cpython-311.pyc +0 -0
ONTraC/__pycache__/__init__.cpython-312.pyc +0 -0
ONTraC/bin/GP.py +92 -0
ONTraC/bin/NTScore.py +46 -0
ONTraC/bin/ONTraC.py +109 -0
ONTraC/bin/__init__.py +0 -0
ONTraC/bin/createDataSet.py +40 -0
ONTraC/data.py +102 -0
ONTraC/log.py +41 -0
ONTraC/model/__init__.py +1 -0
ONTraC/model/_model.py +152 -0
ONTraC/model/dmon_exp_pool.py +168 -0
ONTraC/model/norm_dense_gcn_conv.py +89 -0
ONTraC/optparser/_GP.py +63 -0
ONTraC/optparser/_IO.py +104 -0
ONTraC/optparser/_NT.py +49 -0
ONTraC/optparser/_ONTraC.py +81 -0
ONTraC/optparser/__init__.py +4 -0
ONTraC/optparser/_create_dataset.py +88 -0
ONTraC/optparser/_train.py +235 -0
ONTraC/run/processes.py +212 -0
ONTraC/train/__init__.py +1 -0
ONTraC/train/_batch_train.py +254 -0
ONTraC/train/inspect_funcs.py +180 -0
ONTraC/train/loss_funs.py +178 -0
ONTraC/utils/NTScore.py +120 -0
ONTraC/utils/__init__.py +1 -0
ONTraC/utils/__pycache__/__init__.cpython-311.pyc +0 -0
ONTraC/utils/__pycache__/__init__.cpython-312.pyc +0 -0
ONTraC/utils/__pycache__/_utils.cpython-311.pyc +0 -0
ONTraC/utils/__pycache__/_utils.cpython-312.pyc +0 -0
ONTraC/utils/_utils.py +85 -0
ONTraC/utils/decorators.py +90 -0
ONTraC/utils/niche_net_constr.py +176 -0
ONTraC/version.py +1 -0
ONTraC-0.0.4b4.dist-info/LICENSE +21 -0
ONTraC-0.0.4b4.dist-info/METADATA +166 -0
ONTraC-0.0.4b4.dist-info/RECORD +42 -0
ONTraC-0.0.4b4.dist-info/WHEEL +5 -0
ONTraC-0.0.4b4.dist-info/entry_points.txt +5 -0
ONTraC-0.0.4b4.dist-info/top_level.txt +1 -0

ONTraC/__init__.py ADDED Viewed

File without changes

ONTraC/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file

ONTraC/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file

ONTraC/bin/GP.py ADDED Viewed

@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+import random
+import sys
+import numpy as np
+import torch
+from ONTraC.model import GraphPooling
+from ONTraC.optparser import opt_GP_validate, prepare_GP_optparser
+from ONTraC.run.processes import *
+from ONTraC.train import GPBatchTrain, SubBatchTrainProtocol
+from ONTraC.train.inspect_funcs import loss_record
+from ONTraC.utils import device_validate
+# ------------------------------------
+# Classes
+# ------------------------------------
+# ------------------------------------
+# Functions
+# ------------------------------------
+def get_inspect_funcs() -> Optional[list[Callable]]:
+    """
+    Inspect function list
+    Takes no parameters. The returned list is handed to `train` as `inspect_funcs` by `main`.
+    :return: list of inspect functions (currently only `loss_record`)
+    """
+    # NOTE(review): `Optional` and `Callable` are not imported explicitly in this file; presumably they
+    # arrive through `from ONTraC.run.processes import *` -- confirm.
+    return [loss_record]
+# ------------------------------------
+# Main Function
+# ------------------------------------
+def main() -> None:
+    """
+    Main function of the GP script: load options and data, train and evaluate the GraphPooling model,
+    run prediction and, when both consolidated arrays are returned, compute the NT score.
+    :return: None
+    """
+    # ----- prepare -----
+    # load parameters
+    options = load_parameters(opt_validate_func=opt_GP_validate, prepare_optparser_func=prepare_GP_optparser)
+    # device
+    device: torch.device = device_validate(device_name=options.device)
+    # load data
+    dataset, sample_loader = load_data(options=options)
+    # random seed
+    # the single `options.seed` value seeds Python's `random`, PyTorch and NumPy alike
+    # NOTE(review): seeding happens after `load_data`; keep this order unless loading is confirmed not to
+    # draw random numbers
+    n_seed = t_seed = r_seed = options.seed
+    random.seed(a=r_seed)
+    torch.manual_seed(seed=t_seed)
+    np.random.seed(seed=n_seed)
+    # ----- train -----
+    inspect_funcs_list = get_inspect_funcs()
+    batch_train: SubBatchTrainProtocol = train(nn_model=GraphPooling,
+                                               options=options,
+                                               BatchTrain=GPBatchTrain,
+                                               device=device,
+                                               dataset=dataset,
+                                               sample_loader=sample_loader,
+                                               inspect_funcs=inspect_funcs_list,
+                                               model_name='GraphPooling')
+    # --- evaluate ---
+    evaluate(batch_train=batch_train, model_name='GraphPooling')
+    # ----- predict -----
+    consolidate_s_array, consolidate_out_adj_array = predict(output_dir=options.GNN_dir,
+                                                             batch_train=batch_train,
+                                                             dataset=dataset,
+                                                             model_name='GraphPooling')
+    # ----- Pseudotime -----
+    # the NT score needs both arrays, so it is skipped when `predict` returned None for either
+    if consolidate_s_array is not None and consolidate_out_adj_array is not None:
+        NTScore(options=options,
+                dataset=dataset,
+                consolidate_s_array=consolidate_s_array,
+                consolidate_out_adj_array=consolidate_out_adj_array)
+# ------------------------------------
+# Program running
+# ------------------------------------
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        # Ctrl-C is treated as a normal stop: write a short note to stderr and exit with status 0
+        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
+        sys.exit(0)

ONTraC/bin/NTScore.py ADDED Viewed

@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+import sys
+import numpy as np
+from ONTraC.data import load_dataset
+from ONTraC.optparser import opt_NT_validate, prepare_NT_optparser
+from ONTraC.run.processes import *
+# ------------------------------------
+# Main Function
+# ------------------------------------
+def main() -> None:
+    """
+    Main function of the NTScore script: load options and the dataset, read the consolidated arrays saved
+    under `options.GNN_dir` and compute the NT score from them.
+    :return: None
+    """
+    # ----- prepare -----
+    # --- load parameters ---
+    options = load_parameters(opt_validate_func=opt_NT_validate, prepare_optparser_func=prepare_NT_optparser)
+    # --- load data ---
+    # only the dataset is needed; the second value returned by `load_dataset` is discarded
+    dataset, _ = load_dataset(options=options)
+    # load consolidated s_array and out_adj_array
+    consolidate_s_array = np.loadtxt(fname=f'{options.GNN_dir}/consolidate_s.csv.gz', delimiter=',')
+    consolidate_out_adj_array = np.loadtxt(fname=f'{options.GNN_dir}/consolidate_out_adj.csv.gz', delimiter=',')
+    # ----- Pseudotime -----
+    # NOTE(review): `np.loadtxt` presumably returns an array or raises, so this guard looks always true here;
+    # it mirrors the guard used in the other entry scripts -- confirm before removing
+    if consolidate_s_array is not None and consolidate_out_adj_array is not None:
+        NTScore(options=options,
+                dataset=dataset,
+                consolidate_s_array=consolidate_s_array,
+                consolidate_out_adj_array=consolidate_out_adj_array)
+# ------------------------------------
+# Program running
+# ------------------------------------
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        # Ctrl-C is treated as a normal stop: write a short note to stderr and exit with status 0
+        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
+        sys.exit(0)

ONTraC/bin/ONTraC.py ADDED Viewed

@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+import random
+import sys
+from typing import Optional
+import numpy as np
+import torch
+from ONTraC.log import *
+from ONTraC.model import GraphPooling
+from ONTraC.optparser import opt_ontrac_validate, prepare_ontrac_optparser
+from ONTraC.run.processes import *
+from ONTraC.train import GPBatchTrain, SubBatchTrainProtocol
+from ONTraC.train.inspect_funcs import loss_record
+from ONTraC.utils import device_validate
+from ONTraC.utils.niche_net_constr import (construct_niche_network,
+                                           gen_samples_yaml,
+                                           load_original_data)
+# ------------------------------------
+# Functions
+# ------------------------------------
+def get_inspect_funcs() -> Optional[list[Callable]]:
+    """
+    Inspect function list
+    Takes no parameters. The returned list is handed to `train` as `inspect_funcs` by `main`.
+    :return: list of inspect functions (currently only `loss_record`)
+    """
+    # NOTE(review): `Callable` is not imported explicitly in this file; presumably it arrives through one of
+    # the star imports -- confirm.
+    return [loss_record]
+# ------------------------------------
+# Main Function
+# ------------------------------------
+def main() -> None:
+    """
+    main function
+    Runs the whole pipeline in order: niche network construction, GraphPooling training, evaluation and
+    prediction, niche cluster output and NT score.
+    Input data files information should be stored in a YAML file.
+    """
+    # prepare options
+    options = load_parameters(opt_validate_func=opt_ontrac_validate, prepare_optparser_func=prepare_ontrac_optparser)
+    # ----- Niche Network Construct -----
+    # load original data
+    ori_data_df = load_original_data(options=options)
+    # define edges for each sample
+    construct_niche_network(options=options, ori_data_df=ori_data_df)
+    # save samples.yaml
+    gen_samples_yaml(options=options, ori_data_df=ori_data_df)
+    # ----- Graph Pooling -----
+    # device
+    device: torch.device = device_validate(device_name=options.device)
+    # load data
+    dataset, sample_loader = load_data(options=options)
+    # random seed
+    # the single `options.seed` value seeds Python's `random`, PyTorch and NumPy alike
+    # NOTE(review): seeding happens after the niche network construction and `load_data`; keep this order
+    # unless those steps are confirmed not to draw random numbers
+    n_seed = t_seed = r_seed = options.seed
+    random.seed(a=r_seed)
+    torch.manual_seed(seed=t_seed)
+    np.random.seed(seed=n_seed)
+    # train
+    inspect_funcs_list = get_inspect_funcs()
+    batch_train: SubBatchTrainProtocol = train(nn_model=GraphPooling,
+                                               options=options,
+                                               BatchTrain=GPBatchTrain,
+                                               device=device,
+                                               dataset=dataset,
+                                               sample_loader=sample_loader,
+                                               inspect_funcs=inspect_funcs_list,
+                                               model_name='GraphPooling')
+    # evaluate
+    evaluate(batch_train=batch_train, model_name='GraphPooling')
+    # predict
+    consolidate_s_array, consolidate_out_adj_array = predict(output_dir=options.GNN_dir,
+                                                             batch_train=batch_train,
+                                                             dataset=dataset,
+                                                             model_name='GraphPooling')
+    # niche cluster
+    # only the assignment array is required for this step; samples.yaml is re-read from the preprocessing dir
+    if consolidate_s_array is not None:
+        graph_pooling_output(ori_data_df=ori_data_df,
+                             dataset=dataset,
+                             rel_params=get_rel_params(
+                                 options=options, params=read_yaml_file(f'{options.preprocessing_dir}/samples.yaml')),
+                             consolidate_s_array=consolidate_s_array,
+                             output_dir=options.GNN_dir)
+    # ----- NT score -----
+    # the NT score needs both arrays, so it is skipped when `predict` returned None for either
+    if consolidate_s_array is not None and consolidate_out_adj_array is not None:
+        NTScore(options=options,
+                dataset=dataset,
+                consolidate_s_array=consolidate_s_array,
+                consolidate_out_adj_array=consolidate_out_adj_array)
+# ------------------------------------
+# Program running
+# ------------------------------------
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        # Ctrl-C is treated as a normal stop: write a short note to stderr and exit with status 0
+        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
+        sys.exit(0)

ONTraC/bin/__init__.py ADDED Viewed

File without changes

ONTraC/bin/createDataSet.py ADDED Viewed

@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+import sys
+from ONTraC.log import *
+from ONTraC.optparser import (opt_create_ds_validate, prepare_create_ds_optparser)
+from ONTraC.utils.niche_net_constr import load_original_data, construct_niche_network, gen_samples_yaml
+# ------------------------------------
+# Main Function
+# ------------------------------------
+def main() -> None:
+    """
+    main function
+    Runs only the preprocessing steps: load the original data, construct the niche network and write
+    samples.yaml.
+    Input data files information should be stored in a YAML file.
+    """
+    # prepare options
+    options = opt_create_ds_validate(prepare_create_ds_optparser())
+    # load original data
+    # NOTE(review): `data_file` is passed here, while ONTraC/bin/ONTraC.py calls `load_original_data` with
+    # `options` only -- confirm that the function accepts both call forms
+    ori_data_df = load_original_data(options=options, data_file=options.dataset)
+    # define edges for each sample
+    construct_niche_network(options=options, ori_data_df=ori_data_df)
+    # save samples.yaml
+    gen_samples_yaml(options=options, ori_data_df=ori_data_df)
+# ------------------------------------
+# Program running
+# ------------------------------------
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        # Ctrl-C is treated as a normal stop: write a short note to stderr and exit with status 0
+        sys.stderr.write("User interrupts me! ;-) See you ^.^!\n")
+        sys.exit(0)

ONTraC/data.py ADDED Viewed

@@ -0,0 +1,102 @@
+from optparse import Values
+from typing import Dict, List, Tuple
+import numpy as np
+import pandas as pd
+import torch
+import torch_geometric.transforms as T
+from torch_geometric.data import Data, InMemoryDataset
+from torch_geometric.loader import DenseDataLoader
+from .log import *
+from .utils import count_lines, device_validate, get_rel_params, read_yaml_file
+# ------------------------------------
+# Classes
+# ------------------------------------
+class SpatailOmicsDataset(InMemoryDataset):
+    """
+    In-memory dataset with one graph per entry of `params['Data']`.
+    Every sample entry is read through its 'Features', 'EdgeIndex', 'Coordinates' and 'Name' keys.
+    """
+    def __init__(self, root, params: Dict, transform=None, pre_transform=None):
+        # `params` is stored before the parent constructor runs
+        # NOTE(review): presumably the parent constructor may call `process`, which reads it -- confirm
+        self.params = params
+        super().__init__(root, transform, pre_transform)
+        self.data, self.slices = torch.load(self.processed_paths[0])
+    @property
+    def raw_file_names(self):
+        """No raw files are declared."""
+        # return list(
+        #     flatten([[sample for name, sample in data.items() if name != 'Name'] for data in self.params['Data']]))
+        return []
+    @property
+    def processed_file_names(self):
+        """Single processed file holding the collated dataset."""
+        return ['data.pt']
+    def download(self):
+        """Nothing to download."""
+        pass
+    def process(self):
+        """Build one `Data` object per sample, collate them and save the result to the processed path."""
+        samples = self.params['Data']
+        total = len(samples)
+        graphs = []
+        for number, sample in enumerate(samples, start=1):
+            info(f'Processing sample {number} of {total}')
+            features = torch.from_numpy(np.loadtxt(sample['Features'], dtype=np.float32, delimiter=','))
+            edges = torch.from_numpy(np.loadtxt(sample['EdgeIndex'], dtype=np.int64, delimiter=','))
+            # TODO: support 3D coordinates
+            coords = torch.from_numpy(pd.read_csv(sample['Coordinates'])[['x', 'y']].values)
+            # the edge list is transposed before being stored as `edge_index`
+            graphs.append(Data(x=features, edge_index=edges.t().contiguous(), pos=coords, name=sample['Name']))
+        merged, slices = self.collate(graphs)
+        torch.save((merged, slices), self.processed_paths[0])
+# ------------------------------------
+# Misc functions
+# ------------------------------------
+def max_nodes(samples: List[Dict[str, str]]) -> int:
+    """
+    Get the maximum number of nodes in a dataset
+    :param samples: List[Dict[str, str]], list of samples; the 'Coordinates' entry of each is counted
+    :return: int, largest value `count_lines` reports for any 'Coordinates' file (0 for an empty list)
+    """
+    line_counts = [count_lines(sample['Coordinates']) for sample in samples]
+    # the leading 0 reproduces the starting value of the running maximum
+    return max([0, *line_counts])
+def load_dataset(options: Values) -> Tuple[SpatailOmicsDataset, Data]:
+    """
+    Load the dataset described by `<preprocessing_dir>/samples.yaml` and batch all of its samples together
+    :param options: Values, parsed options; `preprocessing_dir` is read here
+    :return: Tuple of the dataset and the first batch of a loader whose batch size equals the dataset length,
+        moved to the validated device
+    """
+    # NOTE(review): no device name is passed here, whereas the bin scripts pass `options.device` -- confirm
+    # this is intended
+    target_device = device_validate()
+    yaml_params = read_yaml_file(f'{options.preprocessing_dir}/samples.yaml')
+    dataset = create_torch_dataset(options, get_rel_params(options, yaml_params))
+    loader = DenseDataLoader(dataset, batch_size=len(dataset))
+    batch = next(iter(loader)).to(target_device)
+    return dataset, batch
+# ------------------------------------
+# Flow control functions
+# ------------------------------------
+def create_torch_dataset(options: Values, params: Dict) -> SpatailOmicsDataset:
+    """
+    Create torch dataset
+    :param options: Values, parsed options; `preprocessing_dir` is used as the dataset root
+    :param params: Dict, input samples (read from its 'Data' entry)
+    :return: SpatailOmicsDataset, the dataset with a ToDense transform attached
+    """
+    # ------------------------------------
+    # Step 1: maximum number of nodes over all samples, rounded up to the next multiple of 100
+    padded_nodes = int(np.ceil(max_nodes(params['Data']) / 100.0)) * 100
+    info(f'Maximum number of nodes: {padded_nodes}')
+    # ------------------------------------
+    # Step 2: create the torch dataset; the transform turns edge_index into an adj matrix
+    to_dense = T.ToDense(padded_nodes)
+    return SpatailOmicsDataset(root=options.preprocessing_dir, params=params, transform=to_dense)

ONTraC/log.py ADDED Viewed

@@ -0,0 +1,41 @@
+import sys
+import time
+def get_current_time() -> str:
+    """Return the current local time formatted as HH:MM:SS."""
+    # without an explicit time tuple, strftime formats the current local time
+    return time.strftime('%H:%M:%S')
+def write_direct_message(message: str):
+    """Write `message` to stdout as one line prefixed with the current time, then flush."""
+    line = f'{get_current_time()} --- {message}\n'
+    out = sys.stdout
+    out.write(line)
+    out.flush()
+def debug(message: str):
+    # DEBUG messages go to stdout through `write_direct_message`
+    write_direct_message(f'DEBUG: {message}')
+def info(message: str):
+    # INFO messages go to stdout through `write_direct_message`
+    write_direct_message(f'INFO: {message}')
+def write_direct_message_err(message: str):
+    """Write `message` to stderr as one line prefixed with the current time, then flush."""
+    line = f'{get_current_time()} --- {message}\n'
+    err = sys.stderr
+    err.write(line)
+    err.flush()
+def warning(message: str):
+    # WARNING messages go to stderr through `write_direct_message_err`
+    write_direct_message_err(f'WARNING: {message}')
+def error(message: str):
+    # ERROR messages go to stderr through `write_direct_message_err`; nothing is raised here
+    write_direct_message_err(f'ERROR: {message}')
+def critical(message: str):
+    # CRITICAL messages go to stderr through `write_direct_message_err`; the process is not terminated here
+    write_direct_message_err(f'CRITICAL: {message}')
+# Only the five level helpers are exported by `from ONTraC.log import *`; the time and write helpers are not.
+__all__ = ['debug', 'info', 'warning', 'error', 'critical']

ONTraC/model/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from ._model import *

ONTraC/model/_model.py ADDED Viewed

@@ -0,0 +1,152 @@
+from typing import Optional, Tuple
+import torch
+from torch import Tensor
+from ..log import *
+from .dmon_exp_pool import DMoNPooling
+from .norm_dense_gcn_conv import NormDenseGCNConv
+class NodePooling(torch.nn.Module):
+    """
+    NodePooling: thin wrapper around a single DMoNPooling layer.
+    """
+    def __init__(self, input_feats, k: int, dropout: float = 0, exponent: float = 1, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.k = k
+        self.dropout = dropout
+        self.exponent = exponent
+        # NOTE(review): the pooling layer is built with dropout=0, so `dropout` is only stored on the
+        # instance -- confirm this is intended
+        self.pool = DMoNPooling(channels=input_feats, k=k, dropout=0, exponent=self.exponent)
+        self.reset_parameters()
+    def reset_parameters(self) -> None:
+        """Reset the parameters of the pooling layer."""
+        self.pool.reset_parameters()
+    def forward(self,
+                x: Tensor,
+                adj: Tensor,
+                mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
+        r"""
+        forward function
+        Args:
+            x (torch.Tensor): Node feature tensor
+                :math:`\mathbf{X} \in \mathbb{R}^{B \times N \times F}`, with
+                batch-size :math:`B`, (maximum) number of nodes :math:`N` for
+                each graph, and feature dimension :math:`F`.
+            adj (torch.Tensor): Adjacency tensor
+                :math:`\mathbf{A} \in \mathbb{R}^{B \times N \times N}`.
+            mask (torch.Tensor, optional): Mask matrix
+                :math:`\mathbf{M} \in {\{ 0, 1 \}}^{B \times N}` indicating
+                the valid nodes for each graph. (default: :obj:`None`)
+        Returns:
+            Tuple of six tensors, in the order produced by the pooling layer:
+            s, out, out_adj, spectral_loss, ortho_loss, cluster_loss
+        """
+        assign, pooled_x, pooled_adj, spectral, ortho, cluster = self.pool(x=x, adj=adj, mask=mask)
+        return assign, pooled_x, pooled_adj, spectral, ortho, cluster
+    def predict(self, x: Tensor, adj: Tensor, mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor]:
+        """Run the pooling layer and keep only its first three outputs (s, out, out_adj)."""
+        assign, pooled_x, pooled_adj, *_losses = self.pool(x=x, adj=adj, mask=mask)
+        return assign, pooled_x, pooled_adj
+class GraphPooling(torch.nn.Module):
+    """
+    GNN with Node Pooling: two NormDenseGCNConv layers, each followed by SELU, then a NodePooling layer.
+    """
+    def __init__(self,
+                 input_feats: int,
+                 hidden_feats: int,
+                 k: int,
+                 dropout: float = 0,
+                 exponent: float = 1,
+                 *args,
+                 **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.k = k
+        # sub-modules are created in the same order as before: gcn1, activation1, gcn2, activation2, pool
+        self.gcn1 = NormDenseGCNConv(input_feats, hidden_feats)
+        self.activation1 = torch.nn.SELU()
+        self.gcn2 = NormDenseGCNConv(hidden_feats, hidden_feats)
+        self.activation2 = torch.nn.SELU()
+        self.pool = NodePooling(input_feats=hidden_feats, k=k, dropout=dropout, exponent=exponent)
+        self.reset_parameters()
+    def reset_parameters(self) -> None:
+        """Reset both GCN layers and the pooling layer, in that order."""
+        for layer in (self.gcn1, self.gcn2, self.pool):
+            layer.reset_parameters()
+    def _encode(self, x: Tensor, adj: Tensor, mask: Optional[Tensor] = None) -> Tensor:
+        """Apply GCN layer 1 + SELU, then GCN layer 2 + SELU, and return the node embedding."""
+        hidden = self.activation1(self.gcn1(x=x, adj=adj, mask=mask))
+        return self.activation2(self.gcn2(x=hidden, adj=adj, mask=mask))
+    def forward(self,
+                x: Tensor,
+                adj: Tensor,
+                mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
+        r"""
+        forward function
+        X' = \mathbf{\hat{L}}X\mathbf{\Theta}
+        \mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}
+        \mathbf{\hat{D}}_{ii} = \sum_{j=1}^N \mathbf{\hat{A}}_{ij}
+        \mathbf{\hat{L}} = \mathbf{\hat{D}}^{-1/2}\mathbf{\hat{A}}\mathbf{\hat{D}}^{-1/2}
+        Args:
+            x (torch.Tensor): Node feature tensor
+                :math:`\mathbf{X} \in \mathbb{R}^{B \times N \times F}`, with
+                batch-size :math:`B`, (maximum) number of nodes :math:`N` for
+                each graph, and feature dimension :math:`F`.
+            adj (torch.Tensor): Adjacency tensor
+                :math:`\mathbf{A} \in \mathbb{R}^{B \times N \times N}`.
+            mask (torch.Tensor, optional): Mask matrix
+                :math:`\mathbf{M} \in {\{ 0, 1 \}}^{B \times N}` indicating
+                the valid nodes for each graph. (default: :obj:`None`)
+        Returns:
+            Tuple of six tensors, in the order produced by the pooling layer:
+            s, out, out_adj, spectral_loss, ortho_loss, cluster_loss
+        """
+        embedding = self._encode(x, adj, mask)
+        assign, pooled_x, pooled_adj, spectral, ortho, cluster = self.pool(x=embedding, adj=adj, mask=mask)
+        return assign, pooled_x, pooled_adj, spectral, ortho, cluster
+    def evaluate(self,
+                 x: Tensor,
+                 adj: Tensor,
+                 mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
+        """Same computation and return values as `forward`."""
+        embedding = self._encode(x, adj, mask)
+        assign, pooled_x, pooled_adj, spectral, ortho, cluster = self.pool(x=embedding, adj=adj, mask=mask)
+        return assign, pooled_x, pooled_adj, spectral, ortho, cluster
+    def predict(self, x: Tensor, adj: Tensor, mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor]:
+        r"""
+        predict function
+        Args:
+            x (torch.Tensor): Node feature tensor
+                :math:`\mathbf{X} \in \mathbb{R}^{B \times N \times F}`, with
+                batch-size :math:`B`, (maximum) number of nodes :math:`N` for
+                each graph, and feature dimension :math:`F`.
+            adj (torch.Tensor): Adjacency tensor
+                :math:`\mathbf{A} \in \mathbb{R}^{B \times N \times N}`.
+            mask (torch.Tensor, optional): Mask matrix
+                :math:`\mathbf{M} \in {\{ 0, 1 \}}^{B \times N}` indicating
+                the valid nodes for each graph. (default: :obj:`None`)
+        Returns:
+            s (torch.Tensor): Node assignment matrix
+                :math:`\mathbf{S} \in \mathbb{R}^{B \times N \times K}`
+            out (torch.Tensor): Output feature matrix
+                :math:`\mathbf{X} \in \mathbb{R}^{B \times K \times H}`
+            out_adj (torch.Tensor): Output adjacency matrix
+                :math:`\mathbf{A} \in \mathbb{R}^{B \times K \times K}`
+        """
+        embedding = self._encode(x, adj, mask)
+        assign, pooled_x, pooled_adj, *_losses = self.pool(x=embedding, adj=adj, mask=mask)
+        return assign, pooled_x, pooled_adj
+    def predict_embed(self, x: Tensor, adj: Tensor, mask: Optional[Tensor] = None) -> Tensor:
+        """Return the node embedding after the two GCN + SELU layers, without pooling."""
+        return self._encode(x, adj, mask)
+# Names exported by `from ._model import *` (used in ONTraC/model/__init__.py).
+__all__ = ['NodePooling', 'GraphPooling']