pytour 3.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
tour/dataclass/io.py ADDED
@@ -0,0 +1,225 @@
+ import mne
+ import h5py
+ import json
+ import numpy as np
+ from collections import OrderedDict
+ from typing import Union, List
+ 
+ """
+ mne montage data class related
+ """
+ 
+ def _validate_stimuli_dict(stimuli_dict: dict):
+     for k in stimuli_dict:
+         stim: dict = stimuli_dict[k]
+         if not isinstance(stim, dict):
+             raise ValueError(f'value for stim {k} should be a dict')
+         for feat_k, feat_v in stim.items():
+             if isinstance(feat_v, dict):
+                 # a discrete feature must provide values, timing and tags
+                 assert all(s in feat_v for s in ['x', 'timeinfo', 'tag'])
+     return stimuli_dict
+ 
+ def mne_montage_to_h5py_group(montage: mne.channels.DigMontage, f: h5py.File):
+     montage_grp = f.require_group('montage')
+     pos_dict = montage.get_positions()
+     for k, v in pos_dict.items():
+         if k == 'ch_pos':
+             # store the coordinates as one (n_channels, 3) dataset and keep
+             # the channel names as a JSON string attribute
+             chs = list(v.keys())
+             ch_coords = np.stack(list(v.values()))
+             t_ds = montage_grp.create_dataset(k, data=ch_coords)
+             t_ds.attrs['chs_json_str'] = json.dumps(chs)
+         elif k == 'coord_frame':
+             montage_grp.attrs['coord_frame'] = v
+         else:
+             # fiducials etc. may be None; an empty array is the sentinel
+             if v is None:
+                 v = np.array([])
+             montage_grp.create_dataset(k, data=v)
+     return f
+ 
+ def mne_montage_from_h5py_group(f: h5py.File):
+     pos_dict = {}
+     montage_grp = f['montage']
+     for k, v in montage_grp.items():
+         if k == 'ch_pos':
+             t_dict = OrderedDict()
+             ch_coords = v[:]
+             chs = json.loads(v.attrs['chs_json_str'])
+             for i_ch, ch in enumerate(chs):
+                 t_dict[ch] = ch_coords[i_ch]
+             pos_dict[k] = t_dict
+         else:
+             # an empty array marks a position that was None when saved
+             pos_dict[k] = None if v.shape == (0,) else v[:]
+     pos_dict['coord_frame'] = montage_grp.attrs['coord_frame']
+     montage = mne.channels.make_dig_montage(**pos_dict)
+     return montage
+ 
+ 
+ """
+ DataRecord class related
+ """
+ def data_record_to_h5py_group(
+     key: str,
+     data: np.ndarray,
+     stim_id: Union[str, int],
+     meta_info: dict,
+     srate: int,
+     f: h5py.File
+ ):
+     root_grp = f.require_group(f'records/{key}')
+     root_grp.create_dataset('data', data=data)
+     root_grp.attrs['stim_id'] = stim_id
+     root_grp.attrs['srate'] = srate
+ 
+     meta_info_grp = root_grp.require_group('meta_info')
+     for k, v in meta_info.items():
+         # arrays become datasets, everything else is stored as an attribute
+         if isinstance(v, np.ndarray):
+             meta_info_grp.create_dataset(k, data=v)
+         else:
+             meta_info_grp.attrs[k] = v
+ 
+     return f
+ 
+ def data_record_from_h5py_group(
+     f: h5py.File
+ ):
+     data = f['data'][:]
+     stim_id = f.attrs['stim_id']
+     srate = int(f.attrs['srate'])
+ 
+     meta_info_grp = f['meta_info']
+     meta_info = dict(meta_info_grp.attrs)
+     # also recover the values that were saved as datasets rather than attributes
+     for k, v in meta_info_grp.items():
+         meta_info[k] = v[:]
+ 
+     return dict(
+         data=data, stim_id=stim_id, meta_info=meta_info, srate=srate
+     )
+ 
+ """
+ Stim Dict related
+ """
+ 
+ def check_list_of_string(data) -> bool:
+     # a predicate rather than an assertion, so it can be used as a branch condition
+     return isinstance(data, list) and all(isinstance(i, str) for i in data)
+ 
+ def stim_dict_to_hdf5(
+     filename: str,
+     stim_dict: dict,
+     attrs: dict = None,
+ ):
+     with h5py.File(filename, 'a') as hdf5f:
+         _validate_stimuli_dict(stim_dict)
+         for stim_id in stim_dict:
+             grp = hdf5f.require_group(stim_id)
+             for feat_name in stim_dict[stim_id]:
+                 assert feat_name not in grp
+                 data = stim_dict[stim_id][feat_name]
+                 if isinstance(data, np.ndarray):
+                     dataset = grp.create_dataset(feat_name, data=data)
+                 elif isinstance(data, dict):
+                     dataset = discrete_stim_to_hdf5(
+                         feat_name=feat_name,
+                         feat_dict=data,
+                         hdf5f=grp
+                     )
+                 else:
+                     raise TypeError(f'unsupported feature type: {type(data)}')
+ 
+                 if attrs is not None:
+                     dataset.attrs.update(attrs[stim_id][feat_name])
+ 
+ def stim_dict_from_hdf5(
+     filename: str,
+ ) -> dict:
+     stim_dict = {}
+     with h5py.File(filename, 'r') as hdf5f:
+         for stim_id, stim_grp in hdf5f.items():
+             stim_dict[stim_id] = {}
+             for k, v in stim_grp.items():
+                 if isinstance(v, h5py.Dataset):
+                     stim_dict[stim_id][k] = v[:]
+                 elif isinstance(v, h5py.Group):
+                     stim_dict[stim_id][k] = discrete_stim_from_hdf5(v)
+                 else:
+                     raise TypeError(f'unsupported node type: {type(v)}')
+     _validate_stimuli_dict(stim_dict)
+     return stim_dict
+ 
+ def discrete_stim_to_hdf5(
+     feat_name: str,
+     feat_dict: dict,
+     hdf5f: h5py.Group
+ ) -> h5py.Group:
+     """
+     Expected feat_dict layout:
+     {
+         'x': ...,
+         'tag': ...,
+         'timeinfo': ...
+     }
+     """
+     grp = hdf5f.require_group(feat_name)
+     for k, v in feat_dict.items():
+         if isinstance(v, np.ndarray):
+             grp.create_dataset(k, data=v)
+         elif check_list_of_string(v):
+             string_list_to_hdf5(k, v, grp)
+         else:
+             raise TypeError(f'unsupported value type for {k}: {type(v)}')
+     return grp
+ 
+ def discrete_stim_from_hdf5(
+     hdf5f: h5py.Group,
+ ):
+     stim_dict = {}
+     for k, v in hdf5f.items():
+         if h5py.check_string_dtype(v.dtype):
+             # variable-length utf-8 strings are decoded back into a list
+             stim_dict[k] = string_list_from_hdf5(v)
+         else:
+             stim_dict[k] = v[:]
+     return stim_dict
+ 
+ 
+ def string_list_to_hdf5(
+     dataset_name: str,
+     strings: List[str],
+     f: h5py.Group
+ ):
+     '''
+     adapted from ChatGPT
+     '''
+     # variable-length utf-8 string dtype
+     dt = h5py.string_dtype(encoding='utf-8')
+     f.require_dataset(dataset_name, (len(strings),), dtype=dt, data=strings)
+     return f
+ 
+ def string_list_from_hdf5(
+     dt: h5py.Dataset
+ ):
+     return dt.asstr()[:].tolist()
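
A minimal round-trip sketch for the stim dict helpers above; the file name, feature names and array shapes are illustrative assumptions, not part of the package:

    import numpy as np
    from tour.dataclass.io import stim_dict_to_hdf5, stim_dict_from_hdf5

    # one continuous feature plus one discrete feature for a single stimulus
    stim_dict = {
        'stim_0': {
            'envelope_fs64': np.random.randn(1, 640),       # continuous feature
            'words': {                                       # discrete feature
                'x': np.random.randn(2, 10),
                'timeinfo': np.stack([np.arange(10.0), np.arange(10.0) + 0.5]),
                'tag': [f'word_{i}' for i in range(10)],
            },
        }
    }
    stim_dict_to_hdf5('stims.h5', stim_dict)   # file must not already contain these groups
    loaded = stim_dict_from_hdf5('stims.h5')
    assert loaded['stim_0']['words']['tag'] == stim_dict['stim_0']['words']['tag']
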
tour/dataclass/stim.py ADDED
@@ -0,0 +1,33 @@
+ from typing import Dict
+ 
+ from ..backend import is_tensor, np, torch, Array
+ 
+ 
+ def to_impulses(x: Array, timeinfo: Array, f: float, padding_s: float = 0):
+     '''
+     Align event values onto an impulse train with sampling rate f.
+ 
+     x: (n_dim, n_events) feature values, one column per event
+     timeinfo: (2, n_events) onset and offset times in seconds
+     '''
+     # x and timeinfo must come from the same backend
+     if is_tensor(x):
+         assert is_tensor(timeinfo)
+     else:
+         assert not is_tensor(timeinfo)
+     startTimes = timeinfo[0]
+     endTimes = timeinfo[1]
+     secLen = endTimes[-1] + padding_s
+     nDim = x.shape[0]
+     if is_tensor(x):
+         nLen = torch.ceil(secLen * f).long()
+         out = torch.zeros((nDim, nLen), dtype=x.dtype)
+         timeIndices = torch.round(startTimes * f).long()
+     else:
+         nLen = np.ceil(secLen * f).astype(int)
+         out = np.zeros((nDim, nLen), dtype=x.dtype)
+         timeIndices = np.round(startTimes * f).astype(int)
+     out[:, timeIndices] = x
+     return out
+ 
+ def dictTensor_to(x: Dict[str, Array], device):
+     # move tensor values to the target device, pass everything else through
+     output = {
+         k: v.to(device) if is_tensor(v) else v for k, v in x.items()
+     }
+     return output
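
A small numeric example of to_impulses on the numpy path of the backend; the values are chosen for illustration:

    import numpy as np
    from tour.dataclass.stim import to_impulses

    x = np.array([[1.0, 2.0, 3.0]])            # one feature dim, three events
    timeinfo = np.array([[0.0, 0.5, 1.25],     # onsets in seconds
                         [0.4, 0.9, 1.50]])    # offsets in seconds
    out = to_impulses(x, timeinfo, f=10)       # 10 Hz impulse train
    # out.shape == (1, 15); the event values land at samples 0, 5 and 12
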
tour/package_manage.py ADDED
@@ -0,0 +1,13 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Thu Feb 6 08:33:37 2025
+ 
+ @author: ShiningStone
+ """
+ 
+ 
+ def check_import(package, name):
+     # raise a clear error when an optional dependency failed to import
+     if package is None:
+         raise ImportError(f'{name} failed to import')
+     return package
+ 
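
check_import pairs with the usual try/except import pattern for optional dependencies; a hypothetical caller:

    try:
        import h5py
    except ImportError:
        h5py = None

    from tour.package_manage import check_import

    def save_file(filename):
        f = check_import(h5py, 'h5py')   # raises ImportError only when h5py is actually needed
        ...
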
tour/torch_trainer.py ADDED
@@ -0,0 +1,339 @@
+ import os
+ import sys
+ import torch
+ import logging
+ import numpy as np
+ from typing import Callable, List, Union, Protocol
+ 
+ def func_reduce_mean(values):
+     # scalars are stacked, batched tensors are concatenated, then averaged
+     if values[0].ndim == 0:
+         return torch.mean(torch.stack(values), dim=0)
+     else:
+         return torch.mean(torch.cat(values), dim=0)
+ 
+ def get_logger(
+     file_dir,
+     console_level=logging.INFO,
+     file_level=logging.DEBUG,
+     file_name="logfile.log",
+     if_print=True,
+ ):
+     # adapted from ChatGPT
+     file_path = f"{file_dir}/{file_name}"
+     logger = logging.getLogger('tray/trainer')
+     logger.setLevel(logging.DEBUG)  # master level: allow all through to handlers
+     logger.handlers.clear()  # prevent duplicate handlers on re-run
+ 
+     formatter = logging.Formatter(
+         '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+         datefmt='%Y-%m-%d %H:%M:%S'
+     )
+ 
+     if if_print:
+         # Console handler
+         console_handler = logging.StreamHandler(sys.stdout)
+         console_handler.setLevel(console_level)
+         console_handler.setFormatter(formatter)
+         logger.addHandler(console_handler)
+ 
+     # File handler
+     file_handler = logging.FileHandler(file_path)
+     file_handler.setLevel(file_level)
+     file_handler.setFormatter(formatter)
+     logger.addHandler(file_handler)
+ 
+     return logger
+ 
+ class DependentModule(Protocol):
+     # anything that can serialize its state into the Context checkpoint
+     def load_state(self, state: dict) -> None: ...
+     def get_state(self) -> dict: ...
+ 
+ class BatchAccumulator:
+ 
+     def __init__(self):
+         self._data: list = []
+ 
+     def append(self, output):
+         self._data.append(output)
+ 
+     @property
+     def data(self):
+         # concatenate along the batch dimension
+         return torch.cat(self._data)
+ 
+ class MetricsRecord:
+ 
+     def __init__(self):
+         self._data = {}
+ 
+     def append(self, metricDict: dict, tag: str = ''):
+         data = self._data
+         for k in metricDict:
+             real_k = f'{tag}/{k}' if tag != '' else k
+             if real_k not in data:
+                 data[real_k] = []
+             data[real_k].append(metricDict[k].cpu())
+ 
+     def __iter__(self):
+         return iter(self._data.keys())
+ 
+     def __getitem__(self, key):
+         return self._data[key]
+ 
+     def items(self):
+         yield from self._data.items()
+ 
+ def ndarrays_to_tensors(*datas: List[np.ndarray]):
+     # the resulting tensors share memory with the arrays where possible
+     return [
+         [
+             torch.from_numpy(d) if not np.isscalar(d) else torch.tensor(d, dtype=torch.get_default_dtype())
+             for d in data
+         ]
+         for data in datas
+     ]
+ 
+ class StimRespDataset(torch.utils.data.Dataset):
+ 
+     def __init__(self,
+         stims: Union[List[np.ndarray], List[torch.Tensor]],
+         resps: Union[List[np.ndarray], List[torch.Tensor]],
+         device='cpu'
+     ):
+         if isinstance(stims[0], np.ndarray):
+             stims, resps = ndarrays_to_tensors(stims, resps)
+         assert len(stims) == len(resps)
+         self.stims = stims
+         self.resps = resps
+         self.device = device
+ 
+     def __getitem__(self, index: int):
+         return self.stims[index].to(self.device), self.resps[index].to(self.device)
+ 
+     def __len__(self):
+         return len(self.stims)
+ 
+ class Context:
+ 
+     def __init__(
+         self,
+         model: torch.nn.Module,
+         optimizer: torch.optim.Optimizer,
+         func_metrics: Callable,
+         checkpoint_folder: str,
+         checkpoint_file="checkpoint.pt",
+         custom_config=None,
+         if_print_metric=True,
+     ):
+         self.model = model
+         self.optimizer = optimizer
+         self.state_current_epoch = -1
+         self.func_metrics = func_metrics
+         self.checkpoint_folder = checkpoint_folder
+         self.checkpoint_file = checkpoint_file
+         self.metrics_log = MetricsRecord()
+         # avoid a shared mutable default argument
+         self.custom_config = {} if custom_config is None else custom_config
+         self.logger = get_logger(checkpoint_folder, if_print=if_print_metric)
+ 
+         self.dependents: List[DependentModule] = []
+ 
+     def add_dependent(self, module: DependentModule):
+         self.dependents.append(module)
+ 
+     def new_epochs(self):
+         self.state_current_epoch += 1
+ 
+     def checkpoint_exists(self):
+         return os.path.exists(self.checkpoint_path)
+ 
+     def save_checkpoint(self):
+         checkpoint = {}
+         for module in self.dependents:
+             checkpoint[module.__class__.__name__] = module.get_state()
+         checkpoint['context'] = self.get_state()
+         torch.save(checkpoint, self.checkpoint_path)
+ 
+     def load_checkpoint(self):
+         checkpoint = torch.load(self.checkpoint_path)
+         self.load_state(checkpoint['context'])
+         for module in self.dependents:
+             module.load_state(checkpoint[module.__class__.__name__])
+ 
+     @property
+     def checkpoint_path(self):
+         return f'{self.checkpoint_folder}/{self.checkpoint_file}'
+ 
+     def log_metrics(self, metrics, tag=''):
+         scalar_metrics = {k: v.item() for k, v in metrics.items() if v.numel() == 1}
+         metrics_log = ' '.join(f'{k}:{v}' for k, v in scalar_metrics.items())
+         self.logger.info(f"epochs:{self.state_current_epoch} - {tag} - {metrics_log}")
+         self.metrics_log.append(metrics, tag)
+ 
+     def new_metrics_record(self):
+         return MetricsRecord()
+ 
+     def evaluate_dataloader(
+         self,
+         tag: str,
+         dtldr: torch.utils.data.DataLoader,
+         forward_function: Callable,
+         f_reduce_metrics_records=func_reduce_mean,
+         save_in_context=False,
+         batch_hook: List[Callable] = [],
+         output_hook: List[Callable] = []
+     ):
+         new_log = MetricsRecord()
+         is_model_training = self.model.training
+         self.model.eval()
+         with torch.no_grad():
+             for batch in dtldr:
+                 for f_batch in batch_hook:
+                     f_batch(batch)
+                 output = forward_function(self.model, batch)
+                 for f_output in output_hook:
+                     f_output(output)
+                 metrics_dict = self.func_metrics(batch, output)
+                 new_log.append(metrics_dict)
+         # restore the training flag that was active before evaluation
+         if is_model_training:
+             self.model.train()
+ 
+         reduced_record = {k: f_reduce_metrics_records(v) for k, v in new_log.items()}
+         if save_in_context:
+             self.log_metrics(reduced_record, tag)
+ 
+         output_record = {}
+         for k, v in reduced_record.items():
+             real_k = f'{tag}/{k}' if tag != '' else k
+             output_record[real_k] = v.cpu()
+         scalar_metrics = {k: v.item() for k, v in output_record.items() if v.numel() == 1}
+         return output_record, scalar_metrics
+ 
+     def get_state(self):
+         state = {
+             'model_state_dict': self.model.state_dict(),
+             'optim_state_dict': self.optimizer.state_dict(),
+             'state_current_epoch': self.state_current_epoch,
+             'custom_config': self.custom_config
+         }
+         return state
+ 
+     def load_state(self, state):
+         self.model.load_state_dict(state['model_state_dict'])
+         self.optimizer.load_state_dict(state['optim_state_dict'])
+         self.state_current_epoch = state['state_current_epoch']
+         self.custom_config = state['custom_config']
+ 
+ class SaveBest:
+     def __init__(
+         self,
+         ctx: Context,
+         state_metric_name,
+         op=lambda old, new: new > old,
+         tol=None,
+         ifLog=True,
+         file_name="save_best.pt"
+     ):
+         self.ctx = ctx
+         ctx.add_dependent(self)
+         self.state_cnt = 0
+         self.state_best_cnt = -1
+         self.state_best_metric = None
+         self.state_metric_name = state_metric_name
+ 
+         self.op = op
+         self.tol = tol
+         self.saved_checkpoint = None
+         self.ifLog = ifLog
+         self.file_name = file_name
+ 
+     @property
+     def target_path(self):
+         return f'{self.ctx.checkpoint_folder}/{self.file_name}'
+ 
+     def get_state(self):
+         # everything prefixed with `state_` is checkpointed
+         return {k: v for k, v in self.__dict__.items() if k.startswith('state_')}
+ 
+     def load_state(self, state):
+         for k in list(self.__dict__):
+             if k.startswith('state_'):
+                 self.__dict__[k] = state[k]
+ 
+     def step(self):
+         t_metric = self.ctx.metrics_log[self.state_metric_name][-1]
+         assert t_metric.ndim == 0 or (t_metric.ndim == 1 and t_metric.shape[0] == 1), t_metric.shape
+         t_metric = t_metric.item()
+         t_cnt = self.state_cnt
+         ifStop = False
+         if self.state_best_metric is None:
+             ifUpdate = True
+         else:
+             ifUpdate = self.op(self.state_best_metric, t_metric)
+         if ifUpdate:
+             self.state_best_metric = t_metric
+             self.state_best_cnt = t_cnt
+             checkpoint = {}
+             checkpoint.update(self.ctx.get_state())
+             checkpoint.update(self.get_state())
+ 
+             if self.ifLog:
+                 msg = f'save_best --- cnt: {self.state_best_cnt}, {self.state_metric_name}: {self.state_best_metric}'
+                 self.ctx.logger.info(msg)
+             torch.save(checkpoint, self.target_path)
+             self.saved_checkpoint = checkpoint
+ 
+         if self.tol is not None:
+             if self.state_cnt - self.state_best_cnt > self.tol:
+                 ifStop = True
+                 msg = f'early_stop --- epoch: {self.state_best_cnt}, metric: {self.state_best_metric}'
+                 self.ctx.logger.info(msg)
+         self.state_cnt += 1
+         return ifUpdate, ifStop
+ 
+ 
+ def pearsonr(y, y_pred):
+     """
+     Compute Pearson's correlation coefficient between predicted
+     and observed data.
+ 
+     y: (..., n_samples, n_chans)
+     y_pred: (..., n_samples, n_chans)
+     """
+     # use the biased std (divide by n) so numerator and denominator
+     # share the same normalization
+     r = torch.mean(
+         (y - y.mean(-2, keepdim=True)) * (y_pred - y_pred.mean(-2, keepdim=True)),
+         -2
+     ) / (
+         y.std(-2, unbiased=False) * y_pred.std(-2, unbiased=False)
+     )
+     return r
+ 
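
A sketch of how Context, SaveBest, StimRespDataset and pearsonr fit together in a training loop; the model, data shapes, metric choice and checkpoint paths are illustrative assumptions, not the package's prescribed usage:

    import torch
    from tour.torch_trainer import Context, SaveBest, StimRespDataset, pearsonr

    # toy data: 20 trials of (n_samples, n_chans) stimulus/response pairs
    stims = [torch.randn(100, 1) for _ in range(20)]
    resps = [torch.randn(100, 2) for _ in range(20)]
    dtldr = torch.utils.data.DataLoader(StimRespDataset(stims, resps), batch_size=4)

    model = torch.nn.Linear(1, 2)
    optimizer = torch.optim.Adam(model.parameters())

    def forward_function(model, batch):
        stim, resp = batch
        return model(stim)

    def func_metrics(batch, output):
        _, resp = batch
        return {'r': pearsonr(resp, output).mean()}   # one scalar metric per batch

    ctx = Context(model, optimizer, func_metrics, checkpoint_folder='.')
    saver = SaveBest(ctx, state_metric_name='val/r', tol=3)  # default op keeps the highest value

    for epoch in range(10):
        ctx.new_epochs()
        for stim, resp in dtldr:
            optimizer.zero_grad()
            loss = torch.nn.functional.mse_loss(model(stim), resp)
            loss.backward()
            optimizer.step()
        # logs 'val/r' into ctx.metrics_log, which saver.step() then reads
        ctx.evaluate_dataloader('val', dtldr, forward_function, save_in_context=True)
        if_update, if_stop = saver.step()
        if if_stop:   # more than `tol` epochs without improvement
            break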