gwaslab 3.4.42__py3-none-any.whl → 3.4.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

@@ -0,0 +1,687 @@
1
+ from pathlib import Path
2
+ import os
3
+ import pickle
4
+ import concurrent.futures
5
+ import threading
6
+ import multiprocessing as mp
7
+ import time
8
+ import h5py
9
+
10
+ from gwaslab.g_Log import Log
11
+
12
+ from platformdirs import user_cache_dir
13
+ from pysam import VariantFile
14
+
15
+ APPNAME = "gwaspipe"
16
+ APPAUTHOR = "cloufield"
17
+
18
+ CACHE_EXT = '.cache'
19
+
20
+
21
+ ################################################# UTILS #################################################
22
+
23
def get_cache_path(base_path):
    """Return the path of an existing cache file for `base_path`, or None.

    Looks first next to the original input file, then in the per-user
    cache directory provided by platformdirs.
    """
    cache_filename = Path(base_path).stem + CACHE_EXT

    # 1) cache sitting alongside the original input file
    local_candidate = os.path.join(os.path.dirname(base_path), cache_filename)
    if os.path.exists(local_candidate):
        return local_candidate

    # 2) cache in the per-user cache directory
    user_candidate = os.path.join(user_cache_dir(APPNAME, APPAUTHOR), cache_filename)
    if os.path.exists(user_candidate):
        return user_candidate

    return None
35
+
36
def get_write_path(base_path):
    """Return a writable location for the cache file of `base_path`.

    Prefers the directory of the original input file; falls back to the
    per-user cache directory. Raises if neither is writable.
    """
    cache_filename = Path(base_path).stem + CACHE_EXT

    input_dir = os.path.dirname(base_path)
    if os.access(input_dir, os.W_OK):
        # we can write next to the original input file
        return os.path.join(input_dir, cache_filename)

    cache_dir = user_cache_dir(APPNAME, APPAUTHOR)
    if os.access(cache_dir, os.W_OK):
        # fall back to the per-user cache directory
        return os.path.join(cache_dir, cache_filename)

    raise Exception('No write access to any cache directory')
48
+
49
def cache_exists(path, ref_alt_freq, category='all'):
    ''' Check if the cache file exists and contains the required data '''
    try:
        return is_in_h5py(path, ref_alt_freq, category)
    except Exception:
        # a missing/unreadable cache file simply means "no cache"
        return False
57
+
58
def is_in_h5py(path, ref_alt_freq, category='all'):
    '''
    Return True if the HDF5 cache at `path` holds a non-empty
    `ref_alt_freq/category` group.
    Raise an exception if the cache file does not exist.
    '''
    if not path or not os.path.exists(path):
        raise Exception('Cache file not found')

    with h5py.File(path, 'r') as f:
        group = f.get(ref_alt_freq)
        if group is not None:
            sub_group = group.get(category)
            # non-empty means at least one chromosome sub-group was written
            if sub_group is not None and len(sub_group.keys()) > 0:
                return True
    return False
72
+
73
def load_h5py_cache(path, ref_alt_freq, category='all'):
    '''Load the `ref_alt_freq/category` section of the HDF5 cache into a dict.'''
    if not path or not os.path.exists(path):
        raise Exception('Cache file not found')

    if not is_in_h5py(path, ref_alt_freq, category):
        raise Exception('Cache file does not contain the required data')

    merged = {}
    with h5py.File(path, 'r') as f:
        # one sub-group per chromosome, each with parallel 'keys'/'values' datasets
        for chrom_group in f[ref_alt_freq][category].values():
            chrom_keys = chrom_group['keys'].asstr()[:]
            chrom_values = chrom_group['values'][:]
            merged.update(zip(chrom_keys, chrom_values))
    return merged
89
+
90
def build_cache(base_path, ref_alt_freq=None, n_cores=1, return_cache=False, filter_fn=None, category='all', log=Log(), verbose=True):
    """Build the on-disk cache for `base_path`; optionally return it in memory."""
    builder = CacheBuilder(base_path, ref_alt_freq=ref_alt_freq, n_cores=n_cores, log=log, verbose=verbose)
    # start_building blocks until every worker process has finished
    builder.start_building(filter_fn=filter_fn, category=category, set_cache=return_cache)
    if return_cache:
        return builder.get_cache()
95
+
96
def is_palindromic(ref, alt):
    """Return whether (ref, alt) is a palindromic allele pair (G/C or A/T).

    Uses elementwise `&`/`|` operators so array-like inputs also work.
    """
    strong = ((ref == "G") & (alt == "C")) | ((ref == "C") & (alt == "G"))
    weak = ((ref == "A") & (alt == "T")) | ((ref == "T") & (alt == "A"))
    return strong | weak
103
+
104
def is_indel(ref, alt):
    """Return True when the two alleles differ in length (insertion/deletion)."""
    ref_len = len(ref)
    alt_len = len(alt)
    return ref_len != alt_len
106
+
107
def filter_fn_pi(*, ref, alt):
    """Keep palindromic variants and indels."""
    if is_palindromic(ref, alt):
        return True
    return is_indel(ref, alt)
109
+
110
def filter_fn_np(*, ref, alt):
    """Keep only non-palindromic variants."""
    palindromic = is_palindromic(ref, alt)
    return not palindromic
112
+
113
+ PALINDROMIC_INDEL = 'pi' # palindromic + indel
114
+ NON_PALINDROMIC = 'np' # non-palindromic
115
+
116
+ FILTER_FN = {
117
+ PALINDROMIC_INDEL: filter_fn_pi,
118
+ NON_PALINDROMIC: filter_fn_np
119
+ }
120
+
121
+
122
+ ################################################# CACHE MANAGERs #################################################
123
+
124
class CacheMainManager:
    """Base manager that builds/loads the HDF5 allele-frequency cache for an input file."""
    def __init__(self, base_path, ref_alt_freq=None, category='all', filter_fn=None, n_cores=1, log=Log(), verbose=True):
        # base_path: path of the original input (e.g. VCF) the cache derives from
        self.base_path = base_path
        # ref_alt_freq: INFO field name holding the ALT allele frequency
        self.ref_alt_freq = ref_alt_freq
        # category: cache section ('all' or a filter category such as 'pi'/'np')
        self.category = category
        self.filter_fn = filter_fn
        self.n_cores = n_cores
        self.log = log
        self.verbose = verbose

    def _get_cache_path(self):
        # Existing cache file next to the input or in the user cache dir (or None).
        return get_cache_path(self.base_path)

    def _get_write_path(self):
        # Writable location for a new cache file; requires base_path to be set.
        if self.base_path is not None:
            return get_write_path(self.base_path)
        else:
            raise Exception('base_path is None')

    @property
    def cache_len(self):
        # Number of variants currently held in the loaded cache.
        return len(self.cache)

    @property
    def cache(self):
        # In-memory cache dict; raises until build_cache()/load_cache() has run.
        if not hasattr(self, '_cache'):
            raise Exception('Cache not loaded')
        return self._cache

    def build_cache(self):
        ''' Build and load the cache '''
        self._cache = build_cache(
            self.base_path, ref_alt_freq=self.ref_alt_freq, n_cores=self.n_cores,
            filter_fn=self.filter_fn, category=self.category,
            return_cache=True, log=self.log, verbose=self.verbose
        )

    def load_cache(self, category=None):
        # Load the cache from disk; defaults to the category set at construction.
        if category is None:
            category = self.category
        cache_path = self._get_cache_path()
        self._cache = load_h5py_cache(cache_path, ref_alt_freq=self.ref_alt_freq, category=category)
166
+
167
+
168
class CacheManager(CacheMainManager):
    '''
    Cache manager that can serve the cache from three sources:
      - in-process (loaded from disk, or built if absent) — the default;
      - a `cache_loader` object exposing a `get_cache()` method;
      - a `CacheProcess` holding the cache in a subprocess.
    At most one of cache_loader / cache_process may be provided.
    '''
    def __init__(self, base_path=None, cache_loader=None, cache_process=None, ref_alt_freq=None, category='all', filter_fn=None, n_cores=1, log=Log(), verbose=True):
        none_value = sum([cache_loader is not None, cache_process is not None])
        assert none_value in [0, 1], 'Only one between cache_loader and cache_process should be provided'
        super().__init__(base_path, ref_alt_freq=ref_alt_freq, category=category, filter_fn=filter_fn, n_cores=n_cores, log=log, verbose=verbose)
        if none_value == 1:
            self.base_path = None # unset base_path if cache_loader or cache_process is provided

        self.cache_loader = cache_loader
        self.cache_process = cache_process

        if cache_loader is not None:
            assert callable(getattr(cache_loader, 'get_cache', None)), 'cache_loader must have a get_cache method'
        elif cache_process is not None:
            assert isinstance(cache_process, CacheProcess), 'cache_process must be an instance of CacheProcess'
        else:
            # No delegate provided: load the cache from disk, or build it if absent.
            cache_path = self._get_cache_path()
            if cache_path is not None:
                self.log.write(f'Start loading cache from {cache_path}...', verbose=self.verbose)
                self.load_cache()
                # BUGFIX: log message typo "Finshed" -> "Finished"
                self.log.write('Finished loading cache.', verbose=self.verbose)
            else:
                self.log.write(f'Start building cache from {base_path}...', verbose=self.verbose)
                self.build_cache()
                self.log.write('Finished building (and loading) cache.', verbose=self.verbose)

    @property
    def cache_len(self):
        # Delegate the count to the subprocess when the cache lives there.
        if self.cache_process is not None:
            return self.cache_process.cache_len()
        else:
            return len(self.cache)

    @property
    def cache(self):
        if self.cache_loader is not None:
            return self.cache_loader.get_cache()
        else:
            if not hasattr(self, '_cache'):
                raise Exception('Cache not loaded or class not exposing cache')
            return self._cache

    def apply_fn(self, fn, *args, **kwargs):
        '''Run fn(*args, cache=<cache>, **kwargs), locally or in the cache subprocess.'''
        assert 'cache' not in kwargs, "'cache' can't be inside kwargs"
        if self.cache_process is not None:
            return self.cache_process.apply_fn(fn, *args, **kwargs)
        else:
            return fn(*args, cache=self.cache, **kwargs)

    def _get_cache_path(self):
        # Only meaningful when this manager owns the cache itself.
        if self.cache_loader is None and self.cache_process is None:
            return super()._get_cache_path()
        return None
221
+
222
+
223
class CacheProcess(mp.Process):
    '''
    A class for managing a cache in a separate process. It is used to reduce memory consumption when the cache is very large.
    This class will load the cache in a separate process and provide methods to perform operations on the cache directly on the subprocess.
    In this way, the cache is not copied to the main process, but the operations are performed on the cache in the subprocess and only the
    input and output of the operations are communicated (i.e. copied) between the main and the subprocess.

    This is very useful when the cache is huge (e.g. 40GB in memory) and we want to perform operations on it based on a relatively small input
    (e.g. a "small" dataframe, where small is relative to the cache size) and the output is also relatively small.
    '''
    def __init__(self, base_path, ref_alt_freq=None, category='all', filter_fn=None, n_cores=1, log=Log(), verbose=True):
        super().__init__()
        self.base_path = base_path
        self.ref_alt_freq = ref_alt_freq
        self.filter_fn = filter_fn
        self.category = category
        self.n_cores = n_cores
        self.log = log
        self.verbose = verbose

        self.daemon = True # When parent process exits, it will attempt to terminate all of its daemonic child processes.

        # NOTE(review): self.manager appears unused; kept for backward compatibility.
        self.manager = mp.Manager()
        self.input_queue = mp.Queue()    # requests: (method_name, args, kwargs)
        self.result_queue = mp.Queue()   # responses, one per request
        self.result_produced = mp.Value('b', True)  # flag: last request has been answered

        # Build the on-disk cache now (in the parent) if it does not exist yet.
        cache_path = self._get_cache_path()
        if not cache_exists(cache_path, ref_alt_freq, category):
            self.build_cache()
        else:
            if n_cores > 1:
                self.log.warning('[CacheProcess: since the cache already exists, the parameter n_cores could be set to 1 without any performance loss]', verbose=self.verbose)

    def _get_cache_path(self):
        return get_cache_path(self.base_path)

    def build_cache(self):
        # Build the cache on disk only; the subprocess loads it in run().
        build_cache(
            self.base_path, ref_alt_freq=self.ref_alt_freq, n_cores=self.n_cores,
            filter_fn=self.filter_fn, category=self.category,
            return_cache=False, log=self.log, verbose=self.verbose
        )

    def run(self):
        # Entry point of the subprocess: load the cache, then serve requests.
        cache_path = self._get_cache_path()
        self.log.write(f'[CacheProcess: Start loading cache from {cache_path}...]', verbose=self.verbose)
        cache = load_h5py_cache(cache_path, ref_alt_freq=self.ref_alt_freq, category=self.category)
        # BUGFIX: log message typo "Finshed" -> "Finished"
        self.log.write('[CacheProcess: Finished loading cache.]', verbose=self.verbose)

        # Continuously listen for method calls
        while True:
            method, args, kwargs = self.input_queue.get()
            if method == 'get_from_cache':
                key = args[0]
                self.result_queue.put(cache[key])
                self.result_produced.value = True
            elif method == 'apply_fn':
                assert 'cache' not in kwargs, "'cache' can't be inside kwargs"
                fn, *args = args
                result = fn(*args, cache=cache, **kwargs)
                self.result_queue.put(result)
                self.result_produced.value = True
            elif method == 'cache_len':
                self.result_queue.put(len(cache))
                self.result_produced.value = True
            elif method == "terminate":
                self.result_produced.value = True
                break

    def _call_method(self, method, *args, **kwargs):
        # Send a request to the subprocess and block until it is answered.
        self.result_produced.value = False
        self.input_queue.put((method, args, kwargs))

        # BUGFIX: sleep briefly instead of busy-spinning, which pegged a CPU core.
        while not self.result_produced.value:
            time.sleep(0.001)

    def get_from_cache(self, key):
        '''Return cache[key], fetched from the subprocess.'''
        self._call_method('get_from_cache', key)
        return self.result_queue.get()

    def apply_fn(self, fn, *args, **kwargs):
        '''
        Apply an arbitrary function to the cache. The function should take the cache as a
        keyword argument. Positional and named arguments are forwarded to fn.
        (Positional args were previously rejected even though run() supports them
        and CacheManager.apply_fn forwards them.)
        '''
        self._call_method('apply_fn', fn, *args, **kwargs)
        return self.result_queue.get()

    def cache_len(self):
        '''Return the number of entries in the subprocess-held cache.'''
        self._call_method('cache_len')
        return self.result_queue.get()

    def terminate(self):
        '''Ask the subprocess request loop to exit cleanly.'''
        self._call_method("terminate")
319
+
320
+
321
+ ################################################# CACHE BUILDER #################################################
322
+
323
class CacheBuilderOld:
    """Legacy thread-based cache builder (superseded by CacheBuilder).

    Builds an in-memory {"chrom:start:end": [[pos, ref, alts, freq], ...]}
    cache from a VCF using one thread per contig.
    """
    def __init__(self, ref_infer, ref_alt_freq=None, n_cores=1, log=Log(), verbose=True):
        # ref_infer: path to the reference VCF
        self.ref_infer = ref_infer
        # ref_alt_freq: INFO field holding the ALT allele frequency
        self.ref_alt_freq = ref_alt_freq
        self.n_cores = n_cores
        self.log = log
        self.verbose = verbose

        self.cache = {}
        self.lock = threading.Lock() # For thread-safe cache access
        self.cancelled = False # Flag for cancelling the cache building process
        self.running = False
        self.executor = None # Thread pool executor
        self.futures = None # Stores Future objects

    def start_building(self):
        # Launch one worker thread per contig; returns immediately (non-blocking).
        if self.running:
            print("Cache building is already running. If you want to restart, please stop the current process first.")
            return

        n_cores = self.n_cores
        contigs = self.get_contigs()

        self.cancelled = False
        self.running = True

        self.log.write(f" -Building cache on {n_cores} cores...", verbose=self.verbose)
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=n_cores)
        self.futures = [self.executor.submit(self.build_cache, chrom) for chrom in contigs]

    def get_contigs(self):
        # Contig names declared in the VCF header.
        vcf_reader = VariantFile(self.ref_infer, drop_samples=True)
        contigs = [v.name for v in vcf_reader.header.contigs.values()]
        vcf_reader.close()
        return contigs

    def build_cache(self, chrom):
        # Worker: scan one contig and add each record to the shared cache.
        # NOTE(review): self.cancelled is never checked here, so stop_building
        # cannot interrupt a worker that has already started.
        vcf_reader = VariantFile(self.ref_infer, drop_samples=True)
        #self.log.write(f" -Fetching contig '{chrom}'...")
        seq = vcf_reader.fetch(chrom)

        first = True
        for record in seq:
            if first:
                #self.log.write(f" -Found at least one record for contig '{chrom}'...")
                first = False
            chrom = record.chrom
            start = record.pos - 1
            end = record.pos
            cache_key = f"{chrom}:{start}:{end}"
            to_add = [record.pos, record.ref, record.alts, record.info[self.ref_alt_freq][0]]
            self.add_to_cache(cache_key, to_add)

    def stop_building(self, wait=False, verbose=False):
        # Cancel pending futures and shut the executor down.
        if self.futures:
            self.cancelled = True
            for future in self.futures:
                future.cancel()
            self.executor.shutdown(wait=wait) # Whether to wait for threads to finish
            self.futures = None
            self.executor = None
            self.running = False

        if verbose:
            print(f"Cache contains {len(self.get_cache())} variants")

    def add_to_cache(self, key, value):
        # Thread-safe append of a record under key.
        self.lock.acquire()
        if key in self.cache:
            self.cache[key].append(value)
        else:
            self.cache[key] = [value]
        self.lock.release()

    def get_cache(self, complete=False):
        # Return the cache; with complete=True, wait for all workers first.
        if complete:
            concurrent.futures.wait(self.futures)

        self.lock.acquire()
        cache = self.cache
        self.lock.release()
        return cache

    def reset_cache(self):
        # Drop all cached records (thread-safe).
        self.lock.acquire()
        self.cache = {}
        self.lock.release()

    def save_cache(self, save_path):
        # Pickle the complete cache to save_path (waits for workers to finish).
        cache = self.get_cache(complete=True)
        self.log.write(f' -Saving cache to {save_path}', verbose=self.verbose)
        with open(save_path, 'wb') as f:
            pickle.dump(cache, f, protocol=pickle.HIGHEST_PROTOCOL)
        self.log.write(' -Cache saved', verbose=self.verbose)
417
+
418
+
419
class CacheBuilder:
    '''Builds the on-disk HDF5 allele-frequency cache from a VCF, one chromosome per worker process.'''
    def __init__(self, ref_infer, ref_alt_freq=None, n_cores=1, log=Log(), verbose=True):
        # ref_infer: path to the reference VCF
        self.ref_infer = ref_infer
        # ref_alt_freq: INFO field holding the ALT allele frequency
        self.ref_alt_freq = ref_alt_freq
        self.n_cores = n_cores
        self.log = log
        self.verbose = verbose

        self.running = False
        self.cache = None

    def get_contigs(self):
        '''Return the contig names declared in the VCF header.'''
        vcf_reader = VariantFile(self.ref_infer, drop_samples=True)
        contigs = [v.name for v in vcf_reader.header.contigs.values()]
        vcf_reader.close()
        return contigs

    def already_built(self, category):
        '''True if a cache for this category/ref_alt_freq already exists on disk.'''
        cache_path = get_cache_path(self.ref_infer)
        return cache_exists(cache_path, self.ref_alt_freq, category)

    def start_building(self, filter_fn=None, category='all', set_cache=True):
        '''Build the cache across processes; blocks until every worker finishes.'''
        if self.running:
            print("Cache building is already running. If you want to restart, please stop the current process first.")
            return

        if isinstance(filter_fn, str) and filter_fn in FILTER_FN:
            # BUGFIX: record the string category BEFORE replacing filter_fn with the
            # callable. Previously `category = filter_fn` ran after the reassignment,
            # so category became the function object and was used as an HDF5 group
            # name and in cache_exists() checks.
            category = filter_fn
            filter_fn = FILTER_FN[filter_fn]
        elif category in FILTER_FN:
            self.log.write(f" -Using the built-in filter function for category '{category}'. filter_fn will be ignored if provided.", verbose=self.verbose)
            filter_fn = FILTER_FN[category]

        assert filter_fn is None or category != 'all', "If filter_fn is not None, category cannot be 'all'"
        assert filter_fn is not None or category == 'all', "If category is not 'all', filter_fn must be provided"

        if self.already_built(category=category):
            # TODO: we should probably improve the checking logic, and maybe also allows to overwrite the cache
            self.log.write(f"Cache for category '{category}' and ref_alt_freq {self.ref_alt_freq} already exists. Skipping cache building", verbose=self.verbose)
            return

        n_cores = max(self.n_cores-1, 1) # leave one core for the watcher process
        contigs = self.get_contigs()

        self.running = True

        self.log.write(f" -Building cache for category '{category}' on {n_cores} cores...", verbose=self.verbose)

        pool = mp.Pool(n_cores)
        manager = mp.Manager()
        queue = manager.Queue()
        jobs = []

        # Start a watcher process to handle the output of each subprocess.
        # The watcher will write the cache to the file as soon as it receives the output from the subprocess, in a safe way.
        watcher = mp.Process(target=self.handle_output, args=(queue,))
        watcher.daemon = True
        watcher.start()

        for chrom in contigs:
            job = pool.apply_async(self.build_cache, args=(chrom, queue), kwds={'filter_fn': filter_fn, 'category': category})
            jobs.append(job)

        pool.close()
        pool.join() # wait for all processes to finish

        queue.put('kill') # send a signal to the watcher process to stop
        watcher.join()

        if set_cache:
            # merge the per-chromosome dicts returned by the workers
            self.cache = {}
            for job in jobs:
                self.cache.update(job.get()['cache'])

        self.running = False

    def build_cache(self, chrom, queue, filter_fn=None, category='all'):
        '''Worker: scan one chromosome and put its {key: freq} dict on the queue.'''
        assert filter_fn is None or category != 'all', "If filter_fn is not None, category cannot be 'all'"

        inner_cache = {}
        ref_alt_freq = self.ref_alt_freq

        vcf_reader = VariantFile(self.ref_infer, drop_samples=True)
        seq = vcf_reader.fetch(chrom)

        for record in seq:
            for alt in record.alts:
                if filter_fn is None or filter_fn(ref=record.ref, alt=alt):
                    key = f"{record.chrom}:{record.pos}:{record.ref}:{alt}"
                    value = record.info[ref_alt_freq][0]
                    inner_cache[key] = value

        vcf_reader.close()

        result = {}
        result['chrom'] = chrom
        result['ref_alt_freq'] = ref_alt_freq
        result['category'] = category
        result['cache'] = inner_cache
        queue.put(result)
        return result

    def handle_output(self, queue):
        ''' Function that monitors a queue and writes the cache to a file as soon as it receives the output from the subprocess.'''
        first = True
        m = queue.get() # wait for the first message, to avoid creating an empty cache file

        if m != 'kill':
            cache_path = get_write_path(self.ref_infer)
            with h5py.File(cache_path, mode='a') as f:
                while True:
                    if first:
                        first = False
                    else:
                        m = queue.get()

                    if m == 'kill':
                        break

                    result = m
                    cache = result['cache']
                    if cache is not None and len(cache) > 0:
                        main_group = f.require_group(result['ref_alt_freq'])
                        sub_group = main_group.require_group(result['category'])
                        chrom_group = sub_group.require_group(str(result['chrom']))

                        keys_list = list(cache.keys())
                        max_len = len(max(keys_list, key=len))
                        # BUGFIX: size the fixed-length string dtype from the longest
                        # key. The original passed the literal 'S69,892' (an f-string
                        # with no placeholder) and left max_len unused.
                        keys_dataset = chrom_group.create_dataset('keys', data=keys_list, dtype=f'S{max_len}', compression="gzip", compression_opts=4)
                        values_dataset = chrom_group.create_dataset('values', data=list(cache.values()), dtype='f', compression="gzip", compression_opts=4)

    def get_cache(self):
        '''Return the in-memory cache from the last start_building(set_cache=True).'''
        return self.cache
554
+
555
+
556
+ ################################################# CACHE LOADERs #################################################
557
+ # Classes for loading the cache in a separate thread or process in the background while the main process is running.
558
+ # However, right now, the most efficient way to load the cache and perform operations on it is to use the CacheProcess class.
559
+
560
class CacheLoader:
    """Abstract base for background cache loaders; instantiate a concrete subclass."""
    def __new__(cls, *args, **kwargs):
        # Prevent direct instantiation of the abstract base class.
        if cls is CacheLoader:
            raise TypeError(f"You are trying to instantiate an abstract class {cls.__name__}. Please use a concrete subclass.")
        return super().__new__(cls)

    def __init__(self, base_path, ref_alt_freq=None, category='all', filter_fn=None, n_cores=1, log=Log(), verbose=True):
        # base_path: path of the original input the cache derives from
        self.base_path = base_path
        # ref_alt_freq: INFO field holding the ALT allele frequency
        self.ref_alt_freq = ref_alt_freq
        # category: cache section to load ('all', 'pi', 'np', ...)
        self.category = category
        self.filter_fn = filter_fn
        self.n_cores = n_cores
        self.log = log
        self.verbose = verbose

    def _get_cache_path(self):
        # Existing cache file for base_path, or None.
        return get_cache_path(self.base_path)

    def build_cache(self):
        # Build the cache (blocking) and keep it in memory on this instance.
        self.cache = build_cache(
            self.base_path, ref_alt_freq=self.ref_alt_freq, n_cores=self.n_cores,
            filter_fn=self.filter_fn, category=self.category,
            return_cache=True, log=self.log, verbose=self.verbose
        )

    def add_to_cache(self, key, value):
        # Insert/overwrite a single cache entry.
        self.cache[key] = value

    def get_cache(self):
        # Return the loaded cache (subclasses may block until loading completes).
        return self.cache

    def reset_cache(self):
        # Drop all cached entries.
        self.cache = {}
593
+
594
+
595
class CacheLoaderThread(CacheLoader):
    '''
    A class for loading a cache in a separate thread. It is used to load the cache in the background while the main process is running.

    In theory, this should be the best and simplest approach to directly load the cache in the same process as the main process, without further
    copying the cache to the main process. However, due to the GIL (Global Interpreter Lock) in Python, this approach is not efficient and
    it slows down the main process.
    '''
    def __init__(self, base_path, ref_alt_freq=None, category='all', filter_fn=None, n_cores=1, log=Log(), verbose=True):
        super().__init__(base_path, ref_alt_freq=ref_alt_freq, category=category, filter_fn=filter_fn, n_cores=n_cores, log=log, verbose=verbose)
        self.cache = {}
        self.lock = threading.Lock() # For thread-safe cache access
        self.running = False
        self.executor = None # Thread pool executor
        self.future = None # Stores Future objects

    def start_loading(self):
        # Kick off background loading; builds the cache synchronously if absent.
        if self.running:
            print("Cache loading is already running. If you want to restart, please stop the current process first.")
            return

        cache_path = self._get_cache_path()

        if not cache_exists(cache_path, self.ref_alt_freq, self.category):
            self.log.write("Cache does not exist. Start building (and loading) cache...", verbose=self.verbose)
            self.build_cache() # this will also load the cache
        else:
            self.running = True
            self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            self.future = self.executor.submit(self.load_cache)

    def load_cache(self):
        # Runs on the worker thread: load the cache, then tear the executor down.
        cache_path = self._get_cache_path()
        self.log.write(f'[Start loading cache from {cache_path}...]', verbose=self.verbose)
        self.cache = load_h5py_cache(cache_path, ref_alt_freq=self.ref_alt_freq, category=self.category)
        self.log.write('[Finshed loading cache.]', verbose=self.verbose)

        # NOTE(review): cancel() on an already-running future is a no-op; kept
        # as part of the original cleanup sequence.
        self.future.cancel()
        self.executor.shutdown(wait=False)
        self.executor = None
        self.future = None
        self.running = False

    def get_cache(self):
        # Block until background loading finishes, then return the cache.
        if self.future is not None:
            self.future.result() # Ensure loading is finished before accessing the cache
        return self.cache
642
+
643
+
644
def _load_cache_process(path, ref_alt_freq, category, cache):
    '''Worker target: load the HDF5 cache and copy it into the shared dict `cache`.

    `cache` is expected to be a multiprocessing.Manager().dict() owned by the
    parent, so update() copies the data across the process boundary.
    '''
    local_cache = load_h5py_cache(path, ref_alt_freq=ref_alt_freq, category=category)
    # copy into the manager-backed shared dict, then free the local copy
    cache.update(local_cache)
    del local_cache
653
+
654
class CacheLoaderProcess(CacheLoader):
    '''
    A class for loading a cache in a separate process. It is used to load the cache in the background while the main process is running.

    Unlike CacheLoaderThread, this class is more efficient because it loads the cache in a separate process, which is not affected by the GIL.
    However, a lot of memory and time is wasted in copying the cache from the subprocess to the main process.
    '''
    def __init__(self, base_path, ref_alt_freq=None, category='all', filter_fn=None, n_cores=1, log=Log(), verbose=True):
        super().__init__(base_path, ref_alt_freq=ref_alt_freq, category=category, filter_fn=filter_fn, n_cores=n_cores, log=log, verbose=verbose)
        self.manager = mp.Manager()
        self.cache = self.manager.dict() # shared dict filled by the loader process
        self.running = False
        self.process = None

    def start_loading(self):
        '''Start loading the cache in a background process (or build it if missing).'''
        if self.running:
            print("Cache loading is already running. If you want to restart, please stop the current process first.")
            return

        cache_path = self._get_cache_path()

        if not cache_exists(cache_path, self.ref_alt_freq, self.category):
            self.log.write("Cache does not exist. Start building (and loading) cache...", verbose=self.verbose)
            self.build_cache() # this will also load the cache
        else:
            self.running = True
            # BUGFIX: pass self.category (was self.filter_fn) as the `category`
            # argument of _load_cache_process — the callable is not a category
            # name and would have broken the HDF5 group lookup.
            self.process = mp.Process(target=_load_cache_process, args=(cache_path, self.ref_alt_freq, self.category, self.cache))
            self.process.start()

    def get_cache(self):
        '''Block until loading finishes, then return the shared cache dict.'''
        if self.running:
            self.process.join() # Wait for cache loading process to finish
            self.running = False
        return self.cache
gwaslab/g_version.py CHANGED
@@ -15,8 +15,8 @@ def _get_version():
15
15
  def gwaslab_info():
16
16
  # version meta information
17
17
  dic={
18
- "version":"3.4.42",
19
- "release_date":"20240328"
18
+ "version":"3.4.43",
19
+ "release_date":"20240403"
20
20
  }
21
21
  return dic
22
22
 
@@ -24,6 +24,7 @@ from gwaslab.bd_common_data import get_chr_to_number
24
24
  from gwaslab.bd_common_data import _maketrans
25
25
  from gwaslab.g_vchange_status import vchange_status
26
26
  from gwaslab.g_version import _get_version
27
+ from gwaslab.cache_manager import CacheManager, PALINDROMIC_INDEL, NON_PALINDROMIC
27
28
 
28
29
  #rsidtochrpos
29
30
  #checkref
@@ -912,6 +913,56 @@ def check_strand_status(chr,start,end,ref,alt,eaf,vcf_reader,alt_freq,status,chr
912
913
  return status_pre+"5"+status_end
913
914
  return status_pre+"8"+status_end
914
915
 
916
def check_strand_status_cache(data,cache,ref_infer=None,ref_alt_freq=None,chr_dict=get_number_to_chr(),trust_cache=True,log=Log(),verbose=True):
    '''
    Determine strand status for each variant row using the allele-frequency cache.

    Each row of `data` is (chr, pos, ref, alt, eaf, status). The 7th status
    digit becomes "1" when cached frequency and EAF sit on the same side of
    0.5, "5" when they disagree, and "8" when unknown / not found. With
    trust_cache=False, cache misses fall back to reading the VCF at ref_infer.
    '''
    if not trust_cache:
        assert ref_infer is not None, "If trust_cache is False, ref_infer must be provided"
        log.warning("You are not trusting the cache, this will slow down the process. Please consider building a complete cache.")

    if ref_infer is not None and not trust_cache:
        vcf_reader = VariantFile(ref_infer)

    if isinstance(data, pd.DataFrame):
        data = data.values

    in_cache = 0
    new_statuses = []

    for row in data:
        _chrom, pos, ref, alt, eaf, status = row
        chrom = _chrom
        start = pos - 1
        end = pos

        # translate numeric chromosome codes to VCF contig names
        if chr_dict is not None:
            chrom = chr_dict[chrom]

        status_pre = status[:6]
        status_end = ""

        # default: unknown / not determinable
        new_status = status_pre + "8" + status_end

        cache_key = f"{chrom}:{pos}:{ref}:{alt}"
        if cache_key in cache:
            in_cache += 1
            record = cache[cache_key]
            if record is None:
                new_status = status_pre + "8" + status_end
            elif (record < 0.5) and (eaf < 0.5):
                new_status = status_pre + "1" + status_end
            elif (record > 0.5) and (eaf > 0.5):
                new_status = status_pre + "1" + status_end
            else:
                new_status = status_pre + "5" + status_end
        elif not trust_cache:
            # If we don't trust the cache as a not complete cache, we should perform the check reading from the VCF file
            new_status = check_strand_status(_chrom, start, end, ref, alt, eaf, vcf_reader, ref_alt_freq, status, chr_dict)

        new_statuses.append(new_status)

    log.write(f" -Elements in cache: {in_cache}", verbose=verbose)
    return new_statuses
965
+
915
966
 
916
967
  def check_unkonwn_indel(chr,start,end,ref,alt,eaf,vcf_reader,alt_freq,status,chr_dict=get_number_to_chr(),daf_tolerance=0.2):
917
968
  ### input : unknown indel, both on genome (xx1[45]x)
@@ -939,6 +990,65 @@ def check_unkonwn_indel(chr,start,end,ref,alt,eaf,vcf_reader,alt_freq,status,chr
939
990
 
940
991
  return status_pre+"8"+status_end
941
992
 
993
+
994
def check_unkonwn_indel_cache(data,cache,ref_infer=None,ref_alt_freq=None,chr_dict=get_number_to_chr(),daf_tolerance=0.2,trust_cache=True,log=Log(),verbose=True):
    '''
    Resolve unknown indels for each variant row using the allele-frequency cache.

    Each row of `data` is (chr, pos, ref, alt, eaf, status). The 7th status
    digit becomes "3" when the ref/alt orientation matches within
    daf_tolerance, "6" when the swapped alt/ref orientation matches, and "8"
    otherwise. With trust_cache=False, cache misses fall back to the VCF.
    '''
    if not trust_cache:
        assert ref_infer is not None, "If trust_cache is False, ref_infer must be provided"
        log.warning("You are not trusting the cache, this will slow down the process. Please consider building a complete cache.")

    # CONSISTENCY FIX: open the VCF only when it can actually be used (the
    # reader is consulted only on a cache miss with trust_cache=False).
    # Previously it was opened whenever ref_infer was provided, matching
    # neither its usage here nor check_strand_status_cache.
    if ref_infer is not None and not trust_cache:
        vcf_reader = VariantFile(ref_infer)

    if isinstance(data, pd.DataFrame):
        data = data.values

    in_cache = 0
    new_statuses = []

    for i in range(data.shape[0]):
        _chrom, pos, ref, alt, eaf, status = data[i]
        chrom = _chrom

        # translate numeric chromosome codes to VCF contig names
        if chr_dict is not None: chrom=chr_dict[chrom]
        start = pos - 1
        end = pos

        status_pre=status[:6]
        status_end=""

        new_status = status_pre+"8"+status_end # default value

        cache_key_ref_alt = f"{chrom}:{pos}:{ref}:{alt}"
        cache_key_alt_ref = f"{chrom}:{pos}:{alt}:{ref}"

        if cache_key_ref_alt in cache:
            in_cache += 1
            record = cache[cache_key_ref_alt]
            # same orientation: frequencies must agree within daf_tolerance
            if record is not None and abs(record - eaf) < daf_tolerance:
                new_status = status_pre+"3"+status_end

        elif cache_key_alt_ref in cache:
            in_cache += 1
            record = cache[cache_key_alt_ref]
            # swapped orientation: compare against 1 - eaf
            if record is not None and abs(record - (1 - eaf)) < daf_tolerance:
                new_status = status_pre+"6"+status_end

        else:
            if not trust_cache:
                # If we don't trust the cache as a not complete cache, we should perform the check reading from the VCF file
                new_status = check_unkonwn_indel(_chrom, start, end, ref, alt, eaf, vcf_reader, ref_alt_freq, status, chr_dict, daf_tolerance)

        new_statuses.append(new_status)

    log.write(f" -Elements in cache: {in_cache}", verbose=verbose)
    return new_statuses
1051
+
942
1052
 
943
1053
  def get_reverse_complementary_allele(a):
944
1054
  dic = str.maketrans({
@@ -963,16 +1073,40 @@ def check_strand(sumstats,ref_infer,ref_alt_freq=None,chr="CHR",pos="POS",ref="N
963
1073
  status_part = sumstats.apply(lambda x:check_strand_status(x.iloc[0],x.iloc[1]-1,x.iloc[1],x.iloc[2],x.iloc[3],x.iloc[4],vcf_reader,ref_alt_freq,x.iloc[5],chr_dict),axis=1)
964
1074
  return status_part
965
1075
 
1076
+ def check_strand_cache(sumstats,cache,ref_infer,ref_alt_freq=None,chr_dict=get_number_to_chr(),trust_cache=True,log=Log(),verbose=True):
1077
+ assert cache is not None, "Cache must be provided"
1078
+ status_part = check_strand_status_cache(sumstats,cache,ref_infer,ref_alt_freq,chr_dict,trust_cache,log,verbose)
1079
+ return status_part
1080
+
966
1081
  def check_indel(sumstats,ref_infer,ref_alt_freq=None,chr="CHR",pos="POS",ref="NEA",alt="EA",eaf="EAF",chr_dict=get_number_to_chr(),status="STATUS",daf_tolerance=0.2):
967
1082
  vcf_reader = VariantFile(ref_infer)
968
1083
  status_part = sumstats.apply(lambda x:check_unkonwn_indel(x.iloc[0],x.iloc[1]-1,x.iloc[1],x.iloc[2],x.iloc[3],x.iloc[4],vcf_reader,ref_alt_freq,x.iloc[5],chr_dict,daf_tolerance),axis=1)
969
1084
  return status_part
970
1085
 
1086
+ def check_indel_cache(sumstats,cache,ref_infer,ref_alt_freq=None,chr_dict=get_number_to_chr(),daf_tolerance=0.2,trust_cache=True,log=Log(),verbose=True):
1087
+ assert cache is not None, "Cache must be provided"
1088
+ status_part = check_unkonwn_indel_cache(sumstats,cache,ref_infer,ref_alt_freq,chr_dict,daf_tolerance,trust_cache,log,verbose)
1089
+ return status_part
1090
+
971
1091
  ##################################################################################################################################################
972
1092
 
973
1093
  def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,daf_tolerance=0.20,remove_snp="",mode="pi",n_cores=1,remove_indel="",
974
1094
  chr="CHR",pos="POS",ref="NEA",alt="EA",eaf="EAF",status="STATUS",
975
- chr_dict=None,verbose=True,log=Log()):
1095
+ chr_dict=None,cache_options={},verbose=True,log=Log()):
1096
+ '''
1097
+ Args:
1098
+ cache_options : A dictionary with the following keys:
1099
+ - cache_manager: CacheManager object or None. If either cache_loader or cache_process is not None, or use_cache is True, a CacheManager object will be created automatically.
1100
+ - trust_cache: bool (optional, default: True). Whether to completely trust the cache. When True, any key not found in the cache is treated as missing from the VCF file as well, and no VCF lookup is performed for it.
1101
+ - cache_loader: Object with a get_cache() method or None.
1102
+ - cache_process: Object with an apply_fn() method or None.
1103
+ - use_cache: bool (optional, default: False). If any of the cache_manager, cache_loader or cache_process is not None, this will be set to True automatically.
1104
+ If set to True and all between cache_manager, cache_loader and cache_process are None, the cache will be loaded (or built) on the spot.
1105
+
1106
+ The usefulness of a cache_loader or cache_process object is to pass a custom object which already has the cache loaded. This can be useful if the cache is loaded in background in another thread/process while other operations are performed.
1107
+ The cache_manager is a CacheManager object that exposes the API used to interact with the cache.
1108
+ '''
1109
+
976
1110
  ##start function with col checking##########################################################
977
1111
  _start_line = "infer strand for palindromic SNPs/align indistinguishable indels"
978
1112
  _end_line = "inferring strand for palindromic SNPs/align indistinguishable indels"
@@ -995,6 +1129,16 @@ def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,
995
1129
 
996
1130
  chr_dict = auto_check_vcf_chr_dict(ref_infer, chr_dict, verbose, log)
997
1131
 
1132
+ # Setup cache variables
1133
+ cache_manager = cache_options.get("cache_manager", None)
1134
+ if cache_manager is not None:
1135
+ assert isinstance(cache_manager, CacheManager), "cache_manager must be a CacheManager object"
1136
+ trust_cache = cache_options.get("trust_cache", True)
1137
+ cache_loader = cache_options.get("cache_loader", None)
1138
+ cache_process = cache_options.get("cache_process", None)
1139
+ use_cache = any(c is not None for c in [cache_manager, cache_loader, cache_process]) or cache_options.get('use_cache', False)
1140
+ _n_cores = n_cores # backup n_cores
1141
+
998
1142
  log.write(" -Field for alternative allele frequency in VCF INFO: {}".format(ref_alt_freq), verbose=verbose)
999
1143
 
1000
1144
  if "p" in mode:
@@ -1022,16 +1166,30 @@ def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,
1022
1166
  #########################################################################################
1023
1167
  if sum(unknow_palindromic_to_check)>0:
1024
1168
  if sum(unknow_palindromic_to_check)<10000:
1025
- n_cores=1
1026
-
1027
- #df_split = np.array_split(sumstats.loc[unknow_palindromic_to_check,[chr,pos,ref,alt,eaf,status]], n_cores)
1028
- df_split = _df_split(sumstats.loc[unknow_palindromic_to_check,[chr,pos,ref,alt,eaf,status]], n_cores)
1029
- pool = Pool(n_cores)
1030
- map_func = partial(check_strand,chr=chr,pos=pos,ref=ref,alt=alt,eaf=eaf,status=status,ref_infer=ref_infer,ref_alt_freq=ref_alt_freq,chr_dict=chr_dict)
1031
- status_inferred = pd.concat(pool.map(map_func,df_split))
1032
- sumstats.loc[unknow_palindromic_to_check,status] = status_inferred.values
1033
- pool.close()
1034
- pool.join()
1169
+ n_cores=1
1170
+
1171
+ if use_cache and cache_manager is None:
1172
+ cache_manager = CacheManager(base_path=ref_infer, cache_loader=cache_loader, cache_process=cache_process,
1173
+ ref_alt_freq=ref_alt_freq, category=PALINDROMIC_INDEL,
1174
+ n_cores=_n_cores, log=log, verbose=verbose)
1175
+
1176
+ log.write(" -Starting strand inference for palindromic SNPs...",verbose=verbose)
1177
+ df_to_check = sumstats.loc[unknow_palindromic_to_check,[chr,pos,ref,alt,eaf,status]]
1178
+
1179
+ if use_cache and cache_manager.cache_len > 0:
1180
+ log.write(" -Using cache for strand inference",verbose=verbose)
1181
+ status_inferred = cache_manager.apply_fn(check_strand_cache, sumstats=df_to_check, ref_infer=ref_infer, ref_alt_freq=ref_alt_freq, chr_dict=chr_dict, trust_cache=trust_cache, log=log, verbose=verbose)
1182
+ sumstats.loc[unknow_palindromic_to_check,status] = status_inferred
1183
+ else:
1184
+ #df_split = np.array_split(df_to_check, n_cores)
1185
+ df_split = _df_split(df_to_check, n_cores)
1186
+ pool = Pool(n_cores)
1187
+ map_func = partial(check_strand,chr=chr,pos=pos,ref=ref,alt=alt,eaf=eaf,status=status,ref_infer=ref_infer,ref_alt_freq=ref_alt_freq,chr_dict=chr_dict)
1188
+ status_inferred = pd.concat(pool.map(map_func,df_split))
1189
+ sumstats.loc[unknow_palindromic_to_check,status] = status_inferred.values
1190
+ pool.close()
1191
+ pool.join()
1192
+ log.write(" -Finished strand inference.",verbose=verbose)
1035
1193
  else:
1036
1194
  log.warning("No palindromic variants available for checking.")
1037
1195
  #########################################################################################
@@ -1082,15 +1240,30 @@ def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,
1082
1240
 
1083
1241
  if sum(unknow_indel)>0:
1084
1242
  if sum(unknow_indel)<10000:
1085
- n_cores=1
1086
- #df_split = np.array_split(sumstats.loc[unknow_indel, [chr,pos,ref,alt,eaf,status]], n_cores)
1087
- df_split = _df_split(sumstats.loc[unknow_indel, [chr,pos,ref,alt,eaf,status]], n_cores)
1088
- pool = Pool(n_cores)
1089
- map_func = partial(check_indel,chr=chr,pos=pos,ref=ref,alt=alt,eaf=eaf,status=status,ref_infer=ref_infer,ref_alt_freq=ref_alt_freq,chr_dict=chr_dict,daf_tolerance=daf_tolerance)
1090
- status_inferred = pd.concat(pool.map(map_func,df_split))
1091
- sumstats.loc[unknow_indel,status] = status_inferred.values
1092
- pool.close()
1093
- pool.join()
1243
+ n_cores=1
1244
+
1245
+ if use_cache and cache_manager is None:
1246
+ cache_manager = CacheManager(base_path=ref_infer, cache_loader=cache_loader, cache_process=cache_process,
1247
+ ref_alt_freq=ref_alt_freq, category=PALINDROMIC_INDEL,
1248
+ n_cores=_n_cores, log=log, verbose=verbose)
1249
+
1250
+ log.write(" -Starting indistinguishable indel inference...",verbose=verbose)
1251
+ df_to_check = sumstats.loc[unknow_indel,[chr,pos,ref,alt,eaf,status]]
1252
+
1253
+ if use_cache and cache_manager.cache_len > 0:
1254
+ log.write(" -Using cache for indel inference",verbose=verbose)
1255
+ status_inferred = cache_manager.apply_fn(check_indel_cache, sumstats=df_to_check, ref_infer=ref_infer, ref_alt_freq=ref_alt_freq, chr_dict=chr_dict, daf_tolerance=daf_tolerance, trust_cache=trust_cache, log=log, verbose=verbose)
1256
+ sumstats.loc[unknow_indel,status] = status_inferred
1257
+ else:
1258
+ #df_split = np.array_split(sumstats.loc[unknow_indel, [chr,pos,ref,alt,eaf,status]], n_cores)
1259
+ df_split = _df_split(sumstats.loc[unknow_indel, [chr,pos,ref,alt,eaf,status]], n_cores)
1260
+ pool = Pool(n_cores)
1261
+ map_func = partial(check_indel,chr=chr,pos=pos,ref=ref,alt=alt,eaf=eaf,status=status,ref_infer=ref_infer,ref_alt_freq=ref_alt_freq,chr_dict=chr_dict,daf_tolerance=daf_tolerance)
1262
+ status_inferred = pd.concat(pool.map(map_func,df_split))
1263
+ sumstats.loc[unknow_indel,status] = status_inferred.values
1264
+ pool.close()
1265
+ pool.join()
1266
+ log.write(" -Finished indistinguishable indel inference.",verbose=verbose)
1094
1267
 
1095
1268
  #########################################################################################
1096
1269
 
@@ -1611,12 +1611,5 @@ def check_col(df_col_names, verbose=True, log=Log(), cols=None, function=None):
1611
1611
 
1612
1612
  ###############################################################################################################
1613
1613
  def _df_split(dataframe, n):
1614
- chunks = []
1615
- chunk_size = int(dataframe.shape[0] // n)+1
1616
-
1617
- for index in range(0, dataframe.shape[0], chunk_size):
1618
- chunks.append(
1619
- dataframe.iloc[index:index + chunk_size]
1620
- )
1621
-
1622
- return chunks
1614
+ k, m = divmod(len(dataframe), n)
1615
+ return [dataframe.iloc[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)]
@@ -1031,17 +1031,18 @@ def mqqplot(insumstats,
1031
1031
 
1032
1032
  def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
1033
1033
 
1034
- xmin, xmax = ax1.get_xlim()
1035
-
1036
- if xpad is not None:
1037
- pad = xpad* sumstats["i"].max()
1038
- ax1.set_xlim([xmin - pad, xmin + pad])
1039
- if xpadl is not None:
1040
- pad = xpadl* sumstats["i"].max()
1041
- ax1.set_xlim([xmin - pad,xmax])
1042
- if xpadr is not None:
1043
- pad = xpadr* sumstats["i"].max()
1044
- ax1.set_xlim([xmin, xmax + pad])
1034
+ if ax1 is not None:
1035
+ xmin, xmax = ax1.get_xlim()
1036
+
1037
+ if xpad is not None:
1038
+ pad = xpad* sumstats["i"].max()
1039
+ ax1.set_xlim([xmin - pad, xmin + pad])
1040
+ if xpadl is not None:
1041
+ pad = xpadl* sumstats["i"].max()
1042
+ ax1.set_xlim([xmin - pad,xmax])
1043
+ if xpadr is not None:
1044
+ pad = xpadr* sumstats["i"].max()
1045
+ ax1.set_xlim([xmin, xmax + pad])
1045
1046
 
1046
1047
  return ax1
1047
1048
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gwaslab
3
- Version: 3.4.42
3
+ Version: 3.4.43
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <yunye@gwaslab.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -24,6 +24,7 @@ Requires-Dist: liftover >=1.1.13
24
24
  Requires-Dist: scikit-allel >=1.3.5
25
25
  Requires-Dist: pyensembl ==2.2.3
26
26
  Requires-Dist: gtfparse ==1.3.0
27
+ Requires-Dist: h5py >=3.10.0
27
28
 
28
29
  # GWASLab
29
30
 
@@ -193,6 +194,7 @@ dependencies:
193
194
  - adjustText==0.8
194
195
  - pysam==0.19
195
196
  - pyensembl==2.2.3
197
+ - h5py==3.10.0
196
198
  ```
197
199
 
198
200
  ## How to cite
@@ -3,6 +3,7 @@ gwaslab/bd_common_data.py,sha256=v98X3tdRNOVE2gCiSHkfyBb0pSIjTk5IFG8A725Oj3o,126
3
3
  gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
4
4
  gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
5
5
  gwaslab/bd_get_hapmap3.py,sha256=asNjQYeGfQi8u3jnfenRvDdKMs5ptql5wpcUzqMlwUI,3937
6
+ gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
6
7
  gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
7
8
  gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
9
  gwaslab/g_Sumstats.py,sha256=GS0YUdvNYlwiR-mu6VJIv_JRqgBpHmTq9123XX5kiMI,35132
@@ -11,9 +12,9 @@ gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
11
12
  gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
12
13
  gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
13
14
  gwaslab/g_vchange_status.py,sha256=eX0jdIb6Spa07ZdpWNqUWqdVBWS0fuH2yrt4PDi3Res,1746
14
- gwaslab/g_version.py,sha256=sYIrfQwAxGSUHOGcP64nhQ71-Cgzax3Xs18GM0Os_9k,1818
15
+ gwaslab/g_version.py,sha256=79WGi9pB-TL4T-lRgKtkq1p5WXZOYfBG5KdKplTJxfs,1818
15
16
  gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
16
- gwaslab/hm_harmonize_sumstats.py,sha256=n6aygO8V7MJaDgkNHozNzIsm_G8KcR70ukS-IOygw0E,67684
17
+ gwaslab/hm_harmonize_sumstats.py,sha256=Lu3UkNK6S9imwOgjK1ZBZTu2gDSFEDjBbgSwSOGfzcI,76705
17
18
  gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
18
19
  gwaslab/io_preformat_input.py,sha256=w62JLAr16Ru0EgUtBCEV2eXRO89OqhidQxwf2IPAM38,20014
19
20
  gwaslab/io_read_ldsc.py,sha256=8S9n4imgl4d0WPms_GYld-6uUM5z7iWGiCA-M814kzY,12123
@@ -27,7 +28,7 @@ gwaslab/ldsc_parse.py,sha256=MBnfgcWlV4oHp9MoDRh1mpilaHhAR15Af77hMFn4-5k,10564
27
28
  gwaslab/ldsc_regressions.py,sha256=yzbGjgNV7u-SWXNPsh9S8y9mK97Bim_Nmad9G9V18ZU,30078
28
29
  gwaslab/ldsc_sumstats.py,sha256=O0olsDxKlh1MJ1gAuEN1t40rxhajOEwOQ20ak7xoDrI,26245
29
30
  gwaslab/qc_check_datatype.py,sha256=kW68uk4dTLOU2b1dHoVat6n0loundDysAjIqxsXW28Q,3379
30
- gwaslab/qc_fix_sumstats.py,sha256=Dp2HnVnqdO5aiXpLhnLsvL6XCKuC4Du2HJFEVIH2Ss0,87342
31
+ gwaslab/qc_fix_sumstats.py,sha256=YtuADrWFhT1kdRp9CmhWF9IQkkXwN8SLnmbF9DIIZ-Y,87231
31
32
  gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
33
  gwaslab/util_ex_calculate_ldmatrix.py,sha256=LpE__LoYRHLgVKlCHo6lYWlz9LEUVUDqYPEAP-Svbm0,14598
33
34
  gwaslab/util_ex_calculate_prs.py,sha256=5l1eiZs8YwIpEgp7i3IurP8n5KwQM5awbG9fWSm4iT4,9053
@@ -58,7 +59,7 @@ gwaslab/viz_plot_compare_effect.py,sha256=8om3y6YQfnOk4FfkKSpKr2KqJcsMeCwQ6FRRKb
58
59
  gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
59
60
  gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
60
61
  gwaslab/viz_plot_miamiplot2.py,sha256=SWv82D8UBbREKsk8EoKth-2w68l6FbXyVLsb_E1hh8o,15882
61
- gwaslab/viz_plot_mqqplot.py,sha256=xIx-m8IP0GAAKuIoiAbzxl3fkUDEEunczo6dVEZ3KRY,61671
62
+ gwaslab/viz_plot_mqqplot.py,sha256=PzRWnm11whxww7ut-bzFkj1sbPc_c0OP7yRpIgYo2iQ,61739
62
63
  gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
63
64
  gwaslab/viz_plot_regionalplot.py,sha256=PBIWkNj2fj-dRLKQJNpM8wor5jya2anqix0-UYLE0Is,37901
64
65
  gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
@@ -72,9 +73,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
72
73
  gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
73
74
  gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
74
75
  gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
75
- gwaslab-3.4.42.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
76
- gwaslab-3.4.42.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
77
- gwaslab-3.4.42.dist-info/METADATA,sha256=iqArSw_x7yXovyF9D-z3gt5fzskQSOVObXPWCrYIcsg,7714
78
- gwaslab-3.4.42.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
79
- gwaslab-3.4.42.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
80
- gwaslab-3.4.42.dist-info/RECORD,,
76
+ gwaslab-3.4.43.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
77
+ gwaslab-3.4.43.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
78
+ gwaslab-3.4.43.dist-info/METADATA,sha256=bziEH7fBqmzBIWDEZQUaa9w_DinQxI2SbjaatoN-jYw,7764
79
+ gwaslab-3.4.43.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
80
+ gwaslab-3.4.43.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
81
+ gwaslab-3.4.43.dist-info/RECORD,,