PyPI - cpgtools - Versions diffs - 2.0.5__py3-none-any.whl - Mend

cpgtools 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

cpgmodule/BED.py +441 -0
cpgmodule/MI.py +193 -0
cpgmodule/__init__.py +0 -0
cpgmodule/_version.py +1 -0
cpgmodule/cgID.py +866897 -0
cpgmodule/data/AltumAge_cpg.pkl +0 -0
cpgmodule/data/AltumAge_multi_platform_cpgs.pkl +0 -0
cpgmodule/data/AltumAge_scaler.pkl +0 -0
cpgmodule/data/GA_Bohlin.pkl +0 -0
cpgmodule/data/GA_Haftorn.pkl +0 -0
cpgmodule/data/GA_Knight.pkl +0 -0
cpgmodule/data/GA_Lee_CPC.pkl +0 -0
cpgmodule/data/GA_Lee_RPC.pkl +0 -0
cpgmodule/data/GA_Lee_refined_RPC.pkl +0 -0
cpgmodule/data/GA_Mayne.pkl +0 -0
cpgmodule/data/Hannum.pkl +0 -0
cpgmodule/data/Horvath_2013.pkl +0 -0
cpgmodule/data/Horvath_2018.pkl +0 -0
cpgmodule/data/Levine.pkl +0 -0
cpgmodule/data/Lu_DNAmTL.pkl +0 -0
cpgmodule/data/Ped_McEwen.pkl +0 -0
cpgmodule/data/Ped_Wu.pkl +0 -0
cpgmodule/data/Zhang_BLUP.pkl +0 -0
cpgmodule/data/Zhang_EN.pkl +0 -0
cpgmodule/data/__init__.py +0 -0
cpgmodule/extend_bed.py +147 -0
cpgmodule/imotif.py +348 -0
cpgmodule/ireader.py +28 -0
cpgmodule/methylClock.py +53 -0
cpgmodule/padjust.py +58 -0
cpgmodule/region2gene.py +170 -0
cpgmodule/utils.py +642 -0
cpgtools-2.0.5.data/scripts/CpG_aggregation.py +238 -0
cpgtools-2.0.5.data/scripts/CpG_anno_position.py +156 -0
cpgtools-2.0.5.data/scripts/CpG_anno_probe.py +112 -0
cpgtools-2.0.5.data/scripts/CpG_density_gene_centered.py +107 -0
cpgtools-2.0.5.data/scripts/CpG_distrb_chrom.py +154 -0
cpgtools-2.0.5.data/scripts/CpG_distrb_gene_centered.py +193 -0
cpgtools-2.0.5.data/scripts/CpG_distrb_region.py +146 -0
cpgtools-2.0.5.data/scripts/CpG_logo.py +134 -0
cpgtools-2.0.5.data/scripts/CpG_to_gene.py +141 -0
cpgtools-2.0.5.data/scripts/beta_PCA.py +188 -0
cpgtools-2.0.5.data/scripts/beta_UMAP.py +181 -0
cpgtools-2.0.5.data/scripts/beta_combat.py +174 -0
cpgtools-2.0.5.data/scripts/beta_jitter_plot.py +107 -0
cpgtools-2.0.5.data/scripts/beta_m_conversion.py +105 -0
cpgtools-2.0.5.data/scripts/beta_profile_gene_centered.py +165 -0
cpgtools-2.0.5.data/scripts/beta_profile_region.py +152 -0
cpgtools-2.0.5.data/scripts/beta_selectNBest.py +116 -0
cpgtools-2.0.5.data/scripts/beta_stacked_barplot.py +119 -0
cpgtools-2.0.5.data/scripts/beta_stats.py +101 -0
cpgtools-2.0.5.data/scripts/beta_tSNE.py +179 -0
cpgtools-2.0.5.data/scripts/beta_topN.py +99 -0
cpgtools-2.0.5.data/scripts/beta_trichotmize.py +190 -0
cpgtools-2.0.5.data/scripts/dmc_Bayes.py +442 -0
cpgtools-2.0.5.data/scripts/dmc_bb.py +221 -0
cpgtools-2.0.5.data/scripts/dmc_fisher.py +161 -0
cpgtools-2.0.5.data/scripts/dmc_glm.py +191 -0
cpgtools-2.0.5.data/scripts/dmc_logit.py +226 -0
cpgtools-2.0.5.data/scripts/dmc_nonparametric.py +176 -0
cpgtools-2.0.5.data/scripts/dmc_ttest.py +222 -0
cpgtools-2.0.5.data/scripts/predict_missing.py +673 -0
cpgtools-2.0.5.data/scripts/predict_sex.py +126 -0
cpgtools-2.0.5.dist-info/METADATA +59 -0
cpgtools-2.0.5.dist-info/RECORD +104 -0
cpgtools-2.0.5.dist-info/WHEEL +5 -0
cpgtools-2.0.5.dist-info/licenses/LICENSE.txt +19 -0
cpgtools-2.0.5.dist-info/top_level.txt +5 -0
impyute/__init__.py +3 -0
impyute/contrib/__init__.py +7 -0
impyute/contrib/compare.py +69 -0
impyute/contrib/count_missing.py +30 -0
impyute/contrib/describe.py +63 -0
impyute/cs/__init__.py +11 -0
impyute/cs/buck_iterative.py +82 -0
impyute/cs/central_tendency.py +84 -0
impyute/cs/em.py +52 -0
impyute/cs/fast_knn.py +130 -0
impyute/cs/random.py +27 -0
impyute/dataset/__init__.py +6 -0
impyute/dataset/base.py +137 -0
impyute/dataset/corrupt.py +55 -0
impyute/deletion/__init__.py +5 -0
impyute/deletion/complete_case.py +21 -0
impyute/ops/__init__.py +12 -0
impyute/ops/error.py +9 -0
impyute/ops/inverse_distance_weighting.py +31 -0
impyute/ops/matrix.py +47 -0
impyute/ops/testing.py +20 -0
impyute/ops/util.py +96 -0
impyute/ops/wrapper.py +179 -0
impyute/ts/__init__.py +6 -0
impyute/ts/locf.py +57 -0
impyute/ts/moving_window.py +128 -0
impyutelib.py +890 -0
missingpy/__init__.py +4 -0
missingpy/knnimpute.py +328 -0
missingpy/missforest.py +556 -0
missingpy/pairwise_external.py +315 -0
missingpy/tests/__init__.py +0 -0
missingpy/tests/test_knnimpute.py +605 -0
missingpy/tests/test_missforest.py +409 -0
missingpy/utils.py +124 -0
misspylib.py +565 -0

missingpy/pairwise_external.py ADDED Viewed

@@ -0,0 +1,315 @@
+# This file is a modification of sklearn.metrics.pairwise
+# Modifications by Ashim Bhattarai
+"""
+New BSD License
+Copyright (c) 2007–2018 The scikit-learn developers.
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+  a. Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+  b. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+  c. Neither the name of the Scikit-learn Developers  nor the names of
+     its contributors may be used to endorse or promote products
+     derived from this software without specific prior written
+     permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+"""
+from __future__ import division
+from functools import partial
+import itertools
+import numpy as np
+from scipy.spatial import distance
+from scipy.sparse import issparse
+from sklearn.metrics.pairwise import _VALID_METRICS, _return_float_dtype
+from sklearn.metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS
+from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
+from sklearn.metrics.pairwise import _parallel_pairwise
+from sklearn.utils import check_array
+from .utils import masked_euclidean_distances
+# Metric names for which pairwise_distances() runs its missing-value check
+# before computing anything.
+_MASKED_METRICS = ['masked_euclidean']
+# NOTE(review): _VALID_METRICS is imported from sklearn.metrics.pairwise. If it
+# is a list, "+=" extends that shared object in place, so the new name is also
+# visible to scikit-learn itself -- confirm this is intended.
+_VALID_METRICS += ['masked_euclidean']
+def _get_mask(X, value_to_mask):
+    """Compute the boolean mask X == missing_values."""
+    if value_to_mask == "NaN" or np.isnan(value_to_mask):
+        return np.isnan(X)
+    else:
+        return X == value_to_mask
+def check_pairwise_arrays(X, Y, precomputed=False, dtype=None,
+                          accept_sparse='csr', force_all_finite=True,
+                          copy=False):
+    """ Set X and Y appropriately and checks inputs
+    If Y is None, it is set as a pointer to X (i.e. not a copy).
+    If Y is given, this does not happen.
+    All distance metrics should use this function first to assert that the
+    given parameters are correct and safe to use.
+    Specifically, this function first ensures that both X and Y are arrays,
+    then checks that they are at least two dimensional while ensuring that
+    their elements are floats (or dtype if provided). Finally, the function
+    checks that the size of the second dimension of the two arrays is equal, or
+    the equivalent check for a precomputed distance matrix.
+    Parameters
+    ----------
+    X : {array-like, sparse matrix}, shape (n_samples_a, n_features)
+    Y : {array-like, sparse matrix}, shape (n_samples_b, n_features)
+    precomputed : bool
+        True if X is to be treated as precomputed distances to the samples in
+        Y.
+    dtype : string, type, list of types or None (default=None)
+        Data type required for X and Y. If None, the dtype will be an
+        appropriate float type selected by _return_float_dtype.
+        .. versionadded:: 0.18
+    accept_sparse : string, boolean or list/tuple of strings
+        String[s] representing allowed sparse matrix formats, such as 'csc',
+        'csr', etc. If the input is sparse but not in the allowed format,
+        it will be converted to the first listed format. True allows the input
+        to be any format. False means that a sparse matrix input will
+        raise an error.
+    force_all_finite : bool
+        Whether to raise an error on np.inf and np.nan in X (or Y if it exists)
+    copy : bool
+        Whether a forced copy will be triggered. If copy=False, a copy might
+        be triggered by a conversion.
+    Returns
+    -------
+    safe_X : {array-like, sparse matrix}, shape (n_samples_a, n_features)
+        An array equal to X, guaranteed to be a numpy array.
+    safe_Y : {array-like, sparse matrix}, shape (n_samples_b, n_features)
+        An array equal to Y if Y was not None, guaranteed to be a numpy array.
+        If Y was None, safe_Y will be a pointer to X.
+    """
+    X, Y, dtype_float = _return_float_dtype(X, Y)
+    warn_on_dtype = dtype is not None
+    estimator = 'check_pairwise_arrays'
+    if dtype is None:
+        dtype = dtype_float
+    if Y is X or Y is None:
+        X = Y = check_array(X, accept_sparse=accept_sparse, dtype=dtype,
+                            copy=copy, force_all_finite=force_all_finite,
+                            warn_on_dtype=warn_on_dtype, estimator=estimator)
+    else:
+        X = check_array(X, accept_sparse=accept_sparse, dtype=dtype,
+                        copy=copy, force_all_finite=force_all_finite,
+                        warn_on_dtype=warn_on_dtype, estimator=estimator)
+        Y = check_array(Y, accept_sparse=accept_sparse, dtype=dtype,
+                        copy=copy, force_all_finite=force_all_finite,
+                        warn_on_dtype=warn_on_dtype, estimator=estimator)
+    if precomputed:
+        if X.shape[1] != Y.shape[0]:
+            raise ValueError("Precomputed metric requires shape "
+                             "(n_queries, n_indexed). Got (%d, %d) "
+                             "for %d indexed." %
+                             (X.shape[0], X.shape[1], Y.shape[0]))
+    elif X.shape[1] != Y.shape[1]:
+        raise ValueError("Incompatible dimension for X and Y matrices: "
+                         "X.shape[1] == %d while Y.shape[1] == %d" % (
+                             X.shape[1], Y.shape[1]))
+    return X, Y
+def _pairwise_callable(X, Y, metric, **kwds):
+    """Handle the callable case for pairwise_{distances,kernels}
+    """
+    force_all_finite = False if callable(metric) else True
+    X, Y = check_pairwise_arrays(X, Y, force_all_finite=force_all_finite)
+    if X is Y:
+        # Only calculate metric for upper triangle
+        out = np.zeros((X.shape[0], Y.shape[0]), dtype='float')
+        iterator = itertools.combinations(range(X.shape[0]), 2)
+        for i, j in iterator:
+            out[i, j] = metric(X[i], Y[j], **kwds)
+        # Make symmetric
+        # NB: out += out.T will produce incorrect results
+        out = out + out.T
+        # Calculate diagonal
+        # NB: nonzero diagonals are allowed for both metrics and kernels
+        for i in range(X.shape[0]):
+            x = X[i]
+            out[i, i] = metric(x, x, **kwds)
+    else:
+        # Calculate all cells
+        out = np.empty((X.shape[0], Y.shape[0]), dtype='float')
+        iterator = itertools.product(range(X.shape[0]), range(Y.shape[0]))
+        for i, j in iterator:
+            out[i, j] = metric(X[i], Y[j], **kwds)
+    return out
+# Helper functions - distance
+# Register the masked Euclidean implementation under the 'masked_euclidean' key
+# of the PAIRWISE_DISTANCE_FUNCTIONS mapping imported from
+# sklearn.metrics.pairwise, so pairwise_distances() below can look it up by name.
+PAIRWISE_DISTANCE_FUNCTIONS['masked_euclidean'] = masked_euclidean_distances
+def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):
+    """ Compute the distance matrix from a vector array X and optional Y.
+    This method takes either a vector array or a distance matrix, and returns
+    a distance matrix. If the input is a vector array, the distances are
+    computed. If the input is a distances matrix, it is returned instead.
+    This method provides a safe way to take a distance matrix as input, while
+    preserving compatibility with many other algorithms that take a vector
+    array.
+    If Y is given (default is None), then the returned matrix is the pairwise
+    distance between the arrays from both X and Y.
+    Valid values for metric are:
+    - From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
+      'manhattan']. These metrics support sparse matrix
+      inputs.
+      Also, ['masked_euclidean'] but it does not yet support sparse matrices.
+    - From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',
+      'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
+      'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+      'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']
+      See the documentation for scipy.spatial.distance for details on these
+      metrics. These metrics do not support sparse matrix inputs.
+    Note that in the case of 'cityblock', 'cosine' and 'euclidean' (which are
+    valid scipy.spatial.distance metrics), the scikit-learn implementation
+    will be used, which is faster and has support for sparse matrices (except
+    for 'cityblock'). For a verbose description of the metrics from
+    scikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics
+    function.
+    Read more in the :ref:`User Guide <metrics>`.
+    Parameters
+    ----------
+    X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
+             [n_samples_a, n_features] otherwise
+        Array of pairwise distances between samples, or a feature array.
+    Y : array [n_samples_b, n_features], optional
+        An optional second feature array. Only allowed if
+        metric != "precomputed".
+    metric : string, or callable
+        The metric to use when calculating distance between instances in a
+        feature array. If metric is a string, it must be one of the options
+        allowed by scipy.spatial.distance.pdist for its metric parameter, or
+        a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.
+        If metric is "precomputed", X is assumed to be a distance matrix.
+        Alternatively, if metric is a callable function, it is called on each
+        pair of instances (rows) and the resulting value recorded. The callable
+        should take two arrays from X as input and return a value indicating
+        the distance between them.
+    n_jobs : int
+        The number of jobs to use for the computation. This works by breaking
+        down the pairwise matrix into n_jobs even slices and computing them in
+        parallel.
+        If -1 all CPUs are used. If 1 is given, no parallel computing code is
+        used at all, which is useful for debugging. For n_jobs below -1,
+        (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one
+        are used.
+    **kwds : optional keyword parameters
+        Any further parameters are passed directly to the distance function.
+        If using a scipy.spatial.distance metric, the parameters are still
+        metric dependent. See the scipy docs for usage examples.
+    Returns
+    -------
+    D : array [n_samples_a, n_samples_a] or [n_samples_a, n_samples_b]
+        A distance matrix D such that D_{i, j} is the distance between the
+        ith and jth vectors of the given matrix X, if Y is None.
+        If Y is not None, then D_{i, j} is the distance between the ith array
+        from X and the jth array from Y.
+    See also
+    --------
+    pairwise_distances_chunked : performs the same calculation as this funtion,
+        but returns a generator of chunks of the distance matrix, in order to
+        limit memory usage.
+    paired_distances : Computes the distances between corresponding
+                       elements of two arrays
+    """
+    if (metric not in _VALID_METRICS and
+            not callable(metric) and metric != "precomputed"):
+        raise ValueError("Unknown metric %s. "
+                         "Valid metrics are %s, or 'precomputed', or a "
+                         "callable" % (metric, _VALID_METRICS))
+    if metric in _MASKED_METRICS or callable(metric):
+        missing_values = kwds.get("missing_values") if kwds.get(
+            "missing_values") is not None else np.nan
+        if np.all(_get_mask(X.data if issparse(X) else X, missing_values)):
+            raise ValueError(
+                "One or more samples(s) only have missing values.")
+    if metric == "precomputed":
+        X, _ = check_pairwise_arrays(X, Y, precomputed=True)
+        return X
+    elif metric in PAIRWISE_DISTANCE_FUNCTIONS:
+        func = PAIRWISE_DISTANCE_FUNCTIONS[metric]
+    elif callable(metric):
+        func = partial(_pairwise_callable, metric=metric, **kwds)
+    else:
+        if issparse(X) or issparse(Y):
+            raise TypeError("scipy distance metrics do not"
+                            " support sparse matrices.")
+        dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None
+        X, Y = check_pairwise_arrays(X, Y, dtype=dtype)
+        if n_jobs == 1 and X is Y:
+            return distance.squareform(distance.pdist(X, metric=metric,
+                                                      **kwds))
+        func = partial(distance.cdist, metric=metric, **kwds)
+    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)

missingpy/tests/__init__.py ADDED Viewed

File without changes