risk-network 0.0.8b26__py3-none-any.whl → 0.0.9b26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +2 -2
- risk/annotations/__init__.py +2 -2
- risk/annotations/annotations.py +74 -47
- risk/annotations/io.py +47 -31
- risk/log/__init__.py +4 -2
- risk/log/{config.py → console.py} +5 -3
- risk/log/{params.py → parameters.py} +17 -42
- risk/neighborhoods/__init__.py +3 -5
- risk/neighborhoods/api.py +446 -0
- risk/neighborhoods/community.py +255 -77
- risk/neighborhoods/domains.py +62 -31
- risk/neighborhoods/neighborhoods.py +156 -160
- risk/network/__init__.py +1 -3
- risk/network/geometry.py +65 -57
- risk/network/graph/__init__.py +6 -0
- risk/network/graph/api.py +194 -0
- risk/network/{graph.py → graph/network.py} +87 -37
- risk/network/graph/summary.py +254 -0
- risk/network/io.py +56 -47
- risk/network/plotter/__init__.py +6 -0
- risk/network/plotter/api.py +54 -0
- risk/network/{plot → plotter}/canvas.py +7 -4
- risk/network/{plot → plotter}/contour.py +22 -19
- risk/network/{plot → plotter}/labels.py +69 -74
- risk/network/{plot → plotter}/network.py +170 -34
- risk/network/{plot/utils/color.py → plotter/utils/colors.py} +104 -112
- risk/network/{plot → plotter}/utils/layout.py +8 -5
- risk/risk.py +11 -500
- risk/stats/__init__.py +8 -4
- risk/stats/binom.py +51 -0
- risk/stats/chi2.py +69 -0
- risk/stats/hypergeom.py +27 -17
- risk/stats/permutation/__init__.py +1 -1
- risk/stats/permutation/permutation.py +44 -38
- risk/stats/permutation/test_functions.py +25 -17
- risk/stats/poisson.py +15 -9
- risk/stats/stats.py +15 -13
- risk/stats/zscore.py +68 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
- risk_network-0.0.9b26.dist-info/RECORD +44 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
- risk/network/plot/__init__.py +0 -6
- risk/network/plot/plotter.py +0 -137
- risk_network-0.0.8b26.dist-info/RECORD +0 -37
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
    
        risk/stats/hypergeom.py
    CHANGED
    
    | @@ -6,44 +6,54 @@ risk/stats/hypergeom | |
| 6 6 | 
             
            from typing import Any, Dict
         | 
| 7 7 |  | 
| 8 8 | 
             
            import numpy as np
         | 
| 9 | 
            +
            from scipy.sparse import csr_matrix
         | 
| 9 10 | 
             
            from scipy.stats import hypergeom
         | 
| 10 11 |  | 
| 11 12 |  | 
| 12 13 | 
             
            def compute_hypergeom_test(
         | 
| 13 | 
            -
                neighborhoods:  | 
| 14 | 
            +
                neighborhoods: csr_matrix,
         | 
| 15 | 
            +
                annotations: csr_matrix,
         | 
| 16 | 
            +
                null_distribution: str = "network",
         | 
| 14 17 | 
             
            ) -> Dict[str, Any]:
         | 
| 15 | 
            -
                """ | 
| 18 | 
            +
                """
         | 
| 19 | 
            +
                Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
         | 
| 16 20 |  | 
| 17 21 | 
             
                Args:
         | 
| 18 | 
            -
                    neighborhoods ( | 
| 19 | 
            -
                    annotations ( | 
| 22 | 
            +
                    neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
         | 
| 23 | 
            +
                    annotations (csr_matrix): Sparse binary matrix representing annotations.
         | 
| 20 24 | 
             
                    null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         | 
| 21 25 |  | 
| 22 26 | 
             
                Returns:
         | 
| 23 27 | 
             
                    Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
         | 
| 24 28 | 
             
                """
         | 
| 25 29 | 
             
                # Get the total number of nodes in the network
         | 
| 26 | 
            -
                 | 
| 30 | 
            +
                total_nodes = neighborhoods.shape[1]
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                # Compute sums
         | 
| 33 | 
            +
                neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
         | 
| 34 | 
            +
                annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
         | 
| 27 35 |  | 
| 28 36 | 
             
                if null_distribution == "network":
         | 
| 29 | 
            -
                     | 
| 30 | 
            -
                    background_population = total_node_count
         | 
| 31 | 
            -
                    neighborhood_sums = np.sum(neighborhoods, axis=0, keepdims=True).T
         | 
| 32 | 
            -
                    annotation_sums = np.sum(annotations, axis=0, keepdims=True)
         | 
| 37 | 
            +
                    background_population = total_nodes
         | 
| 33 38 | 
             
                elif null_distribution == "annotations":
         | 
| 34 | 
            -
                     | 
| 35 | 
            -
                     | 
| 36 | 
            -
                     | 
| 37 | 
            -
                     | 
| 38 | 
            -
                    annotation_sums = np.sum(annotations[annotated_nodes], axis=0, keepdims=True)
         | 
| 39 | 
            +
                    annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
         | 
| 40 | 
            +
                    background_population = annotated_nodes.sum()
         | 
| 41 | 
            +
                    neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
         | 
| 42 | 
            +
                    annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
         | 
| 39 43 | 
             
                else:
         | 
| 40 44 | 
             
                    raise ValueError(
         | 
| 41 45 | 
             
                        "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         | 
| 42 46 | 
             
                    )
         | 
| 43 47 |  | 
| 44 | 
            -
                #  | 
| 45 | 
            -
                annotated_in_neighborhood = neighborhoods.T @ annotations
         | 
| 46 | 
            -
                 | 
| 48 | 
            +
                # Observed counts
         | 
| 49 | 
            +
                annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
         | 
| 50 | 
            +
                annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
         | 
| 51 | 
            +
                # Align shapes for broadcasting
         | 
| 52 | 
            +
                neighborhood_sums = neighborhood_sums.reshape(-1, 1)
         | 
| 53 | 
            +
                annotation_sums = annotation_sums.reshape(1, -1)
         | 
| 54 | 
            +
                background_population = np.array(background_population).reshape(1, 1)
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                # Compute hypergeometric p-values
         | 
| 47 57 | 
             
                depletion_pvals = hypergeom.cdf(
         | 
| 48 58 | 
             
                    annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
         | 
| 49 59 | 
             
                )
         | 
| @@ -5,18 +5,19 @@ risk/stats/permutation/permutation | |
| 5 5 |  | 
| 6 6 | 
             
            from multiprocessing import get_context, Manager
         | 
| 7 7 | 
             
            from multiprocessing.managers import ValueProxy
         | 
| 8 | 
            -
            from  | 
| 9 | 
            -
            from typing import Any, Callable, Dict
         | 
| 8 | 
            +
            from typing import Any, Callable, Dict, List, Tuple, Union
         | 
| 10 9 |  | 
| 11 10 | 
             
            import numpy as np
         | 
| 11 | 
            +
            from scipy.sparse import csr_matrix
         | 
| 12 12 | 
             
            from threadpoolctl import threadpool_limits
         | 
| 13 | 
            +
            from tqdm import tqdm
         | 
| 13 14 |  | 
| 14 15 | 
             
            from risk.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
         | 
| 15 16 |  | 
| 16 17 |  | 
| 17 18 | 
             
            def compute_permutation_test(
         | 
| 18 | 
            -
                neighborhoods:  | 
| 19 | 
            -
                annotations:  | 
| 19 | 
            +
                neighborhoods: csr_matrix,
         | 
| 20 | 
            +
                annotations: csr_matrix,
         | 
| 20 21 | 
             
                score_metric: str = "sum",
         | 
| 21 22 | 
             
                null_distribution: str = "network",
         | 
| 22 23 | 
             
                num_permutations: int = 1000,
         | 
| @@ -26,9 +27,9 @@ def compute_permutation_test( | |
| 26 27 | 
             
                """Compute permutation test for enrichment and depletion in neighborhoods.
         | 
| 27 28 |  | 
| 28 29 | 
             
                Args:
         | 
| 29 | 
            -
                    neighborhoods ( | 
| 30 | 
            -
                    annotations ( | 
| 31 | 
            -
                    score_metric (str, optional): Metric to use for scoring ('sum' | 
| 30 | 
            +
                    neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
         | 
| 31 | 
            +
                    annotations (csr_matrix): Sparse binary matrix representing annotations.
         | 
| 32 | 
            +
                    score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
         | 
| 32 33 | 
             
                    null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         | 
| 33 34 | 
             
                    num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
         | 
| 34 35 | 
             
                    random_seed (int, optional): Seed for random number generation. Defaults to 888.
         | 
| @@ -38,6 +39,7 @@ def compute_permutation_test( | |
| 38 39 | 
             
                    Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
         | 
| 39 40 | 
             
                """
         | 
| 40 41 | 
             
                # Ensure that the matrices are in the correct format and free of NaN values
         | 
| 42 | 
            +
                # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
         | 
| 41 43 | 
             
                neighborhoods = neighborhoods.astype(np.float32)
         | 
| 42 44 | 
             
                annotations = annotations.astype(np.float32)
         | 
| 43 45 | 
             
                # Retrieve the appropriate neighborhood score function based on the metric
         | 
| @@ -65,19 +67,19 @@ def compute_permutation_test( | |
| 65 67 |  | 
| 66 68 |  | 
| 67 69 | 
             
            def _run_permutation_test(
         | 
| 68 | 
            -
                neighborhoods:  | 
| 69 | 
            -
                annotations:  | 
| 70 | 
            +
                neighborhoods: csr_matrix,
         | 
| 71 | 
            +
                annotations: csr_matrix,
         | 
| 70 72 | 
             
                neighborhood_score_func: Callable,
         | 
| 71 73 | 
             
                null_distribution: str = "network",
         | 
| 72 74 | 
             
                num_permutations: int = 1000,
         | 
| 73 75 | 
             
                random_seed: int = 888,
         | 
| 74 76 | 
             
                max_workers: int = 4,
         | 
| 75 77 | 
             
            ) -> tuple:
         | 
| 76 | 
            -
                """Run  | 
| 78 | 
            +
                """Run the permutation test to calculate depletion and enrichment counts.
         | 
| 77 79 |  | 
| 78 80 | 
             
                Args:
         | 
| 79 | 
            -
                    neighborhoods ( | 
| 80 | 
            -
                    annotations ( | 
| 81 | 
            +
                    neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
         | 
| 82 | 
            +
                    annotations (csr_matrix): Sparse binary matrix representing annotations.
         | 
| 81 83 | 
             
                    neighborhood_score_func (Callable): Function to calculate neighborhood scores.
         | 
| 82 84 | 
             
                    null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         | 
| 83 85 | 
             
                    num_permutations (int, optional): Number of permutations. Defaults to 1000.
         | 
| @@ -93,14 +95,14 @@ def _run_permutation_test( | |
| 93 95 | 
             
                if null_distribution == "network":
         | 
| 94 96 | 
             
                    idxs = range(annotations.shape[0])
         | 
| 95 97 | 
             
                elif null_distribution == "annotations":
         | 
| 96 | 
            -
                    idxs = np.nonzero( | 
| 98 | 
            +
                    idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
         | 
| 97 99 | 
             
                else:
         | 
| 98 100 | 
             
                    raise ValueError(
         | 
| 99 101 | 
             
                        "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         | 
| 100 102 | 
             
                    )
         | 
| 101 103 |  | 
| 102 | 
            -
                # Replace NaNs with zeros in the annotations matrix
         | 
| 103 | 
            -
                annotations[np.isnan(annotations)] = 0
         | 
| 104 | 
            +
                # Replace NaNs with zeros in the sparse annotations matrix
         | 
| 105 | 
            +
                annotations.data[np.isnan(annotations.data)] = 0
         | 
| 104 106 | 
             
                annotation_matrix_obsv = annotations[idxs]
         | 
| 105 107 | 
             
                neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
         | 
| 106 108 | 
             
                # Calculate observed neighborhood scores
         | 
| @@ -121,28 +123,35 @@ def _run_permutation_test( | |
| 121 123 | 
             
                manager = Manager()
         | 
| 122 124 | 
             
                progress_counter = manager.Value("i", 0)
         | 
| 123 125 | 
             
                total_progress = num_permutations
         | 
| 126 | 
            +
             | 
| 127 | 
            +
                # Generate precomputed permutations
         | 
| 128 | 
            +
                permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
         | 
| 129 | 
            +
                # Divide permutations into batches for workers
         | 
| 130 | 
            +
                batch_size = subset_size + (1 if remainder > 0 else 0)
         | 
| 131 | 
            +
                permutation_batches = [
         | 
| 132 | 
            +
                    permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
         | 
| 133 | 
            +
                ]
         | 
| 134 | 
            +
             | 
| 124 135 | 
             
                # Execute the permutation test using multiprocessing
         | 
| 125 136 | 
             
                with ctx.Pool(max_workers) as pool:
         | 
| 126 137 | 
             
                    with tqdm(total=total_progress, desc="Total progress", position=0) as progress:
         | 
| 127 138 | 
             
                        # Prepare parameters for multiprocessing
         | 
| 128 139 | 
             
                        params_list = [
         | 
| 129 140 | 
             
                            (
         | 
| 141 | 
            +
                                permutation_batches[i],  # Pass the batch of precomputed permutations
         | 
| 130 142 | 
             
                                annotations,
         | 
| 131 | 
            -
                                np.array(idxs),
         | 
| 132 143 | 
             
                                neighborhoods_matrix_obsv,
         | 
| 133 144 | 
             
                                observed_neighborhood_scores,
         | 
| 134 145 | 
             
                                neighborhood_score_func,
         | 
| 135 | 
            -
                                subset_size + (1 if i < remainder else 0),
         | 
| 136 146 | 
             
                                num_permutations,
         | 
| 137 147 | 
             
                                progress_counter,
         | 
| 138 148 | 
             
                                max_workers,
         | 
| 139 | 
            -
                                rng,  # Pass the random number generator to each worker
         | 
| 140 149 | 
             
                            )
         | 
| 141 150 | 
             
                            for i in range(max_workers)
         | 
| 142 151 | 
             
                        ]
         | 
| 143 152 |  | 
| 144 153 | 
             
                        # Start the permutation process in parallel
         | 
| 145 | 
            -
                        results = pool.starmap_async( | 
| 154 | 
            +
                        results = pool.starmap_async(_permutation_process_batch, params_list, chunksize=1)
         | 
| 146 155 |  | 
| 147 156 | 
             
                        # Update progress bar based on progress_counter
         | 
| 148 157 | 
             
                        while not results.ready():
         | 
| @@ -159,31 +168,27 @@ def _run_permutation_test( | |
| 159 168 | 
             
                return counts_depletion, counts_enrichment
         | 
| 160 169 |  | 
| 161 170 |  | 
| 162 | 
            -
            def  | 
| 163 | 
            -
                 | 
| 164 | 
            -
                 | 
| 165 | 
            -
                neighborhoods_matrix_obsv:  | 
| 171 | 
            +
            def _permutation_process_batch(
         | 
| 172 | 
            +
                permutations: Union[List, Tuple, np.ndarray],
         | 
| 173 | 
            +
                annotation_matrix: csr_matrix,
         | 
| 174 | 
            +
                neighborhoods_matrix_obsv: csr_matrix,
         | 
| 166 175 | 
             
                observed_neighborhood_scores: np.ndarray,
         | 
| 167 176 | 
             
                neighborhood_score_func: Callable,
         | 
| 168 | 
            -
                subset_size: int,
         | 
| 169 177 | 
             
                num_permutations: int,
         | 
| 170 178 | 
             
                progress_counter: ValueProxy,
         | 
| 171 179 | 
             
                max_workers: int,
         | 
| 172 | 
            -
                rng: np.random.Generator,
         | 
| 173 180 | 
             
            ) -> tuple:
         | 
| 174 | 
            -
                """Process a  | 
| 181 | 
            +
                """Process a batch of permutations in a worker process.
         | 
| 175 182 |  | 
| 176 183 | 
             
                Args:
         | 
| 177 | 
            -
                     | 
| 178 | 
            -
                     | 
| 179 | 
            -
                    neighborhoods_matrix_obsv ( | 
| 184 | 
            +
                    permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
         | 
| 185 | 
            +
                    annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
         | 
| 186 | 
            +
                    neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
         | 
| 180 187 | 
             
                    observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
         | 
| 181 188 | 
             
                    neighborhood_score_func (Callable): Function to calculate neighborhood scores.
         | 
| 182 | 
            -
                    subset_size (int): Number of permutations to run in this subset.
         | 
| 183 189 | 
             
                    num_permutations (int): Number of total permutations across all subsets.
         | 
| 184 190 | 
             
                    progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
         | 
| 185 191 | 
             
                    max_workers (int): Number of workers for multiprocessing.
         | 
| 186 | 
            -
                    rng (np.random.Generator): Random number generator object.
         | 
| 187 192 |  | 
| 188 193 | 
             
                Returns:
         | 
| 189 194 | 
             
                    tuple: Local counts of depletion and enrichment.
         | 
| @@ -192,7 +197,9 @@ def _permutation_process_subset( | |
| 192 197 | 
             
                local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
         | 
| 193 198 | 
             
                local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
         | 
| 194 199 |  | 
| 195 | 
            -
                #  | 
| 200 | 
            +
                # Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
         | 
| 201 | 
            +
                # NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
         | 
| 202 | 
            +
                # This is currently a known issue and is being addressed by the maintainers [https://github.com/joblib/threadpoolctl/issues/135]
         | 
| 196 203 | 
             
                limits = None if max_workers == 1 else 1
         | 
| 197 204 | 
             
                with threadpool_limits(limits=limits, user_api="blas"):
         | 
| 198 205 | 
             
                    # Initialize a local counter for batched progress updates
         | 
| @@ -200,16 +207,16 @@ def _permutation_process_subset( | |
| 200 207 | 
             
                    # Calculate the modulo value based on total permutations for 1/100th frequency updates
         | 
| 201 208 | 
             
                    modulo_value = max(1, num_permutations // 100)
         | 
| 202 209 |  | 
| 203 | 
            -
                    for  | 
| 204 | 
            -
                        #  | 
| 205 | 
            -
                        annotation_matrix_permut = annotation_matrix[ | 
| 210 | 
            +
                    for permuted_idxs in permutations:
         | 
| 211 | 
            +
                        # Apply precomputed permutation
         | 
| 212 | 
            +
                        annotation_matrix_permut = annotation_matrix[permuted_idxs]
         | 
| 206 213 | 
             
                        # Calculate permuted neighborhood scores
         | 
| 207 214 | 
             
                        with np.errstate(invalid="ignore", divide="ignore"):
         | 
| 208 215 | 
             
                            permuted_neighborhood_scores = neighborhood_score_func(
         | 
| 209 216 | 
             
                                neighborhoods_matrix_obsv, annotation_matrix_permut
         | 
| 210 217 | 
             
                            )
         | 
| 211 218 |  | 
| 212 | 
            -
                        # Update local depletion and enrichment counts | 
| 219 | 
            +
                        # Update local depletion and enrichment counts
         | 
| 213 220 | 
             
                        local_counts_depletion = np.add(
         | 
| 214 221 | 
             
                            local_counts_depletion, permuted_neighborhood_scores <= observed_neighborhood_scores
         | 
| 215 222 | 
             
                        )
         | 
| @@ -218,9 +225,8 @@ def _permutation_process_subset( | |
| 218 225 | 
             
                            permuted_neighborhood_scores >= observed_neighborhood_scores,
         | 
| 219 226 | 
             
                        )
         | 
| 220 227 |  | 
| 221 | 
            -
                        # Update  | 
| 228 | 
            +
                        # Update progress
         | 
| 222 229 | 
             
                        local_progress += 1
         | 
| 223 | 
            -
                        # Update shared progress counter every 1/100th of total permutations
         | 
| 224 230 | 
             
                        if local_progress % modulo_value == 0:
         | 
| 225 231 | 
             
                            progress_counter.value += modulo_value
         | 
| 226 232 |  | 
| @@ -4,6 +4,7 @@ risk/stats/permutation/test_functions | |
| 4 4 | 
             
            """
         | 
| 5 5 |  | 
| 6 6 | 
             
            import numpy as np
         | 
| 7 | 
            +
            from scipy.sparse import csr_matrix
         | 
| 7 8 |  | 
| 8 9 | 
             
            # Note: Cython optimizations provided minimal performance benefits.
         | 
| 9 10 | 
             
            # The final version with Cython is archived in the `cython_permutation` branch.
         | 
| @@ -11,46 +12,53 @@ import numpy as np | |
| 11 12 |  | 
| 12 13 |  | 
| 13 14 | 
             
            def compute_neighborhood_score_by_sum(
         | 
| 14 | 
            -
                neighborhoods_matrix:  | 
| 15 | 
            +
                neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
         | 
| 15 16 | 
             
            ) -> np.ndarray:
         | 
| 16 | 
            -
                """Compute the sum of attribute values for each neighborhood.
         | 
| 17 | 
            +
                """Compute the sum of attribute values for each neighborhood using sparse matrices.
         | 
| 17 18 |  | 
| 18 19 | 
             
                Args:
         | 
| 19 | 
            -
                    neighborhoods_matrix ( | 
| 20 | 
            -
                    annotation_matrix ( | 
| 20 | 
            +
                    neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
         | 
| 21 | 
            +
                    annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
         | 
| 21 22 |  | 
| 22 23 | 
             
                Returns:
         | 
| 23 | 
            -
                    np.ndarray:  | 
| 24 | 
            +
                    np.ndarray: Dense array of summed attribute values for each neighborhood.
         | 
| 24 25 | 
             
                """
         | 
| 25 26 | 
             
                # Calculate the neighborhood score as the dot product of neighborhoods and annotations
         | 
| 26 | 
            -
                 | 
| 27 | 
            -
                 | 
| 27 | 
            +
                neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
         | 
| 28 | 
            +
                # Convert the result to a dense array for downstream calculations
         | 
| 29 | 
            +
                neighborhood_score_dense = neighborhood_score.toarray()
         | 
| 30 | 
            +
                return neighborhood_score_dense
         | 
| 28 31 |  | 
| 29 32 |  | 
| 30 33 | 
             
            def compute_neighborhood_score_by_stdev(
         | 
| 31 | 
            -
                neighborhoods_matrix:  | 
| 34 | 
            +
                neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
         | 
| 32 35 | 
             
            ) -> np.ndarray:
         | 
| 33 | 
            -
                """Compute the standard deviation of neighborhood scores.
         | 
| 36 | 
            +
                """Compute the standard deviation of neighborhood scores for sparse matrices.
         | 
| 34 37 |  | 
| 35 38 | 
             
                Args:
         | 
| 36 | 
            -
                    neighborhoods_matrix ( | 
| 37 | 
            -
                    annotation_matrix ( | 
| 39 | 
            +
                    neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
         | 
| 40 | 
            +
                    annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
         | 
| 38 41 |  | 
| 39 42 | 
             
                Returns:
         | 
| 40 43 | 
             
                    np.ndarray: Standard deviation of the neighborhood scores.
         | 
| 41 44 | 
             
                """
         | 
| 42 45 | 
             
                # Calculate the neighborhood score as the dot product of neighborhoods and annotations
         | 
| 43 | 
            -
                neighborhood_score =  | 
| 44 | 
            -
                # Calculate the number of elements in each neighborhood
         | 
| 45 | 
            -
                N =  | 
| 46 | 
            +
                neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
         | 
| 47 | 
            +
                # Calculate the number of elements in each neighborhood (sum of rows)
         | 
| 48 | 
            +
                N = neighborhoods_matrix.sum(axis=1).A.flatten()  # Convert to 1D array
         | 
| 49 | 
            +
                # Avoid division by zero by replacing zeros in N with np.nan temporarily
         | 
| 50 | 
            +
                N[N == 0] = np.nan
         | 
| 46 51 | 
             
                # Compute the mean of the neighborhood scores
         | 
| 47 | 
            -
                M = neighborhood_score / N[:, None]
         | 
| 52 | 
            +
                M = neighborhood_score.multiply(1 / N[:, None]).toarray()  # Sparse element-wise division
         | 
| 48 53 | 
             
                # Compute the mean of squares (EXX) directly using squared annotation matrix
         | 
| 49 | 
            -
                 | 
| 54 | 
            +
                annotation_squared = annotation_matrix.multiply(annotation_matrix)  # Element-wise squaring
         | 
| 55 | 
            +
                EXX = (neighborhoods_matrix @ annotation_squared).multiply(1 / N[:, None]).toarray()
         | 
| 50 56 | 
             
                # Calculate variance as EXX - M^2
         | 
| 51 | 
            -
                variance = EXX - M | 
| 57 | 
            +
                variance = EXX - np.power(M, 2)
         | 
| 52 58 | 
             
                # Compute the standard deviation as the square root of the variance
         | 
| 53 59 | 
             
                neighborhood_stdev = np.sqrt(variance)
         | 
| 60 | 
            +
                # Replace np.nan back with zeros in case N was 0 (no elements in the neighborhood)
         | 
| 61 | 
            +
                neighborhood_stdev[np.isnan(neighborhood_stdev)] = 0
         | 
| 54 62 | 
             
                return neighborhood_stdev
         | 
| 55 63 |  | 
| 56 64 |  | 
    
        risk/stats/poisson.py
    CHANGED
    
    | @@ -6,39 +6,45 @@ risk/stats/poisson | |
| 6 6 | 
             
            from typing import Any, Dict
         | 
| 7 7 |  | 
| 8 8 | 
             
            import numpy as np
         | 
| 9 | 
            +
            from scipy.sparse import csr_matrix
         | 
| 9 10 | 
             
            from scipy.stats import poisson
         | 
| 10 11 |  | 
| 11 12 |  | 
| 12 13 | 
             
            def compute_poisson_test(
                neighborhoods: csr_matrix,
                annotations: csr_matrix,
                null_distribution: str = "network",
            ) -> Dict[str, Any]:
                """
                Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.

                Args:
                    neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
                    annotations (csr_matrix): Sparse binary matrix representing annotations.
                    null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

                Returns:
                    Dict[str, Any]: Dictionary containing depletion and enrichment p-values, each a dense
                        array with the same shape as `neighborhoods @ annotations`.

                Raises:
                    ValueError: If `null_distribution` is neither 'network' nor 'annotations'.
                """
                # Matrix multiplication to get the number of annotated nodes in each neighborhood
                annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
                # Convert annotated counts to dense for downstream calculations
                annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()

                # Compute lambda_expected based on the chosen null distribution
                if null_distribution == "network":
                    # One expected rate per row of the count matrix (mean over its columns, axis=1)
                    lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
                elif null_distribution == "annotations":
                    # One expected rate per column of the count matrix (mean over its rows, axis=0)
                    lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
                else:
                    raise ValueError(
                        "Invalid null_distribution value. Choose either 'network' or 'annotations'."
                    )

                # Compute p-values for enrichment and depletion using Poisson distribution
                # Enrichment is P(X >= k) = sf(k - 1). The survival function is used instead of
                # 1 - cdf(k - 1) because the subtraction rounds to exactly 0.0 for very small tails.
                enrichment_pvals = poisson.sf(annotated_in_neighborhood_dense - 1, lambda_expected)
                # Depletion is P(X <= k)
                depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)

                return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
         | 
    
        risk/stats/stats.py
    CHANGED
    
    | @@ -44,7 +44,7 @@ def calculate_significance_matrices( | |
| 44 44 | 
             
                        enrichment_pvals, enrichment_qvals, pval_cutoff=pval_cutoff, fdr_cutoff=fdr_cutoff
         | 
| 45 45 | 
             
                    )
         | 
| 46 46 | 
             
                    # Compute the enrichment matrix using both q-values and p-values
         | 
| 47 | 
            -
                    enrichment_matrix = ( | 
| 47 | 
            +
                    enrichment_matrix = (enrichment_pvals**0.5) * (enrichment_qvals**2)
         | 
| 48 48 | 
             
                else:
         | 
| 49 49 | 
             
                    # Compute threshold matrices based on p-value cutoffs only
         | 
| 50 50 | 
             
                    depletion_alpha_threshold_matrix = _compute_threshold_matrix(
         | 
| @@ -62,7 +62,7 @@ def calculate_significance_matrices( | |
| 62 62 | 
             
                log_enrichment_matrix = -np.log10(enrichment_matrix)
         | 
| 63 63 |  | 
| 64 64 | 
             
                # Select the appropriate significance matrices based on the specified tail
         | 
| 65 | 
            -
                 | 
| 65 | 
            +
                significance_matrix, significant_binary_significance_matrix = _select_significance_matrices(
         | 
| 66 66 | 
             
                    tail,
         | 
| 67 67 | 
             
                    log_depletion_matrix,
         | 
| 68 68 | 
             
                    depletion_alpha_threshold_matrix,
         | 
| @@ -71,14 +71,14 @@ def calculate_significance_matrices( | |
| 71 71 | 
             
                )
         | 
| 72 72 |  | 
| 73 73 | 
             
                # Filter the enrichment matrix using the binary significance matrix
         | 
| 74 | 
            -
                 | 
| 75 | 
            -
                     | 
| 74 | 
            +
                significant_significance_matrix = np.where(
         | 
| 75 | 
            +
                    significant_binary_significance_matrix == 1, significance_matrix, 0
         | 
| 76 76 | 
             
                )
         | 
| 77 77 |  | 
| 78 78 | 
             
                return {
         | 
| 79 | 
            -
                    " | 
| 80 | 
            -
                    " | 
| 81 | 
            -
                    " | 
| 79 | 
            +
                    "significance_matrix": significance_matrix,
         | 
| 80 | 
            +
                    "significant_significance_matrix": significant_significance_matrix,
         | 
| 81 | 
            +
                    "significant_binary_significance_matrix": significant_binary_significance_matrix,
         | 
| 82 82 | 
             
                }
         | 
| 83 83 |  | 
| 84 84 |  | 
| @@ -109,15 +109,15 @@ def _select_significance_matrices( | |
| 109 109 |  | 
| 110 110 | 
             
                if tail == "left":
         | 
| 111 111 | 
             
                    # Select depletion matrix and corresponding alpha threshold for left-tail analysis
         | 
| 112 | 
            -
                     | 
| 112 | 
            +
                    significance_matrix = -log_depletion_matrix
         | 
| 113 113 | 
             
                    alpha_threshold_matrix = depletion_alpha_threshold_matrix
         | 
| 114 114 | 
             
                elif tail == "right":
         | 
| 115 115 | 
             
                    # Select enrichment matrix and corresponding alpha threshold for right-tail analysis
         | 
| 116 | 
            -
                     | 
| 116 | 
            +
                    significance_matrix = log_enrichment_matrix
         | 
| 117 117 | 
             
                    alpha_threshold_matrix = enrichment_alpha_threshold_matrix
         | 
| 118 118 | 
             
                elif tail == "both":
         | 
| 119 119 | 
             
                    # Select the matrix with the highest absolute values while preserving the sign
         | 
| 120 | 
            -
                     | 
| 120 | 
            +
                    significance_matrix = np.where(
         | 
| 121 121 | 
             
                        np.abs(log_depletion_matrix) >= np.abs(log_enrichment_matrix),
         | 
| 122 122 | 
             
                        -log_depletion_matrix,
         | 
| 123 123 | 
             
                        log_enrichment_matrix,
         | 
| @@ -126,13 +126,15 @@ def _select_significance_matrices( | |
| 126 126 | 
             
                    alpha_threshold_matrix = np.logical_or(
         | 
| 127 127 | 
             
                        depletion_alpha_threshold_matrix, enrichment_alpha_threshold_matrix
         | 
| 128 128 | 
             
                    )
         | 
| 129 | 
            +
                else:
         | 
| 130 | 
            +
                    raise ValueError("Invalid value for 'tail'. Must be 'left', 'right', or 'both'.")
         | 
| 129 131 |  | 
| 130 132 | 
             
                # Create a binary significance matrix where valid indices meet the alpha threshold
         | 
| 131 133 | 
             
                valid_idxs = ~np.isnan(alpha_threshold_matrix)
         | 
| 132 | 
            -
                 | 
| 133 | 
            -
                 | 
| 134 | 
            +
                significant_binary_significance_matrix = np.zeros(alpha_threshold_matrix.shape)
         | 
| 135 | 
            +
                significant_binary_significance_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
         | 
| 134 136 |  | 
| 135 | 
            -
                return  | 
| 137 | 
            +
                return significance_matrix, significant_binary_significance_matrix
         | 
| 136 138 |  | 
| 137 139 |  | 
| 138 140 | 
             
            def _compute_threshold_matrix(
         | 
    
        risk/stats/zscore.py
    ADDED
    
    | @@ -0,0 +1,68 @@ | |
| 1 | 
            +
            """
         | 
| 2 | 
            +
            risk/stats/zscore
         | 
| 3 | 
            +
            ~~~~~~~~~~~~~~~~~~
         | 
| 4 | 
            +
            """
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            from typing import Any, Dict
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            import numpy as np
         | 
| 9 | 
            +
            from scipy.sparse import csr_matrix
         | 
| 10 | 
            +
            from scipy.stats import norm
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            def compute_zscore_test(
                neighborhoods: csr_matrix,
                annotations: csr_matrix,
                null_distribution: str = "network",
            ) -> Dict[str, Any]:
                """
                Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.

                Args:
                    neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
                    annotations (csr_matrix): Sparse binary matrix representing annotations.
                    null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

                Returns:
                    Dict[str, Any]: Dictionary containing depletion and enrichment p-values. Entries are NaN
                        wherever the standard deviation under the null is zero.

                Raises:
                    ValueError: If `null_distribution` is neither 'network' nor 'annotations'.
                """
                # Total number of nodes in the network
                total_node_count = neighborhoods.shape[1]

                # Compute sums. np.asarray(...).ravel() is used instead of `.A.flatten()` so the code
                # works whether `.sum()` returns an np.matrix (sparse matrices) or an ndarray (sparse arrays).
                if null_distribution == "network":
                    background_population = total_node_count
                    neighborhood_sums = np.asarray(neighborhoods.sum(axis=0)).ravel()  # Dense column sums
                    annotation_sums = np.asarray(annotations.sum(axis=0)).ravel()  # Dense column sums
                elif null_distribution == "annotations":
                    # Rows of `annotations` with at least one nonzero entry form the background
                    annotated_nodes = np.asarray(annotations.sum(axis=1)).ravel() > 0  # Dense boolean mask
                    background_population = annotated_nodes.sum()
                    neighborhood_sums = np.asarray(neighborhoods[annotated_nodes].sum(axis=0)).ravel()
                    annotation_sums = np.asarray(annotations[annotated_nodes].sum(axis=0)).ravel()
                else:
                    raise ValueError(
                        "Invalid null_distribution value. Choose either 'network' or 'annotations'."
                    )

                # Observed values
                observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
                # Expected values under the null: outer product of the two marginals over the background
                neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Column vector
                annotation_sums = annotation_sums.reshape(1, -1)  # Row vector
                expected = (neighborhood_sums @ annotation_sums) / background_population

                # Standard deviation under the null
                std_dev = np.sqrt(
                    expected
                    * (1 - annotation_sums / background_population)
                    * (1 - neighborhood_sums / background_population)
                )
                std_dev[std_dev == 0] = np.nan  # Avoid division by zero; these cells yield NaN p-values
                # Compute Z-scores
                z_scores = (observed - expected) / std_dev

                # Convert Z-scores to depletion and enrichment p-values
                enrichment_pvals = norm.sf(z_scores)  # Upper tail
                depletion_pvals = norm.cdf(z_scores)  # Lower tail

                return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
         | 
| @@ -1,10 +1,10 @@ | |
| 1 | 
            -
            Metadata-Version: 2. | 
| 1 | 
            +
            Metadata-Version: 2.2
         | 
| 2 2 | 
             
            Name: risk-network
         | 
| 3 | 
            -
            Version: 0.0. | 
| 3 | 
            +
            Version: 0.0.9b26
         | 
| 4 4 | 
             
            Summary: A Python package for biological network analysis
         | 
| 5 5 | 
             
            Author: Ira Horecka
         | 
| 6 6 | 
             
            Author-email: Ira Horecka <ira89@icloud.com>
         | 
| 7 | 
            -
            License: | 
| 7 | 
            +
            License:                     GNU GENERAL PUBLIC LICENSE
         | 
| 8 8 | 
             
                                           Version 3, 29 June 2007
         | 
| 9 9 |  | 
| 10 10 | 
             
                     Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
         | 
| @@ -695,19 +695,23 @@ Requires-Python: >=3.8 | |
| 695 695 | 
             
            Description-Content-Type: text/markdown
         | 
| 696 696 | 
             
            License-File: LICENSE
         | 
| 697 697 | 
             
            Requires-Dist: ipywidgets
         | 
| 698 | 
            -
            Requires-Dist:  | 
| 698 | 
            +
            Requires-Dist: leidenalg
         | 
| 699 | 
            +
            Requires-Dist: markov_clustering
         | 
| 699 700 | 
             
            Requires-Dist: matplotlib
         | 
| 700 701 | 
             
            Requires-Dist: networkx
         | 
| 701 702 | 
             
            Requires-Dist: nltk==3.8.1
         | 
| 702 703 | 
             
            Requires-Dist: numpy
         | 
| 703 704 | 
             
            Requires-Dist: openpyxl
         | 
| 704 705 | 
             
            Requires-Dist: pandas
         | 
| 706 | 
            +
            Requires-Dist: python-igraph
         | 
| 705 707 | 
             
            Requires-Dist: python-louvain
         | 
| 706 708 | 
             
            Requires-Dist: scikit-learn
         | 
| 707 709 | 
             
            Requires-Dist: scipy
         | 
| 708 710 | 
             
            Requires-Dist: statsmodels
         | 
| 709 711 | 
             
            Requires-Dist: threadpoolctl
         | 
| 710 712 | 
             
            Requires-Dist: tqdm
         | 
| 713 | 
            +
            Dynamic: author
         | 
| 714 | 
            +
            Dynamic: requires-python
         | 
| 711 715 |  | 
| 712 716 | 
             
            # RISK Network
         | 
| 713 717 |  | 
| @@ -724,7 +728,7 @@ Requires-Dist: tqdm | |
| 724 728 | 
             
            
         | 
| 725 729 | 
             
            
         | 
| 726 730 |  | 
| 727 | 
            -
            **RISK ( | 
| 731 | 
            +
            **RISK** (Regional Inference of Significant Kinships) is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
         | 
| 728 732 |  | 
| 729 733 | 
             
            ## Documentation and Tutorial
         | 
| 730 734 |  |