risk-network 0.0.9b38__py3-none-any.whl → 0.0.9b39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
 - risk/neighborhoods/domains.py +111 -137
 - risk/network/graph/api.py +3 -6
 - {risk_network-0.0.9b38.dist-info → risk_network-0.0.9b39.dist-info}/METADATA +1 -1
 - {risk_network-0.0.9b38.dist-info → risk_network-0.0.9b39.dist-info}/RECORD +8 -8
 - {risk_network-0.0.9b38.dist-info → risk_network-0.0.9b39.dist-info}/LICENSE +0 -0
 - {risk_network-0.0.9b38.dist-info → risk_network-0.0.9b39.dist-info}/WHEEL +0 -0
 - {risk_network-0.0.9b38.dist-info → risk_network-0.0.9b39.dist-info}/top_level.txt +0 -0
 
    
        risk/__init__.py
    CHANGED
    
    
    
        risk/neighborhoods/domains.py
    CHANGED
    
    | 
         @@ -10,8 +10,7 @@ from typing import Tuple, Union 
     | 
|
| 
       10 
10 
     | 
    
         
             
            import numpy as np
         
     | 
| 
       11 
11 
     | 
    
         
             
            import pandas as pd
         
     | 
| 
       12 
12 
     | 
    
         
             
            from scipy.cluster.hierarchy import linkage, fcluster
         
     | 
| 
       13 
     | 
    
         
            -
            from  
     | 
| 
       14 
     | 
    
         
            -
            from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score, silhouette_score
         
     | 
| 
      
 13 
     | 
    
         
            +
            from sklearn.metrics import silhouette_score
         
     | 
| 
       15 
14 
     | 
    
         
             
            from tqdm import tqdm
         
     | 
| 
       16 
15 
     | 
    
         | 
| 
       17 
16 
     | 
    
         
             
            from risk.annotations import get_weighted_description
         
     | 
| 
         @@ -19,19 +18,13 @@ from risk.constants import GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS 
     | 
|
| 
       19 
18 
     | 
    
         
             
            from risk.log import logger
         
     | 
| 
       20 
19 
     | 
    
         | 
| 
       21 
20 
     | 
    
         | 
| 
       22 
     | 
    
         
            -
            class LinkageThresholdError(Exception):
         
     | 
| 
       23 
     | 
    
         
            -
                """Exception raised for errors in the linkage threshold optimization process."""
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
                pass
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
21 
     | 
    
         
             
            def define_domains(
         
     | 
| 
       29 
22 
     | 
    
         
             
                top_annotations: pd.DataFrame,
         
     | 
| 
       30 
23 
     | 
    
         
             
                significant_neighborhoods_significance: np.ndarray,
         
     | 
| 
       31 
24 
     | 
    
         
             
                linkage_criterion: str,
         
     | 
| 
       32 
25 
     | 
    
         
             
                linkage_method: str,
         
     | 
| 
       33 
26 
     | 
    
         
             
                linkage_metric: str,
         
     | 
| 
       34 
     | 
    
         
            -
                linkage_threshold:  
     | 
| 
      
 27 
     | 
    
         
            +
                linkage_threshold: float,
         
     | 
| 
       35 
28 
     | 
    
         
             
            ) -> pd.DataFrame:
         
     | 
| 
       36 
29 
     | 
    
         
             
                """Define domains and assign nodes to these domains based on their significance scores and clustering,
         
     | 
| 
       37 
30 
     | 
    
         
             
                handling errors by assigning unique domains when clustering fails.
         
     | 
| 
         @@ -39,19 +32,13 @@ def define_domains( 
     | 
|
| 
       39 
32 
     | 
    
         
             
                Args:
         
     | 
| 
       40 
33 
     | 
    
         
             
                    top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         
     | 
| 
       41 
34 
     | 
    
         
             
                    significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         
     | 
| 
       42 
     | 
    
         
            -
                    linkage_criterion (str): The clustering criterion for defining groups. 
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
                     
     | 
| 
       45 
     | 
    
         
            -
                     
     | 
| 
       46 
     | 
    
         
            -
                    linkage_threshold (str, float): The linkage threshold for clustering, or one of "silhouette",
         
     | 
| 
       47 
     | 
    
         
            -
                        "calinski_harabasz", or "davies_bouldin" to optimize the threshold.
         
     | 
| 
      
 35 
     | 
    
         
            +
                    linkage_criterion (str): The clustering criterion for defining groups.
         
     | 
| 
      
 36 
     | 
    
         
            +
                    linkage_method (str): The linkage method for clustering.
         
     | 
| 
      
 37 
     | 
    
         
            +
                    linkage_metric (str): The linkage metric for clustering.
         
     | 
| 
      
 38 
     | 
    
         
            +
                    linkage_threshold (float): The threshold for clustering.
         
     | 
| 
       48 
39 
     | 
    
         | 
| 
       49 
40 
     | 
    
         
             
                Returns:
         
     | 
| 
       50 
41 
     | 
    
         
             
                    pd.DataFrame: DataFrame with the primary domain for each node.
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                Raises:
         
     | 
| 
       53 
     | 
    
         
            -
                    ValueError: If an improper value is passed for linkage_threshold. Acceptable values are "silhouette",
         
     | 
| 
       54 
     | 
    
         
            -
                        "calinski_harabasz", "davies_bouldin", or a float value.
         
     | 
| 
       55 
42 
     | 
    
         
             
                """
         
     | 
| 
       56 
43 
     | 
    
         
             
                try:
         
     | 
| 
       57 
44 
     | 
    
         
             
                    if linkage_criterion == "off":
         
     | 
| 
         @@ -62,10 +49,8 @@ def define_domains( 
     | 
|
| 
       62 
49 
     | 
    
         
             
                    # Safeguard the matrix by replacing NaN, Inf, and -Inf values
         
     | 
| 
       63 
50 
     | 
    
         
             
                    m = _safeguard_matrix(m)
         
     | 
| 
       64 
51 
     | 
    
         
             
                    # Optimize silhouette score across different linkage methods and distance metrics
         
     | 
| 
       65 
     | 
    
         
            -
                    best_linkage, best_metric, best_threshold = (
         
     | 
| 
       66 
     | 
    
         
            -
                         
     | 
| 
       67 
     | 
    
         
            -
                            m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
         
     | 
| 
       68 
     | 
    
         
            -
                        )
         
     | 
| 
      
 52 
     | 
    
         
            +
                    best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
         
     | 
| 
      
 53 
     | 
    
         
            +
                        m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
         
     | 
| 
       69 
54 
     | 
    
         
             
                    )
         
     | 
| 
       70 
55 
     | 
    
         
             
                    # Perform hierarchical clustering
         
     | 
| 
       71 
56 
     | 
    
         
             
                    Z = linkage(m, method=best_linkage, metric=best_metric)
         
     | 
| 
         @@ -91,9 +76,6 @@ def define_domains( 
     | 
|
| 
       91 
76 
     | 
    
         
             
                            f"Error encountered. Skipping clustering and assigning {n_rows} unique domains."
         
     | 
| 
       92 
77 
     | 
    
         
             
                        )
         
     | 
| 
       93 
78 
     | 
    
         
             
                    top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
         
     | 
| 
       94 
     | 
    
         
            -
                except LinkageThresholdError as e:
         
     | 
| 
       95 
     | 
    
         
            -
                    # If a LinkageThresholdError is encountered, raise a ValueError with the original exception
         
     | 
| 
       96 
     | 
    
         
            -
                    raise ValueError(e) from e
         
     | 
| 
       97 
79 
     | 
    
         | 
| 
       98 
80 
     | 
    
         
             
                # Create DataFrames to store domain information
         
     | 
| 
       99 
81 
     | 
    
         
             
                node_to_significance = pd.DataFrame(
         
     | 
| 
         @@ -215,154 +197,146 @@ def _safeguard_matrix(matrix: np.ndarray) -> np.ndarray: 
     | 
|
| 
       215 
197 
     | 
    
         
             
                return matrix
         
     | 
| 
       216 
198 
     | 
    
         | 
| 
       217 
199 
     | 
    
         | 
| 
       218 
     | 
    
         
            -
            def  
     | 
| 
      
 200 
     | 
    
         
            +
            def _optimize_silhouette_across_linkage_and_metrics(
         
     | 
| 
       219 
201 
     | 
    
         
             
                m: np.ndarray,
         
     | 
| 
       220 
202 
     | 
    
         
             
                linkage_criterion: str,
         
     | 
| 
       221 
203 
     | 
    
         
             
                linkage_method: str,
         
     | 
| 
       222 
204 
     | 
    
         
             
                linkage_metric: str,
         
     | 
| 
       223 
205 
     | 
    
         
             
                linkage_threshold: Union[str, float],
         
     | 
| 
       224 
206 
     | 
    
         
             
            ) -> Tuple[str, str, float]:
         
     | 
| 
       225 
     | 
    
         
            -
                """Optimize  
     | 
| 
       226 
     | 
    
         
            -
                a string, optimize the threshold using the specified metric; otherwise, use the provided threshold.
         
     | 
| 
      
 207 
     | 
    
         
            +
                """Optimize silhouette score across different linkage methods and distance metrics.
         
     | 
| 
       227 
208 
     | 
    
         | 
| 
       228 
209 
     | 
    
         
             
                Args:
         
     | 
| 
       229 
210 
     | 
    
         
             
                    m (np.ndarray): Data matrix.
         
     | 
| 
       230 
     | 
    
         
            -
                    linkage_criterion (str):  
     | 
| 
       231 
     | 
    
         
            -
                    linkage_method (str): Linkage method for clustering 
     | 
| 
       232 
     | 
    
         
            -
                    linkage_metric (str):  
     | 
| 
       233 
     | 
    
         
            -
                    linkage_threshold (str, float):  
     | 
| 
       234 
     | 
    
         
            -
                        "silhouette", "calinski_harabasz", or "davies_bouldin" to trigger optimization.
         
     | 
| 
      
 211 
     | 
    
         
            +
                    linkage_criterion (str): Clustering criterion.
         
     | 
| 
      
 212 
     | 
    
         
            +
                    linkage_method (str): Linkage method for clustering.
         
     | 
| 
      
 213 
     | 
    
         
            +
                    linkage_metric (str): Linkage metric for clustering.
         
     | 
| 
      
 214 
     | 
    
         
            +
                    linkage_threshold (Union[str, float]): Threshold for clustering. Set to "auto" to optimize.
         
     | 
| 
       235 
215 
     | 
    
         | 
| 
       236 
216 
     | 
    
         
             
                Returns:
         
     | 
| 
       237 
217 
     | 
    
         
             
                    Tuple[str, str, float]:
         
     | 
| 
       238 
     | 
    
         
            -
                        -  
     | 
| 
       239 
     | 
    
         
            -
                        -  
     | 
| 
       240 
     | 
    
         
            -
                        -  
     | 
| 
       241 
     | 
    
         
            -
             
     | 
| 
       242 
     | 
    
         
            -
                Raises:
         
     | 
| 
       243 
     | 
    
         
            -
                    ValueError: If linkage_threshold is neither one of the supported keywords nor convertible to float.
         
     | 
| 
      
 218 
     | 
    
         
            +
                        - Best linkage method (str)
         
     | 
| 
      
 219 
     | 
    
         
            +
                        - Best linkage metric (str)
         
     | 
| 
      
 220 
     | 
    
         
            +
                        - Best threshold (float)
         
     | 
| 
       244 
221 
     | 
    
         
             
                """
         
     | 
| 
       245 
     | 
    
         
            -
                #  
     | 
| 
       246 
     | 
    
         
            -
                 
     | 
| 
       247 
     | 
    
         
            -
             
     | 
| 
       248 
     | 
    
         
            -
                 
     | 
| 
       249 
     | 
    
         
            -
                if isinstance(linkage_threshold, str):
         
     | 
| 
       250 
     | 
    
         
            -
                    if linkage_threshold in supported_linkage_thresholds:
         
     | 
| 
       251 
     | 
    
         
            -
                        opt_metric = linkage_threshold
         
     | 
| 
       252 
     | 
    
         
            -
                    else:
         
     | 
| 
       253 
     | 
    
         
            -
                        try:
         
     | 
| 
       254 
     | 
    
         
            -
                            threshold_float = float(linkage_threshold)
         
     | 
| 
       255 
     | 
    
         
            -
                        except (TypeError, ValueError):
         
     | 
| 
       256 
     | 
    
         
            -
                            raise LinkageThresholdError(
         
     | 
| 
       257 
     | 
    
         
            -
                                f"linkage_threshold must be one of {', '.join(supported_linkage_thresholds)} or a float value."
         
     | 
| 
       258 
     | 
    
         
            -
                            )
         
     | 
| 
       259 
     | 
    
         
            -
                        return linkage_method, linkage_metric, threshold_float
         
     | 
| 
       260 
     | 
    
         
            -
                else:
         
     | 
| 
       261 
     | 
    
         
            -
                    # If not a string, try to convert it to float.
         
     | 
| 
       262 
     | 
    
         
            -
                    try:
         
     | 
| 
       263 
     | 
    
         
            -
                        threshold_float = float(linkage_threshold)
         
     | 
| 
       264 
     | 
    
         
            -
                    except (TypeError, ValueError):
         
     | 
| 
       265 
     | 
    
         
            -
                        raise LinkageThresholdError(
         
     | 
| 
       266 
     | 
    
         
            -
                            f"linkage_threshold must be one of {', '.join(supported_linkage_thresholds)} or a float value."
         
     | 
| 
       267 
     | 
    
         
            -
                        )
         
     | 
| 
       268 
     | 
    
         
            -
                    return linkage_method, linkage_metric, threshold_float
         
     | 
| 
       269 
     | 
    
         
            -
             
     | 
| 
       270 
     | 
    
         
            -
                # Otherwise, perform optimization using the specified metric (opt_metric).
         
     | 
| 
       271 
     | 
    
         
            -
                best_overall_method = None
         
     | 
| 
       272 
     | 
    
         
            -
                best_overall_metric = None
         
     | 
| 
       273 
     | 
    
         
            -
                best_overall_threshold = None
         
     | 
| 
      
 222 
     | 
    
         
            +
                # Initialize best overall values
         
     | 
| 
      
 223 
     | 
    
         
            +
                best_overall_method = linkage_method
         
     | 
| 
      
 224 
     | 
    
         
            +
                best_overall_metric = linkage_metric
         
     | 
| 
      
 225 
     | 
    
         
            +
                best_overall_threshold = linkage_threshold
         
     | 
| 
       274 
226 
     | 
    
         
             
                best_overall_score = -np.inf
         
     | 
| 
       275 
227 
     | 
    
         | 
| 
       276 
     | 
    
         
            -
                #  
     | 
| 
       277 
     | 
    
         
            -
                 
     | 
| 
       278 
     | 
    
         
            -
                 
     | 
| 
       279 
     | 
    
         
            -
                total_combinations = len( 
     | 
| 
      
 228 
     | 
    
         
            +
                # Set linkage methods and metrics to all combinations if "auto" is selected
         
     | 
| 
      
 229 
     | 
    
         
            +
                linkage_methods = GROUP_LINKAGE_METHODS if linkage_method == "auto" else [linkage_method]
         
     | 
| 
      
 230 
     | 
    
         
            +
                linkage_metrics = GROUP_DISTANCE_METRICS if linkage_metric == "auto" else [linkage_metric]
         
     | 
| 
      
 231 
     | 
    
         
            +
                total_combinations = len(linkage_methods) * len(linkage_metrics)
         
     | 
| 
       280 
232 
     | 
    
         | 
| 
      
 233 
     | 
    
         
            +
                # Evaluating optimal linkage method and metric
         
     | 
| 
       281 
234 
     | 
    
         
             
                for method, metric in tqdm(
         
     | 
| 
       282 
     | 
    
         
            -
                    product( 
     | 
| 
      
 235 
     | 
    
         
            +
                    product(linkage_methods, linkage_metrics),
         
     | 
| 
       283 
236 
     | 
    
         
             
                    desc="Evaluating optimal linkage method and metric",
         
     | 
| 
       284 
237 
     | 
    
         
             
                    total=total_combinations,
         
     | 
| 
       285 
238 
     | 
    
         
             
                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
         
     | 
| 
       286 
239 
     | 
    
         
             
                ):
         
     | 
| 
      
 240 
     | 
    
         
            +
                    # Some linkage methods and metrics may not work with certain data
         
     | 
| 
       287 
241 
     | 
    
         
             
                    with suppress(ValueError):
         
     | 
| 
       288 
242 
     | 
    
         
             
                        Z = linkage(m, method=method, metric=metric)
         
     | 
| 
       289 
     | 
    
         
            -
                         
     | 
| 
       290 
     | 
    
         
            -
             
     | 
| 
       291 
     | 
    
         
            -
             
     | 
| 
       292 
     | 
    
         
            -
             
     | 
| 
       293 
     | 
    
         
            -
             
     | 
| 
       294 
     | 
    
         
            -
             
     | 
| 
       295 
     | 
    
         
            -
             
     | 
| 
       296 
     | 
    
         
            -
             
     | 
| 
       297 
     | 
    
         
            -
             
     | 
| 
       298 
     | 
    
         
            -
             
     | 
| 
       299 
     | 
    
         
            -
             
     | 
| 
      
 243 
     | 
    
         
            +
                        # Only optimize silhouette score if the threshold is "auto"
         
     | 
| 
      
 244 
     | 
    
         
            +
                        if linkage_threshold == "auto":
         
     | 
| 
      
 245 
     | 
    
         
            +
                            threshold, score = _find_best_silhouette_score(Z, m, metric, linkage_criterion)
         
     | 
| 
      
 246 
     | 
    
         
            +
                            if score > best_overall_score:
         
     | 
| 
      
 247 
     | 
    
         
            +
                                best_overall_score = score
         
     | 
| 
      
 248 
     | 
    
         
            +
                                best_overall_threshold = threshold
         
     | 
| 
      
 249 
     | 
    
         
            +
                                best_overall_method = method
         
     | 
| 
      
 250 
     | 
    
         
            +
                                best_overall_metric = metric
         
     | 
| 
      
 251 
     | 
    
         
            +
                        else:
         
     | 
| 
      
 252 
     | 
    
         
            +
                            # Use the provided threshold without optimization
         
     | 
| 
      
 253 
     | 
    
         
            +
                            score = silhouette_score(
         
     | 
| 
      
 254 
     | 
    
         
            +
                                m,
         
     | 
| 
      
 255 
     | 
    
         
            +
                                fcluster(Z, linkage_threshold * np.max(Z[:, 2]), criterion=linkage_criterion),
         
     | 
| 
      
 256 
     | 
    
         
            +
                                metric=metric,
         
     | 
| 
      
 257 
     | 
    
         
            +
                            )
         
     | 
| 
      
 258 
     | 
    
         
            +
                            if score > best_overall_score:
         
     | 
| 
      
 259 
     | 
    
         
            +
                                best_overall_score = score
         
     | 
| 
      
 260 
     | 
    
         
            +
                                best_overall_threshold = linkage_threshold
         
     | 
| 
      
 261 
     | 
    
         
            +
                                best_overall_method = method
         
     | 
| 
      
 262 
     | 
    
         
            +
                                best_overall_metric = metric
         
     | 
| 
      
 263 
     | 
    
         
            +
             
     | 
| 
       300 
264 
     | 
    
         
             
                return best_overall_method, best_overall_metric, best_overall_threshold
         
     | 
| 
       301 
265 
     | 
    
         | 
| 
       302 
266 
     | 
    
         | 
| 
       303 
     | 
    
         
            -
            def  
     | 
| 
      
 267 
     | 
    
         
            +
            def _find_best_silhouette_score(
         
     | 
| 
       304 
268 
     | 
    
         
             
                Z: np.ndarray,
         
     | 
| 
       305 
269 
     | 
    
         
             
                m: np.ndarray,
         
     | 
| 
       306 
270 
     | 
    
         
             
                linkage_metric: str,
         
     | 
| 
       307 
271 
     | 
    
         
             
                linkage_criterion: str,
         
     | 
| 
       308 
     | 
    
         
            -
                 
     | 
| 
      
 272 
     | 
    
         
            +
                lower_bound: float = 0.001,
         
     | 
| 
      
 273 
     | 
    
         
            +
                upper_bound: float = 1.0,
         
     | 
| 
      
 274 
     | 
    
         
            +
                resolution: float = 0.001,
         
     | 
| 
       309 
275 
     | 
    
         
             
            ) -> Tuple[float, float]:
         
     | 
| 
       310 
     | 
    
         
            -
                """Find the  
     | 
| 
       311 
     | 
    
         
            -
                the threshold value using the specified metric (opt_metric).
         
     | 
| 
      
 276 
     | 
    
         
            +
                """Find the best silhouette score using binary search.
         
     | 
| 
       312 
277 
     | 
    
         | 
| 
       313 
278 
     | 
    
         
             
                Args:
         
     | 
| 
       314 
     | 
    
         
            -
                    Z (np.ndarray): Linkage matrix 
     | 
| 
       315 
     | 
    
         
            -
                    m (np.ndarray): Data matrix 
     | 
| 
       316 
     | 
    
         
            -
                    linkage_metric (str):  
     | 
| 
       317 
     | 
    
         
            -
             
     | 
| 
       318 
     | 
    
         
            -
                     
     | 
| 
       319 
     | 
    
         
            -
                     
     | 
| 
       320 
     | 
    
         
            -
             
     | 
| 
      
 279 
     | 
    
         
            +
                    Z (np.ndarray): Linkage matrix.
         
     | 
| 
      
 280 
     | 
    
         
            +
                    m (np.ndarray): Data matrix.
         
     | 
| 
      
 281 
     | 
    
         
            +
                    linkage_metric (str): Linkage metric for silhouette score calculation.
         
     | 
| 
      
 282 
     | 
    
         
            +
                    linkage_criterion (str): Clustering criterion.
         
     | 
| 
      
 283 
     | 
    
         
            +
                    lower_bound (float, optional): Lower bound for search. Defaults to 0.001.
         
     | 
| 
      
 284 
     | 
    
         
            +
                    upper_bound (float, optional): Upper bound for search. Defaults to 1.0.
         
     | 
| 
      
 285 
     | 
    
         
            +
                    resolution (float, optional): Desired resolution for the best threshold. Defaults to 0.001.
         
     | 
| 
       321 
286 
     | 
    
         | 
| 
       322 
287 
     | 
    
         
             
                Returns:
         
     | 
| 
       323 
288 
     | 
    
         
             
                    Tuple[float, float]:
         
     | 
| 
       324 
     | 
    
         
            -
                        -  
     | 
| 
       325 
     | 
    
         
            -
                        -  
     | 
| 
       326 
     | 
    
         
            -
                          at the optimal threshold (higher for "silhouette" and "calinski_harabasz",
         
     | 
| 
       327 
     | 
    
         
            -
                          lower for "davies_bouldin").
         
     | 
| 
       328 
     | 
    
         
            -
             
     | 
| 
       329 
     | 
    
         
            -
                Raises:
         
     | 
| 
       330 
     | 
    
         
            -
                    ValueError: If the `opt_metric` argument is not one of the supported metrics.
         
     | 
| 
      
 289 
     | 
    
         
            +
                        - Best threshold (float): The threshold that yields the best silhouette score.
         
     | 
| 
      
 290 
     | 
    
         
            +
                        - Best silhouette score (float): The highest silhouette score achieved.
         
     | 
| 
       331 
291 
     | 
    
         
             
                """
         
     | 
| 
       332 
     | 
    
         
            -
                 
     | 
| 
       333 
     | 
    
         
            -
                 
     | 
| 
       334 
     | 
    
         
            -
                resolution = 1e-6
         
     | 
| 
       335 
     | 
    
         
            -
             
     | 
| 
       336 
     | 
    
         
            -
                def compute_objective(coefficient: float) -> float:
         
     | 
| 
       337 
     | 
    
         
            -
                    """Compute the objective function for optimization."""
         
     | 
| 
       338 
     | 
    
         
            -
                    threshold_val = coefficient * max_d
         
     | 
| 
       339 
     | 
    
         
            -
                    clusters = fcluster(Z, threshold_val, criterion=linkage_criterion)
         
     | 
| 
       340 
     | 
    
         
            -
                    unique_clusters = np.unique(clusters)
         
     | 
| 
       341 
     | 
    
         
            -
                    if len(unique_clusters) <= 1 or len(unique_clusters) == m.shape[0]:
         
     | 
| 
       342 
     | 
    
         
            -
                        return 1e6
         
     | 
| 
       343 
     | 
    
         
            -
                    try:
         
     | 
| 
       344 
     | 
    
         
            -
                        if opt_metric == "silhouette":
         
     | 
| 
       345 
     | 
    
         
            -
                            score = silhouette_score(m, clusters, metric=linkage_metric)
         
     | 
| 
       346 
     | 
    
         
            -
                            return -score  # We want to maximize the score.
         
     | 
| 
       347 
     | 
    
         
            -
                        elif opt_metric == "calinski_harabasz":
         
     | 
| 
       348 
     | 
    
         
            -
                            score = calinski_harabasz_score(m, clusters)
         
     | 
| 
       349 
     | 
    
         
            -
                            return -score
         
     | 
| 
       350 
     | 
    
         
            -
                        elif opt_metric == "davies_bouldin":
         
     | 
| 
       351 
     | 
    
         
            -
                            score = davies_bouldin_score(m, clusters)
         
     | 
| 
       352 
     | 
    
         
            -
                            return score
         
     | 
| 
       353 
     | 
    
         
            -
                        else:
         
     | 
| 
       354 
     | 
    
         
            -
                            raise ValueError(f"Unknown optimization metric: {opt_metric}.")
         
     | 
| 
       355 
     | 
    
         
            -
                    except Exception:
         
     | 
| 
       356 
     | 
    
         
            -
                        return 1e6
         
     | 
| 
      
 292 
     | 
    
         
            +
                best_score = -np.inf
         
     | 
| 
      
 293 
     | 
    
         
            +
                best_threshold = None
         
     | 
| 
       357 
294 
     | 
    
         | 
| 
       358 
     | 
    
         
            -
                #  
     | 
| 
       359 
     | 
    
         
            -
                 
     | 
| 
       360 
     | 
    
         
            -
             
     | 
| 
       361 
     | 
    
         
            -
                 
     | 
| 
      
 295 
     | 
    
         
            +
                # Test lower bound
         
     | 
| 
      
 296 
     | 
    
         
            +
                max_d_lower = np.max(Z[:, 2]) * lower_bound
         
     | 
| 
      
 297 
     | 
    
         
            +
                clusters_lower = fcluster(Z, max_d_lower, criterion=linkage_criterion)
         
     | 
| 
      
 298 
     | 
    
         
            +
                try:
         
     | 
| 
      
 299 
     | 
    
         
            +
                    score_lower = silhouette_score(m, clusters_lower, metric=linkage_metric)
         
     | 
| 
      
 300 
     | 
    
         
            +
                except ValueError:
         
     | 
| 
      
 301 
     | 
    
         
            +
                    score_lower = -np.inf
         
     | 
| 
      
 302 
     | 
    
         
            +
             
     | 
| 
      
 303 
     | 
    
         
            +
                # Test upper bound
         
     | 
| 
      
 304 
     | 
    
         
            +
                max_d_upper = np.max(Z[:, 2]) * upper_bound
         
     | 
| 
      
 305 
     | 
    
         
            +
                clusters_upper = fcluster(Z, max_d_upper, criterion=linkage_criterion)
         
     | 
| 
      
 306 
     | 
    
         
            +
                try:
         
     | 
| 
      
 307 
     | 
    
         
            +
                    score_upper = silhouette_score(m, clusters_upper, metric=linkage_metric)
         
     | 
| 
      
 308 
     | 
    
         
            +
                except ValueError:
         
     | 
| 
      
 309 
     | 
    
         
            +
                    score_upper = -np.inf
         
     | 
| 
       362 
310 
     | 
    
         | 
| 
       363 
     | 
    
         
            -
                 
     | 
| 
       364 
     | 
    
         
            -
                 
     | 
| 
       365 
     | 
    
         
            -
             
     | 
| 
       366 
     | 
    
         
            -
             
     | 
| 
      
 311 
     | 
    
         
            +
                # Determine initial bounds for binary search
         
     | 
| 
      
 312 
     | 
    
         
            +
                if score_lower > score_upper:
         
     | 
| 
      
 313 
     | 
    
         
            +
                    best_score = score_lower
         
     | 
| 
      
 314 
     | 
    
         
            +
                    best_threshold = lower_bound
         
     | 
| 
      
 315 
     | 
    
         
            +
                    upper_bound = (lower_bound + upper_bound) / 2
         
     | 
| 
      
 316 
     | 
    
         
            +
                else:
         
     | 
| 
      
 317 
     | 
    
         
            +
                    best_score = score_upper
         
     | 
| 
      
 318 
     | 
    
         
            +
                    best_threshold = upper_bound
         
     | 
| 
      
 319 
     | 
    
         
            +
                    lower_bound = (lower_bound + upper_bound) / 2
         
     | 
| 
      
 320 
     | 
    
         
            +
             
     | 
| 
      
 321 
     | 
    
         
            +
                # Binary search loop
         
     | 
| 
      
 322 
     | 
    
         
            +
                while upper_bound - lower_bound > resolution:
         
     | 
| 
      
 323 
     | 
    
         
            +
                    mid_threshold = (upper_bound + lower_bound) / 2
         
     | 
| 
      
 324 
     | 
    
         
            +
                    max_d_mid = np.max(Z[:, 2]) * mid_threshold
         
     | 
| 
      
 325 
     | 
    
         
            +
                    clusters_mid = fcluster(Z, max_d_mid, criterion=linkage_criterion)
         
     | 
| 
      
 326 
     | 
    
         
            +
                    try:
         
     | 
| 
      
 327 
     | 
    
         
            +
                        score_mid = silhouette_score(m, clusters_mid, metric=linkage_metric)
         
     | 
| 
      
 328 
     | 
    
         
            +
                    except ValueError:
         
     | 
| 
      
 329 
     | 
    
         
            +
                        score_mid = -np.inf
         
     | 
| 
      
 330 
     | 
    
         
            +
             
     | 
| 
      
 331 
     | 
    
         
            +
                    # Update best score and threshold if mid-point is better
         
     | 
| 
      
 332 
     | 
    
         
            +
                    if score_mid > best_score:
         
     | 
| 
      
 333 
     | 
    
         
            +
                        best_score = score_mid
         
     | 
| 
      
 334 
     | 
    
         
            +
                        best_threshold = mid_threshold
         
     | 
| 
      
 335 
     | 
    
         
            +
             
     | 
| 
      
 336 
     | 
    
         
            +
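                    # Adjust bounds based on the scores. NOTE(review): score_lower and score_upper are never updated inside this loop, so the same branch is taken on every iteration and score_mid does not steer the search.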
         
     | 
| 
      
 337 
     | 
    
         
            +
                    if score_lower > score_upper:
         
     | 
| 
      
 338 
     | 
    
         
            +
                        upper_bound = mid_threshold
         
     | 
| 
      
 339 
     | 
    
         
            +
                    else:
         
     | 
| 
      
 340 
     | 
    
         
            +
                        lower_bound = mid_threshold
         
     | 
| 
       367 
341 
     | 
    
         | 
| 
       368 
     | 
    
         
            -
                return best_threshold, float( 
     | 
| 
      
 342 
     | 
    
         
            +
                return best_threshold, float(best_score)
         
     | 
    
        risk/network/graph/api.py
    CHANGED
    
    | 
         @@ -58,12 +58,9 @@ class GraphAPI: 
     | 
|
| 
       58 
58 
     | 
    
         
             
                        impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         
     | 
| 
       59 
59 
     | 
    
         
             
                        prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
         
     | 
| 
       60 
60 
     | 
    
         
             
                        linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
         
     | 
| 
       61 
     | 
    
         
            -
                        linkage_method (str, optional): Clustering method to use. Defaults to "average". 
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
                         
     | 
| 
       64 
     | 
    
         
            -
                            to automatically select the best linkage metric.
         
     | 
| 
       65 
     | 
    
         
            -
                        linkage_threshold (str, float, optional): Threshold for clustering. Choose "silhouette", "calinski_harabasz",
         
     | 
| 
       66 
     | 
    
         
            -
                            or "davies_bouldin" to automatically select the best threshold. Defaults to 0.2.
         
     | 
| 
      
 61 
     | 
    
         
            +
                        linkage_method (str, optional): Clustering method to use. Defaults to "average".
         
     | 
| 
      
 62 
     | 
    
         
            +
                        linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule".
         
     | 
| 
      
 63 
     | 
    
         
            +
                        linkage_threshold (float, optional): Threshold for clustering. Defaults to 0.2.
         
     | 
| 
       67 
64 
     | 
    
         
             
                        min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
         
     | 
| 
       68 
65 
     | 
    
         
             
                        max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
         
     | 
| 
       69 
66 
     | 
    
         | 
| 
         @@ -1,4 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            risk/__init__.py,sha256= 
     | 
| 
      
 1 
     | 
    
         
            +
            risk/__init__.py,sha256=ewYSGDLHigkwFLI9IW6qDbQk4uS6nb3RTd-k2GCD1b0,127
         
     | 
| 
       2 
2 
     | 
    
         
             
            risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
         
     | 
| 
       3 
3 
     | 
    
         
             
            risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
         
     | 
| 
       4 
4 
     | 
    
         
             
            risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
         
     | 
| 
         @@ -10,13 +10,13 @@ risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716 
     | 
|
| 
       10 
10 
     | 
    
         
             
            risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
         
     | 
| 
       11 
11 
     | 
    
         
             
            risk/neighborhoods/api.py,sha256=ywngw2TQVV27gYlWDXcs8-qnmeepnvb-W9ov6J6VEPM,23341
         
     | 
| 
       12 
12 
     | 
    
         
             
            risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
         
     | 
| 
       13 
     | 
    
         
            -
            risk/neighborhoods/domains.py,sha256= 
     | 
| 
      
 13 
     | 
    
         
            +
            risk/neighborhoods/domains.py,sha256=Yu93mKNCuOpBGa87knAH-XIl260kf-rswPfn3aC9GNo,13937
         
     | 
| 
       14 
14 
     | 
    
         
             
            risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
         
     | 
| 
       15 
15 
     | 
    
         
             
            risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
         
     | 
| 
       16 
16 
     | 
    
         
             
            risk/network/geometry.py,sha256=eVtGHMgBf9fEqQZUFdHWjw-zFYYpfUONoHFSAxoRkug,6219
         
     | 
| 
       17 
17 
     | 
    
         
             
            risk/network/io.py,sha256=RCH4nQdgYDXcNwMfpSz7qEmPO0pJ1p9fL0rNQptsQrc,21673
         
     | 
| 
       18 
18 
     | 
    
         
             
            risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
         
     | 
| 
       19 
     | 
    
         
            -
            risk/network/graph/api.py,sha256= 
     | 
| 
      
 19 
     | 
    
         
            +
            risk/network/graph/api.py,sha256=9yoviP7EqFU1okLJZlaLBZzFNmjOHv30B1JgDFNP1bg,8399
         
     | 
| 
       20 
20 
     | 
    
         
             
            risk/network/graph/graph.py,sha256=qEWyZvuaGT_vvjhreBdmRPX3gst2wQFaXhFAvikPSqw,12158
         
     | 
| 
       21 
21 
     | 
    
         
             
            risk/network/graph/summary.py,sha256=Y_0rL2C1UoQeZQIPVe5LbaCO356Mcc8HisnrXwQsRm8,10289
         
     | 
| 
       22 
22 
     | 
    
         
             
            risk/network/plotter/__init__.py,sha256=4gWtQHGzQVNHmEBXi31Zf0tX0y2sTcE66J_yGnn7268,99
         
     | 
| 
         @@ -34,8 +34,8 @@ risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,1177 
     | 
|
| 
       34 
34 
     | 
    
         
             
            risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
         
     | 
| 
       35 
35 
     | 
    
         
             
            risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
         
     | 
| 
       36 
36 
     | 
    
         
             
            risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
         
     | 
| 
       37 
     | 
    
         
            -
            risk_network-0.0. 
     | 
| 
       38 
     | 
    
         
            -
            risk_network-0.0. 
     | 
| 
       39 
     | 
    
         
            -
            risk_network-0.0. 
     | 
| 
       40 
     | 
    
         
            -
            risk_network-0.0. 
     | 
| 
       41 
     | 
    
         
            -
            risk_network-0.0. 
     | 
| 
      
 37 
     | 
    
         
            +
            risk_network-0.0.9b39.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
         
     | 
| 
      
 38 
     | 
    
         
            +
            risk_network-0.0.9b39.dist-info/METADATA,sha256=y3xDx1OCYpCS1OgBMUTNIK5y8HFORWHag4PLnyAXc5g,47627
         
     | 
| 
      
 39 
     | 
    
         
            +
            risk_network-0.0.9b39.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         
     | 
| 
      
 40 
     | 
    
         
            +
            risk_network-0.0.9b39.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
         
     | 
| 
      
 41 
     | 
    
         
            +
            risk_network-0.0.9b39.dist-info/RECORD,,
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     |