risk-network 0.0.8b22__py3-none-any.whl → 0.0.8b24__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.8-beta.22"
10
+ __version__ = "0.0.8-beta.24"
@@ -3,6 +3,7 @@ risk/annotations/annotations
3
3
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
+ import re
6
7
  from collections import Counter
7
8
  from itertools import compress
8
9
  from typing import Any, Dict, List, Set
@@ -205,19 +206,26 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
205
206
  weight = max(1, int((0 if pd.isna(score) else score) * 10))
206
207
  weighted_words.extend([word] * weight)
207
208
 
208
- # Tokenize the weighted words
209
+ # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
209
210
  tokens = word_tokenize(" ".join(weighted_words))
210
- # Separate numeric tokens
211
- numeric_tokens = [token for token in tokens if token.replace(".", "", 1).isdigit()]
212
- unique_numeric_values = set(numeric_tokens)
213
- if len(unique_numeric_values) == 1:
214
- return f"{list(unique_numeric_values)[0]}"
215
-
216
- # Filter alphabetic and numeric tokens
217
- words = [word for word in tokens if word.isalpha() or word.replace(".", "", 1).isdigit()]
218
- # Apply word similarity filtering to remove redundant terms
219
- simplified_words = _simplify_word_list(words)
220
- # Generate a coherent description from the processed words
211
+ # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
212
+ combined_tokens = []
213
+ for token in tokens:
214
+ # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
215
+ if re.match(r"^\d+-\w+", token):
216
+ combined_tokens.append(token)
217
+ elif token.replace(".", "", 1).isdigit(): # Handle pure numeric tokens
218
+ # Ignore pure numbers as descriptions unless necessary
219
+ continue
220
+ else:
221
+ combined_tokens.append(token)
222
+
223
+ # Prevent descriptions like just '4' from being selected
224
+ if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
225
+ return "N/A" # Return "N/A" for cases where it's just a number
226
+
227
+ # Simplify the word list and generate the description
228
+ simplified_words = _simplify_word_list(combined_tokens)
221
229
  description = _generate_coherent_description(simplified_words)
222
230
 
223
231
  return description
@@ -614,9 +614,11 @@ class Labels:
614
614
  Returns:
615
615
  str: Processed terms separated by TERM_DELIMITER, with words combined if necessary to fit within constraints.
616
616
  """
617
- # Handle ids_to_replace logic
617
+ # Return custom labels if domain is in ids_to_replace
618
618
  if ids_to_replace and domain in ids_to_replace:
619
619
  terms = ids_to_replace[domain].split(" ")
620
+ return terms
621
+
620
622
  else:
621
623
  terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
622
624
 
@@ -30,6 +30,44 @@ def calculate_bounding_box(
30
30
  return center, radius
31
31
 
32
32
 
33
+ def refine_center_iteratively(
34
+ node_coordinates: np.ndarray,
35
+ radius_margin: float = 1.05,
36
+ max_iterations: int = 10,
37
+ tolerance: float = 1e-2,
38
+ ) -> Tuple[np.ndarray, float]:
39
+ """Refine the center of the graph iteratively to minimize skew in node distribution.
40
+
41
+ Args:
42
+ node_coordinates (np.ndarray): Array of node coordinates (x, y).
43
+ radius_margin (float, optional): Margin factor to apply to the bounding box radius. Defaults to 1.05.
44
+ max_iterations (int, optional): Maximum number of iterations for refining the center. Defaults to 10.
45
+ tolerance (float, optional): Stopping tolerance for center adjustment. Defaults to 1e-2.
46
+
47
+ Returns:
48
+ tuple: Refined center and the final radius.
49
+ """
50
+ # Initial center and radius based on the bounding box
51
+ center, radius = calculate_bounding_box(node_coordinates, radius_margin)
52
+ for _ in range(max_iterations):
53
+ # Shift the coordinates based on the current center
54
+ shifted_coordinates = node_coordinates - center
55
+ # Calculate skew (difference in distance from the center)
56
+ skew = np.mean(shifted_coordinates, axis=0)
57
+ # If skew is below tolerance, stop
58
+ if np.linalg.norm(skew) < tolerance:
59
+ break
60
+
61
+ # Adjust the center by moving it in the direction opposite to the skew
62
+ center += skew
63
+
64
+ # After refinement, recalculate the bounding radius
65
+ shifted_coordinates = node_coordinates - center
66
+ new_radius = np.max(np.linalg.norm(shifted_coordinates, axis=1)) * radius_margin
67
+
68
+ return center, new_radius
69
+
70
+
33
71
  def calculate_centroids(network, domain_id_to_node_ids_map):
34
72
  """Calculate the centroid for each domain based on node x and y coordinates in the network.
35
73
 
risk/risk.py CHANGED
@@ -93,6 +93,9 @@ class RISK(NetworkIO, AnnotationsIO):
93
93
  random_seed=random_seed,
94
94
  )
95
95
 
96
+ # Make a copy of the network to avoid modifying the original
97
+ network = network.copy()
98
+
96
99
  # Load neighborhoods based on the network and distance metric
97
100
  neighborhoods = self._load_neighborhoods(
98
101
  network,
@@ -150,6 +153,9 @@ class RISK(NetworkIO, AnnotationsIO):
150
153
  random_seed=random_seed,
151
154
  )
152
155
 
156
+ # Make a copy of the network to avoid modifying the original
157
+ network = network.copy()
158
+
153
159
  # Load neighborhoods based on the network and distance metric
154
160
  neighborhoods = self._load_neighborhoods(
155
161
  network,
@@ -216,6 +222,9 @@ class RISK(NetworkIO, AnnotationsIO):
216
222
  max_workers=max_workers,
217
223
  )
218
224
 
225
+ # Make a copy of the network to avoid modifying the original
226
+ network = network.copy()
227
+
219
228
  # Load neighborhoods based on the network and distance metric
220
229
  neighborhoods = self._load_neighborhoods(
221
230
  network,
@@ -295,6 +304,9 @@ class RISK(NetworkIO, AnnotationsIO):
295
304
  max_cluster_size=max_cluster_size,
296
305
  )
297
306
 
307
+ # Make a copy of the network to avoid modifying the original
308
+ network = network.copy()
309
+
298
310
  logger.debug(f"p-value cutoff: {pval_cutoff}")
299
311
  logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
300
312
  logger.debug(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.8b22
3
+ Version: 0.0.8b24
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -1,8 +1,8 @@
1
- risk/__init__.py,sha256=fz5ZBsLUlWdBQ5uJS0qBge1qwPNox0OYhi4OXkjQxwI,113
1
+ risk/__init__.py,sha256=cH9ocvJT5oWRV0qbQAG2O5ZSdpVcuvxbHoe8hT6DeJM,113
2
2
  risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
3
- risk/risk.py,sha256=rjV0hllegCX978QaUo175FworKxNXlhQEQaQAPjHqos,23397
3
+ risk/risk.py,sha256=7JERU-ZkDuESY3LJDyqEJGVs-YdYQwCt2SO3nHGYk0E,23809
4
4
  risk/annotations/__init__.py,sha256=kXgadEXaCh0z8OyhOhTj7c3qXGmWgOhaSZ4gSzSb59U,147
5
- risk/annotations/annotations.py,sha256=giLJht0tPtf4UdtH_d0kbCZQU5H5fZoupGDFKaNbC_Q,12700
5
+ risk/annotations/annotations.py,sha256=dHO6kQOQjMA57nYA-yTAU1uB-ieiZ5sknAKvX6vF0Os,13024
6
6
  risk/annotations/io.py,sha256=powWzeimVdE0WCwlBCXyu5otMyZZHQujC0DS3m5DC0c,9505
7
7
  risk/log/__init__.py,sha256=aDUz5LMFQsz0UlsQI2EdXtiBKRLfml1UMeZKC7QQIGU,134
8
8
  risk/log/config.py,sha256=m8pzj-hN4vI_2JdJUfyOoSvzT8_lhoIfBt27sKbnOes,4535
@@ -18,11 +18,11 @@ risk/network/io.py,sha256=u0PPcKjp6Xze--7eDOlvalYkjQ9S2sjiC-ac2476PUI,22942
18
18
  risk/network/plot/__init__.py,sha256=MfmaXJgAZJgXZ2wrhK8pXwzETlcMaLChhWXKAozniAo,98
19
19
  risk/network/plot/canvas.py,sha256=ZO6bHw1chIsUqtE7IkPKdgX4tFLA-T5OwN5SojqGSNU,10672
20
20
  risk/network/plot/contour.py,sha256=CwX4i3uE5HL0W4kfx34U7YyoTTqMxyb7xaXKRVoNLzY,15265
21
- risk/network/plot/labels.py,sha256=ozkqwhBOTHKJLaAz4dJopXuykAvssSZUer2W5V0x2jM,45103
21
+ risk/network/plot/labels.py,sha256=2XTL8jwqiGnkJYQizQDgUrjU8CPyjpwV3AapsG_Uqw4,45153
22
22
  risk/network/plot/network.py,sha256=6RURL1OdBFyQ34qNcwM_uH3LSQGYZZ8tZT51dggH1a0,13685
23
23
  risk/network/plot/plotter.py,sha256=iTPMiTnTTatM_-q1Ox_bjt5Pvv-Lo8gceiYB6TVzDcw,5770
24
24
  risk/network/plot/utils/color.py,sha256=WSs1ge2oZ8yXwyVk2QqBF-avRd0aYT-sYZr9cxxAn7M,19626
25
- risk/network/plot/utils/layout.py,sha256=5DpRLvabgnPWwVJ-J3W6oFBBvbjCrudvvW4HDOzzoTo,1960
25
+ risk/network/plot/utils/layout.py,sha256=RnJq0yODpoheZnDl7KKFPQeXrnrsS3FLIdxupoYVZq4,3553
26
26
  risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
27
27
  risk/stats/hypergeom.py,sha256=oc39f02ViB1vQ-uaDrxG_tzAT6dxQBRjc88EK2EGn78,2282
28
28
  risk/stats/poisson.py,sha256=polLgwS08MTCNzupYdmMUoEUYrJOjAbcYtYwjlfeE5Y,1803
@@ -30,8 +30,8 @@ risk/stats/stats.py,sha256=6iGi0-oN05mTmupg6X_VEBxEQvi2rujNhfPk4aLjwNI,7186
30
30
  risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
31
31
  risk/stats/permutation/permutation.py,sha256=meBNSrbRa9P8WJ54n485l0H7VQJlMSfHqdN4aCKYCtQ,10105
32
32
  risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
33
- risk_network-0.0.8b22.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
34
- risk_network-0.0.8b22.dist-info/METADATA,sha256=9trSkrh2Od_B2qltA2n_uVcvX1kUlMy-QmLO4WThrds,47498
35
- risk_network-0.0.8b22.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
36
- risk_network-0.0.8b22.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
37
- risk_network-0.0.8b22.dist-info/RECORD,,
33
+ risk_network-0.0.8b24.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
34
+ risk_network-0.0.8b24.dist-info/METADATA,sha256=YlQYQ6UuQrShoKaJtiryZKTj1_bPrqJQxYN7PAAlHj4,47498
35
+ risk_network-0.0.8b24.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
36
+ risk_network-0.0.8b24.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
37
+ risk_network-0.0.8b24.dist-info/RECORD,,