risk-network 0.0.9b5__py3-none-any.whl → 0.0.9b6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.9-beta.5"
+__version__ = "0.0.9-beta.6"
risk/annotations/annotations.py CHANGED
@@ -15,6 +15,8 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 
+from risk.log import logger
+
 
 def _setup_nltk():
     """Ensure necessary NLTK data is downloaded."""
@@ -35,15 +37,23 @@ _setup_nltk()
 stop_words = set(stopwords.words("english"))
 
 
-def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
+def load_annotations(
+    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+) -> Dict[str, Any]:
     """Convert annotations input to a DataFrame and reindex based on the network's node labels.
 
     Args:
         network (nx.Graph): The network graph.
         annotations_input (Dict[str, Any]): A dictionary with annotations.
+        min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+            term to be included. Defaults to 2.
 
     Returns:
         Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
+
+    Raises:
+        ValueError: If no annotations are found for the nodes in the network.
+        ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
@@ -61,13 +71,24 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
     annotations_pivot = annotations_pivot.reindex(index=node_label_order)
     # Raise an error if no valid annotations are found for the nodes in the network
     if annotations_pivot.notnull().sum().sum() == 0:
+        raise ValueError("No terms found in the annotation file for the nodes in the network.")
+
+    # Filter out annotations with fewer than min_nodes_per_term occurrences
+    # This assists in reducing noise and focusing on more relevant annotations for statistical analysis
+    num_terms_before_filtering = annotations_pivot.shape[1]
+    annotations_pivot = annotations_pivot.loc[
+        :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
+    ]
+    num_terms_after_filtering = annotations_pivot.shape[1]
+    # Log the number of annotations before and after filtering
+    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
+    logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
+    if num_terms_after_filtering == 0:
         raise ValueError(
-            "No annotations found in the annotations file for the nodes in the network."
+            f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )
 
-    # Remove columns with all zeros and those with only a single '1' to improve statistical performance
-    # (i.e., it's unreliable to compute the significance of an annotation in a node cluster based on a single occurrence).
-    annotations_pivot = annotations_pivot.loc[:, (annotations_pivot.sum(axis=0) > 1)]
     # Extract ordered nodes and annotations
     ordered_nodes = tuple(annotations_pivot.index)
     ordered_annotations = tuple(annotations_pivot.columns)
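
For readers who want to see the new min_nodes_per_term filter in isolation, here is a minimal, standalone sketch of the column-filtering step added above. The toy DataFrame and its node/term names are illustrative assumptions, not data from the package; only the .loc / sum(axis=0) expression mirrors the diff.

import pandas as pd

# Toy binary annotations matrix: rows are network nodes, columns are annotation terms.
annotations_pivot = pd.DataFrame(
    {
        "term_A": [1, 1, 0, 0],  # annotates 2 nodes -> kept at the default threshold
        "term_B": [1, 0, 0, 0],  # annotates 1 node  -> dropped
        "term_C": [1, 1, 1, 0],  # annotates 3 nodes -> kept
    },
    index=["node1", "node2", "node3", "node4"],
)

min_nodes_per_term = 2
num_terms_before_filtering = annotations_pivot.shape[1]
# Same filtering expression as the new code path in load_annotations
annotations_pivot = annotations_pivot.loc[
    :, annotations_pivot.sum(axis=0) >= min_nodes_per_term
]
num_terms_after_filtering = annotations_pivot.shape[1]

print(num_terms_before_filtering, num_terms_after_filtering)  # 3 2
print(list(annotations_pivot.columns))  # ['term_A', 'term_C']

With the default of 2, this reproduces the previous hard-coded rule (annotations_pivot.sum(axis=0) > 1), but the threshold is now configurable and the before/after term counts are logged.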
risk/annotations/io.py CHANGED
@@ -25,12 +25,16 @@ class AnnotationsIO:
     def __init__(self):
         pass
 
-    def load_json_annotation(self, network: nx.Graph, filepath: str) -> Dict[str, Any]:
+    def load_json_annotation(
+        self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a JSON file and convert them to a DataFrame.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
             filepath (str): Path to the JSON annotations file.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -40,12 +44,11 @@ class AnnotationsIO:
         params.log_annotations(filepath=filepath, filetype=filetype)
         _log_loading(filetype, filepath=filepath)
 
-        # Open and read the JSON file
+        # Load the JSON file into a dictionary
         with open(filepath, "r") as file:
             annotations_input = json.load(file)
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_excel_annotation(
         self,
@@ -55,6 +58,7 @@ class AnnotationsIO:
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from an Excel file and associate them with the network.
 
@@ -65,6 +69,8 @@ class AnnotationsIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -82,10 +88,9 @@ class AnnotationsIO:
             lambda x: x.split(nodes_delimiter)
         )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+        annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
-        # Load the annotations into the provided network
-        return load_annotations(network, label_node_dict)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_csv_annotation(
         self,
@@ -94,6 +99,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a CSV file and associate them with the network.
 
@@ -103,6 +109,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -118,8 +126,7 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_tsv_annotation(
         self,
@@ -128,6 +135,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a TSV file and associate them with the network.
 
@@ -137,6 +145,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -152,15 +162,18 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
-    def load_dict_annotation(self, network: nx.Graph, content: Dict[str, Any]) -> Dict[str, Any]:
+    def load_dict_annotation(
+        self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a provided dictionary and convert them to a dictionary annotation.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
             content (Dict[str, Any]): The annotations dictionary to load.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -176,13 +189,8 @@ class AnnotationsIO:
         params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
         _log_loading(filetype, "In-memory dictionary")
 
-        # Load the annotations into the provided network
-        annotations_dict = load_annotations(network, content)
-        # Ensure the output is a dictionary
-        if not isinstance(annotations_dict, dict):
-            raise ValueError("Expected output to be a dictionary")
-
-        return annotations_dict
+        # Load the annotations as a dictionary from the provided dictionary
+        return load_annotations(network, content, min_nodes_per_term)
 
 
 def _load_matrix_file(
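
Taken together, every public loader on AnnotationsIO now forwards min_nodes_per_term straight to load_annotations. Below is a hedged usage sketch of the updated signatures; the toy graph, the in-memory annotation dictionary, and the assumption that plain node names match the network's node labels are illustrative only, not taken from the package's documentation or tests.

import networkx as nx
from risk.annotations.io import AnnotationsIO  # module path shown in this diff

# Illustrative toy network and annotation dictionary (hypothetical data).
network = nx.Graph()
network.add_edges_from([("A", "B"), ("B", "C"), ("C", "D")])

content = {
    "term_1": ["A", "B", "C"],  # covers 3 network nodes
    "term_2": ["D"],            # covers 1 node; dropped at the default threshold of 2
}

annotations_io = AnnotationsIO()
# New in 0.0.9b6: min_nodes_per_term is accepted by every loader and defaults to 2.
annotations = annotations_io.load_dict_annotation(network, content, min_nodes_per_term=2)

The same keyword applies to load_json_annotation, load_csv_annotation, load_tsv_annotation, and load_excel_annotation.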
risk_network-0.0.9b5.dist-info/METADATA → risk_network-0.0.9b6.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.9b5
+Version: 0.0.9b6
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk_network-0.0.9b5.dist-info/RECORD → risk_network-0.0.9b6.dist-info/RECORD RENAMED
@@ -1,9 +1,9 @@
-risk/__init__.py,sha256=xpU4eFf8OBGeA2fxuMCAFmBEQO6YlnDSVL09OvyHThs,112
+risk/__init__.py,sha256=fpCtulKZFHI4Je7dm4qBJHyP9InK9uDRYmYYgog9BGQ,112
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
 risk/risk.py,sha256=De1vn8Xc-TKz6aTL0bvJI-SVrIqU3k0IWAbKc7dde1c,23618
 risk/annotations/__init__.py,sha256=kXgadEXaCh0z8OyhOhTj7c3qXGmWgOhaSZ4gSzSb59U,147
-risk/annotations/annotations.py,sha256=aC30M-wdd72ZjOfn8RZKAsGM7Yti0Wl_4CHTvayoPvY,13312
-risk/annotations/io.py,sha256=eOkPD9G6KzkhGRc_ZW2McxQ8665o-H3uDG8bmKlzQ80,9591
+risk/annotations/annotations.py,sha256=WVT9wzTm8lTpMw_3SnbyljWR77yExo0rb1zVgJza8nw,14284
+risk/annotations/io.py,sha256=Nj_RPmn-WM1zMsssm9bVGR94SHytkEBK-wcBJ3WhqkU,10310
 risk/log/__init__.py,sha256=gy7C5L6D222AYUChq5lkc0LsCJ_QMQPaFiBJKbecdac,201
 risk/log/console.py,sha256=C52s3FgQ2e9kQWcXL8m7rs_pnKXt5Yy8PBHmQkOTiNo,4537
 risk/log/parameters.py,sha256=o4StqYCa0kt7_Ht4mKa1DwwvhGUwkC_dGBaiUIc0GB0,5683
@@ -32,8 +32,8 @@ risk/stats/stats.py,sha256=z8NrhiVj4BzJ250bVLfytpmfC7RzYu7mBuIZD_l0aCA,7222
 risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
 risk/stats/permutation/permutation.py,sha256=meBNSrbRa9P8WJ54n485l0H7VQJlMSfHqdN4aCKYCtQ,10105
 risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
-risk_network-0.0.9b5.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.9b5.dist-info/METADATA,sha256=Oc_07HiBSedyTbbiP-2a-xeLgEH-3zzNdXYzV6FSdQY,47497
-risk_network-0.0.9b5.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-risk_network-0.0.9b5.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.9b5.dist-info/RECORD,,
+risk_network-0.0.9b6.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.9b6.dist-info/METADATA,sha256=0YZ5Rd4bqOid3nSHpa-S6fBBtBhYPoAQ1SlACtmDVaw,47497
+risk_network-0.0.9b6.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+risk_network-0.0.9b6.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.9b6.dist-info/RECORD,,