risk-network 0.0.9b43__py3-none-any.whl → 0.0.9b45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.9-beta.43"
10
+ __version__ = "0.0.9-beta.45"
@@ -3,7 +3,9 @@ risk/annotations/annotations
3
3
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
+ import os
6
7
  import re
8
+ import zipfile
7
9
  from collections import Counter
8
10
  from itertools import compress
9
11
  from typing import Any, Dict, List, Set
@@ -19,9 +21,49 @@ from nltk.tokenize import word_tokenize
19
21
  from risk.log import logger
20
22
  from scipy.sparse import coo_matrix
21
23
 
22
- # Add the local NLTK data path to the system path
23
- nltk.data.path.append("nltk_data")
24
24
 
25
+ def ensure_nltk_resource(resource: str) -> None:
26
+ """Ensure the specified NLTK resource is available."""
27
+ # Define the path to the resource within the NLTK data directory
28
+ resource_path = f"corpora/{resource}"
29
+ # Check if the resource is already available.
30
+ try:
31
+ nltk.data.find(resource_path)
32
+ return
33
+ except LookupError:
34
+ print(f"Resource '{resource}' not found. Attempting to download...")
35
+
36
+ # Download the resource.
37
+ nltk.download(resource)
38
+ # Check again after downloading.
39
+ try:
40
+ nltk.data.find(resource_path)
41
+ return
42
+ except LookupError:
43
+ print(f"Resource '{resource}' still not found after download. Checking for a ZIP file...")
44
+
45
+ # Look for a ZIP file in all known NLTK data directories.
46
+ for data_path in nltk.data.path:
47
+ zip_path = os.path.join(data_path, "corpora", f"{resource}.zip")
48
+ if os.path.isfile(zip_path):
49
+ print(f"Found ZIP file for '{resource}' at: {zip_path}")
50
+ target_dir = os.path.join(data_path, "corpora")
51
+ with zipfile.ZipFile(zip_path, "r") as z:
52
+ z.extractall(path=target_dir)
53
+ print(f"Unzipped '{resource}' successfully.")
54
+ break # Stop after unzipping the first found ZIP.
55
+
56
+ # Final check: Try to load the resource one last time.
57
+ try:
58
+ nltk.data.find(resource_path)
59
+ print(f"Resource '{resource}' is now available.")
60
+ except LookupError:
61
+ raise LookupError(f"Resource '{resource}' could not be found, downloaded, or unzipped.")
62
+
63
+
64
+ # Ensure the NLTK stopwords and WordNet resources are available
65
+ ensure_nltk_resource("stopwords")
66
+ ensure_nltk_resource("wordnet")
25
67
  # Use NLTK's stopwords - load all languages
26
68
  STOP_WORDS = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))
27
69
  # Initialize the WordNet lemmatizer, which is used for normalizing words
@@ -224,7 +266,7 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
224
266
  weight = max(1, int((0 if pd.isna(score) else score) * 10))
225
267
  for token in tokens:
226
268
  # Clean token: lowercase and remove extraneous punctuation (but preserve intra-word hyphens)
227
- token_clean = re.sub(r"[^\w\-]", "", token.lower()).strip()
269
+ token_clean = re.sub(r"[^\w\-]", "", token).strip()
228
270
  if not token_clean:
229
271
  continue
230
272
  # Skip tokens that are pure numbers
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: risk-network
3
- Version: 0.0.9b43
3
+ Version: 0.0.9b45
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -1,7 +1,7 @@
1
- risk/__init__.py,sha256=38C2Lmnez9GxpKuiGvgqF2uJliPu7UtOjt8icOcAldw,127
1
+ risk/__init__.py,sha256=jIlf9Do3EstfEM4dHHrgF3VRwVSyPYhBW38kms1VToc,127
2
2
  risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
3
3
  risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
4
- risk/annotations/annotations.py,sha256=FTnY-aeHpu8R3jwTARp_6VsUwF0Eu_Ix8QJgUajDK8A,14677
4
+ risk/annotations/annotations.py,sha256=5X2R8RFxgK6kgSoj05UdCPcpkRRPOaHjGwnIrjeD5Ww,16299
5
5
  risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
6
6
  risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
7
7
  risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
@@ -33,8 +33,8 @@ risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,1177
33
33
  risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
34
34
  risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
35
35
  risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
36
- risk_network-0.0.9b43.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
37
- risk_network-0.0.9b43.dist-info/METADATA,sha256=qM3Vt0aCMSbh6QzGTvNBB1f_1texUsh8Kuc2TnYFG5g,47627
38
- risk_network-0.0.9b43.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
39
- risk_network-0.0.9b43.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
40
- risk_network-0.0.9b43.dist-info/RECORD,,
36
+ risk_network-0.0.9b45.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
37
+ risk_network-0.0.9b45.dist-info/METADATA,sha256=2KudsInDITKJe7RnMfguwO6x2uMVJufM1TBvB92nxzA,47627
38
+ risk_network-0.0.9b45.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
39
+ risk_network-0.0.9b45.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
40
+ risk_network-0.0.9b45.dist-info/RECORD,,