risk-network 0.0.16b0__py3-none-any.whl → 0.0.16b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/_annotation/_annotation.py +9 -12
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b1.dist-info}/METADATA +1 -1
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b1.dist-info}/RECORD +7 -7
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b1.dist-info}/WHEEL +0 -0
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b1.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b1.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/_annotation/_annotation.py
CHANGED
|
@@ -257,26 +257,23 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
|
|
|
257
257
|
continue
|
|
258
258
|
# Lemmatize the token to merge similar forms
|
|
259
259
|
token_norm = LEMMATIZER.lemmatize(token_clean)
|
|
260
|
-
|
|
260
|
+
# Apply weighting boost for biologically structured number-word hybrids
|
|
261
|
+
if re.match(r"^\d+[\-\w]+", token_norm):
|
|
262
|
+
actual_weight = int(weight * 1.5)
|
|
263
|
+
else:
|
|
264
|
+
actual_weight = weight
|
|
265
|
+
weighted_counts[token_norm] = weighted_counts.get(token_norm, 0) + actual_weight
|
|
261
266
|
|
|
262
267
|
# Reconstruct a weighted token list by repeating each token by its aggregated count.
|
|
263
268
|
weighted_words = []
|
|
264
269
|
for token, count in weighted_counts.items():
|
|
265
270
|
weighted_words.extend([token] * count)
|
|
266
271
|
|
|
267
|
-
# Combine tokens that match number-word patterns (e.g. "4-alpha")
|
|
272
|
+
# Combine tokens that match number-word patterns (e.g. "4-alpha"), but do not remove numeric tokens.
|
|
273
|
+
# All tokens are included in the final list.
|
|
268
274
|
combined_tokens = []
|
|
269
275
|
for token in weighted_words:
|
|
270
|
-
|
|
271
|
-
combined_tokens.append(token)
|
|
272
|
-
elif token.replace(".", "", 1).isdigit():
|
|
273
|
-
continue
|
|
274
|
-
else:
|
|
275
|
-
combined_tokens.append(token)
|
|
276
|
-
|
|
277
|
-
# If the only token is numeric, return a default value.
|
|
278
|
-
if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
|
|
279
|
-
return "N/A"
|
|
276
|
+
combined_tokens.append(token)
|
|
280
277
|
|
|
281
278
|
# Simplify the token list to remove near-duplicates based on the Jaccard index.
|
|
282
279
|
simplified_words = _simplify_word_list(combined_tokens)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
risk/__init__.py,sha256=
|
|
1
|
+
risk/__init__.py,sha256=yv_r0c0z4l4vV4P0053XkOGZfAdW1Fbxa_jSYS7a0yQ,143
|
|
2
2
|
risk/_risk.py,sha256=VULCdM41BlWKM1ou4Qc579ffZ9dMZkfhAwKYgbaEeKM,1054
|
|
3
3
|
risk/_annotation/__init__.py,sha256=zr7w1DHkmvrkKFGKdPhrcvZHV-xsfd5TZOaWtFiP4Dc,164
|
|
4
|
-
risk/_annotation/_annotation.py,sha256=
|
|
4
|
+
risk/_annotation/_annotation.py,sha256=MWbimFSR6FT0SahJ5Ixbe0VMZ5osmgeOgyQqX8ntbec,15045
|
|
5
5
|
risk/_annotation/_io.py,sha256=xic3dkEA54X82HbyWfCiXrUpAhPWFPBZ69R8jw31omQ,12457
|
|
6
6
|
risk/_annotation/_nltk_setup.py,sha256=aHHnElLOKiouVDrZ3uON0CSFmBxvzmYfjYPi07v2rJM,3584
|
|
7
7
|
risk/_log/__init__.py,sha256=LX6BsfcGOH0RbAdQaUmIU-LVMmArDdKwn0jFtj45FYo,205
|
|
@@ -34,8 +34,8 @@ risk/_network/_plotter/_plotter.py,sha256=F2hw-spUdsXjvuG36o0YFR3Pnd-CZOHYUq4vW0
|
|
|
34
34
|
risk/_network/_plotter/_utils/__init__.py,sha256=JXgjKiBWvXx0X2IeFnrOh5YZQGQoELbhJZ0Zh2mFEOo,211
|
|
35
35
|
risk/_network/_plotter/_utils/_colors.py,sha256=JCliSvz8_-TsjilaRHSEsqdXFBUYlzhXKOSRGdCm9Kw,19177
|
|
36
36
|
risk/_network/_plotter/_utils/_layout.py,sha256=GyGLc2U1WWUVL1Te9uPi_CLqlW_E4TImXRAL5TeA5D8,3633
|
|
37
|
-
risk_network-0.0.
|
|
38
|
-
risk_network-0.0.
|
|
39
|
-
risk_network-0.0.
|
|
40
|
-
risk_network-0.0.
|
|
41
|
-
risk_network-0.0.
|
|
37
|
+
risk_network-0.0.16b1.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
|
38
|
+
risk_network-0.0.16b1.dist-info/METADATA,sha256=_CFQPvw-w4VBuXqn_4TxFyTQ6EQsO2KxVMBWhkBhpiI,5390
|
|
39
|
+
risk_network-0.0.16b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
risk_network-0.0.16b1.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
|
41
|
+
risk_network-0.0.16b1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|