napistu 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ """Module for comparing observed values to null distributions."""
2
+
3
+ import logging
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def calculate_quantiles(
    observed_df: pd.DataFrame, null_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Calculate quantiles of observed scores relative to per-feature null distributions.

    For every (feature, attribute) cell the result is the fraction of that
    feature's null values which are <= the observed value. The comparison is
    vectorized by stacking the null samples into a 3D array.

    Parameters
    ----------
    observed_df : pd.DataFrame
        DataFrame with features as index and attributes as columns containing
        observed scores.
    null_df : pd.DataFrame
        DataFrame with null scores, features as index (multiple rows per feature)
        and attributes as columns. Rows for features which are not present in
        ``observed_df`` are ignored.

    Returns
    -------
    pd.DataFrame
        DataFrame with the same index and columns as ``observed_df`` containing
        quantiles. Each value is the proportion of that feature's null values
        which are <= the observed value.

    Raises
    ------
    ValueError
        If the columns of the two frames differ, if an observed feature has no
        rows in ``null_df``, or if either frame contains NaN values.
    """

    if not observed_df.columns.equals(null_df.columns):
        raise ValueError("Column names must match between observed and null data")

    # Validate all features present
    missing_features = set(observed_df.index) - set(null_df.index)
    if missing_features:
        raise ValueError(f"Missing features in null data: {missing_features}")

    # Check for NaN values
    if observed_df.isna().any().any():
        raise ValueError("NaN values found in observed data")
    if null_df.isna().any().any():
        raise ValueError("NaN values found in null data")

    # Null rows for features without an observed row have nowhere to go in the
    # output; drop them so the index lookup below cannot fail on them.
    relevant_null = null_df.loc[null_df.index.isin(observed_df.index)]
    null_grouped = relevant_null.groupby(level=0)
    group_sizes = null_grouped.size()

    # Check for unequal sample sizes and warn
    if group_sizes.nunique() > 1:
        logging.getLogger(__name__).warning(
            "Unequal null sample counts per feature may affect results"
        )

    max_null_samples = int(group_sizes.max()) if len(group_sizes) > 0 else 0

    observed_values = observed_df.values

    # Pre-allocate 3D array: [features, null_samples, attributes]
    # Slots beyond a feature's own sample count stay NaN (padding).
    null_array = np.full(
        (len(observed_df), max_null_samples, len(observed_df.columns)), np.nan
    )

    for feature, group in null_grouped:
        feature_idx = observed_df.index.get_loc(feature)
        null_array[feature_idx, : len(group)] = group.values

    # Broadcast comparison: observed[features, 1, attributes] vs null[features, samples, attributes]
    # Less than or equal to is used to calculate the quantile consistent with the R quantile function
    comparisons = null_array <= observed_values[:, np.newaxis, :]

    # The comparison result is boolean, so NaN padding shows up as False rather
    # than NaN and a nan-aware mean would still count it in the denominator.
    # Divide by the number of real (non-padded) null samples instead.
    n_valid = (~np.isnan(null_array)).sum(axis=1)
    quantiles = comparisons.sum(axis=1) / n_valid

    return pd.DataFrame(quantiles, index=observed_df.index, columns=observed_df.columns)
napistu/utils.py CHANGED
@@ -14,7 +14,7 @@ import zipfile
14
14
  from contextlib import closing
15
15
  from itertools import starmap
16
16
  from textwrap import fill
17
- from typing import Any, List, Optional, Union
17
+ from typing import Any, Dict, Optional, List, Union
18
18
  from urllib.parse import urlparse
19
19
  from pathlib import Path
20
20
  from requests.adapters import HTTPAdapter
@@ -1131,6 +1131,28 @@ def safe_fill(x: str, fill_width: int = 15) -> str:
1131
1131
  return fill(x, fill_width)
1132
1132
 
1133
1133
 
1134
def match_regex_dict(s: str, regex_dict: Dict[str, Any]) -> Optional[Any]:
    """
    Apply each regex in regex_dict to the string s. If a regex matches, return its value.
    If no regex matches, return None.

    Patterns are tried in the dictionary's iteration order and matched with
    ``re.search``, so a pattern may match anywhere in ``s``.

    Parameters
    ----------
    s : str
        The string to test.
    regex_dict : dict
        Dictionary where keys are regex patterns (str), and values are the values to return.

    Returns
    -------
    The value associated with the first matching regex, or None if no match.
    """
    for pattern, value in regex_dict.items():
        if re.search(pattern, s):
            return value
    return None
1154
+
1155
+
1134
1156
  def _add_nameness_score_wrapper(df, name_var, table_schema):
1135
1157
  """Call _add_nameness_score with default value."""
1136
1158
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -1,29 +1,30 @@
1
1
  napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
2
2
  napistu/__main__.py,sha256=xwlbh_0Ig3a-yG6BIJRiDPSN9R2HnX2pEBvlodlO6h4,29015
3
- napistu/consensus.py,sha256=xWXiqIM6ot-SSPJZXTrVpohbINSCkZXBtRi-5REfk_g,69897
3
+ napistu/consensus.py,sha256=SDw58vkDivzy5AiOQUnf5vUbFxmSrMGMMmptDMZhk0E,69807
4
4
  napistu/constants.py,sha256=8sp1l0cxu2rsnCrWBEEwhcBKvDtc4u0D0f_72zILLW0,13427
5
5
  napistu/identifiers.py,sha256=e2-nTVzr5AINa0y1ER9218bKXyF2kAeJ9At22S4Z00o,33914
6
6
  napistu/indices.py,sha256=Zjg3gE0JQ3T879lCPazYg-WXVE6hvcAr713ZKpJ32rk,9830
7
7
  napistu/sbml_dfs_core.py,sha256=s0OyoHs-AjOcbZu1d3KNkW_PI7Rxbhu5ZLpfQeO4iY8,72639
8
- napistu/sbml_dfs_utils.py,sha256=w5dFcJFDKnKDK9jxPOCuCW8IccxdXmyNmP9vCUhVdf8,46184
9
- napistu/source.py,sha256=UGpN70bqbC9gnKmM0ivSdQYim9hfzgABeXoQKzRr9oU,13646
10
- napistu/utils.py,sha256=PEAsLn7VGN8JlNJQcAMYpjF1gr2mWmb5IqBsypP9hi0,35768
8
+ napistu/sbml_dfs_utils.py,sha256=SOy1Ii2hDFOfQa7pFAJS9EfAmfBVD_sHvDJBVmCN_p8,46456
9
+ napistu/source.py,sha256=iDDKpN-4k_W_tyxEjqe_z-yPJv7uoFRRBhkiBtOH5C8,20416
10
+ napistu/utils.py,sha256=p2sJxTklmV30XS6hanJRjcdfgeaZpkULuMyQX3BPP0c,36404
11
11
  napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,242
12
12
  napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
13
13
  napistu/context/filtering.py,sha256=l1oq-43ysSGqU9VmhTOO_pYT4DSMf20yxvktPC1MI0I,13696
14
14
  napistu/gcs/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
15
- napistu/gcs/constants.py,sha256=5hLp1pL7SHEiscLNKcdI4IeOP4vUaasBCIHJrEedl0o,2909
15
+ napistu/gcs/constants.py,sha256=CuWsLXC1ZvOIRCQy7-ZeZdgRPmCVAqzvMuzJakGRk10,2923
16
16
  napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
17
17
  napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
18
18
  napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
19
19
  napistu/ingestion/bigg.py,sha256=f65--8ARe248eYCUJpFMF284Wz53sLyFyBuwelxHmJA,4340
20
- napistu/ingestion/constants.py,sha256=9UP47VImZ11q0kz17N3EJg2155USqLewwNWyKpA-cbA,8089
20
+ napistu/ingestion/constants.py,sha256=jo3v8Z7Y_tNNhTmEcokVOh1HBJFAXc-Z38S4mG58qfo,10059
21
21
  napistu/ingestion/gtex.py,sha256=X0hSC1yrpf4xSJWFhpeNcnHwJzKDII2MvjfUqYA0JN8,3720
22
22
  napistu/ingestion/hpa.py,sha256=R27ExrryKQ4Crxv9ATXmBJCa-yd01TMOrDjkeBhIQac,5054
23
23
  napistu/ingestion/identifiers_etl.py,sha256=6ppDUA6lEZurdmVbiFLOUzphYbr-hndMhtqsQnq_yAc,5009
24
24
  napistu/ingestion/napistu_edgelist.py,sha256=4RLXsoIk_-Atu-Nqme_t1JpEpBET26VIY2Y_Hcd3sMw,3580
25
25
  napistu/ingestion/obo.py,sha256=AQkIPWbjA464Lma0tx91JucWkIwLjC7Jgv5VHGRTDkE,9601
26
26
  napistu/ingestion/psi_mi.py,sha256=5eJjm7XWogL9oTyGqR52kntHClLwLsTePKqCvUGyi-w,10111
27
+ napistu/ingestion/reactom_fi.py,sha256=hKdOY2wNtcNk6WlnHnNalryiXv6mtcWUiBW9isXPB0Y,6991
27
28
  napistu/ingestion/reactome.py,sha256=Hn9X-vDp4o_HK-OtaQvel3vJeZ8_TC1-4N2rruK9Oks,7099
28
29
  napistu/ingestion/sbml.py,sha256=l8Z98yWuOIRGns8G4UNnoQz7v_xmukZb_IZ_5ye34Ko,25296
29
30
  napistu/ingestion/string.py,sha256=go1WGTkoLJejX7GQWf9bFeInFGAw4jNSpS2B_Zr5f_s,11364
@@ -58,17 +59,17 @@ napistu/modify/gaps.py,sha256=CV-bdSfanhrnCIFVWfNuQJbtjvj4hsEwheKYR-Z3tNA,26844
58
59
  napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
59
60
  napistu/modify/uncompartmentalize.py,sha256=y5LkXn5x6u80dB_McfAIh88BxZGIAVFLujkP7sPNRh0,9690
60
61
  napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
61
- napistu/network/constants.py,sha256=LPsMtbAoier9Qor9REA7UIx7vnBtDpxMbcv3yI-c-2s,6441
62
+ napistu/network/constants.py,sha256=nG_lUZYLgop8oxOGjDYqvxXJzVdOwKZ3aWnxlhtSaIo,6915
62
63
  napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
63
- napistu/network/ig_utils.py,sha256=JSlf_sZtw3DiiSIiYJ2YqJFEP4hVJMwNRox2qYTA4zY,11470
64
+ napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
64
65
  napistu/network/neighborhoods.py,sha256=g5QeGaizSfW4nNe9YZY86g8q79EQmuvSwipaNPnOVqA,56121
65
- napistu/network/net_create.py,sha256=Ylt4osGWPfj9MSDPy67pOTmLERGtS3cStR94UaqmXes,59082
66
+ napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
66
67
  napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
67
- napistu/network/net_propagation.py,sha256=S70zl0W4aYu5RPf5PZh829xT1xUyeTdi3TcIaFeYMww,4924
68
+ napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
68
69
  napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
69
- napistu/network/ng_utils.py,sha256=c1tHXz_JcH01D5KovNQmRLTEVxpCkCe36otULq-liz8,15579
70
+ napistu/network/ng_utils.py,sha256=ahSm-8M2pV662V7MMVcGaoguBM55_y-F7LDmZSVp9ag,15951
70
71
  napistu/network/paths.py,sha256=r6LVKVvX7i3ctBA5r-xvHfpH5Zsd0VDHUCtin2iag20,17453
71
- napistu/network/precompute.py,sha256=ibL0ByY7Wp5kEfIG3LUDpQKdvAeQX0DNkT_46g2YrGc,8367
72
+ napistu/network/precompute.py,sha256=ARU2tktWnxFISaHAY8chpkg8pusZPv7TT5jSIB9eFF0,10081
72
73
  napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
73
74
  napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
74
75
  napistu/ontologies/dogma.py,sha256=VVj6NKBgNym4SdOSu8g22OohALj7cbObhIJmdY2Sfy0,8860
@@ -83,7 +84,10 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
83
84
  napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
84
85
  napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
85
86
  napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
86
- napistu-0.4.1.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
87
+ napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
88
+ napistu/statistics/hypothesis_testing.py,sha256=k0mBFAMF0XHVcKwS26aPnEbq_FIUVwXU1gZ6cKfFbCk,2190
89
+ napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
90
+ napistu-0.4.3.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
87
91
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
92
  tests/conftest.py,sha256=t-GHb0MvSsC-MyhkFpOy2K3t5fi7eaig_Rc2xEQC-t8,9678
89
93
  tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
@@ -104,15 +108,15 @@ tests/test_mcp_config.py,sha256=GTu9vywqAHTYkolywdYS_BEIW3gBzs4A4qcneMSPpRk,7007
104
108
  tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q3DJi8Ra4,810
105
109
  tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
106
110
  tests/test_network_data_handling.py,sha256=4aS8z2AlKkVd-JhK4BQ8fjeiW8_bJ1hZ3cc71Jh7Glk,12716
107
- tests/test_network_ig_utils.py,sha256=Buoh570mNm5pcac3Hf6f3pevCjWfBwPfKuD8IkDLg58,2120
111
+ tests/test_network_ig_utils.py,sha256=XihmEpX890sr-LYmsb_t4aN0sKIDWCnXkTpDhpuTDmw,7199
108
112
  tests/test_network_neighborhoods.py,sha256=8BV17m5X1OUd5FwasTTYUOkNYUHDPUkxOKH_VZCsyBE,631
109
113
  tests/test_network_net_create.py,sha256=L0U91b4jVHDuC3DFo-_BUFVuv4GuSxZuLAo7r-7EJxY,12877
110
114
  tests/test_network_net_create_utils.py,sha256=0J6KIh2HBc4koFsvwMaul1QRtj5x92kR9HBdDZajnAw,18971
111
- tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
115
+ tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40OI9Dsb0xY,14999
112
116
  tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
113
117
  tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
114
118
  tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
115
- tests/test_network_precompute.py,sha256=zwJrKNC3s8rIrsyAQfQMYxbl8HZXUr7u09nMJ_K8jiU,9005
119
+ tests/test_network_precompute.py,sha256=IPr1KhtxBD0fXx_2TvZqnevrD-Iig35otb8yloRFpRc,10014
116
120
  tests/test_ontologies_genodexito.py,sha256=6fINyUiubHZqu7qxye09DQfJXw28ZMAJc3clPb-cCoY,2298
117
121
  tests/test_ontologies_id_tables.py,sha256=CpwpbmQvTc1BaVd6jbDKHAVE2etwN0vx93nC8jpnMlE,7265
118
122
  tests/test_ontologies_mygene.py,sha256=VkdRcKIWmcG6V-2dpfvsBiOJN5dO-j0RqZNxtJRcyBU,1583
@@ -122,17 +126,18 @@ tests/test_rpy2_callr.py,sha256=V4a-QH5krgYOQRgqzksMzIkGAFjBqKOAqgprxrH6bE0,2904
122
126
  tests/test_rpy2_init.py,sha256=T3gnxC1O7XNvYM2P4018ikpPPAy-kwQLm7Erj0RfA-4,5895
123
127
  tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
124
128
  tests/test_sbml_dfs_core.py,sha256=nnLPpZTVtCznOBohk7CX67x6sMqktJWt-sZMWQKoaDs,26521
125
- tests/test_sbml_dfs_utils.py,sha256=gWIhzUEtQlOR9c1TiCyhlSAELmWnBSncn6vCEqH5hl0,11029
129
+ tests/test_sbml_dfs_utils.py,sha256=ZD9x2B81fsfYEjAV9wphHOR7ywjNcfvfw1LGNv4PxUA,11471
126
130
  tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
127
131
  tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
128
- tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
129
- tests/test_source.py,sha256=hT0IlpexR5zP0OhWl5BBaho9d1aCYQlFZLwRIRRnw_Y,1969
132
+ tests/test_source.py,sha256=iV-Yyu8flhIGWF17SCL8msG2bjqwb9w2IZ694b0iZ-o,2985
133
+ tests/test_statistics_hypothesis_testing.py,sha256=qD-oS9zo5JlH-jdtiOrWAKI4nKFuZvvh6361_pFSpIs,2259
134
+ tests/test_statistics_quantiles.py,sha256=yNDeqwgbP-1Rx3C_dLX_wnwT_Lr-iJWClmeKmElqmTE,4984
130
135
  tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
131
136
  tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
132
137
  tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
133
138
  tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- napistu-0.4.1.dist-info/METADATA,sha256=zl_710wCsatB3lKZAgHba-MLEOPSDOyrxs3b5FB6toA,4078
135
- napistu-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
136
- napistu-0.4.1.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
137
- napistu-0.4.1.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
138
- napistu-0.4.1.dist-info/RECORD,,
139
+ napistu-0.4.3.dist-info/METADATA,sha256=gV0a41vyQ52Ja15QyLSPGfeIJPj6oQRTC00HsxJjG88,4078
140
+ napistu-0.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
141
+ napistu-0.4.3.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
142
+ napistu-0.4.3.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
143
+ napistu-0.4.3.dist-info/RECORD,,
@@ -1,5 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import igraph as ig
4
+ import numpy as np
3
5
  import pytest
4
6
 
5
7
  from napistu.network import ig_utils
@@ -57,3 +59,134 @@ def test_filter_to_largest_subgraphs(multi_component_graph):
57
59
  # Test invalid top_k
58
60
  with pytest.raises(ValueError):
59
61
  ig_utils.filter_to_largest_subgraphs(multi_component_graph, top_k=0)
62
+
63
+
64
def test_mask_functions_valid_inputs():
    """Exercise the mask helpers with each accepted form of mask input."""
    attrs = ["attr1", "attr2"]

    # Five-vertex graph with two numeric attributes and vertex names
    g = ig.Graph(5)
    g.vs["attr1"] = [0, 1, 2, 0, 3]
    g.vs["attr2"] = [1, 0, 1, 2, 0]
    g.vs["name"] = ["A", "B", "C", "D", "E"]

    # Expected masks reused below; the attribute masks flag non-zero entries
    every_vertex = np.ones(5, dtype=bool)
    attr1_nonzero = np.array([False, True, True, False, True])
    attr2_nonzero = np.array([True, False, True, True, False])

    # Case 1: no mask at all -> every vertex is kept for every attribute
    parsed = ig_utils._parse_mask_input(None, attrs)
    assert parsed == {"attr1": None, "attr2": None}

    result = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(result["attr1"], every_vertex)
    assert np.array_equal(result["attr2"], every_vertex)

    # Case 2: the "attr" keyword -> each attribute is masked by itself
    parsed = ig_utils._parse_mask_input("attr", attrs)
    assert parsed == {"attr1": "attr1", "attr2": "attr2"}

    result = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(result["attr1"], attr1_nonzero)
    assert np.array_equal(result["attr2"], attr2_nonzero)

    # Case 3: one attribute name -> that attribute is the spec for all
    parsed = ig_utils._parse_mask_input("attr1", attrs)
    assert parsed == {"attr1": "attr1", "attr2": "attr1"}

    # Case 4: a boolean array is passed through unchanged
    flags = np.array([True, False, True, False, False])
    result = ig_utils._get_attribute_masks(
        g, ig_utils._parse_mask_input(flags, attrs)
    )
    assert np.array_equal(result["attr1"], flags)
    assert np.array_equal(result["attr2"], flags)

    # Case 5: a list of vertex indices
    picked = np.array([True, False, True, False, True])
    result = ig_utils._get_attribute_masks(
        g, ig_utils._parse_mask_input([0, 2, 4], attrs)
    )
    assert np.array_equal(result["attr1"], picked)

    # Case 6: a list of vertex names selecting the same vertices
    result = ig_utils._get_attribute_masks(
        g, ig_utils._parse_mask_input(["A", "C", "E"], attrs)
    )
    assert np.array_equal(result["attr1"], picked)

    # Case 7: an explicit per-attribute dictionary
    per_attr = {"attr1": "attr1", "attr2": None}
    parsed = ig_utils._parse_mask_input(per_attr, attrs)
    assert parsed == per_attr

    result = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(result["attr1"], attr1_nonzero)
    assert np.array_equal(result["attr2"], every_vertex)
122
+
123
+
124
def test_mask_functions_error_cases():
    """Check that the mask helpers raise ValueError on each kind of invalid input."""
    attrs = ["attr1", "attr2"]

    # One graph with vertex names and one without
    named = ig.Graph(3)
    named.vs["attr1"] = [1, 2, 3]
    named.vs["name"] = ["A", "B", "C"]

    unnamed = ig.Graph(3)
    unnamed.vs["attr1"] = [1, 2, 3]

    # Case 1: a mask of an unsupported type
    with pytest.raises(ValueError, match="Invalid mask input type"):
        ig_utils._parse_mask_input(123, attrs)

    # Case 2: a mask dictionary that omits one of the attributes ('attr2')
    with pytest.raises(
        ValueError, match="Attribute 'attr2' not found in mask dictionary"
    ):
        ig_utils._parse_mask_input({"attr1": None}, attrs)

    # Case 3: vertex-name masks need a 'name' attribute on the graph
    with pytest.raises(
        ValueError, match="Graph has no 'name' attribute for string mask"
    ):
        ig_utils._get_attribute_masks(unnamed, {"attr1": ["A", "B"]})

    # Case 4: a per-attribute spec of an unsupported type
    with pytest.raises(
        ValueError, match="Invalid mask specification for attribute 'attr1'"
    ):
        ig_utils._get_attribute_masks(named, {"attr1": 123})
161
+
162
+
163
def test_ensure_nonnegative_vertex_attribute():
    """Check _ensure_valid_attribute on accepted and rejected vertex attributes."""
    # Four-vertex graph with one attribute per scenario
    g = ig.Graph(4)
    g.vs["good_attr"] = [1.0, 2.0, 0.0, 3.0]
    g.vs["zero_attr"] = [0.0, 0.0, 0.0, 0.0]
    g.vs["negative_attr"] = [1.0, -1.0, 2.0, 0.0]
    g.vs["mixed_attr"] = [1.0, None, 2.0, 0.0]  # contains a None entry

    # Case 1: a valid attribute comes back as-is
    values = ig_utils._ensure_valid_attribute(g, "good_attr")
    assert np.array_equal(values, np.array([1.0, 2.0, 0.0, 3.0]))

    # Case 2: None entries are expected to come back as 0
    values = ig_utils._ensure_valid_attribute(g, "mixed_attr")
    assert np.array_equal(values, np.array([1.0, 0.0, 2.0, 0.0]))

    # Cases 3-5: all-zero, negative and missing attributes are rejected
    with pytest.raises(ValueError, match="zero for all vertices"):
        ig_utils._ensure_valid_attribute(g, "zero_attr")

    with pytest.raises(ValueError, match="contains negative values"):
        ig_utils._ensure_valid_attribute(g, "negative_attr")

    with pytest.raises(ValueError, match="missing for all vertices"):
        ig_utils._ensure_valid_attribute(g, "nonexistent_attr")