napistu 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version
5
+
6
# Resolve the installed distribution's version from its package metadata.
try:
    __version__ = version("napistu")
except PackageNotFoundError:
    # The distribution metadata is unavailable (the package is not installed).
    # Fall back to a PEP 440-compatible placeholder so that `__version__`
    # is always defined instead of raising AttributeError on access.
    __version__ = "0.0.0+unknown"
@@ -0,0 +1,82 @@
1
+ """Module for comparing observed values to null distributions."""
2
+
3
+ import logging
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def calculate_quantiles(
    observed_df: pd.DataFrame, null_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Calculate quantiles of observed scores relative to per-feature null
    distributions using a single broadcast comparison.

    Parameters
    ----------
    observed_df : pd.DataFrame
        DataFrame with features as index and attributes as columns containing
        observed scores.
    null_df : pd.DataFrame
        DataFrame with null scores, features as index (multiple rows per feature)
        and attributes as columns. Features which are not present in
        ``observed_df`` are ignored.

    Returns
    -------
    pd.DataFrame
        DataFrame with same structure as observed_df containing quantiles.
        Each value represents the proportion of that feature's null values
        which are <= the observed value. The proportion is taken over the
        feature's own number of null samples, so features with fewer null
        samples are not biased towards zero.

    Raises
    ------
    ValueError
        If the columns of the two tables differ, if an observed feature has no
        null samples, or if either table contains NaN values.
    """

    if not observed_df.columns.equals(null_df.columns):
        raise ValueError("Column names must match between observed and null data")

    # Validate all features present
    missing_features = set(observed_df.index) - set(null_df.index)
    if missing_features:
        raise ValueError(f"Missing features in null data: {missing_features}")

    # Check for NaN values
    if observed_df.isna().any().any():
        raise ValueError("NaN values found in observed data")
    if null_df.isna().any().any():
        raise ValueError("NaN values found in null data")

    n_features, n_attributes = observed_df.shape
    if n_features == 0:
        # Nothing to score; return an empty frame with the expected labels
        return pd.DataFrame(
            index=observed_df.index, columns=observed_df.columns, dtype=float
        )

    # Group the null data once, keeping only features which are scored and
    # resolving each one to its row position(s) in observed_df
    observed_index = observed_df.index
    null_blocks = [
        (observed_index.get_loc(feature), group.values)
        for feature, group in null_df.groupby(level=0)
        if feature in observed_index
    ]

    # Check for unequal sample sizes and warn
    sample_counts = {len(values) for _, values in null_blocks}
    if len(sample_counts) > 1:
        logger.warning("Unequal null sample counts per feature may affect results")

    # Pre-allocate 3D array: [features, null_samples, attributes].
    # Features with fewer samples than the maximum are padded with NaN.
    max_null_samples = max(sample_counts)
    null_array = np.full((n_features, max_null_samples, n_attributes), np.nan)
    null_counts = np.zeros(n_features, dtype=float)

    for position, values in null_blocks:
        null_array[position, : len(values)] = values
        null_counts[position] = len(values)

    # Broadcast comparison: observed[features, 1, attributes] vs null[features, samples, attributes]
    # Less than or equal to is used to calculate the quantile consistent with the R quantile function.
    # NaN padding always compares False, so it never adds to the numerator.
    comparisons = null_array <= observed_df.values[:, np.newaxis, :]

    # The comparison result is boolean and therefore contains no NaN, so the
    # padding cannot be skipped with nanmean; divide by each feature's true
    # number of null samples instead.
    quantiles = comparisons.sum(axis=1) / null_counts[:, np.newaxis]

    return pd.DataFrame(quantiles, index=observed_df.index, columns=observed_df.columns)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -12,7 +12,7 @@ napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,2
12
12
  napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
13
13
  napistu/context/filtering.py,sha256=l1oq-43ysSGqU9VmhTOO_pYT4DSMf20yxvktPC1MI0I,13696
14
14
  napistu/gcs/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
15
- napistu/gcs/constants.py,sha256=5hLp1pL7SHEiscLNKcdI4IeOP4vUaasBCIHJrEedl0o,2909
15
+ napistu/gcs/constants.py,sha256=CuWsLXC1ZvOIRCQy7-ZeZdgRPmCVAqzvMuzJakGRk10,2923
16
16
  napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
17
17
  napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
18
18
  napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
@@ -58,13 +58,13 @@ napistu/modify/gaps.py,sha256=CV-bdSfanhrnCIFVWfNuQJbtjvj4hsEwheKYR-Z3tNA,26844
58
58
  napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
59
59
  napistu/modify/uncompartmentalize.py,sha256=y5LkXn5x6u80dB_McfAIh88BxZGIAVFLujkP7sPNRh0,9690
60
60
  napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
61
- napistu/network/constants.py,sha256=LPsMtbAoier9Qor9REA7UIx7vnBtDpxMbcv3yI-c-2s,6441
61
+ napistu/network/constants.py,sha256=nG_lUZYLgop8oxOGjDYqvxXJzVdOwKZ3aWnxlhtSaIo,6915
62
62
  napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
63
- napistu/network/ig_utils.py,sha256=JSlf_sZtw3DiiSIiYJ2YqJFEP4hVJMwNRox2qYTA4zY,11470
63
+ napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
64
64
  napistu/network/neighborhoods.py,sha256=g5QeGaizSfW4nNe9YZY86g8q79EQmuvSwipaNPnOVqA,56121
65
- napistu/network/net_create.py,sha256=Ylt4osGWPfj9MSDPy67pOTmLERGtS3cStR94UaqmXes,59082
65
+ napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
66
66
  napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
67
- napistu/network/net_propagation.py,sha256=S70zl0W4aYu5RPf5PZh829xT1xUyeTdi3TcIaFeYMww,4924
67
+ napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
68
68
  napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
69
69
  napistu/network/ng_utils.py,sha256=c1tHXz_JcH01D5KovNQmRLTEVxpCkCe36otULq-liz8,15579
70
70
  napistu/network/paths.py,sha256=r6LVKVvX7i3ctBA5r-xvHfpH5Zsd0VDHUCtin2iag20,17453
@@ -83,7 +83,9 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
83
83
  napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
84
84
  napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
85
85
  napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
86
- napistu-0.4.1.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
86
+ napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
87
+ napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
88
+ napistu-0.4.2.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
87
89
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
90
  tests/conftest.py,sha256=t-GHb0MvSsC-MyhkFpOy2K3t5fi7eaig_Rc2xEQC-t8,9678
89
91
  tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
@@ -104,11 +106,11 @@ tests/test_mcp_config.py,sha256=GTu9vywqAHTYkolywdYS_BEIW3gBzs4A4qcneMSPpRk,7007
104
106
  tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q3DJi8Ra4,810
105
107
  tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
106
108
  tests/test_network_data_handling.py,sha256=4aS8z2AlKkVd-JhK4BQ8fjeiW8_bJ1hZ3cc71Jh7Glk,12716
107
- tests/test_network_ig_utils.py,sha256=Buoh570mNm5pcac3Hf6f3pevCjWfBwPfKuD8IkDLg58,2120
109
+ tests/test_network_ig_utils.py,sha256=XihmEpX890sr-LYmsb_t4aN0sKIDWCnXkTpDhpuTDmw,7199
108
110
  tests/test_network_neighborhoods.py,sha256=8BV17m5X1OUd5FwasTTYUOkNYUHDPUkxOKH_VZCsyBE,631
109
111
  tests/test_network_net_create.py,sha256=L0U91b4jVHDuC3DFo-_BUFVuv4GuSxZuLAo7r-7EJxY,12877
110
112
  tests/test_network_net_create_utils.py,sha256=0J6KIh2HBc4koFsvwMaul1QRtj5x92kR9HBdDZajnAw,18971
111
- tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
113
+ tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40OI9Dsb0xY,14999
112
114
  tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
113
115
  tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
114
116
  tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
@@ -127,12 +129,13 @@ tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
127
129
  tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
128
130
  tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
129
131
  tests/test_source.py,sha256=hT0IlpexR5zP0OhWl5BBaho9d1aCYQlFZLwRIRRnw_Y,1969
132
+ tests/test_statistics_quantiles.py,sha256=yNDeqwgbP-1Rx3C_dLX_wnwT_Lr-iJWClmeKmElqmTE,4984
130
133
  tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
131
134
  tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
132
135
  tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
133
136
  tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- napistu-0.4.1.dist-info/METADATA,sha256=zl_710wCsatB3lKZAgHba-MLEOPSDOyrxs3b5FB6toA,4078
135
- napistu-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
136
- napistu-0.4.1.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
137
- napistu-0.4.1.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
138
- napistu-0.4.1.dist-info/RECORD,,
137
+ napistu-0.4.2.dist-info/METADATA,sha256=6P_9Mmno6pVu4Me-3QdcMtiGOhCcajTqm5LP_Hns4lI,4078
138
+ napistu-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
139
+ napistu-0.4.2.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
140
+ napistu-0.4.2.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
141
+ napistu-0.4.2.dist-info/RECORD,,
@@ -1,5 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import igraph as ig
4
+ import numpy as np
3
5
  import pytest
4
6
 
5
7
  from napistu.network import ig_utils
@@ -57,3 +59,134 @@ def test_filter_to_largest_subgraphs(multi_component_graph):
57
59
  # Test invalid top_k
58
60
  with pytest.raises(ValueError):
59
61
  ig_utils.filter_to_largest_subgraphs(multi_component_graph, top_k=0)
62
+
63
+
64
def test_mask_functions_valid_inputs():
    """Check mask parsing and mask resolution for each accepted mask input format."""
    n_vertices = 5
    g = ig.Graph(n_vertices)
    g.vs["attr1"] = [0, 1, 2, 0, 3]
    g.vs["attr2"] = [1, 0, 1, 2, 0]
    g.vs["name"] = ["A", "B", "C", "D", "E"]

    attr_names = ["attr1", "attr2"]

    # Expected masks reused below; the attribute-derived ones line up with the
    # nonzero entries of attr1 / attr2 defined above.
    every_vertex = np.ones(n_vertices, dtype=bool)
    attr1_mask = np.array([False, True, True, False, True])
    attr2_mask = np.array([True, False, True, True, False])

    # Case 1: no mask given -> no spec per attribute, every vertex selected
    parsed = ig_utils._parse_mask_input(None, attr_names)
    assert parsed == {"attr1": None, "attr2": None}

    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], every_vertex)
    assert np.array_equal(resolved["attr2"], every_vertex)

    # Case 2: the "attr" keyword -> each attribute is masked by itself
    parsed = ig_utils._parse_mask_input("attr", attr_names)
    assert parsed == {"attr1": "attr1", "attr2": "attr2"}

    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], attr1_mask)
    assert np.array_equal(resolved["attr2"], attr2_mask)

    # Case 3: one attribute name -> that attribute is the spec for all attributes
    parsed = ig_utils._parse_mask_input("attr1", attr_names)
    assert parsed == {"attr1": "attr1", "attr2": "attr1"}

    # Case 4: explicit boolean array -> returned unchanged for every attribute
    explicit_mask = np.array([True, False, True, False, False])
    parsed = ig_utils._parse_mask_input(explicit_mask, attr_names)
    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], explicit_mask)
    assert np.array_equal(resolved["attr2"], explicit_mask)

    # Case 5: list of vertex indices
    selected = np.array([True, False, True, False, True])
    parsed = ig_utils._parse_mask_input([0, 2, 4], attr_names)
    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], selected)

    # Case 6: list of vertex names selecting the same vertices as case 5
    parsed = ig_utils._parse_mask_input(["A", "C", "E"], attr_names)
    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], selected)

    # Case 7: per-attribute dictionary is passed through as-is
    per_attribute = {"attr1": "attr1", "attr2": None}
    parsed = ig_utils._parse_mask_input(per_attribute, attr_names)
    assert parsed == per_attribute

    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], attr1_mask)
    assert np.array_equal(resolved["attr2"], every_vertex)
122
+
123
+
124
def test_mask_functions_error_cases():
    """Check that invalid mask inputs and specifications raise ValueError."""
    attr_values = [1, 2, 3]

    # One graph lacking the 'name' vertex attribute ...
    unnamed_graph = ig.Graph(3)
    unnamed_graph.vs["attr1"] = attr_values

    # ... and one that has it
    named_graph = ig.Graph(3)
    named_graph.vs["attr1"] = attr_values
    named_graph.vs["name"] = ["A", "B", "C"]

    attr_names = ["attr1", "attr2"]

    # Case 1: a mask input of an unsupported type
    with pytest.raises(ValueError, match="Invalid mask input type"):
        ig_utils._parse_mask_input(123, attr_names)

    # Case 2: dictionary input which omits 'attr2'
    with pytest.raises(
        ValueError, match="Attribute 'attr2' not found in mask dictionary"
    ):
        ig_utils._parse_mask_input({"attr1": None}, attr_names)

    # Case 3: name-based mask applied to the graph without names
    name_based_specs = {"attr1": ["A", "B"]}
    with pytest.raises(
        ValueError, match="Graph has no 'name' attribute for string mask"
    ):
        ig_utils._get_attribute_masks(unnamed_graph, name_based_specs)

    # Case 4: a specification value of an unsupported type
    bad_type_specs = {"attr1": 123}
    with pytest.raises(
        ValueError, match="Invalid mask specification for attribute 'attr1'"
    ):
        ig_utils._get_attribute_masks(named_graph, bad_type_specs)
161
+
162
+
163
def test_ensure_nonnegative_vertex_attribute():
    """Check `_ensure_valid_attribute` on accepted attributes and on each rejected case."""
    g = ig.Graph(4)
    vertex_attributes = {
        "good_attr": [1.0, 2.0, 0.0, 3.0],
        "zero_attr": [0.0, 0.0, 0.0, 0.0],
        "negative_attr": [1.0, -1.0, 2.0, 0.0],
        "mixed_attr": [1.0, None, 2.0, 0.0],  # contains a None entry
    }
    for attr_name, values in vertex_attributes.items():
        g.vs[attr_name] = values

    # Accepted: a fully populated attribute comes back unchanged
    values = ig_utils._ensure_valid_attribute(g, "good_attr")
    assert np.array_equal(values, np.array([1.0, 2.0, 0.0, 3.0]))

    # Accepted: the None entry is expected to come back as 0
    values = ig_utils._ensure_valid_attribute(g, "mixed_attr")
    assert np.array_equal(values, np.array([1.0, 0.0, 2.0, 0.0]))

    # Rejected: all-zero, negative, and missing attributes (checked in that order)
    rejected = (
        ("zero_attr", "zero for all vertices"),
        ("negative_attr", "contains negative values"),
        ("nonexistent_attr", "missing for all vertices"),
    )
    for attr_name, message in rejected:
        with pytest.raises(ValueError, match=message):
            ig_utils._ensure_valid_attribute(g, attr_name)