localgraph 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- localgraph-0.1.0/LICENSE +21 -0
- localgraph-0.1.0/PKG-INFO +155 -0
- localgraph-0.1.0/README.md +129 -0
- localgraph-0.1.0/localgraph/__init__.py +8 -0
- localgraph-0.1.0/localgraph/evaluation/__init__.py +0 -0
- localgraph-0.1.0/localgraph/evaluation/eval.py +151 -0
- localgraph-0.1.0/localgraph/examples/__init__.py +0 -0
- localgraph-0.1.0/localgraph/examples/simple_example.py +66 -0
- localgraph-0.1.0/localgraph/pfs/__init__.py +2 -0
- localgraph-0.1.0/localgraph/pfs/helpers.py +140 -0
- localgraph-0.1.0/localgraph/pfs/main.py +133 -0
- localgraph-0.1.0/localgraph/plotting/__init__.py +0 -0
- localgraph-0.1.0/localgraph/plotting/helpers.py +58 -0
- localgraph-0.1.0/localgraph/plotting/plot_graph.py +236 -0
- localgraph-0.1.0/localgraph.egg-info/PKG-INFO +155 -0
- localgraph-0.1.0/localgraph.egg-info/SOURCES.txt +19 -0
- localgraph-0.1.0/localgraph.egg-info/dependency_links.txt +1 -0
- localgraph-0.1.0/localgraph.egg-info/requires.txt +4 -0
- localgraph-0.1.0/localgraph.egg-info/top_level.txt +1 -0
- localgraph-0.1.0/setup.cfg +4 -0
- localgraph-0.1.0/setup.py +25 -0
localgraph-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Omar Melikechi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: localgraph
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local graph estimation with pathwise feature selection
|
|
5
|
+
Author: Omar Melikechi
|
|
6
|
+
Author-email: omar.melikechi@gmail.com
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.6
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: ipss>=1.1.1
|
|
14
|
+
Requires-Dist: matplotlib>=3.0.0
|
|
15
|
+
Requires-Dist: networkx>=2.0
|
|
16
|
+
Requires-Dist: numpy>=1.16.0
|
|
17
|
+
Dynamic: author
|
|
18
|
+
Dynamic: author-email
|
|
19
|
+
Dynamic: classifier
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
# Local graph estimation with pathwise feature selection
|
|
28
|
+
|
|
29
|
+
> **Local graph estimation** is a framework for discovering local graph/network structure around specific variables of interest. **Pathwise feature selection (PFS)** is an algorithm for performing local graph estimation with pathwise false discovery control.
|
|
30
|
+
|
|
31
|
+
## Associated paper
|
|
32
|
+
|
|
33
|
+
- **Local graph estimation: Interpretable network discovery for complex data**
|
|
34
|
+
In preparation
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
```
|
|
38
|
+
pip install localgraph
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Usage
|
|
42
|
+
```python
|
|
43
|
+
from localgraph import pfs, plot_graph
|
|
44
|
+
|
|
45
|
+
# Load n-by-p data matrix X (n samples, p features)
|
|
46
|
+
|
|
47
|
+
# Specify the target features (list of indices)
|
|
48
|
+
target_features = [0, 1]
|
|
49
|
+
|
|
50
|
+
# Specify the pathwise q-value threshold
|
|
51
|
+
qpath_max = 0.2
|
|
52
|
+
|
|
53
|
+
# Optional: specify the maximum radius of the local graph (default is 3)
|
|
54
|
+
max_radius = 3
|
|
55
|
+
|
|
56
|
+
# Optional: specify the neighborhood FDR thresholds for nodes in each radius
|
|
57
|
+
fdr_local = [0.2, 0.1, 0.1]
|
|
58
|
+
|
|
59
|
+
# Run PFS
|
|
60
|
+
Q = pfs(X, target_features, qpath_max=qpath_max, max_radius=max_radius, fdr_local=fdr_local)
|
|
61
|
+
|
|
62
|
+
# Plot the estimated subgraph
|
|
63
|
+
plot_graph(graph=Q, target_features=target_features, radius=max_radius)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Outputs
|
|
67
|
+
- `Q`: Dictionary mapping edges `(i,j)` to q-values. Edges are undirected, so `(i,j)` and `(j,i)` are included.
|
|
68
|
+
|
|
69
|
+
### What PFS does
|
|
70
|
+
- Expands the local graph outward, layer by layer, starting from target variables.
|
|
71
|
+
- Performs neighborhood selection with FDR control using [**integrated path stability selection**](https://github.com/omelikechi/ipss).
|
|
72
|
+
- Controls pathwise false discoveries by summing q-values along candidate paths.
|
|
73
|
+
|
|
74
|
+
## Full list of `pfs` arguments
|
|
75
|
+
|
|
76
|
+
### Required arguments:
|
|
77
|
+
- `X`: n-by-p data matrix (NumPy array). Each column is a feature/variable.
|
|
78
|
+
- `target_features`: Feature index or list of indices to center the graph around.
|
|
79
|
+
- `qpath_max`: Maximum allowed sum of q-values along any path.
|
|
80
|
+
|
|
81
|
+
### Optional arguments:
|
|
82
|
+
- `max_radius`: Maximum number of expansion layers around each target (int; default `3`).
|
|
83
|
+
- `fdr_local`: Neighborhood FDR threshold at each radius (list of length `max_radius`; default `[qpath_max]*max_radius`).
|
|
84
|
+
- `custom_nbhd`: Dictionary specifying custom FDR cutoffs for certain features (dict; default `None`).
|
|
85
|
+
- `feature_names`: List of feature names; required if `custom_nbhd` is provided (list of strings).
|
|
86
|
+
- `criterion`: Rule for resolving multiple edges (default `'min'`).
|
|
87
|
+
- `selector`: Feature importance method used by IPSS (str; default `'gb'`). Options:
|
|
88
|
+
- `'gb'`: Gradient boosting
|
|
89
|
+
- `'l1'`: L1-regularized regression (lasso)
|
|
90
|
+
- `'rf'`: Random forest
|
|
91
|
+
- Custom function (see `ipss_args`)
|
|
92
|
+
- `ipss_args`: Dictionary of arguments to pass to `ipss` (dict; default `None`)
|
|
93
|
+
- `verbose`: Whether to print progress during selection (bool; default `False`)
|
|
94
|
+
|
|
95
|
+
## Graph plotting
|
|
96
|
+
|
|
97
|
+
Use `plot_graph` to visualize a local graph up to the specified `radius` around one or more target features.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from localgraph import plot_graph
|
|
101
|
+
|
|
102
|
+
# Plot local graph around target_features using output Q from pfs
|
|
103
|
+
plot_graph(graph=Q, target_features=target_features, radius=3)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Features and customization
|
|
107
|
+
`plot_graph` visualizes a local graph of a user-specified radius around one or more target features. It supports:
|
|
108
|
+
- Flexible input formats: edge dictionary, adjacency matrix, or NetworkX graph
|
|
109
|
+
- Automatic subgraph extraction around the targets
|
|
110
|
+
- Node coloring by distance from the target(s) (default), or user-specified colors (e.g., by variable type)
|
|
111
|
+
- Several layout algorithms (`'kamada_kawai'`, `'spring'`, `'circular'`, etc.)
|
|
112
|
+
- Customizable node size, font sizes, and edge thickness
|
|
113
|
+
- Optional display of q-values; edge widths can reflect q-value strength (`edge_widths='q_value'`)
|
|
114
|
+
- False positives shown in red if the true graph is provided
|
|
115
|
+
- Integration with custom plots via `ax` or `pos`
|
|
116
|
+
- Optional saving of figures (`save_fig`) and graphs (`save_graph`)
|
|
117
|
+
|
|
118
|
+
For a full list of arguments, see the [`plot_graph`](./localgraph/plotting/plot_graph.py) docstring.
|
|
119
|
+
|
|
120
|
+
### Returns
|
|
121
|
+
The function returns a dictionary containing:
|
|
122
|
+
- `feature_radius_list`: List of `(feature name, radius)` pairs for all nodes in the graph.
|
|
123
|
+
- `graph`: The NetworkX subgraph used for plotting.
|
|
124
|
+
- `positions`: Dictionary of node coordinates.
|
|
125
|
+
- `figure`: The matplotlib figure object (only if the function creates the figure).
|
|
126
|
+
|
|
127
|
+
### Further customization
|
|
128
|
+
|
|
129
|
+
To manually adjust node positions for publication-quality figures, you can export graphs to [**Gephi**](https://gephi.org/), edit them interactively, and re-import the updated layout into Python. See: [gephi_instructions.md](./gephi_instructions.md) for a full walkthrough.
|
|
130
|
+
|
|
131
|
+
## Examples
|
|
132
|
+
|
|
133
|
+
The `examples/` folder contains scripts that demonstrate end-to-end usage:
|
|
134
|
+
|
|
135
|
+
- `simple_example.py`: Simulate data, run PFS, and visualize the result.
|
|
136
|
+
|
|
137
|
+
## Evaluation tools
|
|
138
|
+
|
|
139
|
+
The `evaluation/` folder contains helper functions for measuring subgraph recovery in simulation settings.
|
|
140
|
+
|
|
141
|
+
- The `eval.py` script contains two functions:
|
|
142
|
+
- `subgraph_within_radius`: Extract true subgraph around a target node (useful for identifying subgraphs within full graphs)
|
|
143
|
+
- `tp_and_fp`: Count true and false positives compared to ground truth
|
|
144
|
+
|
|
145
|
+
These are useful for benchmarking PFS and other graph estimation methods when the true graph is known.
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Local graph estimation with pathwise feature selection
|
|
2
|
+
|
|
3
|
+
> **Local graph estimation** is a framework for discovering local graph/network structure around specific variables of interest. **Pathwise feature selection (PFS)** is an algorithm for performing local graph estimation with pathwise false discovery control.
|
|
4
|
+
|
|
5
|
+
## Associated paper
|
|
6
|
+
|
|
7
|
+
- **Local graph estimation: Interpretable network discovery for complex data**
|
|
8
|
+
In preparation
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
```
|
|
12
|
+
pip install localgraph
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
```python
|
|
17
|
+
from localgraph import pfs, plot_graph
|
|
18
|
+
|
|
19
|
+
# Load n-by-p data matrix X (n samples, p features)
|
|
20
|
+
|
|
21
|
+
# Specify the target features (list of indices)
|
|
22
|
+
target_features = [0, 1]
|
|
23
|
+
|
|
24
|
+
# Specify the pathwise q-value threshold
|
|
25
|
+
qpath_max = 0.2
|
|
26
|
+
|
|
27
|
+
# Optional: specify the maximum radius of the local graph (default is 3)
|
|
28
|
+
max_radius = 3
|
|
29
|
+
|
|
30
|
+
# Optional: specify the neighborhood FDR thresholds for nodes in each radius
|
|
31
|
+
fdr_local = [0.2, 0.1, 0.1]
|
|
32
|
+
|
|
33
|
+
# Run PFS
|
|
34
|
+
Q = pfs(X, target_features, qpath_max=qpath_max, max_radius=max_radius, fdr_local=fdr_local)
|
|
35
|
+
|
|
36
|
+
# Plot the estimated subgraph
|
|
37
|
+
plot_graph(graph=Q, target_features=target_features, radius=max_radius)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Outputs
|
|
41
|
+
- `Q`: Dictionary mapping edges `(i,j)` to q-values. Edges are undirected, so `(i,j)` and `(j,i)` are included.
|
|
42
|
+
|
|
43
|
+
### What PFS does
|
|
44
|
+
- Expands the local graph outward, layer by layer, starting from target variables.
|
|
45
|
+
- Performs neighborhood selection with FDR control using [**integrated path stability selection**](https://github.com/omelikechi/ipss).
|
|
46
|
+
- Controls pathwise false discoveries by summing q-values along candidate paths.
|
|
47
|
+
|
|
48
|
+
## Full list of `pfs` arguments
|
|
49
|
+
|
|
50
|
+
### Required arguments:
|
|
51
|
+
- `X`: n-by-p data matrix (NumPy array). Each column is a feature/variable.
|
|
52
|
+
- `target_features`: Feature index or list of indices to center the graph around.
|
|
53
|
+
- `qpath_max`: Maximum allowed sum of q-values along any path.
|
|
54
|
+
|
|
55
|
+
### Optional arguments:
|
|
56
|
+
- `max_radius`: Maximum number of expansion layers around each target (int; default `3`).
|
|
57
|
+
- `fdr_local`: Neighborhood FDR threshold at each radius (list of length `max_radius`; default `[qpath_max]*max_radius`).
|
|
58
|
+
- `custom_nbhd`: Dictionary specifying custom FDR cutoffs for certain features (dict; default `None`).
|
|
59
|
+
- `feature_names`: List of feature names; required if `custom_nbhd` is provided (list of strings).
|
|
60
|
+
- `criterion`: Rule for resolving multiple edges (default `'min'`).
|
|
61
|
+
- `selector`: Feature importance method used by IPSS (str; default `'gb'`). Options:
|
|
62
|
+
- `'gb'`: Gradient boosting
|
|
63
|
+
- `'l1'`: L1-regularized regression (lasso)
|
|
64
|
+
- `'rf'`: Random forest
|
|
65
|
+
- Custom function (see `ipss_args`)
|
|
66
|
+
- `ipss_args`: Dictionary of arguments to pass to `ipss` (dict; default `None`)
|
|
67
|
+
- `verbose`: Whether to print progress during selection (bool; default `False`)
|
|
68
|
+
|
|
69
|
+
## Graph plotting
|
|
70
|
+
|
|
71
|
+
Use `plot_graph` to visualize a local graph up to the specified `radius` around one or more target features.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from localgraph import plot_graph
|
|
75
|
+
|
|
76
|
+
# Plot local graph around target_features using output Q from pfs
|
|
77
|
+
plot_graph(graph=Q, target_features=target_features, radius=3)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Features and customization
|
|
81
|
+
`plot_graph` visualizes a local graph of a user-specified radius around one or more target features. It supports:
|
|
82
|
+
- Flexible input formats: edge dictionary, adjacency matrix, or NetworkX graph
|
|
83
|
+
- Automatic subgraph extraction around the targets
|
|
84
|
+
- Node coloring by distance from the target(s) (default), or user-specified colors (e.g., by variable type)
|
|
85
|
+
- Several layout algorithms (`'kamada_kawai'`, `'spring'`, `'circular'`, etc.)
|
|
86
|
+
- Customizable node size, font sizes, and edge thickness
|
|
87
|
+
- Optional display of q-values; edge widths can reflect q-value strength (`edge_widths='q_value'`)
|
|
88
|
+
- False positives shown in red if the true graph is provided
|
|
89
|
+
- Integration with custom plots via `ax` or `pos`
|
|
90
|
+
- Optional saving of figures (`save_fig`) and graphs (`save_graph`)
|
|
91
|
+
|
|
92
|
+
For a full list of arguments, see the [`plot_graph`](./localgraph/plotting/plot_graph.py) docstring.
|
|
93
|
+
|
|
94
|
+
### Returns
|
|
95
|
+
The function returns a dictionary containing:
|
|
96
|
+
- `feature_radius_list`: List of `(feature name, radius)` pairs for all nodes in the graph.
|
|
97
|
+
- `graph`: The NetworkX subgraph used for plotting.
|
|
98
|
+
- `positions`: Dictionary of node coordinates.
|
|
99
|
+
- `figure`: The matplotlib figure object (only if the function creates the figure).
|
|
100
|
+
|
|
101
|
+
### Further customization
|
|
102
|
+
|
|
103
|
+
To manually adjust node positions for publication-quality figures, you can export graphs to [**Gephi**](https://gephi.org/), edit them interactively, and re-import the updated layout into Python. See: [gephi_instructions.md](./gephi_instructions.md) for a full walkthrough.
|
|
104
|
+
|
|
105
|
+
## Examples
|
|
106
|
+
|
|
107
|
+
The `examples/` folder contains scripts that demonstrate end-to-end usage:
|
|
108
|
+
|
|
109
|
+
- `simple_example.py`: Simulate data, run PFS, and visualize the result.
|
|
110
|
+
|
|
111
|
+
## Evaluation tools
|
|
112
|
+
|
|
113
|
+
The `evaluation/` folder contains helper functions for measuring subgraph recovery in simulation settings.
|
|
114
|
+
|
|
115
|
+
- The `eval.py` script contains two functions:
|
|
116
|
+
- `subgraph_within_radius`: Extract true subgraph around a target node (useful for identifying subgraphs within full graphs)
|
|
117
|
+
- `tp_and_fp`: Count true and false positives compared to ground truth
|
|
118
|
+
|
|
119
|
+
These are useful for benchmarking PFS and other graph estimation methods when the true graph is known.
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# localgraph/__init__.py
|
|
2
|
+
|
|
3
|
+
from localgraph.evaluation.eval import tp_and_fp, subgraph_within_radius
|
|
4
|
+
from localgraph.pfs.helpers import lightest_paths, prune_graph
|
|
5
|
+
from localgraph.pfs.main import pfs
|
|
6
|
+
from localgraph.plotting.plot_graph import plot_graph
|
|
7
|
+
|
|
8
|
+
__all__ = ['lightest_paths', 'pfs', 'plot_graph', 'prune_graph', 'subgraph_within_radius', 'tp_and_fp']
|
|
File without changes
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Functions for evaluating local graph estimation performance
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
def subgraph_within_radius(A, target_nodes, radius):
    """
    Return the subgraph induced by all nodes within `radius` edges of the target nodes.

    Parameters:
    - A: Adjacency matrix (numpy array) of the graph. An entry greater than 0 in
      row i, column j is treated as an edge from i to j.
    - target_nodes: Target node index (Python or NumPy integer) or an iterable of
      target node indices.
    - radius: Maximum distance (number of edges) from the target nodes.

    Returns:
    - A_r: Array with the same shape and dtype as A. Entry (i, j) equals A[i, j]
      when both i and j are within the radius and is zero otherwise, so A_r is
      symmetric whenever A is.
    """
    # Accept NumPy integer scalars (e.g. np.int64) as well as plain Python ints
    if isinstance(target_nodes, (int, np.integer)):
        target_nodes = [target_nodes]

    visited = set()  # Nodes within the radius
    queue = deque()  # Entries are (node, distance from nearest target)

    # Seed the search with every target at distance 0 (multi-source BFS)
    for node in target_nodes:
        node = int(node)
        if node not in visited:
            visited.add(node)
            queue.append((node, 0))

    # Breadth-first search; nodes at distance `radius` are kept but not expanded
    while queue:
        current_node, current_distance = queue.popleft()
        if current_distance >= radius:
            continue

        for neighbor in np.flatnonzero(A[current_node] > 0):
            neighbor = int(neighbor)
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append((neighbor, current_distance + 1))

    # Copy the block of A indexed by the visited nodes; everything else stays zero
    A_r = np.zeros_like(A)
    if visited:
        kept = np.array(sorted(visited), dtype=int)
        block = np.ix_(kept, kept)
        A_r[block] = A[block]

    return A_r
|
|
53
|
+
|
|
54
|
+
def tp_and_fp(A, A_true, target_features, radius=None):
    """
    Compute the number of true and false positive edges in an estimated graph.

    Parameters:
    - A : dict or np.ndarray
        Estimated adjacency structure, either as a dictionary mapping edges (i, j)
        to q-values or as an adjacency matrix. Any nonzero entry counts as an
        edge; a dictionary entry whose q-value is exactly 0 is therefore treated
        as no edge.
    - A_true : np.ndarray
        Ground-truth binary adjacency matrix.
    - target_features : int or list of int
        Indices of target nodes around which the local subgraph is evaluated.
        Only used when `radius` is given.
    - radius : int or None, optional
        Radius of the local neighborhood around target features. If None, counts
        are computed over the full graph.

    Returns:
    - tp : int
        Number of true positive edges.
    - fp : int
        Number of false positive edges.

    Raises:
    - ValueError if the binarized A or A_true is not symmetric.

    Notes:
    - Each undirected edge is counted once (upper triangle of A).
    - If `radius` is specified, A_true is first restricted to the true subgraph
      within `radius` of the targets, and estimated edges are evaluated within
      the radius neighborhood of the targets in the estimated graph, excluding
      edges between two nodes at the outermost radius.
    """

    # Convert dictionary of q-values to a matrix
    if isinstance(A, dict):
        p = A_true.shape[0]
        A_matrix = np.zeros((p, p))
        for (i, j), q in A.items():
            A_matrix[i, j] = q
            A_matrix[j, i] = q
        A = A_matrix

    # Binarize: any nonzero entry is an estimated edge
    A = (A != 0).astype(int)
    if not np.all(A == A.T):
        num_asymmetric = int(np.sum(A != A.T))
        raise ValueError(f'A is not symmetric: {num_asymmetric} asymmetric entries.')
    if not np.all(A_true == A_true.T):
        raise ValueError('A_true is not symmetric.')

    nodes_within_radius = set()
    nodes_at_outer_radius = set()

    if radius is not None:
        # Accept a single index (Python or NumPy integer) as well as a list
        if isinstance(target_features, (int, np.integer)):
            target_features = [target_features]

        A_true = subgraph_within_radius(A_true, target_features, radius)

        # Multi-source BFS on the estimated graph: seeding all targets at distance 0
        # gives each node its distance to the nearest target, independent of the
        # order in which the targets are listed.
        distance = {}
        queue = deque()
        for target in target_features:
            target = int(target)
            if target not in distance:
                distance[target] = 0
                queue.append(target)

        while queue:
            node = queue.popleft()
            if distance[node] >= radius:
                continue  # Nodes at the outer radius are kept but not expanded
            for neighbor in np.flatnonzero(A[node] == 1):
                neighbor = int(neighbor)
                if neighbor not in distance:
                    distance[neighbor] = distance[node] + 1
                    queue.append(neighbor)

        for node, dist in distance.items():
            if dist < radius:
                nodes_within_radius.add(node)
            elif dist == radius:
                nodes_at_outer_radius.add(node)

    # Built once, outside the edge loop
    reachable = nodes_within_radius | nodes_at_outer_radius

    tp, fp = 0, 0
    # Visit each estimated edge once via the strict upper triangle
    for i, j in np.argwhere(np.triu(A, k=1) == 1):
        i, j = int(i), int(j)
        if radius is not None:
            if i not in reachable or j not in reachable:
                continue
            if i in nodes_at_outer_radius and j in nodes_at_outer_radius:
                continue  # Exclude edges between outer radius nodes
        if A_true[i, j] == 1:
            tp += 1
        else:
            fp += 1

    return tp, fp
|
|
150
|
+
|
|
151
|
+
|
|
File without changes
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# A simple example of local graph estimation with pathwise feature selection (PFS)
|
|
2
|
+
|
|
3
|
+
from localgraph import pfs, plot_graph
|
|
4
|
+
import matplotlib.pyplot as plt
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
#--------------------------------
# Setup
#--------------------------------
# Seed NumPy's global random generator so the simulated data are reproducible
np.random.seed(302)
# n = number of samples, p = number of features
n = 100
p = 50
|
|
12
|
+
|
|
13
|
+
#--------------------------------
|
|
14
|
+
# Generate true graph and samples
|
|
15
|
+
#--------------------------------
|
|
16
|
+
# define nonlinear function
|
|
17
|
+
def f(x):
    """Return exp(-x**2 / 2), applied elementwise when x is a NumPy array."""
    half_square = x**2 / 2
    return np.exp(-half_square)
|
|
19
|
+
|
|
20
|
+
# generate data
|
|
21
|
+
def generate_data(n, p, target_feature=0, snr=2):
    """
    Simulate an n-by-p data matrix together with its true local graph.

    Features 1 and 2 depend linearly on features (3, 5) and (4, 6), respectively,
    and the target feature is a nonlinear function of features 1 and 2 plus
    Gaussian noise. All remaining features are independent standard normals.
    Random draws come from NumPy's global generator (np.random).

    Parameters:
    - n: Number of samples.
    - p: Number of features; must be at least 7 because features 1-6 are used.
    - target_feature: Index of the target column (default 0). It must not be one
      of the parent features 1-6, otherwise a parent column would be overwritten.
    - snr: Signal-to-noise ratio; the noise variance is var(signal) / snr.

    Returns:
    - X: n-by-p data matrix.
    - target_feature: The target index, returned unchanged.
    - A_true: p-by-p symmetric binary adjacency matrix of the true graph.

    Raises:
    - ValueError if p < 7 or target_feature is one of 1-6.
    """
    if p < 7:
        raise ValueError('p must be at least 7 because features 1-6 define the true graph.')
    if target_feature in range(1, 7):
        raise ValueError('target_feature must not be one of the parent features 1-6.')

    X = np.random.normal(0, 1, size=(n, p))

    # radius 2 (linear relationships)
    X[:, 1] += X[:, 3] + X[:, 5]
    X[:, 2] += X[:, 4] + X[:, 6]

    # radius 1 (nonlinear relationships)
    signal = f(X[:, 1]) + f(X[:, 2])
    sigma2 = np.var(signal) / snr  # noise variance implied by the requested SNR
    X[:, target_feature] = signal + np.random.normal(0, np.sqrt(sigma2), size=n)

    # Construct true adjacency matrix (edges stored in both directions)
    A_true = np.zeros((p, p))
    true_edges = [(3, 1), (5, 1), (4, 2), (6, 2), (1, target_feature), (2, target_feature)]
    for i, j in true_edges:
        A_true[i, j] = 1
        A_true[j, i] = 1

    return X, target_feature, A_true
|
|
41
|
+
|
|
42
|
+
# Simulate the data matrix, the target index, and the true adjacency matrix
X, target_feature, A_true = generate_data(n, p)

#--------------------------------
# Run PFS
#--------------------------------
qpath_max = 0.1
max_radius = 2
Q = pfs(
    X,
    target_feature,
    qpath_max=qpath_max,
    max_radius=max_radius,
    verbose=True,
)

#--------------------------------
# Plot true and estimated local graphs
#--------------------------------
fig, axes = plt.subplots(1, 2, figsize=(18,8))

# Left panel: ground-truth graph, drawn without edge weights
plot_graph(
    graph=A_true,
    target_features=target_feature,
    radius=max_radius,
    edge_widths=3,
    ax=axes[0],
    show_weights=False,
)
axes[0].set_title('True Graph', fontsize=24)

# Right panel: estimated graph; the true graph is passed so it can be compared against
plot_graph(
    graph=Q,
    target_features=target_feature,
    radius=max_radius,
    true_graph=A_true,
    edge_widths=3,
    ax=axes[1],
)
axes[1].set_title('Estimated Graph', fontsize=24)

plt.tight_layout()
plt.show()
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Helper functions for the main PFS function, main.py
|
|
2
|
+
|
|
3
|
+
import heapq
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def lightest_paths(Q, target_features, new_features):
    """
    Computes the lightest path (minimum cumulative q-value) from any target feature
    to all new features using Dijkstra's algorithm.

    Parameters:
        Q (dict): Dictionary of edge q-values {(i, j): q}. Edges are traversed in
            both directions; moving from i to j costs Q[(i, j)] when that key
            exists and Q[(j, i)] otherwise. Q-values are assumed nonnegative,
            as Dijkstra's algorithm requires.
        target_features (int or iterable): Initial target feature(s).
        new_features (set or iterable): Features for which to compute the lightest paths.

    Returns:
        dict: {feature: minimum cumulative q-value path} for all new_features.
            A feature that cannot be reached from any target maps to float('inf').
    """
    if isinstance(target_features, int):
        target_features = [target_features]
    new_features = set(new_features)

    # Build the neighbor lists once instead of rescanning Q for every popped feature.
    # First pass: the weight stored under (i, j) is used when moving from i to j.
    neighbors = {}
    for (i, j), q in Q.items():
        neighbors.setdefault(i, {})[j] = q
    # Second pass: fall back to the reverse key when only one direction is stored.
    for (i, j), q in Q.items():
        neighbors.setdefault(j, {}).setdefault(i, q)

    pq = [(0, f) for f in target_features]  # Priority queue: (cumulative q-value, feature)
    heapq.heapify(pq)
    min_q_path = {f: 0 for f in target_features}  # Best known path values
    settled = set()  # Features whose path value is final
    remaining = set(new_features)  # New features not yet settled

    # Stop only once every new feature is settled; a tentative value may still
    # be improved by a path through a feature that has not been popped yet.
    while pq and remaining:
        q_path, feature = heapq.heappop(pq)

        # Skip stale queue entries left behind by later improvements
        if feature in settled or q_path > min_q_path[feature]:
            continue
        settled.add(feature)
        remaining.discard(feature)

        for neighbor, q_edge in neighbors.get(feature, {}).items():
            new_q_path = q_path + q_edge

            if neighbor not in min_q_path or new_q_path < min_q_path[neighbor]:
                min_q_path[neighbor] = new_q_path
                heapq.heappush(pq, (new_q_path, neighbor))

    # Return only computed paths for `new_features`
    unreachable = float('inf')
    return {f: min_q_path.get(f, unreachable) for f in new_features}
|
|
43
|
+
|
|
44
|
+
def prune_graph(Q, target_features, qpath_max, fdr_local, max_radius, custom_nbhd=None, feature_names=None):
    """
    Prunes the estimated graph by enforcing local FDR and pathwise q-value constraints.

    Starting from the target features, the graph is expanded one radius at a time.
    An edge (current, j) is retained only if:
    (1) its q-value does not exceed the local FDR threshold for the current radius
        (or a custom threshold for the current feature), and
    (2) either j is also in the current layer (radius > 0), or the cumulative
        q-value of the path through `current` to j does not exceed qpath_max.

    Inputs
    ----------
    Q : dict
        Dictionary of edge-level q-values. Keys are (i,j) tuples and values are q-values.
        Only keys whose first index is the feature being expanded are considered.
    target_features : int or list of int
        Indices of target features around which the local graph is built.
    qpath_max : float
        Maximum allowed sum of q-values along any path from a target to another feature.
    fdr_local : list of float
        List of local FDR thresholds for each radius (indexed by radius, so it needs
        one entry per radius that is reached).
    max_radius : int
        Maximum radius for local graph expansion.
    custom_nbhd : dict, optional
        Dictionary keyed by feature name. Each value is a dict that may contain
        'nbhd_fdr' (threshold for that feature's whole neighborhood; defaults to the
        threshold of the current radius) and substring -> threshold entries applied
        to neighbors whose name contains the substring. This dictionary is not
        modified (default: None).
    feature_names : list of str, optional
        List of feature names (required if custom_nbhd is not None).

    Outputs
    -------
    Q_pruned : dict
        Dictionary of pruned edges (i,j) with their corresponding q-values. An edge is
        keyed in the direction it was traversed; if the reverse edge was already
        retained, the smaller of the two q-values is stored.

    Raises
    ------
    ValueError
        If custom_nbhd is given without feature_names.
    """

    Q_pruned = {}

    if isinstance(target_features, int):
        target_features = [target_features]

    # Features without an entry are treated as not yet reached; their cumulative
    # q-value is taken to be qpath_max + 1, above any admissible path. Using a
    # lookup default also avoids max() failing on an empty Q.
    unreached = qpath_max + 1
    cumulative_q = {feature: 0 for feature in target_features}

    # Group edges by their first index once, preserving the order in Q, instead of
    # rescanning all of Q for every feature in every layer.
    edges_from = {}
    for (i, j), q in Q.items():
        edges_from.setdefault(i, []).append((j, q))

    current_set = set(target_features)
    radius = 0

    while current_set and radius < max_radius:
        next_set = set()
        cutoff = fdr_local[radius]

        for current in current_set:
            # Current custom neighborhood
            current_custom_nbhd = None
            default_cutoff = cutoff
            if custom_nbhd is not None:
                if feature_names is None:
                    raise ValueError('Feature names must be provided if custom_nbhd is not None.')
                current_feature_name = feature_names[current]
                if current_feature_name in custom_nbhd:
                    current_custom_nbhd = custom_nbhd[current_feature_name]
                    # Read the default without writing it back, so the caller's dict is
                    # untouched and the fallback tracks the current radius.
                    default_cutoff = current_custom_nbhd.get('nbhd_fdr', cutoff)

            for j, q in edges_from.get(current, ()):
                current_cutoff = default_cutoff

                # Apply custom FDR threshold based on matched feature name substrings
                if current_custom_nbhd is not None:
                    for string, custom_fdr in current_custom_nbhd.items():
                        if string != 'nbhd_fdr' and string in feature_names[j]:
                            current_cutoff = custom_fdr
                            break

                # Local FDR constraint applies to every edge
                if q > current_cutoff:
                    continue

                if j in current_set and radius > 0:
                    # Edge within the current layer: no pathwise check
                    keep_edge = True
                else:
                    new_cumulative_q = cumulative_q[current] + q
                    keep_edge = new_cumulative_q <= qpath_max
                    if keep_edge and new_cumulative_q < cumulative_q.get(j, unreached):
                        # Lighter path to j found: record it and expand j next
                        cumulative_q[j] = new_cumulative_q
                        next_set.add(j)

                if keep_edge:
                    if (j, current) in Q_pruned:
                        Q_pruned[(current, j)] = min(q, Q_pruned[(j, current)])
                    else:
                        Q_pruned[(current, j)] = q

        current_set = next_set
        radius += 1

    return Q_pruned
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Pathwise feature selection (PFS)
|
|
2
|
+
|
|
3
|
+
from ipss import ipss
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from .helpers import lightest_paths, prune_graph
|
|
7
|
+
|
|
8
|
+
def pfs(X, target_features, qpath_max, max_radius=3, fdr_local=None, custom_nbhd=None, feature_names=None,
        criterion='min', selector='gb', ipss_args=None, verbose=False):
    """
    Pathwise feature selection (PFS): grow a local graph outward from the target features,
    one layer per radius, then prune it with prune_graph.

    Inputs:
        Required
        ----------------
        X: n-by-p data matrix (n = number of samples, p = number of features)
        target_features: index or list of indices of the target features
        qpath_max: maximum allowed sum of q-values along paths

        Optional
        ----------------
        max_radius: maximum radius of the estimated local graph
        fdr_local: neighborhood FDR threshold at each radius (list of length max_radius;
            defaults to [qpath_max] * max_radius)
        custom_nbhd: dictionary mapping a feature name to a dictionary of custom FDR thresholds
            for that feature's neighborhood. The key 'nbhd_fdr' sets the threshold for the whole
            neighborhood (default: the threshold of the current radius); every other key is a
            substring, and the first substring contained in a candidate neighbor's name sets the
            threshold for that neighbor. The dictionary is read but never modified.
        feature_names: names of the features, required if custom_nbhd is provided
        criterion: rule for an edge to a feature selected in an earlier layer. With 'min' the
            edge is recorded with the smaller of its q-values; with any other value such edges
            are skipped.
        selector: base method for computing q-values (stored under the 'selector' key of the
            arguments passed to ipss, replacing any value given there)
        ipss_args: additional arguments passed to the IPSS function (copied, not modified)
        verbose: whether to print progress during selection

    Outputs:
        Q: dictionary mapping edge tuples (i, j) to q-values, as returned by prune_graph

    Raises:
        ValueError: if custom_nbhd is provided without feature_names
    """

    # Fail before any model is fitted rather than inside the selection loop.
    if custom_nbhd is not None and feature_names is None:
        raise ValueError('Feature names must be provided if custom_nbhd is not None.')

    if fdr_local is None:
        fdr_local = [qpath_max] * max_radius

    # Work on a copy so the caller's dictionary does not silently gain a 'selector' entry.
    ipss_args = dict(ipss_args) if ipss_args is not None else {}
    ipss_args['selector'] = selector

    # Accept a single index, including NumPy integer scalars.
    if isinstance(target_features, (int, np.integer)):
        target_features = [target_features]

    current_features = set(target_features)
    all_visited = set(target_features)
    radius = 0
    Q = {}

    def keep_smaller(i, j, q_value):
        # Store the edge symmetrically unless a smaller q-value is already recorded.
        existing = Q.get((j, i))
        if existing is None or existing > q_value:
            Q[(i, j)] = Q[(j, i)] = q_value

    while current_features and radius < max_radius:

        if verbose:
            print(f'current features: {current_features} (radius = {radius + 1}/{max_radius})')

        cutoff = fdr_local[radius]
        # Edges inside the current layer use the threshold under which that layer was selected.
        fdr_same_layer = cutoff if radius == 0 else fdr_local[radius - 1]

        new_features = set()

        for iteration, current in enumerate(current_features, start=1):

            if verbose:
                print(f' - iteration {iteration}/{len(current_features)}')

            # Resolve the custom neighborhood of the current feature without touching custom_nbhd.
            nbhd_cutoff = cutoff
            substring_cutoffs = []
            if custom_nbhd is not None:
                current_feature_name = feature_names[current]
                if current_feature_name in custom_nbhd:
                    current_custom_nbhd = custom_nbhd[current_feature_name]
                    nbhd_cutoff = current_custom_nbhd.get('nbhd_fdr', cutoff)
                    substring_cutoffs = [
                        (string, custom_fdr)
                        for string, custom_fdr in current_custom_nbhd.items()
                        if string != 'nbhd_fdr'
                    ]

            # compute q-values by regressing the current feature on all other features
            X_minus_current = np.delete(X, current, axis=1)
            result = ipss(X_minus_current, X[:, current], **ipss_args)
            q_values = result['q_values']

            for feature_idx, q_value in q_values.items():
                # reindex feature to account for deletion of current feature in X_minus_current
                feature_idx = feature_idx if feature_idx < current else feature_idx + 1

                # the first matching substring overrides the neighborhood-wide cutoff
                current_cutoff = nbhd_cutoff
                if substring_cutoffs:
                    neighbor_name = feature_names[feature_idx]
                    for string, custom_fdr in substring_cutoffs:
                        if string in neighbor_name:
                            current_cutoff = custom_fdr
                            break

                # update if feature is entirely new
                if q_value <= current_cutoff and feature_idx not in all_visited:
                    Q[(current, feature_idx)] = Q[(feature_idx, current)] = q_value
                    new_features.add(feature_idx)

                # update if feature in same layer as current
                elif feature_idx in current_features:
                    if q_value <= fdr_same_layer:
                        keep_smaller(current, feature_idx, q_value)

                # update edge with minimum q-value if criterion is 'min'
                elif q_value <= current_cutoff and criterion == 'min':
                    keep_smaller(current, feature_idx, q_value)

        # Keep only the new features whose lightest path from a target stays within qpath_max.
        q_paths = lightest_paths(Q, target_features, new_features)
        new_features = {idx for idx, q_value_sum in q_paths.items() if q_value_sum <= qpath_max}

        current_features = new_features
        all_visited.update(new_features)
        radius += 1

    # Compute final Q by applying pathwise threshold qpath_max
    # NOTE(review): custom_nbhd and feature_names are not forwarded to prune_graph, so pruning
    # appears to rely on fdr_local only - confirm this is intended.
    Q = prune_graph(Q, target_features, qpath_max, fdr_local, max_radius)

    return Q
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
|
|
File without changes
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Helper functions for plot_graph.py
|
|
2
|
+
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import networkx as nx
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
def assign_node_colors(G, target_features, n_layers, colors=None):
    """
    Return one color per node of G, in the order of G.nodes().

    Inputs:
        G: NetworkX graph whose nodes are to be colored
        target_features: index or list of indices of the target features
        n_layers: number of distinct distance colors to generate
        colors: optional custom colors. A dict maps node -> color; a list or NumPy array is
            indexed by node. Nodes without an entry are colored 'gray'.

    Outputs:
        List of colors. Without custom colors, targets are 'yellow', nodes reachable from a
        target are colored by their distance to the nearest target, and all remaining nodes
        are 'skyblue'.

    Raises:
        ValueError: if colors is neither None, a dict, a list, nor a NumPy array
    """
    if colors is not None:
        if isinstance(colors, dict):
            return [colors.get(node, 'gray') for node in G.nodes()]
        if isinstance(colors, (list, np.ndarray)):
            return [colors[node] if node < len(colors) else 'gray' for node in G.nodes()]
        raise ValueError("`colors` must be a dict, list, or None.")

    # Accept a single index, including NumPy integer scalars.
    if isinstance(target_features, (int, np.integer)):
        target_features = [target_features]
    targets = set(target_features)

    # Default: color by distance from target_features
    standout_color = 'yellow'
    unreachable_color = 'skyblue'
    default_colors = generate_colors(n_layers)

    # Distance from every node to its nearest target. Targets that are not nodes of G are
    # skipped, because the shortest-path search raises for a source outside the graph.
    nearest = {}
    for root in targets:
        if root not in G:
            continue
        for node, dist in nx.single_source_shortest_path_length(G, root).items():
            if dist < nearest.get(node, float('inf')):
                nearest[node] = dist

    node_colors = []
    for node in G.nodes():
        if node in targets:
            node_colors.append(standout_color)
            continue
        dist = nearest.get(node)
        if dist is None or not default_colors:
            # Unreachable node, or no distance palette to pick from (n_layers <= 0).
            node_colors.append(unreachable_color)
        else:
            # Distances beyond the palette reuse its last color.
            node_colors.append(default_colors[min(dist - 1, len(default_colors) - 1)])

    return node_colors
|
|
40
|
+
|
|
41
|
+
def generate_colors(n):
    """
    Generate n hex color strings from the [0.4, 0.8] portion of the 'gist_rainbow' colormap.

    Each color is blended toward white, and the blend gets stronger as the index grows, so
    later colors (farther layers) are paler. Returns an empty list when n <= 0.
    """
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9; plt.get_cmap
    # provides the same lookup across versions.
    colormap = plt.get_cmap('gist_rainbow')
    colors = []
    for i in range(n):
        t = 0 if n == 1 else i / (n - 1)  # Normalize to [0,1]
        # Restrict sampling to part of the colormap: maps t from [0, 1] to [0.4, 0.8]
        t_new = 0.4 + 0.4 * t
        # Get color from colormap (drop the alpha channel)
        color = np.array(colormap(t_new)[:3])
        # Weight of the pure color: 0.65 at t=0 (closer) down to 0.10 at t=1 (farther)
        fade_factor = 0.65 - 0.55 * t
        # Blend with white (1, 1, 1) using the remaining weight
        color = color * fade_factor + (1 - fade_factor)
        # Convert to hex
        colors.append('#%02x%02x%02x' % tuple(int(255 * c) for c in color))
    return colors
|
|
57
|
+
|
|
58
|
+
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# Plot a local subgraph around target features, with flexible graph input
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import networkx as nx
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from .helpers import assign_node_colors, generate_colors
|
|
10
|
+
|
|
11
|
+
def _as_networkx_graph(graph):
    """Return a new NetworkX graph built from a NetworkX graph, an edge-weight dict, or a NumPy adjacency matrix."""
    if isinstance(graph, nx.Graph):
        return graph.copy()
    if isinstance(graph, dict):
        G = nx.Graph()
        for (i, j), q in graph.items():
            G.add_edge(i, j, weight=q)
        return G
    if isinstance(graph, np.ndarray):
        G = nx.Graph()
        # Visit only the nonzero entries (row-major order) instead of every (i, j) pair.
        for i, j in zip(*np.nonzero(graph)):
            G.add_edge(int(i), int(j), weight=graph[i, j])
        return G
    raise TypeError("Unsupported graph input type. Must be a NetworkX graph, dict, or numpy array.")


def _compute_layout(G, graph_layout, node_distances):
    """Return node positions for G using the requested NetworkX layout algorithm."""
    if graph_layout == 'circular':
        return nx.circular_layout(G)
    if graph_layout in ['kk', 'kamada_kawai']:
        # weight=None ignores the q-values stored on the edges (unit edge lengths).
        return nx.kamada_kawai_layout(G, weight=None)
    if graph_layout == 'multipartite':
        # multipartite_layout needs a 'subset' attribute on every node. When the input does
        # not provide one, layer a copy of the graph by distance from the nearest target.
        if any('subset' not in data for _, data in G.nodes(data=True)):
            layered = G.copy()
            nx.set_node_attributes(layered, {node: node_distances[node] for node in layered.nodes()}, 'subset')
            return nx.multipartite_layout(layered)
        return nx.multipartite_layout(G)
    if graph_layout == 'planar':
        return nx.planar_layout(G)
    if graph_layout == 'spectral':
        return nx.spectral_layout(G)
    if graph_layout == 'spring':
        return nx.spring_layout(G, weight=None)
    raise ValueError(f"Unsupported graph layout: {graph_layout}")


def plot_graph(
    graph, target_features, radius, feature_names=None, true_graph=None,
    graph_layout='kk', node_size=1500, font_size=10, edge_font_size=None, edge_digits=2, edge_widths=1, colors=None,
    show_weights=True, include_outer_edges=False, figsize=(16,8), ax=None, pos=None,
    save_fig=False, save_graph=False, graph_name=None, dpi=300
):
    """
    Plot the local subgraph within a given radius of the target features.

    Inputs:
        Required
        ----------------
        graph: input graph, either as a dictionary of edge weights, a numpy array (adjacency matrix), or NetworkX graph
        target_features: index or list of indices of the target features
        radius: maximum radius of the local graph

        Optional
        ----------------
        # Labeling
        feature_names: list of feature names for labeling nodes (default labels are X_1, X_2, ...)
        true_graph: ground truth adjacency matrix, if known (used to color true/false edges black/red)

        # Visualization
        graph_layout: layout algorithm ('kk'/'kamada_kawai', 'circular', 'multipartite', 'planar',
            'spectral', or 'spring'; default is kamada-kawai)
        node_size: size of nodes in the plot (target nodes are drawn 1.5 times larger)
        font_size: size of node labels (target labels are drawn 1.25 times larger)
        edge_font_size: size of edge weight labels (defaults to font_size)
        edge_digits: number of digits to round edge weights
        edge_widths: width of edges (float for uniform width; 'q_value' to scale by q-values)
        colors: dict or list of custom colors for nodes (default: color by distance from target nodes)
        show_weights: whether to display edge weights
        include_outer_edges: whether to include edges between outermost nodes
        figsize: size of the figure (width, height), used only when ax is not provided
        ax: matplotlib axis object (for use in subplots or custom figures)
        pos: dictionary of node positions (overrides automatic layout); entries for nodes
            outside the plotted subgraph are ignored

        # Saving
        save_fig: whether to save the figure as a PNG (only when this function creates the figure)
        save_graph: whether to save the graph as a GraphML file
        graph_name: name of the output file if saving (defaults to 'my_graph')
        dpi: resolution of the saved figure

    Outputs:
        Returns None if no target feature is a node of the graph. Otherwise returns a dictionary containing:
            - 'feature_radius_list': list of (feature_name, radius) pairs for each node in the plotted subgraph
            - 'graph': NetworkX graph object for the plotted subgraph
            - 'positions': dictionary mapping nodes to their 2D coordinates in the plot
            - 'figure': the matplotlib figure (only when this function creates the figure)

    Raises:
        TypeError: if graph is not a NetworkX graph, dict, or numpy array
        ValueError: if graph_layout is not one of the supported layouts
    """

    if edge_font_size is None:
        edge_font_size = font_size

    # Create networkx graph G from graph
    G = _as_networkx_graph(graph)

    # Accept a single index, including NumPy integer scalars.
    if isinstance(target_features, (int, np.integer)):
        target_features = [target_features]
    target_set = set(target_features)

    # Distance from each node within the radius to its nearest target
    node_distances = {}
    for root in target_features:
        if G.has_node(root):
            path_lengths = nx.single_source_shortest_path_length(G, root, cutoff=radius)
            for node, dist in path_lengths.items():
                if dist < node_distances.get(node, float('inf')):
                    node_distances[node] = dist
    reachable_nodes = set(node_distances)

    # Remove edges between outermost nodes if requested
    if not include_outer_edges:
        outer_nodes = {node for node, dist in node_distances.items() if dist == radius}
        G.remove_edges_from([(u, v) for u, v in G.edges() if u in outer_nodes and v in outer_nodes])

    # Create subgraph
    G = G.subgraph(reachable_nodes).copy()

    if G.number_of_nodes() == 0:
        print("Empty graph: All root nodes are isolated.")
        return

    # Handle plotting
    own_figure = False
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
        own_figure = True

    # Graph layout selection
    if pos is not None:
        # Drop positions of nodes that are not plotted so later per-node lookups cannot fail.
        pos = {node: (float(x), float(y)) for node, (x, y) in pos.items() if node in G}
    else:
        pos = _compute_layout(G, graph_layout, node_distances)

    # Assign node colors. Only targets present in G are passed on, since the distance-based
    # coloring runs a shortest-path search from every target it receives.
    present_targets = [root for root in target_features if G.has_node(root)]
    node_color = assign_node_colors(G, present_targets, radius, colors=colors)

    edges = list(G.edges())

    # Assign edge colors based on whether they exist in true_graph
    if true_graph is not None:
        true_edges = set()
        for i in range(true_graph.shape[0]):
            for j in range(true_graph.shape[1]):
                if true_graph[i,j] != 0:
                    true_edges.add((i,j))
                    true_edges.add((j,i))
        edge_colors = ['black' if (u,v) in true_edges else 'red' for u, v in edges]
    else:
        edge_colors = ['black'] * len(edges)

    # Node sizes
    node_sizes = [node_size * 1.5 if node in target_set else node_size for node in G.nodes()]

    # Edge widths
    edge_widths_clean = str(edge_widths).replace('-', '_').lower()
    if edge_widths_clean in ['by_q_value', 'q_value']:
        q_vals = [G[u][v]['weight'] for u, v in edges]
        if not q_vals or max(q_vals) == min(q_vals):
            # No edges, or nothing to differentiate: fall back to a uniform width.
            edge_widths = 1
        else:
            q_min, q_max = min(q_vals), max(q_vals)
            # Smallest q-value -> width 5, largest -> width 1 (never thinner than 1).
            edge_widths = [max(5 * (1 - (q - q_min) / (q_max - q_min)), 1) for q in q_vals]

    # Draw nodes and edges
    nx.draw(G, pos, ax=ax, with_labels=False, node_color=node_color, node_size=node_sizes, edge_color=edge_colors,
        edgecolors='black', linewidths=1, alpha=1, width=edge_widths)

    if show_weights:
        edge_labels = {(u, v): f'{G[u][v]["weight"]:.{edge_digits}f}' for u, v in edges}
        text_items = nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=edge_font_size, ax=ax)
        # Color each label like its edge
        for (u,v), text in text_items.items():
            if true_graph is not None:
                edge_color = 'black' if (u,v) in true_edges else 'red'
            else:
                edge_color = 'black'
            text.set_color(edge_color)

    # Add custom node labels
    if feature_names is None:
        node_labels = {node: f'$X_{{{node + 1}}}$' for node in reachable_nodes}
    else:
        node_labels = {node: feature_names[node] for node in reachable_nodes}

    # Custom node labels with individual font sizes
    for node in G.nodes():
        x, y = pos[node]
        node_font_size = int(font_size * 1.25) if node in target_set else font_size
        ax.text(x, y, node_labels[node], fontsize=node_font_size, ha='center', va='center',
            bbox=dict(facecolor='white', edgecolor='none', alpha=0))

    # Collect (feature_name, radius) pairs, ordered by radius and then by node
    feature_radius_list = [
        (node_labels[node], dist)
        for node, dist in sorted(node_distances.items(), key=lambda item: (item[1], item[0]))
    ]

    # Resolve the output name once so the warning is printed at most once.
    if graph_name is None and ((own_figure and save_fig) or save_graph):
        print('Warning: graph_name not provided. Setting graph_name = my_graph')
        graph_name = 'my_graph'

    if own_figure:
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        if save_fig:
            fig.savefig(f'{graph_name}.png', dpi=dpi)
        plt.show()

    if save_graph:
        # Store the coordinates on the nodes so the layout is written to the GraphML file.
        for node in G.nodes():
            x, y = pos[node]
            G.nodes[node]['x'] = float(x)
            G.nodes[node]['y'] = float(y)
        nx.write_graphml(G, f'{graph_name}.graphml')

    result = {'feature_radius_list':feature_radius_list, 'graph':G, 'positions':pos}
    if own_figure:
        result['figure'] = fig

    return result
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: localgraph
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local graph estimation with pathwise feature selection
|
|
5
|
+
Author: Omar Melikechi
|
|
6
|
+
Author-email: omar.melikechi@gmail.com
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.6
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: ipss>=1.1.1
|
|
14
|
+
Requires-Dist: matplotlib>=3.0.0
|
|
15
|
+
Requires-Dist: networkx>=2.0
|
|
16
|
+
Requires-Dist: numpy>=1.16.0
|
|
17
|
+
Dynamic: author
|
|
18
|
+
Dynamic: author-email
|
|
19
|
+
Dynamic: classifier
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
# Local graph estimation with pathwise feature selection
|
|
28
|
+
|
|
29
|
+
> **Local graph estimation** is a framework for discovering local graph/network structure around specific variables of interest. **Pathwise feature selection (PFS)** is an algorithm for performing local graph estimation with pathwise false discovery control.
|
|
30
|
+
|
|
31
|
+
## Associated paper
|
|
32
|
+
|
|
33
|
+
- **Local graph estimation: Interpretable network discovery for complex data**
|
|
34
|
+
In preparation
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
```
|
|
38
|
+
pip install localgraph
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Usage
|
|
42
|
+
```python
|
|
43
|
+
from localgraph import pfs, plot_graph
|
|
44
|
+
|
|
45
|
+
# Load n-by-p data matrix X (n samples, p features)
|
|
46
|
+
|
|
47
|
+
# Specify the target features (list of indices)
|
|
48
|
+
target_features = [0, 1]
|
|
49
|
+
|
|
50
|
+
# Specify the pathwise q-value threshold
|
|
51
|
+
qpath_max = 0.2
|
|
52
|
+
|
|
53
|
+
# Optional: specify the maximum radius of the local graph (default is 3)
|
|
54
|
+
max_radius = 3
|
|
55
|
+
|
|
56
|
+
# Optional: specify the neighborhood FDR thresholds for nodes in each radius
|
|
57
|
+
fdr_local = [0.2, 0.1, 0.1]
|
|
58
|
+
|
|
59
|
+
# Run PFS
|
|
60
|
+
Q = pfs(X, target_features, qpath_max=qpath_max, max_radius=max_radius, fdr_local=fdr_local)
|
|
61
|
+
|
|
62
|
+
# Plot the estimated subgraph
|
|
63
|
+
plot_graph(graph=Q, target_features=target_features, radius=max_radius)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Outputs
|
|
67
|
+
- `Q`: Dictionary mapping edges `(i,j)` to q-values. Edges are undirected, so `(i,j)` and `(j,i)` are included.
|
|
68
|
+
|
|
69
|
+
### What PFS does
|
|
70
|
+
- Expands the local graph outward, layer by layer, starting from target variables.
|
|
71
|
+
- Performs neighborhood selection with FDR control using [**integrated path stability selection**](https://github.com/omelikechi/ipss).
|
|
72
|
+
- Controls pathwise false discoveries by summing q-values along candidate paths.
|
|
73
|
+
|
|
74
|
+
## Full list of `pfs` arguments
|
|
75
|
+
|
|
76
|
+
### Required arguments:
|
|
77
|
+
- `X`: n-by-p data matrix (NumPy array). Each column is a feature/variable.
|
|
78
|
+
- `target_features`: Feature index or list of indices to center the graph around.
|
|
79
|
+
- `qpath_max`: Maximum allowed sum of q-values along any path.
|
|
80
|
+
|
|
81
|
+
### Optional arguments:
|
|
82
|
+
- `max_radius`: Maximum number of expansion layers around each target (int; default `3`).
|
|
83
|
+
- `fdr_local`: Neighborhood FDR threshold at each radius (list of length `max_radius`; default `[qpath_max]*max_radius`).
|
|
84
|
+
- `custom_nbhd`: Dictionary specifying custom FDR cutoffs for certain features (dict; default `None`).
|
|
85
|
+
- `feature_names`: List of feature names; required if `custom_nbhd` is provided (list of strings).
|
|
86
|
+
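- `criterion`: Rule for handling an edge to a feature that was already selected in an earlier layer. With `'min'`, the edge is recorded with the smaller of its q-values; with any other value, such edges are skipped (str; default `'min'`).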
|
|
87
|
+
- `selector`: Feature importance method used by IPSS (str; default `'gb'`). Options:
|
|
88
|
+
- `'gb'`: Gradient boosting
|
|
89
|
+
- `'l1'`: L1-regularized regression (lasso)
|
|
90
|
+
- `'rf'`: Random forest
|
|
91
|
+
- Custom function (see `ipss_args`)
|
|
92
|
+
- `ipss_args`: Dictionary of arguments to pass to `ipss` (dict; default `None`)
|
|
93
|
+
- `verbose`: Whether to print progress during selection (bool; default `False`)
|
|
94
|
+
|
|
95
|
+
## Graph plotting
|
|
96
|
+
|
|
97
|
+
Use `plot_graph` to visualize a local graph up to the specified `radius` around one or more target features.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from localgraph import plot_graph
|
|
101
|
+
|
|
102
|
+
# Plot local graph around target_features using output Q from pfs
|
|
103
|
+
plot_graph(graph=Q, target_features=target_features, radius=3)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Features and customization
|
|
107
|
+
`plot_graph` visualizes a local graph of a user-specified radius around one or more target features. It supports:
|
|
108
|
+
- Flexible input formats: edge dictionary, adjacency matrix, or NetworkX graph
|
|
109
|
+
- Automatic subgraph extraction around the targets
|
|
110
|
+
- Node coloring by distance from the target(s) (default), or user-specified colors (e.g., by variable type)
|
|
111
|
+
- Several layout algorithms (`'kamada_kawai'`, `'spring'`, `'circular'`, etc.)
|
|
112
|
+
- Customizable node size, font sizes, and edge thickness
|
|
113
|
+
- Optional display of q-values; edge widths can reflect q-value strength (`edge_widths='q_value'`)
|
|
114
|
+
- False positives shown in red if the true graph is provided
|
|
115
|
+
- Integration with custom plots via `ax` or `pos`
|
|
116
|
+
- Optional saving of figures (`save_fig`) and graphs (`save_graph`)
|
|
117
|
+
|
|
118
|
+
For a full list of arguments, see the [`plot_graph`](./localgraph/plotting/plot_graph.py) docstring.
|
|
119
|
+
|
|
120
|
+
### Returns
|
|
121
|
+
The function returns a dictionary containing:
|
|
122
|
+
- `feature_radius_list`: List of `(feature name, radius)` pairs for all nodes in the graph.
|
|
123
|
+
- `graph`: The NetworkX subgraph used for plotting.
|
|
124
|
+
- `positions`: Dictionary of node coordinates.
|
|
125
|
+
- `figure`: The matplotlib figure object (only if the function creates the figure).
|
|
126
|
+
|
|
127
|
+
### Further customization
|
|
128
|
+
|
|
129
|
+
To manually adjust node positions for publication-quality figures, you can export graphs to [**Gephi**](https://gephi.org/), edit them interactively, and re-import the updated layout into Python. See: [gephi_instructions.md](./gephi_instructions.md) for a full walkthrough.
|
|
130
|
+
|
|
131
|
+
## Examples
|
|
132
|
+
|
|
133
|
+
The `examples/` folder contains scripts that demonstrate end-to-end usage:
|
|
134
|
+
|
|
135
|
+
- `simple_example.py`: Simulate data, run PFS, and visualize the result.
|
|
136
|
+
|
|
137
|
+
## Evaluation tools
|
|
138
|
+
|
|
139
|
+
The `evaluation/` folder contains helper functions for measuring subgraph recovery in simulation settings.
|
|
140
|
+
|
|
141
|
+
- The `eval.py` script contains two functions:
|
|
142
|
+
- `subgraph_within_radius`: Extract true subgraph around a target node (useful for identifying subgraphs within full graphs)
|
|
143
|
+
- `tp_and_fp`: Count true and false positives compared to ground truth
|
|
144
|
+
|
|
145
|
+
These are useful for benchmarking PFS and other graph estimation methods when the true graph is known.
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
localgraph/__init__.py
|
|
5
|
+
localgraph.egg-info/PKG-INFO
|
|
6
|
+
localgraph.egg-info/SOURCES.txt
|
|
7
|
+
localgraph.egg-info/dependency_links.txt
|
|
8
|
+
localgraph.egg-info/requires.txt
|
|
9
|
+
localgraph.egg-info/top_level.txt
|
|
10
|
+
localgraph/evaluation/__init__.py
|
|
11
|
+
localgraph/evaluation/eval.py
|
|
12
|
+
localgraph/examples/__init__.py
|
|
13
|
+
localgraph/examples/simple_example.py
|
|
14
|
+
localgraph/pfs/__init__.py
|
|
15
|
+
localgraph/pfs/helpers.py
|
|
16
|
+
localgraph/pfs/main.py
|
|
17
|
+
localgraph/plotting/__init__.py
|
|
18
|
+
localgraph/plotting/helpers.py
|
|
19
|
+
localgraph/plotting/plot_graph.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
localgraph
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from setuptools import setup, find_packages

# Read the README with an explicit encoding and close the file as soon as it is read,
# so the build does not depend on the platform's default encoding.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='localgraph',
    version='0.1.0',
    author='Omar Melikechi',
    author_email='omar.melikechi@gmail.com',
    description='Local graph estimation with pathwise feature selection',
    long_description=long_description,
    long_description_content_type='text/markdown',
    packages=find_packages(include=["localgraph", "localgraph.*"]),
    install_requires=[
        'ipss>=1.1.1',
        'matplotlib>=3.0.0',
        'networkx>=2.0',
        'numpy>=1.16.0',
    ],
    python_requires='>=3.6',
    include_package_data=True,
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
)
|