pyfglt 0.3.0__cp39-cp39-win_amd64.whl
Sign up to get free protection for your applications and to get access to all the features.
- pyfglt/__init__.py +0 -0
- pyfglt/_fglt_c.cp39-win_amd64.dll.a +0 -0
- pyfglt/_fglt_c.cp39-win_amd64.pyd +0 -0
- pyfglt/fglt.py +201 -0
- pyfglt-0.3.0.data/headers/fglt.hpp +70 -0
- pyfglt-0.3.0.dist-info/METADATA +132 -0
- pyfglt-0.3.0.dist-info/RECORD +8 -0
- pyfglt-0.3.0.dist-info/WHEEL +4 -0
pyfglt/__init__.py
ADDED
File without changes
|
Binary file
|
Binary file
|
pyfglt/fglt.py
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
import numpy
|
2
|
+
import scipy.sparse as sp
|
3
|
+
from scipy.sparse import csc_matrix, issparse
|
4
|
+
|
5
|
+
import networkx as nx
|
6
|
+
import numpy as np
|
7
|
+
from typeguard import typechecked
|
8
|
+
from typing import Union
|
9
|
+
|
10
|
+
from pyfglt import _fglt_c
|
11
|
+
|
12
|
+
import pandas as pd
|
13
|
+
|
14
|
+
# Column labels for the per-node graphlet frequency tables built by
# ``compute`` (passed as ``columns=`` to ``pd.DataFrame``). There are 16
# entries; the bracketed number in each label is its position in this list.
COLUMNS = [
    "[0] vertex (==1)",
    "[1] degree",
    "[2] 2-path",
    "[3] bifork",
    "[4] 3-cycle",
    "[5] 3-path, end",
    "[6] 3-path, interior",
    "[7] claw, leaf",
    "[8] claw, root",
    "[9] paw, handle",
    "[10] paw, base",
    "[11] paw, center",
    "[12] 4-cycle",
    "[13] diamond, off-cord",
    "[14] diamond, on-cord",
    "[15] 4-clique",
]
|
32
|
+
|
33
|
+
# NOTE(review): the builtin ``tuple[...]`` subscript in the return annotation
# needs Python 3.9+; confirm this matches the package's declared minimum.
@typechecked
def compute(
    A: Union[nx.Graph, csc_matrix], raw: bool = False
) -> Union[pd.DataFrame, tuple[pd.DataFrame, pd.DataFrame]]:
    """Compute the counts of the Fast Graphlet Transform.

    Accepts either an undirected NetworkX graph or a CSC sparse adjacency
    matrix. A NetworkX graph is converted to an unweighted CSC adjacency
    matrix (every edge is stored as 1). A CSC matrix is used as given after
    verifying that it is square, symmetric and contains only 0s and 1s.

    Args:
        A (Union[nx.Graph, csc_matrix]): Either the graph as a
            `networkx.Graph` object or the adjacency matrix of the graph in
            `scipy.sparse.csc_matrix` format.
        raw (bool): If True, return both the net and the raw counts of the
            graphlets. If False, return only the net counts.
            Defaults to False.

    Returns:
        FN (DataFrame): The net counts of the graphlets, one row per node
            and one column per entry of ``COLUMNS``.
        F (DataFrame): The raw counts, same layout. Only returned (as the
            second element of a tuple ``(FN, F)``) when ``raw=True``.

    Raises:
        ValueError: If the graph is directed, or if the CSC matrix is not
            square, not symmetric, or holds values other than 0 and 1.
        TypeError: If ``A`` is neither a NetworkX graph nor a CSC matrix.
    """
    if isinstance(A, nx.Graph):
        # nx.DiGraph is a subclass of nx.Graph, so the isinstance test alone
        # does not rule out directed input.
        if A.is_directed():
            raise ValueError("Graph must be undirected.")

        # weight=None makes every edge count as 1, so a "weight" edge
        # attribute cannot leak into the matrix; this keeps the graph path
        # consistent with the 0/1 requirement enforced on the CSC path.
        csc_adj = nx.adjacency_matrix(A, weight=None).tocsc()

    elif isinstance(A, csc_matrix):
        csc_adj = A  # used directly, no copy

        n_rows, n_cols = csc_adj.shape
        if n_rows != n_cols:
            raise ValueError("CSC matrix must be square (adjacency matrix).")

        # Symmetric iff A - A.T has no stored non-zero entries.
        if abs(csc_adj - csc_adj.T).nnz != 0:
            raise ValueError("CSC matrix must be symmetric (undirected graph).")

        # Only the stored entries need checking; implicit entries are 0.
        # NOTE(review): explicitly stored zeros pass this check; whether the
        # native routine treats them as edges is not visible here - confirm.
        if not np.all(np.isin(csc_adj.data, [0, 1])):
            raise ValueError("CSC matrix must be unweighted (contain only 0s and 1s).")

    else:
        raise TypeError("Input must be either a NetworkX undirected graph or a CSC matrix.")

    f, fn = _fglt_c.count(csc_adj)

    # Cast to int64 and transpose so that each row is labelled by a node and
    # each column by an entry of COLUMNS.
    raw_counts = pd.DataFrame(f.astype(np.int64).T, columns=COLUMNS)
    net_counts = pd.DataFrame(fn.astype(np.int64).T, columns=COLUMNS)

    raw_counts.index.name = "Node id (0-based)"
    net_counts.index.name = "Node id (0-based)"

    if raw:
        return net_counts, raw_counts
    return net_counts
|
100
|
+
|
101
|
+
@typechecked
def compute_rgf_distance(df_g1: pd.DataFrame, df_g2: pd.DataFrame) -> float:
    """Compute the Relative Graphlet Frequency (RGF) distance of two graphs.

    For each graph the orbit counts are summed over all vertices and divided
    by the grand total, giving one relative frequency per orbit. The distance
    is the sum of the absolute differences of these frequencies. A graph
    whose grand total is zero is treated as having all-zero frequencies.

    Args:
        df_g1 (pd.DataFrame): Orbit counts for Graph 1 (rows=vertices, columns=orbits).
        df_g2 (pd.DataFrame): Orbit counts for Graph 2 (rows=vertices, columns=orbits).

    Returns:
        d (float): The RGF distance between the two graphs.

    Raises:
        ValueError: If the two DataFrames do not have the same set of column
            labels.
    """
    # Sum of orbit counts across all vertices for each orbit
    orbit_sums_g1 = df_g1.sum(axis=0)
    orbit_sums_g2 = df_g2.sum(axis=0)

    # The subtraction below aligns on column labels. Labels present in only
    # one frame would become NaN and be skipped by the sum, silently
    # shrinking the distance (down to 0.0 for disjoint labels).
    if set(orbit_sums_g1.index) != set(orbit_sums_g2.index):
        raise ValueError("df_g1 and df_g2 must have the same orbit columns.")

    total_g1 = orbit_sums_g1.sum()
    total_g2 = orbit_sums_g2.sum()

    # Relative frequencies for each orbit (all zeros when there are no counts)
    rel_freq_g1 = orbit_sums_g1 / total_g1 if total_g1 != 0 else orbit_sums_g1 * 0
    rel_freq_g2 = orbit_sums_g2 / total_g2 if total_g2 != 0 else orbit_sums_g2 * 0

    # RGF distance = sum of absolute differences, as a built-in float
    return float((rel_freq_g1 - rel_freq_g2).abs().sum())
|
131
|
+
|
132
|
+
|
133
|
+
@typechecked
def compute_graphlet_correlation_matrix(df_g: pd.DataFrame, method='spearman') -> pd.DataFrame:
    """Compute the Graphlet Correlation Matrix (GCM) for a single graph.

    The first column of ``df_g`` is excluded before the pairwise column
    correlations are computed (presumably because orbit 0 is the constant
    "vertex (==1)" column, whose correlation is undefined - confirm).

    Args:
        df_g (pd.DataFrame): Orbit counts for a graph (rows=vertices, columns=orbits).
        method: Correlation method, forwarded unchanged to
            ``DataFrame.corr``. Can be 'pearson', 'spearman', or 'kendall'.
            Defaults to 'spearman'.

    Returns:
        C (pd.DataFrame): Correlation matrix of shape
            (n_orbits - 1, n_orbits - 1), labelled by the retained columns.
    """
    orbit_counts = df_g.iloc[:, 1:]  # drop the first column
    return orbit_counts.corr(method=method)
|
146
|
+
|
147
|
+
|
148
|
+
@typechecked
def gcm_distance(gcm1: pd.DataFrame, gcm2: pd.DataFrame) -> float:
    """Compute the L1 distance between two graphlet correlation matrices.

    The matrices are compared element by element, by position (row and
    column labels are ignored), and the absolute differences are summed.
    NaN entries in either matrix propagate, making the result NaN.

    Args:
        gcm1 (pd.DataFrame): GCM of the first graph
        gcm2 (pd.DataFrame): GCM of the second graph

    Returns:
        d (float): A distance measure between the two GCMs.

    Raises:
        ValueError: If the two matrices do not have the same shape.
    """
    values_1 = gcm1.to_numpy()
    values_2 = gcm2.to_numpy()

    # Without this check NumPy could broadcast unequal shapes (for example a
    # single row against a full matrix) and return a meaningless number.
    if values_1.shape != values_2.shape:
        raise ValueError(
            f"GCM shapes must match, got {values_1.shape} and {values_2.shape}."
        )

    return float(np.abs(values_1 - values_2).sum())
|
163
|
+
|
164
|
+
|
165
|
+
def _bin_probabilities(values, edges):
    """Return the fraction of ``values`` falling in each bin of ``edges``."""
    counts, _ = np.histogram(values, bins=edges)
    total = counts.sum()
    if total == 0:
        # Nothing landed inside the bins: report an all-zero distribution
        # instead of dividing by zero.
        return np.zeros(len(counts), dtype=float)
    return counts / total


@typechecked
def compute_gdd_agreement(df_g1: pd.DataFrame, df_g2: pd.DataFrame, bins=None) -> float:
    """Compute Graphlet Degree Distribution (GDD) agreement between two graphs.

    For every orbit column of ``df_g1`` the per-vertex counts of both graphs
    are binned on the same bin edges and turned into probabilities. The
    overlap of the two distributions (sum of the bin-wise minima) is computed
    per orbit, and the agreement is the mean overlap across orbits.

    Args:
        df_g1 (pd.DataFrame): Orbit counts for Graph 1 (rows=vertices, columns=orbits).
        df_g2 (pd.DataFrame): Orbit counts for Graph 2 (rows=vertices, columns=orbits).
            Must contain every column label of ``df_g1``.
        bins (Union[int, str, sequence]): Bins for the histograms, resolved
            once on the pooled values of both graphs so both graphs share
            the same edges. If None, one unit-width bin per integer from 0
            up to the largest count is used; note that this allocates about
            ``max_count + 1`` bins.

    Returns:
        s (float): The GDD agreement in [0, 1].

    Raises:
        ValueError: If either DataFrame holds no values.
        KeyError: If a column of ``df_g1`` is missing from ``df_g2``.
    """
    values_g1 = df_g1.to_numpy()
    values_g2 = df_g2.to_numpy()
    if values_g1.size == 0 or values_g2.size == 0:
        raise ValueError("Orbit-count DataFrames must not be empty.")

    if bins is None:
        # Edges at -0.5, 0.5, 1.5, ... so that each integer is its own bin.
        combined_max = max(values_g1.max(), values_g2.max())
        edges = np.arange(0, combined_max + 2) - 0.5
    else:
        # Resolve an int/str/sequence spec into explicit edges on the pooled
        # data. Letting np.histogram pick a range per call would give each
        # graph different bins and make the bin-wise comparison meaningless.
        pooled = np.concatenate([values_g1.ravel(), values_g2.ravel()])
        edges = np.histogram_bin_edges(pooled, bins=bins)

    # Probabilities (counts / total) rather than densities keep every overlap
    # in [0, 1] whatever the bin widths are; for unit-width bins the two
    # normalisations are identical.
    overlaps = [
        np.minimum(
            _bin_probabilities(df_g1[orbit_col], edges),
            _bin_probabilities(df_g2[orbit_col], edges),
        ).sum()
        for orbit_col in df_g1.columns
    ]

    # Average overlap across orbits
    return float(np.mean(overlaps))
|
@@ -0,0 +1,70 @@
|
|
1
|
+
/*!
|
2
|
+
\file fglt.hpp
|
3
|
+
  \brief Header file containing basic function definitions for FGlT
|
4
|
+
|
5
|
+
\author Dimitris Floros
|
6
|
+
\date 2020-08-18
|
7
|
+
*/
|
8
|
+
|
9
|
+
|
10
|
+
#ifndef FGLT_H_
|
11
|
+
#define FGLT_H_
|
12
|
+
|
13
|
+
#include <stdio.h>
|
14
|
+
#include <cstdlib>
|
15
|
+
#include <cmath>
|
16
|
+
#include <ctype.h>
|
17
|
+
|
18
|
+
#include <sys/time.h>
|
19
|
+
|
20
|
+
// Type definitions.
// mwSize and mwIndex are the integer types used for the size and index
// arguments of the compute() prototype declared later in this header:
// plain `int` when MX_COMPAT_32 is defined, `size_t` otherwise.
#ifdef MX_COMPAT_32
typedef int mwSize;
typedef int mwIndex;
#else
typedef size_t    mwSize;         /* unsigned pointer-width integer */
typedef size_t    mwIndex;        /* unsigned pointer-width integer */
#endif

// NOTE(review): presumably the number of graphlet frequencies computed per
// node (the "16" in the compute() documentation) - confirm.
#define NGRAPHLET 16
|
30
|
+
|
31
|
+
|
32
|
+
#ifdef __cplusplus
|
33
|
+
extern "C" {
|
34
|
+
#endif
|
35
|
+
|
36
|
+
/*!
 * \brief Get the number of parallel workers available.
 *
 * Declared inside this header's extern "C" block, so the symbol has C
 * linkage when compiled as C++.
 *
 * \return The number of workers available. Always 1 when built without Cilk support.
 */
int getWorkers();
|
42
|
+
|
43
|
+
/*!
 * \brief Perform the FGLT transform.
 *
 * Declared inside this header's extern "C" block, so the symbol has C
 * linkage when compiled as C++.
 *
 * NOTE(review): the parameter names suggest a compressed-sparse-column
 * layout (ii = row index of each stored entry, jStart = offset of each
 * column's first entry), which does not match the wording of the ii and
 * jStart descriptions below - confirm against the implementation.
 *
 * \param f [out] An array-of-pointers of size (n, 16) where the raw frequencies should be stored.
 * \param fn [out] An array-of-pointers of size (n, 16) where the net frequencies should be stored.
 * \param ii [in] The column indices of the adjacency matrix.
 * \param jStart [in] The first non-zero row index of each column.
 * \param n [in] The number of columns of the adjacency matrix.
 * \param m [in] The number of nonzero elements in the adjacency matrix.
 * \param np [in] The number of parallel workers to use for the transform.
 * \return status (0: success, otherwise error)
 */
int compute
(
 double ** const f,
 double ** const fn,
 mwIndex *ii,
 mwIndex *jStart,
 mwSize n,
 mwSize m,
 mwSize np
 );
|
65
|
+
|
66
|
+
#ifdef __cplusplus
|
67
|
+
}
|
68
|
+
#endif
|
69
|
+
|
70
|
+
#endif /* FGLT_H_ */
|
@@ -0,0 +1,132 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: pyfglt
|
3
|
+
Version: 0.3.0
|
4
|
+
Summary: Python package/wrapper of Fast Graphlet Transform
|
5
|
+
Author: Dimitris Floros
|
6
|
+
License: Apache-2.0
|
7
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
8
|
+
Project-URL: Homepage, https://github.com/fcdimitr/pyfglt
|
9
|
+
Project-URL: Changelog, https://github.com/fcdimitr/pyfglt/releases
|
10
|
+
Project-URL: Issues, https://github.com/fcdimitr/pyfglt/issues
|
11
|
+
Project-URL: CI, https://github.com/fcdimitr/pyfglt/actions
|
12
|
+
Project-URL: Documentation, https://fcdimitr.github.io/pyfglt/
|
13
|
+
Requires-Python: >=3.8
|
14
|
+
Requires-Dist: numpy>=1.19.5
|
15
|
+
Requires-Dist: scipy>=1.7.3
|
16
|
+
Requires-Dist: pandas>=2.2.0
|
17
|
+
Requires-Dist: networkx>=3.2.1
|
18
|
+
Requires-Dist: typeguard>=4.4.1
|
19
|
+
Provides-Extra: test
|
20
|
+
Requires-Dist: pytest; extra == "test"
|
21
|
+
Provides-Extra: docs
|
22
|
+
Requires-Dist: mkdocs-material>=9.5.50; extra == "docs"
|
23
|
+
Requires-Dist: mkdocstrings>=0.27.0; extra == "docs"
|
24
|
+
Requires-Dist: mkdocstrings-python>=1.13.0; extra == "docs"
|
25
|
+
Requires-Dist: matplotlib>=3.10.0; extra == "docs"
|
26
|
+
Requires-Dist: jupyter>=1.1.1; extra == "docs"
|
27
|
+
Requires-Dist: tabulate>=0.9.0; extra == "docs"
|
28
|
+
Requires-Dist: seaborn>=0.13.2; extra == "docs"
|
29
|
+
Description-Content-Type: text/markdown
|
30
|
+
|
31
|
+
# pyfglt
|
32
|
+
|
33
|
+
[Documentation](https://fcdimitr.github.io/pyfglt/)
|
34
|
+
[PyPI](https://pypi.org/project/pyfglt/)
|
35
|
+
[Tests](https://github.com/fcdimitr/pyfglt/actions/workflows/test.yml)
|
36
|
+
[Changelog](https://github.com/fcdimitr/pyfglt/releases)
|
37
|
+
[License](https://github.com/fcdimitr/pyfglt/blob/main/LICENSE)
|
38
|
+
|
39
|
+
Python package/wrapper of Fast Graphlet Transform. See the [documentation
|
40
|
+
overview](https://fcdimitr.github.io/pyfglt/) for more information.
|
41
|
+
|
42
|
+
## Installation
|
43
|
+
|
44
|
+
Install this library using `pip`:
|
45
|
+
```bash
|
46
|
+
pip install pyfglt
|
47
|
+
```
|
48
|
+
## Usage
|
49
|
+
|
50
|
+
See the examples under:
|
51
|
+
|
52
|
+
- [Getting started](https://fcdimitr.github.io/pyfglt/tutorial/01-getting-started)
|
53
|
+
- [Advanced usage](https://fcdimitr.github.io/pyfglt/tutorial/02-graphlet-based-network-properties)
|
54
|
+
|
55
|
+
## Citation
|
56
|
+
|
57
|
+
If you use this package, please cite this paper:
|
58
|
+
|
59
|
+
```bibtex
|
60
|
+
@article{fglt,
|
61
|
+
author = {Floros, Dimitris and Pitsianis, Nikos and Sun, Xiaobai},
|
62
|
+
journal = {IEEE HPEC},
|
63
|
+
pages = {1--8},
|
64
|
+
title = {{Fast graphlet transform of sparse graphs}},
|
65
|
+
year = {2020}
|
66
|
+
}
|
67
|
+
```
|
68
|
+
|
69
|
+
## **How to Contribute?**
|
70
|
+
If you’d like to contribute, feel free to submit a PR! 🚀
|
71
|
+
|
72
|
+
|
73
|
+
To contribute to this library, first check out the code. Then create a new virtual environment:
|
74
|
+
```bash
|
75
|
+
cd pyfglt
|
76
|
+
python -m venv venv
|
77
|
+
source venv/bin/activate
|
78
|
+
```
|
79
|
+
Now install the dependencies and test dependencies:
|
80
|
+
```bash
|
81
|
+
python -m pip install -e '.[test]'
|
82
|
+
```
|
83
|
+
To run the tests:
|
84
|
+
```bash
|
85
|
+
python -m pytest
|
86
|
+
```
|
87
|
+
|
88
|
+
## **Contributors**
|
89
|
+
|
90
|
+
Below is a categorized list of contributors.
|
91
|
+
|
92
|
+
### **Design and Development**
|
93
|
+
|
94
|
+
<table>
|
95
|
+
<tr>
|
96
|
+
<td align="center"><a href="https://github.com/fcdimitr"><img src="https://github.com/fcdimitr.png" width="60px;" alt=""/><br /><sub><b>Dimitris Floros<br/>@fcdimitr</b></sub></a></td>
|
97
|
+
<td align="center"><a href="https://github.com/pitsianis"><img src="https://github.com/pitsianis.png" width="60px;" alt=""/><br /><sub><b>Nikos Pitsianis<br/>@pitsianis</b></sub></a></td>
|
98
|
+
<td align="center"><a href="https://spire.duke.edu/xiaobai-sun"><img src="https://spire.duke.edu/sites/spire.duke.edu/files/styles/square_small/public/2022-08/xiaobai%20%281%29.jpg?h=735f7c84&itok=mUth7Zqn" width="60px;" alt=""/><br /><sub><b>Xiaobai Sun<br/> _ </b></sub></a></td>
|
99
|
+
</tr>
|
100
|
+
</table>
|
101
|
+
|
102
|
+
---
|
103
|
+
|
104
|
+
### **Development of Python & Julia Wrappers**
|
105
|
+
|
106
|
+
<table>
|
107
|
+
<tr>
|
108
|
+
<td align="center"><a href="https://github.com/nsailor"><img src="https://github.com/nsailor.png" width="60px;" alt=""/><br /><sub><b>Jason Barmparesos<br/>@nsailor</b></sub></a></td>
|
109
|
+
<td align="center"><a href="https://github.com/kitsiosk"><img src="https://github.com/kitsiosk.png" width="60px;" alt=""/><br /><sub><b>Kitsios Konstantinos<br/>@kitsiosk</b></sub></a></td>
|
110
|
+
</tr>
|
111
|
+
</table>
|
112
|
+
|
113
|
+
---
|
114
|
+
|
115
|
+
### **Helpful Comments and Bug Fixes**
|
116
|
+
|
117
|
+
<table>
|
118
|
+
<tr>
|
119
|
+
<td align="center"><a href="https://github.com/georgebisbas"><img src="https://github.com/georgebisbas.png" width="60px;" alt=""/><br /><sub><b>George Bisbas<br/>@georgebisbas</b></sub></a></td>
|
120
|
+
</tr>
|
121
|
+
</table>
|
122
|
+
|
123
|
+
<!-- ---
|
124
|
+
|
125
|
+
### **🫶 We Also Thank the Following People**
|
126
|
+
We appreciate the valuable feedback, discussions, and testing provided by:
|
127
|
+
|
128
|
+
- [@username7](https://github.com/username7)
|
129
|
+
- [@username8](https://github.com/username8)
|
130
|
+
- [@username9](https://github.com/username9) -->
|
131
|
+
|
132
|
+
---
|
@@ -0,0 +1,8 @@
|
|
1
|
+
pyfglt-0.3.0.dist-info/METADATA,sha256=AYlqNaZIiEU6iRpepqLnLAsriYntjTaSHVVTWmsVT_k,4782
|
2
|
+
pyfglt-0.3.0.dist-info/WHEEL,sha256=8AdrFzOtKQ6LLJ-VyqCU3y1iN8N--fMXYqrdkeTKDn0,83
|
3
|
+
pyfglt/_fglt_c.cp39-win_amd64.pyd,sha256=4DRZAAnnK5PGtQ3uLiUxb9E40yDG37hJ8L1ZPmT5CoM,252083
|
4
|
+
pyfglt/_fglt_c.cp39-win_amd64.dll.a,sha256=uTVkOciL9PGQZTHhucrU_n12ErSbz_ERNJIrzUNwUUg,1714
|
5
|
+
pyfglt/fglt.py,sha256=HpfrvKVVNiNuyDCRBAN0L0DLuGSIGJZqmwEsdpaYGC0,6857
|
6
|
+
pyfglt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
pyfglt-0.3.0.data/headers/fglt.hpp,sha256=y-7XcVVNLwVXY-nNQfwGGiw8QkMWQ_stgaLeDzlWzv0,1648
|
8
|
+
pyfglt-0.3.0.dist-info/RECORD,,
|