pyfglt 0.3.0__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
pyfglt/__init__.py ADDED
File without changes
pyfglt/fglt.py ADDED
@@ -0,0 +1,201 @@
1
+ import numpy
2
+ import scipy.sparse as sp
3
+ from scipy.sparse import csc_matrix, issparse
4
+
5
+ import networkx as nx
6
+ import numpy as np
7
+ from typeguard import typechecked
8
+ from typing import Union
9
+
10
+ from pyfglt import _fglt_c
11
+
12
+ import pandas as pd
13
+
14
# Human-readable labels for the 16 graphlet columns, in index order. The
# bracketed number in each label is the column's position in this list.
# `compute` uses this list as the column labels of the DataFrames it returns.
COLUMNS = [
    "[0] vertex (==1)",
    "[1] degree",
    "[2] 2-path",
    "[3] bifork",
    "[4] 3-cycle",
    "[5] 3-path, end",
    "[6] 3-path, interior",
    "[7] claw, leaf",
    "[8] claw, root",
    "[9] paw, handle",
    "[10] paw, base",
    "[11] paw, center",
    "[12] 4-cycle",
    "[13] diamond, off-cord",
    "[14] diamond, on-cord",
    "[15] 4-clique",
]
32
+
33
+ @typechecked
34
def compute(A: Union[nx.Graph, csc_matrix], raw: bool = False) -> Union[pd.DataFrame, tuple[pd.DataFrame, pd.DataFrame]]:
    """Compute the counts of the Fast Graphlet Transform.

    Accepts either an undirected NetworkX graph or a CSC sparse matrix.
    A NetworkX graph is converted to a CSC adjacency matrix; edge attributes
    are ignored so that every edge contributes a 1. A CSC matrix is used as
    given after verifying that it is symmetric and unweighted.

    Args:
        A (Union[nx.Graph, csc_matrix]): Either the graph as a `networkx.Graph` object
            or the adjacency matrix of the graph in `scipy.sparse.csc_matrix` format.
        raw (bool): If True, return both the net and the raw counts of the graphlets.
            If False, return only the net counts.
            Defaults to False.

    Returns:
        F (DataFrame): A dataframe with the net counts of the graphlets
            (one row per node, one column per entry of `COLUMNS`).
        F_raw (DataFrame): A dataframe with the raw counts of the graphlets.
            Only returned when raw=True, as the second element of the tuple
            `(F, F_raw)`.

    Raises:
        ValueError: If the NetworkX graph is directed, or if the CSC matrix is
            not symmetric or contains values other than 0 and 1.
        TypeError: If the input is neither a NetworkX graph nor a CSC matrix.
    """

    if isinstance(A, nx.Graph):
        # nx.DiGraph is a subclass of nx.Graph, so directedness must be
        # checked explicitly.
        if A.is_directed():
            raise ValueError("Graph must be undirected.")

        # weight=None makes every edge count as 1. The default
        # (weight="weight") would copy edge weights into the matrix and
        # bypass the "unweighted" requirement enforced for CSC input below.
        csc_adj = nx.adjacency_matrix(A, weight=None).tocsc()

    elif issparse(A) and isinstance(A, csc_matrix):
        csc_adj = A  # use the caller's matrix directly (no copy)

        # Ensure symmetry (A == A.T)
        if abs(csc_adj - csc_adj.T).nnz != 0:
            raise ValueError("CSC matrix must be symmetric (undirected graph).")

        # Ensure unweighted (all stored elements are 0 or 1)
        if not np.all(np.isin(csc_adj.data, [0, 1])):
            raise ValueError("CSC matrix must be unweighted (contain only 0s and 1s).")

    else:
        raise TypeError("Input must be either a NetworkX undirected graph or a CSC matrix.")

    # f holds the raw counts, fn the net counts.
    f, fn = _fglt_c.count(csc_adj)

    # Cast to int64 and transpose so that rows are nodes and columns are
    # the graphlet types listed in COLUMNS.
    f = f.astype(np.int64).T
    fn = fn.astype(np.int64).T

    F = pd.DataFrame(f, columns=COLUMNS)
    FN = pd.DataFrame(fn, columns=COLUMNS)

    F.index.name = "Node id (0-based)"
    FN.index.name = "Node id (0-based)"

    if raw:
        return FN, F
    return FN
100
+
101
+ @typechecked
102
def compute_rgf_distance(df_g1:pd.DataFrame, df_g2:pd.DataFrame) -> float:
    """Compute the Relative Graphlet Frequency (RGF) distance between two graphs.

    For each graph the orbit counts are summed over all vertices and divided
    by the grand total, giving one relative frequency per orbit. The distance
    is the sum of absolute differences between the two frequency vectors.
    If a graph's grand total is zero, all of its frequencies are taken as 0.

    Args:
        df_g1 (pd.DataFrame): Orbit counts for Graph 1 (rows=vertices, columns=orbits).
        df_g2 (pd.DataFrame): Orbit counts for Graph 2 (rows=vertices, columns=orbits).

    Returns:
        d (float): The RGF distance between the two graphs.
    """

    # Per-orbit totals across all vertices (Series indexed by column label)
    orbit_sums_g1 = df_g1.sum(axis=0)
    orbit_sums_g2 = df_g2.sum(axis=0)

    # Grand totals over all orbits
    total_g1 = orbit_sums_g1.sum()
    total_g2 = orbit_sums_g2.sum()

    # Relative frequencies; guard against division by zero for empty counts
    rel_freq_g1 = orbit_sums_g1 / total_g1 if total_g1 != 0 else orbit_sums_g1 * 0
    rel_freq_g2 = orbit_sums_g2 / total_g2 if total_g2 != 0 else orbit_sums_g2 * 0

    # The subtraction aligns the two Series on their column labels.
    # float(...) guarantees the annotated return type: on the zero-total path
    # with integer counts the sum would otherwise be a NumPy integer scalar.
    return float(np.sum(np.abs(rel_freq_g1 - rel_freq_g2)))
131
+
132
+
133
+ @typechecked
134
def compute_graphlet_correlation_matrix(df_g:pd.DataFrame, method='spearman'):
    """
    Compute the Graphlet Correlation Matrix (GCM) for a single graph.

    The first column of `df_g` is excluded before correlating, so the result
    has one row/column fewer than `df_g` has columns.

    Args:
        df_g (pd.DataFrame): Orbit counts for a graph (rows=vertices, columns=orbits).
        method (str): Correlation method, forwarded to `DataFrame.corr`.
            Can be 'pearson', 'spearman', or 'kendall'.

    Returns:
        C (pd.DataFrame): Pairwise correlation matrix of the remaining orbit columns.
    """
    # Skip column 0 (presumably the constant "vertex" orbit, whose
    # correlation would be undefined - confirm against the caller's data).
    orbit_counts = df_g.iloc[:, 1:]
    correlation = orbit_counts.corr(method=method)
    return correlation
146
+
147
+
148
+ @typechecked
149
def gcm_distance(gcm1:pd.DataFrame, gcm2:pd.DataFrame):
    """
    Compute a simple distance between two correlation matrices.

    The distance is the sum of absolute element-wise differences (L1
    distance). Entries are compared by position, not by row/column label.
    NaN entries in either matrix propagate to the result.

    Args:
        gcm1 (pd.DataFrame): GCM of the first graph
        gcm2 (pd.DataFrame): GCM of the second graph

    Returns:
        d (float): A distance measure between the two GCMs.

    Raises:
        ValueError: If the two matrices do not have the same shape.
    """
    m1 = gcm1.to_numpy()
    m2 = gcm2.to_numpy()

    # Without this check NumPy would silently broadcast compatible but
    # different shapes (e.g. 1x1 against 15x15) into a meaningless distance.
    if m1.shape != m2.shape:
        raise ValueError(
            f"GCMs must have the same shape, got {m1.shape} and {m2.shape}."
        )

    return float(np.sum(np.abs(m1 - m2)))
163
+
164
+
165
+ @typechecked
166
def compute_gdd_agreement(df_g1: pd.DataFrame, df_g2:pd.DataFrame, bins=None):
    """
    Compute Graphlet Degree Distribution (GDD) agreement between two graphs.

    For every orbit column of `df_g1`, the per-vertex counts of both graphs
    are histogrammed on the same bin edges and converted to per-bin
    probabilities. The overlap of an orbit is the sum over bins of the
    smaller of the two probabilities; the agreement is the mean overlap
    across orbits.

    Args:
        df_g1 (pd.DataFrame): Orbit counts for Graph 1 (rows=vertices, columns=orbits).
        df_g2 (pd.DataFrame): Orbit counts for Graph 2 (rows=vertices, columns=orbits).
            Must contain every column of `df_g1`.
        bins (Union[int, str, sequence]): Bin specification as accepted by
            `numpy.histogram_bin_edges`. If None, each integer from 0 up to
            the largest count in either graph gets its own bin.

    Returns:
        s (float): The GDD agreement in [0, 1].
    """
    if bins is None:
        # One unit-width bin centred on each integer 0..max count.
        combined_max = max(df_g1.values.max(), df_g2.values.max())
        bins = np.arange(0, combined_max + 2) - 0.5

    overlaps = []

    for orbit_col in df_g1.columns:
        values_g1 = df_g1[orbit_col].to_numpy()
        values_g2 = df_g2[orbit_col].to_numpy()

        # Both graphs must share the same edges. With an integer or string
        # `bins`, separate np.histogram calls would each derive edges from
        # their own data range and the bins would not be comparable.
        edges = np.histogram_bin_edges(
            np.concatenate((values_g1, values_g2)), bins=bins
        )

        probabilities = []
        for values in (values_g1, values_g2):
            counts, _ = np.histogram(values, bins=edges)
            total = counts.sum()
            # Per-bin probability rather than density, so the overlap stays
            # in [0, 1] for any bin width. An empty histogram is all zeros.
            if total > 0:
                probabilities.append(counts / total)
            else:
                probabilities.append(np.zeros(len(counts)))

        overlaps.append(np.sum(np.minimum(probabilities[0], probabilities[1])))

    # Average overlap across orbits
    return float(np.mean(overlaps))
@@ -0,0 +1,70 @@
1
+ /*!
2
+ \file fglt.hpp
3
+ \brief Header file containing basic function definitions for FGlT
4
+
5
+ \author Dimitris Floros
6
+ \date 2020-08-18
7
+ */
8
+
9
+
10
+ #ifndef FGLT_H_
11
+ #define FGLT_H_
12
+
13
+ #include <stdio.h>
14
+ #include <cstdlib>
15
+ #include <cmath>
16
+ #include <ctype.h>
17
+
18
+ #include <sys/time.h>
19
+
20
+ // type definitions
21
+ #ifdef MX_COMPAT_32
22
+ typedef int mwSize;
23
+ typedef int mwIndex;
24
+ #else
25
+ typedef size_t mwSize; /* unsigned pointer-width integer */
26
+ typedef size_t mwIndex; /* unsigned pointer-width integer */
27
+ #endif
28
+
29
+ #define NGRAPHLET 16
30
+
31
+
32
+ #ifdef __cplusplus
33
+ extern "C" {
34
+ #endif
35
+
36
+ /*!
37
+ * \brief Get the number of parallel workers available.
38
+ *
39
+ * \return The number of workers available. Always 1 when built without Cilk support.
40
+ */
41
+ int getWorkers();
42
+
43
+ /*!
44
+ * \brief Perform the FGLT transform.
45
+ *
46
+ * \param f [out] An array-of-pointers of size (n, 16) where the raw frequencies should be stored.
47
+ * \param fn [out] An array-of-pointers of size (n, 16) where the net frequencies should be stored.
48
+ * \param ii [in] The column indices of the adjacency matrix.
49
+ * \param jStart [in] The first non-zero row index of each column.
50
+ * \param n [in] The number of columns of the adjacency matrix.
51
+ * \param m [in] The number of nonzero elements in the adjacency matrix.
52
+ * \param np [in] The number of parallel workers to use for the transform.
53
+ * \return status (0: success, otherwise error)
54
+ */
55
+ int compute
56
+ (
57
+ double ** const f,
58
+ double ** const fn,
59
+ mwIndex *ii,
60
+ mwIndex *jStart,
61
+ mwSize n,
62
+ mwSize m,
63
+ mwSize np
64
+ );
65
+
66
+ #ifdef __cplusplus
67
+ }
68
+ #endif
69
+
70
+ #endif /* FGLT_H_ */
@@ -0,0 +1,132 @@
1
+ Metadata-Version: 2.1
2
+ Name: pyfglt
3
+ Version: 0.3.0
4
+ Summary: Python package/wrapper of Fast Graphlet Transform
5
+ Author: Dimitris Floros
6
+ License: Apache-2.0
7
+ Classifier: License :: OSI Approved :: Apache Software License
8
+ Project-URL: Homepage, https://github.com/fcdimitr/pyfglt
9
+ Project-URL: Changelog, https://github.com/fcdimitr/pyfglt/releases
10
+ Project-URL: Issues, https://github.com/fcdimitr/pyfglt/issues
11
+ Project-URL: CI, https://github.com/fcdimitr/pyfglt/actions
12
+ Project-URL: Documentation, https://fcdimitr.github.io/pyfglt/
13
+ Requires-Python: >=3.8
14
+ Requires-Dist: numpy>=1.19.5
15
+ Requires-Dist: scipy>=1.7.3
16
+ Requires-Dist: pandas>=2.2.0
17
+ Requires-Dist: networkx>=3.2.1
18
+ Requires-Dist: typeguard>=4.4.1
19
+ Provides-Extra: test
20
+ Requires-Dist: pytest; extra == "test"
21
+ Provides-Extra: docs
22
+ Requires-Dist: mkdocs-material>=9.5.50; extra == "docs"
23
+ Requires-Dist: mkdocstrings>=0.27.0; extra == "docs"
24
+ Requires-Dist: mkdocstrings-python>=1.13.0; extra == "docs"
25
+ Requires-Dist: matplotlib>=3.10.0; extra == "docs"
26
+ Requires-Dist: jupyter>=1.1.1; extra == "docs"
27
+ Requires-Dist: tabulate>=0.9.0; extra == "docs"
28
+ Requires-Dist: seaborn>=0.13.2; extra == "docs"
29
+ Description-Content-Type: text/markdown
30
+
31
+ # pyfglt
32
+
33
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://fcdimitr.github.io/pyfglt/)
34
+ [![PyPI](https://img.shields.io/pypi/v/pyfglt.svg)](https://pypi.org/project/pyfglt/)
35
+ [![Tests](https://github.com/fcdimitr/pyfglt/actions/workflows/test.yml/badge.svg)](https://github.com/fcdimitr/pyfglt/actions/workflows/test.yml)
36
+ [![Changelog](https://img.shields.io/github/v/release/fcdimitr/pyfglt?include_prereleases&label=changelog)](https://github.com/fcdimitr/pyfglt/releases)
37
+ [![License](https://img.shields.io/github/license/fcdimitr/pyfglt)](https://github.com/fcdimitr/pyfglt/blob/main/LICENSE)
38
+
39
+ Python package/wrapper of Fast Graphlet Transform. See the [documentation
40
+ overview](https://fcdimitr.github.io/pyfglt/) for more information.
41
+
42
+ ## Installation
43
+
44
+ Install this library using `pip`:
45
+ ```bash
46
+ pip install pyfglt
47
+ ```
48
+ ## Usage
49
+
50
+ See the examples under:
51
+
52
+ - [Getting started](https://fcdimitr.github.io/pyfglt/tutorial/01-getting-started)
53
+ - [Advanced usage](https://fcdimitr.github.io/pyfglt/tutorial/02-graphlet-based-network-properties)
54
+
55
+ ## Citation
56
+
57
+ If you use this package, please cite this paper:
58
+
59
+ ```bibtex
60
+ @article{fglt,
61
+ author = {Floros, Dimitris and Pitsianis, Nikos and Sun, Xiaobai},
62
+ journal = {IEEE HPEC},
63
+ pages = {1--8},
64
+ title = {{Fast graphlet transform of sparse graphs}},
65
+ year = {2020}
66
+ }
67
+ ```
68
+
69
+ ## **How to Contribute?**
70
+ If you’d like to contribute, feel free to submit a PR! 🚀
71
+
72
+
73
+ To contribute to this library, first check out the code. Then create a new virtual environment:
74
+ ```bash
75
+ cd pyfglt
76
+ python -m venv venv
77
+ source venv/bin/activate
78
+ ```
79
+ Now install the dependencies and test dependencies:
80
+ ```bash
81
+ python -m pip install -e '.[test]'
82
+ ```
83
+ To run the tests:
84
+ ```bash
85
+ python -m pytest
86
+ ```
87
+
88
+ ## **Contributors**
89
+
90
+ Below is a categorized list of contributors.
91
+
92
+ ### **Design and Development**
93
+
94
+ <table>
95
+ <tr>
96
+ <td align="center"><a href="https://github.com/fcdimitr"><img src="https://github.com/fcdimitr.png" width="60px;" alt=""/><br /><sub><b>Dimitris Floros<br/>@fcdimitr</b></sub></a></td>
97
+ <td align="center"><a href="https://github.com/pitsianis"><img src="https://github.com/pitsianis.png" width="60px;" alt=""/><br /><sub><b>Nikos Pitsianis<br/>@pitsianis</b></sub></a></td>
98
+ <td align="center"><a href="https://spire.duke.edu/xiaobai-sun"><img src="https://spire.duke.edu/sites/spire.duke.edu/files/styles/square_small/public/2022-08/xiaobai%20%281%29.jpg?h=735f7c84&itok=mUth7Zqn" width="60px;" alt=""/><br /><sub><b>Xiaobai Sun<br/> _ </b></sub></a></td>
99
+ </tr>
100
+ </table>
101
+
102
+ ---
103
+
104
+ ### **Development of Python & Julia Wrappers**
105
+
106
+ <table>
107
+ <tr>
108
+ <td align="center"><a href="https://github.com/nsailor"><img src="https://github.com/nsailor.png" width="60px;" alt=""/><br /><sub><b>Jason Barmparesos<br/>@nsailor</b></sub></a></td>
109
+ <td align="center"><a href="https://github.com/kitsiosk"><img src="https://github.com/kitsiosk.png" width="60px;" alt=""/><br /><sub><b>Kitsios Konstantinos<br/>@kitsiosk</b></sub></a></td>
110
+ </tr>
111
+ </table>
112
+
113
+ ---
114
+
115
+ ### **Helpful Comments and Bug Fixes**
116
+
117
+ <table>
118
+ <tr>
119
+ <td align="center"><a href="https://github.com/georgebisbas"><img src="https://github.com/georgebisbas.png" width="60px;" alt=""/><br /><sub><b>George Bisbas<br/>@georgebisbas</b></sub></a></td>
120
+ </tr>
121
+ </table>
122
+
123
+ <!-- ---
124
+
125
+ ### **🫶 We Also Thank the Following People**
126
+ We appreciate the valuable feedback, discussions, and testing provided by:
127
+
128
+ - [@username7](https://github.com/username7)
129
+ - [@username8](https://github.com/username8)
130
+ - [@username9](https://github.com/username9) -->
131
+
132
+ ---
@@ -0,0 +1,7 @@
1
+ pyfglt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ pyfglt/_fglt_c.cpython-311-x86_64-linux-gnu.so,sha256=9zQ6ZRuOe3B-rCCZ-fQvJ9nThAiuIw2E-_lLQTc-jbA,268640
3
+ pyfglt/fglt.py,sha256=GH72-TBN5JqOefiPPuYcRt9049gLOJaBfuRJQERG7KI,6657
4
+ pyfglt-0.3.0.data/headers/fglt.hpp,sha256=byO4j_x_31vO70FNqANjEz3_lT_4wecNmTHgPbG1-zA,1578
5
+ pyfglt-0.3.0.dist-info/METADATA,sha256=AYlqNaZIiEU6iRpepqLnLAsriYntjTaSHVVTWmsVT_k,4782
6
+ pyfglt-0.3.0.dist-info/WHEEL,sha256=6uXuBuTHKYVHX38njLnDjCYRk1Z5gwaXJtzFqt6LRKw,137
7
+ pyfglt-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: meson
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-cp311-manylinux_2_17_x86_64
5
+ Tag: cp311-cp311-manylinux2014_x86_64
6
+