scikit-network 0.33.3__cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.33.3.dist-info/METADATA +122 -0
- scikit_network-0.33.3.dist-info/RECORD +229 -0
- scikit_network-0.33.3.dist-info/WHEEL +6 -0
- scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
- scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
- scikit_network-0.33.3.dist-info/top_level.txt +1 -0
- scikit_network.libs/libgomp-d22c30c5.so.1.0.0 +0 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpp +27587 -0
- sknetwork/classification/vote.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cpp +31578 -0
- sknetwork/clustering/leiden_core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cpp +31223 -0
- sknetwork/clustering/louvain_core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +135 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +90 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpp +37871 -0
- sknetwork/hierarchy/paris.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpp +27403 -0
- sknetwork/linalg/diteration.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpp +31075 -0
- sknetwork/linalg/push.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cpp +9710 -0
- sknetwork/ranking/betweenness.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cpp +32568 -0
- sknetwork/topology/cliques.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cpp +30654 -0
- sknetwork/topology/core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cpp +27335 -0
- sknetwork/topology/minheap.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cpp +8897 -0
- sknetwork/topology/triangles.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +27638 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in July 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.utils.neighbors import get_degrees
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_membership(labels: np.ndarray, dtype=bool, n_labels: Optional[int] = None) -> sparse.csr_matrix:
    """Build the binary matrix of the label assignments, of shape n_samples x n_labels.

    Negative labels are ignored.

    Parameters
    ----------
    labels :
        Label of each node (integers).
    dtype :
        Type of the output. Boolean by default.
    n_labels : int
        Number of labels. If ``None``, it is inferred as the largest non-negative
        label plus one (0 if there is no non-negative label).

    Returns
    -------
    membership : sparse.csr_matrix
        Binary matrix of label assignments.

    Example
    -------
    >>> from sknetwork.utils import get_membership
    >>> labels = np.array([0, 0, 1, 2])
    >>> membership = get_membership(labels)
    >>> membership.toarray().astype(int)
    array([[1, 0, 0],
           [1, 0, 0],
           [0, 1, 0],
           [0, 0, 1]])
    """
    # Accept any array-like (e.g. a plain list) so that the comparison below is element-wise.
    labels = np.asarray(labels)
    n: int = len(labels)
    ix = (labels >= 0)
    if n_labels is None:
        # Infer the number of columns from the valid labels only: an empty input or an
        # input made of negative labels yields 0 columns instead of an error.
        n_labels = int(labels[ix].max()) + 1 if ix.any() else 0
    data = np.ones(ix.sum())
    row = np.arange(n)[ix]
    col = labels[ix]
    return sparse.csr_matrix((data, (row, col)), shape=(n, n_labels), dtype=dtype)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def from_membership(membership: sparse.csr_matrix) -> np.ndarray:
    """Recover the label of each sample from a membership matrix (n_samples x n_labels).

    Rows without any stored entry are given the label -1.

    Parameters
    ----------
    membership :
        Membership matrix.

    Returns
    -------
    labels : np.ndarray
        Labels (columns indices of the membership matrix).

    Example
    -------
    >>> from scipy import sparse
    >>> from sknetwork.utils import from_membership
    >>> membership = sparse.eye(3).tocsr()
    >>> labels = from_membership(membership)
    >>> labels
    array([0, 1, 2])
    """
    n_samples = membership.shape[0]
    # Start from "no label" everywhere, then fill in the rows that have an entry.
    labels = np.full(n_samples, -1, dtype=int)
    has_label = get_degrees(membership) > 0
    labels[has_label] = membership.indices
    return labels
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on December 29, 2020
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import sparse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_neighbors(input_matrix: sparse.csr_matrix, node: int, transpose: bool = False) -> np.ndarray:
    """Get the neighbors of a node.

    If the graph is directed, returns the vector of successors. Set ``transpose=True``
    to get the predecessors.

    For a biadjacency matrix, returns the neighbors of a row node. Set ``transpose=True``
    to get the neighbors of a column node.

    Parameters
    ----------
    input_matrix : sparse.csr_matrix
        Adjacency or biadjacency matrix.
    node : int
        Target node. A negative value counts from the end, as in Python indexing.
    transpose :
        If ``True``, transpose the input matrix.

    Returns
    -------
    neighbors : np.ndarray
        Array of neighbors of the target node.

    Raises
    ------
    IndexError
        If ``node`` is out of range for the (possibly transposed) matrix.

    Example
    -------
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> get_neighbors(adjacency, node=0)
    array([1, 4], dtype=int32)
    """
    if transpose:
        matrix = sparse.csr_matrix(input_matrix.T)
    else:
        matrix = input_matrix
    n_row = matrix.shape[0]
    if not -n_row <= node < n_row:
        raise IndexError(f"Node {node} is out of range for a matrix with {n_row} rows.")
    if node < 0:
        # indptr has n_row + 1 entries, so a raw negative index would select the wrong
        # row boundaries; convert to the equivalent non-negative row first.
        node += n_row
    neighbors = matrix.indices[matrix.indptr[node]: matrix.indptr[node + 1]]
    return neighbors
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_degrees(input_matrix: sparse.csr_matrix, transpose: bool = False) -> np.ndarray:
|
|
49
|
+
"""Get the vector of degrees of a graph.
|
|
50
|
+
|
|
51
|
+
If the graph is directed, returns the out-degrees (number of successors). Set ``transpose=True``
|
|
52
|
+
to get the in-degrees (number of predecessors).
|
|
53
|
+
|
|
54
|
+
For a biadjacency matrix, returns the degrees of rows. Set ``transpose=True``
|
|
55
|
+
to get the degrees of columns.
|
|
56
|
+
|
|
57
|
+
Parameters
|
|
58
|
+
----------
|
|
59
|
+
input_matrix : sparse.csr_matrix
|
|
60
|
+
Adjacency or biadjacency matrix.
|
|
61
|
+
transpose :
|
|
62
|
+
If ``True``, transpose the input matrix.
|
|
63
|
+
Returns
|
|
64
|
+
-------
|
|
65
|
+
degrees : np.ndarray
|
|
66
|
+
Array of degrees.
|
|
67
|
+
|
|
68
|
+
Example
|
|
69
|
+
-------
|
|
70
|
+
>>> from sknetwork.data import house
|
|
71
|
+
>>> adjacency = house()
|
|
72
|
+
>>> get_degrees(adjacency)
|
|
73
|
+
array([2, 3, 2, 2, 3], dtype=int32)
|
|
74
|
+
"""
|
|
75
|
+
if transpose:
|
|
76
|
+
matrix = sparse.csr_matrix(input_matrix.T)
|
|
77
|
+
else:
|
|
78
|
+
matrix = input_matrix
|
|
79
|
+
degrees = matrix.indptr[1:] - matrix.indptr[:-1]
|
|
80
|
+
return degrees
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_weights(input_matrix: sparse.csr_matrix, transpose: bool = False) -> np.ndarray:
    """Return the weight of each node, i.e. the sum of the entries of each row.

    If the graph is not weighted, this is the vector of degrees.

    For a directed graph these are the out-weights (total weight of outgoing links);
    pass ``transpose=True`` to obtain the in-weights (total weight of incoming links).

    For a biadjacency matrix these are the weights of the rows; pass
    ``transpose=True`` to obtain the weights of the columns.

    Parameters
    ----------
    input_matrix : sparse.csr_matrix
        Adjacency or biadjacency matrix.
    transpose :
        If ``True``, transpose the input matrix.

    Returns
    -------
    weights : np.ndarray
        Array of weights.

    Example
    -------
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> get_weights(adjacency)
    array([2., 3., 2., 2., 3.])
    """
    matrix = sparse.csr_matrix(input_matrix.T) if transpose else input_matrix
    n_col = matrix.shape[1]
    # Multiplying by the all-ones vector sums each row.
    all_ones = np.ones(n_col)
    return matrix.dot(all_ones)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""tests for utils"""
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""tests for check.py"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.data import cyclic_digraph
|
|
7
|
+
from sknetwork.data.test_graphs import test_disconnected_graph
|
|
8
|
+
from sknetwork.utils.check import *
|
|
9
|
+
from sknetwork.utils.format import check_csr_or_slr
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestChecks(unittest.TestCase):
    """Tests for the validation helpers star-imported from ``sknetwork.utils.check``."""

    def setUp(self):
        """Simple graphs for tests: a 3-node cyclic digraph and a dense 3x3 identity."""
        self.adjacency = cyclic_digraph(3)
        self.dense_mat = np.identity(3)

    def test_check_csr_slr(self):
        """A dense 1-D array is expected to be rejected with TypeError."""
        with self.assertRaises(TypeError):
            check_csr_or_slr(np.ones(3))

    def test_check_square(self):
        """A 3x7 array is expected to be rejected with ValueError."""
        with self.assertRaises(ValueError):
            check_square(np.ones((3, 7)))

    def test_check_connected(self):
        """The disconnected test graph is expected to be rejected with ValueError."""
        with self.assertRaises(ValueError):
            check_connected(test_disconnected_graph())

    def test_check_symmetry(self):
        """The cyclic digraph is expected to be rejected with ValueError."""
        with self.assertRaises(ValueError):
            check_symmetry(self.adjacency)

    def test_nonnegative_entries(self):
        """Both the sparse adjacency and the dense identity are expected to pass."""
        self.assertTrue(has_nonnegative_entries(self.adjacency))
        self.assertTrue(has_nonnegative_entries(self.dense_mat))

    def test_check_nonnegative(self):
        """The negated identity is expected to be rejected with ValueError."""
        with self.assertRaises(ValueError):
            check_nonnegative(-self.dense_mat)

    def test_positive_entries(self):
        """The identity (which has zeros) is not positive; sparse input is expected to raise TypeError."""
        self.assertFalse(has_positive_entries(self.dense_mat))
        with self.assertRaises(TypeError):
            # noinspection PyTypeChecker
            has_positive_entries(self.adjacency)

    def test_check_positive(self):
        """An all-ones vector passes; the negated identity is expected to raise ValueError."""
        check_positive(np.ones(3))
        with self.assertRaises(ValueError):
            check_positive(-self.dense_mat)

    def test_probas(self):
        """Valid probabilities pass; a 3-D array or a string raises TypeError, the value 2 raises ValueError."""
        self.assertTrue(is_proba_array(np.array([.5, .5])))
        check_is_proba(0.5)
        with self.assertRaises(TypeError):
            is_proba_array(np.ones((2, 2, 2)))
        self.assertRaises(TypeError, check_is_proba, 'toto')
        with self.assertRaises(ValueError):
            check_is_proba(2)

    def test_damping(self):
        """A damping factor of 1 is expected to be rejected with ValueError."""
        with self.assertRaises(ValueError):
            check_damping_factor(1)

    def test_error_make_weights(self):
        """An unknown distribution name is expected to be rejected with ValueError."""
        with self.assertRaises(ValueError):
            make_weights(distribution='junk', adjacency=self.adjacency)

    def test_error_check_is_proba(self):
        """A string is expected to raise TypeError and the value 2 to raise ValueError."""
        with self.assertRaises(TypeError):
            # noinspection PyTypeChecker
            check_is_proba('junk')
        with self.assertRaises(ValueError):
            check_is_proba(2)

    def test_error_check_weights(self):
        """Wrong length, wrong type, zero weights with ``positive_entries=True`` and negative weights are all rejected."""
        with self.assertRaises(ValueError):
            check_weights(np.zeros(4), self.adjacency)
        with self.assertRaises(TypeError):
            # noinspection PyTypeChecker
            check_weights(2, self.adjacency)
        with self.assertRaises(ValueError):
            check_weights(np.zeros(3), self.adjacency, positive_entries=True)
        with self.assertRaises(ValueError):
            check_weights(-np.ones(3), self.adjacency)

    def test_random_state(self):
        """A ``RandomState`` input is expected to yield a ``RandomState`` object."""
        random_state = np.random.RandomState(1)
        self.assertEqual(type(check_random_state(random_state)), np.random.RandomState)

    def test_error_random_state(self):
        """A string is expected to be rejected with TypeError."""
        with self.assertRaises(TypeError):
            # noinspection PyTypeChecker
            check_random_state('junk')

    def test_check_labels(self):
        """Constant labels are expected to raise ValueError; labels {0, 1} yield classes [0, 1] and a count of 2."""
        with self.assertRaises(ValueError):
            check_labels(np.ones(3))
        labels = np.ones(5)
        labels[0] = 0
        classes, n_classes = check_labels(labels)
        self.assertTrue(np.equal(classes, np.arange(2)).all())
        self.assertEqual(n_classes, 2)

    def test_check_n_jobs(self):
        """Expected mapping: ``None`` -> 1, -1 -> ``None``, 8 -> 8."""
        self.assertEqual(check_n_jobs(None), 1)
        self.assertEqual(check_n_jobs(-1), None)
        self.assertEqual(check_n_jobs(8), 8)

    def test_check_n_neighbors(self):
        """Calling with (10, 5) is expected to emit a warning."""
        with self.assertWarns(Warning):
            check_n_neighbors(10, 5)

    def test_adj_vector(self):
        """Dense and sparse inputs give the same (1, n) result; a mismatched size raises ValueError."""
        n = 10
        vector1 = np.random.rand(n)
        vector2 = sparse.csr_matrix(vector1)
        adj1 = check_adjacency_vector(vector1)
        adj2 = check_adjacency_vector(vector2)

        self.assertEqual((adj1 - adj2).nnz, 0)
        self.assertEqual(adj1.shape, (1, n))

        with self.assertRaises(ValueError):
            check_adjacency_vector(vector1, 2 * n)

    def test_check_n_clusters(self):
        """The argument sets (3, 2) and (0, 2, 1) are both expected to raise ValueError."""
        with self.assertRaises(ValueError):
            check_n_clusters(3, 2)
        with self.assertRaises(ValueError):
            check_n_clusters(0, 2, 1)

    def test_min_size(self):
        """The arguments (1, 3) are expected to raise ValueError."""
        with self.assertRaises(ValueError):
            check_min_size(1, 3)

    def test_min_nnz(self):
        """The arguments (1, 3) are expected to raise ValueError."""
        with self.assertRaises(ValueError):
            check_min_nnz(1, 3)

    def test_dendrogram(self):
        """A 3x3 array of ones is expected to be rejected with ValueError."""
        with self.assertRaises(ValueError):
            check_dendrogram(np.ones((3, 3)))

    def test_n_components(self):
        """(5, 10) is expected to return 5; (5, 2) is expected to warn and return 2."""
        self.assertEqual(5, check_n_components(5, 10))
        with self.assertWarns(Warning):
            self.assertEqual(2, check_n_components(5, 2))

    def test_scaling(self):
        """A scaling of -1 is expected to raise ValueError, with and without regularization."""
        adjacency = cyclic_digraph(3)
        with self.assertRaises(ValueError):
            check_scaling(-1, adjacency, regularize=True)
        adjacency = test_disconnected_graph()
        with self.assertRaises(ValueError):
            check_scaling(-1, adjacency, regularize=False)

    def test_boolean_entries(self):
        """A plain list is expected to raise TypeError; an integer array is not boolean."""
        with self.assertRaises(TypeError):
            has_boolean_entries([True, 0, 2])
        self.assertFalse(has_boolean_entries(np.array([0, 1, True])))

    def test_boolean(self):
        """A boolean array passes; an integer array is expected to raise ValueError."""
        check_boolean(np.array([True, False, True]))
        with self.assertRaises(ValueError):
            check_boolean(np.array([True, 0, 2]))

    def test_check_vector_format(self):
        """Two length-4 vectors pass; a 2-D second argument or a length mismatch raises ValueError."""
        check_vector_format(np.arange(4), np.ones(4))
        with self.assertRaises(ValueError):
            check_vector_format(np.arange(4), np.ones((4, 3)))
        with self.assertRaises(ValueError):
            check_vector_format(np.arange(4), np.ones(5))

    def test_has_self_loops(self):
        """Expected True for the matrix with a full diagonal, False for the one with a zero on the diagonal."""
        self.assertTrue(has_self_loops(sparse.csr_matrix(np.array([[1, 0], [1, 1]]))))
        self.assertFalse(has_self_loops(sparse.csr_matrix(np.array([[0, 0], [1, 1]]))))

    def test_add_self_loops(self):
        """After ``add_self_loops``, self-loops are expected on a square matrix and on the leading square block of a non-square one."""
        # Square adjacency
        adjacency = sparse.csr_matrix(np.array([[0, 0], [1, 1]]))
        self.assertFalse(has_self_loops(adjacency))
        adjacency = add_self_loops(adjacency)
        self.assertTrue(has_self_loops(adjacency))
        # Non square adjacency
        adjacency = sparse.csr_matrix(np.array([[0, 0, 1], [1, 1, 1]]))
        n_row, n_col = adjacency.shape
        self.assertTrue(has_self_loops(add_self_loops(adjacency)[:, :n_row]))
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for format.py"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.data.test_graphs import *
|
|
7
|
+
from sknetwork.utils.format import *
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestFormats(unittest.TestCase):
    """Tests for the graph format conversions star-imported from ``sknetwork.utils.format``."""

    def setUp(self):
        """Basic biadjacency for tests."""
        self.biadjacency = test_bigraph()

    def test_directed2undirected(self):
        """Check shape and symmetry of the result, the unweighted case, and ``SparseLR`` input."""
        adjacency = test_digraph()
        ref = directed2undirected(adjacency)
        self.assertEqual(ref.shape, adjacency.shape)
        self.assertTrue(is_symmetric(ref))

        # On the boolean test graph, the unweighted conversion is expected to change nothing.
        adjacency = test_graph().astype(bool)
        n = adjacency.shape[0]
        diff = directed2undirected(adjacency, weighted=False) - adjacency
        self.assertEqual(diff.nnz, 0)

        # SparseLR input: weighted=False is expected to raise ValueError.
        slr = SparseLR(adjacency, [(np.zeros(n), np.zeros(n))])
        self.assertRaises(ValueError, directed2undirected, slr, weighted=False)
        slr = 0.5 * directed2undirected(slr)
        self.assertEqual(slr.shape, (n, n))

        # Half of the converted SparseLR is expected to act like the adjacency on a random vector.
        x = np.random.randn(n)
        error = np.linalg.norm(slr.dot(x) - adjacency.dot(x))
        self.assertAlmostEqual(error, 0)

    def test_bipartite2directed(self):
        """The result is expected to have shape (n_row + n_col, n_row + n_col) and to keep the ``SparseLR`` type."""
        n_row, n_col = self.biadjacency.shape
        n = n_row + n_col

        directed_graph = bipartite2directed(self.biadjacency)
        self.assertEqual(directed_graph.shape, (n, n))

        slr = SparseLR(self.biadjacency, [(np.ones(n_row), np.ones(n_col))])
        directed_graph = bipartite2directed(slr)
        self.assertTrue(type(directed_graph) == SparseLR)

    def test_bipartite2undirected(self):
        """The result is expected to be symmetric of shape (n_row + n_col, n_row + n_col) and to keep the ``SparseLR`` type."""
        n_row, n_col = self.biadjacency.shape
        n = n_row + n_col

        undirected_graph = bipartite2undirected(self.biadjacency)
        self.assertEqual(undirected_graph.shape, (n, n))
        self.assertTrue(is_symmetric(undirected_graph))

        slr = SparseLR(self.biadjacency, [(np.ones(n_row), np.ones(n_col))])
        undirected_graph = bipartite2undirected(slr)
        self.assertTrue(type(undirected_graph) == SparseLR)

    def test_check(self):
        """An empty matrix with ``allow_empty=False`` is expected to raise ValueError; a dense 2x2 array keeps its shape."""
        with self.assertRaises(ValueError):
            check_format(sparse.csr_matrix((3, 4)), allow_empty=False)
        adjacency = check_format(np.array([[0, 2], [2, 3]]))
        self.assertTrue(adjacency.shape == (2, 2))
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in July 2022
|
|
5
|
+
@author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
|
|
6
|
+
"""
|
|
7
|
+
import unittest
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from sknetwork.utils.membership import get_membership, from_membership
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestMembership(unittest.TestCase):
    """Round-trip tests for ``get_membership`` / ``from_membership``."""

    def test_membership(self):
        """Labels survive the round trip; a negative label is dropped from the matrix."""
        cases = (
            (np.array([0, 0, 1, 2, 1, 1]), 6),
            (np.array([0, 0, 1, 2, 1, -1]), 5),
        )
        for labels, expected_nnz in cases:
            membership = get_membership(labels)
            self.assertEqual(membership.nnz, expected_nnz)
            recovered = from_membership(membership)
            self.assertEqual(np.linalg.norm(labels - recovered), 0)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on December 2020
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
import unittest
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from numpy.linalg import norm
|
|
11
|
+
|
|
12
|
+
from sknetwork.data import karate_club, painters
|
|
13
|
+
from sknetwork.utils import get_neighbors, get_degrees, get_weights
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TestNeighbors(unittest.TestCase):
    """Tests for ``get_neighbors``, ``get_degrees`` and ``get_weights``."""

    def test_graph(self):
        """Neighbors and degree of node 5 in the karate club graph."""
        adjacency = karate_club()
        found = get_neighbors(adjacency, 5)
        all_degrees = get_degrees(adjacency)
        expected = np.array([0, 6, 10, 16])
        self.assertEqual(norm(found - expected), 0)
        self.assertEqual(all_degrees[5], 4)

    def test_digraph(self):
        """Successors then predecessors of node 0 in the painters graph."""
        adjacency = painters()
        # (transpose flag, expected neighbors of node 0)
        directions = (
            (False, np.array([3, 10])),
            (True, np.array([3, 6, 8, 10, 11])),
        )
        for transpose, expected in directions:
            found = get_neighbors(adjacency, 0, transpose=transpose)
            all_degrees = get_degrees(adjacency, transpose=transpose)
            all_weights = get_weights(adjacency, transpose=transpose)
            self.assertEqual(norm(found - expected), 0)
            self.assertEqual(all_degrees[0], len(expected))
            self.assertEqual(all_weights[0], len(expected))
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for tfidf.py"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from scipy import sparse
|
|
8
|
+
|
|
9
|
+
from sknetwork.utils.tfidf import get_tfidf
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestTFIDF(unittest.TestCase):
    """Tests for ``get_tfidf``."""

    def test_tfidf(self):
        """The tf-idf matrix keeps the shape of the count matrix and has 2 stored entries here."""
        dense_count = np.array([[0, 1, 2], [0, 2, 1], [0, 0, 1]])
        count = sparse.csr_matrix(dense_count)
        tfidf = get_tfidf(count)
        self.assertEqual(count.shape, tfidf.shape)
        self.assertEqual(tfidf.nnz, 2)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""tests for values.py"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from sknetwork.utils.values import get_values, stack_values, values2prob
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestValues(unittest.TestCase):
    """Tests for ``get_values``, ``values2prob`` and ``stack_values``."""

    def test_get_values(self):
        """Array and dict inputs are expected to agree; bad shape, bad type and a negative dict value are flagged."""
        n = 10
        labels_array = -np.ones(n)
        labels_array[:2] = np.arange(2)
        labels_dict = {0: 0, 1: 1}
        labels_array = get_values((n,), labels_array)
        labels_ = get_values((n,), labels_dict)
        self.assertTrue(np.allclose(labels_array, labels_))
        # A length-10 array against a shape of (5,) is expected to raise ValueError.
        with self.assertRaises(ValueError):
            get_values((5,), labels_array)
        self.assertRaises(TypeError, get_values, 'toto', 3)
        # A negative value in the dict is expected to trigger a warning.
        with self.assertWarns(Warning):
            labels_dict[0] = -1
            get_values((n,), labels_dict)

    def test_values2probs(self):
        """Array and dict inputs are expected to give the same result; the all-zero input is expected to raise ValueError."""
        n = 4
        values_array = np.array([0, 1, -1, 0])
        values_dict = {0: 0, 1: 1, 3: 0}

        probs1 = values2prob(n, values_array)
        probs2 = values2prob(n, values_dict)
        self.assertTrue(np.allclose(probs1, probs2))

        bad_input = np.array([0, 0, -1, 0])
        with self.assertRaises(ValueError):
            values2prob(n, bad_input)

    def test_stack_values(self):
        """Every array/dict combination of row and column values is expected to give the same stacked result."""
        shape = 4, 3
        values_row_array = np.array([0, 1, -1, 0])
        values_row_dict = {0: 0, 1: 1, 3: 0}
        values_col_array = np.array([0, 1, -1])
        values_col_dict = {0: 0, 1: 1}

        values1 = stack_values(shape, values_row_array, values_col_array)
        values2 = stack_values(shape, values_row_dict, values_col_dict)
        values3 = stack_values(shape, values_row_array, values_col_dict)
        values4 = stack_values(shape, values_row_dict, values_col_array)

        self.assertTrue(np.allclose(values1, values2))
        self.assertTrue(np.allclose(values2, values3))
        self.assertTrue(np.allclose(values3, values4))

        # Row values only.
        values1 = stack_values(shape, values_row_array, None)
        values2 = stack_values(shape, values_row_dict, None)

        self.assertTrue(np.allclose(values1, values2))

        # Column values only.
        values1 = stack_values(shape, None, values_col_array)
        values2 = stack_values(shape, None, values_col_dict)

        self.assertTrue(np.allclose(values1, values2))
|
sknetwork/utils/tfidf.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in February 2023
|
|
5
|
+
@author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
|
|
6
|
+
"""
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import sparse
|
|
9
|
+
|
|
10
|
+
from sknetwork.linalg import normalize
|
|
11
|
+
from sknetwork.utils import get_degrees
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_tfidf(count_matrix: sparse.csr_matrix):
    """Compute the tf-idf matrix of a sparse count matrix.

    The term frequencies are the counts normalized by ``sknetwork.linalg.normalize``.
    The inverse document frequency of a word is ``log(n_documents / n)``, where ``n``
    is the number of documents with a positive count for this word; it is 0 for words
    that have no positive count in any document.

    Parameters
    ----------
    count_matrix : sparse.csr_matrix
        Count matrix, shape (n_documents, n_words).

    Returns
    -------
    tf_idf : sparse.csr_matrix
        tf-idf matrix, shape (n_documents, n_words).

    References
    ----------
    https://en.wikipedia.org/wiki/Tfidf
    """
    n_documents, n_words = count_matrix.shape
    term_freq = normalize(count_matrix)
    # Number of documents in which each word has a positive count.
    doc_freq = get_degrees(count_matrix > 0, transpose=True)
    seen = doc_freq > 0
    inverse_doc_freq = np.zeros(n_words)
    # Only words seen at least once get a logarithm, avoiding a division by zero.
    inverse_doc_freq[seen] = np.log(n_documents / doc_freq[seen])
    return term_freq.dot(sparse.diags(inverse_doc_freq))
|