graphwise 1.1.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +149 -2
- package/dist/expansion/frontier-balanced.d.ts +12 -0
- package/dist/expansion/frontier-balanced.d.ts.map +1 -0
- package/dist/expansion/frontier-balanced.unit.test.d.ts +2 -0
- package/dist/expansion/frontier-balanced.unit.test.d.ts.map +1 -0
- package/dist/expansion/index.d.ts +12 -13
- package/dist/expansion/index.d.ts.map +1 -1
- package/dist/expansion/random-priority.d.ts +20 -0
- package/dist/expansion/random-priority.d.ts.map +1 -0
- package/dist/expansion/random-priority.unit.test.d.ts +2 -0
- package/dist/expansion/random-priority.unit.test.d.ts.map +1 -0
- package/dist/expansion/standard-bfs.d.ts +12 -0
- package/dist/expansion/standard-bfs.d.ts.map +1 -0
- package/dist/expansion/standard-bfs.unit.test.d.ts +2 -0
- package/dist/expansion/standard-bfs.unit.test.d.ts.map +1 -0
- package/dist/extraction/index.d.ts +6 -6
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/motif.d.ts.map +1 -1
- package/dist/gpu/context.d.ts.map +1 -1
- package/dist/gpu/csr.d.ts.map +1 -1
- package/dist/gpu/index.cjs +410 -5
- package/dist/gpu/index.cjs.map +1 -0
- package/dist/gpu/index.d.ts +4 -5
- package/dist/gpu/index.d.ts.map +1 -1
- package/dist/gpu/index.js +400 -2
- package/dist/gpu/index.js.map +1 -0
- package/dist/graph/index.cjs +222 -2
- package/dist/graph/index.cjs.map +1 -0
- package/dist/graph/index.d.ts +3 -3
- package/dist/graph/index.d.ts.map +1 -1
- package/dist/graph/index.js +221 -1
- package/dist/graph/index.js.map +1 -0
- package/dist/index/index.cjs +966 -126
- package/dist/index/index.cjs.map +1 -1
- package/dist/index/index.js +939 -126
- package/dist/index/index.js.map +1 -1
- package/dist/{kmeans-B0HEOU6k.cjs → kmeans-87ExSUNZ.js} +27 -13
- package/dist/{kmeans-DgbsOznU.js.map → kmeans-87ExSUNZ.js.map} +1 -1
- package/dist/{kmeans-DgbsOznU.js → kmeans-BIgSyGKu.cjs} +44 -2
- package/dist/{kmeans-B0HEOU6k.cjs.map → kmeans-BIgSyGKu.cjs.map} +1 -1
- package/dist/ranking/baselines/betweenness.d.ts +13 -0
- package/dist/ranking/baselines/betweenness.d.ts.map +1 -0
- package/dist/ranking/baselines/betweenness.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/betweenness.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/communicability.d.ts +13 -0
- package/dist/ranking/baselines/communicability.d.ts.map +1 -0
- package/dist/ranking/baselines/communicability.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/communicability.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/degree-sum.d.ts +13 -0
- package/dist/ranking/baselines/degree-sum.d.ts.map +1 -0
- package/dist/ranking/baselines/degree-sum.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/degree-sum.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/index.d.ts +20 -0
- package/dist/ranking/baselines/index.d.ts.map +1 -0
- package/dist/ranking/baselines/jaccard-arithmetic.d.ts +13 -0
- package/dist/ranking/baselines/jaccard-arithmetic.d.ts.map +1 -0
- package/dist/ranking/baselines/jaccard-arithmetic.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/jaccard-arithmetic.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/katz.d.ts +13 -0
- package/dist/ranking/baselines/katz.d.ts.map +1 -0
- package/dist/ranking/baselines/katz.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/katz.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/pagerank.d.ts +13 -0
- package/dist/ranking/baselines/pagerank.d.ts.map +1 -0
- package/dist/ranking/baselines/pagerank.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/pagerank.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/random-ranking.d.ts +21 -0
- package/dist/ranking/baselines/random-ranking.d.ts.map +1 -0
- package/dist/ranking/baselines/random-ranking.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/random-ranking.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/resistance-distance.d.ts +13 -0
- package/dist/ranking/baselines/resistance-distance.d.ts.map +1 -0
- package/dist/ranking/baselines/resistance-distance.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/resistance-distance.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/widest-path.d.ts +13 -0
- package/dist/ranking/baselines/widest-path.d.ts.map +1 -0
- package/dist/ranking/baselines/widest-path.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/widest-path.unit.test.d.ts.map +1 -0
- package/dist/ranking/index.d.ts +3 -6
- package/dist/ranking/index.d.ts.map +1 -1
- package/dist/ranking/mi/adamic-adar.d.ts.map +1 -1
- package/dist/ranking/mi/adaptive.d.ts +1 -1
- package/dist/ranking/mi/adaptive.d.ts.map +1 -1
- package/dist/ranking/mi/etch.d.ts.map +1 -1
- package/dist/ranking/mi/index.d.ts +9 -9
- package/dist/ranking/mi/index.d.ts.map +1 -1
- package/dist/ranking/mi/jaccard.d.ts.map +1 -1
- package/dist/ranking/mi/notch.d.ts.map +1 -1
- package/dist/ranking/mi/scale.d.ts.map +1 -1
- package/dist/ranking/mi/skew.d.ts.map +1 -1
- package/dist/ranking/mi/span.d.ts.map +1 -1
- package/dist/schemas/index.d.ts +2 -2
- package/dist/schemas/index.d.ts.map +1 -1
- package/dist/seeds/index.cjs +398 -3
- package/dist/seeds/index.cjs.map +1 -0
- package/dist/seeds/index.d.ts +2 -4
- package/dist/seeds/index.d.ts.map +1 -1
- package/dist/seeds/index.js +396 -1
- package/dist/seeds/index.js.map +1 -0
- package/dist/seeds/stratified.d.ts.map +1 -1
- package/dist/structures/index.cjs +133 -2
- package/dist/structures/index.cjs.map +1 -0
- package/dist/structures/index.d.ts +1 -2
- package/dist/structures/index.d.ts.map +1 -1
- package/dist/structures/index.js +132 -1
- package/dist/structures/index.js.map +1 -0
- package/dist/traversal/index.cjs +152 -5
- package/dist/traversal/index.cjs.map +1 -0
- package/dist/traversal/index.d.ts +2 -2
- package/dist/traversal/index.d.ts.map +1 -1
- package/dist/traversal/index.js +148 -1
- package/dist/traversal/index.js.map +1 -0
- package/dist/utils/index.cjs +254 -9
- package/dist/utils/index.cjs.map +1 -0
- package/dist/utils/index.d.ts +4 -3
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +242 -3
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/neighbours.d.ts +54 -0
- package/dist/utils/neighbours.d.ts.map +1 -0
- package/dist/utils/neighbours.unit.test.d.ts +5 -0
- package/dist/utils/neighbours.unit.test.d.ts.map +1 -0
- package/package.json +1 -1
- package/dist/gpu-BJRVYBjx.cjs +0 -338
- package/dist/gpu-BJRVYBjx.cjs.map +0 -1
- package/dist/gpu-BveuXugy.js +0 -315
- package/dist/gpu-BveuXugy.js.map +0 -1
- package/dist/graph-DLWiziLB.js +0 -222
- package/dist/graph-DLWiziLB.js.map +0 -1
- package/dist/graph-az06J1YV.cjs +0 -227
- package/dist/graph-az06J1YV.cjs.map +0 -1
- package/dist/seeds-B6J9oJfU.cjs +0 -404
- package/dist/seeds-B6J9oJfU.cjs.map +0 -1
- package/dist/seeds-UNZxqm_U.js +0 -393
- package/dist/seeds-UNZxqm_U.js.map +0 -1
- package/dist/structures-BPfhfqNP.js +0 -133
- package/dist/structures-BPfhfqNP.js.map +0 -1
- package/dist/structures-CJ_S_7fs.cjs +0 -138
- package/dist/structures-CJ_S_7fs.cjs.map +0 -1
- package/dist/traversal-CQCjUwUJ.js +0 -149
- package/dist/traversal-CQCjUwUJ.js.map +0 -1
- package/dist/traversal-QeHaNUWn.cjs +0 -172
- package/dist/traversal-QeHaNUWn.cjs.map +0 -1
- package/dist/utils-Q_akvlMn.js +0 -164
- package/dist/utils-Q_akvlMn.js.map +0 -1
- package/dist/utils-spZa1ZvS.cjs +0 -205
- package/dist/utils-spZa1ZvS.cjs.map +0 -1
package/dist/utils/index.cjs
CHANGED
|
@@ -1,13 +1,258 @@
|
|
|
1
1
|
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
2
|
-
const require_kmeans = require("../kmeans-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
2
|
+
const require_kmeans = require("../kmeans-BIgSyGKu.cjs");
|
|
3
|
+
//#region src/utils/clustering-coefficient.ts
|
|
4
|
+
/**
 * Compute the exact local clustering coefficient of one node.
 *
 * Every unordered pair of the node's neighbours (direction "both") is inspected.
 * A pair counts as closed when an edge exists between the two neighbours in
 * either orientation. The result is:
 *   closed pairs / (deg * (deg - 1) / 2)
 *
 * @param graph - Graph exposing `neighbours(nodeId, direction)` and `getEdge(source, target)`
 * @param nodeId - The node whose neighbourhood is analysed
 * @returns Fraction of neighbour pairs that are connected; 0 when the node has fewer than two neighbours
 */
function localClusteringCoefficient(graph, nodeId) {
	const adjacent = Array.from(graph.neighbours(nodeId, "both"));
	const k = adjacent.length;
	if (k < 2) return 0;
	// An edge in either orientation closes the pair.
	const linked = (a, b) => graph.getEdge(a, b) !== void 0 || graph.getEdge(b, a) !== void 0;
	let closedPairs = 0;
	adjacent.forEach((first, idx) => {
		if (first === void 0) return;
		for (const second of adjacent.slice(idx + 1)) {
			if (second === void 0) continue;
			if (linked(first, second)) closedPairs += 1;
		}
	});
	return closedPairs / (k * (k - 1) / 2);
}
|
|
34
|
+
/**
 * Estimate the local clustering coefficient of a node under a pair-check budget.
 *
 * When the number of neighbour pairs is at most `sampleSize`, every pair is
 * checked and the result is exact. Otherwise only the first `sampleSize` pairs
 * (in neighbour iteration order) are checked, and the fraction of connected
 * pairs among those is returned. The checked subset is a deterministic prefix,
 * not a random sample, so the estimate can be biased by neighbour ordering.
 *
 * @param graph - Graph exposing `neighbours(nodeId, direction)` and `getEdge(source, target)`
 * @param nodeId - The node to compute clustering coefficient for
 * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)
 * @returns The (approximate) clustering coefficient in [0, 1]; 0 when the node
 *   has fewer than two neighbours or when the budget allows no pair to be
 *   checked (zero, negative or NaN `sampleSize`)
 */
function approximateClusteringCoefficient(graph, nodeId, sampleSize = 100) {
	const neighbours = [...graph.neighbours(nodeId, "both")];
	const degree = neighbours.length;
	if (degree < 2) return 0;
	const pairCount = degree * (degree - 1) / 2;
	// If the budget covers all pairs the same loop yields the exact value,
	// without enumerating the neighbours a second time.
	const exhaustive = pairCount <= sampleSize;
	const budget = exhaustive ? pairCount : sampleSize;
	let closedPairs = 0;
	let checked = 0;
	for (let i = 0; i < degree && checked < budget; i++) {
		const u = neighbours[i];
		if (u === void 0) continue;
		for (let j = i + 1; j < degree && checked < budget; j++) {
			const w = neighbours[j];
			if (w === void 0) continue;
			checked++;
			// An edge in either orientation closes the pair.
			if (graph.getEdge(u, w) !== void 0 || graph.getEdge(w, u) !== void 0) closedPairs++;
		}
	}
	if (exhaustive) return closedPairs / pairCount;
	// Guard against 0 / 0 when the budget admitted no pair at all.
	return checked === 0 ? 0 : closedPairs / checked;
}
|
|
65
|
+
/**
 * Compute exact clustering coefficients for a list of nodes.
 *
 * Each node is evaluated independently through `localClusteringCoefficient`;
 * no neighbourhood data is shared between nodes.
 *
 * @param graph - The graph to compute on
 * @param nodeIds - The nodes to compute clustering coefficients for
 * @returns Map from nodeId to clustering coefficient, in first-seen order of `nodeIds`
 */
function batchClusteringCoefficients(graph, nodeIds) {
	const entries = Array.from(nodeIds, (id) => [id, localClusteringCoefficient(graph, id)]);
	return new Map(entries);
}
|
|
79
|
+
//#endregion
|
|
80
|
+
//#region src/utils/entropy.ts
|
|
81
|
+
/**
|
|
82
|
+
* Entropy computation utilities for graph analysis.
|
|
83
|
+
*
|
|
84
|
+
* Shannon entropy measures uncertainty or randomness in a distribution.
|
|
85
|
+
* Used in EDGE and HAE algorithms for heterogeneity-aware expansion.
|
|
86
|
+
*
|
|
87
|
+
* @packageDocumentation
|
|
88
|
+
*/
|
|
89
|
+
/**
 * Compute the Shannon entropy of a probability distribution, in bits.
 *
 *   H(X) = -Σ p(x) × log₂(p(x))
 *
 * Only strictly positive entries contribute; zeros, negatives and NaN are
 * skipped (0 × log 0 is taken as 0). The input is not checked for summing to 1.
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Entropy in bits (log base 2); 0 for an empty array
 */
function shannonEntropy(probabilities) {
	if (probabilities.length === 0) return 0;
	return probabilities.reduce((bits, p) => {
		if (!(p > 0)) return bits;
		return bits - p * Math.log2(p);
	}, 0);
}
|
|
107
|
+
/**
 * Compute normalised entropy: Shannon entropy divided by the maximum entropy
 * achievable with the same number of categories, log₂(n).
 *
 * For a valid probability distribution the result lies in [0, 1], where:
 * - 0 means the distribution is deterministic (all mass on one value)
 * - 1 means the distribution is uniform (maximum uncertainty)
 *
 * The input is not validated; entries that do not sum to 1 can push the
 * result outside [0, 1].
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Normalised entropy, or 0 when there are fewer than two categories
 */
function normalisedEntropy(probabilities) {
	if (probabilities.length <= 1) return 0;
	// length >= 2 here, so log2(length) >= 1 and the divisor can never be zero.
	return shannonEntropy(probabilities) / Math.log2(probabilities.length);
}
|
|
127
|
+
/**
 * Compute Shannon entropy directly from raw frequency counts.
 *
 * The counts are divided by their sum to obtain probabilities, which are then
 * passed to `shannonEntropy`.
 *
 * @param counts - Array of frequency counts
 * @returns Entropy in bits; 0 when `counts` is empty or sums to zero
 */
function entropyFromCounts(counts) {
	// An empty array sums to 0, so one check covers both "no data" cases.
	const sum = counts.reduce((acc, value) => acc + value, 0);
	if (sum === 0) return 0;
	const probabilities = counts.map((value) => value / sum);
	return shannonEntropy(probabilities);
}
|
|
143
|
+
/**
 * Measure how diverse the type labels of a node's neighbours are.
 *
 * Labels are tallied, converted to relative frequencies and passed to
 * `normalisedEntropy`, so the normalisation is over the number of distinct
 * labels that actually occur in the input.
 *
 * @param neighbourTypes - Array of type labels for neighbours
 * @returns Normalised entropy; 0 when there are fewer than two labels or all labels are equal
 */
function localTypeEntropy(neighbourTypes) {
	const total = neighbourTypes.length;
	if (total <= 1) return 0;
	const tally = new Map();
	for (const label of neighbourTypes) {
		const seen = tally.get(label) ?? 0;
		tally.set(label, seen + 1);
	}
	// A single distinct label means a perfectly homogeneous neighbourhood.
	if (tally.size === 1) return 0;
	return normalisedEntropy(Array.from(tally.values(), (occurrences) => occurrences / total));
}
|
|
163
|
+
//#endregion
|
|
164
|
+
//#region src/utils/neighbours.ts
|
|
165
|
+
/**
 * Gather the neighbours of a node into a Set, optionally dropping one node.
 *
 * `graph.neighbours` is called with the node ID only (no direction argument).
 *
 * @param graph - The graph to traverse
 * @param nodeId - The source node
 * @param exclude - Optional node ID to remove from the result
 * @returns A Set of neighbouring node IDs
 */
function neighbourSet(graph, nodeId, exclude) {
	const collected = new Set();
	for (const neighbour of graph.neighbours(nodeId)) collected.add(neighbour);
	if (exclude === void 0) return collected;
	collected.delete(exclude);
	return collected;
}
|
|
178
|
+
/**
 * Compute the sizes of the intersection and union of two neighbour sets.
 *
 * Only the smaller set is iterated, and the union size is derived by
 * inclusion–exclusion, so no union Set is ever allocated.
 *
 * @param a - First neighbourhood set
 * @param b - Second neighbourhood set
 * @returns Object with `intersection` and `union` sizes
 */
function neighbourOverlap(a, b) {
	let probe = b;
	let lookup = a;
	if (a.size < b.size) {
		probe = a;
		lookup = b;
	}
	let shared = 0;
	for (const member of probe) {
		if (lookup.has(member)) shared += 1;
	}
	return {
		intersection: shared,
		union: a.size + b.size - shared
	};
}
|
|
197
|
+
/**
 * Build the set of nodes present in both neighbourhood sets.
 *
 * The smaller set is iterated (the second argument when sizes are equal), so
 * the result follows that set's insertion order.
 *
 * @param a - First neighbourhood set
 * @param b - Second neighbourhood set
 * @returns A Set containing nodes in both a and b
 */
function neighbourIntersection(a, b) {
	const aIsSmaller = a.size < b.size;
	const probe = aIsSmaller ? a : b;
	const lookup = aIsSmaller ? b : a;
	return new Set([...probe].filter((member) => lookup.has(member)));
}
|
|
212
|
+
/**
 * Tally how many edges in the graph carry the given type.
 *
 * Iterates `graph.edges()` once and compares each edge's `type` strictly.
 *
 * @param graph - The graph to count edges in
 * @param type - The edge type to count
 * @returns The number of edges with the specified type
 */
function countEdgesOfType(graph, type) {
	let matches = 0;
	for (const { type: edgeType } of graph.edges()) {
		matches += edgeType === type ? 1 : 0;
	}
	return matches;
}
|
|
226
|
+
/**
 * Tally how many nodes in the graph carry the given type.
 *
 * Each ID from `graph.nodeIds()` is resolved through `graph.getNode`; a node
 * that resolves to null/undefined is treated as having an undefined type.
 *
 * @param graph - The graph to count nodes in
 * @param type - The node type to count
 * @returns The number of nodes with the specified type
 */
function countNodesOfType(graph, type) {
	let matches = 0;
	for (const id of graph.nodeIds()) {
		const node = graph.getNode(id);
		const nodeType = node == null ? void 0 : node.type;
		if (nodeType === type) matches += 1;
	}
	return matches;
}
|
|
240
|
+
//#endregion
|
|
241
|
+
exports._computeMean = require_kmeans._computeMean;
|
|
242
|
+
exports.approximateClusteringCoefficient = approximateClusteringCoefficient;
|
|
243
|
+
exports.batchClusteringCoefficients = batchClusteringCoefficients;
|
|
244
|
+
exports.countEdgesOfType = countEdgesOfType;
|
|
245
|
+
exports.countNodesOfType = countNodesOfType;
|
|
246
|
+
exports.entropyFromCounts = entropyFromCounts;
|
|
247
|
+
exports.localClusteringCoefficient = localClusteringCoefficient;
|
|
248
|
+
exports.localTypeEntropy = localTypeEntropy;
|
|
9
249
|
exports.miniBatchKMeans = require_kmeans.miniBatchKMeans;
|
|
250
|
+
exports.neighbourIntersection = neighbourIntersection;
|
|
251
|
+
exports.neighbourOverlap = neighbourOverlap;
|
|
252
|
+
exports.neighbourSet = neighbourSet;
|
|
10
253
|
exports.normaliseFeatures = require_kmeans.normaliseFeatures;
|
|
11
|
-
exports.normalisedEntropy = require_utils.normalisedEntropy;
|
|
12
|
-
exports.shannonEntropy = require_utils.shannonEntropy;
|
|
13
254
|
exports.zScoreNormalise = require_kmeans.normaliseFeatures;
|
|
255
|
+
exports.normalisedEntropy = normalisedEntropy;
|
|
256
|
+
exports.shannonEntropy = shannonEntropy;
|
|
257
|
+
|
|
258
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.cjs","names":[],"sources":["../../src/utils/clustering-coefficient.ts","../../src/utils/entropy.ts","../../src/utils/neighbours.ts"],"sourcesContent":["/**\n * Clustering coefficient computation for graph nodes.\n *\n * The local clustering coefficient measures how close a node's neighbours\n * are to being a complete graph (clique). It is used in SPAN MI variant\n * and GRASP seed selection.\n *\n * @packageDocumentation\n */\n\nimport type { ReadableGraph, NodeId } from \"../graph\";\n\n/**\n * Compute the local clustering coefficient for a single node.\n *\n * The clustering coefficient is defined as:\n * CC(v) = (triangles through v) / (possible triangles)\n * CC(v) = 2 * |{(u,w) : u,w in N(v), (u,w) in E}| / (deg(v) * (deg(v) - 1))\n *\n * For nodes with degree < 2, the clustering coefficient is 0.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @returns The clustering coefficient in [0, 1], or 0 if undefined\n */\nexport function localClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\t// Nodes with degree < 2 have no possible triangles\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\t// Count actual triangles: pairs of neighbours that are connected\n\tlet triangleCount = 0;\n\n\tfor (let i = 0; i < neighbours.length; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Possible triangles: deg * (deg - 1) / 2 pairs\n\t// We multiply by 2 because each triangle is counted once\n\tconst 
possibleTriangles = (degree * (degree - 1)) / 2;\n\n\treturn triangleCount / possibleTriangles;\n}\n\n/**\n * Compute approximate local clustering coefficient using sampling.\n *\n * For nodes with many neighbours, this samples neighbour pairs rather than\n * checking all pairs. Useful for large graphs where exact computation is expensive.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)\n * @returns The approximate clustering coefficient in [0, 1]\n */\nexport function approximateClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n\tsampleSize = 100,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\tconst possibleTriangles = (degree * (degree - 1)) / 2;\n\n\t// If all pairs can be checked within sample limit, use exact computation\n\tif (possibleTriangles <= sampleSize) {\n\t\treturn localClusteringCoefficient(graph, nodeId);\n\t}\n\n\t// Sample pairs uniformly\n\tlet triangleCount = 0;\n\tlet sampled = 0;\n\n\t// Use reservoir sampling style approach for pair selection\n\tfor (let i = 0; i < neighbours.length && sampled < sampleSize; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length && sampled < sampleSize; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Decide whether to include this pair based on remaining budget\n\t\t\tsampled++;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Extrapolate from sample\n\treturn (triangleCount / sampled) * (possibleTriangles / possibleTriangles);\n}\n\n/**\n * Compute clustering 
coefficients for multiple nodes efficiently.\n *\n * Reuses neighbour sets to avoid repeated iteration.\n *\n * @param graph - The graph to compute on\n * @param nodeIds - The nodes to compute clustering coefficients for\n * @returns Map from nodeId to clustering coefficient\n */\nexport function batchClusteringCoefficients(\n\tgraph: ReadableGraph,\n\tnodeIds: readonly NodeId[],\n): Map<NodeId, number> {\n\tconst results = new Map<NodeId, number>();\n\n\tfor (const nodeId of nodeIds) {\n\t\tresults.set(nodeId, localClusteringCoefficient(graph, nodeId));\n\t}\n\n\treturn results;\n}\n","/**\n * Entropy computation utilities for graph analysis.\n *\n * Shannon entropy measures uncertainty or randomness in a distribution.\n * Used in EDGE and HAE algorithms for heterogeneity-aware expansion.\n *\n * @packageDocumentation\n */\n\n/**\n * Compute Shannon entropy of a probability distribution.\n *\n * Shannon entropy is defined as:\n * H(X) = -Σ p(x) × log₂(p(x))\n *\n * A uniform distribution has maximum entropy.\n * A deterministic distribution (all probability on one value) has zero entropy.\n *\n * @param probabilities - Array of probabilities (should sum to 1)\n * @returns Entropy in bits (log base 2), or 0 if probabilities are invalid\n */\nexport function shannonEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length === 0) {\n\t\treturn 0;\n\t}\n\n\tlet entropy = 0;\n\tfor (const p of probabilities) {\n\t\t// Skip zero probabilities (log(0) is undefined, but 0 * log(0) = 0)\n\t\tif (p > 0) {\n\t\t\tentropy -= p * Math.log2(p);\n\t\t}\n\t}\n\n\treturn entropy;\n}\n\n/**\n * Compute normalised entropy (entropy divided by maximum possible entropy).\n *\n * Normalised entropy is in [0, 1], where:\n * - 0 means the distribution is deterministic (all mass on one value)\n * - 1 means the distribution is uniform (maximum uncertainty)\n *\n * This is useful for comparing entropy across distributions with different\n * numbers of possible values.\n 
*\n * @param probabilities - Array of probabilities (should sum to 1)\n * @returns Normalised entropy in [0, 1], or 0 if only one category\n */\nexport function normalisedEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length <= 1) {\n\t\treturn 0;\n\t}\n\n\tconst H = shannonEntropy(probabilities);\n\tconst Hmax = Math.log2(probabilities.length);\n\n\tif (Hmax === 0) {\n\t\treturn 0;\n\t}\n\n\treturn H / Hmax;\n}\n\n/**\n * Compute entropy from a frequency count.\n *\n * Converts counts to probabilities and then computes entropy.\n * This is a convenience function when you have raw counts rather than\n * normalised probabilities.\n *\n * @param counts - Array of frequency counts\n * @returns Entropy in bits\n */\nexport function entropyFromCounts(counts: readonly number[]): number {\n\tif (counts.length === 0) {\n\t\treturn 0;\n\t}\n\n\tconst total = counts.reduce((sum, c) => sum + c, 0);\n\tif (total === 0) {\n\t\treturn 0;\n\t}\n\n\tconst probabilities = counts.map((c) => c / total);\n\treturn shannonEntropy(probabilities);\n}\n\n/**\n * Compute local type entropy for a node's neighbours.\n *\n * This measures the diversity of types among a node's neighbours.\n * High entropy = heterogeneous neighbourhood (diverse types).\n * Low entropy = homogeneous neighbourhood (similar types).\n *\n * @param neighbourTypes - Array of type labels for neighbours\n * @returns Normalised entropy in [0, 1]\n */\nexport function localTypeEntropy(neighbourTypes: readonly string[]): number {\n\tif (neighbourTypes.length <= 1) {\n\t\treturn 0;\n\t}\n\n\t// Count occurrences of each type\n\tconst typeCounts = new Map<string, number>();\n\tfor (const t of neighbourTypes) {\n\t\ttypeCounts.set(t, (typeCounts.get(t) ?? 
0) + 1);\n\t}\n\n\t// If all neighbours are the same type, entropy is 0\n\tif (typeCounts.size === 1) {\n\t\treturn 0;\n\t}\n\n\t// Convert to probability array\n\tconst probabilities: number[] = [];\n\tconst total = neighbourTypes.length;\n\tfor (const count of typeCounts.values()) {\n\t\tprobabilities.push(count / total);\n\t}\n\n\treturn normalisedEntropy(probabilities);\n}\n","/**\n * Neighbourhood computation utilities.\n *\n * Shared utilities for neighbourhood operations used by MI variants and other graph algorithms.\n * These functions eliminate duplication of neighbourhood set operations across multiple\n * implementations.\n *\n * @packageDocumentation\n */\n\nimport type { NodeId, NodeData, EdgeData, ReadableGraph } from \"../graph\";\n\n/**\n * Collect neighbours into a Set, optionally excluding a specific node.\n *\n * @param graph - The graph to traverse\n * @param nodeId - The source node\n * @param exclude - Optional node ID to exclude from result\n * @returns A ReadonlySet of neighbouring node IDs\n */\nexport function neighbourSet<N extends NodeData, E extends EdgeData>(\n\tgraph: ReadableGraph<N, E>,\n\tnodeId: NodeId,\n\texclude?: NodeId,\n): ReadonlySet<NodeId> {\n\tconst neighbours = new Set(graph.neighbours(nodeId));\n\tif (exclude !== undefined) {\n\t\tneighbours.delete(exclude);\n\t}\n\treturn neighbours;\n}\n\n/**\n * Compute intersection and union sizes of two neighbour sets without allocating the union set.\n *\n * This is more efficient than computing both separately, as it avoids creating a full union Set.\n *\n * @param a - First neighbourhood set\n * @param b - Second neighbourhood set\n * @returns Object with intersection and union sizes\n */\nexport function neighbourOverlap(\n\ta: ReadonlySet<NodeId>,\n\tb: ReadonlySet<NodeId>,\n): { intersection: number; union: number } {\n\tlet intersection = 0;\n\n\t// Count intersection by iterating through the smaller set\n\tconst [smaller, larger] = a.size < b.size ? 
[a, b] : [b, a];\n\n\tfor (const node of smaller) {\n\t\tif (larger.has(node)) {\n\t\t\tintersection++;\n\t\t}\n\t}\n\n\t// Union size = size(a) + size(b) - intersection\n\tconst union = a.size + b.size - intersection;\n\n\treturn { intersection, union };\n}\n\n/**\n * Return the actual intersection set of two neighbourhood sets.\n *\n * Needed by Adamic-Adar (iterates common neighbours) and ETCH (requires edge types of intersection edges).\n *\n * @param a - First neighbourhood set\n * @param b - Second neighbourhood set\n * @returns A ReadonlySet containing nodes in both a and b\n */\nexport function neighbourIntersection(\n\ta: ReadonlySet<NodeId>,\n\tb: ReadonlySet<NodeId>,\n): ReadonlySet<NodeId> {\n\tconst intersection = new Set<NodeId>();\n\n\t// Iterate through the smaller set for efficiency\n\tconst [smaller, larger] = a.size < b.size ? [a, b] : [b, a];\n\n\tfor (const node of smaller) {\n\t\tif (larger.has(node)) {\n\t\t\tintersection.add(node);\n\t\t}\n\t}\n\n\treturn intersection;\n}\n\n/**\n * Count the number of edges with a specific type in the graph.\n *\n * Used by ETCH MI variant to compute edge rarity weighting.\n *\n * @param graph - The graph to count edges in\n * @param type - The edge type to count\n * @returns The number of edges with the specified type\n */\nexport function countEdgesOfType<N extends NodeData, E extends EdgeData>(\n\tgraph: ReadableGraph<N, E>,\n\ttype: string,\n): number {\n\tlet count = 0;\n\tfor (const edge of graph.edges()) {\n\t\tif (edge.type === type) {\n\t\t\tcount++;\n\t\t}\n\t}\n\treturn count;\n}\n\n/**\n * Count the number of nodes with a specific type in the graph.\n *\n * Used by NOTCH MI variant to compute node rarity weighting.\n *\n * @param graph - The graph to count nodes in\n * @param type - The node type to count\n * @returns The number of nodes with the specified type\n */\nexport function countNodesOfType<N extends NodeData, E extends EdgeData>(\n\tgraph: ReadableGraph<N, E>,\n\ttype: string,\n): 
number {\n\tlet count = 0;\n\tfor (const nodeId of graph.nodeIds()) {\n\t\tconst node = graph.getNode(nodeId);\n\t\tif (node?.type === type) {\n\t\t\tcount++;\n\t\t}\n\t}\n\treturn count;\n}\n"],"mappings":";;;;;;;;;;;;;;;;AAyBA,SAAgB,2BACf,OACA,QACS;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAG1B,KAAI,SAAS,EACZ,QAAO;CAIR,IAAI,gBAAgB;AAEpB,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;EAC3C,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;GAC/C,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;CAOH,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAEpD,QAAO,gBAAgB;;;;;;;;;;;;;AAcxB,SAAgB,iCACf,OACA,QACA,aAAa,KACJ;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAE1B,KAAI,SAAS,EACZ,QAAO;CAGR,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAGpD,KAAI,qBAAqB,WACxB,QAAO,2BAA2B,OAAO,OAAO;CAIjD,IAAI,gBAAgB;CACpB,IAAI,UAAU;AAGd,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;EACnE,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;GACvE,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB;AAGA,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;AAMH,QAAQ,gBAAgB,WAAY,oBAAoB;;;;;;;;;;;AAYzD,SAAgB,4BACf,OACA,SACsB;CACtB,MAAM,0BAAU,IAAI,KAAqB;AAEzC,MAAK,MAAM,UAAU,QACpB,SAAQ,IAAI,QAAQ,2BAA2B,OAAO,OAAO,CAAC;AAG/D,QAAO;;;;;;;;;;;;;;;;;;;;;;;;AC3HR,SAAgB,eAAe,eAA0C;AACxE,KAAI,cAAc,WAAW,EAC5B,QAAO;CAGR,IAAI,UAAU;AACd,MAAK,MAAM,KAAK,cAEf,KAAI,IAAI,EACP,YAAW,IAAI,KAAK,KAAK,EAAE;AAI7B,QAAO;;;;;;;;;;;;;;;AAgBR,SAAgB,kBAAkB,eAA0C;AAC3E,KAAI,cAAc,UAAU,EAC3B,QAAO;CAGR,MAAM,IAAI,eAAe,cAAc;CACvC,MAAM,OAAO,KAAK,KAAK,cAAc,OAAO;AAE5C,KAAI,SAAS,EACZ,QAAO;AAGR,QAAO,IAAI;;;;;;;;;;;;AAaZ,SAAgB,kBAAkB,QAAmC;AACpE,KAAI,OAAO,WAAW,EACrB,QAAO;CAGR,MAAM,QAAQ,OAAO,QAAQ,KAAK,MAAM,MAAM,GAAG,EAAE;AACnD,KAAI,UAAU,EACb,QAAO;AAIR,QAAO,eADe,OAAO,KAAK,MAAM,IAAI,MAAM,CACd;;;;;;;;;;;;AAarC,SAAgB,iBAAiB,gBAA2C;AAC3E,KAAI,e
AAe,UAAU,EAC5B,QAAO;CAIR,MAAM,6BAAa,IAAI,KAAqB;AAC5C,MAAK,MAAM,KAAK,eACf,YAAW,IAAI,IAAI,WAAW,IAAI,EAAE,IAAI,KAAK,EAAE;AAIhD,KAAI,WAAW,SAAS,EACvB,QAAO;CAIR,MAAM,gBAA0B,EAAE;CAClC,MAAM,QAAQ,eAAe;AAC7B,MAAK,MAAM,SAAS,WAAW,QAAQ,CACtC,eAAc,KAAK,QAAQ,MAAM;AAGlC,QAAO,kBAAkB,cAAc;;;;;;;;;;;;ACtGxC,SAAgB,aACf,OACA,QACA,SACsB;CACtB,MAAM,aAAa,IAAI,IAAI,MAAM,WAAW,OAAO,CAAC;AACpD,KAAI,YAAY,KAAA,EACf,YAAW,OAAO,QAAQ;AAE3B,QAAO;;;;;;;;;;;AAYR,SAAgB,iBACf,GACA,GAC0C;CAC1C,IAAI,eAAe;CAGnB,MAAM,CAAC,SAAS,UAAU,EAAE,OAAO,EAAE,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE;AAE3D,MAAK,MAAM,QAAQ,QAClB,KAAI,OAAO,IAAI,KAAK,CACnB;CAKF,MAAM,QAAQ,EAAE,OAAO,EAAE,OAAO;AAEhC,QAAO;EAAE;EAAc;EAAO;;;;;;;;;;;AAY/B,SAAgB,sBACf,GACA,GACsB;CACtB,MAAM,+BAAe,IAAI,KAAa;CAGtC,MAAM,CAAC,SAAS,UAAU,EAAE,OAAO,EAAE,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE;AAE3D,MAAK,MAAM,QAAQ,QAClB,KAAI,OAAO,IAAI,KAAK,CACnB,cAAa,IAAI,KAAK;AAIxB,QAAO;;;;;;;;;;;AAYR,SAAgB,iBACf,OACA,MACS;CACT,IAAI,QAAQ;AACZ,MAAK,MAAM,QAAQ,MAAM,OAAO,CAC/B,KAAI,KAAK,SAAS,KACjB;AAGF,QAAO;;;;;;;;;;;AAYR,SAAgB,iBACf,OACA,MACS;CACT,IAAI,QAAQ;AACZ,MAAK,MAAM,UAAU,MAAM,SAAS,CAEnC,KADa,MAAM,QAAQ,OAAO,EACxB,SAAS,KAClB;AAGF,QAAO"}
|
package/dist/utils/index.d.ts
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
*
|
|
4
4
|
* @packageDocumentation
|
|
5
5
|
*/
|
|
6
|
-
export
|
|
7
|
-
export
|
|
8
|
-
export
|
|
6
|
+
export * from './clustering-coefficient';
|
|
7
|
+
export * from './kmeans';
|
|
8
|
+
export * from './entropy';
|
|
9
|
+
export * from './neighbours';
|
|
9
10
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,0BAA0B,CAAC;AACzC,cAAc,UAAU,CAAC;AACzB,cAAc,WAAW,CAAC;AAC1B,cAAc,cAAc,CAAC"}
|
package/dist/utils/index.js
CHANGED
|
@@ -1,3 +1,242 @@
|
|
|
1
|
-
import { n as normaliseFeatures, t as
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
import { n as miniBatchKMeans, r as normaliseFeatures, t as _computeMean } from "../kmeans-87ExSUNZ.js";
|
|
2
|
+
//#region src/utils/clustering-coefficient.ts
|
|
3
|
+
/**
 * Local clustering coefficient of a single node.
 *
 * Every unordered pair of the node's neighbours is inspected, and the result
 * is the share of those pairs that are joined by an edge in either direction:
 *   CC(v) = (linked neighbour pairs) / (deg(v) * (deg(v) - 1) / 2)
 *
 * @param graph - Graph exposing `neighbours(nodeId, direction)` and `getEdge(a, b)`
 * @param nodeId - The node whose neighbourhood is inspected
 * @returns The share of linked neighbour pairs, or 0 when the node has fewer than two neighbours
 */
function localClusteringCoefficient(graph, nodeId) {
	const adjacent = Array.from(graph.neighbours(nodeId, "both"));
	const k = adjacent.length;
	if (k < 2) {
		return 0;
	}

	// An edge in either direction is enough to close the triangle.
	const linked = (a, b) => graph.getEdge(a, b) !== undefined || graph.getEdge(b, a) !== undefined;

	let closedPairs = 0;
	for (const [idx, first] of adjacent.entries()) {
		if (first === undefined) continue;
		for (let rest = idx + 1; rest < k; rest += 1) {
			const second = adjacent[rest];
			if (second === undefined) continue;
			if (linked(first, second)) closedPairs += 1;
		}
	}

	// Number of unordered neighbour pairs.
	const pairTotal = k * (k - 1) / 2;
	return closedPairs / pairTotal;
}
|
|
33
|
+
/**
 * Estimate the local clustering coefficient from a bounded number of neighbour pairs.
 *
 * Neighbour pairs are visited in iteration order (the first neighbour against
 * every later one, then the second, and so on) and the walk stops as soon as
 * `sampleSize` pairs have been checked. The estimate is the fraction of checked
 * pairs that are joined by an edge in either direction. When the budget covers
 * every pair, the result is the exact coefficient.
 *
 * NOTE(review): the checked pairs are a deterministic prefix, not a random
 * sample, so for high-degree nodes the estimate is dominated by the first few
 * neighbours the graph returns.
 *
 * @param graph - Graph exposing `neighbours(nodeId, direction)` and `getEdge(a, b)`
 * @param nodeId - The node to compute the clustering coefficient for
 * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)
 * @returns The estimate in [0, 1]; 0 when the node has fewer than two neighbours
 *          or when no pair could be checked (non-positive or NaN `sampleSize`)
 */
function approximateClusteringCoefficient(graph, nodeId, sampleSize = 100) {
	const neighbours = [...graph.neighbours(nodeId, "both")];
	const degree = neighbours.length;
	if (degree < 2) return 0;

	let triangleCount = 0;
	let sampled = 0;
	for (let i = 0; i < degree && sampled < sampleSize; i++) {
		const u = neighbours[i];
		if (u === undefined) continue;
		for (let j = i + 1; j < degree && sampled < sampleSize; j++) {
			const w = neighbours[j];
			if (w === undefined) continue;
			sampled++;
			// An edge in either direction closes the triangle through nodeId.
			if (graph.getEdge(u, w) !== undefined || graph.getEdge(w, u) !== undefined) triangleCount++;
		}
	}

	// A non-positive or NaN budget checks nothing; return 0 instead of 0 / 0 = NaN.
	if (sampled === 0) return 0;

	// The fraction of checked pairs that are linked is already the estimate.
	return triangleCount / sampled;
}
|
|
64
|
+
/**
 * Compute the exact local clustering coefficient for each of several nodes.
 *
 * Each node is evaluated independently with `localClusteringCoefficient`;
 * a node listed more than once keeps a single entry.
 *
 * @param graph - The graph to compute on
 * @param nodeIds - The nodes to compute clustering coefficients for
 * @returns Map from nodeId to clustering coefficient, in first-seen order
 */
function batchClusteringCoefficients(graph, nodeIds) {
	const scored = Array.from(nodeIds, (id) => [id, localClusteringCoefficient(graph, id)]);
	return new Map(scored);
}
|
|
78
|
+
//#endregion
|
|
79
|
+
//#region src/utils/entropy.ts
|
|
80
|
+
/**
|
|
81
|
+
* Entropy computation utilities for graph analysis.
|
|
82
|
+
*
|
|
83
|
+
* Shannon entropy measures uncertainty or randomness in a distribution.
|
|
84
|
+
* Used in EDGE and HAE algorithms for heterogeneity-aware expansion.
|
|
85
|
+
*
|
|
86
|
+
* @packageDocumentation
|
|
87
|
+
*/
|
|
88
|
+
/**
 * Shannon entropy, in bits, of a list of probabilities.
 *
 *   H(X) = -Σ p(x) × log₂(p(x))
 *
 * Only strictly positive entries contribute; zero, negative and NaN entries
 * are ignored. The input is not checked to sum to 1.
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Entropy in bits (log base 2); 0 for an empty array
 */
function shannonEntropy(probabilities) {
	if (probabilities.length === 0) {
		return 0;
	}
	// Fold left to right, subtracting p * log2(p) for each positive p.
	return probabilities.reduce(
		(bits, p) => (p > 0 ? bits - p * Math.log2(p) : bits),
		0,
	);
}
|
|
106
|
+
/**
 * Shannon entropy divided by the maximum entropy possible for the same
 * number of categories, log₂(number of entries).
 *
 * For a valid distribution the result is 0 when all mass sits on one value
 * and 1 when the distribution is uniform, which makes distributions with
 * different numbers of categories comparable.
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Normalised entropy, or 0 when there are fewer than two categories
 */
function normalisedEntropy(probabilities) {
	const categories = probabilities.length;
	if (categories <= 1) {
		return 0;
	}
	const ceiling = Math.log2(categories);
	const observed = shannonEntropy(probabilities);
	// Guard against dividing by a zero maximum.
	return ceiling === 0 ? 0 : observed / ceiling;
}
|
|
126
|
+
/**
 * Shannon entropy of a distribution given as raw frequency counts.
 *
 * Each count is divided by the sum of all counts and the resulting
 * probabilities are passed to `shannonEntropy`.
 *
 * @param counts - Array of frequency counts
 * @returns Entropy in bits; 0 when the array is empty or the counts sum to 0
 */
function entropyFromCounts(counts) {
	if (counts.length === 0) {
		return 0;
	}

	let total = 0;
	for (const c of counts) {
		total = total + c;
	}
	if (total === 0) {
		return 0;
	}

	const shares = counts.map((c) => c / total);
	return shannonEntropy(shares);
}
|
|
142
|
+
/**
 * Diversity of type labels among a node's neighbours.
 *
 * The labels are tallied, turned into relative frequencies and passed to
 * `normalisedEntropy`. A neighbourhood with mixed types scores high; one
 * with a single type scores 0.
 *
 * @param neighbourTypes - Array of type labels for neighbours
 * @returns Normalised entropy of the label frequencies; 0 for fewer than
 *          two labels or when every label is the same
 */
function localTypeEntropy(neighbourTypes) {
	const total = neighbourTypes.length;
	if (total <= 1) {
		return 0;
	}

	// Tally how often each label occurs.
	const tally = new Map();
	for (const label of neighbourTypes) {
		const seen = tally.has(label) ? tally.get(label) : 0;
		tally.set(label, seen + 1);
	}

	// A single distinct label means a homogeneous neighbourhood.
	if (tally.size === 1) {
		return 0;
	}

	const shares = Array.from(tally.values(), (n) => n / total);
	return normalisedEntropy(shares);
}
|
|
162
|
+
//#endregion
|
|
163
|
+
//#region src/utils/neighbours.ts
|
|
164
|
+
/**
 * Gather a node's neighbours into a Set, optionally leaving one node out.
 *
 * `graph.neighbours` is called with the node ID only, so the graph's default
 * traversal direction applies.
 *
 * @param graph - The graph to traverse
 * @param nodeId - The source node
 * @param exclude - Optional node ID to remove from the result
 * @returns A Set of neighbouring node IDs
 */
function neighbourSet(graph, nodeId, exclude) {
	const collected = new Set();
	for (const id of graph.neighbours(nodeId)) {
		collected.add(id);
	}
	if (exclude === undefined) {
		return collected;
	}
	collected.delete(exclude);
	return collected;
}
|
|
177
|
+
/**
 * Sizes of the intersection and union of two neighbour sets.
 *
 * Only the intersection is counted, by probing the larger set with each
 * member of the smaller one. The union size follows from
 * |a| + |b| - |a ∩ b|, so no union Set is built.
 *
 * @param a - First neighbourhood set
 * @param b - Second neighbourhood set
 * @returns Object with `intersection` and `union` sizes
 */
function neighbourOverlap(a, b) {
	const aIsSmaller = a.size < b.size;
	const probe = aIsSmaller ? a : b;
	const lookup = aIsSmaller ? b : a;

	let shared = 0;
	probe.forEach((id) => {
		if (lookup.has(id)) shared += 1;
	});

	return {
		intersection: shared,
		union: a.size + b.size - shared,
	};
}
|
|
196
|
+
/**
 * Build the Set of nodes present in both neighbourhood sets.
 *
 * The smaller set is walked and each member is looked up in the larger one,
 * so the result follows the smaller set's iteration order (`b`'s when the
 * sizes are equal).
 *
 * @param a - First neighbourhood set
 * @param b - Second neighbourhood set
 * @returns A Set containing the nodes found in both a and b
 */
function neighbourIntersection(a, b) {
	let probe = b;
	let lookup = a;
	if (a.size < b.size) {
		probe = a;
		lookup = b;
	}
	const common = [...probe].filter((id) => lookup.has(id));
	return new Set(common);
}
|
|
211
|
+
/**
 * Count the edges in the graph whose `type` equals the given type.
 *
 * Walks every edge yielded by `graph.edges()` once.
 *
 * @param graph - The graph to count edges in
 * @param type - The edge type to count
 * @returns The number of edges with the specified type
 */
function countEdgesOfType(graph, type) {
	let matches = 0;
	for (const { type: edgeType } of graph.edges()) {
		matches += edgeType === type ? 1 : 0;
	}
	return matches;
}
|
|
225
|
+
/**
 * Count the nodes in the graph whose `type` equals the given type.
 *
 * Each ID from `graph.nodeIds()` is resolved with `graph.getNode`. An ID
 * that resolves to no node is treated as having an undefined type.
 *
 * @param graph - The graph to count nodes in
 * @param type - The node type to count
 * @returns The number of nodes with the specified type
 */
function countNodesOfType(graph, type) {
	let matches = 0;
	for (const id of graph.nodeIds()) {
		const node = graph.getNode(id);
		const nodeType = node === null || node === undefined ? undefined : node.type;
		if (nodeType === type) {
			matches += 1;
		}
	}
	return matches;
}
|
|
239
|
+
//#endregion
|
|
240
|
+
export { _computeMean, approximateClusteringCoefficient, batchClusteringCoefficients, countEdgesOfType, countNodesOfType, entropyFromCounts, localClusteringCoefficient, localTypeEntropy, miniBatchKMeans, neighbourIntersection, neighbourOverlap, neighbourSet, normaliseFeatures, normaliseFeatures as zScoreNormalise, normalisedEntropy, shannonEntropy };
|
|
241
|
+
|
|
242
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","names":[],"sources":["../../src/utils/clustering-coefficient.ts","../../src/utils/entropy.ts","../../src/utils/neighbours.ts"],"sourcesContent":["/**\n * Clustering coefficient computation for graph nodes.\n *\n * The local clustering coefficient measures how close a node's neighbours\n * are to being a complete graph (clique). It is used in SPAN MI variant\n * and GRASP seed selection.\n *\n * @packageDocumentation\n */\n\nimport type { ReadableGraph, NodeId } from \"../graph\";\n\n/**\n * Compute the local clustering coefficient for a single node.\n *\n * The clustering coefficient is defined as:\n * CC(v) = (triangles through v) / (possible triangles)\n * CC(v) = 2 * |{(u,w) : u,w in N(v), (u,w) in E}| / (deg(v) * (deg(v) - 1))\n *\n * For nodes with degree < 2, the clustering coefficient is 0.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @returns The clustering coefficient in [0, 1], or 0 if undefined\n */\nexport function localClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\t// Nodes with degree < 2 have no possible triangles\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\t// Count actual triangles: pairs of neighbours that are connected\n\tlet triangleCount = 0;\n\n\tfor (let i = 0; i < neighbours.length; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Possible triangles: deg * (deg - 1) / 2 pairs\n\t// We multiply by 2 because each triangle is counted once\n\tconst 
possibleTriangles = (degree * (degree - 1)) / 2;\n\n\treturn triangleCount / possibleTriangles;\n}\n\n/**\n * Compute approximate local clustering coefficient using sampling.\n *\n * For nodes with many neighbours, this samples neighbour pairs rather than\n * checking all pairs. Useful for large graphs where exact computation is expensive.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)\n * @returns The approximate clustering coefficient in [0, 1]\n */\nexport function approximateClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n\tsampleSize = 100,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\tconst possibleTriangles = (degree * (degree - 1)) / 2;\n\n\t// If all pairs can be checked within sample limit, use exact computation\n\tif (possibleTriangles <= sampleSize) {\n\t\treturn localClusteringCoefficient(graph, nodeId);\n\t}\n\n\t// Sample pairs uniformly\n\tlet triangleCount = 0;\n\tlet sampled = 0;\n\n\t// Use reservoir sampling style approach for pair selection\n\tfor (let i = 0; i < neighbours.length && sampled < sampleSize; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length && sampled < sampleSize; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Decide whether to include this pair based on remaining budget\n\t\t\tsampled++;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Extrapolate from sample\n\treturn (triangleCount / sampled) * (possibleTriangles / possibleTriangles);\n}\n\n/**\n * Compute clustering 
coefficients for multiple nodes efficiently.\n *\n * Reuses neighbour sets to avoid repeated iteration.\n *\n * @param graph - The graph to compute on\n * @param nodeIds - The nodes to compute clustering coefficients for\n * @returns Map from nodeId to clustering coefficient\n */\nexport function batchClusteringCoefficients(\n\tgraph: ReadableGraph,\n\tnodeIds: readonly NodeId[],\n): Map<NodeId, number> {\n\tconst results = new Map<NodeId, number>();\n\n\tfor (const nodeId of nodeIds) {\n\t\tresults.set(nodeId, localClusteringCoefficient(graph, nodeId));\n\t}\n\n\treturn results;\n}\n","/**\n * Entropy computation utilities for graph analysis.\n *\n * Shannon entropy measures uncertainty or randomness in a distribution.\n * Used in EDGE and HAE algorithms for heterogeneity-aware expansion.\n *\n * @packageDocumentation\n */\n\n/**\n * Compute Shannon entropy of a probability distribution.\n *\n * Shannon entropy is defined as:\n * H(X) = -Σ p(x) × log₂(p(x))\n *\n * A uniform distribution has maximum entropy.\n * A deterministic distribution (all probability on one value) has zero entropy.\n *\n * @param probabilities - Array of probabilities (should sum to 1)\n * @returns Entropy in bits (log base 2), or 0 if probabilities are invalid\n */\nexport function shannonEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length === 0) {\n\t\treturn 0;\n\t}\n\n\tlet entropy = 0;\n\tfor (const p of probabilities) {\n\t\t// Skip zero probabilities (log(0) is undefined, but 0 * log(0) = 0)\n\t\tif (p > 0) {\n\t\t\tentropy -= p * Math.log2(p);\n\t\t}\n\t}\n\n\treturn entropy;\n}\n\n/**\n * Compute normalised entropy (entropy divided by maximum possible entropy).\n *\n * Normalised entropy is in [0, 1], where:\n * - 0 means the distribution is deterministic (all mass on one value)\n * - 1 means the distribution is uniform (maximum uncertainty)\n *\n * This is useful for comparing entropy across distributions with different\n * numbers of possible values.\n 
*\n * @param probabilities - Array of probabilities (should sum to 1)\n * @returns Normalised entropy in [0, 1], or 0 if only one category\n */\nexport function normalisedEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length <= 1) {\n\t\treturn 0;\n\t}\n\n\tconst H = shannonEntropy(probabilities);\n\tconst Hmax = Math.log2(probabilities.length);\n\n\tif (Hmax === 0) {\n\t\treturn 0;\n\t}\n\n\treturn H / Hmax;\n}\n\n/**\n * Compute entropy from a frequency count.\n *\n * Converts counts to probabilities and then computes entropy.\n * This is a convenience function when you have raw counts rather than\n * normalised probabilities.\n *\n * @param counts - Array of frequency counts\n * @returns Entropy in bits\n */\nexport function entropyFromCounts(counts: readonly number[]): number {\n\tif (counts.length === 0) {\n\t\treturn 0;\n\t}\n\n\tconst total = counts.reduce((sum, c) => sum + c, 0);\n\tif (total === 0) {\n\t\treturn 0;\n\t}\n\n\tconst probabilities = counts.map((c) => c / total);\n\treturn shannonEntropy(probabilities);\n}\n\n/**\n * Compute local type entropy for a node's neighbours.\n *\n * This measures the diversity of types among a node's neighbours.\n * High entropy = heterogeneous neighbourhood (diverse types).\n * Low entropy = homogeneous neighbourhood (similar types).\n *\n * @param neighbourTypes - Array of type labels for neighbours\n * @returns Normalised entropy in [0, 1]\n */\nexport function localTypeEntropy(neighbourTypes: readonly string[]): number {\n\tif (neighbourTypes.length <= 1) {\n\t\treturn 0;\n\t}\n\n\t// Count occurrences of each type\n\tconst typeCounts = new Map<string, number>();\n\tfor (const t of neighbourTypes) {\n\t\ttypeCounts.set(t, (typeCounts.get(t) ?? 
0) + 1);\n\t}\n\n\t// If all neighbours are the same type, entropy is 0\n\tif (typeCounts.size === 1) {\n\t\treturn 0;\n\t}\n\n\t// Convert to probability array\n\tconst probabilities: number[] = [];\n\tconst total = neighbourTypes.length;\n\tfor (const count of typeCounts.values()) {\n\t\tprobabilities.push(count / total);\n\t}\n\n\treturn normalisedEntropy(probabilities);\n}\n","/**\n * Neighbourhood computation utilities.\n *\n * Shared utilities for neighbourhood operations used by MI variants and other graph algorithms.\n * These functions eliminate duplication of neighbourhood set operations across multiple\n * implementations.\n *\n * @packageDocumentation\n */\n\nimport type { NodeId, NodeData, EdgeData, ReadableGraph } from \"../graph\";\n\n/**\n * Collect neighbours into a Set, optionally excluding a specific node.\n *\n * @param graph - The graph to traverse\n * @param nodeId - The source node\n * @param exclude - Optional node ID to exclude from result\n * @returns A ReadonlySet of neighbouring node IDs\n */\nexport function neighbourSet<N extends NodeData, E extends EdgeData>(\n\tgraph: ReadableGraph<N, E>,\n\tnodeId: NodeId,\n\texclude?: NodeId,\n): ReadonlySet<NodeId> {\n\tconst neighbours = new Set(graph.neighbours(nodeId));\n\tif (exclude !== undefined) {\n\t\tneighbours.delete(exclude);\n\t}\n\treturn neighbours;\n}\n\n/**\n * Compute intersection and union sizes of two neighbour sets without allocating the union set.\n *\n * This is more efficient than computing both separately, as it avoids creating a full union Set.\n *\n * @param a - First neighbourhood set\n * @param b - Second neighbourhood set\n * @returns Object with intersection and union sizes\n */\nexport function neighbourOverlap(\n\ta: ReadonlySet<NodeId>,\n\tb: ReadonlySet<NodeId>,\n): { intersection: number; union: number } {\n\tlet intersection = 0;\n\n\t// Count intersection by iterating through the smaller set\n\tconst [smaller, larger] = a.size < b.size ? 
[a, b] : [b, a];\n\n\tfor (const node of smaller) {\n\t\tif (larger.has(node)) {\n\t\t\tintersection++;\n\t\t}\n\t}\n\n\t// Union size = size(a) + size(b) - intersection\n\tconst union = a.size + b.size - intersection;\n\n\treturn { intersection, union };\n}\n\n/**\n * Return the actual intersection set of two neighbourhood sets.\n *\n * Needed by Adamic-Adar (iterates common neighbours) and ETCH (requires edge types of intersection edges).\n *\n * @param a - First neighbourhood set\n * @param b - Second neighbourhood set\n * @returns A ReadonlySet containing nodes in both a and b\n */\nexport function neighbourIntersection(\n\ta: ReadonlySet<NodeId>,\n\tb: ReadonlySet<NodeId>,\n): ReadonlySet<NodeId> {\n\tconst intersection = new Set<NodeId>();\n\n\t// Iterate through the smaller set for efficiency\n\tconst [smaller, larger] = a.size < b.size ? [a, b] : [b, a];\n\n\tfor (const node of smaller) {\n\t\tif (larger.has(node)) {\n\t\t\tintersection.add(node);\n\t\t}\n\t}\n\n\treturn intersection;\n}\n\n/**\n * Count the number of edges with a specific type in the graph.\n *\n * Used by ETCH MI variant to compute edge rarity weighting.\n *\n * @param graph - The graph to count edges in\n * @param type - The edge type to count\n * @returns The number of edges with the specified type\n */\nexport function countEdgesOfType<N extends NodeData, E extends EdgeData>(\n\tgraph: ReadableGraph<N, E>,\n\ttype: string,\n): number {\n\tlet count = 0;\n\tfor (const edge of graph.edges()) {\n\t\tif (edge.type === type) {\n\t\t\tcount++;\n\t\t}\n\t}\n\treturn count;\n}\n\n/**\n * Count the number of nodes with a specific type in the graph.\n *\n * Used by NOTCH MI variant to compute node rarity weighting.\n *\n * @param graph - The graph to count nodes in\n * @param type - The node type to count\n * @returns The number of nodes with the specified type\n */\nexport function countNodesOfType<N extends NodeData, E extends EdgeData>(\n\tgraph: ReadableGraph<N, E>,\n\ttype: string,\n): 
number {\n\tlet count = 0;\n\tfor (const nodeId of graph.nodeIds()) {\n\t\tconst node = graph.getNode(nodeId);\n\t\tif (node?.type === type) {\n\t\t\tcount++;\n\t\t}\n\t}\n\treturn count;\n}\n"],"mappings":";;;;;;;;;;;;;;;AAyBA,SAAgB,2BACf,OACA,QACS;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAG1B,KAAI,SAAS,EACZ,QAAO;CAIR,IAAI,gBAAgB;AAEpB,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;EAC3C,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;GAC/C,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;CAOH,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAEpD,QAAO,gBAAgB;;;;;;;;;;;;;AAcxB,SAAgB,iCACf,OACA,QACA,aAAa,KACJ;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAE1B,KAAI,SAAS,EACZ,QAAO;CAGR,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAGpD,KAAI,qBAAqB,WACxB,QAAO,2BAA2B,OAAO,OAAO;CAIjD,IAAI,gBAAgB;CACpB,IAAI,UAAU;AAGd,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;EACnE,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;GACvE,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB;AAGA,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;AAMH,QAAQ,gBAAgB,WAAY,oBAAoB;;;;;;;;;;;AAYzD,SAAgB,4BACf,OACA,SACsB;CACtB,MAAM,0BAAU,IAAI,KAAqB;AAEzC,MAAK,MAAM,UAAU,QACpB,SAAQ,IAAI,QAAQ,2BAA2B,OAAO,OAAO,CAAC;AAG/D,QAAO;;;;;;;;;;;;;;;;;;;;;;;;AC3HR,SAAgB,eAAe,eAA0C;AACxE,KAAI,cAAc,WAAW,EAC5B,QAAO;CAGR,IAAI,UAAU;AACd,MAAK,MAAM,KAAK,cAEf,KAAI,IAAI,EACP,YAAW,IAAI,KAAK,KAAK,EAAE;AAI7B,QAAO;;;;;;;;;;;;;;;AAgBR,SAAgB,kBAAkB,eAA0C;AAC3E,KAAI,cAAc,UAAU,EAC3B,QAAO;CAGR,MAAM,IAAI,eAAe,cAAc;CACvC,MAAM,OAAO,KAAK,KAAK,cAAc,OAAO;AAE5C,KAAI,SAAS,EACZ,QAAO;AAGR,QAAO,IAAI;;;;;;;;;;;;AAaZ,SAAgB,kBAAkB,QAAmC;AACpE,KAAI,OAAO,WAAW,EACrB,QAAO;CAGR,MAAM,QAAQ,OAAO,QAAQ,KAAK,MAAM,MAAM,GAAG,EAAE;AACnD,KAAI,UAAU,EACb,QAAO;AAIR,QAAO,eADe,OAAO,KAAK,MAAM,IAAI,MAAM,CACd;;;;;;;;;;;;AAarC,SAAgB,iBAAiB,gBAA2C;AAC3E,KAAI,eA
Ae,UAAU,EAC5B,QAAO;CAIR,MAAM,6BAAa,IAAI,KAAqB;AAC5C,MAAK,MAAM,KAAK,eACf,YAAW,IAAI,IAAI,WAAW,IAAI,EAAE,IAAI,KAAK,EAAE;AAIhD,KAAI,WAAW,SAAS,EACvB,QAAO;CAIR,MAAM,gBAA0B,EAAE;CAClC,MAAM,QAAQ,eAAe;AAC7B,MAAK,MAAM,SAAS,WAAW,QAAQ,CACtC,eAAc,KAAK,QAAQ,MAAM;AAGlC,QAAO,kBAAkB,cAAc;;;;;;;;;;;;ACtGxC,SAAgB,aACf,OACA,QACA,SACsB;CACtB,MAAM,aAAa,IAAI,IAAI,MAAM,WAAW,OAAO,CAAC;AACpD,KAAI,YAAY,KAAA,EACf,YAAW,OAAO,QAAQ;AAE3B,QAAO;;;;;;;;;;;AAYR,SAAgB,iBACf,GACA,GAC0C;CAC1C,IAAI,eAAe;CAGnB,MAAM,CAAC,SAAS,UAAU,EAAE,OAAO,EAAE,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE;AAE3D,MAAK,MAAM,QAAQ,QAClB,KAAI,OAAO,IAAI,KAAK,CACnB;CAKF,MAAM,QAAQ,EAAE,OAAO,EAAE,OAAO;AAEhC,QAAO;EAAE;EAAc;EAAO;;;;;;;;;;;AAY/B,SAAgB,sBACf,GACA,GACsB;CACtB,MAAM,+BAAe,IAAI,KAAa;CAGtC,MAAM,CAAC,SAAS,UAAU,EAAE,OAAO,EAAE,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE;AAE3D,MAAK,MAAM,QAAQ,QAClB,KAAI,OAAO,IAAI,KAAK,CACnB,cAAa,IAAI,KAAK;AAIxB,QAAO;;;;;;;;;;;AAYR,SAAgB,iBACf,OACA,MACS;CACT,IAAI,QAAQ;AACZ,MAAK,MAAM,QAAQ,MAAM,OAAO,CAC/B,KAAI,KAAK,SAAS,KACjB;AAGF,QAAO;;;;;;;;;;;AAYR,SAAgB,iBACf,OACA,MACS;CACT,IAAI,QAAQ;AACZ,MAAK,MAAM,UAAU,MAAM,SAAS,CAEnC,KADa,MAAM,QAAQ,OAAO,EACxB,SAAS,KAClB;AAGF,QAAO"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { NodeId, NodeData, EdgeData, ReadableGraph } from '../graph';
|
|
2
|
+
/**
|
|
3
|
+
* Collect neighbours into a Set, optionally excluding a specific node.
|
|
4
|
+
*
|
|
5
|
+
* @param graph - The graph to traverse
|
|
6
|
+
* @param nodeId - The source node
|
|
7
|
+
* @param exclude - Optional node ID to exclude from result
|
|
8
|
+
* @returns A ReadonlySet of neighbouring node IDs
|
|
9
|
+
*/
|
|
10
|
+
export declare function neighbourSet<N extends NodeData, E extends EdgeData>(graph: ReadableGraph<N, E>, nodeId: NodeId, exclude?: NodeId): ReadonlySet<NodeId>;
|
|
11
|
+
/**
|
|
12
|
+
* Compute intersection and union sizes of two neighbour sets without allocating the union set.
|
|
13
|
+
*
|
|
14
|
+
* This is more efficient than computing both separately, as it avoids creating a full union Set.
|
|
15
|
+
*
|
|
16
|
+
* @param a - First neighbourhood set
|
|
17
|
+
* @param b - Second neighbourhood set
|
|
18
|
+
* @returns Object with intersection and union sizes
|
|
19
|
+
*/
|
|
20
|
+
export declare function neighbourOverlap(a: ReadonlySet<NodeId>, b: ReadonlySet<NodeId>): {
|
|
21
|
+
intersection: number;
|
|
22
|
+
union: number;
|
|
23
|
+
};
|
|
24
|
+
/**
|
|
25
|
+
* Return the actual intersection set of two neighbourhood sets.
|
|
26
|
+
*
|
|
27
|
+
* Needed by Adamic-Adar (iterates common neighbours) and ETCH (requires edge types of intersection edges).
|
|
28
|
+
*
|
|
29
|
+
* @param a - First neighbourhood set
|
|
30
|
+
* @param b - Second neighbourhood set
|
|
31
|
+
* @returns A ReadonlySet containing nodes in both a and b
|
|
32
|
+
*/
|
|
33
|
+
export declare function neighbourIntersection(a: ReadonlySet<NodeId>, b: ReadonlySet<NodeId>): ReadonlySet<NodeId>;
|
|
34
|
+
/**
|
|
35
|
+
* Count the number of edges with a specific type in the graph.
|
|
36
|
+
*
|
|
37
|
+
* Used by ETCH MI variant to compute edge rarity weighting.
|
|
38
|
+
*
|
|
39
|
+
* @param graph - The graph to count edges in
|
|
40
|
+
* @param type - The edge type to count
|
|
41
|
+
* @returns The number of edges with the specified type
|
|
42
|
+
*/
|
|
43
|
+
export declare function countEdgesOfType<N extends NodeData, E extends EdgeData>(graph: ReadableGraph<N, E>, type: string): number;
|
|
44
|
+
/**
|
|
45
|
+
* Count the number of nodes with a specific type in the graph.
|
|
46
|
+
*
|
|
47
|
+
* Used by NOTCH MI variant to compute node rarity weighting.
|
|
48
|
+
*
|
|
49
|
+
* @param graph - The graph to count nodes in
|
|
50
|
+
* @param type - The node type to count
|
|
51
|
+
* @returns The number of nodes with the specified type
|
|
52
|
+
*/
|
|
53
|
+
export declare function countNodesOfType<N extends NodeData, E extends EdgeData>(graph: ReadableGraph<N, E>, type: string): number;
|
|
54
|
+
//# sourceMappingURL=neighbours.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"neighbours.d.ts","sourceRoot":"","sources":["../../src/utils/neighbours.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAE1E;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS,QAAQ,EAAE,CAAC,SAAS,QAAQ,EAClE,KAAK,EAAE,aAAa,CAAC,CAAC,EAAE,CAAC,CAAC,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,MAAM,GACd,WAAW,CAAC,MAAM,CAAC,CAMrB;AAED;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAC/B,CAAC,EAAE,WAAW,CAAC,MAAM,CAAC,EACtB,CAAC,EAAE,WAAW,CAAC,MAAM,CAAC,GACpB;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAgBzC;AAED;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CACpC,CAAC,EAAE,WAAW,CAAC,MAAM,CAAC,EACtB,CAAC,EAAE,WAAW,CAAC,MAAM,CAAC,GACpB,WAAW,CAAC,MAAM,CAAC,CAarB;AAED;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,SAAS,QAAQ,EAAE,CAAC,SAAS,QAAQ,EACtE,KAAK,EAAE,aAAa,CAAC,CAAC,EAAE,CAAC,CAAC,EAC1B,IAAI,EAAE,MAAM,GACV,MAAM,CAQR;AAED;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,SAAS,QAAQ,EAAE,CAAC,SAAS,QAAQ,EACtE,KAAK,EAAE,aAAa,CAAC,CAAC,EAAE,CAAC,CAAC,EAC1B,IAAI,EAAE,MAAM,GACV,MAAM,CASR"}
|