graphwise 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/expansion/frontier-balanced.d.ts +12 -0
- package/dist/expansion/frontier-balanced.d.ts.map +1 -0
- package/dist/expansion/frontier-balanced.unit.test.d.ts +2 -0
- package/dist/expansion/frontier-balanced.unit.test.d.ts.map +1 -0
- package/dist/expansion/index.d.ts +12 -13
- package/dist/expansion/index.d.ts.map +1 -1
- package/dist/expansion/random-priority.d.ts +20 -0
- package/dist/expansion/random-priority.d.ts.map +1 -0
- package/dist/expansion/random-priority.unit.test.d.ts +2 -0
- package/dist/expansion/random-priority.unit.test.d.ts.map +1 -0
- package/dist/expansion/standard-bfs.d.ts +12 -0
- package/dist/expansion/standard-bfs.d.ts.map +1 -0
- package/dist/expansion/standard-bfs.unit.test.d.ts +2 -0
- package/dist/expansion/standard-bfs.unit.test.d.ts.map +1 -0
- package/dist/extraction/index.d.ts +6 -6
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/motif.d.ts.map +1 -1
- package/dist/gpu/context.d.ts.map +1 -1
- package/dist/gpu/csr.d.ts.map +1 -1
- package/dist/gpu/index.cjs +410 -5
- package/dist/gpu/index.cjs.map +1 -0
- package/dist/gpu/index.d.ts +4 -5
- package/dist/gpu/index.d.ts.map +1 -1
- package/dist/gpu/index.js +400 -2
- package/dist/gpu/index.js.map +1 -0
- package/dist/graph/index.cjs +222 -2
- package/dist/graph/index.cjs.map +1 -0
- package/dist/graph/index.d.ts +3 -3
- package/dist/graph/index.d.ts.map +1 -1
- package/dist/graph/index.js +221 -1
- package/dist/graph/index.js.map +1 -0
- package/dist/index/index.cjs +902 -10
- package/dist/index/index.cjs.map +1 -1
- package/dist/index/index.js +880 -10
- package/dist/index/index.js.map +1 -1
- package/dist/{kmeans-B0HEOU6k.cjs → kmeans-87ExSUNZ.js} +27 -13
- package/dist/{kmeans-DgbsOznU.js.map → kmeans-87ExSUNZ.js.map} +1 -1
- package/dist/{kmeans-DgbsOznU.js → kmeans-BIgSyGKu.cjs} +44 -2
- package/dist/{kmeans-B0HEOU6k.cjs.map → kmeans-BIgSyGKu.cjs.map} +1 -1
- package/dist/ranking/baselines/betweenness.d.ts +13 -0
- package/dist/ranking/baselines/betweenness.d.ts.map +1 -0
- package/dist/ranking/baselines/betweenness.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/betweenness.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/communicability.d.ts +13 -0
- package/dist/ranking/baselines/communicability.d.ts.map +1 -0
- package/dist/ranking/baselines/communicability.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/communicability.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/degree-sum.d.ts +13 -0
- package/dist/ranking/baselines/degree-sum.d.ts.map +1 -0
- package/dist/ranking/baselines/degree-sum.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/degree-sum.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/index.d.ts +20 -0
- package/dist/ranking/baselines/index.d.ts.map +1 -0
- package/dist/ranking/baselines/jaccard-arithmetic.d.ts +13 -0
- package/dist/ranking/baselines/jaccard-arithmetic.d.ts.map +1 -0
- package/dist/ranking/baselines/jaccard-arithmetic.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/jaccard-arithmetic.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/katz.d.ts +13 -0
- package/dist/ranking/baselines/katz.d.ts.map +1 -0
- package/dist/ranking/baselines/katz.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/katz.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/pagerank.d.ts +13 -0
- package/dist/ranking/baselines/pagerank.d.ts.map +1 -0
- package/dist/ranking/baselines/pagerank.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/pagerank.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/random-ranking.d.ts +21 -0
- package/dist/ranking/baselines/random-ranking.d.ts.map +1 -0
- package/dist/ranking/baselines/random-ranking.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/random-ranking.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/resistance-distance.d.ts +13 -0
- package/dist/ranking/baselines/resistance-distance.d.ts.map +1 -0
- package/dist/ranking/baselines/resistance-distance.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/resistance-distance.unit.test.d.ts.map +1 -0
- package/dist/ranking/baselines/widest-path.d.ts +13 -0
- package/dist/ranking/baselines/widest-path.d.ts.map +1 -0
- package/dist/ranking/baselines/widest-path.unit.test.d.ts +2 -0
- package/dist/ranking/baselines/widest-path.unit.test.d.ts.map +1 -0
- package/dist/ranking/index.d.ts +3 -6
- package/dist/ranking/index.d.ts.map +1 -1
- package/dist/ranking/mi/index.d.ts +9 -9
- package/dist/ranking/mi/index.d.ts.map +1 -1
- package/dist/schemas/index.d.ts +2 -2
- package/dist/schemas/index.d.ts.map +1 -1
- package/dist/seeds/index.cjs +398 -3
- package/dist/seeds/index.cjs.map +1 -0
- package/dist/seeds/index.d.ts +2 -4
- package/dist/seeds/index.d.ts.map +1 -1
- package/dist/seeds/index.js +396 -1
- package/dist/seeds/index.js.map +1 -0
- package/dist/seeds/stratified.d.ts.map +1 -1
- package/dist/structures/index.cjs +133 -2
- package/dist/structures/index.cjs.map +1 -0
- package/dist/structures/index.d.ts +1 -2
- package/dist/structures/index.d.ts.map +1 -1
- package/dist/structures/index.js +132 -1
- package/dist/structures/index.js.map +1 -0
- package/dist/traversal/index.cjs +152 -5
- package/dist/traversal/index.cjs.map +1 -0
- package/dist/traversal/index.d.ts +2 -2
- package/dist/traversal/index.d.ts.map +1 -1
- package/dist/traversal/index.js +148 -1
- package/dist/traversal/index.js.map +1 -0
- package/dist/utils/index.cjs +172 -9
- package/dist/utils/index.cjs.map +1 -0
- package/dist/utils/index.d.ts +3 -3
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +165 -3
- package/dist/utils/index.js.map +1 -0
- package/package.json +1 -1
- package/dist/gpu-BJRVYBjx.cjs +0 -338
- package/dist/gpu-BJRVYBjx.cjs.map +0 -1
- package/dist/gpu-BveuXugy.js +0 -315
- package/dist/gpu-BveuXugy.js.map +0 -1
- package/dist/graph-DLWiziLB.js +0 -222
- package/dist/graph-DLWiziLB.js.map +0 -1
- package/dist/graph-az06J1YV.cjs +0 -227
- package/dist/graph-az06J1YV.cjs.map +0 -1
- package/dist/seeds-B6J9oJfU.cjs +0 -404
- package/dist/seeds-B6J9oJfU.cjs.map +0 -1
- package/dist/seeds-UNZxqm_U.js +0 -393
- package/dist/seeds-UNZxqm_U.js.map +0 -1
- package/dist/structures-BPfhfqNP.js +0 -133
- package/dist/structures-BPfhfqNP.js.map +0 -1
- package/dist/structures-CJ_S_7fs.cjs +0 -138
- package/dist/structures-CJ_S_7fs.cjs.map +0 -1
- package/dist/traversal-CQCjUwUJ.js +0 -149
- package/dist/traversal-CQCjUwUJ.js.map +0 -1
- package/dist/traversal-QeHaNUWn.cjs +0 -172
- package/dist/traversal-QeHaNUWn.cjs.map +0 -1
- package/dist/utils-Q_akvlMn.js +0 -164
- package/dist/utils-Q_akvlMn.js.map +0 -1
- package/dist/utils-spZa1ZvS.cjs +0 -205
- package/dist/utils-spZa1ZvS.cjs.map +0 -1
package/dist/utils/index.cjs
CHANGED
|
@@ -1,13 +1,176 @@
|
|
|
1
1
|
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
2
|
-
const require_kmeans = require("../kmeans-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
2
|
+
const require_kmeans = require("../kmeans-BIgSyGKu.cjs");
|
|
3
|
+
//#region src/utils/clustering-coefficient.ts
/**
 * Compute the local clustering coefficient for a single node.
 *
 * The clustering coefficient is defined as:
 *   CC(v) = (triangles through v) / (possible triangles)
 *         = 2 * |{(u,w) : u,w in N(v), (u,w) in E}| / (deg(v) * (deg(v) - 1))
 *
 * For nodes with degree < 2, the clustering coefficient is 0.
 *
 * @param graph - The graph to compute on (uses `neighbours` and `getEdge`)
 * @param nodeId - The node to compute clustering coefficient for
 * @returns The clustering coefficient in [0, 1], or 0 if undefined
 */
function localClusteringCoefficient(graph, nodeId) {
	const neighbours = [...graph.neighbours(nodeId, "both")];
	const degree = neighbours.length;
	// Fewer than two neighbours cannot form any triangle.
	if (degree < 2) return 0;
	let triangleCount = 0;
	for (let i = 0; i < neighbours.length; i++) {
		const u = neighbours[i];
		if (u === void 0) continue;
		for (let j = i + 1; j < neighbours.length; j++) {
			const w = neighbours[j];
			if (w === void 0) continue;
			// A neighbour pair closes a triangle when connected in either direction.
			if (graph.getEdge(u, w) !== void 0 || graph.getEdge(w, u) !== void 0) triangleCount++;
		}
	}
	const possibleTriangles = degree * (degree - 1) / 2;
	return triangleCount / possibleTriangles;
}
/**
 * Compute approximate local clustering coefficient using sampling.
 *
 * For nodes with many neighbours, this checks at most `sampleSize` neighbour
 * pairs rather than all pairs. Useful for large graphs where exact
 * computation is expensive.
 *
 * NOTE(review): pairs are visited in deterministic index order, not sampled
 * uniformly at random, so the estimate is biased toward the first neighbours
 * the graph yields — confirm this is acceptable for callers.
 *
 * @param graph - The graph to compute on
 * @param nodeId - The node to compute clustering coefficient for
 * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)
 * @returns The approximate clustering coefficient in [0, 1]
 */
function approximateClusteringCoefficient(graph, nodeId, sampleSize = 100) {
	const neighbours = [...graph.neighbours(nodeId, "both")];
	const degree = neighbours.length;
	if (degree < 2) return 0;
	const possibleTriangles = degree * (degree - 1) / 2;
	// Fall back to the exact computation when the budget covers every pair,
	// or when a non-positive budget would otherwise produce 0/0 = NaN.
	if (possibleTriangles <= sampleSize || sampleSize <= 0) return localClusteringCoefficient(graph, nodeId);
	let triangleCount = 0;
	let sampled = 0;
	for (let i = 0; i < neighbours.length && sampled < sampleSize; i++) {
		const u = neighbours[i];
		if (u === void 0) continue;
		for (let j = i + 1; j < neighbours.length && sampled < sampleSize; j++) {
			const w = neighbours[j];
			if (w === void 0) continue;
			sampled++;
			if (graph.getEdge(u, w) !== void 0 || graph.getEdge(w, u) !== void 0) triangleCount++;
		}
	}
	// Defensive: a neighbour list of only undefined entries samples nothing.
	if (sampled === 0) return 0;
	// The fraction of sampled pairs that close a triangle is already the CC
	// estimate; the original multiplied by possibleTriangles / possibleTriangles,
	// a no-op factor of 1, which is dropped here.
	return triangleCount / sampled;
}
|
|
65
|
+
/**
 * Compute clustering coefficients for a batch of nodes.
 *
 * Convenience wrapper over localClusteringCoefficient that collects the
 * per-node results keyed by node id.
 *
 * @param graph - The graph to compute on
 * @param nodeIds - The nodes to compute clustering coefficients for
 * @returns Map from nodeId to clustering coefficient
 */
function batchClusteringCoefficients(graph, nodeIds) {
	const coefficients = /* @__PURE__ */ new Map();
	for (const id of nodeIds) {
		coefficients.set(id, localClusteringCoefficient(graph, id));
	}
	return coefficients;
}
|
|
79
|
+
//#endregion
|
|
80
|
+
//#region src/utils/entropy.ts
/**
 * Entropy computation utilities for graph analysis.
 *
 * Shannon entropy measures uncertainty or randomness in a distribution.
 * Used in EDGE and HAE algorithms for heterogeneity-aware expansion.
 *
 * @packageDocumentation
 */
/**
 * Compute Shannon entropy of a probability distribution:
 *   H(X) = -Σ p(x) × log₂(p(x))
 *
 * A uniform distribution has maximum entropy; a deterministic distribution
 * (all probability on one value) has zero entropy.
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Entropy in bits (log base 2), or 0 if probabilities are invalid
 */
function shannonEntropy(probabilities) {
	if (probabilities.length === 0) return 0;
	// Zero-probability entries are skipped: 0 * log2(0) is taken as 0.
	return probabilities.reduce((h, p) => (p > 0 ? h - p * Math.log2(p) : h), 0);
}
|
|
107
|
+
/**
 * Compute normalised entropy: Shannon entropy divided by the maximum
 * possible entropy log₂(k) for k categories.
 *
 * The result lies in [0, 1]:
 * - 0 means the distribution is deterministic (all mass on one value)
 * - 1 means the distribution is uniform (maximum uncertainty)
 *
 * Useful for comparing entropy across distributions with different numbers
 * of possible values.
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Normalised entropy in [0, 1], or 0 if only one category
 */
function normalisedEntropy(probabilities) {
	if (probabilities.length <= 1) return 0;
	const maxEntropy = Math.log2(probabilities.length);
	// length >= 2 makes maxEntropy > 0; the guard is kept defensively.
	if (maxEntropy === 0) return 0;
	return shannonEntropy(probabilities) / maxEntropy;
}
|
|
127
|
+
/**
 * Compute Shannon entropy from a frequency count.
 *
 * Converts raw counts to probabilities and delegates to shannonEntropy.
 * Convenient when you have counts rather than normalised probabilities.
 *
 * @param counts - Array of frequency counts
 * @returns Entropy in bits
 */
function entropyFromCounts(counts) {
	if (counts.length === 0) return 0;
	let total = 0;
	for (const count of counts) total += count;
	// All-zero counts carry no information.
	if (total === 0) return 0;
	return shannonEntropy(counts.map((count) => count / total));
}
|
|
143
|
+
/**
 * Compute local type entropy for a node's neighbours.
 *
 * Measures the diversity of types among a node's neighbours:
 * high entropy = heterogeneous neighbourhood (diverse types),
 * low entropy = homogeneous neighbourhood (similar types).
 *
 * @param neighbourTypes - Array of type labels for neighbours
 * @returns Normalised entropy in [0, 1]
 */
function localTypeEntropy(neighbourTypes) {
	const total = neighbourTypes.length;
	if (total <= 1) return 0;
	const typeCounts = /* @__PURE__ */ new Map();
	for (const type of neighbourTypes) {
		typeCounts.set(type, (typeCounts.get(type) ?? 0) + 1);
	}
	// A single distinct type is a perfectly homogeneous neighbourhood.
	if (typeCounts.size === 1) return 0;
	const probabilities = [...typeCounts.values()].map((count) => count / total);
	return normalisedEntropy(probabilities);
}
|
|
163
|
+
//#endregion
|
|
164
|
+
exports._computeMean = require_kmeans._computeMean;
|
|
165
|
+
exports.approximateClusteringCoefficient = approximateClusteringCoefficient;
|
|
166
|
+
exports.batchClusteringCoefficients = batchClusteringCoefficients;
|
|
167
|
+
exports.entropyFromCounts = entropyFromCounts;
|
|
168
|
+
exports.localClusteringCoefficient = localClusteringCoefficient;
|
|
169
|
+
exports.localTypeEntropy = localTypeEntropy;
|
|
9
170
|
exports.miniBatchKMeans = require_kmeans.miniBatchKMeans;
|
|
10
171
|
exports.normaliseFeatures = require_kmeans.normaliseFeatures;
|
|
11
|
-
exports.normalisedEntropy = require_utils.normalisedEntropy;
|
|
12
|
-
exports.shannonEntropy = require_utils.shannonEntropy;
|
|
13
172
|
exports.zScoreNormalise = require_kmeans.normaliseFeatures;
|
|
173
|
+
exports.normalisedEntropy = normalisedEntropy;
|
|
174
|
+
exports.shannonEntropy = shannonEntropy;
|
|
175
|
+
|
|
176
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.cjs","names":[],"sources":["../../src/utils/clustering-coefficient.ts","../../src/utils/entropy.ts"],"sourcesContent":["/**\n * Clustering coefficient computation for graph nodes.\n *\n * The local clustering coefficient measures how close a node's neighbours\n * are to being a complete graph (clique). It is used in SPAN MI variant\n * and GRASP seed selection.\n *\n * @packageDocumentation\n */\n\nimport type { ReadableGraph, NodeId } from \"../graph\";\n\n/**\n * Compute the local clustering coefficient for a single node.\n *\n * The clustering coefficient is defined as:\n * CC(v) = (triangles through v) / (possible triangles)\n * CC(v) = 2 * |{(u,w) : u,w in N(v), (u,w) in E}| / (deg(v) * (deg(v) - 1))\n *\n * For nodes with degree < 2, the clustering coefficient is 0.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @returns The clustering coefficient in [0, 1], or 0 if undefined\n */\nexport function localClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\t// Nodes with degree < 2 have no possible triangles\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\t// Count actual triangles: pairs of neighbours that are connected\n\tlet triangleCount = 0;\n\n\tfor (let i = 0; i < neighbours.length; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Possible triangles: deg * (deg - 1) / 2 pairs\n\t// We multiply by 2 because each triangle is counted once\n\tconst possibleTriangles = (degree * 
(degree - 1)) / 2;\n\n\treturn triangleCount / possibleTriangles;\n}\n\n/**\n * Compute approximate local clustering coefficient using sampling.\n *\n * For nodes with many neighbours, this samples neighbour pairs rather than\n * checking all pairs. Useful for large graphs where exact computation is expensive.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)\n * @returns The approximate clustering coefficient in [0, 1]\n */\nexport function approximateClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n\tsampleSize = 100,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\tconst possibleTriangles = (degree * (degree - 1)) / 2;\n\n\t// If all pairs can be checked within sample limit, use exact computation\n\tif (possibleTriangles <= sampleSize) {\n\t\treturn localClusteringCoefficient(graph, nodeId);\n\t}\n\n\t// Sample pairs uniformly\n\tlet triangleCount = 0;\n\tlet sampled = 0;\n\n\t// Use reservoir sampling style approach for pair selection\n\tfor (let i = 0; i < neighbours.length && sampled < sampleSize; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length && sampled < sampleSize; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Decide whether to include this pair based on remaining budget\n\t\t\tsampled++;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Extrapolate from sample\n\treturn (triangleCount / sampled) * (possibleTriangles / possibleTriangles);\n}\n\n/**\n * Compute clustering coefficients for multiple nodes 
efficiently.\n *\n * Reuses neighbour sets to avoid repeated iteration.\n *\n * @param graph - The graph to compute on\n * @param nodeIds - The nodes to compute clustering coefficients for\n * @returns Map from nodeId to clustering coefficient\n */\nexport function batchClusteringCoefficients(\n\tgraph: ReadableGraph,\n\tnodeIds: readonly NodeId[],\n): Map<NodeId, number> {\n\tconst results = new Map<NodeId, number>();\n\n\tfor (const nodeId of nodeIds) {\n\t\tresults.set(nodeId, localClusteringCoefficient(graph, nodeId));\n\t}\n\n\treturn results;\n}\n","/**\n * Entropy computation utilities for graph analysis.\n *\n * Shannon entropy measures uncertainty or randomness in a distribution.\n * Used in EDGE and HAE algorithms for heterogeneity-aware expansion.\n *\n * @packageDocumentation\n */\n\n/**\n * Compute Shannon entropy of a probability distribution.\n *\n * Shannon entropy is defined as:\n * H(X) = -Σ p(x) × log₂(p(x))\n *\n * A uniform distribution has maximum entropy.\n * A deterministic distribution (all probability on one value) has zero entropy.\n *\n * @param probabilities - Array of probabilities (should sum to 1)\n * @returns Entropy in bits (log base 2), or 0 if probabilities are invalid\n */\nexport function shannonEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length === 0) {\n\t\treturn 0;\n\t}\n\n\tlet entropy = 0;\n\tfor (const p of probabilities) {\n\t\t// Skip zero probabilities (log(0) is undefined, but 0 * log(0) = 0)\n\t\tif (p > 0) {\n\t\t\tentropy -= p * Math.log2(p);\n\t\t}\n\t}\n\n\treturn entropy;\n}\n\n/**\n * Compute normalised entropy (entropy divided by maximum possible entropy).\n *\n * Normalised entropy is in [0, 1], where:\n * - 0 means the distribution is deterministic (all mass on one value)\n * - 1 means the distribution is uniform (maximum uncertainty)\n *\n * This is useful for comparing entropy across distributions with different\n * numbers of possible values.\n *\n * @param probabilities - 
Array of probabilities (should sum to 1)\n * @returns Normalised entropy in [0, 1], or 0 if only one category\n */\nexport function normalisedEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length <= 1) {\n\t\treturn 0;\n\t}\n\n\tconst H = shannonEntropy(probabilities);\n\tconst Hmax = Math.log2(probabilities.length);\n\n\tif (Hmax === 0) {\n\t\treturn 0;\n\t}\n\n\treturn H / Hmax;\n}\n\n/**\n * Compute entropy from a frequency count.\n *\n * Converts counts to probabilities and then computes entropy.\n * This is a convenience function when you have raw counts rather than\n * normalised probabilities.\n *\n * @param counts - Array of frequency counts\n * @returns Entropy in bits\n */\nexport function entropyFromCounts(counts: readonly number[]): number {\n\tif (counts.length === 0) {\n\t\treturn 0;\n\t}\n\n\tconst total = counts.reduce((sum, c) => sum + c, 0);\n\tif (total === 0) {\n\t\treturn 0;\n\t}\n\n\tconst probabilities = counts.map((c) => c / total);\n\treturn shannonEntropy(probabilities);\n}\n\n/**\n * Compute local type entropy for a node's neighbours.\n *\n * This measures the diversity of types among a node's neighbours.\n * High entropy = heterogeneous neighbourhood (diverse types).\n * Low entropy = homogeneous neighbourhood (similar types).\n *\n * @param neighbourTypes - Array of type labels for neighbours\n * @returns Normalised entropy in [0, 1]\n */\nexport function localTypeEntropy(neighbourTypes: readonly string[]): number {\n\tif (neighbourTypes.length <= 1) {\n\t\treturn 0;\n\t}\n\n\t// Count occurrences of each type\n\tconst typeCounts = new Map<string, number>();\n\tfor (const t of neighbourTypes) {\n\t\ttypeCounts.set(t, (typeCounts.get(t) ?? 
0) + 1);\n\t}\n\n\t// If all neighbours are the same type, entropy is 0\n\tif (typeCounts.size === 1) {\n\t\treturn 0;\n\t}\n\n\t// Convert to probability array\n\tconst probabilities: number[] = [];\n\tconst total = neighbourTypes.length;\n\tfor (const count of typeCounts.values()) {\n\t\tprobabilities.push(count / total);\n\t}\n\n\treturn normalisedEntropy(probabilities);\n}\n"],"mappings":";;;;;;;;;;;;;;;;AAyBA,SAAgB,2BACf,OACA,QACS;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAG1B,KAAI,SAAS,EACZ,QAAO;CAIR,IAAI,gBAAgB;AAEpB,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;EAC3C,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;GAC/C,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;CAOH,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAEpD,QAAO,gBAAgB;;;;;;;;;;;;;AAcxB,SAAgB,iCACf,OACA,QACA,aAAa,KACJ;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAE1B,KAAI,SAAS,EACZ,QAAO;CAGR,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAGpD,KAAI,qBAAqB,WACxB,QAAO,2BAA2B,OAAO,OAAO;CAIjD,IAAI,gBAAgB;CACpB,IAAI,UAAU;AAGd,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;EACnE,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;GACvE,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB;AAGA,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;AAMH,QAAQ,gBAAgB,WAAY,oBAAoB;;;;;;;;;;;AAYzD,SAAgB,4BACf,OACA,SACsB;CACtB,MAAM,0BAAU,IAAI,KAAqB;AAEzC,MAAK,MAAM,UAAU,QACpB,SAAQ,IAAI,QAAQ,2BAA2B,OAAO,OAAO,CAAC;AAG/D,QAAO;;;;;;;;;;;;;;;;;;;;;;;;AC3HR,SAAgB,eAAe,eAA0C;AACxE,KAAI,cAAc,WAAW,EAC5B,QAAO;CAGR,IAAI,UAAU;AACd,MAAK,MAAM,KAAK,cAEf,KAAI,IAAI,EACP,YAAW,IAAI,KAAK,KAAK,EAAE;AAI7B,QAAO;;;;;;;;;;;;;;;AAgBR,SAAgB,kBAAkB,eAA0C;AAC3E,KAAI,cAAc,UAAU,EAC3B,QAAO;CAGR,MAAM,IAAI,eAAe,cAAc;CACvC,MAAM,OAAO,KAAK,KAAK,cAAc,OAAO;AAE5C,KAAI,SAAS,EACZ,QAAO;AAGR,QAAO,IAAI;;;;;;;;;;;;AAaZ,SAAgB,kBAAkB,QAAmC;AACpE,KAAI,OA
AO,WAAW,EACrB,QAAO;CAGR,MAAM,QAAQ,OAAO,QAAQ,KAAK,MAAM,MAAM,GAAG,EAAE;AACnD,KAAI,UAAU,EACb,QAAO;AAIR,QAAO,eADe,OAAO,KAAK,MAAM,IAAI,MAAM,CACd;;;;;;;;;;;;AAarC,SAAgB,iBAAiB,gBAA2C;AAC3E,KAAI,eAAe,UAAU,EAC5B,QAAO;CAIR,MAAM,6BAAa,IAAI,KAAqB;AAC5C,MAAK,MAAM,KAAK,eACf,YAAW,IAAI,IAAI,WAAW,IAAI,EAAE,IAAI,KAAK,EAAE;AAIhD,KAAI,WAAW,SAAS,EACvB,QAAO;CAIR,MAAM,gBAA0B,EAAE;CAClC,MAAM,QAAQ,eAAe;AAC7B,MAAK,MAAM,SAAS,WAAW,QAAQ,CACtC,eAAc,KAAK,QAAQ,MAAM;AAGlC,QAAO,kBAAkB,cAAc"}
|
package/dist/utils/index.d.ts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* @packageDocumentation
|
|
5
5
|
*/
|
|
6
|
-
export
|
|
7
|
-
export
|
|
8
|
-
export
|
|
6
|
+
export * from './clustering-coefficient';
|
|
7
|
+
export * from './kmeans';
|
|
8
|
+
export * from './entropy';
|
|
9
9
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,0BAA0B,CAAC;AACzC,cAAc,UAAU,CAAC;AACzB,cAAc,WAAW,CAAC"}
|
package/dist/utils/index.js
CHANGED
|
@@ -1,3 +1,165 @@
|
|
|
1
|
-
import { n as normaliseFeatures, t as
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
import { n as miniBatchKMeans, r as normaliseFeatures, t as _computeMean } from "../kmeans-87ExSUNZ.js";
|
|
2
|
+
//#region src/utils/clustering-coefficient.ts
/**
 * Compute the local clustering coefficient for a single node.
 *
 * The clustering coefficient is defined as:
 *   CC(v) = (triangles through v) / (possible triangles)
 *         = 2 * |{(u,w) : u,w in N(v), (u,w) in E}| / (deg(v) * (deg(v) - 1))
 *
 * For nodes with degree < 2, the clustering coefficient is 0.
 *
 * @param graph - The graph to compute on (uses `neighbours` and `getEdge`)
 * @param nodeId - The node to compute clustering coefficient for
 * @returns The clustering coefficient in [0, 1], or 0 if undefined
 */
function localClusteringCoefficient(graph, nodeId) {
	const neighbours = [...graph.neighbours(nodeId, "both")];
	const degree = neighbours.length;
	// Fewer than two neighbours cannot form any triangle.
	if (degree < 2) return 0;
	let triangleCount = 0;
	for (let i = 0; i < neighbours.length; i++) {
		const u = neighbours[i];
		if (u === void 0) continue;
		for (let j = i + 1; j < neighbours.length; j++) {
			const w = neighbours[j];
			if (w === void 0) continue;
			// A neighbour pair closes a triangle when connected in either direction.
			if (graph.getEdge(u, w) !== void 0 || graph.getEdge(w, u) !== void 0) triangleCount++;
		}
	}
	const possibleTriangles = degree * (degree - 1) / 2;
	return triangleCount / possibleTriangles;
}
/**
 * Compute approximate local clustering coefficient using sampling.
 *
 * For nodes with many neighbours, this checks at most `sampleSize` neighbour
 * pairs rather than all pairs. Useful for large graphs where exact
 * computation is expensive.
 *
 * NOTE(review): pairs are visited in deterministic index order, not sampled
 * uniformly at random, so the estimate is biased toward the first neighbours
 * the graph yields — confirm this is acceptable for callers.
 *
 * @param graph - The graph to compute on
 * @param nodeId - The node to compute clustering coefficient for
 * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)
 * @returns The approximate clustering coefficient in [0, 1]
 */
function approximateClusteringCoefficient(graph, nodeId, sampleSize = 100) {
	const neighbours = [...graph.neighbours(nodeId, "both")];
	const degree = neighbours.length;
	if (degree < 2) return 0;
	const possibleTriangles = degree * (degree - 1) / 2;
	// Fall back to the exact computation when the budget covers every pair,
	// or when a non-positive budget would otherwise produce 0/0 = NaN.
	if (possibleTriangles <= sampleSize || sampleSize <= 0) return localClusteringCoefficient(graph, nodeId);
	let triangleCount = 0;
	let sampled = 0;
	for (let i = 0; i < neighbours.length && sampled < sampleSize; i++) {
		const u = neighbours[i];
		if (u === void 0) continue;
		for (let j = i + 1; j < neighbours.length && sampled < sampleSize; j++) {
			const w = neighbours[j];
			if (w === void 0) continue;
			sampled++;
			if (graph.getEdge(u, w) !== void 0 || graph.getEdge(w, u) !== void 0) triangleCount++;
		}
	}
	// Defensive: a neighbour list of only undefined entries samples nothing.
	if (sampled === 0) return 0;
	// The fraction of sampled pairs that close a triangle is already the CC
	// estimate; the original multiplied by possibleTriangles / possibleTriangles,
	// a no-op factor of 1, which is dropped here.
	return triangleCount / sampled;
}
|
|
64
|
+
/**
 * Compute clustering coefficients for a batch of nodes.
 *
 * Convenience wrapper over localClusteringCoefficient that collects the
 * per-node results keyed by node id.
 *
 * @param graph - The graph to compute on
 * @param nodeIds - The nodes to compute clustering coefficients for
 * @returns Map from nodeId to clustering coefficient
 */
function batchClusteringCoefficients(graph, nodeIds) {
	const coefficients = /* @__PURE__ */ new Map();
	for (const id of nodeIds) {
		coefficients.set(id, localClusteringCoefficient(graph, id));
	}
	return coefficients;
}
|
|
78
|
+
//#endregion
|
|
79
|
+
//#region src/utils/entropy.ts
/**
 * Entropy computation utilities for graph analysis.
 *
 * Shannon entropy measures uncertainty or randomness in a distribution.
 * Used in EDGE and HAE algorithms for heterogeneity-aware expansion.
 *
 * @packageDocumentation
 */
/**
 * Compute Shannon entropy of a probability distribution:
 *   H(X) = -Σ p(x) × log₂(p(x))
 *
 * A uniform distribution has maximum entropy; a deterministic distribution
 * (all probability on one value) has zero entropy.
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Entropy in bits (log base 2), or 0 if probabilities are invalid
 */
function shannonEntropy(probabilities) {
	if (probabilities.length === 0) return 0;
	// Zero-probability entries are skipped: 0 * log2(0) is taken as 0.
	return probabilities.reduce((h, p) => (p > 0 ? h - p * Math.log2(p) : h), 0);
}
|
|
106
|
+
/**
 * Normalised Shannon entropy: H divided by the maximum entropy log₂(k)
 * attainable with k categories.
 *
 * Yields a value in [0, 1]: 0 for a deterministic distribution
 * (all mass on one value), 1 for a uniform one. Useful for comparing
 * distributions with different numbers of possible values.
 *
 * @param probabilities - Array of probabilities (should sum to 1)
 * @returns Normalised entropy in [0, 1], or 0 if only one category
 */
function normalisedEntropy(probabilities) {
	const k = probabilities.length;
	if (k <= 1) return 0;
	const maxEntropy = Math.log2(k);
	if (maxEntropy === 0) return 0;
	return shannonEntropy(probabilities) / maxEntropy;
}
|
|
126
|
+
/**
 * Shannon entropy of a raw frequency count.
 *
 * The counts are normalised into probabilities before delegating to
 * shannonEntropy — a convenience for callers holding histograms rather
 * than probability distributions.
 *
 * @param counts - Array of frequency counts
 * @returns Entropy in bits
 */
function entropyFromCounts(counts) {
	if (counts.length === 0) return 0;
	let total = 0;
	for (const c of counts) total += c;
	if (total === 0) return 0;
	return shannonEntropy(counts.map((count) => count / total));
}
|
|
142
|
+
/**
 * Normalised entropy of the type labels in a node's neighbourhood.
 *
 * High values indicate a heterogeneous (type-diverse) neighbourhood;
 * low values a homogeneous one.
 *
 * @param neighbourTypes - Array of type labels for neighbours
 * @returns Normalised entropy in [0, 1]
 */
function localTypeEntropy(neighbourTypes) {
	const total = neighbourTypes.length;
	if (total <= 1) return 0;
	// Histogram of label occurrences.
	const histogram = new Map();
	for (const label of neighbourTypes) {
		histogram.set(label, (histogram.get(label) ?? 0) + 1);
	}
	// All neighbours share one type: zero diversity by definition.
	if (histogram.size === 1) return 0;
	const distribution = [...histogram.values()].map((count) => count / total);
	return normalisedEntropy(distribution);
}
|
|
162
|
+
//#endregion

// Generated bundle entry point: re-exports the public utils API.
// Note `normaliseFeatures` is additionally aliased as `zScoreNormalise`.
export { _computeMean, approximateClusteringCoefficient, batchClusteringCoefficients, entropyFromCounts, localClusteringCoefficient, localTypeEntropy, miniBatchKMeans, normaliseFeatures, normaliseFeatures as zScoreNormalise, normalisedEntropy, shannonEntropy };
|
|
164
|
+
|
|
165
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","names":[],"sources":["../../src/utils/clustering-coefficient.ts","../../src/utils/entropy.ts"],"sourcesContent":["/**\n * Clustering coefficient computation for graph nodes.\n *\n * The local clustering coefficient measures how close a node's neighbours\n * are to being a complete graph (clique). It is used in SPAN MI variant\n * and GRASP seed selection.\n *\n * @packageDocumentation\n */\n\nimport type { ReadableGraph, NodeId } from \"../graph\";\n\n/**\n * Compute the local clustering coefficient for a single node.\n *\n * The clustering coefficient is defined as:\n * CC(v) = (triangles through v) / (possible triangles)\n * CC(v) = 2 * |{(u,w) : u,w in N(v), (u,w) in E}| / (deg(v) * (deg(v) - 1))\n *\n * For nodes with degree < 2, the clustering coefficient is 0.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @returns The clustering coefficient in [0, 1], or 0 if undefined\n */\nexport function localClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\t// Nodes with degree < 2 have no possible triangles\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\t// Count actual triangles: pairs of neighbours that are connected\n\tlet triangleCount = 0;\n\n\tfor (let i = 0; i < neighbours.length; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Possible triangles: deg * (deg - 1) / 2 pairs\n\t// We multiply by 2 because each triangle is counted once\n\tconst possibleTriangles = (degree * 
(degree - 1)) / 2;\n\n\treturn triangleCount / possibleTriangles;\n}\n\n/**\n * Compute approximate local clustering coefficient using sampling.\n *\n * For nodes with many neighbours, this samples neighbour pairs rather than\n * checking all pairs. Useful for large graphs where exact computation is expensive.\n *\n * @param graph - The graph to compute on\n * @param nodeId - The node to compute clustering coefficient for\n * @param sampleSize - Maximum number of neighbour pairs to check (default: 100)\n * @returns The approximate clustering coefficient in [0, 1]\n */\nexport function approximateClusteringCoefficient(\n\tgraph: ReadableGraph,\n\tnodeId: NodeId,\n\tsampleSize = 100,\n): number {\n\tconst neighbours = [...graph.neighbours(nodeId, \"both\")];\n\tconst degree = neighbours.length;\n\n\tif (degree < 2) {\n\t\treturn 0;\n\t}\n\n\tconst possibleTriangles = (degree * (degree - 1)) / 2;\n\n\t// If all pairs can be checked within sample limit, use exact computation\n\tif (possibleTriangles <= sampleSize) {\n\t\treturn localClusteringCoefficient(graph, nodeId);\n\t}\n\n\t// Sample pairs uniformly\n\tlet triangleCount = 0;\n\tlet sampled = 0;\n\n\t// Use reservoir sampling style approach for pair selection\n\tfor (let i = 0; i < neighbours.length && sampled < sampleSize; i++) {\n\t\tconst u = neighbours[i];\n\t\tif (u === undefined) continue;\n\n\t\tfor (let j = i + 1; j < neighbours.length && sampled < sampleSize; j++) {\n\t\t\tconst w = neighbours[j];\n\t\t\tif (w === undefined) continue;\n\n\t\t\t// Decide whether to include this pair based on remaining budget\n\t\t\tsampled++;\n\n\t\t\t// Check if u and w are connected\n\t\t\tif (\n\t\t\t\tgraph.getEdge(u, w) !== undefined ||\n\t\t\t\tgraph.getEdge(w, u) !== undefined\n\t\t\t) {\n\t\t\t\ttriangleCount++;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Extrapolate from sample\n\treturn (triangleCount / sampled) * (possibleTriangles / possibleTriangles);\n}\n\n/**\n * Compute clustering coefficients for multiple nodes 
efficiently.\n *\n * Reuses neighbour sets to avoid repeated iteration.\n *\n * @param graph - The graph to compute on\n * @param nodeIds - The nodes to compute clustering coefficients for\n * @returns Map from nodeId to clustering coefficient\n */\nexport function batchClusteringCoefficients(\n\tgraph: ReadableGraph,\n\tnodeIds: readonly NodeId[],\n): Map<NodeId, number> {\n\tconst results = new Map<NodeId, number>();\n\n\tfor (const nodeId of nodeIds) {\n\t\tresults.set(nodeId, localClusteringCoefficient(graph, nodeId));\n\t}\n\n\treturn results;\n}\n","/**\n * Entropy computation utilities for graph analysis.\n *\n * Shannon entropy measures uncertainty or randomness in a distribution.\n * Used in EDGE and HAE algorithms for heterogeneity-aware expansion.\n *\n * @packageDocumentation\n */\n\n/**\n * Compute Shannon entropy of a probability distribution.\n *\n * Shannon entropy is defined as:\n * H(X) = -Σ p(x) × log₂(p(x))\n *\n * A uniform distribution has maximum entropy.\n * A deterministic distribution (all probability on one value) has zero entropy.\n *\n * @param probabilities - Array of probabilities (should sum to 1)\n * @returns Entropy in bits (log base 2), or 0 if probabilities are invalid\n */\nexport function shannonEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length === 0) {\n\t\treturn 0;\n\t}\n\n\tlet entropy = 0;\n\tfor (const p of probabilities) {\n\t\t// Skip zero probabilities (log(0) is undefined, but 0 * log(0) = 0)\n\t\tif (p > 0) {\n\t\t\tentropy -= p * Math.log2(p);\n\t\t}\n\t}\n\n\treturn entropy;\n}\n\n/**\n * Compute normalised entropy (entropy divided by maximum possible entropy).\n *\n * Normalised entropy is in [0, 1], where:\n * - 0 means the distribution is deterministic (all mass on one value)\n * - 1 means the distribution is uniform (maximum uncertainty)\n *\n * This is useful for comparing entropy across distributions with different\n * numbers of possible values.\n *\n * @param probabilities - 
Array of probabilities (should sum to 1)\n * @returns Normalised entropy in [0, 1], or 0 if only one category\n */\nexport function normalisedEntropy(probabilities: readonly number[]): number {\n\tif (probabilities.length <= 1) {\n\t\treturn 0;\n\t}\n\n\tconst H = shannonEntropy(probabilities);\n\tconst Hmax = Math.log2(probabilities.length);\n\n\tif (Hmax === 0) {\n\t\treturn 0;\n\t}\n\n\treturn H / Hmax;\n}\n\n/**\n * Compute entropy from a frequency count.\n *\n * Converts counts to probabilities and then computes entropy.\n * This is a convenience function when you have raw counts rather than\n * normalised probabilities.\n *\n * @param counts - Array of frequency counts\n * @returns Entropy in bits\n */\nexport function entropyFromCounts(counts: readonly number[]): number {\n\tif (counts.length === 0) {\n\t\treturn 0;\n\t}\n\n\tconst total = counts.reduce((sum, c) => sum + c, 0);\n\tif (total === 0) {\n\t\treturn 0;\n\t}\n\n\tconst probabilities = counts.map((c) => c / total);\n\treturn shannonEntropy(probabilities);\n}\n\n/**\n * Compute local type entropy for a node's neighbours.\n *\n * This measures the diversity of types among a node's neighbours.\n * High entropy = heterogeneous neighbourhood (diverse types).\n * Low entropy = homogeneous neighbourhood (similar types).\n *\n * @param neighbourTypes - Array of type labels for neighbours\n * @returns Normalised entropy in [0, 1]\n */\nexport function localTypeEntropy(neighbourTypes: readonly string[]): number {\n\tif (neighbourTypes.length <= 1) {\n\t\treturn 0;\n\t}\n\n\t// Count occurrences of each type\n\tconst typeCounts = new Map<string, number>();\n\tfor (const t of neighbourTypes) {\n\t\ttypeCounts.set(t, (typeCounts.get(t) ?? 
0) + 1);\n\t}\n\n\t// If all neighbours are the same type, entropy is 0\n\tif (typeCounts.size === 1) {\n\t\treturn 0;\n\t}\n\n\t// Convert to probability array\n\tconst probabilities: number[] = [];\n\tconst total = neighbourTypes.length;\n\tfor (const count of typeCounts.values()) {\n\t\tprobabilities.push(count / total);\n\t}\n\n\treturn normalisedEntropy(probabilities);\n}\n"],"mappings":";;;;;;;;;;;;;;;AAyBA,SAAgB,2BACf,OACA,QACS;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAG1B,KAAI,SAAS,EACZ,QAAO;CAIR,IAAI,gBAAgB;AAEpB,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;EAC3C,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;GAC/C,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;CAOH,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAEpD,QAAO,gBAAgB;;;;;;;;;;;;;AAcxB,SAAgB,iCACf,OACA,QACA,aAAa,KACJ;CACT,MAAM,aAAa,CAAC,GAAG,MAAM,WAAW,QAAQ,OAAO,CAAC;CACxD,MAAM,SAAS,WAAW;AAE1B,KAAI,SAAS,EACZ,QAAO;CAGR,MAAM,oBAAqB,UAAU,SAAS,KAAM;AAGpD,KAAI,qBAAqB,WACxB,QAAO,2BAA2B,OAAO,OAAO;CAIjD,IAAI,gBAAgB;CACpB,IAAI,UAAU;AAGd,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;EACnE,MAAM,IAAI,WAAW;AACrB,MAAI,MAAM,KAAA,EAAW;AAErB,OAAK,IAAI,IAAI,IAAI,GAAG,IAAI,WAAW,UAAU,UAAU,YAAY,KAAK;GACvE,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,KAAA,EAAW;AAGrB;AAGA,OACC,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,KACxB,MAAM,QAAQ,GAAG,EAAE,KAAK,KAAA,EAExB;;;AAMH,QAAQ,gBAAgB,WAAY,oBAAoB;;;;;;;;;;;AAYzD,SAAgB,4BACf,OACA,SACsB;CACtB,MAAM,0BAAU,IAAI,KAAqB;AAEzC,MAAK,MAAM,UAAU,QACpB,SAAQ,IAAI,QAAQ,2BAA2B,OAAO,OAAO,CAAC;AAG/D,QAAO;;;;;;;;;;;;;;;;;;;;;;;;AC3HR,SAAgB,eAAe,eAA0C;AACxE,KAAI,cAAc,WAAW,EAC5B,QAAO;CAGR,IAAI,UAAU;AACd,MAAK,MAAM,KAAK,cAEf,KAAI,IAAI,EACP,YAAW,IAAI,KAAK,KAAK,EAAE;AAI7B,QAAO;;;;;;;;;;;;;;;AAgBR,SAAgB,kBAAkB,eAA0C;AAC3E,KAAI,cAAc,UAAU,EAC3B,QAAO;CAGR,MAAM,IAAI,eAAe,cAAc;CACvC,MAAM,OAAO,KAAK,KAAK,cAAc,OAAO;AAE5C,KAAI,SAAS,EACZ,QAAO;AAGR,QAAO,IAAI;;;;;;;;;;;;AAaZ,SAAgB,kBAAkB,QAAmC;AACpE,KAAI,OAA
O,WAAW,EACrB,QAAO;CAGR,MAAM,QAAQ,OAAO,QAAQ,KAAK,MAAM,MAAM,GAAG,EAAE;AACnD,KAAI,UAAU,EACb,QAAO;AAIR,QAAO,eADe,OAAO,KAAK,MAAM,IAAI,MAAM,CACd;;;;;;;;;;;;AAarC,SAAgB,iBAAiB,gBAA2C;AAC3E,KAAI,eAAe,UAAU,EAC5B,QAAO;CAIR,MAAM,6BAAa,IAAI,KAAqB;AAC5C,MAAK,MAAM,KAAK,eACf,YAAW,IAAI,IAAI,WAAW,IAAI,EAAE,IAAI,KAAK,EAAE;AAIhD,KAAI,WAAW,SAAS,EACvB,QAAO;CAIR,MAAM,gBAA0B,EAAE;CAClC,MAAM,QAAQ,eAAe;AAC7B,MAAK,MAAM,SAAS,WAAW,QAAQ,CACtC,eAAc,KAAK,QAAQ,MAAM;AAGlC,QAAO,kBAAkB,cAAc"}
|