@ghcrawl/api-core 0.5.0 → 0.7.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cluster/edge-worker.d.ts +2 -0
- package/dist/cluster/edge-worker.d.ts.map +1 -0
- package/dist/cluster/edge-worker.js +48 -0
- package/dist/cluster/edge-worker.js.map +1 -0
- package/dist/cluster/exact-edges.d.ts +20 -0
- package/dist/cluster/exact-edges.d.ts.map +1 -0
- package/dist/cluster/exact-edges.js +80 -0
- package/dist/cluster/exact-edges.js.map +1 -0
- package/dist/cluster/perf.integration.d.ts +2 -0
- package/dist/cluster/perf.integration.d.ts.map +1 -0
- package/dist/cluster/perf.integration.js +287 -0
- package/dist/cluster/perf.integration.js.map +1 -0
- package/dist/search/exact.d.ts +13 -0
- package/dist/search/exact.d.ts.map +1 -1
- package/dist/search/exact.js +58 -6
- package/dist/search/exact.js.map +1 -1
- package/dist/service.d.ts +8 -1
- package/dist/service.d.ts.map +1 -1
- package/dist/service.js +198 -61
- package/dist/service.js.map +1 -1
- package/package.json +5 -4
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"edge-worker.d.ts","sourceRoot":"","sources":["../../src/cluster/edge-worker.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { parentPort, workerData } from 'node:worker_threads';
|
|
2
|
+
import { openDb } from '../db/sqlite.js';
|
|
3
|
+
import { normalizeEmbedding } from '../search/exact.js';
|
|
4
|
+
import { buildSourceKindEdges } from './exact-edges.js';
|
|
5
|
+
// Worker-thread entry point: loads the stored embeddings for one source kind of
// one repository, builds similarity edges between them, and reports progress and
// the final edge list to the parent thread.
const parentChannel = parentPort;
if (!parentChannel) {
    // parentPort is only set when this module runs inside a worker thread.
    throw new Error('edge-worker requires a parent port');
}
const { dbPath, repoId, sourceKind, limit, minScore } = workerData;
const db = openDb(dbPath);
try {
    // Only threads that are open (and not locally marked closed) take part.
    const selectEmbeddings = db.prepare(`select t.id, e.embedding_json
from document_embeddings e
join threads t on t.id = e.thread_id
where t.repo_id = ?
and t.state = 'open'
and t.closed_at_local is null
and e.source_kind = ?`);
    const rows = selectEmbeddings.all(repoId, sourceKind);
    const items = rows.map((row) => {
        const { normalized } = normalizeEmbedding(JSON.parse(row.embedding_json));
        return { id: row.id, normalizedEmbedding: normalized };
    });
    // Forward each progress report to the parent, tagged with this worker's source kind.
    const reportProgress = (progress) => {
        parentChannel.postMessage({
            type: 'progress',
            sourceKind,
            ...progress,
        });
    };
    const edges = buildSourceKindEdges(items, {
        limit,
        minScore,
        onProgress: reportProgress,
    });
    parentChannel.postMessage({
        type: 'result',
        sourceKind,
        edges,
    });
}
finally {
    // Always release the database handle, even when edge building throws.
    db.close();
}
|
|
48
|
+
//# sourceMappingURL=edge-worker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"edge-worker.js","sourceRoot":"","sources":["../../src/cluster/edge-worker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAE7D,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACzC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAexD,MAAM,IAAI,GAAG,UAAU,CAAC;AACxB,IAAI,CAAC,IAAI,EAAE,CAAC;IACV,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;AACxD,CAAC;AAED,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,UAAyB,CAAC;AAClF,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC;AAE1B,IAAI,CAAC;IACH,MAAM,IAAI,GAAG,EAAE;SACZ,OAAO,CACN;;;;;;+BAMyB,CAC1B;SACA,GAAG,CAAC,MAAM,EAAE,UAAU,CAAU,CAAC;IAEpC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC7B,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAa,CAAC,CAAC;QAClF,OAAO;YACL,EAAE,EAAE,GAAG,CAAC,EAAE;YACV,mBAAmB,EAAE,UAAU,CAAC,UAAU;SAC3C,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,oBAAoB,CAAC,KAAK,EAAE;QACxC,KAAK;QACL,QAAQ;QACR,UAAU,EAAE,CAAC,QAAQ,EAAE,EAAE;YACvB,IAAI,CAAC,WAAW,CAAC;gBACf,IAAI,EAAE,UAAU;gBAChB,UAAU;gBACV,GAAG,QAAQ;aACZ,CAAC,CAAC;QACL,CAAC;KACF,CAAC,CAAC;IAEH,IAAI,CAAC,WAAW,CAAC;QACf,IAAI,EAAE,QAAQ;QACd,UAAU;QACV,KAAK;KACN,CAAC,CAAC;AACL,CAAC;QAAS,CAAC;IACT,EAAE,CAAC,KAAK,EAAE,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * One input item for edge building: an identifier plus the embedding vector
 * that is compared against every other item's vector.
 */
export type SourceEmbeddingItem = {
    /** Identifier reported back as `leftThreadId` / `rightThreadId` on edges. */
    id: number;
    /** Embedding vector; all items passed together must share one length. */
    normalizedEmbedding: Array<number>;
};
|
|
5
|
+
/** A scored similarity edge between two items. */
export type SourceKindEdge = {
    /** Identifier of the first endpoint. */
    leftThreadId: number;
    /** Identifier of the second endpoint. */
    rightThreadId: number;
    /** Dot product of the two endpoints' embedding vectors. */
    score: number;
};
|
|
10
|
+
/**
 * Builds similarity edges between the given items.
 *
 * @param items Items whose embedding vectors are compared pairwise.
 * @param params.limit Maximum number of neighbors retained per item.
 * @param params.minScore Pairs scoring below this value are ignored.
 * @param params.progressIntervalMs Minimum time between `onProgress` calls.
 * @param params.onProgress Optional callback receiving periodic progress reports.
 * @returns The resulting edges.
 */
export declare function buildSourceKindEdges(items: Array<SourceEmbeddingItem>, params: {
    limit: number;
    minScore: number;
    progressIntervalMs?: number;
    onProgress?: (progress: {
        processedItems: number;
        totalItems: number;
        currentEdgeEstimate: number;
    }) => void;
}): Array<SourceKindEdge>;
|
|
20
|
+
//# sourceMappingURL=exact-edges.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exact-edges.d.ts","sourceRoot":"","sources":["../../src/cluster/exact-edges.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,mBAAmB,GAAG;IAChC,EAAE,EAAE,MAAM,CAAC;IACX,mBAAmB,EAAE,MAAM,EAAE,CAAC;CAC/B,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;CACf,CAAC;AAqCF,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,mBAAmB,EAAE,EAC5B,MAAM,EAAE;IACN,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,cAAc,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,mBAAmB,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;CAC9G,GACA,cAAc,EAAE,CA2DlB"}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/** Minimum gap between progress callbacks when the caller does not supply one (5 seconds). */
const DEFAULT_PROGRESS_INTERVAL_MS = 5 * 1000;
|
|
2
|
+
/**
 * Returns the dot product of two numeric vectors.
 *
 * @param left First vector.
 * @param right Second vector; must have the same length as `left`.
 * @returns The sum of the element-wise products, accumulated left to right.
 * @throws {Error} When the two vectors differ in length.
 */
function dotProduct(left, right) {
    const size = left.length;
    if (size !== right.length) {
        throw new Error('Embedding dimensions do not match');
    }
    let total = 0;
    let position = 0;
    while (position < size) {
        total += left[position] * right[position];
        position += 1;
    }
    return total;
}
|
|
12
|
+
/**
 * Inserts `candidate` into `neighbors`, a list kept in descending score order,
 * without letting the list grow past `limit` entries.
 *
 * A candidate whose score ties an existing entry is placed after it.
 *
 * @param neighbors Mutable list sorted by descending `score`.
 * @param candidate Entry to insert; only its `score` is inspected here.
 * @param limit Maximum number of entries to retain.
 * @returns The change in list length caused by this call (0 when the candidate
 *   was rejected or when it displaced the last entry of a full list).
 */
function insertBoundedNeighbor(neighbors, candidate, limit) {
    const sizeBefore = neighbors.length;
    // Walk backwards from the tail past every entry the candidate strictly beats.
    let slot = sizeBefore;
    for (; slot > 0; slot -= 1) {
        if (!(candidate.score > neighbors[slot - 1].score)) {
            break;
        }
    }
    // The candidate would land beyond the retained window: nothing to do.
    if (slot >= limit) {
        return 0;
    }
    neighbors.splice(slot, 0, candidate);
    if (neighbors.length > limit) {
        // Drop whatever fell off the end of the window.
        neighbors.length = limit;
    }
    return neighbors.length - sizeBefore;
}
|
|
27
|
+
/**
 * Builds similarity edges between items by scoring every pair once and keeping,
 * for each item, at most `params.limit` best-scoring neighbors.
 *
 * An edge is returned for a pair when at least one of its endpoints retained the
 * other in its bounded neighbor list. Each pair is reported once, with the
 * smaller id as `leftThreadId`.
 *
 * @param items Items with an `id` and a `normalizedEmbedding` vector.
 * @param params.limit Maximum neighbors retained per item.
 * @param params.minScore Pairs whose dot product is below this value are skipped.
 * @param params.progressIntervalMs Minimum milliseconds between `onProgress`
 *   calls; defaults to DEFAULT_PROGRESS_INTERVAL_MS.
 * @param params.onProgress Optional callback invoked after an item has been
 *   compared against all later items, at most once per interval.
 * @returns Edges as `{ leftThreadId, rightThreadId, score }`.
 * @throws {Error} When two compared embeddings differ in length (from dotProduct).
 */
export function buildSourceKindEdges(items, params) {
    const topNeighbors = new Map();
    const totalItems = items.length;
    const progressIntervalMs = params.progressIntervalMs ?? DEFAULT_PROGRESS_INTERVAL_MS;
    let processedItems = 0;
    let currentNeighborEntries = 0;
    let lastProgressAt = Date.now();
    for (let leftIndex = 0; leftIndex < items.length; leftIndex += 1) {
        const left = items[leftIndex];
        let leftNeighbors = topNeighbors.get(left.id);
        if (!leftNeighbors) {
            leftNeighbors = [];
            topNeighbors.set(left.id, leftNeighbors);
        }
        // Each unordered pair is scored once and offered to both endpoints' lists.
        for (let rightIndex = leftIndex + 1; rightIndex < items.length; rightIndex += 1) {
            const right = items[rightIndex];
            const score = dotProduct(left.normalizedEmbedding, right.normalizedEmbedding);
            if (score < params.minScore) {
                continue;
            }
            currentNeighborEntries += insertBoundedNeighbor(leftNeighbors, { neighborId: right.id, score }, params.limit);
            let rightNeighbors = topNeighbors.get(right.id);
            if (!rightNeighbors) {
                rightNeighbors = [];
                topNeighbors.set(right.id, rightNeighbors);
            }
            currentNeighborEntries += insertBoundedNeighbor(rightNeighbors, { neighborId: left.id, score }, params.limit);
        }
        processedItems += 1;
        const now = Date.now();
        if (params.onProgress && now - lastProgressAt >= progressIntervalMs) {
            params.onProgress({
                processedItems,
                totalItems,
                // Rough figure: a pair retained by both endpoints is counted twice above.
                currentEdgeEstimate: Math.floor(currentNeighborEntries / 2),
            });
            lastProgressAt = now;
        }
    }
    // Bounded neighbor lists are not symmetric: A may retain B while B's list is
    // full of better matches. Emit the union of all lists, deduplicated per
    // unordered pair, so the outcome does not depend on which endpoint has the
    // smaller id.
    const edges = [];
    const emittedPairs = new Set();
    for (const [threadId, neighbors] of topNeighbors.entries()) {
        for (const neighbor of neighbors) {
            if (threadId === neighbor.neighborId) {
                continue;
            }
            const threadIsLeft = threadId < neighbor.neighborId;
            const leftThreadId = threadIsLeft ? threadId : neighbor.neighborId;
            const rightThreadId = threadIsLeft ? neighbor.neighborId : threadId;
            const pairKey = `${leftThreadId}:${rightThreadId}`;
            if (emittedPairs.has(pairKey)) {
                continue;
            }
            emittedPairs.add(pairKey);
            edges.push({
                leftThreadId,
                rightThreadId,
                score: neighbor.score,
            });
        }
    }
    return edges;
}
|
|
80
|
+
//# sourceMappingURL=exact-edges.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exact-edges.js","sourceRoot":"","sources":["../../src/cluster/exact-edges.ts"],"names":[],"mappings":"AAWA,MAAM,4BAA4B,GAAG,KAAK,CAAC;AAE3C,SAAS,UAAU,CAAC,IAAc,EAAE,KAAe;IACjD,IAAI,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IACD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACpD,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,qBAAqB,CAC5B,SAAuD,EACvD,SAAgD,EAChD,KAAa;IAEb,MAAM,aAAa,GAAG,SAAS,CAAC,MAAM,CAAC;IACvC,IAAI,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC;IAChC,OAAO,QAAQ,GAAG,CAAC,IAAI,SAAS,CAAC,KAAK,GAAG,SAAS,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC;QACvE,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IAED,IAAI,QAAQ,IAAI,KAAK,EAAE,CAAC;QACtB,OAAO,CAAC,CAAC;IACX,CAAC;IAED,SAAS,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,EAAE,SAAS,CAAC,CAAC;IACzC,IAAI,SAAS,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QAC7B,SAAS,CAAC,MAAM,GAAG,KAAK,CAAC;IAC3B,CAAC;IACD,OAAO,SAAS,CAAC,MAAM,GAAG,aAAa,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,KAA4B,EAC5B,MAKC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAwD,CAAC;IACrF,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;IAChC,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,IAAI,sBAAsB,GAAG,CAAC,CAAC;IAC/B,IAAI,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEhC,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,SAAS,GAAG,KAAK,CAAC,MAAM,EAAE,SAAS,IAAI,CAAC,EAAE,CAAC;QACjE,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;QAC9B,IAAI,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9C,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,aAAa,GAAG,EAAE,CAAC;YACnB,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE,aAAa,CAAC,CAAC;QAC3C,CAAC;QAED,KAAK,IAAI,UAAU,GAAG,SAAS,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC,MAAM,EAAE,UAAU,IAAI,CAAC,EAAE,CAAC;YAChF,MAAM,KAAK,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAK,CAAC,mBAAmB,CAAC,CAAC;YAC9E,IAAI,KAAK,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC5B,SAAS;YACX,CAAC;YAED,sBAAsB,IAAI,qBAAqB,CAAC,aAAa,EAAE,EAAE,UAAU,EAAE,KAAK,CAAC,EAAE
,EAAE,KAAK,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YAE9G,IAAI,cAAc,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAChD,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,cAAc,GAAG,EAAE,CAAC;gBACpB,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,cAAc,CAAC,CAAC;YAC7C,CAAC;YACD,sBAAsB,IAAI,qBAAqB,CAAC,cAAc,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAChH,CAAC;QAED,cAAc,IAAI,CAAC,CAAC;QACpB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,IAAI,MAAM,CAAC,UAAU,IAAI,GAAG,GAAG,cAAc,IAAI,CAAC,MAAM,CAAC,kBAAkB,IAAI,4BAA4B,CAAC,EAAE,CAAC;YAC7G,MAAM,CAAC,UAAU,CAAC;gBAChB,cAAc;gBACd,UAAU;gBACV,mBAAmB,EAAE,IAAI,CAAC,KAAK,CAAC,sBAAsB,GAAG,CAAC,CAAC;aAC5D,CAAC,CAAC;YACH,cAAc,GAAG,GAAG,CAAC;QACvB,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAqB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QAC3D,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,IAAI,QAAQ,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;gBACpC,SAAS;YACX,CAAC;YACD,KAAK,CAAC,IAAI,CAAC;gBACT,YAAY,EAAE,QAAQ;gBACtB,aAAa,EAAE,QAAQ,CAAC,UAAU;gBAClC,KAAK,EAAE,QAAQ,CAAC,KAAK;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"perf.integration.d.ts","sourceRoot":"","sources":["../../src/cluster/perf.integration.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { performance } from 'node:perf_hooks';
|
|
6
|
+
import { fileURLToPath } from 'node:url';
|
|
7
|
+
import { GHCrawlService } from '../service.js';
|
|
8
|
+
/** Absolute path of the perf baseline JSON that sits next to this module. */
const BASELINE_PATH = fileURLToPath(new URL('./perf-baseline.json', import.meta.url));
/**
 * Reads and parses the perf baseline file.
 *
 * @returns The parsed JSON content of BASELINE_PATH.
 */
function loadBaseline() {
    const rawBaseline = fs.readFileSync(BASELINE_PATH, 'utf8');
    return JSON.parse(rawBaseline);
}
|
|
12
|
+
/**
 * Tells whether bootstrap mode was requested through the environment.
 *
 * @returns `true` only when GHCRAWL_CLUSTER_PERF_BOOTSTRAP is exactly `'1'`.
 */
function shouldBootstrapBaseline() {
    const { GHCRAWL_CLUSTER_PERF_BOOTSTRAP: flag } = process.env;
    return flag === '1';
}
|
|
15
|
+
/**
 * Formats a millisecond duration for the human-readable report.
 *
 * The unit is chosen by magnitude, so negative values (e.g. a delta that
 * represents an improvement) are scaled the same way as positive ones:
 * below one second as `"123.4 ms"`, below one minute as `"12.34 s"`,
 * otherwise as `"2m 5.0s"`.
 *
 * @param durationMs Duration in milliseconds; may be negative.
 * @returns The formatted duration, or `'n/a'` when the value is not finite.
 */
function formatDurationMs(durationMs) {
    if (!Number.isFinite(durationMs)) {
        return 'n/a';
    }
    const magnitudeMs = Math.abs(durationMs);
    if (magnitudeMs < 1000) {
        return `${durationMs.toFixed(1)} ms`;
    }
    const sign = durationMs < 0 ? '-' : '';
    const totalSeconds = magnitudeMs / 1000;
    if (totalSeconds < 60) {
        return `${sign}${totalSeconds.toFixed(2)} s`;
    }
    let minutes = Math.floor(totalSeconds / 60);
    let secondsText = (totalSeconds - minutes * 60).toFixed(1);
    if (secondsText === '60.0') {
        // Rounding pushed the remainder up to a full minute; carry it over.
        minutes += 1;
        secondsText = '0.0';
    }
    return `${sign}${minutes}m ${secondsText}s`;
}
|
|
29
|
+
/**
 * Formats a percentage with one decimal place and an explicit `+` for
 * strictly positive values.
 *
 * @param value Percentage value (already scaled, e.g. 12.5 for 12.5%).
 * @returns Text such as `"+12.5%"`, `"0.0%"` or `"-3.2%"`.
 */
function formatPercent(value) {
    const magnitudeText = `${value.toFixed(1)}%`;
    if (value > 0) {
        return `+${magnitudeText}`;
    }
    return magnitudeText;
}
|
|
33
|
+
/**
 * Returns the median of a list of numbers without mutating the input.
 *
 * @param values Numbers in any order.
 * @returns The middle value (or the mean of the two middle values for an
 *   even-sized list); `0` when the list is empty.
 */
function median(values) {
    if (values.length === 0) {
        // An empty list would otherwise take the even-length branch and yield NaN.
        return 0;
    }
    const sorted = [...values].sort((left, right) => left - right);
    const middle = Math.floor(sorted.length / 2);
    if (sorted.length % 2 === 0) {
        return (sorted[middle - 1] + sorted[middle]) / 2;
    }
    return sorted[middle] ?? 0;
}
|
|
41
|
+
/**
 * Creates a GitHub client stand-in whose methods resolve immediately with
 * empty results, so the benchmark never touches the network.
 *
 * @returns An object of async methods: lookups resolve to `{}`, list calls
 *   resolve to `[]`, and `checkAuth` resolves to `undefined`.
 */
function createGitHubStub() {
    return {
        async checkAuth() {
            return undefined;
        },
        async getRepo() {
            return {};
        },
        async listRepositoryIssues() {
            return [];
        },
        async getIssue() {
            return {};
        },
        async getPull() {
            return {};
        },
        async listIssueComments() {
            return [];
        },
        async listPullReviews() {
            return [];
        },
        async listPullReviewComments() {
            return [];
        },
    };
}
|
|
53
|
+
/**
 * Creates a GHCrawlService bound to the given database file, using a fixed
 * benchmark configuration and the stub GitHub client.
 *
 * @param dbPath Path of the SQLite database; its directory doubles as the
 *   configuration directory.
 * @returns A new service instance; the caller is responsible for closing it.
 */
function createService(dbPath) {
    const configDir = path.dirname(dbPath);
    const config = {
        workspaceRoot: process.cwd(),
        configDir,
        configPath: path.join(configDir, 'config.json'),
        configFileExists: true,
        dbPath,
        dbPathSource: 'config',
        apiPort: 5179,
        githubToken: 'ghp_testtoken1234567890',
        githubTokenSource: 'config',
        secretProvider: 'plaintext',
        tuiPreferences: {},
        openaiApiKeySource: 'none',
        summaryModel: 'gpt-5-mini',
        embedModel: 'text-embedding-3-large',
        embedBatchSize: 2,
        embedConcurrency: 2,
        embedMaxUnread: 4,
        openSearchIndex: 'ghcrawl-threads',
    };
    return new GHCrawlService({ config, github: createGitHubStub() });
}
|
|
78
|
+
/**
 * Maps an integer seed to a small, repeatable pseudo-random offset.
 *
 * @param seed Integer seed.
 * @returns A value between -0.0125 and 0.0125 that depends only on `seed`.
 */
function deterministicNoise(seed) {
    // One multiply-and-add scrambling step, reinterpreted as an unsigned 32-bit value.
    const scrambled = Math.imul(seed, 1664525) + 1013904223;
    const unsigned = scrambled >>> 0;
    const unitInterval = unsigned / 0xffffffff;
    return (unitInterval - 0.5) * 0.025;
}
|
|
82
|
+
/**
 * Builds a repeatable synthetic embedding for one (cluster, thread, source kind)
 * combination of the benchmark fixture.
 *
 * Vector layout: `clusterCount * clusterBlockWidth` cluster slots, then one slot
 * per source kind, then `noiseDimensions` noise slots. Only the slots of the
 * item's own cluster block and its own source kind are non-zero, plus the noise.
 *
 * @param params.clusterIndex Zero-based cluster the item belongs to.
 * @param params.threadOffset Zero-based position of the thread inside its cluster.
 * @param params.sourceIndex Zero-based index into `params.sourceKinds`.
 * @param params.clusterCount Number of clusters in the fixture.
 * @param params.clusterBlockWidth Slots reserved per cluster (up to 4 are written).
 * @param params.noiseDimensions Number of trailing noise slots.
 * @param params.sourceKinds List of source kinds; only its length is used.
 * @returns The embedding as a plain array of numbers.
 */
function buildDeterministicEmbedding(params) {
    const { clusterCount, clusterBlockWidth, noiseDimensions, sourceKinds } = params;
    const { clusterIndex, threadOffset, sourceIndex } = params;
    const clusterSpan = clusterCount * clusterBlockWidth;
    const vector = new Array(clusterSpan + noiseDimensions + sourceKinds.length).fill(0);
    const clusterStart = clusterIndex * clusterBlockWidth;
    const sourceBias = 0.02 * (sourceIndex + 1);
    const memberBias = 0.01 * ((threadOffset % 5) + 1);
    // Signature weights for the item's cluster block; slot k is written only
    // when the block is wider than k (slot 0 is always written).
    const signature = [1, 0.72 + sourceBias, 0.48 + memberBias, 0.28 + sourceBias + memberBias];
    signature.forEach((weight, slot) => {
        if (slot === 0 || clusterBlockWidth > slot) {
            vector[clusterStart + slot] = weight;
        }
    });
    // Mark the item's source kind in the region that follows the cluster slots.
    vector[clusterSpan + sourceIndex] = 0.12 + sourceBias;
    const noiseStart = clusterSpan + sourceKinds.length;
    for (let offset = 0; offset < noiseDimensions; offset += 1) {
        const seed = clusterIndex * 10_000 + threadOffset * 100 + sourceIndex * 10 + offset;
        vector[noiseStart + offset] = deterministicNoise(seed);
    }
    return vector;
}
|
|
104
|
+
/**
 * Populates a fresh database with the deterministic benchmark fixture: one
 * repository, `clusterCount * threadsPerCluster` open threads, and one
 * embedding per thread and source kind.
 *
 * @param dbPath Path of the database file to seed.
 * @param baseline Parsed baseline whose `fixture` section describes the data set.
 * @throws When the number of stored threads differs from the expected count.
 */
function seedBenchmarkDatabase(dbPath, baseline) {
    const service = createService(dbPath);
    const threadCount = baseline.fixture.clusterCount * baseline.fixture.threadsPerCluster;
    const now = '2026-03-12T12:00:00Z';
    try {
        const { db } = service;
        const { fixture } = baseline;
        const insertRepository = db.prepare(`insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at)
values (?, ?, ?, ?, ?, ?, ?)`);
        insertRepository.run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{}', now);
        const insertThread = db.prepare(`insert into threads (
id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url,
labels_json, assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh,
merged_at_gh, first_pulled_at, last_pulled_at, updated_at
) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
        const insertEmbedding = db.prepare(`insert into document_embeddings (thread_id, source_kind, model, dimensions, content_hash, embedding_json, created_at, updated_at)
values (?, ?, ?, ?, ?, ?, ?, ?)`);
        for (let clusterIndex = 0; clusterIndex < fixture.clusterCount; clusterIndex += 1) {
            for (let threadOffset = 0; threadOffset < fixture.threadsPerCluster; threadOffset += 1) {
                // Thread ids are 1-based and contiguous across clusters.
                const threadId = clusterIndex * fixture.threadsPerCluster + threadOffset + 1;
                const threadNumber = 10_000 + threadId;
                // Every third thread of a cluster is a pull request; the rest are issues.
                const kind = threadOffset % 3 === 0 ? 'pull_request' : 'issue';
                const urlSegment = kind === 'issue' ? 'issues' : 'pull';
                const title = `Cluster ${clusterIndex + 1} thread ${threadOffset + 1}`;
                const body = `Deterministic benchmark fixture body for cluster ${clusterIndex + 1}, thread ${threadOffset + 1}.`;
                const authorLogin = `user${(threadId % 17) + 1}`;
                const htmlUrl = `https://github.com/openclaw/openclaw/${urlSegment}/${threadNumber}`;
                insertThread.run(threadId, 1, `gh-${threadId}`, threadNumber, kind, 'open', title, body, authorLogin, 'User', htmlUrl, '[]', '[]', '{}', `hash-${threadId}`, 0, now, now, null, null, now, now, now);
                fixture.sourceKinds.forEach((sourceKind, sourceIndex) => {
                    const embedding = buildDeterministicEmbedding({
                        clusterIndex,
                        threadOffset,
                        sourceIndex,
                        clusterCount: fixture.clusterCount,
                        clusterBlockWidth: fixture.clusterBlockWidth,
                        noiseDimensions: fixture.noiseDimensions,
                        sourceKinds: fixture.sourceKinds,
                    });
                    insertEmbedding.run(threadId, sourceKind, 'text-embedding-3-large', embedding.length, `hash-${threadId}-${sourceKind}`, JSON.stringify(embedding), now, now);
                });
            }
        }
        // Sanity check: every generated thread must have been stored.
        const countRow = db.prepare('select count(*) as count from threads').get();
        assert.equal(threadCount, countRow.count);
    }
    finally {
        service.close();
    }
}
|
|
147
|
+
/**
 * Runs one clustering pass against the given database and times it.
 *
 * @param dbPath Path of a seeded database file.
 * @param baseline Parsed baseline supplying `fixture.k` and `fixture.minScore`.
 * @returns The elapsed milliseconds together with the `clusters` and `edges`
 *   values reported by the service.
 */
async function runSingleCluster(dbPath, baseline) {
    const service = createService(dbPath);
    try {
        const startedAt = performance.now();
        const request = {
            owner: 'openclaw',
            repo: 'openclaw',
            k: baseline.fixture.k,
            minScore: baseline.fixture.minScore,
        };
        const outcome = await service.clusterRepository(request);
        const finishedAt = performance.now();
        return {
            durationMs: finishedAt - startedAt,
            clusters: outcome.clusters,
            edges: outcome.edges,
        };
    }
    finally {
        // Close the service whether or not clustering succeeded.
        service.close();
    }
}
|
|
164
|
+
/**
 * Seeds a temporary fixture database, runs warm-up and timed clustering passes
 * on fresh copies of it, and compares the median sample time to the baseline.
 *
 * Sampling stops at `benchmark.maxSamples`, or earlier once at least
 * `benchmark.minSamples` were taken and `benchmark.maxTotalMs` has elapsed.
 * When no positive baseline median is recorded, the measured median stands in
 * for it (only permitted in bootstrap mode).
 *
 * @param baseline Parsed baseline with `baseline`, `benchmark`, `fixture` and
 *   `thresholds` sections.
 * @returns Sample durations plus derived median, delta and projection figures.
 * @throws {Error} When no baseline median is recorded and bootstrap mode is off,
 *   or when a run does not yield the expected cluster and edge counts.
 */
async function measureBenchmark(baseline) {
    if (baseline.baseline.fixtureMedianMs <= 0 && !shouldBootstrapBaseline()) {
        throw new Error(`Cluster perf baseline is not set in ${BASELINE_PATH}. Run the benchmark once, then record fixtureMedianMs before enforcing regressions.`);
    }
    const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-cluster-perf-'));
    const seedDbPath = path.join(tempRoot, 'seed.sqlite');
    // Clusters a fresh copy of the seed database and checks the result shape.
    const rebuildFromSeed = async (fileName) => {
        const copyPath = path.join(tempRoot, fileName);
        fs.copyFileSync(seedDbPath, copyPath);
        const outcome = await runSingleCluster(copyPath, baseline);
        assert.equal(outcome.clusters, baseline.fixture.clusterCount);
        assert.ok(outcome.edges > baseline.fixture.clusterCount);
    };
    try {
        seedBenchmarkDatabase(seedDbPath, baseline);
        const warmupRuns = baseline.benchmark.warmupRuns;
        const runsPerSample = baseline.benchmark.runsPerSample;
        const sampleDurationsMs = [];
        const benchmarkStartedAt = performance.now();
        let runCounter = 0;
        // Warm-up passes are executed but never timed into a sample.
        for (let warmupIndex = 0; warmupIndex < warmupRuns; warmupIndex += 1) {
            await rebuildFromSeed(`warmup-${warmupIndex}.sqlite`);
        }
        let budgetSpent = false;
        while (!budgetSpent && sampleDurationsMs.length < baseline.benchmark.maxSamples) {
            const sampleStartedAt = performance.now();
            for (let runIndex = 0; runIndex < runsPerSample; runIndex += 1) {
                const fileName = `run-${runCounter}.sqlite`;
                runCounter += 1;
                await rebuildFromSeed(fileName);
            }
            sampleDurationsMs.push(performance.now() - sampleStartedAt);
            const elapsedMs = performance.now() - benchmarkStartedAt;
            budgetSpent = sampleDurationsMs.length >= baseline.benchmark.minSamples
                && elapsedMs >= baseline.benchmark.maxTotalMs;
        }
        const medianMs = median(sampleDurationsMs);
        const recordedMedianMs = baseline.baseline.fixtureMedianMs;
        const baselineMedianMs = recordedMedianMs > 0 ? recordedMedianMs : medianMs;
        const deltaMs = medianMs - baselineMedianMs;
        let deltaPercent = 0;
        if (baselineMedianMs > 0) {
            deltaPercent = (deltaMs / baselineMedianMs) * 100;
        }
        // Scale the recorded full-repository projection by the fixture slowdown ratio.
        const projectedBaselineOpenclawMs = baseline.baseline.projectedOpenclawMs;
        const projectedOpenclawMs = baseline.baseline.projectedOpenclawMs * (medianMs / baselineMedianMs);
        const projectedDeltaMs = projectedOpenclawMs - projectedBaselineOpenclawMs;
        const projectedDeltaPercent = (projectedDeltaMs / projectedBaselineOpenclawMs) * 100;
        return {
            sampleDurationsMs,
            medianMs,
            baselineMedianMs,
            deltaMs,
            deltaPercent,
            projectedOpenclawMs,
            projectedBaselineOpenclawMs,
            projectedDeltaMs,
            projectedDeltaPercent,
            samples: sampleDurationsMs.length,
            runsPerSample,
            threadCount: baseline.fixture.clusterCount * baseline.fixture.threadsPerCluster,
            sourceKinds: baseline.fixture.sourceKinds,
            maxRegressionPercent: baseline.thresholds.maxRegressionPercent,
        };
    }
    finally {
        // Remove the seed and every per-run copy, even after a failure.
        fs.rmSync(tempRoot, { recursive: true, force: true });
    }
}
|
|
229
|
+
/**
 * Renders the benchmark result as a Markdown summary.
 *
 * @param result The object produced by the benchmark measurement.
 * @returns Markdown text ending in a newline. Status is `FAIL` when
 *   `deltaPercent` exceeds `maxRegressionPercent`; a bootstrap note is added
 *   when the baseline median equals the measured median.
 */
function buildSummary(result) {
    const regressed = result.deltaPercent > result.maxRegressionPercent;
    const formattedSamples = [];
    for (const sampleMs of result.sampleDurationsMs) {
        formattedSamples.push(formatDurationMs(sampleMs));
    }
    const lines = [
        '## Cluster Performance',
        '',
        `- Status: ${regressed ? 'FAIL' : 'PASS'}`,
        `- Fixture median: ${formatDurationMs(result.medianMs)} (${result.samples} samples, ${result.runsPerSample} cluster rebuilds/sample)`,
        `- Fixture baseline: ${formatDurationMs(result.baselineMedianMs)}`,
        `- Fixture delta: ${formatDurationMs(result.deltaMs)} (${formatPercent(result.deltaPercent)})`,
        `- Projected openclaw/openclaw duration: ${formatDurationMs(result.projectedOpenclawMs)}`,
        `- Projected openclaw/openclaw baseline: ${formatDurationMs(result.projectedBaselineOpenclawMs)}`,
        `- Projected delta: ${formatDurationMs(result.projectedDeltaMs)} (${formatPercent(result.projectedDeltaPercent)})`,
        `- Regression threshold: ${formatPercent(result.maxRegressionPercent)}`,
        `- Fixture shape: ${result.threadCount} threads x ${result.sourceKinds.length} source kinds`,
        `- Sample durations: ${formattedSamples.join(', ')}`,
    ];
    if (result.baselineMedianMs === result.medianMs) {
        lines.push('- Bootstrap mode: using the current fixture median as the provisional baseline');
    }
    // The trailing empty entry makes the joined text end with a newline.
    lines.push('');
    return lines.join('\n');
}
|
|
254
|
+
/**
 * Writes a JSON report of the benchmark to the path named by the
 * GHCRAWL_CLUSTER_PERF_OUTPUT_PATH environment variable, creating parent
 * directories as needed. Does nothing when the variable is unset or empty.
 *
 * @param result Benchmark result; `deltaPercent` and `maxRegressionPercent`
 *   decide the reported status.
 * @param summary Rendered summary text to embed in the report.
 * @param bootstrap Whether the run was in bootstrap mode.
 */
function writeOutput(result, summary, bootstrap) {
    const { GHCRAWL_CLUSTER_PERF_OUTPUT_PATH: outputPath } = process.env;
    if (!outputPath) {
        return;
    }
    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
    const regressed = result.deltaPercent > result.maxRegressionPercent;
    const report = {
        status: regressed ? 'FAIL' : 'PASS',
        bootstrap,
        summary,
        result,
    };
    fs.writeFileSync(outputPath, `${JSON.stringify(report, null, 2)}\n`);
}
|
|
267
|
+
/**
 * Runs the cluster performance benchmark end to end: measures, prints the
 * summary, appends it to the GitHub step summary when that path is set, writes
 * the optional JSON report, and fails on a regression beyond the threshold.
 *
 * @throws {Error} When not in bootstrap mode and `deltaPercent` exceeds
 *   `maxRegressionPercent`.
 */
async function main() {
    const baseline = loadBaseline();
    const result = await measureBenchmark(baseline);
    const summary = buildSummary(result);
    const bootstrap = shouldBootstrapBaseline();
    process.stdout.write(`${summary}\n`);
    if (bootstrap) {
        // Print the value to record as the new baseline.
        process.stdout.write(`Suggested fixtureMedianMs: ${result.medianMs.toFixed(1)}\n`);
    }
    const { GITHUB_STEP_SUMMARY: stepSummaryPath } = process.env;
    if (stepSummaryPath) {
        fs.appendFileSync(stepSummaryPath, `${summary}\n`);
    }
    writeOutput(result, summary, bootstrap);
    // Bootstrap runs report a regression but never fail on it.
    const regressed = result.deltaPercent > result.maxRegressionPercent;
    if (regressed && !bootstrap) {
        throw new Error(`Cluster perf regression exceeded threshold: ${formatPercent(result.deltaPercent)} > ${formatPercent(result.maxRegressionPercent)}`);
    }
}
await main();
|
|
287
|
+
//# sourceMappingURL=perf.integration.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"perf.integration.js","sourceRoot":"","sources":["../../src/cluster/perf.integration.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAgD/C,MAAM,aAAa,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,sBAAsB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAEtF,SAAS,YAAY;IACnB,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,aAAa,EAAE,MAAM,CAAC,CAAiB,CAAC;AAC5E,CAAC;AAED,SAAS,uBAAuB;IAC9B,OAAO,OAAO,CAAC,GAAG,CAAC,8BAA8B,KAAK,GAAG,CAAC;AAC5D,CAAC;AAED,SAAS,gBAAgB,CAAC,UAAkB;IAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC;QAAE,OAAO,KAAK,CAAC;IAC/C,IAAI,UAAU,GAAG,IAAI,EAAE,CAAC;QACtB,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;IACvC,CAAC;IACD,MAAM,YAAY,GAAG,UAAU,GAAG,IAAI,CAAC;IACvC,IAAI,YAAY,GAAG,EAAE,EAAE,CAAC;QACtB,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACxC,CAAC;IACD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAG,YAAY,GAAG,OAAO,GAAG,EAAE,CAAC;IAC5C,OAAO,GAAG,OAAO,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9C,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,MAAM,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IAClC,OAAO,GAAG,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACvC,CAAC;AAED,SAAS,MAAM,CAAC,MAAgB;IAC9B,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;IAC/D,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC;IACnD,CAAC;IACD,OAAO,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO;QACL,SAAS,EAAE,KAAK,IAAI,EAAE,CAAC,SAAS;QAChC,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;QACzB,oBAAoB,EAAE,KAAK,IAAI,EAAE,CAAC,EAAE;QACpC,QAAQ,EAAE,KA
AK,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;QACzB,iBAAiB,EAAE,KAAK,IAAI,EAAE,CAAC,EAAE;QACjC,eAAe,EAAE,KAAK,IAAI,EAAE,CAAC,EAAE;QAC/B,sBAAsB,EAAE,KAAK,IAAI,EAAE,CAAC,EAAE;KACvC,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,OAAO,IAAI,cAAc,CAAC;QACxB,MAAM,EAAE;YACN,aAAa,EAAE,OAAO,CAAC,GAAG,EAAE;YAC5B,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;YAC/B,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,aAAa,CAAC;YAC1D,gBAAgB,EAAE,IAAI;YACtB,MAAM;YACN,YAAY,EAAE,QAAQ;YACtB,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,yBAAyB;YACtC,iBAAiB,EAAE,QAAQ;YAC3B,cAAc,EAAE,WAAW;YAC3B,cAAc,EAAE,EAAE;YAClB,kBAAkB,EAAE,MAAM;YAC1B,YAAY,EAAE,YAAY;YAC1B,UAAU,EAAE,wBAAwB;YACpC,cAAc,EAAE,CAAC;YACjB,gBAAgB,EAAE,CAAC;YACnB,cAAc,EAAE,CAAC;YACjB,eAAe,EAAE,iBAAiB;SACnC;QACD,MAAM,EAAE,gBAAgB,EAAE;KAC3B,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,GAAG,UAAU,GAAG,GAAG,CAAC,GAAG,KAAK,CAAC;AAC3C,CAAC;AAED,SAAS,2BAA2B,CAAC,MAQpC;IACC,MAAM,UAAU,GAAG,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC,iBAAiB,GAAG,MAAM,CAAC,eAAe,GAAG,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC;IACvH,MAAM,SAAS,GAAG,IAAI,KAAK,CAAS,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxD,MAAM,WAAW,GAAG,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC,iBAAiB,CAAC;IACnE,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC;IACnD,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAE1D,SAAS,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC3B,IAAI,MAAM,CAAC,iBAAiB,GAAG,CAAC;QAAE,SAAS,CAAC,WAAW,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,UAAU,CAAC;IACjF,IAAI,MAAM,CAAC,iBAAiB,GAAG,CAAC;QAAE,SAAS,CAAC,WAAW,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,UAAU,CAAC;IACjF,IAAI,MAAM,CAAC,iBAAiB,GAAG,CAAC;QAAE,SAAS,CAAC,WAAW,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,UAAU,GAAG,UAAU,CAAC;IAE9F,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC,iBAAiB,GAAG,MAAM,CAAC,WAAW,CAAC;IACzF,SAAS,CAAC,YAAY,CAAC,GAAG,IAAI,GAAG,UAAU,CAAC;IAE5C,MAAM,SAAS,GAAG,MAAM,CAAC,YAAY,GAAG,MAAM,CA
AC,iBAAiB,GAAG,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC;IAC7F,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,eAAe,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QAC/D,MAAM,IAAI,GAAG,MAAM,CAAC,YAAY,GAAG,MAAM,GAAG,MAAM,CAAC,YAAY,GAAG,GAAG,GAAG,MAAM,CAAC,WAAW,GAAG,EAAE,GAAG,KAAK,CAAC;QACxG,SAAS,CAAC,SAAS,GAAG,KAAK,CAAC,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAC1D,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,qBAAqB,CAAC,MAAc,EAAE,QAAsB;IACnE,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,iBAAiB,CAAC;IACvF,MAAM,GAAG,GAAG,sBAAsB,CAAC;IAEnC,IAAI,CAAC;QACH,OAAO,CAAC,EAAE;aACP,OAAO,CACN;sCAC8B,CAC/B;aACA,GAAG,CAAC,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,mBAAmB,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;QAEvE,MAAM,YAAY,GAAG,OAAO,CAAC,EAAE,CAAC,OAAO,CACrC;;;;qFAI+E,CAChF,CAAC;QACF,MAAM,eAAe,GAAG,OAAO,CAAC,EAAE,CAAC,OAAO,CACxC;uCACiC,CAClC,CAAC;QAEF,KAAK,IAAI,YAAY,GAAG,CAAC,EAAE,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE,YAAY,IAAI,CAAC,EAAE,CAAC;YAC3F,KAAK,IAAI,YAAY,GAAG,CAAC,EAAE,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,iBAAiB,EAAE,YAAY,IAAI,CAAC,EAAE,CAAC;gBAChG,MAAM,QAAQ,GAAG,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,iBAAiB,GAAG,YAAY,GAAG,CAAC,CAAC;gBACtF,MAAM,YAAY,GAAG,MAAM,GAAG,QAAQ,CAAC;gBACvC,MAAM,IAAI,GAAG,YAAY,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC;gBAC/D,YAAY,CAAC,GAAG,CACd,QAAQ,EACR,CAAC,EACD,MAAM,QAAQ,EAAE,EAChB,YAAY,EACZ,IAAI,EACJ,MAAM,EACN,WAAW,YAAY,GAAG,CAAC,WAAW,YAAY,GAAG,CAAC,EAAE,EACxD,oDAAoD,YAAY,GAAG,CAAC,YAAY,YAAY,GAAG,CAAC,GAAG,EACnG,OAAO,CAAC,QAAQ,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,EAC5B,MAAM,EACN,wCAAwC,IAAI,KAAK,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,IAAI,YAAY,EAAE,EAC9F,IAAI,EACJ,IAAI,EACJ,IAAI,EACJ,QAAQ,QAAQ,EAAE,EAClB,CAAC,EACD,GAAG,EACH,GAAG,EACH,IAAI,EACJ,IAAI,EACJ,GAAG,EACH,GAAG,EACH,GAAG,CACJ,CAAC;gBAEF,KAAK,MAAM,CAAC,WAAW,EAAE,UAAU,CAAC,IAAI,QAAQ,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC;oBAC/E,MAAM,SAAS,GAAG,2BAA2B,CAAC;wBAC5C,YAAY;wBACZ,YAAY;wBACZ,WAAW;wBACX,YAAY,EAAE,QAAQ,CAAC,OAAO,CAAC,YAAY;wBAC3C,iBAAiB,EAAE,QAAQ,CAA
C,OAAO,CAAC,iBAAiB;wBACrD,eAAe,EAAE,QAAQ,CAAC,OAAO,CAAC,eAAe;wBACjD,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,WAAW;qBAC1C,CAAC,CAAC;oBACH,eAAe,CAAC,GAAG,CACjB,QAAQ,EACR,UAAU,EACV,wBAAwB,EACxB,SAAS,CAAC,MAAM,EAChB,QAAQ,QAAQ,IAAI,UAAU,EAAE,EAChC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EACzB,GAAG,EACH,GAAG,CACJ,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC,GAAG,EAAuB,CAAC;QACxG,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC5C,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,MAAc,EAAE,QAAsB;IACpE,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACtC,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC;YAC7C,KAAK,EAAE,UAAU;YACjB,IAAI,EAAE,UAAU;YAChB,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;YACrB,QAAQ,EAAE,QAAQ,CAAC,OAAO,CAAC,QAAQ;SACpC,CAAC,CAAC;QACH,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QACjD,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC;IACxE,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,QAAsB;IACpD,IAAI,QAAQ,CAAC,QAAQ,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACzE,MAAM,IAAI,KAAK,CACb,uCAAuC,aAAa,qFAAqF,CAC1I,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,uBAAuB,CAAC,CAAC,CAAC;IACjF,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IACtD,IAAI,CAAC;QACH,qBAAqB,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAE5C,MAAM,UAAU,GAAG,QAAQ,CAAC,SAAS,CAAC,UAAU,CAAC;QACjD,MAAM,aAAa,GAAG,QAAQ,CAAC,SAAS,CAAC,aAAa,CAAC;QACvD,MAAM,iBAAiB,GAAa,EAAE,CAAC;QACvC,MAAM,kBAAkB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAC7C,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,WAAW,GAAG,CAAC,EAAE,WAAW,GAAG,UAAU,EAAE,WAAW,IAAI,CAAC,EAAE,CAAC;YACrE,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,WAAW,SAAS,CAAC,CAAC;YACzE,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;YAC1C,MAAM,YAAY,GAAG,MAAM,gBAAgB,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;YACpE,MAAM,
CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,QAAQ,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YACnE,MAAM,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAChE,CAAC;QAED,OAAO,iBAAiB,CAAC,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,UAAU,EAAE,CAAC;YAChE,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC1C,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,aAAa,EAAE,QAAQ,IAAI,CAAC,EAAE,CAAC;gBAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,UAAU,SAAS,CAAC,CAAC;gBAClE,UAAU,IAAI,CAAC,CAAC;gBAChB,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;gBACvC,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;gBAC3D,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;gBAC7D,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAC1D,CAAC;YACD,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC,CAAC;YAE5D,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,kBAAkB,CAAC;YACzD,IAAI,iBAAiB,CAAC,MAAM,IAAI,QAAQ,CAAC,SAAS,CAAC,UAAU,IAAI,SAAS,IAAI,QAAQ,CAAC,SAAS,CAAC,UAAU,EAAE,CAAC;gBAC5G,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,iBAAiB,CAAC,CAAC;QAC3C,MAAM,gBAAgB,GAAG,QAAQ,CAAC,QAAQ,CAAC,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC9G,MAAM,OAAO,GAAG,QAAQ,GAAG,gBAAgB,CAAC;QAC5C,MAAM,YAAY,GAAG,gBAAgB,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,gBAAgB,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnF,MAAM,mBAAmB,GAAG,QAAQ,CAAC,QAAQ,CAAC,mBAAmB,GAAG,CAAC,QAAQ,GAAG,gBAAgB,CAAC,CAAC;QAClG,MAAM,2BAA2B,GAAG,QAAQ,CAAC,QAAQ,CAAC,mBAAmB,CAAC;QAC1E,MAAM,gBAAgB,GAAG,mBAAmB,GAAG,2BAA2B,CAAC;QAC3E,MAAM,qBAAqB,GAAG,CAAC,gBAAgB,GAAG,2BAA2B,CAAC,GAAG,GAAG,CAAC;QAErF,OAAO;YACL,iBAAiB;YACjB,QAAQ;YACR,gBAAgB;YAChB,OAAO;YACP,YAAY;YACZ,mBAAmB;YACnB,2BAA2B;YAC3B,gBAAgB;YAChB,qBAAqB;YACrB,OAAO,EAAE,iBAAiB,CAAC,MAAM;YACjC,aAAa;YACb,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,iBAAiB;YAC/E,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,WAAW;YACzC,oBAAoB,EAAE,QAAQ,CAAC,UAAU,CAAC,oBAAoB;SAC/D,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,EAAE,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE
,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CAAC,MAAqB;IACzC,MAAM,MAAM,GAAG,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC,oBAAoB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;IACnF,MAAM,UAAU,GAAG,MAAM,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/F,MAAM,aAAa,GACjB,MAAM,CAAC,gBAAgB,KAAK,MAAM,CAAC,QAAQ;QACzC,CAAC,CAAC,gFAAgF;QAClF,CAAC,CAAC,IAAI,CAAC;IACX,OAAO;QACL,wBAAwB;QACxB,EAAE;QACF,aAAa,MAAM,EAAE;QACrB,qBAAqB,gBAAgB,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,MAAM,CAAC,OAAO,aAAa,MAAM,CAAC,aAAa,2BAA2B;QACrI,uBAAuB,gBAAgB,CAAC,MAAM,CAAC,gBAAgB,CAAC,EAAE;QAClE,oBAAoB,gBAAgB,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,aAAa,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG;QAC9F,2CAA2C,gBAAgB,CAAC,MAAM,CAAC,mBAAmB,CAAC,EAAE;QACzF,2CAA2C,gBAAgB,CAAC,MAAM,CAAC,2BAA2B,CAAC,EAAE;QACjG,sBAAsB,gBAAgB,CAAC,MAAM,CAAC,gBAAgB,CAAC,KAAK,aAAa,CAAC,MAAM,CAAC,qBAAqB,CAAC,GAAG;QAClH,2BAA2B,aAAa,CAAC,MAAM,CAAC,oBAAoB,CAAC,EAAE;QACvE,oBAAoB,MAAM,CAAC,WAAW,cAAc,MAAM,CAAC,WAAW,CAAC,MAAM,eAAe;QAC5F,uBAAuB,UAAU,EAAE;QACnC,aAAa;QACb,EAAE;KACH;SACE,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC;SAC/C,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,WAAW,CAAC,MAAqB,EAAE,OAAe,EAAE,SAAkB;IAC7E,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC;IAChE,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO;IACT,CAAC;IAED,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5D,EAAE,CAAC,aAAa,CACd,UAAU,EACV,IAAI,CAAC,SAAS,CACZ;QACE,MAAM,EAAE,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC,oBAAoB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;QAC3E,SAAS;QACT,OAAO;QACP,MAAM;KACP,EACD,IAAI,EACJ,CAAC,CACF,GAAG,IAAI,CACT,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,SAAS,GAAG,uBAAuB,EAAE,CAAC;IAC5C,MAAM,UAAU,GAAG,CAAC,SAAS,IAAI,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAEnF,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACrC,IA
AI,SAAS,EAAE,CAAC;QACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,8BAA8B,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACrF,CAAC;IACD,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC;IACpD,IAAI,WAAW,EAAE,CAAC;QAChB,EAAE,CAAC,cAAc,CAAC,WAAW,EAAE,GAAG,OAAO,IAAI,CAAC,CAAC;IACjD,CAAC;IACD,WAAW,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;IAExC,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CACb,+CAA+C,aAAa,CAAC,MAAM,CAAC,YAAY,CAAC,MAAM,aAAa,CAAC,MAAM,CAAC,oBAAoB,CAAC,EAAE,CACpI,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,IAAI,EAAE,CAAC"}
|
package/dist/search/exact.d.ts
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
export declare function cosineSimilarity(left: number[], right: number[]): number;
|
|
2
|
+
export declare function normalizeEmbedding(embedding: number[]): {
|
|
3
|
+
normalized: number[];
|
|
4
|
+
norm: number;
|
|
5
|
+
};
|
|
6
|
+
export declare function dotProduct(left: number[], right: number[]): number;
|
|
2
7
|
export declare function rankNearestNeighbors<T extends {
|
|
3
8
|
id: number;
|
|
4
9
|
embedding: number[];
|
|
@@ -11,4 +16,12 @@ export declare function rankNearestNeighbors<T extends {
|
|
|
11
16
|
item: T;
|
|
12
17
|
score: number;
|
|
13
18
|
}>;
|
|
19
|
+
export declare function rankNearestNeighborsByScore<T>(items: T[], params: {
|
|
20
|
+
limit: number;
|
|
21
|
+
score: (item: T) => number;
|
|
22
|
+
minScore?: number;
|
|
23
|
+
}): Array<{
|
|
24
|
+
item: T;
|
|
25
|
+
score: number;
|
|
26
|
+
}>;
|
|
14
27
|
//# sourceMappingURL=exact.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exact.d.ts","sourceRoot":"","sources":["../../src/search/exact.ts"],"names":[],"mappings":"AAAA,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAcxE;AAED,wBAAgB,oBAAoB,CAAC,CAAC,SAAS;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAA;CAAE,EAChF,KAAK,EAAE,CAAC,EAAE,EACV,MAAM,EAAE;IAAE,eAAe,EAAE,MAAM,EAAE,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GACvF,KAAK,CAAC;IAAE,IAAI,EAAE,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"exact.d.ts","sourceRoot":"","sources":["../../src/search/exact.ts"],"names":[],"mappings":"AAAA,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAcxE;AAED,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG;IAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAa9F;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CASlE;AAkBD,wBAAgB,oBAAoB,CAAC,CAAC,SAAS;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAA;CAAE,EAChF,KAAK,EAAE,CAAC,EAAE,EACV,MAAM,EAAE;IAAE,eAAe,EAAE,MAAM,EAAE,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GACvF,KAAK,CAAC;IAAE,IAAI,EAAE,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAUnC;AAED,wBAAgB,2BAA2B,CAAC,CAAC,EAC3C,KAAK,EAAE,CAAC,EAAE,EACV,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACvE,KAAK,CAAC;IAAE,IAAI,EAAE,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CASnC"}
|
package/dist/search/exact.js
CHANGED
|
@@ -14,13 +14,65 @@ export function cosineSimilarity(left, right) {
|
|
|
14
14
|
return 0;
|
|
15
15
|
return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
|
|
16
16
|
}
|
|
17
|
+
export function normalizeEmbedding(embedding) {
|
|
18
|
+
let normSquared = 0;
|
|
19
|
+
for (let index = 0; index < embedding.length; index += 1) {
|
|
20
|
+
normSquared += embedding[index] * embedding[index];
|
|
21
|
+
}
|
|
22
|
+
const norm = Math.sqrt(normSquared);
|
|
23
|
+
if (norm === 0) {
|
|
24
|
+
return { normalized: embedding.map(() => 0), norm: 0 };
|
|
25
|
+
}
|
|
26
|
+
return {
|
|
27
|
+
normalized: embedding.map((value) => value / norm),
|
|
28
|
+
norm,
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
export function dotProduct(left, right) {
|
|
32
|
+
if (left.length !== right.length) {
|
|
33
|
+
throw new Error('Embedding dimensions do not match');
|
|
34
|
+
}
|
|
35
|
+
let dot = 0;
|
|
36
|
+
for (let index = 0; index < left.length; index += 1) {
|
|
37
|
+
dot += left[index] * right[index];
|
|
38
|
+
}
|
|
39
|
+
return dot;
|
|
40
|
+
}
|
|
41
|
+
function insertTopK(ranked, candidate, limit) {
|
|
42
|
+
let insertAt = ranked.length;
|
|
43
|
+
while (insertAt > 0 && candidate.score > ranked[insertAt - 1].score) {
|
|
44
|
+
insertAt -= 1;
|
|
45
|
+
}
|
|
46
|
+
if (insertAt >= limit) {
|
|
47
|
+
return;
|
|
48
|
+
}
|
|
49
|
+
ranked.splice(insertAt, 0, candidate);
|
|
50
|
+
if (ranked.length > limit) {
|
|
51
|
+
ranked.length = limit;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
17
54
|
export function rankNearestNeighbors(items, params) {
|
|
18
55
|
const minScore = params.minScore ?? -1;
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
56
|
+
const ranked = [];
|
|
57
|
+
for (const item of items) {
|
|
58
|
+
if (item.id === params.skipId)
|
|
59
|
+
continue;
|
|
60
|
+
const score = cosineSimilarity(params.targetEmbedding, item.embedding);
|
|
61
|
+
if (score < minScore)
|
|
62
|
+
continue;
|
|
63
|
+
insertTopK(ranked, { item, score }, params.limit);
|
|
64
|
+
}
|
|
65
|
+
return ranked;
|
|
66
|
+
}
|
|
67
|
+
export function rankNearestNeighborsByScore(items, params) {
|
|
68
|
+
const minScore = params.minScore ?? -1;
|
|
69
|
+
const ranked = [];
|
|
70
|
+
for (const item of items) {
|
|
71
|
+
const score = params.score(item);
|
|
72
|
+
if (score < minScore)
|
|
73
|
+
continue;
|
|
74
|
+
insertTopK(ranked, { item, score }, params.limit);
|
|
75
|
+
}
|
|
76
|
+
return ranked;
|
|
25
77
|
}
|
|
26
78
|
//# sourceMappingURL=exact.js.map
|
package/dist/search/exact.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exact.js","sourceRoot":"","sources":["../../src/search/exact.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,gBAAgB,CAAC,IAAc,EAAE,KAAe;IAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IACD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACpD,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QACtC,SAAS,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;IAC3C,CAAC;IACD,IAAI,QAAQ,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChD,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,
|
|
1
|
+
{"version":3,"file":"exact.js","sourceRoot":"","sources":["../../src/search/exact.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,gBAAgB,CAAC,IAAc,EAAE,KAAe;IAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IACD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACpD,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QACtC,SAAS,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;IAC3C,CAAC;IACD,IAAI,QAAQ,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChD,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,SAAmB;IACpD,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,SAAS,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACzD,WAAW,IAAI,SAAS,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;IACrD,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACpC,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;QACf,OAAO,EAAE,UAAU,EAAE,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;IACzD,CAAC;IACD,OAAO;QACL,UAAU,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,IAAI,CAAC;QAClD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAc,EAAE,KAAe;IACxD,IAAI,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IACD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACpD,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,UAAU,CAAI,MAAyC,EAAE,SAAqC,EAAE,KAAa;IACpH,IAAI,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC;IAC7B,OAAO,QAAQ,GAAG,CAAC,IAAI,SAAS,CAAC,KAAK,GAAG,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC;QACpE,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IAED,IAAI,QA
AQ,IAAI,KAAK,EAAE,CAAC;QACtB,OAAO;IACT,CAAC;IAED,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,EAAE,SAAS,CAAC,CAAC;IACtC,IAAI,MAAM,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QAC1B,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC;IACxB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,KAAU,EACV,MAAwF;IAExF,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IACvC,MAAM,MAAM,GAAsC,EAAE,CAAC;IACrD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,EAAE,KAAK,MAAM,CAAC,MAAM;YAAE,SAAS;QACxC,MAAM,KAAK,GAAG,gBAAgB,CAAC,MAAM,CAAC,eAAe,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QACvE,IAAI,KAAK,GAAG,QAAQ;YAAE,SAAS;QAC/B,UAAU,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,2BAA2B,CACzC,KAAU,EACV,MAAwE;IAExE,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IACvC,MAAM,MAAM,GAAsC,EAAE,CAAC;IACrD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACjC,IAAI,KAAK,GAAG,QAAQ;YAAE,SAAS;QAC/B,UAAU,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/dist/service.d.ts
CHANGED
|
@@ -169,7 +169,7 @@ export declare class GHCrawlService {
|
|
|
169
169
|
minScore?: number;
|
|
170
170
|
k?: number;
|
|
171
171
|
onProgress?: (message: string) => void;
|
|
172
|
-
}): ClusterResultDto
|
|
172
|
+
}): Promise<ClusterResultDto>;
|
|
173
173
|
searchRepository(params: {
|
|
174
174
|
owner: string;
|
|
175
175
|
repo: string;
|
|
@@ -259,18 +259,25 @@ export declare class GHCrawlService {
|
|
|
259
259
|
private buildEmbeddingTasks;
|
|
260
260
|
private prepareEmbeddingText;
|
|
261
261
|
private estimateEmbeddingTokens;
|
|
262
|
+
private parseEmbeddingContextError;
|
|
262
263
|
private isEmbeddingContextError;
|
|
263
264
|
private embedBatchWithRecovery;
|
|
264
265
|
private embedSingleTaskWithRecovery;
|
|
265
266
|
private shrinkEmbeddingTask;
|
|
267
|
+
private projectEmbeddingRetryLength;
|
|
266
268
|
private chunkEmbeddingTasks;
|
|
267
269
|
private loadStoredEmbeddings;
|
|
268
270
|
private loadParsedStoredEmbeddings;
|
|
271
|
+
private loadNormalizedEmbeddingsForSourceKind;
|
|
272
|
+
private loadClusterableThreadMeta;
|
|
269
273
|
private listStoredClusterNeighbors;
|
|
270
274
|
private getEmbeddingWorkset;
|
|
271
275
|
private loadCombinedSummaryTextMap;
|
|
272
276
|
private edgeKey;
|
|
273
277
|
private aggregateRepositoryEdges;
|
|
278
|
+
private mergeSourceKindEdges;
|
|
279
|
+
private countEmbeddingsForSourceKind;
|
|
280
|
+
private resolveEdgeWorkerRuntime;
|
|
274
281
|
private persistClusterRun;
|
|
275
282
|
private pruneOldClusterRuns;
|
|
276
283
|
private upsertSummary;
|