@claude-flow/cli 3.7.0-alpha.69 → 3.7.0-alpha.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -9
- package/dist/src/commands/benchmark.d.ts.map +1 -1
- package/dist/src/commands/benchmark.js +0 -7
- package/dist/src/commands/benchmark.js.map +1 -1
- package/dist/src/init/claudemd-generator.d.ts.map +1 -1
- package/dist/src/init/claudemd-generator.js +28 -53
- package/dist/src/init/claudemd-generator.js.map +1 -1
- package/dist/src/mcp-tools/embeddings-tools.d.ts.map +1 -1
- package/dist/src/mcp-tools/embeddings-tools.js +0 -797
- package/dist/src/mcp-tools/embeddings-tools.js.map +1 -1
- package/dist/src/mcp-tools/hive-mind-tools.d.ts +0 -7
- package/dist/src/mcp-tools/hive-mind-tools.d.ts.map +1 -1
- package/dist/src/mcp-tools/hive-mind-tools.js +4 -185
- package/dist/src/mcp-tools/hive-mind-tools.js.map +1 -1
- package/dist/src/mcp-tools/memory-tools.d.ts.map +1 -1
- package/dist/src/mcp-tools/memory-tools.js +16 -58
- package/dist/src/mcp-tools/memory-tools.js.map +1 -1
- package/dist/src/plugins/store/discovery.d.ts +9 -1
- package/dist/src/plugins/store/discovery.d.ts.map +1 -1
- package/dist/src/plugins/store/discovery.js +29 -10
- package/dist/src/plugins/store/discovery.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +5 -6
|
@@ -900,802 +900,5 @@ export const embeddingsTools = [
|
|
|
900
900
|
return { success: true, ...getRabitqStatus() };
|
|
901
901
|
},
|
|
902
902
|
},
|
|
903
|
-
{
|
|
904
|
-
// ADR-121 Phase 5 (full) — sidecar MCP tool. Proxies through
|
|
905
|
-
// @claude-flow/embeddings's probeRuvectorSidecar so LLM agents
|
|
906
|
-
// can ask "is the optimized ruvector backend reachable?" before
|
|
907
|
-
// dispatching embedding-heavy work that would benefit from it.
|
|
908
|
-
// Composable: pair with `embeddings_status` for the full local
|
|
909
|
-
// capability picture, or with `agentdb_health` for the full
|
|
910
|
-
// memory-substrate picture.
|
|
911
|
-
name: 'embeddings_check_ruvector_sidecar',
|
|
912
|
-
description: 'Check whether the ruvector CLI MCP sidecar is reachable and report its version + MCP tool surface. Use when a multi-step embedding pipeline could delegate to the optimized native Rust backend instead of going through this JS layer — the report tells you whether that path is available. For "is anything broken?" use ruflo doctor; for "should I use ruvector for this batch?" use this tool. Never throws.',
|
|
913
|
-
category: 'embeddings',
|
|
914
|
-
inputSchema: {
|
|
915
|
-
type: 'object',
|
|
916
|
-
properties: {
|
|
917
|
-
timeoutMs: {
|
|
918
|
-
type: 'number',
|
|
919
|
-
description: 'Per-shellout timeout (default 5000ms).',
|
|
920
|
-
},
|
|
921
|
-
},
|
|
922
|
-
},
|
|
923
|
-
handler: async (input) => {
|
|
924
|
-
// Deep import — the top-level @claude-flow/embeddings barrel's
|
|
925
|
-
// type re-exports don't resolve cleanly through the CLI's
|
|
926
|
-
// bundler-mode TypeScript at workspace-symlink time (the dist
|
|
927
|
-
// file is present + has the export, but TS sees stale types).
|
|
928
|
-
// The dist sub-module path is stable across alpha bumps because
|
|
929
|
-
// `files: ['dist']` ships it.
|
|
930
|
-
// Use the `./<file>` wildcard sub-path (no `.js` suffix) — the
|
|
931
|
-
// exports map appends `.js` for the import target.
|
|
932
|
-
const mod = await import('@claude-flow/embeddings/ruvector-mcp-probe');
|
|
933
|
-
const timeoutMs = typeof input?.timeoutMs === 'number' ? input.timeoutMs : 5_000;
|
|
934
|
-
const report = await mod.probeRuvectorSidecar({ timeoutMs });
|
|
935
|
-
return {
|
|
936
|
-
success: true,
|
|
937
|
-
...report,
|
|
938
|
-
// Single-line summary suitable for inline display.
|
|
939
|
-
summary: mod.formatRuvectorAvailability(report),
|
|
940
|
-
};
|
|
941
|
-
},
|
|
942
|
-
},
|
|
943
|
-
// ============================================================
|
|
944
|
-
// ADR-121 Phase 5b — DiskannSnapshot MCP tools (alpha.47 CLI)
|
|
945
|
-
// ============================================================
|
|
946
|
-
//
|
|
947
|
-
// Process-level registry of named DiskannSnapshot handles. Build
|
|
948
|
-
// an index once, search it many times. Persistence to disk via
|
|
949
|
-
// the snapshot's storagePath; the handle is keyed by `name`.
|
|
950
|
-
// For in-memory snapshots, set persist: false and the index is
|
|
951
|
-
// re-built only while this process lives.
|
|
952
|
-
{
|
|
953
|
-
name: 'embeddings_diskann_build',
|
|
954
|
-
description: 'Build a DiskANN/Vamana ANN index from a batch of (id, vector) pairs. Use when you have a fixed corpus (≥10k vectors typical) that needs sub-millisecond ANN search and you want persistence across process restarts — for streaming inserts at smaller scale use SearchableEmbeddingCache instead, for batch in-memory 32×-compressed use RabitqSnapshot. Pair with embeddings_diskann_search to query, embeddings_diskann_status to inspect. Requires @ruvector/diskann (optional peer dep — throws a clear named error if missing).',
|
|
955
|
-
category: 'embeddings',
|
|
956
|
-
inputSchema: {
|
|
957
|
-
type: 'object',
|
|
958
|
-
properties: {
|
|
959
|
-
name: { type: 'string', description: 'Snapshot handle name (used by search + status). Must be unique per process.' },
|
|
960
|
-
dimension: { type: 'number', description: 'Embedding dimension.' },
|
|
961
|
-
entries: {
|
|
962
|
-
type: 'array',
|
|
963
|
-
description: 'Vectors to index. Each entry: { id: string, vector: number[] }.',
|
|
964
|
-
items: {
|
|
965
|
-
type: 'object',
|
|
966
|
-
properties: {
|
|
967
|
-
id: { type: 'string' },
|
|
968
|
-
vector: { type: 'array', items: { type: 'number' } },
|
|
969
|
-
},
|
|
970
|
-
required: ['id', 'vector'],
|
|
971
|
-
},
|
|
972
|
-
},
|
|
973
|
-
storagePath: { type: 'string', description: 'Optional directory for on-disk persistence (snapshot survives restarts via embeddings_diskann_load).' },
|
|
974
|
-
},
|
|
975
|
-
required: ['name', 'dimension', 'entries'],
|
|
976
|
-
},
|
|
977
|
-
handler: async (input) => {
|
|
978
|
-
const { getDiskannRegistry } = await import('../memory/diskann-registry.js');
|
|
979
|
-
const registry = getDiskannRegistry();
|
|
980
|
-
const name = input.name;
|
|
981
|
-
const dimension = input.dimension;
|
|
982
|
-
const entries = input.entries;
|
|
983
|
-
const storagePath = input.storagePath;
|
|
984
|
-
try {
|
|
985
|
-
const stats = await registry.build({ name, dimension, entries, storagePath });
|
|
986
|
-
return { success: true, name, ...stats };
|
|
987
|
-
}
|
|
988
|
-
catch (err) {
|
|
989
|
-
return { success: false, name, error: err instanceof Error ? err.message : String(err) };
|
|
990
|
-
}
|
|
991
|
-
},
|
|
992
|
-
},
|
|
993
|
-
{
|
|
994
|
-
name: 'embeddings_diskann_search',
|
|
995
|
-
description: 'Search a previously-built DiskANN index for the k nearest neighbors of a query vector. Use when you built an index via embeddings_diskann_build and now need ANN retrieval — returns ids + L2² distances sorted ascending. For text→vector→search in one call, pair with embeddings_generate first. For exact-match key lookup over a streaming cache, use SearchableEmbeddingCache.get instead.',
|
|
996
|
-
category: 'embeddings',
|
|
997
|
-
inputSchema: {
|
|
998
|
-
type: 'object',
|
|
999
|
-
properties: {
|
|
1000
|
-
name: { type: 'string', description: 'Snapshot handle name (set by embeddings_diskann_build).' },
|
|
1001
|
-
vector: { type: 'array', items: { type: 'number' }, description: 'Query vector. Must match the index dimension.' },
|
|
1002
|
-
k: { type: 'number', description: 'Number of nearest neighbors to return.' },
|
|
1003
|
-
},
|
|
1004
|
-
required: ['name', 'vector', 'k'],
|
|
1005
|
-
},
|
|
1006
|
-
handler: async (input) => {
|
|
1007
|
-
const { getDiskannRegistry } = await import('../memory/diskann-registry.js');
|
|
1008
|
-
const registry = getDiskannRegistry();
|
|
1009
|
-
const name = input.name;
|
|
1010
|
-
const vector = new Float32Array(input.vector);
|
|
1011
|
-
const k = input.k;
|
|
1012
|
-
try {
|
|
1013
|
-
const hits = await registry.search(name, vector, k);
|
|
1014
|
-
return { success: true, name, k, hits };
|
|
1015
|
-
}
|
|
1016
|
-
catch (err) {
|
|
1017
|
-
return { success: false, name, error: err instanceof Error ? err.message : String(err) };
|
|
1018
|
-
}
|
|
1019
|
-
},
|
|
1020
|
-
},
|
|
1021
|
-
{
|
|
1022
|
-
name: 'embeddings_diskann_status',
|
|
1023
|
-
description: 'List all DiskANN snapshots currently held by this MCP server, with their dimension, vector count, and storage path. Use when you want to inventory in-process indexes before opening a new one, or when debugging "which snapshot did I build?". For checking whether the @ruvector/diskann peer dep itself is installed, use embeddings_check_ruvector_sidecar (the family probe) instead.',
|
|
1024
|
-
category: 'embeddings',
|
|
1025
|
-
inputSchema: { type: 'object', properties: {} },
|
|
1026
|
-
handler: async () => {
|
|
1027
|
-
const { getDiskannRegistry } = await import('../memory/diskann-registry.js');
|
|
1028
|
-
const registry = getDiskannRegistry();
|
|
1029
|
-
return { success: true, snapshots: registry.list() };
|
|
1030
|
-
},
|
|
1031
|
-
},
|
|
1032
|
-
// ============================================================
|
|
1033
|
-
// ADR-121 Phase 8 (CLI) — AnnRouter MCP tools (alpha.48)
|
|
1034
|
-
// ============================================================
|
|
1035
|
-
//
|
|
1036
|
-
// Composition layer: agents declare the workload shape
|
|
1037
|
-
// (corpusSize/persistent/mutable) and the router picks between
|
|
1038
|
-
// HNSW / RaBitQ / DiskANN. The decision is returned to the agent
|
|
1039
|
-
// so they can see what was picked + why (useful for routing
|
|
1040
|
-
// explanations + cost attribution).
|
|
1041
|
-
{
|
|
1042
|
-
name: 'embeddings_ann_router_build',
|
|
1043
|
-
description: "Build an ANN index using AnnRouter — auto-selects between HNSW / RaBitQ / DiskANN based on workload. Use when you have a corpus to index and don't want to choose between the three backings yourself — declare the workload ({corpusSize, persistent, mutable}) and the router picks. Pair with embeddings_ann_router_search to query, embeddings_ann_router_status to see what was picked. For direct DiskANN control (e.g. for known billion-scale persistent indexes), use embeddings_diskann_build instead.",
|
|
1044
|
-
category: 'embeddings',
|
|
1045
|
-
inputSchema: {
|
|
1046
|
-
type: 'object',
|
|
1047
|
-
properties: {
|
|
1048
|
-
name: { type: 'string', description: 'Handle name. Must be unique per process.' },
|
|
1049
|
-
workload: {
|
|
1050
|
-
type: 'object',
|
|
1051
|
-
properties: {
|
|
1052
|
-
corpusSize: { type: 'number', description: 'Approximate corpus size.' },
|
|
1053
|
-
dimension: { type: 'number' },
|
|
1054
|
-
persistent: { type: 'boolean', description: 'Survive process restarts? Forces DiskANN.' },
|
|
1055
|
-
mutable: { type: 'boolean', description: 'Streaming inserts/deletes after build? Prefers HNSW.' },
|
|
1056
|
-
storagePath: { type: 'string', description: 'On-disk path; required when persistent=true.' },
|
|
1057
|
-
},
|
|
1058
|
-
required: ['corpusSize', 'dimension'],
|
|
1059
|
-
},
|
|
1060
|
-
entries: {
|
|
1061
|
-
type: 'array',
|
|
1062
|
-
items: {
|
|
1063
|
-
type: 'object',
|
|
1064
|
-
properties: { id: { type: 'string' }, vector: { type: 'array', items: { type: 'number' } } },
|
|
1065
|
-
required: ['id', 'vector'],
|
|
1066
|
-
},
|
|
1067
|
-
},
|
|
1068
|
-
},
|
|
1069
|
-
required: ['name', 'workload', 'entries'],
|
|
1070
|
-
},
|
|
1071
|
-
handler: async (input) => {
|
|
1072
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1073
|
-
const registry = getAnnRouterRegistry();
|
|
1074
|
-
try {
|
|
1075
|
-
const result = await registry.build({
|
|
1076
|
-
name: input.name,
|
|
1077
|
-
workload: input.workload,
|
|
1078
|
-
entries: input.entries,
|
|
1079
|
-
});
|
|
1080
|
-
return { success: true, name: input.name, ...result };
|
|
1081
|
-
}
|
|
1082
|
-
catch (err) {
|
|
1083
|
-
return { success: false, name: input.name, error: err instanceof Error ? err.message : String(err) };
|
|
1084
|
-
}
|
|
1085
|
-
},
|
|
1086
|
-
},
|
|
1087
|
-
{
|
|
1088
|
-
name: 'embeddings_ann_router_search',
|
|
1089
|
-
description: 'Search a named AnnRouter handle for the k nearest neighbors. Returns hits with the routing-decision-aware score (cosine sim for HNSW; L2 distance for RaBitQ/DiskANN — interpret relative to embeddings_ann_router_status). For raw DiskANN search use embeddings_diskann_search; for raw HNSW use SearchableEmbeddingCache.search directly.',
|
|
1090
|
-
category: 'embeddings',
|
|
1091
|
-
inputSchema: {
|
|
1092
|
-
type: 'object',
|
|
1093
|
-
properties: {
|
|
1094
|
-
name: { type: 'string' },
|
|
1095
|
-
vector: { type: 'array', items: { type: 'number' } },
|
|
1096
|
-
k: { type: 'number' },
|
|
1097
|
-
},
|
|
1098
|
-
required: ['name', 'vector', 'k'],
|
|
1099
|
-
},
|
|
1100
|
-
handler: async (input) => {
|
|
1101
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1102
|
-
const registry = getAnnRouterRegistry();
|
|
1103
|
-
try {
|
|
1104
|
-
const hits = await registry.search(input.name, new Float32Array(input.vector), input.k);
|
|
1105
|
-
return { success: true, name: input.name, k: input.k, hits };
|
|
1106
|
-
}
|
|
1107
|
-
catch (err) {
|
|
1108
|
-
return { success: false, name: input.name, error: err instanceof Error ? err.message : String(err) };
|
|
1109
|
-
}
|
|
1110
|
-
},
|
|
1111
|
-
},
|
|
1112
|
-
{
|
|
1113
|
-
name: 'embeddings_ann_router_status',
|
|
1114
|
-
description: 'List all AnnRouter handles, each with its decided backing, routing reason, and current count. Use to inventory routed indexes + confirm the router picked what you expected. For peer-dep family availability use embeddings_check_ruvector_sidecar.',
|
|
1115
|
-
category: 'embeddings',
|
|
1116
|
-
inputSchema: { type: 'object', properties: {} },
|
|
1117
|
-
handler: async () => {
|
|
1118
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1119
|
-
const registry = getAnnRouterRegistry();
|
|
1120
|
-
return { success: true, handles: registry.list() };
|
|
1121
|
-
},
|
|
1122
|
-
},
|
|
1123
|
-
// ============================================================
|
|
1124
|
-
// ADR-121 Phase 9 — one-call RAG retrieval (alpha.49 CLI)
|
|
1125
|
-
// ============================================================
|
|
1126
|
-
{
|
|
1127
|
-
name: 'embeddings_search_text',
|
|
1128
|
-
description: "Embed a text query and search a named AnnRouter handle in a single call — the standard RAG retrieval shape. Eliminates the two-call dance of `embeddings_generate` then `embeddings_ann_router_search`. Returns hits plus per-stage latency (embeddingMs + searchMs) so callers can attribute cost. Pair with embeddings_ann_router_build to build the index first. For raw vector input (no embedding step) use embeddings_ann_router_search.",
|
|
1129
|
-
category: 'embeddings',
|
|
1130
|
-
inputSchema: {
|
|
1131
|
-
type: 'object',
|
|
1132
|
-
properties: {
|
|
1133
|
-
text: { type: 'string', description: 'Query text. Will be embedded inline.' },
|
|
1134
|
-
name: { type: 'string', description: 'AnnRouter handle name (set by embeddings_ann_router_build).' },
|
|
1135
|
-
k: { type: 'number', description: 'Number of nearest neighbors.' },
|
|
1136
|
-
},
|
|
1137
|
-
required: ['text', 'name', 'k'],
|
|
1138
|
-
},
|
|
1139
|
-
handler: async (input) => {
|
|
1140
|
-
const config = loadConfig();
|
|
1141
|
-
if (!config) {
|
|
1142
|
-
return {
|
|
1143
|
-
success: false,
|
|
1144
|
-
error: 'Embeddings not initialized. Run embeddings_init first.',
|
|
1145
|
-
};
|
|
1146
|
-
}
|
|
1147
|
-
const text = input.text;
|
|
1148
|
-
const name = input.name;
|
|
1149
|
-
const k = input.k;
|
|
1150
|
-
const tv = validateText(text, 'text');
|
|
1151
|
-
if (!tv.valid)
|
|
1152
|
-
return { success: false, error: tv.error };
|
|
1153
|
-
// Stage 1 — embed the query.
|
|
1154
|
-
const embedT0 = Date.now();
|
|
1155
|
-
let embedding;
|
|
1156
|
-
try {
|
|
1157
|
-
embedding = await generateRealEmbedding(text, config.dimension);
|
|
1158
|
-
}
|
|
1159
|
-
catch (err) {
|
|
1160
|
-
return { success: false, error: `embed failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
1161
|
-
}
|
|
1162
|
-
const embeddingMs = Date.now() - embedT0;
|
|
1163
|
-
// Stage 2 — search the named router handle.
|
|
1164
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1165
|
-
const registry = getAnnRouterRegistry();
|
|
1166
|
-
const searchT0 = Date.now();
|
|
1167
|
-
try {
|
|
1168
|
-
const hits = await registry.search(name, new Float32Array(embedding), k);
|
|
1169
|
-
const searchMs = Date.now() - searchT0;
|
|
1170
|
-
return {
|
|
1171
|
-
success: true,
|
|
1172
|
-
name,
|
|
1173
|
-
k,
|
|
1174
|
-
hits,
|
|
1175
|
-
latency: { embeddingMs, searchMs, totalMs: embeddingMs + searchMs },
|
|
1176
|
-
embeddingDimension: embedding.length,
|
|
1177
|
-
};
|
|
1178
|
-
}
|
|
1179
|
-
catch (err) {
|
|
1180
|
-
return {
|
|
1181
|
-
success: false,
|
|
1182
|
-
name,
|
|
1183
|
-
error: err instanceof Error ? err.message : String(err),
|
|
1184
|
-
latency: { embeddingMs, searchMs: 0 },
|
|
1185
|
-
};
|
|
1186
|
-
}
|
|
1187
|
-
},
|
|
1188
|
-
},
|
|
1189
|
-
// ============================================================
|
|
1190
|
-
// ADR-121 Phase 9b — batch one-call RAG (alpha.50 CLI)
|
|
1191
|
-
// ============================================================
|
|
1192
|
-
{
|
|
1193
|
-
name: 'embeddings_search_text_batch',
|
|
1194
|
-
description: "Embed N text queries and search a named AnnRouter handle for each, in a single MCP call. Standard shape for question-reformulation RAG (expand one user question into N variants, retrieve top-k for each, merge). Embeddings + searches run in parallel where the backing supports it. Returns one results entry per query in input order, plus aggregate latency. For single-query use embeddings_search_text; for raw vector input use embeddings_ann_router_search.",
|
|
1195
|
-
category: 'embeddings',
|
|
1196
|
-
inputSchema: {
|
|
1197
|
-
type: 'object',
|
|
1198
|
-
properties: {
|
|
1199
|
-
texts: {
|
|
1200
|
-
type: 'array',
|
|
1201
|
-
items: { type: 'string' },
|
|
1202
|
-
description: 'Array of query texts. Order preserved in results.',
|
|
1203
|
-
},
|
|
1204
|
-
name: { type: 'string', description: 'AnnRouter handle name.' },
|
|
1205
|
-
k: { type: 'number', description: 'Nearest neighbors per query.' },
|
|
1206
|
-
},
|
|
1207
|
-
required: ['texts', 'name', 'k'],
|
|
1208
|
-
},
|
|
1209
|
-
handler: async (input) => {
|
|
1210
|
-
const config = loadConfig();
|
|
1211
|
-
if (!config) {
|
|
1212
|
-
return { success: false, error: 'Embeddings not initialized. Run embeddings_init first.' };
|
|
1213
|
-
}
|
|
1214
|
-
const texts = input.texts;
|
|
1215
|
-
const name = input.name;
|
|
1216
|
-
const k = input.k;
|
|
1217
|
-
if (!Array.isArray(texts) || texts.length === 0) {
|
|
1218
|
-
return { success: false, error: 'texts must be a non-empty array' };
|
|
1219
|
-
}
|
|
1220
|
-
// Validate every text upfront so we don't half-embed before failing.
|
|
1221
|
-
for (let i = 0; i < texts.length; i++) {
|
|
1222
|
-
const t = texts[i];
|
|
1223
|
-
if (typeof t !== 'string') {
|
|
1224
|
-
return { success: false, error: `texts[${i}] is not a string` };
|
|
1225
|
-
}
|
|
1226
|
-
}
|
|
1227
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1228
|
-
const registry = getAnnRouterRegistry();
|
|
1229
|
-
// Stage 1 — embed all queries in parallel.
|
|
1230
|
-
const embedT0 = Date.now();
|
|
1231
|
-
let embeddings;
|
|
1232
|
-
try {
|
|
1233
|
-
embeddings = await Promise.all(texts.map(t => generateRealEmbedding(t, config.dimension)));
|
|
1234
|
-
}
|
|
1235
|
-
catch (err) {
|
|
1236
|
-
return {
|
|
1237
|
-
success: false,
|
|
1238
|
-
name,
|
|
1239
|
-
error: `batch embed failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
1240
|
-
};
|
|
1241
|
-
}
|
|
1242
|
-
const embeddingMs = Date.now() - embedT0;
|
|
1243
|
-
// Stage 2 — search each in parallel against the named handle.
|
|
1244
|
-
// Per-query errors are captured into the results entry rather
|
|
1245
|
-
// than aborting the whole batch — callers see which queries
|
|
1246
|
-
// succeeded and which didn't.
|
|
1247
|
-
const searchT0 = Date.now();
|
|
1248
|
-
const results = await Promise.all(embeddings.map(async (emb, i) => {
|
|
1249
|
-
try {
|
|
1250
|
-
const hits = await registry.search(name, new Float32Array(emb), k);
|
|
1251
|
-
return { index: i, text: texts[i], success: true, hits };
|
|
1252
|
-
}
|
|
1253
|
-
catch (err) {
|
|
1254
|
-
return {
|
|
1255
|
-
index: i,
|
|
1256
|
-
text: texts[i],
|
|
1257
|
-
success: false,
|
|
1258
|
-
error: err instanceof Error ? err.message : String(err),
|
|
1259
|
-
};
|
|
1260
|
-
}
|
|
1261
|
-
}));
|
|
1262
|
-
const searchMs = Date.now() - searchT0;
|
|
1263
|
-
const successCount = results.filter(r => r.success).length;
|
|
1264
|
-
return {
|
|
1265
|
-
success: successCount === results.length,
|
|
1266
|
-
name,
|
|
1267
|
-
k,
|
|
1268
|
-
queryCount: texts.length,
|
|
1269
|
-
successCount,
|
|
1270
|
-
failureCount: results.length - successCount,
|
|
1271
|
-
results,
|
|
1272
|
-
latency: {
|
|
1273
|
-
embeddingMs,
|
|
1274
|
-
searchMs,
|
|
1275
|
-
totalMs: embeddingMs + searchMs,
|
|
1276
|
-
avgPerQueryMs: Math.round(((embeddingMs + searchMs) / texts.length) * 100) / 100,
|
|
1277
|
-
},
|
|
1278
|
-
embeddingDimension: embeddings[0]?.length,
|
|
1279
|
-
};
|
|
1280
|
-
},
|
|
1281
|
-
},
|
|
1282
|
-
// ============================================================
|
|
1283
|
-
// ADR-121 Phase 12 — HyDE embedding-level fusion (alpha.53 CLI)
|
|
1284
|
-
// ============================================================
|
|
1285
|
-
//
|
|
1286
|
-
// HyDE (Gao, Ma, Lin, Callan 2022 — "Precise Zero-Shot Dense
|
|
1287
|
-
// Retrieval without Relevance Labels"): question embeddings live in
|
|
1288
|
-
// "question space" while documents embed into "answer space", so
|
|
1289
|
-
// cosine search systematically underweights relevant docs. The
|
|
1290
|
-
// standard fix: have an LLM generate N hypothetical answers, embed
|
|
1291
|
-
// each, AVERAGE the embeddings into a single query vector, search
|
|
1292
|
-
// once with that.
|
|
1293
|
-
//
|
|
1294
|
-
// Distinct from `embeddings_search_text_ensemble` (Phase 11):
|
|
1295
|
-
// - HyDE fuses at the EMBEDDING level (1 search after average).
|
|
1296
|
-
// Cheaper, finds the centroid hit, interpolates between
|
|
1297
|
-
// hypothetical answers.
|
|
1298
|
-
// - RRF fuses at the RANK level (N searches then merge ranks).
|
|
1299
|
-
// More expensive, preserves intent boundaries between variants.
|
|
1300
|
-
//
|
|
1301
|
-
// Both are useful. Production systems often combine them — HyDE
|
|
1302
|
-
// inside one ranked list, RRF across multiple lists.
|
|
1303
|
-
{
|
|
1304
|
-
name: 'embeddings_search_text_hyde',
|
|
1305
|
-
description: "Embed N hypothetical-answer texts, AVERAGE their embeddings into a single query vector, and search a named AnnRouter handle once. Implements the HyDE recipe (Gao et al. 2022) for zero-shot dense retrieval — the LLM-generated hypothetical answers live in the same answer-space as the corpus, so the averaged vector lands near the true relevant docs. Distinct from embeddings_search_text_ensemble (which fuses at the rank level via RRF — more expensive, preserves intent boundaries); HyDE fuses at the embedding level (cheaper, one search, finds the centroid hit). Optional `weights` per text (e.g. weight the user's original question 0.5× and LLM answers 1.0× each — the paper's recommended recipe). Returns hits + averaged-vector metadata (unitNorm assertion + contributing-text count) for transparency.",
|
|
1306
|
-
category: 'embeddings',
|
|
1307
|
-
inputSchema: {
|
|
1308
|
-
type: 'object',
|
|
1309
|
-
properties: {
|
|
1310
|
-
texts: {
|
|
1311
|
-
type: 'array',
|
|
1312
|
-
items: { type: 'string' },
|
|
1313
|
-
description: 'Array of hypothetical-answer texts (LLM-generated) to fuse. The caller is responsible for generating these — typically via a few-shot prompt to an LLM asking it to answer the user question.',
|
|
1314
|
-
},
|
|
1315
|
-
name: { type: 'string', description: 'AnnRouter handle name (set by embeddings_ann_router_build).' },
|
|
1316
|
-
k: { type: 'number', description: 'Number of nearest neighbors to return.' },
|
|
1317
|
-
weights: {
|
|
1318
|
-
type: 'array',
|
|
1319
|
-
items: { type: 'number' },
|
|
1320
|
-
description: 'Per-text weights for the embedding average. Length must equal texts.length. Default uniform.',
|
|
1321
|
-
},
|
|
1322
|
-
},
|
|
1323
|
-
required: ['texts', 'name', 'k'],
|
|
1324
|
-
},
|
|
1325
|
-
handler: async (input) => {
|
|
1326
|
-
const config = loadConfig();
|
|
1327
|
-
if (!config) {
|
|
1328
|
-
return { success: false, error: 'Embeddings not initialized. Run embeddings_init first.' };
|
|
1329
|
-
}
|
|
1330
|
-
const texts = input.texts;
|
|
1331
|
-
const name = input.name;
|
|
1332
|
-
const k = input.k;
|
|
1333
|
-
const weights = Array.isArray(input.weights) ? input.weights : undefined;
|
|
1334
|
-
if (!Array.isArray(texts) || texts.length === 0) {
|
|
1335
|
-
return { success: false, error: 'texts must be a non-empty array' };
|
|
1336
|
-
}
|
|
1337
|
-
if (!Number.isInteger(k) || k < 1) {
|
|
1338
|
-
return { success: false, error: 'k must be a positive integer' };
|
|
1339
|
-
}
|
|
1340
|
-
for (let i = 0; i < texts.length; i++) {
|
|
1341
|
-
if (typeof texts[i] !== 'string') {
|
|
1342
|
-
return { success: false, error: `texts[${i}] is not a string` };
|
|
1343
|
-
}
|
|
1344
|
-
}
|
|
1345
|
-
if (weights && weights.length !== texts.length) {
|
|
1346
|
-
return {
|
|
1347
|
-
success: false,
|
|
1348
|
-
error: `weights.length (${weights.length}) must match texts.length (${texts.length})`,
|
|
1349
|
-
};
|
|
1350
|
-
}
|
|
1351
|
-
if (weights) {
|
|
1352
|
-
for (let i = 0; i < weights.length; i++) {
|
|
1353
|
-
if (weights[i] < 0) {
|
|
1354
|
-
return { success: false, error: `weights[${i}] is negative` };
|
|
1355
|
-
}
|
|
1356
|
-
}
|
|
1357
|
-
}
|
|
1358
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1359
|
-
const registry = getAnnRouterRegistry();
|
|
1360
|
-
// Stage 1 — embed each hypothetical text in parallel.
|
|
1361
|
-
const embedT0 = Date.now();
|
|
1362
|
-
let embeddings;
|
|
1363
|
-
try {
|
|
1364
|
-
embeddings = await Promise.all(texts.map(t => generateRealEmbedding(t, config.dimension)));
|
|
1365
|
-
}
|
|
1366
|
-
catch (err) {
|
|
1367
|
-
return { success: false, name, error: `batch embed failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
1368
|
-
}
|
|
1369
|
-
const embeddingMs = Date.now() - embedT0;
|
|
1370
|
-
// Stage 2 — average the embeddings (HyDE fusion).
|
|
1371
|
-
const { averageEmbeddings, isUnitNorm } = await import('@claude-flow/embeddings/embedding-fusion');
|
|
1372
|
-
const fuseT0 = Date.now();
|
|
1373
|
-
const avgVec = averageEmbeddings(embeddings, {
|
|
1374
|
-
weights,
|
|
1375
|
-
normalizeInputs: true,
|
|
1376
|
-
normalizeOutput: true,
|
|
1377
|
-
});
|
|
1378
|
-
const fuseMs = Date.now() - fuseT0;
|
|
1379
|
-
const fusedUnit = isUnitNorm(avgVec);
|
|
1380
|
-
// Stage 3 — single search with the averaged vector.
|
|
1381
|
-
const searchT0 = Date.now();
|
|
1382
|
-
let hits;
|
|
1383
|
-
try {
|
|
1384
|
-
hits = await registry.search(name, avgVec, k);
|
|
1385
|
-
}
|
|
1386
|
-
catch (err) {
|
|
1387
|
-
return {
|
|
1388
|
-
success: false,
|
|
1389
|
-
name,
|
|
1390
|
-
error: err instanceof Error ? err.message : String(err),
|
|
1391
|
-
latency: { embeddingMs, fuseMs, searchMs: 0 },
|
|
1392
|
-
};
|
|
1393
|
-
}
|
|
1394
|
-
const searchMs = Date.now() - searchT0;
|
|
1395
|
-
return {
|
|
1396
|
-
success: true,
|
|
1397
|
-
name,
|
|
1398
|
-
k,
|
|
1399
|
-
hits,
|
|
1400
|
-
hyde: {
|
|
1401
|
-
textsFused: texts.length,
|
|
1402
|
-
weights: weights ?? null,
|
|
1403
|
-
averagedVectorUnitNorm: fusedUnit,
|
|
1404
|
-
dimension: avgVec.length,
|
|
1405
|
-
},
|
|
1406
|
-
latency: {
|
|
1407
|
-
embeddingMs,
|
|
1408
|
-
fuseMs,
|
|
1409
|
-
searchMs,
|
|
1410
|
-
totalMs: embeddingMs + fuseMs + searchMs,
|
|
1411
|
-
},
|
|
1412
|
-
};
|
|
1413
|
-
},
|
|
1414
|
-
},
|
|
1415
|
-
// ============================================================
|
|
1416
|
-
// ADR-121 Phase 11 — RRF ensemble retrieval (alpha.52 CLI)
|
|
1417
|
-
// ============================================================
|
|
1418
|
-
//
|
|
1419
|
-
// Question-reformulation pipelines produce N parallel result lists.
|
|
1420
|
-
// Reciprocal Rank Fusion (Cormack-Clarke-Büttcher 2009) fuses them
|
|
1421
|
-
// into a single ranking without needing score comparability across
|
|
1422
|
-
// lists. Composes `embeddings_search_text_batch` (N parallel
|
|
1423
|
-
// searches) with `reciprocalRankFusion` (rank-level merge).
|
|
1424
|
-
//
|
|
1425
|
-
// Standard production ensemble-RAG pattern. Pairs naturally with
|
|
1426
|
-
// LLM query rewriting upstream (expand "how does auth work?" into
|
|
1427
|
-
// {"how does authentication work?", "what's the login flow?",
|
|
1428
|
-
// "describe the OAuth2 handshake"} → batch search each →
|
|
1429
|
-
// RRF-fuse) — recovers more relevant docs than a single search.
|
|
1430
|
-
{
|
|
1431
|
-
name: 'embeddings_search_text_ensemble',
|
|
1432
|
-
description: "Embed N text query variants, search a named AnnRouter handle for each in parallel, then RRF-fuse (Reciprocal Rank Fusion, Cormack-Clarke-Büttcher 2009) the N hit-lists into a single merged top-k ranking. Standard production shape for question-reformulation RAG: agents expand one user question into N variants, retrieve top-k for each, get a single fused list back — items appearing high in MORE variants outrank items appearing high in only one. Returns fused hits with per-list ranks for transparency + aggregate latency. λ-equivalent here is `kRrf` (default 60 per SIGIR 2009). Per-list `listWeights` available for biased ensemble (e.g. weight the original-query list 2× over reformulations). For non-fused multi-query results use embeddings_search_text_batch; for single-query diverse retrieval use embeddings_search_text_diverse.",
|
|
1433
|
-
category: 'embeddings',
|
|
1434
|
-
inputSchema: {
|
|
1435
|
-
type: 'object',
|
|
1436
|
-
properties: {
|
|
1437
|
-
texts: {
|
|
1438
|
-
type: 'array',
|
|
1439
|
-
items: { type: 'string' },
|
|
1440
|
-
description: 'Array of query text variants. Order preserved in per-list ranks.',
|
|
1441
|
-
},
|
|
1442
|
-
name: { type: 'string', description: 'AnnRouter handle name (set by embeddings_ann_router_build).' },
|
|
1443
|
-
k: { type: 'number', description: 'Number of fused results to return.' },
|
|
1444
|
-
perQueryK: {
|
|
1445
|
-
type: 'number',
|
|
1446
|
-
description: 'Top-k per query before fusion. Default 2*k. Larger = wider candidate pool, more compute.',
|
|
1447
|
-
},
|
|
1448
|
-
kRrf: {
|
|
1449
|
-
type: 'number',
|
|
1450
|
-
description: 'RRF smoothing constant. Default 60 (SIGIR 2009). Smaller = top-rank dominance.',
|
|
1451
|
-
},
|
|
1452
|
-
listWeights: {
|
|
1453
|
-
type: 'array',
|
|
1454
|
-
items: { type: 'number' },
|
|
1455
|
-
description: 'Per-query weights. Length must equal texts.length. Default = 1 each.',
|
|
1456
|
-
},
|
|
1457
|
-
},
|
|
1458
|
-
required: ['texts', 'name', 'k'],
|
|
1459
|
-
},
|
|
1460
|
-
handler: async (input) => {
|
|
1461
|
-
const config = loadConfig();
|
|
1462
|
-
if (!config) {
|
|
1463
|
-
return { success: false, error: 'Embeddings not initialized. Run embeddings_init first.' };
|
|
1464
|
-
}
|
|
1465
|
-
const texts = input.texts;
|
|
1466
|
-
const name = input.name;
|
|
1467
|
-
const k = input.k;
|
|
1468
|
-
const perQueryK = typeof input.perQueryK === 'number' && input.perQueryK >= 1
|
|
1469
|
-
? input.perQueryK
|
|
1470
|
-
: Math.max(k * 2, k);
|
|
1471
|
-
const kRrf = typeof input.kRrf === 'number' && input.kRrf > 0 ? input.kRrf : 60;
|
|
1472
|
-
const listWeights = Array.isArray(input.listWeights) ? input.listWeights : undefined;
|
|
1473
|
-
if (!Array.isArray(texts) || texts.length === 0) {
|
|
1474
|
-
return { success: false, error: 'texts must be a non-empty array' };
|
|
1475
|
-
}
|
|
1476
|
-
if (!Number.isInteger(k) || k < 1) {
|
|
1477
|
-
return { success: false, error: 'k must be a positive integer' };
|
|
1478
|
-
}
|
|
1479
|
-
for (let i = 0; i < texts.length; i++) {
|
|
1480
|
-
if (typeof texts[i] !== 'string') {
|
|
1481
|
-
return { success: false, error: `texts[${i}] is not a string` };
|
|
1482
|
-
}
|
|
1483
|
-
}
|
|
1484
|
-
if (listWeights && listWeights.length !== texts.length) {
|
|
1485
|
-
return {
|
|
1486
|
-
success: false,
|
|
1487
|
-
error: `listWeights.length (${listWeights.length}) must match texts.length (${texts.length})`,
|
|
1488
|
-
};
|
|
1489
|
-
}
|
|
1490
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1491
|
-
const registry = getAnnRouterRegistry();
|
|
1492
|
-
// Stage 1 — embed all queries in parallel.
|
|
1493
|
-
const embedT0 = Date.now();
|
|
1494
|
-
let embeddings;
|
|
1495
|
-
try {
|
|
1496
|
-
embeddings = await Promise.all(texts.map(t => generateRealEmbedding(t, config.dimension)));
|
|
1497
|
-
}
|
|
1498
|
-
catch (err) {
|
|
1499
|
-
return { success: false, name, error: `batch embed failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
1500
|
-
}
|
|
1501
|
-
const embeddingMs = Date.now() - embedT0;
|
|
1502
|
-
// Stage 2 — search each variant in parallel. Per-query errors
|
|
1503
|
-
// become empty result lists (the variant just doesn't contribute
|
|
1504
|
-
// to the fusion) rather than aborting the ensemble.
|
|
1505
|
-
const searchT0 = Date.now();
|
|
1506
|
-
const perQueryResults = await Promise.all(embeddings.map(async (emb, i) => {
|
|
1507
|
-
try {
|
|
1508
|
-
const hits = await registry.search(name, new Float32Array(emb), perQueryK);
|
|
1509
|
-
return { index: i, text: texts[i], hits, success: true };
|
|
1510
|
-
}
|
|
1511
|
-
catch (err) {
|
|
1512
|
-
return {
|
|
1513
|
-
index: i,
|
|
1514
|
-
text: texts[i],
|
|
1515
|
-
hits: [],
|
|
1516
|
-
success: false,
|
|
1517
|
-
error: err instanceof Error ? err.message : String(err),
|
|
1518
|
-
};
|
|
1519
|
-
}
|
|
1520
|
-
}));
|
|
1521
|
-
const searchMs = Date.now() - searchT0;
|
|
1522
|
-
// Stage 3 — RRF fusion across the per-query lists.
|
|
1523
|
-
const { reciprocalRankFusion } = await import('@claude-flow/embeddings/rrf');
|
|
1524
|
-
const fuseT0 = Date.now();
|
|
1525
|
-
const lists = perQueryResults.map(r => r.hits.map(h => ({ id: h.id, payload: { score: h.score } })));
|
|
1526
|
-
const fused = reciprocalRankFusion(lists, { k, kRrf, listWeights });
|
|
1527
|
-
const fuseMs = Date.now() - fuseT0;
|
|
1528
|
-
const successCount = perQueryResults.filter(r => r.success).length;
|
|
1529
|
-
return {
|
|
1530
|
-
success: successCount === perQueryResults.length,
|
|
1531
|
-
name,
|
|
1532
|
-
k,
|
|
1533
|
-
queryCount: texts.length,
|
|
1534
|
-
perQueryK,
|
|
1535
|
-
kRrf,
|
|
1536
|
-
listWeights: listWeights ?? null,
|
|
1537
|
-
successCount,
|
|
1538
|
-
failureCount: perQueryResults.length - successCount,
|
|
1539
|
-
hits: fused,
|
|
1540
|
-
perQuery: perQueryResults.map(r => ({
|
|
1541
|
-
index: r.index,
|
|
1542
|
-
text: r.text,
|
|
1543
|
-
success: r.success,
|
|
1544
|
-
hitCount: r.hits.length,
|
|
1545
|
-
error: r.success ? undefined : r.error,
|
|
1546
|
-
})),
|
|
1547
|
-
latency: {
|
|
1548
|
-
embeddingMs,
|
|
1549
|
-
searchMs,
|
|
1550
|
-
fuseMs,
|
|
1551
|
-
totalMs: embeddingMs + searchMs + fuseMs,
|
|
1552
|
-
avgPerQueryMs: Math.round(((embeddingMs + searchMs) / texts.length) * 100) / 100,
|
|
1553
|
-
},
|
|
1554
|
-
embeddingDimension: embeddings[0]?.length,
|
|
1555
|
-
};
|
|
1556
|
-
},
|
|
1557
|
-
},
|
|
1558
|
-
// ============================================================
|
|
1559
|
-
// ADR-121 Phase 10 — MMR diversity rerank (alpha.51 CLI)
|
|
1560
|
-
// ============================================================
|
|
1561
|
-
//
|
|
1562
|
-
// Plain top-k often returns near-duplicate chunks. MMR picks a
|
|
1563
|
-
// diversified top-k by trading off relevance against redundancy
|
|
1564
|
-
// (Carbonell & Goldstein 1998). Fetches `fetchMultiplier * k`
|
|
1565
|
-
// candidates from AnnRouter, then reranks to k via mmrRerank.
|
|
1566
|
-
//
|
|
1567
|
-
// Pairs with embeddings_search_text (relevance only). Same caller
|
|
1568
|
-
// contract — text + handle name + k — plus optional `lambda` and
|
|
1569
|
-
// `fetchMultiplier`. Returns diversity stats so callers can
|
|
1570
|
-
// confirm the rerank actually spread the result.
|
|
1571
|
-
{
|
|
1572
|
-
name: 'embeddings_search_text_diverse',
|
|
1573
|
-
description: "Embed a text query, fetch a wider candidate pool from a named AnnRouter handle, and rerank with MMR (Maximal Marginal Relevance) to return a diverse top-k. Use when plain top-k tends to return near-duplicates (e.g. corpora with many paraphrased chunks). λ controls relevance/diversity tradeoff: 1.0 = same as embeddings_search_text, 0.5 = balanced (default), 0.0 = pure diversity. fetchMultiplier controls how many candidates to consider before reranking (default 5×k). Returns hits + diversification stats (averagePairwiseSimilarity — lower is more diverse). For plain (non-diversified) RAG use embeddings_search_text.",
|
|
1574
|
-
category: 'embeddings',
|
|
1575
|
-
inputSchema: {
|
|
1576
|
-
type: 'object',
|
|
1577
|
-
properties: {
|
|
1578
|
-
text: { type: 'string', description: 'Query text. Will be embedded inline.' },
|
|
1579
|
-
name: { type: 'string', description: 'AnnRouter handle name (set by embeddings_ann_router_build).' },
|
|
1580
|
-
k: { type: 'number', description: 'Number of diverse nearest neighbors to return.' },
|
|
1581
|
-
lambda: {
|
|
1582
|
-
type: 'number',
|
|
1583
|
-
description: 'Relevance/diversity tradeoff in [0,1]. 1=pure relevance, 0=pure diversity. Default 0.5.',
|
|
1584
|
-
},
|
|
1585
|
-
fetchMultiplier: {
|
|
1586
|
-
type: 'number',
|
|
1587
|
-
description: 'Candidate pool size = fetchMultiplier * k. Larger = more candidates to diversify from, more compute. Default 5.',
|
|
1588
|
-
},
|
|
1589
|
-
},
|
|
1590
|
-
required: ['text', 'name', 'k'],
|
|
1591
|
-
},
|
|
1592
|
-
handler: async (input) => {
|
|
1593
|
-
const config = loadConfig();
|
|
1594
|
-
if (!config) {
|
|
1595
|
-
return { success: false, error: 'Embeddings not initialized. Run embeddings_init first.' };
|
|
1596
|
-
}
|
|
1597
|
-
const text = input.text;
|
|
1598
|
-
const name = input.name;
|
|
1599
|
-
const k = input.k;
|
|
1600
|
-
const lambda = typeof input.lambda === 'number' ? input.lambda : 0.5;
|
|
1601
|
-
const fetchMultiplier = typeof input.fetchMultiplier === 'number' && input.fetchMultiplier >= 1
|
|
1602
|
-
? input.fetchMultiplier
|
|
1603
|
-
: 5;
|
|
1604
|
-
const tv = validateText(text, 'text');
|
|
1605
|
-
if (!tv.valid)
|
|
1606
|
-
return { success: false, error: tv.error };
|
|
1607
|
-
if (!Number.isInteger(k) || k < 1) {
|
|
1608
|
-
return { success: false, error: 'k must be a positive integer' };
|
|
1609
|
-
}
|
|
1610
|
-
// Stage 1 — embed the query.
|
|
1611
|
-
const embedT0 = Date.now();
|
|
1612
|
-
let embedding;
|
|
1613
|
-
try {
|
|
1614
|
-
embedding = await generateRealEmbedding(text, config.dimension);
|
|
1615
|
-
}
|
|
1616
|
-
catch (err) {
|
|
1617
|
-
return { success: false, error: `embed failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
1618
|
-
}
|
|
1619
|
-
const embeddingMs = Date.now() - embedT0;
|
|
1620
|
-
const queryVec = new Float32Array(embedding);
|
|
1621
|
-
// Stage 2 — fetch a wider pool of candidates from the router.
|
|
1622
|
-
const { getAnnRouterRegistry } = await import('../memory/ann-router-registry.js');
|
|
1623
|
-
const registry = getAnnRouterRegistry();
|
|
1624
|
-
const fetchK = Math.max(k, Math.floor(k * fetchMultiplier));
|
|
1625
|
-
const searchT0 = Date.now();
|
|
1626
|
-
let candidatesRaw;
|
|
1627
|
-
try {
|
|
1628
|
-
candidatesRaw = await registry.search(name, queryVec, fetchK);
|
|
1629
|
-
}
|
|
1630
|
-
catch (err) {
|
|
1631
|
-
return {
|
|
1632
|
-
success: false,
|
|
1633
|
-
name,
|
|
1634
|
-
error: err instanceof Error ? err.message : String(err),
|
|
1635
|
-
latency: { embeddingMs, searchMs: 0, rerankMs: 0 },
|
|
1636
|
-
};
|
|
1637
|
-
}
|
|
1638
|
-
const searchMs = Date.now() - searchT0;
|
|
1639
|
-
// Filter to candidates that have a vector (MMR needs it).
|
|
1640
|
-
// Routers that don't surface vectors (rare) degrade to
|
|
1641
|
-
// plain top-k for safety rather than throwing.
|
|
1642
|
-
const candidatesWithVec = candidatesRaw.filter(c => c.vector != null);
|
|
1643
|
-
if (candidatesWithVec.length === 0) {
|
|
1644
|
-
return {
|
|
1645
|
-
success: true,
|
|
1646
|
-
name,
|
|
1647
|
-
k,
|
|
1648
|
-
hits: candidatesRaw.slice(0, k),
|
|
1649
|
-
mmr: { applied: false, reason: 'no candidate vectors available — degraded to plain top-k' },
|
|
1650
|
-
latency: { embeddingMs, searchMs, rerankMs: 0, totalMs: embeddingMs + searchMs },
|
|
1651
|
-
};
|
|
1652
|
-
}
|
|
1653
|
-
// Stage 3 — MMR rerank.
|
|
1654
|
-
// Sub-path import bypasses the index barrel — TS resolves the
|
|
1655
|
-
// mmr.d.ts directly via the './*' export condition, which
|
|
1656
|
-
// sidesteps a stale-cache issue with the aggregate index.d.ts.
|
|
1657
|
-
const { mmrRerank, averagePairwiseSimilarity } = await import('@claude-flow/embeddings/mmr');
|
|
1658
|
-
const rerankT0 = Date.now();
|
|
1659
|
-
const picked = mmrRerank(candidatesWithVec.map(c => ({
|
|
1660
|
-
id: c.id,
|
|
1661
|
-
vector: c.vector,
|
|
1662
|
-
score: c.score,
|
|
1663
|
-
payload: c.payload,
|
|
1664
|
-
})), queryVec, { k, lambda });
|
|
1665
|
-
const rerankMs = Date.now() - rerankT0;
|
|
1666
|
-
const avgPairSim = averagePairwiseSimilarity(picked);
|
|
1667
|
-
// Strip vectors from the response to keep stdout sane.
|
|
1668
|
-
// Callers wanting the vectors can re-fetch via search.
|
|
1669
|
-
const hits = picked.map(p => ({
|
|
1670
|
-
id: p.id,
|
|
1671
|
-
score: p.relevance,
|
|
1672
|
-
mmrScore: p.mmrScore,
|
|
1673
|
-
relevance: p.relevance,
|
|
1674
|
-
redundancy: p.redundancy,
|
|
1675
|
-
pickOrder: p.pickOrder,
|
|
1676
|
-
payload: p.payload,
|
|
1677
|
-
}));
|
|
1678
|
-
return {
|
|
1679
|
-
success: true,
|
|
1680
|
-
name,
|
|
1681
|
-
k,
|
|
1682
|
-
hits,
|
|
1683
|
-
mmr: {
|
|
1684
|
-
applied: true,
|
|
1685
|
-
lambda,
|
|
1686
|
-
fetchMultiplier,
|
|
1687
|
-
candidatesConsidered: candidatesWithVec.length,
|
|
1688
|
-
averagePairwiseSimilarity: avgPairSim,
|
|
1689
|
-
},
|
|
1690
|
-
latency: {
|
|
1691
|
-
embeddingMs,
|
|
1692
|
-
searchMs,
|
|
1693
|
-
rerankMs,
|
|
1694
|
-
totalMs: embeddingMs + searchMs + rerankMs,
|
|
1695
|
-
},
|
|
1696
|
-
embeddingDimension: embedding.length,
|
|
1697
|
-
};
|
|
1698
|
-
},
|
|
1699
|
-
},
|
|
1700
903
|
];
|
|
1701
904
|
//# sourceMappingURL=embeddings-tools.js.map
|