@absolutejs/absolute 0.19.0-beta.489 → 0.19.0-beta.491
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +76 -20
- package/dist/ai/client/index.js.map +3 -3
- package/dist/ai/index.js +704 -32
- package/dist/ai/index.js.map +11 -9
- package/dist/react/ai/index.js +76 -20
- package/dist/react/ai/index.js.map +3 -3
- package/dist/src/ai/index.d.ts +2 -2
- package/dist/src/ai/rag/collection.d.ts +4 -2
- package/dist/src/ai/rag/index.d.ts +4 -2
- package/dist/src/ai/rag/lexical.d.ts +36 -0
- package/dist/src/ai/rag/quality.d.ts +8 -1
- package/dist/src/ai/rag/queryTransforms.d.ts +17 -0
- package/dist/src/ai/rag/types.d.ts +1 -1
- package/dist/src/vue/ai/useRAG.d.ts +28 -0
- package/dist/src/vue/ai/useRAGSearch.d.ts +28 -0
- package/dist/svelte/ai/index.js +76 -20
- package/dist/svelte/ai/index.js.map +3 -3
- package/dist/types/ai.d.ts +64 -0
- package/dist/vue/ai/index.js +76 -20
- package/dist/vue/ai/index.js.map +3 -3
- package/package.json +7 -7
package/dist/ai/index.js
CHANGED
|
@@ -2113,18 +2113,484 @@ var aiChat = (config) => {
|
|
|
2113
2113
|
init_constants();
|
|
2114
2114
|
import { Elysia as Elysia2 } from "elysia";
|
|
2115
2115
|
|
|
2116
|
+
// src/ai/rag/lexical.ts
|
|
2117
|
+
// Default rank offset for "rrf" fusion: fuseRAGQueryResults adds weight / (constant + rank) per hit.
var DEFAULT_FUSION_CONSTANT = 60;
|
|
2118
|
+
// Words that tokenize() discards before suffix stripping.
var STOP_WORDS = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);
|
|
2151
|
+
/**
 * Lowercases `value`, splits it on runs of non-alphanumeric characters,
 * drops stop words, strips one common English suffix per token and discards
 * tokens shorter than two characters.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Normalized tokens in their original order.
 */
var tokenize = (value) => {
  // Suffix rules are checked in this order; only the first match applies.
  const stripSuffix = (word) => {
    if (word.endsWith("ies") && word.length > 3) {
      return `${word.slice(0, -3)}y`;
    }
    if (word.endsWith("ing") && word.length > 5) {
      return word.slice(0, -3);
    }
    if (word.endsWith("ed") && word.length > 4) {
      return word.slice(0, -2);
    }
    if (word.endsWith("es") && word.length > 4) {
      return word.slice(0, -2);
    }
    if (word.endsWith("s") && word.length > 3) {
      return word.slice(0, -1);
    }
    return word;
  };
  const stems = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const word = piece.trim();
    if (STOP_WORDS.has(word)) {
      continue;
    }
    const stem = stripSuffix(word);
    if (stem.length > 1) {
      stems.push(stem);
    }
  }
  return stems;
};
|
|
2152
|
+
/**
 * Recursively gathers every string and number found in `value`.
 * Strings and numbers become single-element lists, arrays and objects are
 * walked depth-first, and every other kind of value contributes nothing.
 *
 * @param {unknown} value - Metadata value of any shape.
 * @returns {string[]} The collected values, each converted with String().
 */
var collectMetadataStrings = (value) => {
  switch (typeof value) {
    case "string":
    case "number":
      return [String(value)];
    case "object": {
      // typeof null is "object"; null holds nothing to collect.
      if (value === null) {
        return [];
      }
      const children = Array.isArray(value) ? value : Object.values(value);
      return children.flatMap((child) => collectMetadataStrings(child));
    }
    default:
      return [];
  }
};
|
|
2164
|
+
/**
 * Builds the text that lexical scoring searches: title, raw source, a
 * word-expanded copy of the source, chunk text and all metadata strings,
 * joined by single spaces. Empty or missing parts are left out.
 *
 * @param {object} result - Search result with optional title, source, chunkText and metadata.
 * @returns {string} The combined haystack text.
 */
var buildRAGLexicalHaystack = (result) => {
  // Applied in order after path punctuation has been turned into spaces.
  const extensionAliases = [
    [/\bmd\b/g, "markdown"],
    [/\bpptx\b/g, "presentation"],
    [/\bxlsx\b/g, "spreadsheet workbook sheet"],
    [/\bmp3\b/g, "audio transcript media"],
    [/\bmp4\b/g, "video transcript media"],
    [/\bzip\b/g, "archive bundle"]
  ];
  let expandedSource;
  if (typeof result.source === "string") {
    expandedSource = extensionAliases.reduce(
      (text, [pattern, alias]) => text.replace(pattern, alias),
      result.source.replace(/[#/_.-]+/g, " ")
    );
  }
  const parts = [
    result.title,
    result.source,
    expandedSource,
    result.chunkText,
    ...collectMetadataStrings(result.metadata)
  ];
  return parts.filter((part) => Boolean(part)).join(" ");
};
|
|
2171
|
+
/**
 * Scores how well `result` matches `query` lexically.
 * Returns 0 when the query has no usable tokens or none of them occur in the
 * result's haystack. Otherwise sums token coverage, an exact-phrase bonus, a
 * source-path bonus and the file-kind, transcript and archive boosts.
 *
 * @param {string} query - Raw user query.
 * @param {object} result - Candidate search result.
 * @returns {number} Non-negative lexical score.
 */
var scoreRAGLexicalMatch = (query, result) => {
  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return 0;
  }
  const loweredHaystack = buildRAGLexicalHaystack(result).toLowerCase();
  const vocabulary = new Set(tokenize(loweredHaystack));
  let matched = 0;
  for (const token of queryTokens) {
    if (vocabulary.has(token)) {
      matched += 1;
    }
  }
  if (matched === 0) {
    return 0;
  }
  // Fraction of query tokens present in the haystack.
  let total = matched / queryTokens.length;
  // The whole lowercased query appears verbatim in the haystack.
  total += loweredHaystack.includes(query.toLowerCase()) ? 1 : 0;
  // Any query token appears as a substring of the source path.
  const sourceHit = typeof result.source === "string" && queryTokens.some((token) => result.source?.toLowerCase().includes(token));
  total += sourceHit ? 0.5 : 0;
  total += resolveFileKindBoost(queryTokens, result.metadata);
  total += resolveTranscriptBoost(queryTokens, result.metadata);
  total += resolveArchiveBoost(queryTokens, result);
  return total;
};
|
|
2191
|
+
/**
 * Reports whether at least one entry of `values` is present in `tokens`.
 *
 * @param {string[]} tokens - Tokens to search in.
 * @param {string[]} values - Candidate values to look for.
 * @returns {boolean} True on the first match, false when nothing matches.
 */
var hasAnyToken = (tokens, values) => {
  for (const candidate of values) {
    if (tokens.includes(candidate)) {
      return true;
    }
  }
  return false;
};
|
|
2192
|
+
/**
 * Returns a score bonus when the query mentions words associated with the
 * result's `metadata.fileKind` ("office", "archive" or "media").
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {object} [metadata] - Result metadata; only a string `fileKind` is read.
 * @returns {number} 0.75, 0.85 or 0 depending on file kind and keyword match.
 */
var resolveFileKindBoost = (queryTokens, metadata) => {
  const boostRules = new Map([
    ["office", { boost: 0.75, keywords: ["sheet", "worksheet", "workbook", "spreadsheet"] }],
    ["archive", { boost: 0.85, keywords: ["archive", "bundle", "entry", "runbook", "recovery"] }],
    ["media", { boost: 0.75, keywords: ["frontend", "framework", "transcript", "audio", "video"] }]
  ]);
  const rule = typeof metadata?.fileKind === "string" ? boostRules.get(metadata.fileKind) : undefined;
  if (rule === undefined) {
    return 0;
  }
  return hasAnyToken(queryTokens, rule.keywords) ? rule.boost : 0;
};
|
|
2222
|
+
/**
 * Scores the share of query tokens that occur, as substrings, in the
 * combined text of `metadata.mediaSegments`.
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {object} [metadata] - Result metadata; `mediaSegments` is used only when it is an array.
 * @returns {number} A value between 0 and 1; 0 when there is no segment text.
 */
var resolveTranscriptBoost = (queryTokens, metadata) => {
  if (!Array.isArray(metadata?.mediaSegments) || metadata.mediaSegments.length === 0) {
    return 0;
  }
  const texts = [];
  for (const segment of metadata.mediaSegments) {
    // Segments that are not objects, or lack a "text" key, count as empty text.
    const hasText = segment && typeof segment === "object" && ("text" in segment);
    texts.push(hasText ? String(segment.text ?? "") : "");
  }
  const transcript = texts.join(" ").toLowerCase();
  if (transcript.length === 0) {
    return 0;
  }
  let hits = 0;
  for (const token of queryTokens) {
    if (transcript.includes(token)) {
      hits += 1;
    }
  }
  // Math.max keeps the division defined for an empty token list.
  return hits / Math.max(1, queryTokens.length);
};
|
|
2234
|
+
/**
 * Returns a bonus when both the query and the result's archive entry path
 * mention "recovery" (1) or "runbook" (0.8). The path comes from a string
 * `metadata.archivePath`, otherwise from the part of `source` after the
 * first "#".
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {object} result - Search result with optional metadata and source.
 * @returns {number} 1, 0.8 or 0.
 */
var resolveArchiveBoost = (queryTokens, result) => {
  let archivePath = "";
  if (typeof result.metadata?.archivePath === "string") {
    archivePath = result.metadata.archivePath.toLowerCase();
  } else if (typeof result.source === "string" && result.source.includes("#")) {
    archivePath = result.source.split("#")[1]?.toLowerCase() ?? "";
  }
  if (!archivePath) {
    return 0;
  }
  // Checked in order, so "recovery" wins when both keywords match.
  const keywordBoosts = [
    ["recovery", 1],
    ["runbook", 0.8]
  ];
  for (const [keyword, boost] of keywordBoosts) {
    if (queryTokens.includes(keyword) && archivePath.includes(keyword)) {
      return boost;
    }
  }
  return 0;
};
|
|
2247
|
+
/**
 * Pairs each result with its 1-based position in the list.
 *
 * @param {object[]} results - Results in ranked order.
 * @returns {{rank: number, result: object}[]} One entry per result.
 */
var rankResults = (results) => {
  const ranked = [];
  results.forEach((result, position) => {
    ranked.push({ rank: position + 1, result });
  });
  return ranked;
};
|
|
2251
|
+
/**
 * Merges vector and lexical hit lists into one list keyed by `chunkId`.
 *
 * With fusion "max" a chunk's score is the largest `score * weight` it
 * received; with any other fusion value each list adds
 * `weight / (fusionConstant + rank)`. Every fused hit records in
 * `metadata.retrievalSignals` which list(s) produced it. Output is sorted by
 * descending score, ties broken by `chunkId`.
 *
 * @param {object} params
 * @param {string} [params.fusion="rrf"] - Fusion strategy.
 * @param {number} [params.fusionConstant] - Rank offset for rank-based fusion.
 * @param {object[]} [params.lexical=[]] - Lexical hits, best first.
 * @param {number} [params.lexicalWeight=2] - Lexical weight; negatives become 0.
 * @param {object[]} [params.vector=[]] - Vector hits, best first.
 * @param {number} [params.vectorWeight=1] - Vector weight; negatives become 0.
 * @returns {object[]} Fused hits with updated `score` and `metadata`.
 */
var fuseRAGQueryResults = ({
  fusion = "rrf",
  fusionConstant = DEFAULT_FUSION_CONSTANT,
  lexical = [],
  lexicalWeight = 2,
  vector = [],
  vectorWeight = 1
}) => {
  const useMaxFusion = fusion === "max";
  const weights = {
    lexical: Math.max(0, lexicalWeight),
    vector: Math.max(0, vectorWeight)
  };
  const fusedByChunkId = new Map();
  const accumulate = (hits, origin) => {
    const weight = origin === "lexical" ? weights.lexical : weights.vector;
    for (const { rank, result: hit } of rankResults(hits)) {
      const previous = fusedByChunkId.get(hit.chunkId);
      const previousScore = previous?.score ?? 0;
      const contribution = useMaxFusion ? hit.score * weight : weight / (fusionConstant + rank);
      const score = useMaxFusion ? Math.max(previousScore, contribution) : previousScore + contribution;
      // The first list to report a chunk supplies its fields; later lists only add score and signals.
      const base = previous?.result ?? hit;
      const priorMetadata = previous?.result.metadata;
      const priorSignals = priorMetadata && typeof priorMetadata.retrievalSignals === "object" && priorMetadata.retrievalSignals !== null ? priorMetadata.retrievalSignals : {};
      fusedByChunkId.set(hit.chunkId, {
        result: {
          ...base,
          score,
          metadata: {
            ...base.metadata,
            retrievalSignals: {
              lexical: origin === "lexical" || priorSignals.lexical === true,
              vector: origin === "vector" || priorSignals.vector === true
            }
          }
        },
        score
      });
    }
  };
  accumulate(vector, "vector");
  accumulate(lexical, "lexical");
  const fused = [];
  for (const { result, score } of fusedByChunkId.values()) {
    fused.push({ ...result, score });
  }
  return fused.sort((left, right) => {
    if (right.score !== left.score) {
      return right.score - left.score;
    }
    return left.chunkId.localeCompare(right.chunkId);
  });
};
|
|
2295
|
+
/**
 * Normalizes the `retrieval` search option into a complete settings object.
 *
 * Accepts nothing (vector-only defaults), a mode string, or an options
 * object. Numeric options are coerced with Number(); values that are missing
 * or not finite (NaN, Infinity) fall back to their defaults so they cannot
 * turn every fused score into NaN. `fusionConstant` is floored and clamped
 * to at least 1; weights are clamped to at least 0.
 *
 * @param {string|object} [retrieval] - Mode name or retrieval options.
 * @returns {{fusion: string, fusionConstant: number, lexicalTopK: (number|undefined), lexicalWeight: number, mode: string, vectorWeight: number}}
 */
var resolveRAGHybridSearchOptions = (retrieval) => {
  const defaults = {
    fusion: "rrf",
    fusionConstant: DEFAULT_FUSION_CONSTANT,
    lexicalTopK: undefined,
    lexicalWeight: 2,
    mode: "vector",
    vectorWeight: 1
  };
  if (!retrieval) {
    return defaults;
  }
  if (typeof retrieval === "string") {
    return { ...defaults, mode: retrieval };
  }
  // Math.max/Math.floor propagate NaN, so non-finite input is replaced up front.
  const finiteOr = (value, fallback) => {
    const numeric = Number(value ?? fallback);
    return Number.isFinite(numeric) ? numeric : fallback;
  };
  return {
    fusion: retrieval.fusion ?? defaults.fusion,
    fusionConstant: Math.max(1, Math.floor(finiteOr(retrieval.fusionConstant, defaults.fusionConstant))),
    lexicalTopK: retrieval.lexicalTopK,
    lexicalWeight: Math.max(0, finiteOr(retrieval.lexicalWeight, defaults.lexicalWeight)),
    mode: retrieval.mode ?? defaults.mode,
    vectorWeight: Math.max(0, finiteOr(retrieval.vectorWeight, defaults.vectorWeight))
  };
};
|
|
2325
|
+
|
|
2326
|
+
// src/ai/rag/queryTransforms.ts
|
|
2327
|
+
/**
 * Lowercases `value`, splits it on runs of non-alphanumeric characters,
 * drops stop words and normalizes each token: one English suffix is
 * stripped, then a trailing "ck" loses its "k" and a trailing "ay" becomes
 * "i" (both only for tokens longer than four characters). Tokens shorter
 * than two characters are discarded.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Normalized tokens in their original order.
 */
var tokenize2 = (value) => {
  // Suffix rules are checked in this order; only the first match applies.
  const stripSuffix = (word) => {
    if (word.endsWith("ies") && word.length > 3) {
      return `${word.slice(0, -3)}y`;
    }
    if (word.endsWith("ing") && word.length > 5) {
      return word.slice(0, -3);
    }
    if (word.endsWith("ed") && word.length > 4) {
      return word.slice(0, -2);
    }
    if (word.endsWith("es") && word.length > 4) {
      return word.slice(0, -2);
    }
    if (word.endsWith("s") && word.length > 3) {
      return word.slice(0, -1);
    }
    return word;
  };
  const normalized = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const word = piece.trim();
    if (STOP_WORDS2.has(word)) {
      continue;
    }
    let stem = stripSuffix(word);
    if (stem.endsWith("ck") && stem.length > 4) {
      stem = stem.slice(0, -1);
    }
    if (stem.endsWith("ay") && stem.length > 4) {
      stem = `${stem.slice(0, -2)}i`;
    }
    if (stem.length > 1) {
      normalized.push(stem);
    }
  }
  return normalized;
};
|
|
2328
|
+
// Words that tokenize2() discards before suffix stripping.
var STOP_WORDS2 = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);
|
|
2361
|
+
// Search terms the heuristic query transform adds for each domain returned by detectDomains().
var DOMAIN_EXPANSIONS = {
  archive: ["archive", "zip", "bundle", "entry", "runbook", "recovery", "procedure", "runbooks/recovery"],
  audio: ["audio", "media", "recording", "speaker", "transcript", "standup", "mp3"],
  deck: ["slide", "deck", "presentation", "pptx", "roadmap"],
  email: ["email", "mail", "thread", "message", "attachment"],
  frontend: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  image: ["image", "ocr", "scan", "screenshot", "receipt"],
  pdf: ["pdf", "document", "page", "ocr", "scan"],
  spreadsheet: ["sheet", "worksheet", "workbook", "spreadsheet", "xlsx", "regional", "growth"],
  video: ["video", "media", "recording", "transcript", "timestamp"]
};
|
|
2406
|
+
// Related terms the heuristic query transform adds for individual query tokens.
var TERM_EXPANSIONS = {
  frontend: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  framework: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  procedure: ["recovery", "runbook"],
  procedur: ["recovery", "runbook"],
  receipt: ["invoice", "ocr", "pdf"],
  sheet: ["worksheet", "workbook", "xlsx"],
  transcript: ["audio", "video", "media"],
  workbook: ["sheet", "spreadsheet", "xlsx"]
};
|
|
2434
|
+
/**
 * Maps query tokens to the content domains they signal.
 *
 * @param {string[]} tokens - Tokenized query.
 * @returns {string[]} Distinct domain names, ordered by the first token that triggered each.
 */
var detectDomains = (tokens) => {
  // Each keyword belongs to exactly one domain, so a flat lookup is sufficient.
  const domainKeywords = [
    ["spreadsheet", ["sheet", "worksheet", "workbook", "spreadsheet", "xlsx"]],
    ["archive", ["archive", "zip", "bundle"]],
    ["audio", ["audio", "speaker"]],
    ["video", ["video", "timestamp"]],
    ["pdf", ["pdf", "page", "ocr"]],
    ["deck", ["slide", "deck", "presentation"]],
    ["frontend", ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"]],
    ["email", ["email", "mail", "thread"]],
    ["image", ["image", "scan", "screenshot"]]
  ];
  const domainByKeyword = new Map();
  for (const [domain, keywords] of domainKeywords) {
    for (const keyword of keywords) {
      domainByKeyword.set(keyword, domain);
    }
  }
  const detected = new Set();
  for (const token of new Set(tokens)) {
    const domain = domainByKeyword.get(token);
    if (domain !== undefined) {
      detected.add(domain);
    }
  }
  return [...detected];
};
|
|
2468
|
+
/**
 * Trims every string, drops the empty ones and removes duplicates while
 * keeping first-occurrence order.
 *
 * @param {string[]} values - Candidate query strings.
 * @returns {string[]} Distinct non-empty trimmed strings.
 */
var uniqueQueryStrings = (values) => {
  const seen = new Set();
  for (const value of values) {
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      seen.add(trimmed);
    }
  }
  return [...seen];
};
|
|
2469
|
+
/**
 * Creates a query transform that expands a query with domain- and
 * term-specific vocabulary, without calling a model.
 *
 * The transform tokenizes the query and detects content domains. When no
 * domain is detected the query is returned unchanged. Otherwise it returns a
 * rewritten query (tokens + all domain terms + per-token expansions) and one
 * variant per detected domain.
 *
 * @param {object} [options]
 * @param {string} [options.defaultModel] - Overrides the reported model name.
 * @param {string} [options.providerName] - Overrides the reported provider name.
 * @returns {object} Query transform built by createRAGQueryTransform.
 */
var createHeuristicRAGQueryTransform = (options = {}) => createRAGQueryTransform({
  defaultModel: options.defaultModel ?? "absolute-heuristic-query-transform",
  providerName: options.providerName ?? "absolute_heuristic",
  transform: ({ query }) => {
    // Tokens come from user input. A plain `table[token]` lookup would also
    // resolve inherited members such as "constructor" and feed a non-string
    // into uniqueQueryStrings, which throws on `.trim()`.
    const ownList = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] ?? [] : [];
    const tokens = tokenize2(query);
    const domains = detectDomains(tokens);
    if (domains.length === 0) {
      return { query };
    }
    const expandedTerms = domains.flatMap((domain) => ownList(DOMAIN_EXPANSIONS, domain));
    const tokenExpansions = tokens.flatMap((token) => ownList(TERM_EXPANSIONS, token));
    const rewrittenQuery = uniqueQueryStrings([
      ...tokens,
      ...expandedTerms,
      ...tokenExpansions
    ]).join(" ");
    const variants = domains.map((domain) => uniqueQueryStrings([
      ...tokens,
      ...ownList(DOMAIN_EXPANSIONS, domain),
      ...tokenExpansions
    ]).join(" "));
    return {
      query: rewrittenQuery,
      variants
    };
  }
});
|
|
2496
|
+
/**
 * Builds a query-transform descriptor from the given options, copying only
 * `defaultModel`, `providerName` and `transform`.
 *
 * @param {object} options - Source options.
 * @returns {{defaultModel: *, providerName: *, transform: *}} The descriptor.
 */
var createRAGQueryTransform = (options) => {
  const { defaultModel, providerName, transform } = options;
  return { defaultModel, providerName, transform };
};
|
|
2501
|
+
/**
 * Normalizes a query-transform option.
 *
 * @param {Function|object|null|undefined} queryTransform - A bare transform
 *   function, a transform descriptor, or nothing.
 * @returns {object|null} A descriptor wrapping a bare function, the given
 *   descriptor unchanged, or null when no transform was supplied.
 */
var resolveRAGQueryTransform = (queryTransform) => {
  if (typeof queryTransform === "function") {
    return {
      defaultModel: undefined,
      providerName: undefined,
      transform: queryTransform
    };
  }
  return queryTransform ? queryTransform : null;
};
|
|
2514
|
+
/**
 * Runs the configured query transform, if any.
 *
 * @param {object} params
 * @param {object} params.input - Transform input; `query` is required and
 *   `model` defaults to the transform's `defaultModel`.
 * @param {Function|object} [params.queryTransform] - Transform function or descriptor.
 * @returns {Promise<object>} The transform's result, or the original query
 *   with an empty `variants` list when no transform is configured.
 */
var applyRAGQueryTransform = async ({
  input,
  queryTransform
}) => {
  const resolved = resolveRAGQueryTransform(queryTransform);
  if (resolved) {
    const transformInput = {
      ...input,
      model: input.model ?? resolved.defaultModel
    };
    // An async function awaits a returned promise, so sync and async transforms both work.
    return resolved.transform(transformInput);
  }
  return {
    query: input.query,
    variants: []
  };
};
|
|
2530
|
+
|
|
2116
2531
|
// src/ai/rag/reranking.ts
|
|
2117
|
-
var
|
|
2532
|
+
/**
 * Lowercases `value`, splits it on runs of non-alphanumeric characters,
 * drops stop words and normalizes each token: one English suffix is
 * stripped, then a trailing "ck" loses its "k" and a trailing "ay" becomes
 * "i" (both only for tokens longer than four characters). Tokens shorter
 * than two characters are discarded.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Normalized tokens in their original order.
 */
var tokenize3 = (value) => {
  // Ordered [suffix, minimum length exclusive, characters removed, replacement]; first match wins.
  const suffixRules = [
    ["ies", 3, 3, "y"],
    ["ing", 5, 3, ""],
    ["ed", 4, 2, ""],
    ["es", 4, 2, ""],
    ["s", 3, 1, ""]
  ];
  const stripSuffix = (word) => {
    for (const [suffix, minLength, cut, replacement] of suffixRules) {
      if (word.endsWith(suffix) && word.length > minLength) {
        return `${word.slice(0, -cut)}${replacement}`;
      }
    }
    return word;
  };
  const normalized = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const word = piece.trim();
    if (STOP_WORDS3.has(word)) {
      continue;
    }
    let stem = stripSuffix(word);
    if (stem.endsWith("ck") && stem.length > 4) {
      stem = stem.slice(0, -1);
    }
    if (stem.endsWith("ay") && stem.length > 4) {
      stem = `${stem.slice(0, -2)}i`;
    }
    if (stem.length > 1) {
      normalized.push(stem);
    }
  }
  return normalized;
};
|
|
2533
|
+
// Words that tokenize3() discards before suffix stripping.
var STOP_WORDS3 = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);
|
|
2566
|
+
/**
 * Recursively gathers every string and number found in `value`.
 * Arrays and objects are walked depth-first; booleans, null, undefined and
 * other value kinds contribute nothing.
 *
 * @param {unknown} value - Metadata value of any shape.
 * @returns {string[]} The collected values, each converted with String().
 */
var collectMetadataStrings2 = (value) => {
  const isScalar = typeof value === "string" || typeof value === "number";
  if (isScalar) {
    return [String(value)];
  }
  const isContainer = Boolean(value) && typeof value === "object";
  if (!isContainer) {
    return [];
  }
  const children = Array.isArray(value) ? value : Object.values(value);
  const collected = [];
  for (const child of children) {
    collected.push(...collectMetadataStrings2(child));
  }
  return collected;
};
|
|
2118
2578
|
/**
 * Adds heuristic lexical boosts to a result's existing score.
 *
 * Boosts: share of query tokens present in the result's tokens, +1 when the
 * normalized query occurs in the normalized result text, +0.5 when any query
 * token occurs in the source path, and the share of query tokens found in
 * any metadata string. With no query tokens the score is returned unchanged.
 *
 * @param {string[]} queryTokens - Query tokens, already normalized by tokenize3.
 * @param {object} result - Result with score and optional title, source, chunkText, metadata.
 * @returns {number} The boosted score.
 */
var scoreHeuristicMatch = (queryTokens, result) => {
  if (queryTokens.length === 0) {
    return result.score;
  }
  const metadataValues = collectMetadataStrings2(result.metadata);
  // Tokenized once; the same tokens serve the overlap and the phrase check.
  const haystack = tokenize3([result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" "));
  const haystackSet = new Set(haystack);
  const overlap = queryTokens.filter((token) => haystackSet.has(token)).length;
  const overlapBoost = overlap / queryTokens.length;
  const normalizedQuery = queryTokens.join(" ");
  const exactPhraseBoost = haystack.join(" ").includes(normalizedQuery) ? 1 : 0;
  // Lowercase once instead of once per query token.
  const loweredSource = typeof result.source === "string" ? result.source.toLowerCase() : undefined;
  const sourcePathBoost = loweredSource !== undefined && queryTokens.some((token) => loweredSource.includes(token)) ? 0.5 : 0;
  const loweredMetadata = metadataValues.map((value) => value.toLowerCase());
  const metadataBoost = loweredMetadata.length > 0 ? queryTokens.filter((token) => loweredMetadata.some((value) => value.includes(token))).length / queryTokens.length : 0;
  return result.score + overlapBoost + exactPhraseBoost + sourcePathBoost + metadataBoost;
};
|
|
2593
|
+
/**
 * Reduces text to its tokenize3() tokens joined by single spaces.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Space-separated normalized tokens.
 */
var normalizeText = (value) => {
  const tokens = tokenize3(value);
  return tokens.join(" ");
};
|
|
2128
2594
|
var applyRAGReranking = async ({
|
|
2129
2595
|
input,
|
|
2130
2596
|
reranker
|
|
@@ -2143,7 +2609,7 @@ var createHeuristicRAGReranker = (options = {}) => createRAGReranker({
|
|
|
2143
2609
|
defaultModel: options.defaultModel ?? "absolute-heuristic-reranker",
|
|
2144
2610
|
providerName: options.providerName ?? "absolute_heuristic",
|
|
2145
2611
|
rerank: ({ query, results }) => {
|
|
2146
|
-
const queryTokens =
|
|
2612
|
+
const queryTokens = tokenize3(query);
|
|
2147
2613
|
return [...results].map((result, index) => ({
|
|
2148
2614
|
index,
|
|
2149
2615
|
result,
|
|
@@ -3378,8 +3844,26 @@ var prepareRAGDirectoryDocuments = async (input) => prepareRAGDocuments(await lo
|
|
|
3378
3844
|
|
|
3379
3845
|
// src/ai/rag/collection.ts
|
|
3380
3846
|
// Fallback used by createRAGCollection when options.defaultTopK is not provided.
var DEFAULT_TOP_K = 6;
|
|
3847
|
+
/**
 * Collapses hits gathered from several queries into one list, keeping the
 * highest-scoring hit per `chunkId`.
 *
 * @param {object[]} results - Hits, possibly with repeated chunk ids.
 * @returns {object[]} Unique hits sorted by descending score, ties broken by `chunkId`.
 */
var mergeQueryResults = (results) => {
  const bestByChunkId = results.reduce((best, candidate) => {
    const current = best.get(candidate.chunkId);
    // On equal scores the first hit seen is kept.
    if (!current || candidate.score > current.score) {
      best.set(candidate.chunkId, candidate);
    }
    return best;
  }, new Map());
  const byScoreThenChunkId = (left, right) => {
    if (right.score !== left.score) {
      return right.score - left.score;
    }
    return left.chunkId.localeCompare(right.chunkId);
  };
  return Array.from(bestByChunkId.values()).sort(byScoreThenChunkId);
};
|
|
3862
|
+
/**
 * Tells whether the given retrieval mode includes a vector query.
 *
 * @param {string} mode - Retrieval mode.
 * @returns {boolean} True for "vector" and "hybrid".
 */
var shouldRunVectorRetrieval = (mode) => ["vector", "hybrid"].includes(mode);
|
|
3863
|
+
/**
 * Tells whether the given retrieval mode includes a lexical query.
 * "lexical" always does; "hybrid" does only when the store exposes `queryLexical`.
 *
 * @param {string} mode - Retrieval mode.
 * @param {object} store - Store, inspected only in "hybrid" mode.
 * @returns {boolean} Whether lexical retrieval should run.
 */
var shouldRunLexicalRetrieval = (mode, store) => {
  if (mode === "lexical") {
    return true;
  }
  return mode === "hybrid" && Boolean(store.queryLexical);
};
|
|
3381
3864
|
var createRAGCollection = (options) => {
|
|
3382
3865
|
const defaultTopK = options.defaultTopK ?? DEFAULT_TOP_K;
|
|
3866
|
+
const defaultCandidateMultiplier = Math.max(1, Math.floor(options.defaultCandidateMultiplier ?? 4));
|
|
3383
3867
|
const { getCapabilities } = options.store;
|
|
3384
3868
|
const { getStatus } = options.store;
|
|
3385
3869
|
const embeddingProvider = resolveRAGEmbeddingProvider(options.embedding, options.store.embed, options.defaultModel);
|
|
@@ -3391,21 +3875,64 @@ var createRAGCollection = (options) => {
|
|
|
3391
3875
|
};
|
|
3392
3876
|
const search = async (input) => {
|
|
3393
3877
|
const model = input.model ?? options.defaultModel;
|
|
3394
|
-
const
|
|
3878
|
+
const topK = input.topK ?? defaultTopK;
|
|
3879
|
+
const hasReranker = Boolean(input.rerank ?? options.rerank);
|
|
3880
|
+
const candidateTopK = Math.max(topK, Math.floor(input.candidateTopK ?? (hasReranker ? topK * defaultCandidateMultiplier : topK)));
|
|
3881
|
+
const retrieval = resolveRAGHybridSearchOptions(input.retrieval);
|
|
3882
|
+
const transformed = await applyRAGQueryTransform({
|
|
3883
|
+
input: {
|
|
3884
|
+
candidateTopK,
|
|
3885
|
+
filter: input.filter,
|
|
3886
|
+
model,
|
|
3887
|
+
query: input.query,
|
|
3888
|
+
scoreThreshold: input.scoreThreshold,
|
|
3889
|
+
topK
|
|
3890
|
+
},
|
|
3891
|
+
queryTransform: input.queryTransform ?? options.queryTransform
|
|
3892
|
+
});
|
|
3893
|
+
const searchQueries = Array.from(new Set([transformed.query, ...transformed.variants ?? []])).filter(Boolean);
|
|
3894
|
+
const runVector = shouldRunVectorRetrieval(retrieval.mode);
|
|
3895
|
+
const runLexical = shouldRunLexicalRetrieval(retrieval.mode, options.store);
|
|
3896
|
+
const lexicalTopK = Math.max(topK, Math.floor(retrieval.lexicalTopK ?? candidateTopK));
|
|
3897
|
+
const queryVector = runVector ? await embed({
|
|
3395
3898
|
model,
|
|
3396
3899
|
signal: input.signal,
|
|
3397
3900
|
text: input.query
|
|
3398
|
-
}, "query");
|
|
3399
|
-
const
|
|
3400
|
-
|
|
3401
|
-
|
|
3402
|
-
|
|
3403
|
-
|
|
3901
|
+
}, "query") : [];
|
|
3902
|
+
const resultGroups = await Promise.all(searchQueries.map(async (query) => {
|
|
3903
|
+
const [vectorResults2, lexicalResults2] = await Promise.all([
|
|
3904
|
+
runVector ? embed({
|
|
3905
|
+
model,
|
|
3906
|
+
signal: input.signal,
|
|
3907
|
+
text: query
|
|
3908
|
+
}, "query").then((nextQueryVector) => options.store.query({
|
|
3909
|
+
filter: input.filter,
|
|
3910
|
+
queryVector: nextQueryVector,
|
|
3911
|
+
topK: candidateTopK
|
|
3912
|
+
})) : Promise.resolve([]),
|
|
3913
|
+
runLexical ? options.store.queryLexical?.({
|
|
3914
|
+
filter: input.filter,
|
|
3915
|
+
query,
|
|
3916
|
+
topK: lexicalTopK
|
|
3917
|
+
}) ?? Promise.resolve([]) : Promise.resolve([])
|
|
3918
|
+
]);
|
|
3919
|
+
return { lexicalResults: lexicalResults2, vectorResults: vectorResults2 };
|
|
3920
|
+
}));
|
|
3921
|
+
const vectorResults = mergeQueryResults(resultGroups.flatMap((group) => group.vectorResults));
|
|
3922
|
+
const lexicalResults = mergeQueryResults(resultGroups.flatMap((group) => group.lexicalResults));
|
|
3923
|
+
const results = retrieval.mode === "lexical" ? lexicalResults : retrieval.mode === "vector" ? vectorResults : fuseRAGQueryResults({
|
|
3924
|
+
fusion: retrieval.fusion,
|
|
3925
|
+
fusionConstant: retrieval.fusionConstant,
|
|
3926
|
+
lexical: lexicalResults,
|
|
3927
|
+
lexicalWeight: retrieval.lexicalWeight,
|
|
3928
|
+
vector: vectorResults,
|
|
3929
|
+
vectorWeight: retrieval.vectorWeight
|
|
3404
3930
|
});
|
|
3405
3931
|
const rerankInput = {
|
|
3932
|
+
candidateTopK,
|
|
3406
3933
|
filter: input.filter,
|
|
3407
3934
|
model,
|
|
3408
|
-
query:
|
|
3935
|
+
query: transformed.query,
|
|
3409
3936
|
queryVector,
|
|
3410
3937
|
results,
|
|
3411
3938
|
scoreThreshold: input.scoreThreshold,
|
|
@@ -3666,6 +4193,79 @@ var compareRAGRerankers = async ({
|
|
|
3666
4193
|
suiteLabel: suite.label ?? suite.id
|
|
3667
4194
|
};
|
|
3668
4195
|
};
|
|
4196
|
+
/**
 * Names the winning entries of an evaluation comparison.
 *
 * @param {object[]} entries - Entries carrying `response.passingRate`,
 *   `response.summary.averageF1` and `response.summary.averageLatencyMs`.
 * @param {string} idKey - Property holding each entry's id.
 * @returns {object} `{}` for no entries; otherwise `bestByAverageF1`,
 *   `bestByPassingRate` and `fastest`, each an id or undefined when the
 *   winner's id is not a string.
 */
var summarizeEvaluationResponseComparison = (entries, idKey) => {
  if (entries.length === 0) {
    return {};
  }
  const f1Of = (entry) => entry.response.summary.averageF1;
  const latencyOf = (entry) => entry.response.summary.averageLatencyMs;
  const passingRateOf = (entry) => entry.response.passingRate;
  const higherF1First = (left, right) => f1Of(right) - f1Of(left);
  const lowerLatencyFirst = (left, right) => latencyOf(left) - latencyOf(right);
  // Passing rate first, then F1, then latency as the final tie-breaker.
  const higherPassingRateFirst = (left, right) => {
    if (passingRateOf(right) !== passingRateOf(left)) {
      return passingRateOf(right) - passingRateOf(left);
    }
    if (f1Of(right) !== f1Of(left)) {
      return f1Of(right) - f1Of(left);
    }
    return latencyOf(left) - latencyOf(right);
  };
  // Sort a copy so the caller's array order is left untouched.
  const winnerId = (compare) => {
    const [winner] = [...entries].sort(compare);
    const id = winner[idKey];
    return typeof id === "string" ? id : undefined;
  };
  const bestByPassingRate = winnerId(higherPassingRateFirst);
  const bestByAverageF1 = winnerId(higherF1First);
  const fastest = winnerId(lowerLatencyFirst);
  return { bestByAverageF1, bestByPassingRate, fastest };
};
|
|
4218
|
+
/**
 * Extracts the retrieval mode from a comparison candidate.
 *
 * @param {object} candidate - Candidate whose `retrieval` is a mode string, an options object, or absent.
 * @returns {string} The mode, defaulting to "vector".
 */
var resolveRetrievalMode = (candidate) => {
  const { retrieval } = candidate;
  if (!retrieval) {
    return "vector";
  }
  if (typeof retrieval === "string") {
    return retrieval;
  }
  return retrieval.mode ?? "vector";
};
|
|
4224
|
+
/**
 * Evaluates one suite against several retrieval configurations and reports
 * the per-candidate responses, a leaderboard and a summary.
 *
 * Each candidate is evaluated with a copy of `collection` whose `search`
 * applies the candidate's `queryTransform`, `rerank` and `retrieval`, falling
 * back to the caller's search input where the candidate leaves one unset.
 * All candidates are started together through Promise.all.
 *
 * @param {object} params
 * @param {object} params.collection - Collection under evaluation.
 * @param {object} params.suite - Suite with `id`, `input` and optional `label`.
 * @param {object[]} params.retrievals - Candidates with `id` and optional `label`, `retrieval`, `rerank`, `queryTransform`.
 * @param {number} [params.defaultTopK] - Passed through to evaluateRAGCollection.
 * @returns {Promise<object>} `{ entries, leaderboard, summary, suiteId, suiteLabel }`.
 */
var compareRAGRetrievalStrategies = async ({
  collection,
  suite,
  retrievals,
  defaultTopK = DEFAULT_TOP_K2
}) => {
  const evaluateCandidate = async (candidate) => {
    const candidateCollection = {
      ...collection,
      search: (input) => collection.search({
        ...input,
        queryTransform: candidate.queryTransform ?? input.queryTransform,
        rerank: candidate.rerank ?? input.rerank,
        retrieval: candidate.retrieval ?? input.retrieval
      })
    };
    const response = await evaluateRAGCollection({
      collection: candidateCollection,
      defaultTopK,
      input: suite.input,
      rerank: candidate.rerank
    });
    return {
      label: candidate.label ?? candidate.id,
      response,
      retrievalId: candidate.id,
      retrievalMode: resolveRetrievalMode(candidate)
    };
  };
  const entries = await Promise.all(retrievals.map((candidate) => evaluateCandidate(candidate)));
  const leaderboardRuns = entries.map(({ label, response, retrievalId }) => ({
    elapsedMs: response.elapsedMs,
    finishedAt: 0,
    id: retrievalId,
    label,
    response,
    startedAt: 0,
    suiteId: suite.id
  }));
  return {
    entries,
    leaderboard: buildRAGEvaluationLeaderboard(leaderboardRuns),
    summary: summarizeRAGRetrievalComparison(entries),
    suiteId: suite.id,
    suiteLabel: suite.label ?? suite.id
  };
};
|
|
3669
4269
|
/**
 * Identity helper for declaring an evaluation suite; returns its argument unchanged.
 *
 * @param {object} suite - Suite definition.
 * @returns {object} The same `suite` object.
 */
var createRAGEvaluationSuite = (suite) => {
  return suite;
};
|
|
3670
4270
|
var evaluateRAGCollection = async ({
|
|
3671
4271
|
collection,
|
|
@@ -3794,26 +4394,9 @@ var summarizeRAGEvaluationCase = ({
|
|
|
3794
4394
|
};
|
|
3795
4395
|
};
|
|
3796
4396
|
/**
 * Summarizes a reranker comparison by delegating to
 * summarizeEvaluationResponseComparison with "rerankerId" as the id key.
 *
 * @param {object[]} entries - Comparison entries.
 * @returns {object} The shared comparison summary.
 */
var summarizeRAGRerankerComparison = (entries) => summarizeEvaluationResponseComparison(entries, "rerankerId");
|
|
4399
|
+
/**
 * Summarizes a retrieval-strategy comparison.
 *
 * Thin wrapper: forwards `entries` to `summarizeEvaluationResponseComparison`
 * with "retrievalId" as the second argument.
 *
 * @param entries - Comparison entries, forwarded unchanged.
 * @returns Whatever `summarizeEvaluationResponseComparison` returns.
 */
var summarizeRAGRetrievalComparison = (entries) => {
  return summarizeEvaluationResponseComparison(entries, "retrievalId");
};
|
|
3817
4400
|
|
|
3818
4401
|
// src/ai/rag/types.ts
|
|
3819
4402
|
var buildRAGContext = (hits) => {
|
|
@@ -6098,6 +6681,35 @@ var createInMemoryStatus = (dimensions) => ({
|
|
|
6098
6681
|
vectorMode: "in_memory"
|
|
6099
6682
|
});
|
|
6100
6683
|
var createInMemoryRAGStore = (options = {}) => {
|
|
6684
|
+
/**
 * Reports whether a filter value matches a stored value.
 *
 * Identical values (`===`) match immediately. Two non-null objects match when
 * their JSON serializations are equal. Plain objects are serialized with their
 * keys sorted, so `{ a: 1, b: 2 }` matches `{ b: 2, a: 1 }`; a bare
 * `JSON.stringify` comparison would depend on key insertion order and report
 * a false mismatch. Array element order still matters.
 *
 * @param expected - Value requested by the filter.
 * @param actual - Value found on the chunk.
 * @returns {boolean} `true` when the values are considered equal.
 * @throws {TypeError} If either object cannot be serialized by
 *   `JSON.stringify` (for example a circular structure or a BigInt).
 */
const valuesMatch = (expected, actual) => {
  if (actual === expected) {
    return true;
  }
  const bothAreObjects =
    typeof actual === "object" &&
    actual !== null &&
    typeof expected === "object" &&
    expected !== null;
  if (!bothAreObjects) {
    return false;
  }
  // JSON.stringify replacer: re-emit plain objects with sorted keys so the
  // serialized form is independent of insertion order. Arrays and non-plain
  // objects pass through untouched and keep their default serialization.
  const withSortedKeys = (_key, value) => {
    if (typeof value !== "object" || value === null || Array.isArray(value)) {
      return value;
    }
    const prototype = Object.getPrototypeOf(value);
    if (prototype !== Object.prototype && prototype !== null) {
      return value;
    }
    const sortedEntries = Object.entries(value).sort(([left], [right]) => {
      if (left === right) {
        return 0;
      }
      return left < right ? -1 : 1;
    });
    return Object.fromEntries(sortedEntries);
  };
  return JSON.stringify(actual, withSortedKeys) === JSON.stringify(expected, withSortedKeys);
};
|
|
6693
|
+
/**
 * Checks a chunk against an optional filter object.
 *
 * A falsy filter accepts every chunk. Otherwise every filter entry must match:
 * the keys "chunkId", "source" and "title" are compared with the chunk's own
 * fields of the same name, and any other key is looked up in `chunk.metadata`.
 * A chunk without metadata fails any metadata-key entry. Each comparison is
 * delegated to `valuesMatch`.
 *
 * @param chunk - Stored chunk to test.
 * @param filter - Key/value constraints, or a falsy value for "no filter".
 * @returns {boolean} `true` when the chunk satisfies every filter entry.
 */
const matchesFilter = (chunk, filter) => {
  if (!filter) {
    return true;
  }
  for (const [field, wanted] of Object.entries(filter)) {
    let stored;
    switch (field) {
      case "chunkId":
      case "source":
      case "title":
        // These three live directly on the chunk rather than in metadata.
        stored = chunk[field];
        break;
      default:
        if (!chunk.metadata) {
          return false;
        }
        stored = chunk.metadata[field];
    }
    if (!valuesMatch(wanted, stored)) {
      return false;
    }
  }
  return true;
};
|
|
6101
6713
|
const storeChunk = (chunk) => {
|
|
6102
6714
|
const existingIndex = chunks.findIndex((item) => item.chunkId === chunk.chunkId);
|
|
6103
6715
|
if (existingIndex < 0) {
|
|
@@ -6142,6 +6754,30 @@ var createInMemoryRAGStore = (options = {}) => {
|
|
|
6142
6754
|
title: entry.chunk.title
|
|
6143
6755
|
}));
|
|
6144
6756
|
};
|
|
6757
|
+
/**
 * Lexical (keyword) search over the in-memory chunk list.
 *
 * The metadata filter is applied before scoring, so `scoreRAGLexicalMatch`
 * only runs for chunks that can actually be returned. This is the same order
 * the SQLite store's `queryLexical` uses. Chunks whose score is not greater
 * than zero are dropped. Results are ordered by score, highest first, with
 * ties broken by `chunkId` via `localeCompare`.
 *
 * @param input - Query input; reads `input.query`, `input.filter` and
 *   `input.topK`.
 * @returns {Promise<Array>} At most `input.topK` hits, each with `chunkId`,
 *   `chunkText`, `metadata`, `score`, `source` and `title`.
 */
const queryLexical = async (input) => {
  const ranked = chunks
    .filter((chunk) => matchesFilter(chunk, input.filter))
    .map((chunk) => ({
      chunk,
      score: scoreRAGLexicalMatch(input.query, {
        chunkText: chunk.text,
        metadata: chunk.metadata,
        source: chunk.source,
        title: chunk.title
      })
    }))
    .filter(({ score }) => score > 0)
    .sort((left, right) => {
      if (right.score !== left.score) {
        return right.score - left.score;
      }
      // Tie-break on chunkId so equal scores come back in a stable order.
      return left.chunk.chunkId.localeCompare(right.chunk.chunkId);
    });
  return ranked.slice(0, input.topK).map((entry) => ({
    chunkId: entry.chunk.chunkId,
    chunkText: entry.chunk.text,
    metadata: entry.chunk.metadata,
    score: entry.score,
    source: entry.chunk.source,
    title: entry.chunk.title
  }));
};
|
|
6145
6781
|
const upsert = async (input) => {
|
|
6146
6782
|
const next = await Promise.all(input.chunks.map(async (chunk) => ({
|
|
6147
6783
|
...chunk,
|
|
@@ -6158,6 +6794,7 @@ var createInMemoryRAGStore = (options = {}) => {
|
|
|
6158
6794
|
clear,
|
|
6159
6795
|
embed,
|
|
6160
6796
|
query,
|
|
6797
|
+
queryLexical,
|
|
6161
6798
|
upsert,
|
|
6162
6799
|
getCapabilities: () => capabilities,
|
|
6163
6800
|
getStatus: () => createInMemoryStatus(dimensions)
|
|
@@ -6767,6 +7404,31 @@ var createSQLiteRAGStore = (options = {}) => {
|
|
|
6767
7404
|
return queryFallback(input);
|
|
6768
7405
|
}
|
|
6769
7406
|
};
|
|
7407
|
+
/**
 * Lexical (keyword) search for the SQLite-backed store.
 *
 * Loads rows through `jsonStatements.query.all()`, converts them with
 * `toStoredRows` and `mapFilterToRows`, keeps the chunks accepted by
 * `matchesFilter`, scores each with `scoreRAGLexicalMatch`, and drops scores
 * that are not greater than zero. Results are ordered by score, highest
 * first, with ties broken by `chunkId` via `localeCompare`.
 *
 * @param input - Query input; reads `input.query`, `input.filter` and
 *   `input.topK`.
 * @returns {Promise<Array>} At most `input.topK` hits, each with `chunkId`,
 *   `chunkText`, `metadata`, `score`, `source` and `title`.
 */
const queryLexical = async (input) => {
  const storedRows = toStoredRows(jsonStatements.query.all());
  const eligible = mapFilterToRows(storedRows).filter((row) => matchesFilter(row, input.filter));
  const scored = eligible.map((row) => {
    const score = scoreRAGLexicalMatch(input.query, {
      chunkText: row.text,
      metadata: row.metadata,
      source: row.source,
      title: row.title
    });
    return { chunk: row, score };
  });
  const ranked = scored.filter((candidate) => candidate.score > 0);
  // Highest score first; equal scores fall back to chunkId ordering.
  ranked.sort((left, right) =>
    right.score === left.score
      ? left.chunk.chunkId.localeCompare(right.chunk.chunkId)
      : right.score - left.score
  );
  const hits = [];
  for (const { chunk, score } of ranked.slice(0, input.topK)) {
    hits.push({
      chunkId: chunk.chunkId,
      chunkText: chunk.text,
      metadata: chunk.metadata,
      score,
      source: chunk.source,
      title: chunk.title
    });
  }
  return hits;
};
|
|
6770
7432
|
const upsert = async (input) => {
|
|
6771
7433
|
const chunks = input.chunks.length > 0 ? await Promise.all(input.chunks.map(async (chunk) => ({
|
|
6772
7434
|
chunkId: chunk.chunkId,
|
|
@@ -6806,6 +7468,7 @@ var createSQLiteRAGStore = (options = {}) => {
|
|
|
6806
7468
|
clear,
|
|
6807
7469
|
embed,
|
|
6808
7470
|
query,
|
|
7471
|
+
queryLexical,
|
|
6809
7472
|
upsert,
|
|
6810
7473
|
getCapabilities: () => createSQLiteCapabilities(useNative),
|
|
6811
7474
|
getStatus: () => createSQLiteStatus(dimensions, nativeDiagnostics, useNative)
|
|
@@ -7823,9 +8486,12 @@ export {
|
|
|
7823
8486
|
streamAI,
|
|
7824
8487
|
serializeAIMessage,
|
|
7825
8488
|
searchDocuments,
|
|
8489
|
+
scoreRAGLexicalMatch,
|
|
7826
8490
|
runRAGEvaluationSuite,
|
|
7827
8491
|
resolveRAGStreamStage,
|
|
7828
8492
|
resolveRAGReranker,
|
|
8493
|
+
resolveRAGQueryTransform,
|
|
8494
|
+
resolveRAGHybridSearchOptions,
|
|
7829
8495
|
resolveRAGEmbeddingProvider,
|
|
7830
8496
|
resolveAbsoluteSQLiteVecExtensionPath,
|
|
7831
8497
|
resolveAbsoluteSQLiteVec,
|
|
@@ -7871,6 +8537,7 @@ export {
|
|
|
7871
8537
|
geminiOCR,
|
|
7872
8538
|
geminiEmbeddings,
|
|
7873
8539
|
gemini,
|
|
8540
|
+
fuseRAGQueryResults,
|
|
7874
8541
|
executeDryRunRAGEvaluation,
|
|
7875
8542
|
evaluateRAGCollection,
|
|
7876
8543
|
deepseekEmbeddings,
|
|
@@ -7880,6 +8547,7 @@ export {
|
|
|
7880
8547
|
createRAGVector,
|
|
7881
8548
|
createAIStream as createRAGTransport,
|
|
7882
8549
|
createRAGReranker,
|
|
8550
|
+
createRAGQueryTransform,
|
|
7883
8551
|
createRAGPDFOCRExtractor,
|
|
7884
8552
|
createRAGOCRProvider,
|
|
7885
8553
|
createRAGMediaTranscriber,
|
|
@@ -7901,10 +8569,12 @@ export {
|
|
|
7901
8569
|
createLegacyDocumentExtractor,
|
|
7902
8570
|
createInMemoryRAGStore,
|
|
7903
8571
|
createHeuristicRAGReranker,
|
|
8572
|
+
createHeuristicRAGQueryTransform,
|
|
7904
8573
|
createEmailExtractor,
|
|
7905
8574
|
createEPUBExtractor,
|
|
7906
8575
|
createConversationManager,
|
|
7907
8576
|
createBuiltinArchiveExpander,
|
|
8577
|
+
compareRAGRetrievalStrategies,
|
|
7908
8578
|
compareRAGRerankers,
|
|
7909
8579
|
buildRAGUpsertInputFromUploads,
|
|
7910
8580
|
buildRAGUpsertInputFromURLs,
|
|
@@ -7913,6 +8583,7 @@ export {
|
|
|
7913
8583
|
buildRAGStreamProgress as buildRAGTransportProgress,
|
|
7914
8584
|
buildRAGSourceSummaries,
|
|
7915
8585
|
buildRAGSourceGroups,
|
|
8586
|
+
buildRAGLexicalHaystack,
|
|
7916
8587
|
buildRAGGroundingReferences,
|
|
7917
8588
|
buildRAGGroundedAnswer,
|
|
7918
8589
|
buildRAGEvaluationResponse,
|
|
@@ -7920,11 +8591,12 @@ export {
|
|
|
7920
8591
|
buildRAGCitations,
|
|
7921
8592
|
buildRAGCitationReferenceMap,
|
|
7922
8593
|
applyRAGReranking,
|
|
8594
|
+
applyRAGQueryTransform,
|
|
7923
8595
|
anthropicOCR,
|
|
7924
8596
|
alibabaEmbeddings,
|
|
7925
8597
|
alibaba,
|
|
7926
8598
|
aiChat
|
|
7927
8599
|
};
|
|
7928
8600
|
|
|
7929
|
-
//# debugId=
|
|
8601
|
+
//# debugId=F84CC012821C275C64756E2164756E21
|
|
7930
8602
|
//# sourceMappingURL=index.js.map
|