@absolutejs/absolute 0.19.0-beta.489 → 0.19.0-beta.490
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +76 -20
- package/dist/ai/client/index.js.map +3 -3
- package/dist/ai/index.js +476 -32
- package/dist/ai/index.js.map +11 -9
- package/dist/angular/index.js +2 -2
- package/dist/angular/index.js.map +1 -1
- package/dist/angular/server.js +2 -2
- package/dist/angular/server.js.map +1 -1
- package/dist/build.js +2 -2
- package/dist/build.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/react/ai/index.js +76 -20
- package/dist/react/ai/index.js.map +3 -3
- package/dist/src/ai/index.d.ts +2 -2
- package/dist/src/ai/rag/collection.d.ts +4 -2
- package/dist/src/ai/rag/index.d.ts +4 -2
- package/dist/src/ai/rag/lexical.d.ts +36 -0
- package/dist/src/ai/rag/quality.d.ts +8 -1
- package/dist/src/ai/rag/queryTransforms.d.ts +17 -0
- package/dist/src/ai/rag/types.d.ts +1 -1
- package/dist/src/vue/ai/useRAG.d.ts +28 -0
- package/dist/src/vue/ai/useRAGSearch.d.ts +28 -0
- package/dist/svelte/ai/index.js +76 -20
- package/dist/svelte/ai/index.js.map +3 -3
- package/dist/types/ai.d.ts +64 -0
- package/dist/vue/ai/index.js +76 -20
- package/dist/vue/ai/index.js.map +3 -3
- package/package.json +7 -7
package/dist/ai/index.js
CHANGED
|
@@ -2113,18 +2113,256 @@ var aiChat = (config) => {
|
|
|
2113
2113
|
init_constants();
|
|
2114
2114
|
import { Elysia as Elysia2 } from "elysia";
|
|
2115
2115
|
|
|
2116
|
-
// src/ai/rag/
|
|
2116
|
+
// src/ai/rag/lexical.ts
|
|
2117
|
+
// Default constant added to a result's rank when fusing ranked lists: contribution = weight / (constant + rank).
var DEFAULT_FUSION_CONSTANT = 60;
|
|
2117
2118
|
// Lowercases `value`, splits it on runs of characters outside a-z / 0-9 and
// keeps only the tokens that are longer than one character.
var tokenize = (value) => {
  const tokens = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const token = piece.trim();
    if (token.length > 1) {
      tokens.push(token);
    }
  }
  return tokens;
};
|
|
2119
|
+
/**
 * Recursively gathers every string and number found inside a metadata value.
 *
 * Strings and numbers are returned as strings; arrays and objects are walked
 * depth-first in element / `Object.values` order; every other value
 * (booleans, null, undefined, functions, ...) contributes nothing.
 *
 * A value that contains itself (directly or through descendants) is only
 * expanded once per path, so self-referencing metadata no longer overflows the
 * stack. Values that are merely shared between siblings are still expanded
 * each time they appear.
 *
 * @param {unknown} value - Metadata value to scan.
 * @returns {string[]} Flat list of the collected strings.
 */
var collectMetadataStrings = (value) => {
  // Objects currently being expanded on the path from the root to `node`.
  const ancestors = new Set();
  const visit = (node) => {
    if (typeof node === "string" || typeof node === "number") {
      return [String(node)];
    }
    if (!node || typeof node !== "object" || ancestors.has(node)) {
      return [];
    }
    ancestors.add(node);
    const children = Array.isArray(node) ? node : Object.values(node);
    const collected = children.flatMap((child) => visit(child));
    ancestors.delete(node);
    return collected;
  };
  return visit(value);
};
|
|
2131
|
+
// Builds the text that lexical scoring searches: title, source, chunk text and
// every string/number found in the metadata, with falsy parts dropped and the
// rest joined by single spaces.
var buildRAGLexicalHaystack = (result) => {
  const parts = [result.title, result.source, result.chunkText].concat(collectMetadataStrings(result.metadata));
  return parts.filter((part) => Boolean(part)).join(" ");
};
|
|
2137
|
+
/**
 * Scores how well `result` matches `query` on a purely lexical basis.
 *
 * The score is the fraction of query tokens present in the haystack, plus 1
 * when the lowercased query appears verbatim in the haystack, plus 0.5 when
 * any query token occurs inside the result's source string. Returns 0 when
 * the query has no tokens or none of them occur in the haystack.
 *
 * @param {string} query - Raw user query.
 * @param {object} result - Object with title / source / chunkText / metadata.
 * @returns {number} Non-negative lexical score.
 */
var scoreRAGLexicalMatch = (query, result) => {
  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return 0;
  }
  const haystack = buildRAGLexicalHaystack(result).toLowerCase();
  const haystackTokens = new Set(tokenize(haystack));
  let matched = 0;
  for (const token of queryTokens) {
    if (haystackTokens.has(token)) {
      matched += 1;
    }
  }
  if (matched === 0) {
    return 0;
  }
  let score = matched / queryTokens.length;
  if (haystack.includes(query.toLowerCase())) {
    score += 1;
  }
  if (typeof result.source === "string") {
    const lowerSource = result.source.toLowerCase();
    if (queryTokens.some((token) => lowerSource.includes(token))) {
      score += 0.5;
    }
  }
  return score;
};
|
|
2154
|
+
// Pairs each result with its 1-based position in the list.
var rankResults = (results) => {
  const ranked = [];
  let rank = 0;
  for (const result of results) {
    rank += 1;
    ranked.push({ rank, result });
  }
  return ranked;
};
|
|
2158
|
+
/**
 * Fuses a vector result list and a lexical result list into one ranking.
 *
 * With `fusion === "max"` a chunk's score is the largest `score * weight` it
 * received; otherwise each list contributes `weight / (fusionConstant + rank)`
 * and the contributions are summed. Negative weights are clamped to 0. Every
 * fused result carries `metadata.retrievalSignals` recording which lists it
 * appeared in. Output is sorted by score descending, then by chunkId.
 *
 * @param {object} options - fusion, fusionConstant, lexical, lexicalWeight, vector, vectorWeight.
 * @returns {object[]} Fused results.
 */
var fuseRAGQueryResults = ({
  fusion = "rrf",
  fusionConstant = DEFAULT_FUSION_CONSTANT,
  lexical = [],
  lexicalWeight = 2,
  vector = [],
  vectorWeight = 1
}) => {
  const useMaxFusion = fusion === "max";
  const vectorContributionWeight = Math.max(0, vectorWeight);
  const lexicalContributionWeight = Math.max(0, lexicalWeight);
  const fusedByChunkId = new Map();
  // Reads the signals recorded on an already-fused entry, or {} when absent.
  const readSignals = (previous) => {
    const metadata = previous?.result.metadata;
    if (metadata && typeof metadata.retrievalSignals === "object" && metadata.retrievalSignals !== null) {
      return metadata.retrievalSignals;
    }
    return {};
  };
  const accumulate = (rankedEntries, source) => {
    const weight = source === "lexical" ? lexicalContributionWeight : vectorContributionWeight;
    for (const { rank, result: candidate } of rankedEntries) {
      const previous = fusedByChunkId.get(candidate.chunkId);
      const previousScore = previous?.score ?? 0;
      const contribution = useMaxFusion ? candidate.score * weight : weight / (fusionConstant + rank);
      const fusedScore = useMaxFusion ? Math.max(previousScore, contribution) : previousScore + contribution;
      // The first list a chunk was seen in supplies its base fields.
      const base = previous?.result ?? candidate;
      const signals = readSignals(previous);
      fusedByChunkId.set(candidate.chunkId, {
        result: {
          ...base,
          score: fusedScore,
          metadata: {
            ...base.metadata,
            retrievalSignals: {
              lexical: source === "lexical" || signals.lexical === true,
              vector: source === "vector" || signals.vector === true
            }
          }
        },
        score: fusedScore
      });
    }
  };
  accumulate(rankResults(vector), "vector");
  accumulate(rankResults(lexical), "lexical");
  const fused = [];
  for (const { result, score } of fusedByChunkId.values()) {
    fused.push({ ...result, score });
  }
  return fused.sort((left, right) => right.score !== left.score ? right.score - left.score : left.chunkId.localeCompare(right.chunkId));
};
|
|
2202
|
+
/**
 * Normalizes the `retrieval` search option into a full options object.
 *
 * - missing / falsy: vector mode with default fusion settings;
 * - a string: that string is the mode, everything else defaults;
 * - an object: each field falls back to its default, `fusionConstant` is
 *   floored and clamped to at least 1, weights are clamped to at least 0.
 *
 * @param {string|object|undefined} retrieval - Caller-supplied retrieval option.
 * @returns {object} fusion, fusionConstant, lexicalTopK, lexicalWeight, mode, vectorWeight.
 */
var resolveRAGHybridSearchOptions = (retrieval) => {
  if (retrieval && typeof retrieval !== "string") {
    const requestedConstant = retrieval.fusionConstant ?? DEFAULT_FUSION_CONSTANT;
    return {
      fusion: retrieval.fusion ?? "rrf",
      fusionConstant: Math.max(1, Math.floor(requestedConstant)),
      lexicalTopK: retrieval.lexicalTopK,
      lexicalWeight: Math.max(0, retrieval.lexicalWeight ?? 2),
      mode: retrieval.mode ?? "vector",
      vectorWeight: Math.max(0, retrieval.vectorWeight ?? 1)
    };
  }
  // Here `retrieval` is either falsy or a non-empty mode string.
  return {
    fusion: "rrf",
    fusionConstant: DEFAULT_FUSION_CONSTANT,
    lexicalTopK: undefined,
    lexicalWeight: 2,
    mode: retrieval ? retrieval : "vector",
    vectorWeight: 1
  };
};
|
|
2232
|
+
|
|
2233
|
+
// src/ai/rag/queryTransforms.ts
|
|
2234
|
+
// Tokenizes like a plain lowercase alphanumeric split, then strips a simple
// plural suffix from tokens longer than three characters ("ies" -> "y",
// trailing "s" removed) and drops tokens of length one or zero.
var tokenize2 = (value) => {
  const singularize = (token) => {
    if (token.length > 3 && token.endsWith("ies")) {
      return `${token.slice(0, -3)}y`;
    }
    if (token.length > 3 && token.endsWith("s")) {
      return token.slice(0, -1);
    }
    return token;
  };
  const pieces = value.toLowerCase().split(/[^a-z0-9]+/i);
  return pieces.map((piece) => singularize(piece.trim())).filter((token) => token.length > 1);
};
|
|
2235
|
+
// Extra search terms appended to a query for each detected content domain.
// Frozen (table and lists) because the table is shared by every heuristic
// query transform; an accidental mutation would leak into all later queries.
var DOMAIN_EXPANSIONS = Object.freeze({
  archive: Object.freeze(["archive", "zip", "bundle", "entry", "runbook", "recovery"]),
  audio: Object.freeze(["audio", "media", "recording", "speaker", "transcript"]),
  deck: Object.freeze(["slide", "deck", "presentation", "pptx"]),
  email: Object.freeze(["email", "mail", "thread", "message", "attachment"]),
  image: Object.freeze(["image", "ocr", "scan", "screenshot"]),
  pdf: Object.freeze(["pdf", "document", "page", "ocr", "scan"]),
  spreadsheet: Object.freeze(["sheet", "worksheet", "workbook", "spreadsheet", "xlsx"]),
  video: Object.freeze(["video", "media", "recording", "transcript", "timestamp"])
});
|
|
2245
|
+
/**
 * Maps query tokens to the content domains they hint at.
 *
 * @param {string[]} tokens - Normalized query tokens.
 * @returns {string[]} Distinct domain names, in order of first detection.
 */
var detectDomains = (tokens) => {
  // Each trigger token belongs to exactly one domain.
  const domainByToken = new Map([
    ["sheet", "spreadsheet"],
    ["worksheet", "spreadsheet"],
    ["workbook", "spreadsheet"],
    ["spreadsheet", "spreadsheet"],
    ["xlsx", "spreadsheet"],
    ["archive", "archive"],
    ["zip", "archive"],
    ["bundle", "archive"],
    ["audio", "audio"],
    ["speaker", "audio"],
    ["video", "video"],
    ["timestamp", "video"],
    ["pdf", "pdf"],
    ["page", "pdf"],
    ["ocr", "pdf"],
    ["slide", "deck"],
    ["deck", "deck"],
    ["presentation", "deck"],
    ["email", "email"],
    ["mail", "email"],
    ["thread", "email"],
    ["image", "image"],
    ["scan", "image"],
    ["screenshot", "image"]
  ]);
  const detected = new Set();
  for (const token of new Set(tokens)) {
    const domain = domainByToken.get(token);
    if (domain !== undefined) {
      detected.add(domain);
    }
  }
  return Array.from(detected);
};
|
|
2276
|
+
// Trims every value, drops the empty ones and removes duplicates while
// keeping first-seen order.
var uniqueQueryStrings = (values) => {
  const seen = new Set();
  for (const value of values) {
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      seen.add(trimmed);
    }
  }
  return [...seen];
};
|
|
2277
|
+
/**
 * Creates a query transform that expands queries with domain vocabulary.
 *
 * When the query's tokens hint at one or more content domains, the rewritten
 * query is the de-duplicated tokens plus the expansion terms of all detected
 * domains, and one variant is produced per domain (tokens plus that domain's
 * terms). Queries with no detected domain pass through unchanged.
 *
 * @param {object} [options] - Optional `defaultModel` and `providerName` overrides.
 * @returns {object} Query transform descriptor.
 */
var createHeuristicRAGQueryTransform = (options = {}) => {
  const expansionsFor = (domain) => DOMAIN_EXPANSIONS[domain] ?? [];
  const joinUnique = (terms) => uniqueQueryStrings(terms).join(" ");
  const expandQuery = ({ query }) => {
    const tokens = tokenize2(query);
    const domains = detectDomains(tokens);
    if (domains.length === 0) {
      return { query };
    }
    const allExpansions = domains.flatMap((domain) => expansionsFor(domain));
    return {
      query: joinUnique(tokens.concat(allExpansions)),
      variants: domains.map((domain) => joinUnique(tokens.concat(expansionsFor(domain))))
    };
  };
  return createRAGQueryTransform({
    defaultModel: options.defaultModel ?? "absolute-heuristic-query-transform",
    providerName: options.providerName ?? "absolute_heuristic",
    transform: expandQuery
  });
};
|
|
2301
|
+
// Builds a query transform descriptor from `options`, copying only the
// defaultModel, providerName and transform fields.
var createRAGQueryTransform = (options) => {
  const { defaultModel, providerName, transform } = options;
  return { defaultModel, providerName, transform };
};
|
|
2306
|
+
/**
 * Normalizes a query transform option.
 *
 * @param {Function|object|undefined|null} queryTransform - A bare transform
 *   function, a transform descriptor, or nothing.
 * @returns {object|null} A descriptor (bare functions are wrapped with
 *   undefined model/provider), or null when nothing was supplied.
 */
var resolveRAGQueryTransform = (queryTransform) => {
  if (typeof queryTransform === "function") {
    return {
      defaultModel: undefined,
      providerName: undefined,
      transform: queryTransform
    };
  }
  return queryTransform ? queryTransform : null;
};
|
|
2319
|
+
/**
 * Runs the configured query transform, if any.
 *
 * @param {object} args
 * @param {object} args.input - Search input; `model` falls back to the
 *   transform's default model.
 * @param {Function|object} [args.queryTransform] - Transform or descriptor.
 * @returns {Promise<object>} The transform's output, or the original query
 *   with no variants when no transform is configured.
 */
var applyRAGQueryTransform = async ({
  input,
  queryTransform
}) => {
  const resolved = resolveRAGQueryTransform(queryTransform);
  if (resolved) {
    const transformInput = {
      ...input,
      model: input.model ?? resolved.defaultModel
    };
    return resolved.transform(transformInput);
  }
  return {
    query: input.query,
    variants: []
  };
};
|
|
2335
|
+
|
|
2336
|
+
// src/ai/rag/reranking.ts
|
|
2337
|
+
// Lowercases and splits `value` on non-alphanumeric runs, strips a simple
// plural suffix from tokens longer than three characters ("ies" -> "y",
// trailing "s" removed) and drops tokens of length one or zero.
var tokenize3 = (value) => {
  const singularize = (token) => {
    if (token.length > 3 && token.endsWith("ies")) {
      return `${token.slice(0, -3)}y`;
    }
    if (token.length > 3 && token.endsWith("s")) {
      return token.slice(0, -1);
    }
    return token;
  };
  const pieces = value.toLowerCase().split(/[^a-z0-9]+/i);
  return pieces.map((piece) => singularize(piece.trim())).filter((token) => token.length > 1);
};
|
|
2338
|
+
/**
 * Recursively gathers every string and number found inside a metadata value.
 *
 * Strings and numbers are returned as strings; arrays and objects are walked
 * depth-first in element / `Object.values` order; all other values contribute
 * nothing. A value that contains itself is only expanded once per path, so
 * self-referencing metadata no longer overflows the stack; values shared
 * between siblings are still expanded each time they appear.
 *
 * @param {unknown} value - Metadata value to scan.
 * @returns {string[]} Flat list of the collected strings.
 */
var collectMetadataStrings2 = (value) => {
  // Objects currently being expanded on the path from the root to `node`.
  const ancestors = new Set();
  const visit = (node) => {
    if (typeof node === "string" || typeof node === "number") {
      return [String(node)];
    }
    if (!node || typeof node !== "object" || ancestors.has(node)) {
      return [];
    }
    ancestors.add(node);
    const children = Array.isArray(node) ? node : Object.values(node);
    const collected = children.flatMap((child) => visit(child));
    ancestors.delete(node);
    return collected;
  };
  return visit(value);
};
|
|
2118
2350
|
/**
 * Heuristic rerank score for one result.
 *
 * Starts from the result's existing score and adds: the fraction of query
 * tokens found among the result's tokens, 1 when the normalized text contains
 * the joined query tokens, 0.5 when any token occurs in the source string,
 * and the fraction of tokens found inside any metadata string. With no query
 * tokens the existing score is returned unchanged.
 *
 * @param {string[]} queryTokens - Normalized query tokens.
 * @param {object} result - Result with score / title / source / chunkText / metadata.
 * @returns {number} Adjusted score.
 */
var scoreHeuristicMatch = (queryTokens, result) => {
  const tokenCount = queryTokens.length;
  if (tokenCount === 0) {
    return result.score;
  }
  const metadataValues = collectMetadataStrings2(result.metadata);
  const searchableText = [result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ");
  const haystackSet = new Set(tokenize3(searchableText));
  const countTokens = (predicate) => queryTokens.filter(predicate).length;
  const overlapBoost = countTokens((token) => haystackSet.has(token)) / tokenCount;
  const normalizedQuery = queryTokens.join(" ");
  const exactPhraseBoost = normalizeText(searchableText).includes(normalizedQuery) ? 1 : 0;
  let sourcePathBoost = 0;
  if (typeof result.source === "string") {
    const lowerSource = result.source.toLowerCase();
    if (queryTokens.some((token) => lowerSource.includes(token))) {
      sourcePathBoost = 0.5;
    }
  }
  let metadataBoost = 0;
  if (metadataValues.length > 0) {
    const lowerMetadata = metadataValues.map((value) => value.toLowerCase());
    metadataBoost = countTokens((token) => lowerMetadata.some((value) => value.includes(token))) / tokenCount;
  }
  return result.score + overlapBoost + exactPhraseBoost + sourcePathBoost + metadataBoost;
};
|
|
2365
|
+
// Reduces text to its tokens (as produced by tokenize3) joined by single spaces.
var normalizeText = (value) => {
  const tokens = tokenize3(value);
  return tokens.join(" ");
};
|
|
2128
2366
|
var applyRAGReranking = async ({
|
|
2129
2367
|
input,
|
|
2130
2368
|
reranker
|
|
@@ -2143,7 +2381,7 @@ var createHeuristicRAGReranker = (options = {}) => createRAGReranker({
|
|
|
2143
2381
|
defaultModel: options.defaultModel ?? "absolute-heuristic-reranker",
|
|
2144
2382
|
providerName: options.providerName ?? "absolute_heuristic",
|
|
2145
2383
|
rerank: ({ query, results }) => {
|
|
2146
|
-
const queryTokens =
|
|
2384
|
+
const queryTokens = tokenize3(query);
|
|
2147
2385
|
return [...results].map((result, index) => ({
|
|
2148
2386
|
index,
|
|
2149
2387
|
result,
|
|
@@ -3378,8 +3616,26 @@ var prepareRAGDirectoryDocuments = async (input) => prepareRAGDocuments(await lo
|
|
|
3378
3616
|
|
|
3379
3617
|
// src/ai/rag/collection.ts
|
|
3380
3618
|
// Fallback result count for a collection when `options.defaultTopK` is not provided.
var DEFAULT_TOP_K = 6;
|
|
3619
|
+
/**
 * De-duplicates results by chunkId, keeping the highest-scoring occurrence
 * (the earliest one on ties).
 *
 * @param {object[]} results - Results with `chunkId` and `score`.
 * @returns {object[]} Unique results sorted by score descending, then chunkId.
 */
var mergeQueryResults = (results) => {
  const bestByChunkId = results.reduce((best, candidate) => {
    const current = best.get(candidate.chunkId);
    if (!current || candidate.score > current.score) {
      best.set(candidate.chunkId, candidate);
    }
    return best;
  }, new Map());
  const byScoreThenId = (left, right) => right.score !== left.score ? right.score - left.score : left.chunkId.localeCompare(right.chunkId);
  return Array.from(bestByChunkId.values()).sort(byScoreThenId);
};
|
|
3634
|
+
// Vector retrieval runs in "vector" and "hybrid" modes.
var shouldRunVectorRetrieval = (mode) => ["vector", "hybrid"].includes(mode);
|
|
3635
|
+
// Lexical retrieval always runs in "lexical" mode; in "hybrid" mode it runs
// only when the store exposes a `queryLexical` member.
var shouldRunLexicalRetrieval = (mode, store) => {
  if (mode === "lexical") {
    return true;
  }
  return mode === "hybrid" && Boolean(store.queryLexical);
};
|
|
3381
3636
|
var createRAGCollection = (options) => {
|
|
3382
3637
|
const defaultTopK = options.defaultTopK ?? DEFAULT_TOP_K;
|
|
3638
|
+
const defaultCandidateMultiplier = Math.max(1, Math.floor(options.defaultCandidateMultiplier ?? 4));
|
|
3383
3639
|
const { getCapabilities } = options.store;
|
|
3384
3640
|
const { getStatus } = options.store;
|
|
3385
3641
|
const embeddingProvider = resolveRAGEmbeddingProvider(options.embedding, options.store.embed, options.defaultModel);
|
|
@@ -3391,21 +3647,64 @@ var createRAGCollection = (options) => {
|
|
|
3391
3647
|
};
|
|
3392
3648
|
const search = async (input) => {
|
|
3393
3649
|
const model = input.model ?? options.defaultModel;
|
|
3394
|
-
const
|
|
3650
|
+
const topK = input.topK ?? defaultTopK;
|
|
3651
|
+
const hasReranker = Boolean(input.rerank ?? options.rerank);
|
|
3652
|
+
const candidateTopK = Math.max(topK, Math.floor(input.candidateTopK ?? (hasReranker ? topK * defaultCandidateMultiplier : topK)));
|
|
3653
|
+
const retrieval = resolveRAGHybridSearchOptions(input.retrieval);
|
|
3654
|
+
const transformed = await applyRAGQueryTransform({
|
|
3655
|
+
input: {
|
|
3656
|
+
candidateTopK,
|
|
3657
|
+
filter: input.filter,
|
|
3658
|
+
model,
|
|
3659
|
+
query: input.query,
|
|
3660
|
+
scoreThreshold: input.scoreThreshold,
|
|
3661
|
+
topK
|
|
3662
|
+
},
|
|
3663
|
+
queryTransform: input.queryTransform ?? options.queryTransform
|
|
3664
|
+
});
|
|
3665
|
+
const searchQueries = Array.from(new Set([transformed.query, ...transformed.variants ?? []])).filter(Boolean);
|
|
3666
|
+
const runVector = shouldRunVectorRetrieval(retrieval.mode);
|
|
3667
|
+
const runLexical = shouldRunLexicalRetrieval(retrieval.mode, options.store);
|
|
3668
|
+
const lexicalTopK = Math.max(topK, Math.floor(retrieval.lexicalTopK ?? candidateTopK));
|
|
3669
|
+
const queryVector = runVector ? await embed({
|
|
3395
3670
|
model,
|
|
3396
3671
|
signal: input.signal,
|
|
3397
3672
|
text: input.query
|
|
3398
|
-
}, "query");
|
|
3399
|
-
const
|
|
3400
|
-
|
|
3401
|
-
|
|
3402
|
-
|
|
3403
|
-
|
|
3673
|
+
}, "query") : [];
|
|
3674
|
+
const resultGroups = await Promise.all(searchQueries.map(async (query) => {
|
|
3675
|
+
const [vectorResults2, lexicalResults2] = await Promise.all([
|
|
3676
|
+
runVector ? embed({
|
|
3677
|
+
model,
|
|
3678
|
+
signal: input.signal,
|
|
3679
|
+
text: query
|
|
3680
|
+
}, "query").then((nextQueryVector) => options.store.query({
|
|
3681
|
+
filter: input.filter,
|
|
3682
|
+
queryVector: nextQueryVector,
|
|
3683
|
+
topK: candidateTopK
|
|
3684
|
+
})) : Promise.resolve([]),
|
|
3685
|
+
runLexical ? options.store.queryLexical?.({
|
|
3686
|
+
filter: input.filter,
|
|
3687
|
+
query,
|
|
3688
|
+
topK: lexicalTopK
|
|
3689
|
+
}) ?? Promise.resolve([]) : Promise.resolve([])
|
|
3690
|
+
]);
|
|
3691
|
+
return { lexicalResults: lexicalResults2, vectorResults: vectorResults2 };
|
|
3692
|
+
}));
|
|
3693
|
+
const vectorResults = mergeQueryResults(resultGroups.flatMap((group) => group.vectorResults));
|
|
3694
|
+
const lexicalResults = mergeQueryResults(resultGroups.flatMap((group) => group.lexicalResults));
|
|
3695
|
+
const results = retrieval.mode === "lexical" ? lexicalResults : retrieval.mode === "vector" ? vectorResults : fuseRAGQueryResults({
|
|
3696
|
+
fusion: retrieval.fusion,
|
|
3697
|
+
fusionConstant: retrieval.fusionConstant,
|
|
3698
|
+
lexical: lexicalResults,
|
|
3699
|
+
lexicalWeight: retrieval.lexicalWeight,
|
|
3700
|
+
vector: vectorResults,
|
|
3701
|
+
vectorWeight: retrieval.vectorWeight
|
|
3404
3702
|
});
|
|
3405
3703
|
const rerankInput = {
|
|
3704
|
+
candidateTopK,
|
|
3406
3705
|
filter: input.filter,
|
|
3407
3706
|
model,
|
|
3408
|
-
query:
|
|
3707
|
+
query: transformed.query,
|
|
3409
3708
|
queryVector,
|
|
3410
3709
|
results,
|
|
3411
3710
|
scoreThreshold: input.scoreThreshold,
|
|
@@ -3666,6 +3965,79 @@ var compareRAGRerankers = async ({
|
|
|
3666
3965
|
suiteLabel: suite.label ?? suite.id
|
|
3667
3966
|
};
|
|
3668
3967
|
};
|
|
3968
|
+
/**
 * Picks the winning entry ids of an evaluation comparison.
 *
 * @param {object[]} entries - Entries with `response.passingRate`,
 *   `response.summary.averageF1` and `response.summary.averageLatencyMs`.
 * @param {string} idKey - Property holding each entry's id.
 * @returns {object} `{}` for no entries; otherwise the ids (or undefined when
 *   the id is not a string) of the best entry by average F1, by passing rate
 *   (ties broken by F1, then lower latency) and by lowest latency.
 */
var summarizeEvaluationResponseComparison = (entries, idKey) => {
  if (entries.length === 0) {
    return {};
  }
  const passingRateOf = (entry) => entry.response.passingRate;
  const f1Of = (entry) => entry.response.summary.averageF1;
  const latencyOf = (entry) => entry.response.summary.averageLatencyMs;
  const byF1Descending = (left, right) => f1Of(right) - f1Of(left);
  const byLatencyAscending = (left, right) => latencyOf(left) - latencyOf(right);
  const byPassingRateDescending = (left, right) => {
    if (passingRateOf(right) !== passingRateOf(left)) {
      return passingRateOf(right) - passingRateOf(left);
    }
    if (f1Of(right) !== f1Of(left)) {
      return byF1Descending(left, right);
    }
    return byLatencyAscending(left, right);
  };
  // Sorts a copy so the caller's array order is left untouched.
  const idOfBest = (comparator) => {
    const [best] = [...entries].sort(comparator);
    const id = best[idKey];
    return typeof id === "string" ? id : undefined;
  };
  return {
    bestByAverageF1: idOfBest(byF1Descending),
    bestByPassingRate: idOfBest(byPassingRateDescending),
    fastest: idOfBest(byLatencyAscending)
  };
};
|
|
3990
|
+
// Returns the retrieval mode a comparison candidate asks for: the string
// itself, the object's `mode`, or "vector" when nothing is specified.
var resolveRetrievalMode = (candidate) => {
  const { retrieval } = candidate;
  if (!retrieval) {
    return "vector";
  }
  if (typeof retrieval === "string") {
    return retrieval;
  }
  return retrieval.mode ?? "vector";
};
|
|
3996
|
+
/**
 * Evaluates one suite against several retrieval candidates and compares them.
 *
 * Each candidate is evaluated against a copy of the collection whose `search`
 * forces the candidate's queryTransform / rerank / retrieval settings (falling
 * back to the per-call values). All candidates are evaluated concurrently.
 *
 * @param {object} args - collection, suite, retrievals, optional defaultTopK.
 * @returns {Promise<object>} entries, leaderboard, summary, suiteId, suiteLabel.
 */
var compareRAGRetrievalStrategies = async ({
  collection,
  suite,
  retrievals,
  defaultTopK = DEFAULT_TOP_K2
}) => {
  const evaluateCandidate = async (candidate) => {
    const searchWithCandidate = (input) => collection.search({
      ...input,
      queryTransform: candidate.queryTransform ?? input.queryTransform,
      rerank: candidate.rerank ?? input.rerank,
      retrieval: candidate.retrieval ?? input.retrieval
    });
    const response = await evaluateRAGCollection({
      collection: {
        ...collection,
        search: searchWithCandidate
      },
      defaultTopK,
      input: suite.input,
      rerank: candidate.rerank
    });
    return {
      label: candidate.label ?? candidate.id,
      response,
      retrievalId: candidate.id,
      retrievalMode: resolveRetrievalMode(candidate)
    };
  };
  const entries = await Promise.all(retrievals.map((candidate) => evaluateCandidate(candidate)));
  // Leaderboard rows carry zeroed start/finish timestamps.
  const leaderboardRuns = entries.map((entry) => ({
    elapsedMs: entry.response.elapsedMs,
    finishedAt: 0,
    id: entry.retrievalId,
    label: entry.label,
    response: entry.response,
    startedAt: 0,
    suiteId: suite.id
  }));
  const leaderboard = buildRAGEvaluationLeaderboard(leaderboardRuns);
  return {
    entries,
    leaderboard,
    summary: summarizeRAGRetrievalComparison(entries),
    suiteId: suite.id,
    suiteLabel: suite.label ?? suite.id
  };
};
|
|
3669
4041
|
// Identity helper: returns the suite definition it is given, unchanged.
var createRAGEvaluationSuite = (suite) => {
  return suite;
};
|
|
3670
4042
|
var evaluateRAGCollection = async ({
|
|
3671
4043
|
collection,
|
|
@@ -3794,26 +4166,9 @@ var summarizeRAGEvaluationCase = ({
|
|
|
3794
4166
|
};
|
|
3795
4167
|
};
|
|
3796
4168
|
// Summarizes a reranker comparison, identifying entries by `rerankerId`.
var summarizeRAGRerankerComparison = (entries) => summarizeEvaluationResponseComparison(entries, "rerankerId");
|
|
4171
|
+
// Summarizes a retrieval-strategy comparison, identifying entries by `retrievalId`.
var summarizeRAGRetrievalComparison = (entries) => {
  return summarizeEvaluationResponseComparison(entries, "retrievalId");
};
|
|
3817
4172
|
|
|
3818
4173
|
// src/ai/rag/types.ts
|
|
3819
4174
|
var buildRAGContext = (hits) => {
|
|
@@ -6098,6 +6453,35 @@ var createInMemoryStatus = (dimensions) => ({
|
|
|
6098
6453
|
vectorMode: "in_memory"
|
|
6099
6454
|
});
|
|
6100
6455
|
var createInMemoryRAGStore = (options = {}) => {
|
|
6456
|
+
/**
 * Tells whether a filter value matches a stored value.
 *
 * Identical values match. Two non-null objects (including arrays) match when
 * their JSON forms are equal after sorting object keys, so `{a: 1, b: 2}`
 * matches `{b: 2, a: 1}`; array element order still matters. Everything that
 * matched under plain `JSON.stringify` comparison still matches.
 *
 * @param {unknown} expected - Value from the filter.
 * @param {unknown} actual - Value stored on the chunk.
 * @returns {boolean} Whether the two values match.
 */
const valuesMatch = (expected, actual) => {
  if (actual === expected) {
    return true;
  }
  if (typeof actual !== "object" || actual === null || typeof expected !== "object" || expected === null) {
    return false;
  }
  // Serialize with object keys in sorted order so key order cannot cause a mismatch.
  const canonicalJSON = (value) => JSON.stringify(value, (_key, entry) => {
    if (entry && typeof entry === "object" && !Array.isArray(entry)) {
      const sortedEntries = Object.entries(entry).sort(([left], [right]) => left < right ? -1 : left > right ? 1 : 0);
      return Object.fromEntries(sortedEntries);
    }
    return entry;
  });
  return canonicalJSON(actual) === canonicalJSON(expected);
};
|
|
6465
|
+
/**
 * Tells whether a chunk satisfies every key of a filter.
 *
 * `chunkId`, `source` and `title` are compared against the chunk's own
 * fields; any other key is looked up in `chunk.metadata` and fails when the
 * chunk has no metadata. A missing filter matches everything.
 *
 * @param {object} chunk - Stored chunk.
 * @param {object} [filter] - Key/value constraints.
 * @returns {boolean} Whether the chunk passes the filter.
 */
const matchesFilter = (chunk, filter) => {
  if (!filter) {
    return true;
  }
  const chunkFieldKeys = ["chunkId", "source", "title"];
  for (const [key, expected] of Object.entries(filter)) {
    let actual;
    if (chunkFieldKeys.includes(key)) {
      actual = chunk[key];
    } else if (chunk.metadata) {
      actual = chunk.metadata[key];
    } else {
      return false;
    }
    if (!valuesMatch(expected, actual)) {
      return false;
    }
  }
  return true;
};
|
|
6101
6485
|
const storeChunk = (chunk) => {
|
|
6102
6486
|
const existingIndex = chunks.findIndex((item) => item.chunkId === chunk.chunkId);
|
|
6103
6487
|
if (existingIndex < 0) {
|
|
@@ -6142,6 +6526,30 @@ var createInMemoryRAGStore = (options = {}) => {
|
|
|
6142
6526
|
title: entry.chunk.title
|
|
6143
6527
|
}));
|
|
6144
6528
|
};
|
|
6529
|
+
/**
 * Lexical search over the in-memory chunks.
 *
 * Chunks are filtered first and only the survivors are scored, so chunks
 * excluded by the filter no longer pay for tokenization and scoring. The
 * returned results are the same as scoring first and filtering afterwards.
 *
 * @param {object} input - `query`, optional `filter`, and `topK`.
 * @returns {Promise<object[]>} Up to `topK` results with a positive lexical
 *   score, sorted by score descending, then chunkId.
 */
const queryLexical = async (input) => {
  const scored = [];
  for (const chunk of chunks) {
    if (!matchesFilter(chunk, input.filter)) {
      continue;
    }
    const score = scoreRAGLexicalMatch(input.query, {
      chunkText: chunk.text,
      metadata: chunk.metadata,
      source: chunk.source,
      title: chunk.title
    });
    if (score > 0) {
      scored.push({ chunk, score });
    }
  }
  scored.sort((left, right) => {
    if (right.score !== left.score) {
      return right.score - left.score;
    }
    return left.chunk.chunkId.localeCompare(right.chunk.chunkId);
  });
  return scored.slice(0, input.topK).map((entry) => ({
    chunkId: entry.chunk.chunkId,
    chunkText: entry.chunk.text,
    metadata: entry.chunk.metadata,
    score: entry.score,
    source: entry.chunk.source,
    title: entry.chunk.title
  }));
};
|
|
6145
6553
|
const upsert = async (input) => {
|
|
6146
6554
|
const next = await Promise.all(input.chunks.map(async (chunk) => ({
|
|
6147
6555
|
...chunk,
|
|
@@ -6158,6 +6566,7 @@ var createInMemoryRAGStore = (options = {}) => {
|
|
|
6158
6566
|
clear,
|
|
6159
6567
|
embed,
|
|
6160
6568
|
query,
|
|
6569
|
+
queryLexical,
|
|
6161
6570
|
upsert,
|
|
6162
6571
|
getCapabilities: () => capabilities,
|
|
6163
6572
|
getStatus: () => createInMemoryStatus(dimensions)
|
|
@@ -6767,6 +7176,31 @@ var createSQLiteRAGStore = (options = {}) => {
|
|
|
6767
7176
|
return queryFallback(input);
|
|
6768
7177
|
}
|
|
6769
7178
|
};
|
|
7179
|
+
/**
 * Lexical search over the rows returned by the store's JSON query statement.
 *
 * Rows are loaded, filtered with `matchesFilter`, scored with
 * `scoreRAGLexicalMatch`, and the positively scored ones are returned best
 * first (ties broken by chunkId), limited to `input.topK`.
 *
 * @param {object} input - `query`, optional `filter`, and `topK`.
 * @returns {Promise<object[]>} Matching results.
 */
const queryLexical = async (input) => {
  const storedRows = toStoredRows(jsonStatements.query.all());
  const candidates = mapFilterToRows(storedRows).filter((chunk) => matchesFilter(chunk, input.filter));
  const scored = [];
  for (const chunk of candidates) {
    const score = scoreRAGLexicalMatch(input.query, {
      chunkText: chunk.text,
      metadata: chunk.metadata,
      source: chunk.source,
      title: chunk.title
    });
    if (score > 0) {
      scored.push({ chunk, score });
    }
  }
  scored.sort((left, right) => right.score !== left.score ? right.score - left.score : left.chunk.chunkId.localeCompare(right.chunk.chunkId));
  const limited = scored.slice(0, input.topK);
  return limited.map(({ chunk, score }) => ({
    chunkId: chunk.chunkId,
    chunkText: chunk.text,
    metadata: chunk.metadata,
    score,
    source: chunk.source,
    title: chunk.title
  }));
};
|
|
6770
7204
|
const upsert = async (input) => {
|
|
6771
7205
|
const chunks = input.chunks.length > 0 ? await Promise.all(input.chunks.map(async (chunk) => ({
|
|
6772
7206
|
chunkId: chunk.chunkId,
|
|
@@ -6806,6 +7240,7 @@ var createSQLiteRAGStore = (options = {}) => {
|
|
|
6806
7240
|
clear,
|
|
6807
7241
|
embed,
|
|
6808
7242
|
query,
|
|
7243
|
+
queryLexical,
|
|
6809
7244
|
upsert,
|
|
6810
7245
|
getCapabilities: () => createSQLiteCapabilities(useNative),
|
|
6811
7246
|
getStatus: () => createSQLiteStatus(dimensions, nativeDiagnostics, useNative)
|
|
@@ -7823,9 +8258,12 @@ export {
|
|
|
7823
8258
|
streamAI,
|
|
7824
8259
|
serializeAIMessage,
|
|
7825
8260
|
searchDocuments,
|
|
8261
|
+
scoreRAGLexicalMatch,
|
|
7826
8262
|
runRAGEvaluationSuite,
|
|
7827
8263
|
resolveRAGStreamStage,
|
|
7828
8264
|
resolveRAGReranker,
|
|
8265
|
+
resolveRAGQueryTransform,
|
|
8266
|
+
resolveRAGHybridSearchOptions,
|
|
7829
8267
|
resolveRAGEmbeddingProvider,
|
|
7830
8268
|
resolveAbsoluteSQLiteVecExtensionPath,
|
|
7831
8269
|
resolveAbsoluteSQLiteVec,
|
|
@@ -7871,6 +8309,7 @@ export {
|
|
|
7871
8309
|
geminiOCR,
|
|
7872
8310
|
geminiEmbeddings,
|
|
7873
8311
|
gemini,
|
|
8312
|
+
fuseRAGQueryResults,
|
|
7874
8313
|
executeDryRunRAGEvaluation,
|
|
7875
8314
|
evaluateRAGCollection,
|
|
7876
8315
|
deepseekEmbeddings,
|
|
@@ -7880,6 +8319,7 @@ export {
|
|
|
7880
8319
|
createRAGVector,
|
|
7881
8320
|
createAIStream as createRAGTransport,
|
|
7882
8321
|
createRAGReranker,
|
|
8322
|
+
createRAGQueryTransform,
|
|
7883
8323
|
createRAGPDFOCRExtractor,
|
|
7884
8324
|
createRAGOCRProvider,
|
|
7885
8325
|
createRAGMediaTranscriber,
|
|
@@ -7901,10 +8341,12 @@ export {
|
|
|
7901
8341
|
createLegacyDocumentExtractor,
|
|
7902
8342
|
createInMemoryRAGStore,
|
|
7903
8343
|
createHeuristicRAGReranker,
|
|
8344
|
+
createHeuristicRAGQueryTransform,
|
|
7904
8345
|
createEmailExtractor,
|
|
7905
8346
|
createEPUBExtractor,
|
|
7906
8347
|
createConversationManager,
|
|
7907
8348
|
createBuiltinArchiveExpander,
|
|
8349
|
+
compareRAGRetrievalStrategies,
|
|
7908
8350
|
compareRAGRerankers,
|
|
7909
8351
|
buildRAGUpsertInputFromUploads,
|
|
7910
8352
|
buildRAGUpsertInputFromURLs,
|
|
@@ -7913,6 +8355,7 @@ export {
|
|
|
7913
8355
|
buildRAGStreamProgress as buildRAGTransportProgress,
|
|
7914
8356
|
buildRAGSourceSummaries,
|
|
7915
8357
|
buildRAGSourceGroups,
|
|
8358
|
+
buildRAGLexicalHaystack,
|
|
7916
8359
|
buildRAGGroundingReferences,
|
|
7917
8360
|
buildRAGGroundedAnswer,
|
|
7918
8361
|
buildRAGEvaluationResponse,
|
|
@@ -7920,11 +8363,12 @@ export {
|
|
|
7920
8363
|
buildRAGCitations,
|
|
7921
8364
|
buildRAGCitationReferenceMap,
|
|
7922
8365
|
applyRAGReranking,
|
|
8366
|
+
applyRAGQueryTransform,
|
|
7923
8367
|
anthropicOCR,
|
|
7924
8368
|
alibabaEmbeddings,
|
|
7925
8369
|
alibaba,
|
|
7926
8370
|
aiChat
|
|
7927
8371
|
};
|
|
7928
8372
|
|
|
7929
|
-
//# debugId=
|
|
8373
|
+
//# debugId=4FC06DD026EF238A64756E2164756E21
|
|
7930
8374
|
//# sourceMappingURL=index.js.map
|