@absolutejs/absolute 0.19.0-beta.490 → 0.19.0-beta.492
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.js +243 -13
- package/dist/ai/index.js.map +6 -6
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -2115,7 +2115,40 @@ import { Elysia as Elysia2 } from "elysia";
|
|
|
2115
2115
|
|
|
2116
2116
|
// src/ai/rag/lexical.ts
|
|
2117
2117
|
// NOTE(review): presumably the default constant used when fusing ranked result
// lists; its use is not visible in this excerpt — confirm against the caller.
var DEFAULT_FUSION_CONSTANT = 60;
|
|
2118
|
-
var
|
|
2118
|
+
// Lower-case words that `tokenize` drops before it strips suffixes.
// Membership is tested with `STOP_WORDS.has(token)` on the raw (unstemmed) token.
var STOP_WORDS = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);
|
|
2151
|
+
/**
 * Splits text into normalized tokens for lexical matching.
 *
 * Steps, in order: lower-case the input, split on runs of characters that are
 * not ASCII letters or digits, drop tokens found in STOP_WORDS, strip at most
 * one suffix per token, then discard results shorter than two characters.
 *
 * Suffix rules (first match wins):
 *   "ies" (length > 3) -> replace with "y"
 *   "ing" (length > 5) -> drop
 *   "ed"  (length > 4) -> drop
 *   "es"  (length > 4) -> drop
 *   "s"   (length > 3) -> drop
 *
 * Stop words are checked before stemming, so inflected forms that are not
 * listed themselves still pass through. The "es" rule also fires on plurals
 * whose singular ends in "e" (for example "slides" becomes "slid").
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Tokens in input order; duplicates are kept.
 */
var tokenize = (value) => {
  const stems = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const word = piece.trim();
    if (STOP_WORDS.has(word)) {
      continue;
    }
    let stem = word;
    if (word.endsWith("ies") && word.length > 3) {
      stem = `${word.slice(0, -3)}y`;
    } else if (word.endsWith("ing") && word.length > 5) {
      stem = word.slice(0, -3);
    } else if (word.endsWith("ed") && word.length > 4) {
      stem = word.slice(0, -2);
    } else if (word.endsWith("es") && word.length > 4) {
      stem = word.slice(0, -2);
    } else if (word.endsWith("s") && word.length > 3) {
      stem = word.slice(0, -1);
    }
    // Also removes the empty strings that split() yields at the edges.
    if (stem.length > 1) {
      stems.push(stem);
    }
  }
  return stems;
};
|
|
2119
2152
|
var collectMetadataStrings = (value) => {
|
|
2120
2153
|
if (typeof value === "string" || typeof value === "number") {
|
|
2121
2154
|
return [String(value)];
|
|
@@ -2131,6 +2164,7 @@ var collectMetadataStrings = (value) => {
|
|
|
2131
2164
|
/**
 * Builds the single space-joined text that lexical matching searches in.
 *
 * The haystack is made of, in order: the title, the raw source, a
 * "spelled-out" copy of the source, the chunk text, and every string returned
 * by collectMetadataStrings(result.metadata). Falsy parts are omitted.
 *
 * The spelled-out source replaces runs of `#`, `/`, `_`, `.` and `-` with a
 * space and then swaps whole-word file extensions for descriptive words
 * (e.g. "md" -> "markdown", "zip" -> "archive bundle"). It is only added when
 * the source is a string.
 *
 * @param {object} result - Retrieval result with optional title, source,
 *   chunkText and metadata.
 * @returns {string} The joined haystack.
 */
var buildRAGLexicalHaystack = (result) => {
  const parts = [result.title, result.source];
  if (typeof result.source === "string") {
    const extensionAliases = [
      [/\bmd\b/g, "markdown"],
      [/\bpptx\b/g, "presentation"],
      [/\bxlsx\b/g, "spreadsheet workbook sheet"],
      [/\bmp3\b/g, "audio transcript media"],
      [/\bmp4\b/g, "video transcript media"],
      [/\bzip\b/g, "archive bundle"]
    ];
    let spelledOut = result.source.replace(/[#/_.-]+/g, " ");
    for (const [pattern, words] of extensionAliases) {
      spelledOut = spelledOut.replace(pattern, words);
    }
    parts.push(spelledOut);
  }
  parts.push(result.chunkText, ...collectMetadataStrings(result.metadata));
  return parts.filter(Boolean).join(" ");
};
|
|
@@ -2149,7 +2183,66 @@ var scoreRAGLexicalMatch = (query, result) => {
|
|
|
2149
2183
|
const exactPhraseBoost = haystack.includes(query.toLowerCase()) ? 1 : 0;
|
|
2150
2184
|
const sourceBoost = typeof result.source === "string" && queryTokens.some((token) => result.source?.toLowerCase().includes(token)) ? 0.5 : 0;
|
|
2151
2185
|
const coverageBoost = overlap / queryTokens.length;
|
|
2152
|
-
|
|
2186
|
+
const fileKindBoost = resolveFileKindBoost(queryTokens, result.metadata);
|
|
2187
|
+
const transcriptBoost = resolveTranscriptBoost(queryTokens, result.metadata);
|
|
2188
|
+
const archiveBoost = resolveArchiveBoost(queryTokens, result);
|
|
2189
|
+
return coverageBoost + exactPhraseBoost + sourceBoost + fileKindBoost + transcriptBoost + archiveBoost;
|
|
2190
|
+
};
|
|
2191
|
+
/**
 * Reports whether at least one of `values` appears in `tokens`.
 *
 * @param {string[]} tokens - Tokens to search in.
 * @param {string[]} values - Candidate keywords.
 * @returns {boolean} True when any candidate is present; false for empty input.
 */
var hasAnyToken = (tokens, values) => {
  const isPresent = (candidate) => tokens.includes(candidate);
  return values.some(isPresent);
};
|
|
2192
|
+
/**
 * Returns a fixed score bonus when the query mentions a keyword associated
 * with the result's file kind.
 *
 * Only `metadata.fileKind` values "office", "archive" and "media" are
 * recognized; anything else (including a missing or non-string fileKind)
 * yields 0.
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {object} [metadata] - Result metadata; `fileKind` is read from it.
 * @returns {number} 0.75 (office / media), 0.85 (archive) or 0.
 */
var resolveFileKindBoost = (queryTokens, metadata) => {
  const kind = metadata?.fileKind;
  if (typeof kind !== "string") {
    return 0;
  }
  switch (kind) {
    case "office":
      return hasAnyToken(queryTokens, ["sheet", "worksheet", "workbook", "spreadsheet"]) ? 0.75 : 0;
    case "archive":
      return hasAnyToken(queryTokens, ["archive", "bundle", "entry", "runbook", "recovery"]) ? 0.85 : 0;
    case "media":
      return hasAnyToken(queryTokens, ["frontend", "framework", "transcript", "audio", "video"]) ? 0.75 : 0;
    default:
      return 0;
  }
};
|
|
2222
|
+
/**
 * Scores how much of the query is found in a result's media transcript.
 *
 * The transcript is the lower-cased, space-joined `text` of every entry in
 * `metadata.mediaSegments`; entries that are not objects with a `text`
 * property contribute an empty string. Matching is by substring, so a query
 * token counts when it occurs anywhere inside the transcript.
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {object} [metadata] - Result metadata; `mediaSegments` is read from it.
 * @returns {number} Matched tokens divided by the token count (0 when there
 *   are no segments, no transcript text, or no query tokens).
 */
var resolveTranscriptBoost = (queryTokens, metadata) => {
  const mediaSegments = metadata?.mediaSegments;
  if (!Array.isArray(mediaSegments) || mediaSegments.length === 0) {
    return 0;
  }
  const readSegmentText = (segment) => {
    if (!segment || typeof segment !== "object" || !("text" in segment)) {
      return "";
    }
    return String(segment.text ?? "");
  };
  const transcript = mediaSegments.map((segment) => readSegmentText(segment)).join(" ").toLowerCase();
  if (transcript === "") {
    return 0;
  }
  const matched = queryTokens.reduce(
    (count, token) => (transcript.includes(token) ? count + 1 : count),
    0
  );
  // Math.max guards the division when the query produced no tokens.
  return matched / Math.max(1, queryTokens.length);
};
|
|
2234
|
+
/**
 * Returns a score bonus when the query and the archive entry path share the
 * keyword "recovery" (bonus 1) or "runbook" (bonus 0.8).
 *
 * The entry path is `metadata.archivePath` when that is a string (even an
 * empty one); otherwise it is the part of `source` between the first and
 * second "#". The path is lower-cased before matching, and "recovery" takes
 * precedence over "runbook".
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {object} result - Retrieval result; `metadata.archivePath` and
 *   `source` are read from it.
 * @returns {number} 1, 0.8 or 0.
 */
var resolveArchiveBoost = (queryTokens, result) => {
  let entryPath = "";
  const metadataPath = result.metadata?.archivePath;
  if (typeof metadataPath === "string") {
    entryPath = metadataPath.toLowerCase();
  } else if (typeof result.source === "string" && result.source.includes("#")) {
    entryPath = result.source.split("#")[1]?.toLowerCase() ?? "";
  }
  if (entryPath === "") {
    return 0;
  }
  // Ordered by priority: the first keyword present on both sides wins.
  const keywordBoosts = [
    ["recovery", 1],
    ["runbook", 0.8]
  ];
  for (const [keyword, boost] of keywordBoosts) {
    if (queryTokens.includes(keyword) && entryPath.includes(keyword)) {
      return boost;
    }
  }
  return 0;
};
|
|
2154
2247
|
var rankResults = (results) => results.map((result, index) => ({
|
|
2155
2248
|
rank: index + 1,
|
|
@@ -2231,17 +2324,113 @@ var resolveRAGHybridSearchOptions = (retrieval) => {
|
|
|
2231
2324
|
};
|
|
2232
2325
|
|
|
2233
2326
|
// src/ai/rag/queryTransforms.ts
|
|
2234
|
-
var tokenize2 = (value) => value.toLowerCase().split(/[^a-z0-9]+/i).map((token) => token.trim()).map((token) => token.endsWith("ies") && token.length > 3 ? `${token.slice(0, -3)}y` : token.endsWith("s") && token.length > 3 ? token.slice(0, -1) : token).filter((token) => token.length > 1);
|
|
2327
|
+
/**
 * Splits a query into normalized tokens for the query-transform heuristics.
 *
 * Steps, in order: lower-case, split on runs of characters that are not ASCII
 * letters or digits, drop tokens found in STOP_WORDS2, strip at most one
 * suffix, apply two ending rewrites, then discard results shorter than two
 * characters.
 *
 * Suffix rules (first match wins):
 *   "ies" (length > 3) -> replace with "y"
 *   "ing" (length > 5) -> drop
 *   "ed"  (length > 4) -> drop
 *   "es"  (length > 4) -> drop
 *   "s"   (length > 3) -> drop
 * Ending rewrites, applied to the stemmed token:
 *   "ck" (length > 4) -> drop the final "k"   ("stack"  -> "stac")
 *   "ay" (length > 4) -> replace with "i"     ("friday" -> "fridi")
 *
 * NOTE(review): the "es" rule reduces "slides" to "slid", while detectDomains
 * compares tokens against the literal "slide". If these tokens feed
 * detectDomains (the call is not visible here), plural forms will not be
 * detected — confirm whether that is intended.
 *
 * @param {string} value - Query text.
 * @returns {string[]} Tokens in input order; duplicates are kept.
 */
var tokenize2 = (value) => {
  const normalized = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const word = piece.trim();
    if (STOP_WORDS2.has(word)) {
      continue;
    }
    let stem = word;
    if (word.endsWith("ies") && word.length > 3) {
      stem = `${word.slice(0, -3)}y`;
    } else if (word.endsWith("ing") && word.length > 5) {
      stem = word.slice(0, -3);
    } else if (word.endsWith("ed") && word.length > 4) {
      stem = word.slice(0, -2);
    } else if (word.endsWith("es") && word.length > 4) {
      stem = word.slice(0, -2);
    } else if (word.endsWith("s") && word.length > 3) {
      stem = word.slice(0, -1);
    }
    if (stem.endsWith("ck") && stem.length > 4) {
      stem = stem.slice(0, -1);
    }
    if (stem.endsWith("ay") && stem.length > 4) {
      stem = `${stem.slice(0, -2)}i`;
    }
    if (stem.length > 1) {
      normalized.push(stem);
    }
  }
  return normalized;
};
|
|
2328
|
+
// Lower-case words that `tokenize2` drops before it strips suffixes.
// Declared after `tokenize2`; that is safe because the set is only read when
// `tokenize2` is called, not when it is defined.
var STOP_WORDS2 = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);
|
|
2235
2361
|
// Extra search terms per content domain. The heuristic query transform looks
// entries up by domain name (`DOMAIN_EXPANSIONS[domain] ?? []`) and appends
// them to the rewritten query and to each per-domain variant.
var DOMAIN_EXPANSIONS = {
  archive: ["archive", "zip", "bundle", "entry", "runbook", "recovery", "procedure", "runbooks/recovery"],
  audio: ["audio", "media", "recording", "speaker", "transcript", "standup", "mp3"],
  deck: ["slide", "deck", "presentation", "pptx", "roadmap"],
  email: ["email", "mail", "thread", "message", "attachment"],
  frontend: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  image: ["image", "ocr", "scan", "screenshot", "receipt"],
  pdf: ["pdf", "document", "page", "ocr", "scan"],
  spreadsheet: ["sheet", "worksheet", "workbook", "spreadsheet", "xlsx", "regional", "growth"],
  video: ["video", "media", "recording", "transcript", "timestamp"]
};
|
|
2406
|
+
// Extra search terms keyed by an individual (stemmed) query token. The
// heuristic query transform looks entries up with
// `TERM_EXPANSIONS[token] ?? []` for every token of the user's query.
//
// The table has a null prototype on purpose: keys come from arbitrary user
// text, and on a plain object the token "constructor" would resolve to the
// inherited `Object.prototype.constructor` function instead of `undefined`,
// bypassing the `?? []` fallback and leaking a function into the expansion
// list. With no prototype, unknown tokens always read as `undefined`.
//
// "procedur" is listed next to "procedure" because the tokenizer's "es" rule
// reduces "procedures" to "procedur".
var TERM_EXPANSIONS = Object.assign(Object.create(null), {
  frontend: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  framework: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  procedure: ["recovery", "runbook"],
  procedur: ["recovery", "runbook"],
  receipt: ["invoice", "ocr", "pdf"],
  sheet: ["worksheet", "workbook", "xlsx"],
  transcript: ["audio", "video", "media"],
  workbook: ["sheet", "spreadsheet", "xlsx"]
});
|
|
2245
2434
|
var detectDomains = (tokens) => {
|
|
2246
2435
|
const tokenSet = new Set(tokens);
|
|
2247
2436
|
const domains = new Set;
|
|
@@ -2264,6 +2453,9 @@ var detectDomains = (tokens) => {
|
|
|
2264
2453
|
if (token === "slide" || token === "deck" || token === "presentation") {
|
|
2265
2454
|
domains.add("deck");
|
|
2266
2455
|
}
|
|
2456
|
+
if (token === "frontend" || token === "framework" || token === "react" || token === "vue" || token === "svelte" || token === "angular" || token === "html" || token === "htmx") {
|
|
2457
|
+
domains.add("frontend");
|
|
2458
|
+
}
|
|
2267
2459
|
if (token === "email" || token === "mail" || token === "thread") {
|
|
2268
2460
|
domains.add("email");
|
|
2269
2461
|
}
|
|
@@ -2284,13 +2476,16 @@ var createHeuristicRAGQueryTransform = (options = {}) => createRAGQueryTransform
|
|
|
2284
2476
|
return { query };
|
|
2285
2477
|
}
|
|
2286
2478
|
const expandedTerms = domains.flatMap((domain) => DOMAIN_EXPANSIONS[domain] ?? []);
|
|
2479
|
+
const tokenExpansions = tokens.flatMap((token) => TERM_EXPANSIONS[token] ?? []);
|
|
2287
2480
|
const rewrittenQuery = uniqueQueryStrings([
|
|
2288
2481
|
...tokens,
|
|
2289
|
-
...expandedTerms
|
|
2482
|
+
...expandedTerms,
|
|
2483
|
+
...tokenExpansions
|
|
2290
2484
|
]).join(" ");
|
|
2291
2485
|
const variants = domains.map((domain) => uniqueQueryStrings([
|
|
2292
2486
|
...tokens,
|
|
2293
|
-
...DOMAIN_EXPANSIONS[domain] ?? []
|
|
2487
|
+
...DOMAIN_EXPANSIONS[domain] ?? [],
|
|
2488
|
+
...tokenExpansions
|
|
2294
2489
|
]).join(" "));
|
|
2295
2490
|
return {
|
|
2296
2491
|
query: rewrittenQuery,
|
|
@@ -2334,7 +2529,40 @@ var applyRAGQueryTransform = async ({
|
|
|
2334
2529
|
};
|
|
2335
2530
|
|
|
2336
2531
|
// src/ai/rag/reranking.ts
|
|
2337
|
-
var tokenize3 = (value) => value.toLowerCase().split(/[^a-z0-9]+/i).map((token) => token.trim()).map((token) => token.endsWith("ies") && token.length > 3 ? `${token.slice(0, -3)}y` : token.endsWith("s") && token.length > 3 ? token.slice(0, -1) : token).filter((token) => token.length > 1);
|
|
2532
|
+
/**
 * Splits text into normalized tokens for the reranking heuristics.
 *
 * Steps, in order: lower-case, split on runs of characters that are not ASCII
 * letters or digits, drop tokens found in STOP_WORDS3, strip at most one
 * suffix, apply two ending rewrites, then discard results shorter than two
 * characters.
 *
 * Suffix rules (first match wins):
 *   "ies" (length > 3) -> replace with "y"
 *   "ing" (length > 5) -> drop
 *   "ed"  (length > 4) -> drop
 *   "es"  (length > 4) -> drop
 *   "s"   (length > 3) -> drop
 * Ending rewrites, applied to the stemmed token:
 *   "ck" (length > 4) -> drop the final "k"
 *   "ay" (length > 4) -> replace with "i"
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Tokens in input order; duplicates are kept.
 */
var tokenize3 = (value) => {
  const normalized = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const word = piece.trim();
    if (STOP_WORDS3.has(word)) {
      continue;
    }
    let stem = word;
    if (word.endsWith("ies") && word.length > 3) {
      stem = `${word.slice(0, -3)}y`;
    } else if (word.endsWith("ing") && word.length > 5) {
      stem = word.slice(0, -3);
    } else if (word.endsWith("ed") && word.length > 4) {
      stem = word.slice(0, -2);
    } else if (word.endsWith("es") && word.length > 4) {
      stem = word.slice(0, -2);
    } else if (word.endsWith("s") && word.length > 3) {
      stem = word.slice(0, -1);
    }
    if (stem.endsWith("ck") && stem.length > 4) {
      stem = stem.slice(0, -1);
    }
    if (stem.endsWith("ay") && stem.length > 4) {
      stem = `${stem.slice(0, -2)}i`;
    }
    if (stem.length > 1) {
      normalized.push(stem);
    }
  }
  return normalized;
};
|
|
2533
|
+
// Lower-case words that `tokenize3` drops before it strips suffixes.
// Declared after `tokenize3`; that is safe because the set is only read when
// `tokenize3` is called, not when it is defined.
var STOP_WORDS3 = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);
|
|
2338
2566
|
var collectMetadataStrings2 = (value) => {
|
|
2339
2567
|
if (typeof value === "string" || typeof value === "number") {
|
|
2340
2568
|
return [String(value)];
|
|
@@ -3649,8 +3877,10 @@ var createRAGCollection = (options) => {
|
|
|
3649
3877
|
const model = input.model ?? options.defaultModel;
|
|
3650
3878
|
const topK = input.topK ?? defaultTopK;
|
|
3651
3879
|
const hasReranker = Boolean(input.rerank ?? options.rerank);
|
|
3652
|
-
const candidateTopK = Math.max(topK, Math.floor(input.candidateTopK ?? (hasReranker ? topK * defaultCandidateMultiplier : topK)));
|
|
3653
3880
|
const retrieval = resolveRAGHybridSearchOptions(input.retrieval);
|
|
3881
|
+
const hasQueryTransform = Boolean(input.queryTransform ?? options.queryTransform);
|
|
3882
|
+
const shouldExpandCandidates = hasReranker || hasQueryTransform || retrieval.mode !== "vector";
|
|
3883
|
+
const candidateTopK = Math.max(topK, Math.floor(input.candidateTopK ?? (shouldExpandCandidates ? topK * defaultCandidateMultiplier : topK)));
|
|
3654
3884
|
const transformed = await applyRAGQueryTransform({
|
|
3655
3885
|
input: {
|
|
3656
3886
|
candidateTopK,
|
|
@@ -8370,5 +8600,5 @@ export {
|
|
|
8370
8600
|
aiChat
|
|
8371
8601
|
};
|
|
8372
8602
|
|
|
8373
|
-
//# debugId=
|
|
8603
|
+
//# debugId=8B383E0793D06CEF64756E2164756E21
|
|
8374
8604
|
//# sourceMappingURL=index.js.map
|