@absolutejs/absolute 0.19.0-beta.490 → 0.19.0-beta.491

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -2115,7 +2115,40 @@ import { Elysia as Elysia2 } from "elysia";
2115
2115
 
2116
2116
  // src/ai/rag/lexical.ts
2117
2117
  var DEFAULT_FUSION_CONSTANT = 60;
2118
- var tokenize = (value) => value.toLowerCase().split(/[^a-z0-9]+/i).map((token) => token.trim()).filter((token) => token.length > 1);
2118
// Words that tokenize() drops before any suffix stripping is applied.
var STOP_WORDS = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);

/**
 * Lowercases `value`, splits it into word tokens, removes stop words, strips a
 * few common English suffixes, and discards tokens shorter than two characters.
 *
 * Splitting is Unicode-aware: any run of characters that is not a letter,
 * combining mark, or number acts as a separator. Pure-ASCII input tokenizes
 * exactly as with the former `[^a-z0-9]` separator, while accented or
 * non-Latin words are no longer cut apart at every non-ASCII character.
 *
 * @param {string} value - Raw text to tokenize.
 * @returns {string[]} Normalized tokens in their original order (duplicates kept).
 */
var tokenize = (value) => {
  // Rules are tried in this order; only the first matching rule is applied.
  const stripSuffix = (token) => {
    if (token.endsWith("ies") && token.length > 3) {
      return `${token.slice(0, -3)}y`;
    }
    if (token.endsWith("ing") && token.length > 5) {
      return token.slice(0, -3);
    }
    if (token.endsWith("ed") && token.length > 4) {
      return token.slice(0, -2);
    }
    if (token.endsWith("es") && token.length > 4) {
      return token.slice(0, -2);
    }
    if (token.endsWith("s") && token.length > 3) {
      return token.slice(0, -1);
    }
    return token;
  };

  return value
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    // Stop words are matched on the raw (unstemmed) token.
    .filter((token) => !STOP_WORDS.has(token))
    .map((token) => stripSuffix(token))
    // Also removes the empty strings split() yields at the text edges.
    .filter((token) => token.length > 1);
};
2119
2152
  var collectMetadataStrings = (value) => {
2120
2153
  if (typeof value === "string" || typeof value === "number") {
2121
2154
  return [String(value)];
@@ -2131,6 +2164,7 @@ var collectMetadataStrings = (value) => {
2131
2164
  var buildRAGLexicalHaystack = (result) => [
2132
2165
  result.title,
2133
2166
  result.source,
2167
+ typeof result.source === "string" ? result.source.replace(/[#/_.-]+/g, " ").replace(/\bmd\b/g, "markdown").replace(/\bpptx\b/g, "presentation").replace(/\bxlsx\b/g, "spreadsheet workbook sheet").replace(/\bmp3\b/g, "audio transcript media").replace(/\bmp4\b/g, "video transcript media").replace(/\bzip\b/g, "archive bundle") : undefined,
2134
2168
  result.chunkText,
2135
2169
  ...collectMetadataStrings(result.metadata)
2136
2170
  ].filter((value) => Boolean(value)).join(" ");
@@ -2149,7 +2183,66 @@ var scoreRAGLexicalMatch = (query, result) => {
2149
2183
  const exactPhraseBoost = haystack.includes(query.toLowerCase()) ? 1 : 0;
2150
2184
  const sourceBoost = typeof result.source === "string" && queryTokens.some((token) => result.source?.toLowerCase().includes(token)) ? 0.5 : 0;
2151
2185
  const coverageBoost = overlap / queryTokens.length;
2152
- return coverageBoost + exactPhraseBoost + sourceBoost;
2186
+ const fileKindBoost = resolveFileKindBoost(queryTokens, result.metadata);
2187
+ const transcriptBoost = resolveTranscriptBoost(queryTokens, result.metadata);
2188
+ const archiveBoost = resolveArchiveBoost(queryTokens, result);
2189
+ return coverageBoost + exactPhraseBoost + sourceBoost + fileKindBoost + transcriptBoost + archiveBoost;
2190
+ };
2191
/**
 * Reports whether at least one entry of `values` is contained in `tokens`.
 *
 * @param {string[]} tokens - Collection that is searched.
 * @param {string[]} values - Candidates to look for.
 * @returns {boolean} True as soon as one candidate is found, otherwise false.
 */
var hasAnyToken = (tokens, values) => {
  for (const candidate of values) {
    if (tokens.includes(candidate)) {
      return true;
    }
  }
  return false;
};
2192
/**
 * Returns a score bonus when `metadata.fileKind` names a known kind and the
 * query contains at least one keyword associated with that kind.
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {object} [metadata] - Result metadata; only a string `fileKind` is considered.
 * @returns {number} 0.75 for "office" or "media", 0.85 for "archive", otherwise 0.
 */
var resolveFileKindBoost = (queryTokens, metadata) => {
  const boostRules = [
    { kind: "office", boost: 0.75, keywords: ["sheet", "worksheet", "workbook", "spreadsheet"] },
    { kind: "archive", boost: 0.85, keywords: ["archive", "bundle", "entry", "runbook", "recovery"] },
    { kind: "media", boost: 0.75, keywords: ["frontend", "framework", "transcript", "audio", "video"] }
  ];

  const rawKind = metadata?.fileKind;
  const fileKind = typeof rawKind === "string" ? rawKind : "";

  const rule = boostRules.find((candidate) => candidate.kind === fileKind);
  if (rule === undefined) {
    return 0;
  }

  const queryMentionsKind = rule.keywords.some((keyword) => queryTokens.includes(keyword));
  return queryMentionsKind ? rule.boost : 0;
};
2222
/**
 * Scores how many query tokens occur, as substrings, in the lowercased text of
 * `metadata.mediaSegments`.
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {object} [metadata] - Result metadata; `mediaSegments` is used only when it is an array.
 * @returns {number} Matching tokens divided by the token count (at least 1), or 0
 *   when there are no segments or no segment text.
 */
var resolveTranscriptBoost = (queryTokens, metadata) => {
  const mediaSegments = metadata?.mediaSegments;
  if (!Array.isArray(mediaSegments) || mediaSegments.length === 0) {
    return 0;
  }

  // Entries that are not objects carrying a `text` key contribute an empty string.
  const readSegmentText = (segment) => {
    if (segment && typeof segment === "object" && "text" in segment) {
      return String(segment.text ?? "");
    }
    return "";
  };

  const transcript = mediaSegments.map(readSegmentText).join(" ").toLowerCase();
  if (transcript === "") {
    return 0;
  }

  const matched = queryTokens.reduce(
    (count, token) => (transcript.includes(token) ? count + 1 : count),
    0
  );
  return matched / Math.max(1, queryTokens.length);
};
2234
/**
 * Returns a bonus when both the query and the result's archive path mention
 * "recovery" (1) or, failing that, "runbook" (0.8).
 *
 * The archive path is the lowercased `metadata.archivePath` when that is a
 * string; otherwise it is the lowercased part of `source` between the first
 * and second `#`, when `source` is a string containing `#`.
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {object} result - Retrieval result providing `metadata` and `source`.
 * @returns {number} 1, 0.8, or 0.
 */
var resolveArchiveBoost = (queryTokens, result) => {
  const declaredPath = result.metadata?.archivePath;

  let archivePath = "";
  if (typeof declaredPath === "string") {
    archivePath = declaredPath.toLowerCase();
  } else if (typeof result.source === "string" && result.source.includes("#")) {
    archivePath = result.source.split("#")[1]?.toLowerCase() ?? "";
  }

  if (archivePath === "") {
    return 0;
  }

  // Checked in order, so "recovery" takes precedence over "runbook".
  const keywordBoosts = [
    ["recovery", 1],
    ["runbook", 0.8]
  ];
  for (const [keyword, boost] of keywordBoosts) {
    if (queryTokens.includes(keyword) && archivePath.includes(keyword)) {
      return boost;
    }
  }
  return 0;
};
2154
2247
  var rankResults = (results) => results.map((result, index) => ({
2155
2248
  rank: index + 1,
@@ -2231,17 +2324,113 @@ var resolveRAGHybridSearchOptions = (retrieval) => {
2231
2324
  };
2232
2325
 
2233
2326
  // src/ai/rag/queryTransforms.ts
2234
- var tokenize2 = (value) => value.toLowerCase().split(/[^a-z0-9]+/i).map((token) => token.trim()).map((token) => token.endsWith("ies") && token.length > 3 ? `${token.slice(0, -3)}y` : token.endsWith("s") && token.length > 3 ? token.slice(0, -1) : token).filter((token) => token.length > 1);
2327
// Words that tokenize2() drops before any suffix rewriting is applied.
var STOP_WORDS2 = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);

/**
 * Lowercases `value`, splits it into word tokens, removes stop words, rewrites
 * a few common suffixes, and discards tokens shorter than two characters.
 *
 * Splitting is Unicode-aware: any run of characters that is not a letter,
 * combining mark, or number acts as a separator. Pure-ASCII input tokenizes
 * exactly as with the former `[^a-z0-9]` separator, while accented or
 * non-Latin words are no longer cut apart at every non-ASCII character.
 *
 * @param {string} value - Raw text to tokenize.
 * @returns {string[]} Normalized tokens in their original order (duplicates kept).
 */
var tokenize2 = (value) => {
  // First matching inflection rule wins.
  const stripSuffix = (token) => {
    if (token.endsWith("ies") && token.length > 3) {
      return `${token.slice(0, -3)}y`;
    }
    if (token.endsWith("ing") && token.length > 5) {
      return token.slice(0, -3);
    }
    if (token.endsWith("ed") && token.length > 4) {
      return token.slice(0, -2);
    }
    if (token.endsWith("es") && token.length > 4) {
      return token.slice(0, -2);
    }
    if (token.endsWith("s") && token.length > 3) {
      return token.slice(0, -1);
    }
    return token;
  };

  // Applied after stripSuffix, one after the other: trailing "ck" -> "c", then trailing "ay" -> "i".
  const rewriteEnding = (token) => {
    let rewritten = token;
    if (rewritten.endsWith("ck") && rewritten.length > 4) {
      rewritten = rewritten.slice(0, -1);
    }
    if (rewritten.endsWith("ay") && rewritten.length > 4) {
      rewritten = `${rewritten.slice(0, -2)}i`;
    }
    return rewritten;
  };

  return value
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    // Stop words are matched on the raw (unstemmed) token.
    .filter((token) => !STOP_WORDS2.has(token))
    .map((token) => rewriteEnding(stripSuffix(token)))
    // Also removes the empty strings split() yields at the text edges.
    .filter((token) => token.length > 1);
};
2235
2361
  var DOMAIN_EXPANSIONS = {
2236
- archive: ["archive", "zip", "bundle", "entry", "runbook", "recovery"],
2237
- audio: ["audio", "media", "recording", "speaker", "transcript"],
2238
- deck: ["slide", "deck", "presentation", "pptx"],
2362
+ archive: [
2363
+ "archive",
2364
+ "zip",
2365
+ "bundle",
2366
+ "entry",
2367
+ "runbook",
2368
+ "recovery",
2369
+ "procedure",
2370
+ "runbooks/recovery"
2371
+ ],
2372
+ audio: [
2373
+ "audio",
2374
+ "media",
2375
+ "recording",
2376
+ "speaker",
2377
+ "transcript",
2378
+ "standup",
2379
+ "mp3"
2380
+ ],
2381
+ deck: ["slide", "deck", "presentation", "pptx", "roadmap"],
2239
2382
  email: ["email", "mail", "thread", "message", "attachment"],
2240
- image: ["image", "ocr", "scan", "screenshot"],
2383
+ frontend: [
2384
+ "frontend",
2385
+ "framework",
2386
+ "react",
2387
+ "vue",
2388
+ "svelte",
2389
+ "angular",
2390
+ "html",
2391
+ "htmx"
2392
+ ],
2393
+ image: ["image", "ocr", "scan", "screenshot", "receipt"],
2241
2394
  pdf: ["pdf", "document", "page", "ocr", "scan"],
2242
- spreadsheet: ["sheet", "worksheet", "workbook", "spreadsheet", "xlsx"],
2395
+ spreadsheet: [
2396
+ "sheet",
2397
+ "worksheet",
2398
+ "workbook",
2399
+ "spreadsheet",
2400
+ "xlsx",
2401
+ "regional",
2402
+ "growth"
2403
+ ],
2243
2404
  video: ["video", "media", "recording", "transcript", "timestamp"]
2244
2405
  };
2406
// Per-token synonym lists. The table is indexed with tokens derived from the
// query text, so it is built on a null-prototype object: a token such as
// "constructor" must resolve to undefined rather than to a member inherited
// from Object.prototype, which would otherwise end up among the expansions.
var TERM_EXPANSIONS = Object.assign(Object.create(null), {
  frontend: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  framework: ["frontend", "framework", "react", "vue", "svelte", "angular", "html", "htmx"],
  procedure: ["recovery", "runbook"],
  // Stemmed spelling: the tokenizer's "es" rule turns "procedures" into "procedur".
  procedur: ["recovery", "runbook"],
  receipt: ["invoice", "ocr", "pdf"],
  sheet: ["worksheet", "workbook", "xlsx"],
  transcript: ["audio", "video", "media"],
  workbook: ["sheet", "spreadsheet", "xlsx"]
});
2245
2434
  var detectDomains = (tokens) => {
2246
2435
  const tokenSet = new Set(tokens);
2247
2436
  const domains = new Set;
@@ -2264,6 +2453,9 @@ var detectDomains = (tokens) => {
2264
2453
  if (token === "slide" || token === "deck" || token === "presentation") {
2265
2454
  domains.add("deck");
2266
2455
  }
2456
+ if (token === "frontend" || token === "framework" || token === "react" || token === "vue" || token === "svelte" || token === "angular" || token === "html" || token === "htmx") {
2457
+ domains.add("frontend");
2458
+ }
2267
2459
  if (token === "email" || token === "mail" || token === "thread") {
2268
2460
  domains.add("email");
2269
2461
  }
@@ -2284,13 +2476,16 @@ var createHeuristicRAGQueryTransform = (options = {}) => createRAGQueryTransform
2284
2476
  return { query };
2285
2477
  }
2286
2478
  const expandedTerms = domains.flatMap((domain) => DOMAIN_EXPANSIONS[domain] ?? []);
2479
+ const tokenExpansions = tokens.flatMap((token) => TERM_EXPANSIONS[token] ?? []);
2287
2480
  const rewrittenQuery = uniqueQueryStrings([
2288
2481
  ...tokens,
2289
- ...expandedTerms
2482
+ ...expandedTerms,
2483
+ ...tokenExpansions
2290
2484
  ]).join(" ");
2291
2485
  const variants = domains.map((domain) => uniqueQueryStrings([
2292
2486
  ...tokens,
2293
- ...DOMAIN_EXPANSIONS[domain] ?? []
2487
+ ...DOMAIN_EXPANSIONS[domain] ?? [],
2488
+ ...tokenExpansions
2294
2489
  ]).join(" "));
2295
2490
  return {
2296
2491
  query: rewrittenQuery,
@@ -2334,7 +2529,40 @@ var applyRAGQueryTransform = async ({
2334
2529
  };
2335
2530
 
2336
2531
  // src/ai/rag/reranking.ts
2337
- var tokenize3 = (value) => value.toLowerCase().split(/[^a-z0-9]+/i).map((token) => token.trim()).map((token) => token.endsWith("ies") && token.length > 3 ? `${token.slice(0, -3)}y` : token.endsWith("s") && token.length > 3 ? token.slice(0, -1) : token).filter((token) => token.length > 1);
2532
// Words that tokenize3() drops before any suffix rewriting is applied.
var STOP_WORDS3 = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "does", "every", "explain", "explains", "for", "how", "in", "is",
  "it", "of", "on", "or", "say", "says", "should", "stay",
  "the", "this", "to", "track", "what", "which", "why"
]);

/**
 * Lowercases `value`, splits it into word tokens, removes stop words, rewrites
 * a few common suffixes, and discards tokens shorter than two characters.
 *
 * Splitting is Unicode-aware: any run of characters that is not a letter,
 * combining mark, or number acts as a separator. Pure-ASCII input tokenizes
 * exactly as with the former `[^a-z0-9]` separator, while accented or
 * non-Latin words are no longer cut apart at every non-ASCII character.
 *
 * @param {string} value - Raw text to tokenize.
 * @returns {string[]} Normalized tokens in their original order (duplicates kept).
 */
var tokenize3 = (value) => {
  // First matching inflection rule wins.
  const stripSuffix = (token) => {
    if (token.endsWith("ies") && token.length > 3) {
      return `${token.slice(0, -3)}y`;
    }
    if (token.endsWith("ing") && token.length > 5) {
      return token.slice(0, -3);
    }
    if (token.endsWith("ed") && token.length > 4) {
      return token.slice(0, -2);
    }
    if (token.endsWith("es") && token.length > 4) {
      return token.slice(0, -2);
    }
    if (token.endsWith("s") && token.length > 3) {
      return token.slice(0, -1);
    }
    return token;
  };

  // Applied after stripSuffix, one after the other: trailing "ck" -> "c", then trailing "ay" -> "i".
  const rewriteEnding = (token) => {
    let rewritten = token;
    if (rewritten.endsWith("ck") && rewritten.length > 4) {
      rewritten = rewritten.slice(0, -1);
    }
    if (rewritten.endsWith("ay") && rewritten.length > 4) {
      rewritten = `${rewritten.slice(0, -2)}i`;
    }
    return rewritten;
  };

  return value
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    // Stop words are matched on the raw (unstemmed) token.
    .filter((token) => !STOP_WORDS3.has(token))
    .map((token) => rewriteEnding(stripSuffix(token)))
    // Also removes the empty strings split() yields at the text edges.
    .filter((token) => token.length > 1);
};
2338
2566
  var collectMetadataStrings2 = (value) => {
2339
2567
  if (typeof value === "string" || typeof value === "number") {
2340
2568
  return [String(value)];
@@ -8370,5 +8598,5 @@ export {
8370
8598
  aiChat
8371
8599
  };
8372
8600
 
8373
- //# debugId=4FC06DD026EF238A64756E2164756E21
8601
+ //# debugId=F84CC012821C275C64756E2164756E21
8374
8602
  //# sourceMappingURL=index.js.map