@absolutejs/absolute 0.19.0-beta.493 → 0.19.0-beta.495

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -2149,6 +2149,8 @@ var STOP_WORDS = new Set([
2149
2149
  "why"
2150
2150
  ]);
2151
2151
  var tokenize = (value) => value.toLowerCase().split(/[^a-z0-9]+/i).map((token) => token.trim()).filter((token) => !STOP_WORDS.has(token)).map((token) => token.endsWith("ies") && token.length > 3 ? `${token.slice(0, -3)}y` : token.endsWith("ing") && token.length > 5 ? token.slice(0, -3) : token.endsWith("ed") && token.length > 4 ? token.slice(0, -2) : token.endsWith("es") && token.length > 4 ? token.slice(0, -2) : token.endsWith("s") && token.length > 3 ? token.slice(0, -1) : token).filter((token) => token.length > 1);
2152
+ var BM25_K1 = 1.2;
2153
+ var BM25_B = 0.75;
2152
2154
  var collectMetadataStrings = (value) => {
2153
2155
  if (typeof value === "string" || typeof value === "number") {
2154
2156
  return [String(value)];
@@ -2164,7 +2166,7 @@ var collectMetadataStrings = (value) => {
2164
2166
  var normalizeSourceForLexical = (source) => source.replace(/[#/_.-]+/g, " ").replace(/\bmd\b/g, "markdown").replace(/\bpptx\b/g, "presentation").replace(/\bxlsx\b/g, "spreadsheet workbook sheet").replace(/\bmp3\b/g, "audio transcript media").replace(/\bmp4\b/g, "video transcript media").replace(/\bzip\b/g, "archive bundle");
2165
2167
  var toFieldText = (value) => collectMetadataStrings(value).filter(Boolean).join(" ");
2166
2168
  var scoreTokenCoverage = (queryTokens, text) => {
2167
- const normalizedText = text.toLowerCase();
2169
+ const normalizedText = (text ?? "").toLowerCase();
2168
2170
  if (normalizedText.length === 0) {
2169
2171
  return 0;
2170
2172
  }
@@ -2178,7 +2180,7 @@ var scoreTokenCoverage = (queryTokens, text) => {
2178
2180
  };
2179
2181
  var scorePhraseMatch = (query, text) => {
2180
2182
  const normalizedQuery = tokenize(query).join(" ");
2181
- const normalizedText = tokenize(text).join(" ");
2183
+ const normalizedText = tokenize(text ?? "").join(" ");
2182
2184
  if (normalizedQuery.length === 0 || normalizedText.length === 0) {
2183
2185
  return 0;
2184
2186
  }
@@ -2190,7 +2192,7 @@ var scoreWeightedField = ({
2190
2192
  query,
2191
2193
  queryTokens,
2192
2194
  text
2193
- }) => scoreTokenCoverage(queryTokens, text) * coverageWeight + scorePhraseMatch(query, text) * phraseWeight;
2195
+ }) => scoreTokenCoverage(queryTokens, text ?? "") * coverageWeight + scorePhraseMatch(query, text ?? "") * phraseWeight;
2194
2196
  var extractWeightedLexicalFields = (result) => {
2195
2197
  const metadata = result.metadata ?? {};
2196
2198
  const source = result.source ?? "";
@@ -2209,7 +2211,7 @@ var extractWeightedLexicalFields = (result) => {
2209
2211
  ].flatMap((value) => collectMetadataStrings(value)).join(" ");
2210
2212
  return {
2211
2213
  archivePath,
2212
- chunkText: result.chunkText,
2214
+ chunkText: result.text,
2213
2215
  mediaSegments,
2214
2216
  metadataFocus,
2215
2217
  metadataText: toFieldText(metadata),
@@ -2217,6 +2219,34 @@ var extractWeightedLexicalFields = (result) => {
2217
2219
  title: result.title ?? ""
2218
2220
  };
2219
2221
  };
2222
+ var FIELD_WEIGHTS = {
2223
+ archivePath: 4.2,
2224
+ chunkText: 1,
2225
+ mediaSegments: 3.8,
2226
+ metadataFocus: 3.2,
2227
+ metadataText: 1.4,
2228
+ source: 3.4,
2229
+ title: 2
2230
+ };
2231
+ var getWeightedFieldTokens = (result) => {
2232
+ const fields = extractWeightedLexicalFields({
2233
+ metadata: result.metadata,
2234
+ source: result.source,
2235
+ text: result.text,
2236
+ title: result.title
2237
+ });
2238
+ return {
2239
+ archivePath: tokenize(fields.archivePath ?? ""),
2240
+ chunkText: tokenize(fields.chunkText ?? ""),
2241
+ mediaSegments: tokenize(fields.mediaSegments ?? ""),
2242
+ metadataFocus: tokenize(fields.metadataFocus ?? ""),
2243
+ metadataText: tokenize(fields.metadataText ?? ""),
2244
+ source: tokenize(fields.source ?? ""),
2245
+ title: tokenize(fields.title ?? "")
2246
+ };
2247
+ };
2248
+ var countWeightedTermFrequency = (fieldTokens, token) => Object.keys(FIELD_WEIGHTS).reduce((total, fieldName) => total + fieldTokens[fieldName].filter((value) => value === token).length * FIELD_WEIGHTS[fieldName], 0);
2249
+ var computeWeightedDocumentLength = (fieldTokens) => Object.keys(FIELD_WEIGHTS).reduce((total, fieldName) => total + fieldTokens[fieldName].length * FIELD_WEIGHTS[fieldName], 0);
2220
2250
  var buildRAGLexicalHaystack = (result) => [
2221
2251
  result.title,
2222
2252
  result.source,
@@ -2229,7 +2259,12 @@ var scoreRAGLexicalMatch = (query, result) => {
2229
2259
  if (queryTokens.length === 0) {
2230
2260
  return 0;
2231
2261
  }
2232
- const fields = extractWeightedLexicalFields(result);
2262
+ const fields = extractWeightedLexicalFields({
2263
+ metadata: result.metadata,
2264
+ source: result.source,
2265
+ text: result.chunkText,
2266
+ title: result.title
2267
+ });
2233
2268
  const haystack = buildRAGLexicalHaystack(result).toLowerCase();
2234
2269
  const overallCoverage = scoreTokenCoverage(queryTokens, haystack);
2235
2270
  if (overallCoverage === 0) {
@@ -2291,6 +2326,65 @@ var scoreRAGLexicalMatch = (query, result) => {
2291
2326
  const archiveBoost = resolveArchiveBoost(queryTokens, result);
2292
2327
  return titleScore + sourceScore + metadataFocusScore + archivePathScore + mediaSegmentScore + metadataScore + chunkScore + coverageBoost + exactPhraseBoost + fileKindBoost + transcriptBoost + archiveBoost;
2293
2328
  };
2329
+ var rankRAGLexicalMatches = (query, results) => {
2330
+ const queryTokens = tokenize(query);
2331
+ if (queryTokens.length === 0 || results.length === 0) {
2332
+ return [];
2333
+ }
2334
+ const candidates = results.map((result) => {
2335
+ const fieldTokens = getWeightedFieldTokens(result);
2336
+ return {
2337
+ fieldTokens,
2338
+ length: computeWeightedDocumentLength(fieldTokens),
2339
+ result
2340
+ };
2341
+ });
2342
+ const averageDocumentLength = candidates.reduce((total, candidate) => total + candidate.length, 0) / Math.max(1, candidates.length);
2343
+ const uniqueQueryTokens = [...new Set(queryTokens)];
2344
+ const documentFrequency = new Map;
2345
+ for (const token of uniqueQueryTokens) {
2346
+ let seen = 0;
2347
+ for (const candidate of candidates) {
2348
+ const tf = countWeightedTermFrequency(candidate.fieldTokens, token);
2349
+ if (tf > 0) {
2350
+ seen += 1;
2351
+ }
2352
+ }
2353
+ documentFrequency.set(token, seen);
2354
+ }
2355
+ return candidates.map((candidate, index) => {
2356
+ let bm25Score = 0;
2357
+ for (const token of uniqueQueryTokens) {
2358
+ const termFrequency = countWeightedTermFrequency(candidate.fieldTokens, token);
2359
+ if (termFrequency <= 0) {
2360
+ continue;
2361
+ }
2362
+ const df = documentFrequency.get(token) ?? 0;
2363
+ const idf = Math.log(1 + (candidates.length - df + 0.5) / (df + 0.5));
2364
+ const denominator = termFrequency + BM25_K1 * (1 - BM25_B + BM25_B * (candidate.length / Math.max(1, averageDocumentLength)));
2365
+ bm25Score += idf * (termFrequency * (BM25_K1 + 1) / Math.max(0.000000001, denominator));
2366
+ }
2367
+ const heuristicScore = scoreRAGLexicalMatch(query, {
2368
+ chunkText: candidate.result.text,
2369
+ metadata: candidate.result.metadata,
2370
+ source: candidate.result.source,
2371
+ title: candidate.result.title
2372
+ });
2373
+ return {
2374
+ index,
2375
+ result: candidate.result,
2376
+ score: bm25Score + heuristicScore * 0.35
2377
+ };
2378
+ }).filter((entry) => entry.score > 0).sort((left, right) => {
2379
+ if (right.score !== left.score) {
2380
+ return right.score - left.score;
2381
+ }
2382
+ return left.index - right.index;
2383
+ }).map(({ result, score }) => ({
2384
+ result,
2385
+ score
2386
+ }));
2387
+ };
2294
2388
  var hasAnyToken = (tokens, values) => values.some((value) => tokens.includes(value));
2295
2389
  var resolveFileKindBoost = (queryTokens, metadata) => {
2296
2390
  const fileKind = typeof metadata?.fileKind === "string" ? metadata.fileKind : "";
@@ -3138,6 +3232,30 @@ var spreadsheetText = (entries) => {
3138
3232
  return normalizeWhitespace(sheetValues.join(`
3139
3233
  `));
3140
3234
  };
3235
+ var spreadsheetSheetTexts = (entries) => {
3236
+ const sharedStrings = entries.filter((entry) => entry.path === "xl/sharedStrings.xml").flatMap((entry) => [
3237
+ ...decodeUtf8(entry.data).matchAll(/<t[^>]*>([\s\S]*?)<\/t>/g)
3238
+ ].map((match) => decodeHtmlEntities(match[1] ?? "")));
3239
+ const sheetNames = spreadsheetSheetNames(entries);
3240
+ const sheetEntries = entries.filter((entry) => entry.path.startsWith("xl/worksheets/") && entry.path.endsWith(".xml")).sort((left, right) => left.path.localeCompare(right.path));
3241
+ return sheetEntries.map((entry, index) => {
3242
+ const values = [
3243
+ ...decodeUtf8(entry.data).matchAll(/<v>([\s\S]*?)<\/v>/g)
3244
+ ].map((match) => match[1] ?? "").map((value) => {
3245
+ const sharedStringIndex = Number(value);
3246
+ return Number.isInteger(sharedStringIndex) && sharedStrings[sharedStringIndex] ? sharedStrings[sharedStringIndex] : value;
3247
+ });
3248
+ const text = normalizeWhitespace(values.join(`
3249
+ `));
3250
+ if (!text) {
3251
+ return null;
3252
+ }
3253
+ return {
3254
+ name: sheetNames[index] ?? `Sheet ${index + 1}`,
3255
+ text
3256
+ };
3257
+ }).filter((entry) => Boolean(entry));
3258
+ };
3141
3259
  var spreadsheetSheetNames = (entries) => entries.filter((entry) => entry.path === "xl/workbook.xml").flatMap((entry) => [
3142
3260
  ...decodeUtf8(entry.data).matchAll(/<sheet[^>]*name="([^"]+)"/g)
3143
3261
  ].map((match) => match[1] ?? "")).filter(Boolean);
@@ -3147,6 +3265,10 @@ var presentationText = (entries) => {
3147
3265
 
3148
3266
  `));
3149
3267
  };
3268
+ var presentationSlides = (entries) => entries.filter((entry) => entry.path.startsWith("ppt/slides/") && entry.path.endsWith(".xml")).sort((left, right) => left.path.localeCompare(right.path)).map((entry, index) => ({
3269
+ index,
3270
+ text: normalizeWhitespace(extractXmlText(decodeUtf8(entry.data)))
3271
+ })).filter((slide) => Boolean(slide.text));
3150
3272
  var presentationSlideCount = (entries) => entries.filter((entry) => entry.path.startsWith("ppt/slides/") && entry.path.endsWith(".xml")).length;
3151
3273
  var epubText = (entries) => {
3152
3274
  const htmlEntries = entries.filter((entry) => /\.(xhtml|html|htm)$/i.test(entry.path));
@@ -3364,6 +3486,7 @@ var createOfficeDocumentExtractor = () => ({
3364
3486
  const entries = unzipEntries(input.data);
3365
3487
  let text = "";
3366
3488
  let officeMetadata = {};
3489
+ let structuredDocuments = [];
3367
3490
  if (extension === ".docx" || extension === ".odt") {
3368
3491
  text = officeDocumentText(entries);
3369
3492
  officeMetadata = {
@@ -3371,19 +3494,53 @@ var createOfficeDocumentExtractor = () => ({
3371
3494
  };
3372
3495
  } else if (extension === ".xlsx" || extension === ".ods") {
3373
3496
  text = spreadsheetText(entries);
3497
+ const sheets = spreadsheetSheetTexts(entries);
3374
3498
  officeMetadata = {
3375
3499
  sheetNames: spreadsheetSheetNames(entries)
3376
3500
  };
3501
+ structuredDocuments = sheets.map((sheet, index) => ({
3502
+ chunking: input.chunking,
3503
+ contentType: input.contentType,
3504
+ format: "text",
3505
+ metadata: {
3506
+ ...input.metadata ?? {},
3507
+ fileKind: "office",
3508
+ ...officeMetadata,
3509
+ sheetIndex: index,
3510
+ sheetName: sheet.name
3511
+ },
3512
+ source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
3513
+ text: `Sheet ${sheet.name}
3514
+ ${sheet.text}`,
3515
+ title: input.title ? `${input.title} \xB7 ${sheet.name}` : sheet.name
3516
+ }));
3377
3517
  } else if (extension === ".pptx" || extension === ".odp") {
3378
3518
  text = presentationText(entries);
3519
+ const slides = presentationSlides(entries);
3379
3520
  officeMetadata = {
3380
3521
  slideCount: presentationSlideCount(entries)
3381
3522
  };
3523
+ structuredDocuments = slides.map((slide) => ({
3524
+ chunking: input.chunking,
3525
+ contentType: input.contentType,
3526
+ format: "text",
3527
+ metadata: {
3528
+ ...input.metadata ?? {},
3529
+ fileKind: "office",
3530
+ ...officeMetadata,
3531
+ slideIndex: slide.index,
3532
+ slideNumber: slide.index + 1
3533
+ },
3534
+ source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
3535
+ text: `Slide ${slide.index + 1}
3536
+ ${slide.text}`,
3537
+ title: input.title ? `${input.title} \xB7 Slide ${slide.index + 1}` : `Slide ${slide.index + 1}`
3538
+ }));
3382
3539
  }
3383
3540
  if (!text) {
3384
3541
  throw new Error(`AbsoluteJS could not extract readable text from ${inferNameFromInput(input)}`);
3385
3542
  }
3386
- return {
3543
+ const summaryDocument = {
3387
3544
  chunking: input.chunking,
3388
3545
  contentType: input.contentType,
3389
3546
  format: "text",
@@ -3396,6 +3553,7 @@ var createOfficeDocumentExtractor = () => ({
3396
3553
  text,
3397
3554
  title: input.title
3398
3555
  };
3556
+ return [summaryDocument, ...structuredDocuments];
3399
3557
  }
3400
3558
  });
3401
3559
  var createRAGArchiveExpander = (expander) => expander;
@@ -3425,7 +3583,36 @@ var createRAGMediaFileExtractor = (transcriber) => ({
3425
3583
  supports: mediaExtractorSupports,
3426
3584
  extract: async (input) => {
3427
3585
  const result = await transcriber.transcribe(input);
3428
- return {
3586
+ const source = input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.media.txt`;
3587
+ const segmentDocuments = [];
3588
+ for (const [index, segment] of (result.segments ?? []).entries()) {
3589
+ const text = normalizeWhitespace(segment.text ?? "");
3590
+ if (!text) {
3591
+ continue;
3592
+ }
3593
+ const startMs = typeof segment.startMs === "number" ? segment.startMs : undefined;
3594
+ const endMs = typeof segment.endMs === "number" ? segment.endMs : undefined;
3595
+ segmentDocuments.push({
3596
+ chunking: input.chunking,
3597
+ contentType: input.contentType,
3598
+ format: "text",
3599
+ metadata: {
3600
+ ...input.metadata ?? {},
3601
+ ...result.metadata ?? {},
3602
+ fileKind: "media",
3603
+ mediaSegmentIndex: index,
3604
+ mediaSegmentStartMs: startMs,
3605
+ mediaSegmentEndMs: endMs,
3606
+ mediaSegments: [segment],
3607
+ speaker: typeof segment.speaker === "string" ? segment.speaker : undefined
3608
+ },
3609
+ source,
3610
+ text: `Transcript segment${typeof startMs === "number" ? ` ${startMs}-${endMs ?? startMs}ms` : ""}
3611
+ ${text}`,
3612
+ title: input.title ? `${input.title} \xB7 Segment ${index + 1}` : `Segment ${index + 1}`
3613
+ });
3614
+ }
3615
+ const summaryDocument = {
3429
3616
  chunking: input.chunking,
3430
3617
  contentType: input.contentType,
3431
3618
  format: "text",
@@ -3435,10 +3622,11 @@ var createRAGMediaFileExtractor = (transcriber) => ({
3435
3622
  fileKind: "media",
3436
3623
  mediaSegments: result.segments
3437
3624
  },
3438
- source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.media.txt`,
3625
+ source,
3439
3626
  text: result.text,
3440
3627
  title: result.title ?? input.title
3441
3628
  };
3629
+ return [summaryDocument, ...segmentDocuments];
3442
3630
  }
3443
3631
  });
3444
3632
  var createRAGMediaTranscriber = (transcriber) => transcriber;
@@ -3470,7 +3658,7 @@ var expandArchiveEntry = async (entry, archiveInput, extractors) => {
3470
3658
  },
3471
3659
  name: basename(entry.path),
3472
3660
  source: archiveInput.source && !archiveInput.source.startsWith("http") ? `${archiveInput.source}#${entry.path}` : entry.path,
3473
- title: archiveInput.title
3661
+ title: basename(entry.path)
3474
3662
  }, extractors);
3475
3663
  return documents;
3476
3664
  };
@@ -3599,6 +3787,7 @@ var getFirstExtractedDocument = (documents, label) => {
3599
3787
  }
3600
3788
  return document;
3601
3789
  };
3790
+ var loadExtractedDocuments = async (input, extractors) => extractRAGFileDocuments(input, extractors);
3602
3791
  var sentenceUnits = (text) => {
3603
3792
  const matches = text.match(/[^.!?\n]+(?:[.!?]+|$)/g);
3604
3793
  if (!matches) {
@@ -3821,32 +4010,55 @@ var loadRAGDocumentFromURL = async (input) => {
3821
4010
  };
3822
4011
  var loadRAGDocumentsFromUploads = async (input) => {
3823
4012
  const documents = await Promise.all(input.uploads.map(async (upload) => {
3824
- const loaded = await loadRAGDocumentUpload({
3825
- ...upload,
3826
- extractors: input.extractors
3827
- });
3828
- return {
3829
- ...loaded,
3830
- metadata: mergeMetadata(loaded.metadata, { uploadFile: upload.name }, input.baseMetadata)
3831
- };
4013
+ const loaded = await loadExtractedDocuments({
4014
+ chunking: upload.chunking,
4015
+ contentType: upload.contentType,
4016
+ data: decodeUploadContent(upload),
4017
+ format: upload.format,
4018
+ metadata: upload.metadata,
4019
+ name: upload.name,
4020
+ source: upload.source ?? upload.name,
4021
+ title: upload.title
4022
+ }, input.extractors);
4023
+ return loaded.map((document) => ({
4024
+ ...document,
4025
+ metadata: mergeMetadata(document.metadata, { uploadFile: upload.name }, input.baseMetadata)
4026
+ }));
3832
4027
  }));
3833
4028
  return {
3834
4029
  defaultChunking: input.defaultChunking,
3835
- documents
4030
+ documents: documents.flat()
3836
4031
  };
3837
4032
  };
3838
4033
  var loadRAGDocumentsFromURLs = async (input) => {
3839
- const documents = await Promise.all(input.urls.map(async (urlInput) => loadRAGDocumentFromURL({
3840
- ...urlInput,
3841
- metadata: mergeMetadata(urlInput.metadata, {
3842
- sourceUrl: urlInput.url
3843
- }, input.baseMetadata),
3844
- contentType: urlInput.contentType,
3845
- extractors: urlInput.extractors ?? input.extractors
3846
- })));
4034
+ const documents = await Promise.all(input.urls.map(async (urlInput) => {
4035
+ const url = urlInput.url.trim();
4036
+ if (!url) {
4037
+ throw new Error("RAG URL is required");
4038
+ }
4039
+ const response = await fetch(url);
4040
+ if (!response.ok) {
4041
+ throw new Error(`Failed to fetch RAG URL ${url}: ${response.status} ${response.statusText}`);
4042
+ }
4043
+ const data = new Uint8Array(await response.arrayBuffer());
4044
+ const loaded = await loadExtractedDocuments({
4045
+ chunking: urlInput.chunking,
4046
+ contentType: urlInput.contentType ?? response.headers.get("content-type") ?? undefined,
4047
+ data,
4048
+ format: urlInput.format ?? inferFormatFromUrl(url),
4049
+ metadata: urlInput.metadata,
4050
+ name: basename(new URL(url).pathname),
4051
+ source: urlInput.source ?? url,
4052
+ title: urlInput.title
4053
+ }, urlInput.extractors ?? input.extractors);
4054
+ return loaded.map((document) => ({
4055
+ ...document,
4056
+ metadata: mergeMetadata(document.metadata, { sourceUrl: urlInput.url }, input.baseMetadata)
4057
+ }));
4058
+ }));
3847
4059
  return {
3848
4060
  defaultChunking: input.defaultChunking,
3849
- documents
4061
+ documents: documents.flat()
3850
4062
  };
3851
4063
  };
3852
4064
  var loadRAGDocumentUpload = async (input) => {
@@ -3926,21 +4138,25 @@ var loadRAGDocumentsFromDirectory = async (input) => {
3926
4138
  const files = await collectDirectoryFiles(root, input.recursive !== false, includeExtensions);
3927
4139
  const documents = await Promise.all(files.map(async (path) => {
3928
4140
  const source = relative(root, path).replace(/\\/g, "/");
3929
- const loaded = await loadRAGDocumentFile({
4141
+ const data = await readFile(path);
4142
+ const loaded = await loadExtractedDocuments({
4143
+ chunking: input.defaultChunking,
4144
+ data,
3930
4145
  metadata: {
3931
- ...input.baseMetadata ?? {},
3932
4146
  fileName: basename(path),
3933
4147
  relativePath: source
3934
4148
  },
3935
4149
  path,
3936
- source,
3937
- extractors: input.extractors
3938
- });
3939
- return loaded;
4150
+ source
4151
+ }, input.extractors);
4152
+ return loaded.map((document) => ({
4153
+ ...document,
4154
+ metadata: mergeMetadata(document.metadata, undefined, input.baseMetadata)
4155
+ }));
3940
4156
  }));
3941
4157
  return {
3942
4158
  defaultChunking: input.defaultChunking,
3943
- documents
4159
+ documents: documents.flat()
3944
4160
  };
3945
4161
  };
3946
4162
  var prepareRAGDirectoryDocuments = async (input) => prepareRAGDocuments(await loadRAGDocumentsFromDirectory(input));
@@ -6860,27 +7076,15 @@ var createInMemoryRAGStore = (options = {}) => {
6860
7076
  }));
6861
7077
  };
6862
7078
  const queryLexical = async (input) => {
6863
- const results = chunks.map((chunk) => ({
6864
- chunk,
6865
- score: scoreRAGLexicalMatch(input.query, {
6866
- chunkText: chunk.text,
6867
- metadata: chunk.metadata,
6868
- source: chunk.source,
6869
- title: chunk.title
6870
- })
6871
- })).filter(({ chunk }) => matchesFilter(chunk, input.filter)).filter(({ score }) => score > 0).sort((left, right) => {
6872
- if (right.score !== left.score) {
6873
- return right.score - left.score;
6874
- }
6875
- return left.chunk.chunkId.localeCompare(right.chunk.chunkId);
6876
- });
6877
- return results.slice(0, input.topK).map((entry) => ({
6878
- chunkId: entry.chunk.chunkId,
6879
- chunkText: entry.chunk.text,
6880
- metadata: entry.chunk.metadata,
6881
- score: entry.score,
6882
- source: entry.chunk.source,
6883
- title: entry.chunk.title
7079
+ const filtered = chunks.filter((chunk) => matchesFilter(chunk, input.filter));
7080
+ const ranked = rankRAGLexicalMatches(input.query, filtered);
7081
+ return ranked.slice(0, input.topK).map(({ result, score }) => ({
7082
+ chunkId: result.chunkId,
7083
+ chunkText: result.text,
7084
+ metadata: result.metadata,
7085
+ score,
7086
+ source: result.source,
7087
+ title: result.title
6884
7088
  }));
6885
7089
  };
6886
7090
  const upsert = async (input) => {
@@ -7511,27 +7715,15 @@ var createSQLiteRAGStore = (options = {}) => {
7511
7715
  };
7512
7716
  const queryLexical = async (input) => {
7513
7717
  const rawRows = toStoredRows(jsonStatements.query.all());
7514
- const chunks = mapFilterToRows(rawRows).filter((chunk) => matchesFilter(chunk, input.filter)).map((chunk) => ({
7515
- chunk,
7516
- score: scoreRAGLexicalMatch(input.query, {
7517
- chunkText: chunk.text,
7518
- metadata: chunk.metadata,
7519
- source: chunk.source,
7520
- title: chunk.title
7521
- })
7522
- })).filter(({ score }) => score > 0).sort((left, right) => {
7523
- if (right.score !== left.score) {
7524
- return right.score - left.score;
7525
- }
7526
- return left.chunk.chunkId.localeCompare(right.chunk.chunkId);
7527
- });
7528
- return chunks.slice(0, input.topK).map(({ chunk, score }) => ({
7529
- chunkId: chunk.chunkId,
7530
- chunkText: chunk.text,
7531
- metadata: chunk.metadata,
7718
+ const chunks = mapFilterToRows(rawRows).filter((chunk) => matchesFilter(chunk, input.filter));
7719
+ const ranked = rankRAGLexicalMatches(input.query, chunks);
7720
+ return ranked.slice(0, input.topK).map(({ result, score }) => ({
7721
+ chunkId: result.chunkId,
7722
+ chunkText: result.text,
7723
+ metadata: result.metadata,
7532
7724
  score,
7533
- source: chunk.source,
7534
- title: chunk.title
7725
+ source: result.source,
7726
+ title: result.title
7535
7727
  }));
7536
7728
  };
7537
7729
  const upsert = async (input) => {
@@ -8703,5 +8895,5 @@ export {
8703
8895
  aiChat
8704
8896
  };
8705
8897
 
8706
- //# debugId=36F6407CE8163A4F64756E2164756E21
8898
+ //# debugId=A1829EEFE0D80F9264756E2164756E21
8707
8899
  //# sourceMappingURL=index.js.map