@absolutejs/absolute 0.19.0-beta.495 → 0.19.0-beta.497

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -2165,6 +2165,31 @@ var collectMetadataStrings = (value) => {
2165
2165
  };
2166
2166
  var normalizeSourceForLexical = (source) => source.replace(/[#/_.-]+/g, " ").replace(/\bmd\b/g, "markdown").replace(/\bpptx\b/g, "presentation").replace(/\bxlsx\b/g, "spreadsheet workbook sheet").replace(/\bmp3\b/g, "audio transcript media").replace(/\bmp4\b/g, "video transcript media").replace(/\bzip\b/g, "archive bundle");
2167
2167
  var toFieldText = (value) => collectMetadataStrings(value).filter(Boolean).join(" ");
2168
+ var normalizeLooseText = (value) => value.toLowerCase().replace(/[^a-z0-9]+/g, " ").trim().replace(/\s+/g, " ");
2169
+ var scoreLoosePhraseMatch = (query, text) => {
2170
+ const normalizedQuery = normalizeLooseText(query);
2171
+ const normalizedText = normalizeLooseText(text ?? "");
2172
+ if (normalizedQuery.length === 0 || normalizedText.length === 0) {
2173
+ return 0;
2174
+ }
2175
+ if (normalizedText.includes(normalizedQuery)) {
2176
+ return 1;
2177
+ }
2178
+ const words = normalizedQuery.split(" ").filter(Boolean);
2179
+ for (let size = Math.min(5, words.length);size >= 2; size -= 1) {
2180
+ for (let index = 0;index <= words.length - size; index += 1) {
2181
+ const phraseWords = words.slice(index, index + size);
2182
+ if (phraseWords.every((word) => STOP_WORDS.has(word))) {
2183
+ continue;
2184
+ }
2185
+ const phrase = phraseWords.join(" ");
2186
+ if (normalizedText.includes(phrase)) {
2187
+ return Math.min(1, size / 4);
2188
+ }
2189
+ }
2190
+ }
2191
+ return 0;
2192
+ };
2168
2193
  var scoreTokenCoverage = (queryTokens, text) => {
2169
2194
  const normalizedText = (text ?? "").toLowerCase();
2170
2195
  if (normalizedText.length === 0) {
@@ -2181,10 +2206,8 @@ var scoreTokenCoverage = (queryTokens, text) => {
2181
2206
  var scorePhraseMatch = (query, text) => {
2182
2207
  const normalizedQuery = tokenize(query).join(" ");
2183
2208
  const normalizedText = tokenize(text ?? "").join(" ");
2184
- if (normalizedQuery.length === 0 || normalizedText.length === 0) {
2185
- return 0;
2186
- }
2187
- return normalizedText.includes(normalizedQuery) ? 1 : 0;
2209
+ const tokenPhraseMatch = normalizedQuery.length > 0 && normalizedText.length > 0 ? normalizedText.includes(normalizedQuery) ? 1 : 0 : 0;
2210
+ return Math.max(tokenPhraseMatch, scoreLoosePhraseMatch(query, text ?? ""));
2188
2211
  };
2189
2212
  var scoreWeightedField = ({
2190
2213
  coverageWeight,
@@ -2199,8 +2222,10 @@ var extractWeightedLexicalFields = (result) => {
2199
2222
  const archivePath = typeof metadata.archivePath === "string" ? metadata.archivePath : source.includes("#") ? source.split("#")[1] ?? "" : "";
2200
2223
  const mediaSegments = Array.isArray(metadata.mediaSegments) ? metadata.mediaSegments.map((segment) => segment && typeof segment === "object" ? toFieldText(segment) : "").filter(Boolean).join(" ") : "";
2201
2224
  const metadataFocus = [
2225
+ metadata.sourceNativeKind,
2202
2226
  metadata.sheetName,
2203
2227
  metadata.sheetNames,
2228
+ metadata.slideNumber,
2204
2229
  metadata.slideTitle,
2205
2230
  metadata.slideTitles,
2206
2231
  metadata.threadTopic,
@@ -2772,7 +2797,36 @@ var collectMetadataStrings2 = (value) => {
2772
2797
  }
2773
2798
  return [];
2774
2799
  };
2775
- var scoreHeuristicMatch = (queryTokens, result) => {
2800
+ var normalizeLooseText2 = (value) => value.toLowerCase().replace(/[^a-z0-9]+/g, " ").trim().replace(/\s+/g, " ");
2801
+ var scoreLoosePhraseMatch2 = (query, text) => {
2802
+ const normalizedQuery = normalizeLooseText2(query);
2803
+ const normalizedText = normalizeLooseText2(text);
2804
+ if (normalizedQuery.length === 0 || normalizedText.length === 0) {
2805
+ return 0;
2806
+ }
2807
+ if (normalizedText.includes(normalizedQuery)) {
2808
+ return 1;
2809
+ }
2810
+ const words = normalizedQuery.split(" ").filter(Boolean);
2811
+ for (let size = Math.min(5, words.length);size >= 2; size -= 1) {
2812
+ for (let index = 0;index <= words.length - size; index += 1) {
2813
+ const phraseWords = words.slice(index, index + size);
2814
+ if (phraseWords.every((word) => STOP_WORDS3.has(word))) {
2815
+ continue;
2816
+ }
2817
+ const phrase = phraseWords.join(" ");
2818
+ if (normalizedText.includes(phrase)) {
2819
+ return Math.min(1, size / 4);
2820
+ }
2821
+ }
2822
+ }
2823
+ return 0;
2824
+ };
2825
+ var scoreHeuristicMatch = ({
2826
+ query,
2827
+ queryTokens,
2828
+ result
2829
+ }) => {
2776
2830
  if (queryTokens.length === 0) {
2777
2831
  return result.score;
2778
2832
  }
@@ -2781,8 +2835,7 @@ var scoreHeuristicMatch = (queryTokens, result) => {
2781
2835
  const haystackSet = new Set(haystack);
2782
2836
  const overlap = queryTokens.filter((token) => haystackSet.has(token)).length;
2783
2837
  const overlapBoost = overlap / queryTokens.length;
2784
- const normalizedQuery = queryTokens.join(" ");
2785
- const exactPhraseBoost = normalizeText([result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")).includes(normalizedQuery) ? 1 : 0;
2838
+ const exactPhraseBoost = Math.max(normalizeText([result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")).includes(queryTokens.join(" ")) ? 1 : 0, scoreLoosePhraseMatch2(query, [result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")));
2786
2839
  const sourcePathBoost = typeof result.source === "string" && queryTokens.some((token) => result.source?.toLowerCase().includes(token)) ? 0.5 : 0;
2787
2840
  const metadataBoost = metadataValues.length > 0 ? queryTokens.filter((token) => metadataValues.some((value) => value.toLowerCase().includes(token))).length / queryTokens.length : 0;
2788
2841
  return result.score + overlapBoost + exactPhraseBoost + sourcePathBoost + metadataBoost;
@@ -2810,7 +2863,11 @@ var createHeuristicRAGReranker = (options = {}) => createRAGReranker({
2810
2863
  return [...results].map((result, index) => ({
2811
2864
  index,
2812
2865
  result,
2813
- score: scoreHeuristicMatch(queryTokens, result)
2866
+ score: scoreHeuristicMatch({
2867
+ query,
2868
+ queryTokens,
2869
+ result
2870
+ })
2814
2871
  })).sort((left, right) => {
2815
2872
  if (right.score !== left.score) {
2816
2873
  return right.score - left.score;
@@ -3506,12 +3563,13 @@ var createOfficeDocumentExtractor = () => ({
3506
3563
  ...input.metadata ?? {},
3507
3564
  fileKind: "office",
3508
3565
  ...officeMetadata,
3566
+ sourceNativeKind: "spreadsheet_sheet",
3509
3567
  sheetIndex: index,
3510
3568
  sheetName: sheet.name
3511
3569
  },
3512
3570
  source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
3513
- text: `Sheet ${sheet.name}
3514
- ${sheet.text}`,
3571
+ text: normalizeWhitespace(`Spreadsheet sheet ${sheet.name} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
3572
+ ${sheet.text}`),
3515
3573
  title: input.title ? `${input.title} \xB7 ${sheet.name}` : sheet.name
3516
3574
  }));
3517
3575
  } else if (extension === ".pptx" || extension === ".odp") {
@@ -3528,12 +3586,13 @@ ${sheet.text}`,
3528
3586
  ...input.metadata ?? {},
3529
3587
  fileKind: "office",
3530
3588
  ...officeMetadata,
3589
+ sourceNativeKind: "presentation_slide",
3531
3590
  slideIndex: slide.index,
3532
3591
  slideNumber: slide.index + 1
3533
3592
  },
3534
3593
  source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
3535
- text: `Slide ${slide.index + 1}
3536
- ${slide.text}`,
3594
+ text: normalizeWhitespace(`Presentation slide ${slide.index + 1} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
3595
+ ${slide.text}`),
3537
3596
  title: input.title ? `${input.title} \xB7 Slide ${slide.index + 1}` : `Slide ${slide.index + 1}`
3538
3597
  }));
3539
3598
  }
@@ -3600,6 +3659,7 @@ var createRAGMediaFileExtractor = (transcriber) => ({
3600
3659
  ...input.metadata ?? {},
3601
3660
  ...result.metadata ?? {},
3602
3661
  fileKind: "media",
3662
+ sourceNativeKind: "media_segment",
3603
3663
  mediaSegmentIndex: index,
3604
3664
  mediaSegmentStartMs: startMs,
3605
3665
  mediaSegmentEndMs: endMs,
@@ -3607,8 +3667,8 @@ var createRAGMediaFileExtractor = (transcriber) => ({
3607
3667
  speaker: typeof segment.speaker === "string" ? segment.speaker : undefined
3608
3668
  },
3609
3669
  source,
3610
- text: `Transcript segment${typeof startMs === "number" ? ` ${startMs}-${endMs ?? startMs}ms` : ""}
3611
- ${text}`,
3670
+ text: normalizeWhitespace(`Media transcript segment${typeof startMs === "number" ? ` ${startMs}-${endMs ?? startMs}ms` : ""} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
3671
+ ${text}`),
3612
3672
  title: input.title ? `${input.title} \xB7 Segment ${index + 1}` : `Segment ${index + 1}`
3613
3673
  });
3614
3674
  }
@@ -8895,5 +8955,5 @@ export {
8895
8955
  aiChat
8896
8956
  };
8897
8957
 
8898
- //# debugId=A1829EEFE0D80F9264756E2164756E21
8958
+ //# debugId=55FD05298CEAFBDB64756E2164756E21
8899
8959
  //# sourceMappingURL=index.js.map