@absolutejs/absolute 0.19.0-beta.495 → 0.19.0-beta.497
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those changes as they appear in the respective public registries.
- package/dist/ai/index.js +75 -15
- package/dist/ai/index.js.map +5 -5
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -2165,6 +2165,31 @@ var collectMetadataStrings = (value) => {
|
|
|
2165
2165
|
};
|
|
2166
2166
|
/**
 * Turns a source path/identifier into space-separated words for lexical matching.
 *
 * Runs of `#`, `/`, `_`, `.` and `-` become a single space, then a few
 * stand-alone lowercase file-extension words are expanded into descriptive
 * synonyms (for example `md` becomes `markdown`).
 *
 * @param {string} source - Source path or identifier.
 * @returns {string} The rewritten, space-separated text.
 */
var normalizeSourceForLexical = (source) => {
  // Applied in this order, after separators have been turned into spaces.
  const extensionSynonyms = [
    [/\bmd\b/g, "markdown"],
    [/\bpptx\b/g, "presentation"],
    [/\bxlsx\b/g, "spreadsheet workbook sheet"],
    [/\bmp3\b/g, "audio transcript media"],
    [/\bmp4\b/g, "video transcript media"],
    [/\bzip\b/g, "archive bundle"],
  ];
  let expanded = source.replace(/[#/_.-]+/g, " ");
  for (const [pattern, synonym] of extensionSynonyms) {
    expanded = expanded.replace(pattern, synonym);
  }
  return expanded;
};
|
|
2167
2167
|
/**
 * Builds one space-separated string from the strings that
 * `collectMetadataStrings` returns for `value`, omitting falsy entries.
 *
 * @param {*} value - Value handed to `collectMetadataStrings`.
 * @returns {string} The remaining entries joined with single spaces.
 */
var toFieldText = (value) => {
  const collected = collectMetadataStrings(value);
  const nonEmpty = collected.filter((entry) => Boolean(entry));
  return nonEmpty.join(" ");
};
|
|
2168
|
+
/**
 * Normalizes free text for loose phrase comparison.
 *
 * The value is NFKC-normalized and lower-cased, every run of characters that
 * is not a letter, combining mark or digit (in any script) is replaced by a
 * single space, and the result is trimmed. ASCII input yields the same output
 * as the former `[^a-z0-9]` class; accented and non-Latin text is now kept
 * instead of being erased, so such queries can still match.
 *
 * @param {string | null | undefined} value - Text to normalize; nullish is treated as "".
 * @returns {string} Lower-cased words separated by single spaces.
 */
var normalizeLooseText = (value) => String(value ?? "").normalize("NFKC").toLowerCase().replace(/[^\p{L}\p{M}\p{N}]+/gu, " ").trim();
|
|
2169
|
+
/**
 * Scores how much of `query` appears as a contiguous phrase inside `text`,
 * after both have been passed through `normalizeLooseText`.
 *
 * - 0 when either normalized string is empty.
 * - 1 when the normalized text contains the whole normalized query.
 * - Otherwise, contiguous windows of query words are tried from
 *   min(5, word count) words down to 2 words; the first window found in the
 *   text yields min(1, size / 4), i.e. 0.5 for 2 words, 0.75 for 3 and 1 for
 *   4 or 5. Windows made up only of STOP_WORDS entries are skipped.
 * - 0 when nothing matches.
 *
 * NOTE(review): matching uses `String.prototype.includes`, so a phrase can be
 * found starting or ending in the middle of a word of the text - confirm this
 * is intended.
 *
 * @param {string} query - Raw query text.
 * @param {string | null | undefined} text - Candidate text; nullish is treated as "".
 * @returns {number} A score between 0 and 1.
 */
var scoreLoosePhraseMatch = (query, text) => {
|
|
2170
|
+
const normalizedQuery = normalizeLooseText(query);
|
|
2171
|
+
const normalizedText = normalizeLooseText(text ?? "");
|
|
2172
|
+
if (normalizedQuery.length === 0 || normalizedText.length === 0) {
|
|
2173
|
+
return 0;
|
|
2174
|
+
}
|
|
2175
|
+
if (normalizedText.includes(normalizedQuery)) {
|
|
2176
|
+
return 1;
|
|
2177
|
+
}
|
|
2178
|
+
const words = normalizedQuery.split(" ").filter(Boolean);
|
|
2179
|
+
// Longest windows first, so the first hit is also the highest-scoring one.
for (let size = Math.min(5, words.length);size >= 2; size -= 1) {
|
|
2180
|
+
for (let index = 0;index <= words.length - size; index += 1) {
|
|
2181
|
+
const phraseWords = words.slice(index, index + size);
|
|
2182
|
+
// A window consisting solely of stop words is not allowed to produce a match.
if (phraseWords.every((word) => STOP_WORDS.has(word))) {
|
|
2183
|
+
continue;
|
|
2184
|
+
}
|
|
2185
|
+
const phrase = phraseWords.join(" ");
|
|
2186
|
+
if (normalizedText.includes(phrase)) {
|
|
2187
|
+
return Math.min(1, size / 4);
|
|
2188
|
+
}
|
|
2189
|
+
}
|
|
2190
|
+
}
|
|
2191
|
+
return 0;
|
|
2192
|
+
};
|
|
2168
2193
|
var scoreTokenCoverage = (queryTokens, text) => {
|
|
2169
2194
|
const normalizedText = (text ?? "").toLowerCase();
|
|
2170
2195
|
if (normalizedText.length === 0) {
|
|
@@ -2181,10 +2206,8 @@ var scoreTokenCoverage = (queryTokens, text) => {
|
|
|
2181
2206
|
/**
 * Scores whether `query` occurs as a phrase in `text`.
 *
 * Both inputs are run through `tokenize` and re-joined with single spaces.
 * In the rows marked `+`, `tokenPhraseMatch` is 1 only when both joined
 * strings are non-empty and the text string contains the query string, and
 * the function returns the larger of that value and
 * `scoreLoosePhraseMatch(query, text ?? "")`.
 * The rows marked `-` show the earlier return, which was the plain 1/0
 * containment result.
 *
 * @param {string} query - Raw query text.
 * @param {string | null | undefined} text - Candidate text; nullish is treated as "".
 * @returns {number} The phrase-match score.
 */
var scorePhraseMatch = (query, text) => {
|
|
2182
2207
|
const normalizedQuery = tokenize(query).join(" ");
|
|
2183
2208
|
const normalizedText = tokenize(text ?? "").join(" ");
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
}
|
|
2187
|
-
return normalizedText.includes(normalizedQuery) ? 1 : 0;
|
|
2209
|
+
const tokenPhraseMatch = normalizedQuery.length > 0 && normalizedText.length > 0 ? normalizedText.includes(normalizedQuery) ? 1 : 0 : 0;
|
|
2210
|
+
return Math.max(tokenPhraseMatch, scoreLoosePhraseMatch(query, text ?? ""));
|
|
2188
2211
|
};
|
|
2189
2212
|
var scoreWeightedField = ({
|
|
2190
2213
|
coverageWeight,
|
|
@@ -2199,8 +2222,10 @@ var extractWeightedLexicalFields = (result) => {
|
|
|
2199
2222
|
const archivePath = typeof metadata.archivePath === "string" ? metadata.archivePath : source.includes("#") ? source.split("#")[1] ?? "" : "";
|
|
2200
2223
|
const mediaSegments = Array.isArray(metadata.mediaSegments) ? metadata.mediaSegments.map((segment) => segment && typeof segment === "object" ? toFieldText(segment) : "").filter(Boolean).join(" ") : "";
|
|
2201
2224
|
const metadataFocus = [
|
|
2225
|
+
metadata.sourceNativeKind,
|
|
2202
2226
|
metadata.sheetName,
|
|
2203
2227
|
metadata.sheetNames,
|
|
2228
|
+
metadata.slideNumber,
|
|
2204
2229
|
metadata.slideTitle,
|
|
2205
2230
|
metadata.slideTitles,
|
|
2206
2231
|
metadata.threadTopic,
|
|
@@ -2772,7 +2797,36 @@ var collectMetadataStrings2 = (value) => {
|
|
|
2772
2797
|
}
|
|
2773
2798
|
return [];
|
|
2774
2799
|
};
|
|
2775
|
-
var
|
|
2800
|
+
/**
 * Normalizes free text for loose phrase comparison.
 *
 * The value is NFKC-normalized and lower-cased, every run of characters that
 * is not a letter, combining mark or digit (in any script) is replaced by a
 * single space, and the result is trimmed. ASCII input yields the same output
 * as the former `[^a-z0-9]` class; accented and non-Latin text is now kept
 * instead of being erased, so such queries can still match.
 *
 * @param {string | null | undefined} value - Text to normalize; nullish is treated as "".
 * @returns {string} Lower-cased words separated by single spaces.
 */
var normalizeLooseText2 = (value) => String(value ?? "").normalize("NFKC").toLowerCase().replace(/[^\p{L}\p{M}\p{N}]+/gu, " ").trim();
|
|
2801
|
+
/**
 * Scores how much of `query` appears as a contiguous phrase inside `text`,
 * after both have been passed through `normalizeLooseText2`.
 *
 * - 0 when either normalized string is empty.
 * - 1 when the normalized text contains the whole normalized query.
 * - Otherwise, contiguous windows of query words are tried from
 *   min(5, word count) words down to 2 words; the first window found in the
 *   text yields min(1, size / 4), i.e. 0.5 for 2 words, 0.75 for 3 and 1 for
 *   4 or 5. Windows made up only of STOP_WORDS3 entries are skipped.
 * - 0 when nothing matches.
 *
 * NOTE(review): `text` is handed to `normalizeLooseText2` without a nullish
 * fallback, so callers are presumably expected to pass a string - confirm.
 *
 * @param {string} query - Raw query text.
 * @param {string} text - Candidate text to search.
 * @returns {number} A score between 0 and 1.
 */
var scoreLoosePhraseMatch2 = (query, text) => {
|
|
2802
|
+
const normalizedQuery = normalizeLooseText2(query);
|
|
2803
|
+
const normalizedText = normalizeLooseText2(text);
|
|
2804
|
+
if (normalizedQuery.length === 0 || normalizedText.length === 0) {
|
|
2805
|
+
return 0;
|
|
2806
|
+
}
|
|
2807
|
+
if (normalizedText.includes(normalizedQuery)) {
|
|
2808
|
+
return 1;
|
|
2809
|
+
}
|
|
2810
|
+
const words = normalizedQuery.split(" ").filter(Boolean);
|
|
2811
|
+
// Longest windows first, so the first hit is also the highest-scoring one.
for (let size = Math.min(5, words.length);size >= 2; size -= 1) {
|
|
2812
|
+
for (let index = 0;index <= words.length - size; index += 1) {
|
|
2813
|
+
const phraseWords = words.slice(index, index + size);
|
|
2814
|
+
// A window consisting solely of stop words is not allowed to produce a match.
if (phraseWords.every((word) => STOP_WORDS3.has(word))) {
|
|
2815
|
+
continue;
|
|
2816
|
+
}
|
|
2817
|
+
const phrase = phraseWords.join(" ");
|
|
2818
|
+
if (normalizedText.includes(phrase)) {
|
|
2819
|
+
return Math.min(1, size / 4);
|
|
2820
|
+
}
|
|
2821
|
+
}
|
|
2822
|
+
}
|
|
2823
|
+
return 0;
|
|
2824
|
+
};
|
|
2825
|
+
var scoreHeuristicMatch = ({
|
|
2826
|
+
query,
|
|
2827
|
+
queryTokens,
|
|
2828
|
+
result
|
|
2829
|
+
}) => {
|
|
2776
2830
|
if (queryTokens.length === 0) {
|
|
2777
2831
|
return result.score;
|
|
2778
2832
|
}
|
|
@@ -2781,8 +2835,7 @@ var scoreHeuristicMatch = (queryTokens, result) => {
|
|
|
2781
2835
|
const haystackSet = new Set(haystack);
|
|
2782
2836
|
const overlap = queryTokens.filter((token) => haystackSet.has(token)).length;
|
|
2783
2837
|
const overlapBoost = overlap / queryTokens.length;
|
|
2784
|
-
const
|
|
2785
|
-
const exactPhraseBoost = normalizeText([result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")).includes(normalizedQuery) ? 1 : 0;
|
|
2838
|
+
const exactPhraseBoost = Math.max(normalizeText([result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")).includes(queryTokens.join(" ")) ? 1 : 0, scoreLoosePhraseMatch2(query, [result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")));
|
|
2786
2839
|
const sourcePathBoost = typeof result.source === "string" && queryTokens.some((token) => result.source?.toLowerCase().includes(token)) ? 0.5 : 0;
|
|
2787
2840
|
const metadataBoost = metadataValues.length > 0 ? queryTokens.filter((token) => metadataValues.some((value) => value.toLowerCase().includes(token))).length / queryTokens.length : 0;
|
|
2788
2841
|
return result.score + overlapBoost + exactPhraseBoost + sourcePathBoost + metadataBoost;
|
|
@@ -2810,7 +2863,11 @@ var createHeuristicRAGReranker = (options = {}) => createRAGReranker({
|
|
|
2810
2863
|
return [...results].map((result, index) => ({
|
|
2811
2864
|
index,
|
|
2812
2865
|
result,
|
|
2813
|
-
score: scoreHeuristicMatch(
|
|
2866
|
+
score: scoreHeuristicMatch({
|
|
2867
|
+
query,
|
|
2868
|
+
queryTokens,
|
|
2869
|
+
result
|
|
2870
|
+
})
|
|
2814
2871
|
})).sort((left, right) => {
|
|
2815
2872
|
if (right.score !== left.score) {
|
|
2816
2873
|
return right.score - left.score;
|
|
@@ -3506,12 +3563,13 @@ var createOfficeDocumentExtractor = () => ({
|
|
|
3506
3563
|
...input.metadata ?? {},
|
|
3507
3564
|
fileKind: "office",
|
|
3508
3565
|
...officeMetadata,
|
|
3566
|
+
sourceNativeKind: "spreadsheet_sheet",
|
|
3509
3567
|
sheetIndex: index,
|
|
3510
3568
|
sheetName: sheet.name
|
|
3511
3569
|
},
|
|
3512
3570
|
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
|
|
3513
|
-
text: `
|
|
3514
|
-
${sheet.text}
|
|
3571
|
+
text: normalizeWhitespace(`Spreadsheet sheet ${sheet.name} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
|
|
3572
|
+
${sheet.text}`),
|
|
3515
3573
|
title: input.title ? `${input.title} \xB7 ${sheet.name}` : sheet.name
|
|
3516
3574
|
}));
|
|
3517
3575
|
} else if (extension === ".pptx" || extension === ".odp") {
|
|
@@ -3528,12 +3586,13 @@ ${sheet.text}`,
|
|
|
3528
3586
|
...input.metadata ?? {},
|
|
3529
3587
|
fileKind: "office",
|
|
3530
3588
|
...officeMetadata,
|
|
3589
|
+
sourceNativeKind: "presentation_slide",
|
|
3531
3590
|
slideIndex: slide.index,
|
|
3532
3591
|
slideNumber: slide.index + 1
|
|
3533
3592
|
},
|
|
3534
3593
|
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
|
|
3535
|
-
text: `
|
|
3536
|
-
${slide.text}
|
|
3594
|
+
text: normalizeWhitespace(`Presentation slide ${slide.index + 1} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
|
|
3595
|
+
${slide.text}`),
|
|
3537
3596
|
title: input.title ? `${input.title} \xB7 Slide ${slide.index + 1}` : `Slide ${slide.index + 1}`
|
|
3538
3597
|
}));
|
|
3539
3598
|
}
|
|
@@ -3600,6 +3659,7 @@ var createRAGMediaFileExtractor = (transcriber) => ({
|
|
|
3600
3659
|
...input.metadata ?? {},
|
|
3601
3660
|
...result.metadata ?? {},
|
|
3602
3661
|
fileKind: "media",
|
|
3662
|
+
sourceNativeKind: "media_segment",
|
|
3603
3663
|
mediaSegmentIndex: index,
|
|
3604
3664
|
mediaSegmentStartMs: startMs,
|
|
3605
3665
|
mediaSegmentEndMs: endMs,
|
|
@@ -3607,8 +3667,8 @@ var createRAGMediaFileExtractor = (transcriber) => ({
|
|
|
3607
3667
|
speaker: typeof segment.speaker === "string" ? segment.speaker : undefined
|
|
3608
3668
|
},
|
|
3609
3669
|
source,
|
|
3610
|
-
text: `
|
|
3611
|
-
${text}
|
|
3670
|
+
text: normalizeWhitespace(`Media transcript segment${typeof startMs === "number" ? ` ${startMs}-${endMs ?? startMs}ms` : ""} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
|
|
3671
|
+
${text}`),
|
|
3612
3672
|
title: input.title ? `${input.title} \xB7 Segment ${index + 1}` : `Segment ${index + 1}`
|
|
3613
3673
|
});
|
|
3614
3674
|
}
|
|
@@ -8895,5 +8955,5 @@ export {
|
|
|
8895
8955
|
aiChat
|
|
8896
8956
|
};
|
|
8897
8957
|
|
|
8898
|
-
//# debugId=
|
|
8958
|
+
//# debugId=55FD05298CEAFBDB64756E2164756E21
|
|
8899
8959
|
//# sourceMappingURL=index.js.map
|