@absolutejs/absolute 0.19.0-beta.491 → 0.19.0-beta.493
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.js +116 -11
- package/dist/ai/index.js.map +4 -4
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -2161,10 +2161,66 @@ var collectMetadataStrings = (value) => {
|
|
|
2161
2161
|
}
|
|
2162
2162
|
return [];
|
|
2163
2163
|
};
|
|
2164
|
+
/**
 * Rewrites a source path/identifier into space-separated words for lexical
 * matching.
 *
 * Runs of `#`, `/`, `_`, `.` and `-` collapse into a single space, after which
 * a handful of file-extension words are replaced by descriptive synonyms
 * (for example `md` becomes `markdown`, `zip` becomes `archive bundle`).
 *
 * Extension words are matched case-insensitively so `REPORT.XLSX` expands the
 * same way as `report.xlsx`; every other character keeps its original case.
 *
 * @param {string} source - Source path or identifier to normalize.
 * @returns {string} The normalized, synonym-expanded text.
 */
var normalizeSourceForLexical = (source) => {
  const extensionSynonyms = {
    md: "markdown",
    pptx: "presentation",
    xlsx: "spreadsheet workbook sheet",
    mp3: "audio transcript media",
    mp4: "video transcript media",
    zip: "archive bundle"
  };
  return source
    .replace(/[#/_.-]+/g, " ")
    // None of the synonyms contains another extension word, so one pass over
    // the alternation yields the same text as chaining one replace per word.
    .replace(/\b(?:md|pptx|xlsx|mp3|mp4|zip)\b/gi, (word) => extensionSynonyms[word.toLowerCase()]);
};
|
|
2165
|
+
/**
 * Flattens a value into a single line of text: the strings produced by
 * `collectMetadataStrings` for that value, minus falsy entries, joined with
 * single spaces.
 *
 * @param {*} value - Value handed to `collectMetadataStrings`.
 * @returns {string} Space-joined text; empty when nothing was collected.
 */
var toFieldText = (value) => {
  const collected = collectMetadataStrings(value);
  const nonEmpty = collected.filter((entry) => Boolean(entry));
  return nonEmpty.join(" ");
};
|
|
2166
|
+
/**
 * Computes which fraction of the query tokens occur among the tokens of
 * `text`.
 *
 * The text is lowercased before it is tokenized. The count of matching query
 * tokens is divided by the number of query tokens (at least 1, so an empty
 * query scores 0 instead of dividing by zero).
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {string} text - Field text to look the tokens up in.
 * @returns {number} Matching query tokens divided by query token count;
 *   0 when the text is empty or yields no tokens.
 */
var scoreTokenCoverage = (queryTokens, text) => {
  const lowered = text.toLowerCase();
  // Empty text is never handed to the tokenizer.
  const textTokens = lowered.length > 0 ? tokenize(lowered) : [];
  if (textTokens.length === 0) {
    return 0;
  }
  const known = new Set(textTokens);
  const matched = queryTokens.reduce(
    (count, token) => (known.has(token) ? count + 1 : count),
    0
  );
  const divisor = Math.max(1, queryTokens.length);
  return matched / divisor;
};
|
|
2179
|
+
/**
 * Reports whether the whole query occurs as a phrase inside `text`.
 *
 * Both inputs are tokenized and re-joined with single spaces before the
 * comparison, so the check is a substring test on the re-joined token text.
 *
 * @param {string} query - Raw query text.
 * @param {string} text - Field text to search.
 * @returns {number} 1 when the re-joined query is contained in the re-joined
 *   text, 0 otherwise (including when either side yields no tokens).
 */
var scorePhraseMatch = (query, text) => {
  const queryPhrase = tokenize(query).join(" ");
  const textPhrase = tokenize(text).join(" ");
  const bothPresent = queryPhrase.length > 0 && textPhrase.length > 0;
  if (!bothPresent) {
    return 0;
  }
  return textPhrase.includes(queryPhrase) ? 1 : 0;
};
|
|
2187
|
+
/**
 * Scores one text field as a weighted sum of its token coverage and its
 * phrase match.
 *
 * @param {object} options
 * @param {number} options.coverageWeight - Multiplier for the token-coverage score.
 * @param {number} options.phraseWeight - Multiplier for the phrase-match score.
 * @param {string} options.query - Raw query text, used for the phrase match.
 * @param {string[]} options.queryTokens - Query tokens, used for coverage.
 * @param {string} options.text - Field text being scored.
 * @returns {number} `coverage * coverageWeight + phrase * phraseWeight`.
 */
var scoreWeightedField = ({ coverageWeight, phraseWeight, query, queryTokens, text }) => {
  const coveragePart = scoreTokenCoverage(queryTokens, text) * coverageWeight;
  const phrasePart = scorePhraseMatch(query, text) * phraseWeight;
  return coveragePart + phrasePart;
};
|
|
2194
|
+
/**
 * Splits a retrieval result into the individual text fields that are scored
 * separately for lexical matching.
 *
 * Reads `result.metadata`, `result.source`, `result.title` and
 * `result.chunkText`. `source`, `title` and `chunkText` are used only when
 * they are strings; any other value is treated as empty text, so the returned
 * fields are always strings.
 *
 * @param {object} result - Retrieval result to extract fields from.
 * @returns {{
 *   archivePath: string,
 *   chunkText: string,
 *   mediaSegments: string,
 *   metadataFocus: string,
 *   metadataText: string,
 *   source: string,
 *   title: string
 * }} Field texts keyed by field name.
 */
var extractWeightedLexicalFields = (result) => {
  // The field scorers call string methods on every field, so a value that is
  // not a string becomes empty text instead of being passed through.
  const asText = (value) => (typeof value === "string" ? value : "");
  const metadata = result.metadata ?? {};
  const source = asText(result.source);
  // Prefer an explicit metadata.archivePath; otherwise take the segment that
  // follows the first "#" in the source (empty when the source has no "#").
  const archivePath = typeof metadata.archivePath === "string"
    ? metadata.archivePath
    : (source.split("#")[1] ?? "");
  // Only object-valued segments contribute text; anything else is dropped.
  const mediaSegments = Array.isArray(metadata.mediaSegments)
    ? metadata.mediaSegments
        .map((segment) => (segment && typeof segment === "object" ? toFieldText(segment) : ""))
        .filter(Boolean)
        .join(" ")
    : "";
  const metadataFocus = [
    metadata.sheetName,
    metadata.sheetNames,
    metadata.slideTitle,
    metadata.slideTitles,
    metadata.threadTopic,
    metadata.speaker,
    metadata.fileKind,
    metadata.transcriptSource,
    metadata.archiveType
  ].flatMap((value) => collectMetadataStrings(value)).join(" ");
  return {
    archivePath,
    chunkText: asText(result.chunkText),
    mediaSegments,
    metadataFocus,
    metadataText: toFieldText(metadata),
    source: source ? normalizeSourceForLexical(source) : "",
    title: asText(result.title)
  };
};
|
|
2164
2220
|
var buildRAGLexicalHaystack = (result) => [
|
|
2165
2221
|
result.title,
|
|
2166
2222
|
result.source,
|
|
2167
|
-
typeof result.source === "string" ? result.source
|
|
2223
|
+
typeof result.source === "string" ? normalizeSourceForLexical(result.source) : undefined,
|
|
2168
2224
|
result.chunkText,
|
|
2169
2225
|
...collectMetadataStrings(result.metadata)
|
|
2170
2226
|
].filter((value) => Boolean(value)).join(" ");
|
|
@@ -2173,20 +2229,67 @@ var scoreRAGLexicalMatch = (query, result) => {
|
|
|
2173
2229
|
if (queryTokens.length === 0) {
|
|
2174
2230
|
return 0;
|
|
2175
2231
|
}
|
|
2232
|
+
const fields = extractWeightedLexicalFields(result);
|
|
2176
2233
|
const haystack = buildRAGLexicalHaystack(result).toLowerCase();
|
|
2177
|
-
const
|
|
2178
|
-
|
|
2179
|
-
const overlap = queryTokens.filter((token) => haystackSet.has(token)).length;
|
|
2180
|
-
if (overlap === 0) {
|
|
2234
|
+
const overallCoverage = scoreTokenCoverage(queryTokens, haystack);
|
|
2235
|
+
if (overallCoverage === 0) {
|
|
2181
2236
|
return 0;
|
|
2182
2237
|
}
|
|
2183
|
-
const
|
|
2184
|
-
|
|
2185
|
-
|
|
2238
|
+
const titleScore = scoreWeightedField({
|
|
2239
|
+
coverageWeight: 1.8,
|
|
2240
|
+
phraseWeight: 1.2,
|
|
2241
|
+
query,
|
|
2242
|
+
queryTokens,
|
|
2243
|
+
text: fields.title
|
|
2244
|
+
});
|
|
2245
|
+
const sourceScore = scoreWeightedField({
|
|
2246
|
+
coverageWeight: 2.6,
|
|
2247
|
+
phraseWeight: 1.4,
|
|
2248
|
+
query,
|
|
2249
|
+
queryTokens,
|
|
2250
|
+
text: fields.source
|
|
2251
|
+
});
|
|
2252
|
+
const metadataFocusScore = scoreWeightedField({
|
|
2253
|
+
coverageWeight: 2.8,
|
|
2254
|
+
phraseWeight: 1.6,
|
|
2255
|
+
query,
|
|
2256
|
+
queryTokens,
|
|
2257
|
+
text: fields.metadataFocus
|
|
2258
|
+
});
|
|
2259
|
+
const archivePathScore = scoreWeightedField({
|
|
2260
|
+
coverageWeight: 3.2,
|
|
2261
|
+
phraseWeight: 2.2,
|
|
2262
|
+
query,
|
|
2263
|
+
queryTokens,
|
|
2264
|
+
text: fields.archivePath
|
|
2265
|
+
});
|
|
2266
|
+
const mediaSegmentScore = scoreWeightedField({
|
|
2267
|
+
coverageWeight: 3,
|
|
2268
|
+
phraseWeight: 1.8,
|
|
2269
|
+
query,
|
|
2270
|
+
queryTokens,
|
|
2271
|
+
text: fields.mediaSegments
|
|
2272
|
+
});
|
|
2273
|
+
const metadataScore = scoreWeightedField({
|
|
2274
|
+
coverageWeight: 1.2,
|
|
2275
|
+
phraseWeight: 0.8,
|
|
2276
|
+
query,
|
|
2277
|
+
queryTokens,
|
|
2278
|
+
text: fields.metadataText
|
|
2279
|
+
});
|
|
2280
|
+
const chunkScore = scoreWeightedField({
|
|
2281
|
+
coverageWeight: 0.9,
|
|
2282
|
+
phraseWeight: 0.6,
|
|
2283
|
+
query,
|
|
2284
|
+
queryTokens,
|
|
2285
|
+
text: fields.chunkText
|
|
2286
|
+
});
|
|
2287
|
+
const exactPhraseBoost = scorePhraseMatch(query, haystack);
|
|
2288
|
+
const coverageBoost = overallCoverage;
|
|
2186
2289
|
const fileKindBoost = resolveFileKindBoost(queryTokens, result.metadata);
|
|
2187
2290
|
const transcriptBoost = resolveTranscriptBoost(queryTokens, result.metadata);
|
|
2188
2291
|
const archiveBoost = resolveArchiveBoost(queryTokens, result);
|
|
2189
|
-
return
|
|
2292
|
+
return titleScore + sourceScore + metadataFocusScore + archivePathScore + mediaSegmentScore + metadataScore + chunkScore + coverageBoost + exactPhraseBoost + fileKindBoost + transcriptBoost + archiveBoost;
|
|
2190
2293
|
};
|
|
2191
2294
|
/**
 * Tells whether at least one entry of `values` is present in `tokens`.
 *
 * @param {string[]} tokens - Tokens to search in.
 * @param {string[]} values - Candidate values to look for.
 * @returns {boolean} True when any candidate is found; false otherwise,
 *   including when `values` is empty.
 */
var hasAnyToken = (tokens, values) => {
  const noneFound = values.every((candidate) => !tokens.includes(candidate));
  return !noneFound;
};
|
|
2192
2295
|
var resolveFileKindBoost = (queryTokens, metadata) => {
|
|
@@ -3877,8 +3980,10 @@ var createRAGCollection = (options) => {
|
|
|
3877
3980
|
const model = input.model ?? options.defaultModel;
|
|
3878
3981
|
const topK = input.topK ?? defaultTopK;
|
|
3879
3982
|
const hasReranker = Boolean(input.rerank ?? options.rerank);
|
|
3880
|
-
const candidateTopK = Math.max(topK, Math.floor(input.candidateTopK ?? (hasReranker ? topK * defaultCandidateMultiplier : topK)));
|
|
3881
3983
|
const retrieval = resolveRAGHybridSearchOptions(input.retrieval);
|
|
3984
|
+
const hasQueryTransform = Boolean(input.queryTransform ?? options.queryTransform);
|
|
3985
|
+
const shouldExpandCandidates = hasReranker || hasQueryTransform || retrieval.mode !== "vector";
|
|
3986
|
+
const candidateTopK = Math.max(topK, Math.floor(input.candidateTopK ?? (shouldExpandCandidates ? topK * defaultCandidateMultiplier : topK)));
|
|
3882
3987
|
const transformed = await applyRAGQueryTransform({
|
|
3883
3988
|
input: {
|
|
3884
3989
|
candidateTopK,
|
|
@@ -8598,5 +8703,5 @@ export {
|
|
|
8598
8703
|
aiChat
|
|
8599
8704
|
};
|
|
8600
8705
|
|
|
8601
|
-
//# debugId=
|
|
8706
|
+
//# debugId=36F6407CE8163A4F64756E2164756E21
|
|
8602
8707
|
//# sourceMappingURL=index.js.map
|