@absolutejs/absolute 0.19.0-beta.493 → 0.19.0-beta.494
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.js +117 -47
- package/dist/ai/index.js.map +5 -5
- package/dist/angular/index.js +2 -2
- package/dist/angular/index.js.map +1 -1
- package/dist/angular/server.js +2 -2
- package/dist/angular/server.js.map +1 -1
- package/dist/build.js +2 -2
- package/dist/build.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/src/ai/rag/lexical.d.ts +12 -0
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -2149,6 +2149,8 @@ var STOP_WORDS = new Set([
|
|
|
2149
2149
|
"why"
|
|
2150
2150
|
]);
|
|
2151
2151
|
/**
 * Break free text into normalized lexical tokens.
 *
 * The input is lower-cased and split on every run of characters outside
 * `a-z0-9`. Pieces found in `STOP_WORDS` are dropped, the remaining pieces
 * get a light suffix strip, and anything shorter than two characters after
 * stripping is discarded.
 *
 * @param {string} value - Text to tokenize; must be a string (calls `toLowerCase`).
 * @returns {string[]} Tokens in their original order (duplicates kept).
 */
var tokenize = (value) => {
  // Suffix rules are tried in this exact order; the first one that applies wins.
  const stripSuffix = (word) => {
    if (word.endsWith("ies") && word.length > 3) {
      return `${word.slice(0, -3)}y`;
    }
    if (word.endsWith("ing") && word.length > 5) {
      return word.slice(0, -3);
    }
    if (word.endsWith("ed") && word.length > 4) {
      return word.slice(0, -2);
    }
    if (word.endsWith("es") && word.length > 4) {
      return word.slice(0, -2);
    }
    if (word.endsWith("s") && word.length > 3) {
      return word.slice(0, -1);
    }
    return word;
  };

  const tokens = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const word = piece.trim();
    // Stop words are checked before stripping, so only the raw form is filtered.
    if (STOP_WORDS.has(word)) {
      continue;
    }
    const stripped = stripSuffix(word);
    if (stripped.length > 1) {
      tokens.push(stripped);
    }
  }
  return tokens;
};
|
|
2152
|
+
var BM25_K1 = 1.2;
|
|
2153
|
+
var BM25_B = 0.75;
|
|
2152
2154
|
var collectMetadataStrings = (value) => {
|
|
2153
2155
|
if (typeof value === "string" || typeof value === "number") {
|
|
2154
2156
|
return [String(value)];
|
|
@@ -2164,7 +2166,7 @@ var collectMetadataStrings = (value) => {
|
|
|
2164
2166
|
/**
 * Turn a source path/identifier into space-separated words for lexical matching.
 *
 * Runs of `#`, `/`, `_`, `.` and `-` collapse to a single space, then a few
 * standalone file-extension words are expanded into descriptive terms.
 * Matching is case-sensitive and the expansions run in the listed order.
 *
 * @param {string} source - Source string to normalize.
 * @returns {string} The normalized, expanded string.
 */
var normalizeSourceForLexical = (source) => {
  const extensionExpansions = [
    [/\bmd\b/g, "markdown"],
    [/\bpptx\b/g, "presentation"],
    [/\bxlsx\b/g, "spreadsheet workbook sheet"],
    [/\bmp3\b/g, "audio transcript media"],
    [/\bmp4\b/g, "video transcript media"],
    [/\bzip\b/g, "archive bundle"]
  ];

  let normalized = source.replace(/[#/_.-]+/g, " ");
  for (const [pattern, expansion] of extensionExpansions) {
    normalized = normalized.replace(pattern, expansion);
  }
  return normalized;
};
|
|
2165
2167
|
/**
 * Flatten a metadata value into one space-separated string.
 *
 * Delegates to `collectMetadataStrings` and joins every truthy entry it
 * returns with a single space; falsy entries (such as empty strings) are skipped.
 *
 * @param {*} value - Value handed to `collectMetadataStrings`.
 * @returns {string} Joined text, or an empty string when nothing truthy was collected.
 */
var toFieldText = (value) => {
  const keptParts = [];
  for (const part of collectMetadataStrings(value)) {
    if (part) {
      keptParts.push(part);
    }
  }
  return keptParts.join(" ");
};
|
|
2166
2168
|
var scoreTokenCoverage = (queryTokens, text) => {
|
|
2167
|
-
const normalizedText = text.toLowerCase();
|
|
2169
|
+
const normalizedText = (text ?? "").toLowerCase();
|
|
2168
2170
|
if (normalizedText.length === 0) {
|
|
2169
2171
|
return 0;
|
|
2170
2172
|
}
|
|
@@ -2178,7 +2180,7 @@ var scoreTokenCoverage = (queryTokens, text) => {
|
|
|
2178
2180
|
};
|
|
2179
2181
|
var scorePhraseMatch = (query, text) => {
|
|
2180
2182
|
const normalizedQuery = tokenize(query).join(" ");
|
|
2181
|
-
const normalizedText = tokenize(text).join(" ");
|
|
2183
|
+
const normalizedText = tokenize(text ?? "").join(" ");
|
|
2182
2184
|
if (normalizedQuery.length === 0 || normalizedText.length === 0) {
|
|
2183
2185
|
return 0;
|
|
2184
2186
|
}
|
|
@@ -2190,7 +2192,7 @@ var scoreWeightedField = ({
|
|
|
2190
2192
|
query,
|
|
2191
2193
|
queryTokens,
|
|
2192
2194
|
text
|
|
2193
|
-
}) => scoreTokenCoverage(queryTokens, text) * coverageWeight + scorePhraseMatch(query, text) * phraseWeight;
|
|
2195
|
+
}) => scoreTokenCoverage(queryTokens, text ?? "") * coverageWeight + scorePhraseMatch(query, text ?? "") * phraseWeight;
|
|
2194
2196
|
var extractWeightedLexicalFields = (result) => {
|
|
2195
2197
|
const metadata = result.metadata ?? {};
|
|
2196
2198
|
const source = result.source ?? "";
|
|
@@ -2209,7 +2211,7 @@ var extractWeightedLexicalFields = (result) => {
|
|
|
2209
2211
|
].flatMap((value) => collectMetadataStrings(value)).join(" ");
|
|
2210
2212
|
return {
|
|
2211
2213
|
archivePath,
|
|
2212
|
-
chunkText: result.
|
|
2214
|
+
chunkText: result.text,
|
|
2213
2215
|
mediaSegments,
|
|
2214
2216
|
metadataFocus,
|
|
2215
2217
|
metadataText: toFieldText(metadata),
|
|
@@ -2217,6 +2219,34 @@ var extractWeightedLexicalFields = (result) => {
|
|
|
2217
2219
|
title: result.title ?? ""
|
|
2218
2220
|
};
|
|
2219
2221
|
};
|
|
2222
|
+
var FIELD_WEIGHTS = {
|
|
2223
|
+
archivePath: 4.2,
|
|
2224
|
+
chunkText: 1,
|
|
2225
|
+
mediaSegments: 3.8,
|
|
2226
|
+
metadataFocus: 3.2,
|
|
2227
|
+
metadataText: 1.4,
|
|
2228
|
+
source: 3.4,
|
|
2229
|
+
title: 2
|
|
2230
|
+
};
|
|
2231
|
+
var getWeightedFieldTokens = (result) => {
|
|
2232
|
+
const fields = extractWeightedLexicalFields({
|
|
2233
|
+
metadata: result.metadata,
|
|
2234
|
+
source: result.source,
|
|
2235
|
+
text: result.text,
|
|
2236
|
+
title: result.title
|
|
2237
|
+
});
|
|
2238
|
+
return {
|
|
2239
|
+
archivePath: tokenize(fields.archivePath ?? ""),
|
|
2240
|
+
chunkText: tokenize(fields.chunkText ?? ""),
|
|
2241
|
+
mediaSegments: tokenize(fields.mediaSegments ?? ""),
|
|
2242
|
+
metadataFocus: tokenize(fields.metadataFocus ?? ""),
|
|
2243
|
+
metadataText: tokenize(fields.metadataText ?? ""),
|
|
2244
|
+
source: tokenize(fields.source ?? ""),
|
|
2245
|
+
title: tokenize(fields.title ?? "")
|
|
2246
|
+
};
|
|
2247
|
+
};
|
|
2248
|
+
/**
 * Count how often a token occurs across all weighted fields.
 *
 * For each field named in `FIELD_WEIGHTS`, the number of strict-equal
 * occurrences of `token` in that field's token list is multiplied by the
 * field's weight, and the products are summed in key order.
 *
 * @param {Object<string, string[]>} fieldTokens - Token list per field; every
 *   `FIELD_WEIGHTS` key must be present.
 * @param {string} token - Token to count.
 * @returns {number} Weighted occurrence total (0 when the token never appears).
 */
var countWeightedTermFrequency = (fieldTokens, token) => {
  let weightedTotal = 0;
  for (const [fieldName, weight] of Object.entries(FIELD_WEIGHTS)) {
    let occurrences = 0;
    for (const candidate of fieldTokens[fieldName]) {
      if (candidate === token) {
        occurrences += 1;
      }
    }
    weightedTotal += occurrences * weight;
  }
  return weightedTotal;
};
|
|
2249
|
+
/**
 * Compute a document's length as the weighted sum of its field token counts.
 *
 * Each field named in `FIELD_WEIGHTS` contributes its token count multiplied
 * by that field's weight.
 *
 * @param {Object<string, string[]>} fieldTokens - Token list per field; every
 *   `FIELD_WEIGHTS` key must be present.
 * @returns {number} Weighted token count across all fields.
 */
var computeWeightedDocumentLength = (fieldTokens) => {
  let weightedLength = 0;
  for (const fieldName of Object.keys(FIELD_WEIGHTS)) {
    const tokenCount = fieldTokens[fieldName].length;
    weightedLength += tokenCount * FIELD_WEIGHTS[fieldName];
  }
  return weightedLength;
};
|
|
2220
2250
|
var buildRAGLexicalHaystack = (result) => [
|
|
2221
2251
|
result.title,
|
|
2222
2252
|
result.source,
|
|
@@ -2229,7 +2259,12 @@ var scoreRAGLexicalMatch = (query, result) => {
|
|
|
2229
2259
|
if (queryTokens.length === 0) {
|
|
2230
2260
|
return 0;
|
|
2231
2261
|
}
|
|
2232
|
-
const fields = extractWeightedLexicalFields(
|
|
2262
|
+
const fields = extractWeightedLexicalFields({
|
|
2263
|
+
metadata: result.metadata,
|
|
2264
|
+
source: result.source,
|
|
2265
|
+
text: result.chunkText,
|
|
2266
|
+
title: result.title
|
|
2267
|
+
});
|
|
2233
2268
|
const haystack = buildRAGLexicalHaystack(result).toLowerCase();
|
|
2234
2269
|
const overallCoverage = scoreTokenCoverage(queryTokens, haystack);
|
|
2235
2270
|
if (overallCoverage === 0) {
|
|
@@ -2291,6 +2326,65 @@ var scoreRAGLexicalMatch = (query, result) => {
|
|
|
2291
2326
|
const archiveBoost = resolveArchiveBoost(queryTokens, result);
|
|
2292
2327
|
return titleScore + sourceScore + metadataFocusScore + archivePathScore + mediaSegmentScore + metadataScore + chunkScore + coverageBoost + exactPhraseBoost + fileKindBoost + transcriptBoost + archiveBoost;
|
|
2293
2328
|
};
|
|
2329
|
+
var rankRAGLexicalMatches = (query, results) => {
|
|
2330
|
+
const queryTokens = tokenize(query);
|
|
2331
|
+
if (queryTokens.length === 0 || results.length === 0) {
|
|
2332
|
+
return [];
|
|
2333
|
+
}
|
|
2334
|
+
const candidates = results.map((result) => {
|
|
2335
|
+
const fieldTokens = getWeightedFieldTokens(result);
|
|
2336
|
+
return {
|
|
2337
|
+
fieldTokens,
|
|
2338
|
+
length: computeWeightedDocumentLength(fieldTokens),
|
|
2339
|
+
result
|
|
2340
|
+
};
|
|
2341
|
+
});
|
|
2342
|
+
const averageDocumentLength = candidates.reduce((total, candidate) => total + candidate.length, 0) / Math.max(1, candidates.length);
|
|
2343
|
+
const uniqueQueryTokens = [...new Set(queryTokens)];
|
|
2344
|
+
const documentFrequency = new Map;
|
|
2345
|
+
for (const token of uniqueQueryTokens) {
|
|
2346
|
+
let seen = 0;
|
|
2347
|
+
for (const candidate of candidates) {
|
|
2348
|
+
const tf = countWeightedTermFrequency(candidate.fieldTokens, token);
|
|
2349
|
+
if (tf > 0) {
|
|
2350
|
+
seen += 1;
|
|
2351
|
+
}
|
|
2352
|
+
}
|
|
2353
|
+
documentFrequency.set(token, seen);
|
|
2354
|
+
}
|
|
2355
|
+
return candidates.map((candidate, index) => {
|
|
2356
|
+
let bm25Score = 0;
|
|
2357
|
+
for (const token of uniqueQueryTokens) {
|
|
2358
|
+
const termFrequency = countWeightedTermFrequency(candidate.fieldTokens, token);
|
|
2359
|
+
if (termFrequency <= 0) {
|
|
2360
|
+
continue;
|
|
2361
|
+
}
|
|
2362
|
+
const df = documentFrequency.get(token) ?? 0;
|
|
2363
|
+
const idf = Math.log(1 + (candidates.length - df + 0.5) / (df + 0.5));
|
|
2364
|
+
const denominator = termFrequency + BM25_K1 * (1 - BM25_B + BM25_B * (candidate.length / Math.max(1, averageDocumentLength)));
|
|
2365
|
+
bm25Score += idf * (termFrequency * (BM25_K1 + 1) / Math.max(0.000000001, denominator));
|
|
2366
|
+
}
|
|
2367
|
+
const heuristicScore = scoreRAGLexicalMatch(query, {
|
|
2368
|
+
chunkText: candidate.result.text,
|
|
2369
|
+
metadata: candidate.result.metadata,
|
|
2370
|
+
source: candidate.result.source,
|
|
2371
|
+
title: candidate.result.title
|
|
2372
|
+
});
|
|
2373
|
+
return {
|
|
2374
|
+
index,
|
|
2375
|
+
result: candidate.result,
|
|
2376
|
+
score: bm25Score + heuristicScore * 0.35
|
|
2377
|
+
};
|
|
2378
|
+
}).filter((entry) => entry.score > 0).sort((left, right) => {
|
|
2379
|
+
if (right.score !== left.score) {
|
|
2380
|
+
return right.score - left.score;
|
|
2381
|
+
}
|
|
2382
|
+
return left.index - right.index;
|
|
2383
|
+
}).map(({ result, score }) => ({
|
|
2384
|
+
result,
|
|
2385
|
+
score
|
|
2386
|
+
}));
|
|
2387
|
+
};
|
|
2294
2388
|
/**
 * Report whether at least one of `values` is present in `tokens`.
 *
 * @param {string[]} tokens - Collection searched with `includes`.
 * @param {string[]} values - Candidates to look for.
 * @returns {boolean} `true` as soon as any candidate is found, otherwise `false`.
 */
var hasAnyToken = (tokens, values) => {
  const isPresent = (candidate) => tokens.includes(candidate);
  return values.some(isPresent);
};
|
|
2295
2389
|
var resolveFileKindBoost = (queryTokens, metadata) => {
|
|
2296
2390
|
const fileKind = typeof metadata?.fileKind === "string" ? metadata.fileKind : "";
|
|
@@ -6860,27 +6954,15 @@ var createInMemoryRAGStore = (options = {}) => {
|
|
|
6860
6954
|
}));
|
|
6861
6955
|
};
|
|
6862
6956
|
const queryLexical = async (input) => {
|
|
6863
|
-
const
|
|
6864
|
-
|
|
6865
|
-
|
|
6866
|
-
|
|
6867
|
-
|
|
6868
|
-
|
|
6869
|
-
|
|
6870
|
-
|
|
6871
|
-
|
|
6872
|
-
if (right.score !== left.score) {
|
|
6873
|
-
return right.score - left.score;
|
|
6874
|
-
}
|
|
6875
|
-
return left.chunk.chunkId.localeCompare(right.chunk.chunkId);
|
|
6876
|
-
});
|
|
6877
|
-
return results.slice(0, input.topK).map((entry) => ({
|
|
6878
|
-
chunkId: entry.chunk.chunkId,
|
|
6879
|
-
chunkText: entry.chunk.text,
|
|
6880
|
-
metadata: entry.chunk.metadata,
|
|
6881
|
-
score: entry.score,
|
|
6882
|
-
source: entry.chunk.source,
|
|
6883
|
-
title: entry.chunk.title
|
|
6957
|
+
const filtered = chunks.filter((chunk) => matchesFilter(chunk, input.filter));
|
|
6958
|
+
const ranked = rankRAGLexicalMatches(input.query, filtered);
|
|
6959
|
+
return ranked.slice(0, input.topK).map(({ result, score }) => ({
|
|
6960
|
+
chunkId: result.chunkId,
|
|
6961
|
+
chunkText: result.text,
|
|
6962
|
+
metadata: result.metadata,
|
|
6963
|
+
score,
|
|
6964
|
+
source: result.source,
|
|
6965
|
+
title: result.title
|
|
6884
6966
|
}));
|
|
6885
6967
|
};
|
|
6886
6968
|
const upsert = async (input) => {
|
|
@@ -7511,27 +7593,15 @@ var createSQLiteRAGStore = (options = {}) => {
|
|
|
7511
7593
|
};
|
|
7512
7594
|
const queryLexical = async (input) => {
|
|
7513
7595
|
const rawRows = toStoredRows(jsonStatements.query.all());
|
|
7514
|
-
const chunks = mapFilterToRows(rawRows).filter((chunk) => matchesFilter(chunk, input.filter))
|
|
7515
|
-
|
|
7516
|
-
|
|
7517
|
-
|
|
7518
|
-
|
|
7519
|
-
|
|
7520
|
-
title: chunk.title
|
|
7521
|
-
})
|
|
7522
|
-
})).filter(({ score }) => score > 0).sort((left, right) => {
|
|
7523
|
-
if (right.score !== left.score) {
|
|
7524
|
-
return right.score - left.score;
|
|
7525
|
-
}
|
|
7526
|
-
return left.chunk.chunkId.localeCompare(right.chunk.chunkId);
|
|
7527
|
-
});
|
|
7528
|
-
return chunks.slice(0, input.topK).map(({ chunk, score }) => ({
|
|
7529
|
-
chunkId: chunk.chunkId,
|
|
7530
|
-
chunkText: chunk.text,
|
|
7531
|
-
metadata: chunk.metadata,
|
|
7596
|
+
const chunks = mapFilterToRows(rawRows).filter((chunk) => matchesFilter(chunk, input.filter));
|
|
7597
|
+
const ranked = rankRAGLexicalMatches(input.query, chunks);
|
|
7598
|
+
return ranked.slice(0, input.topK).map(({ result, score }) => ({
|
|
7599
|
+
chunkId: result.chunkId,
|
|
7600
|
+
chunkText: result.text,
|
|
7601
|
+
metadata: result.metadata,
|
|
7532
7602
|
score,
|
|
7533
|
-
source:
|
|
7534
|
-
title:
|
|
7603
|
+
source: result.source,
|
|
7604
|
+
title: result.title
|
|
7535
7605
|
}));
|
|
7536
7606
|
};
|
|
7537
7607
|
const upsert = async (input) => {
|
|
@@ -8703,5 +8773,5 @@ export {
|
|
|
8703
8773
|
aiChat
|
|
8704
8774
|
};
|
|
8705
8775
|
|
|
8706
|
-
//# debugId=
|
|
8776
|
+
//# debugId=F37A373F20F3691864756E2164756E21
|
|
8707
8777
|
//# sourceMappingURL=index.js.map
|