@absolutejs/rag 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/rag/index.js
CHANGED
|
@@ -13262,6 +13262,7 @@ var validateRAGEmbeddingDimensions = (vector, expectedDimensions, context) => {
|
|
|
13262
13262
|
import { readdir, readFile as readFile2 } from "fs/promises";
|
|
13263
13263
|
import { basename, extname, join, relative, resolve } from "path";
|
|
13264
13264
|
import { gunzipSync, inflateRawSync } from "zlib";
|
|
13265
|
+
/**
 * Builds the extra `fetch` init used to request HTTP/2 for HTTPS targets.
 *
 * Accepts everything `fetch` accepts as its first argument: a string, a `URL`
 * (read via `href`) or a `Request` (read via `url`). The scheme comparison is
 * case-insensitive because URL schemes are case-insensitive.
 *
 * NOTE(review): `protocol: "http2"` is not a standard fetch option; presumably a
 * runtime-specific extension (the bundle references `globalThis.Bun`) - confirm.
 *
 * @param {string | URL | Request} url - Target that will be passed to `fetch`.
 * @returns {{ protocol: "http2" } | {}} Init fragment; empty for non-HTTPS or unreadable targets.
 */
var h2IfHttps = (url) => {
  // Non-string inputs previously crashed on `url.startsWith`; read their URL text instead.
  const target = typeof url === "string" ? url : String(url?.href ?? url?.url ?? "");
  return /^https:\/\//i.test(target) ? { protocol: "http2" } : {};
};
|
|
13265
13266
|
var DEFAULT_MAX_CHUNK_LENGTH = 900;
|
|
13266
13267
|
var DEFAULT_CHUNK_OVERLAP = 120;
|
|
13267
13268
|
var DEFAULT_MIN_CHUNK_LENGTH = 80;
|
|
@@ -19440,7 +19441,7 @@ var loadRAGDocumentFromURL = async (input) => {
|
|
|
19440
19441
|
if (!url) {
|
|
19441
19442
|
throw new Error("RAG URL is required");
|
|
19442
19443
|
}
|
|
19443
|
-
const response = await fetch(url);
|
|
19444
|
+
const response = await fetch(url, h2IfHttps(url));
|
|
19444
19445
|
if (!response.ok) {
|
|
19445
19446
|
throw new Error(`Failed to fetch RAG URL ${url}: ${response.status} ${response.statusText}`);
|
|
19446
19447
|
}
|
|
@@ -19488,7 +19489,7 @@ var loadRAGDocumentsFromURLs = async (input) => {
|
|
|
19488
19489
|
if (!url) {
|
|
19489
19490
|
throw new Error("RAG URL is required");
|
|
19490
19491
|
}
|
|
19491
|
-
const response = await fetch(url);
|
|
19492
|
+
const response = await fetch(url, h2IfHttps(url));
|
|
19492
19493
|
if (!response.ok) {
|
|
19493
19494
|
throw new Error(`Failed to fetch RAG URL ${url}: ${response.status} ${response.statusText}`);
|
|
19494
19495
|
}
|
|
@@ -30865,6 +30866,94 @@ var ragChat = (config) => {
|
|
|
30865
30866
|
// src/ai/rag/htmxConfig.ts
|
|
30866
30867
|
/**
 * Identity helper: hands back the very same config object it was given,
 * without copying or modifying it.
 *
 * @param {object} config - HTMX configuration supplied by the caller.
 * @returns {object} The same `config` reference.
 */
var createRAGHTMXConfig = (config) => {
  return config;
};
|
|
30867
30868
|
/**
 * Identity helper: returns the workflow render config exactly as received,
 * without copying or modifying it.
 *
 * @param {object} config - Workflow render configuration supplied by the caller.
 * @returns {object} The same `config` reference.
 */
var createRAGHTMXWorkflowRenderConfig = (config) => {
  return config;
};
|
|
30869
|
+
// src/ai/rag/rerankerProviders.ts
|
|
30870
|
+
/**
 * Reads the relevance score from one provider result row.
 * Tries `relevance_score`, then `relevanceScore`, then `score`, and falls back
 * to 0 when all of them are null/undefined.
 */
var rowScore = (row) => row.relevance_score ?? row.relevanceScore ?? row.score ?? 0;

/**
 * Maps provider rows back onto the candidates they point at (via `row.index`),
 * overwrites each candidate's `score` with the provider score, sorts by score
 * descending and keeps the first `topK` entries.
 *
 * Null rows and rows whose index does not resolve to a candidate are skipped.
 *
 * @param {object[]} candidates - Candidates in the order they were sent to the provider.
 * @param {object[]} rows - Provider result rows.
 * @param {number} [topK] - Maximum number of results; all are returned when undefined.
 * @returns {object[]} New candidate copies carrying the provider score.
 */
var applyRanking = (candidates, rows, topK) => {
  const ranked = [];
  for (const row of rows) {
    // A null row would crash on `row.index`; treat it like an unresolvable index.
    const candidate = row == null ? undefined : candidates[row.index];
    if (!candidate) {
      continue;
    }
    ranked.push({ ...candidate, score: rowScore(row) });
  }
  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, topK);
};

/**
 * Returns the leading slice of `input.results` that will be sent for reranking.
 * The slice length is `input.candidateTopK` (clamped to be non-negative) or the
 * full result count when `candidateTopK` is null/undefined.
 */
var limitCandidates = (input) => {
  const cap = input.candidateTopK ?? input.results.length;
  return input.results.slice(0, Math.max(0, cap));
};

/**
 * Creates a reranker that POSTs the query and candidate texts to an HTTP
 * rerank endpoint and reorders the candidates by the returned scores.
 *
 * @param {object} options
 * @param {(args: { documents: string[], model: string, query: string, topN: number }) => object} options.buildBody
 *   Produces the provider-specific JSON request body.
 * @param {object} options.config - Caller configuration; reads `fetch`, `defaultModel`, `apiKey`, `headers`.
 * @param {string} options.endpoint - Full URL of the rerank endpoint.
 * @param {string} options.fallbackModel - Model used when neither the call nor the config names one.
 * @param {string} options.providerName - Name exposed on the reranker and used in error messages.
 * @returns {{ defaultModel: string, providerName: string, rerank: (input: object) => Promise<object[]> }}
 * @throws {Error} From `rerank` when the endpoint answers with a non-OK status.
 */
var createHttpCrossEncoderReranker = (options) => {
  const fetchImpl = options.config.fetch ?? fetch;
  const defaultModel = options.config.defaultModel ?? options.fallbackModel;
  return {
    defaultModel,
    providerName: options.providerName,
    rerank: async (input) => {
      const candidates = limitCandidates(input);
      if (candidates.length === 0) {
        return [];
      }
      const model = input.model ?? defaultModel;
      const documents = candidates.map((candidate) => candidate.chunkText);
      // Without the fallback an undefined topK gives Math.min(...) === NaN, which
      // JSON.stringify turns into `null` in the request body.
      const topN = Math.min(input.topK ?? candidates.length, candidates.length);
      const { apiKey } = options.config;
      const response = await fetchImpl(options.endpoint, {
        body: JSON.stringify(options.buildBody({ documents, model, query: input.query, topN })),
        headers: {
          // Only send a bearer token when one is configured (avoids "Bearer undefined").
          ...(apiKey == null ? {} : { Authorization: `Bearer ${apiKey}` }),
          "Content-Type": "application/json",
          // Caller headers come last so they can override the defaults above.
          ...options.config.headers
        },
        method: "POST"
      });
      if (!response.ok) {
        throw new Error(`${options.providerName} rerank failed: HTTP ${response.status}`);
      }
      const payload = await response.json();
      // Result rows are read from `results` first, then `data`; anything that is
      // not an array (including a null payload) is treated as "no rows".
      const rawRows = payload?.results ?? payload?.data;
      const rows = Array.isArray(rawRows) ? rawRows : [];
      const ranked = applyRanking(candidates, rows, input.topK);
      if (input.scoreThreshold !== undefined) {
        return ranked.filter((result) => result.score >= input.scoreThreshold);
      }
      return ranked;
    }
  };
};
|
|
30921
|
+
/**
 * Creates a reranker that targets the `/v2/rerank` path under `config.baseUrl`
 * (default "https://api.cohere.com"), with "rerank-v3.5" as the fallback model.
 *
 * Trailing slashes on `baseUrl` are removed before the path is appended, so
 * "https://proxy.example/" (or a `URL` object, whose string form ends in "/")
 * does not produce "//v2/rerank".
 *
 * @param {object} config - Passed through unchanged; `baseUrl` is the only field read here.
 * @returns {object} Whatever `createHttpCrossEncoderReranker` returns for these options.
 */
var createCohereRAGReranker = (config) => createHttpCrossEncoderReranker({
  // The result limit is sent under the `top_n` key.
  buildBody: ({ model, query, documents, topN }) => ({
    documents,
    model,
    query,
    top_n: topN
  }),
  config,
  endpoint: `${String(config.baseUrl ?? "https://api.cohere.com").replace(/\/+$/, "")}/v2/rerank`,
  fallbackModel: "rerank-v3.5",
  providerName: "cohere"
});
|
|
30933
|
+
/**
 * Creates a reranker that targets the `/v1/rerank` path under `config.baseUrl`
 * (default "https://api.voyageai.com"), with "rerank-2" as the fallback model.
 *
 * Trailing slashes on `baseUrl` are removed before the path is appended, so
 * "https://proxy.example/" (or a `URL` object, whose string form ends in "/")
 * does not produce "//v1/rerank".
 *
 * @param {object} config - Passed through unchanged; `baseUrl` is the only field read here.
 * @returns {object} Whatever `createHttpCrossEncoderReranker` returns for these options.
 */
var createVoyageRAGReranker = (config) => createHttpCrossEncoderReranker({
  // Unlike the sibling providers, the result limit is sent under the `top_k` key.
  buildBody: ({ model, query, documents, topN }) => ({
    documents,
    model,
    query,
    top_k: topN
  }),
  config,
  endpoint: `${String(config.baseUrl ?? "https://api.voyageai.com").replace(/\/+$/, "")}/v1/rerank`,
  fallbackModel: "rerank-2",
  providerName: "voyage"
});
|
|
30945
|
+
/**
 * Creates a reranker that targets the `/v1/rerank` path under `config.baseUrl`
 * (default "https://api.jina.ai"), with "jina-reranker-v2-base-multilingual"
 * as the fallback model.
 *
 * Trailing slashes on `baseUrl` are removed before the path is appended, so
 * "https://proxy.example/" (or a `URL` object, whose string form ends in "/")
 * does not produce "//v1/rerank".
 *
 * @param {object} config - Passed through unchanged; `baseUrl` is the only field read here.
 * @returns {object} Whatever `createHttpCrossEncoderReranker` returns for these options.
 */
var createJinaRAGReranker = (config) => createHttpCrossEncoderReranker({
  // The result limit is sent under the `top_n` key.
  buildBody: ({ model, query, documents, topN }) => ({
    documents,
    model,
    query,
    top_n: topN
  }),
  config,
  endpoint: `${String(config.baseUrl ?? "https://api.jina.ai").replace(/\/+$/, "")}/v1/rerank`,
  fallbackModel: "jina-reranker-v2-base-multilingual",
  providerName: "jina"
});
|
|
30868
30957
|
// src/ai/rag/retrievalStrategies.ts
|
|
30869
30958
|
/**
 * Lowercases `value` and returns its maximal runs of ASCII letters/digits, in
 * order. Every other character acts as a separator; no empty tokens are returned.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Lowercase alphanumeric tokens (empty array when there are none).
 */
var tokenize4 = (value) => {
  const lowered = value.toLowerCase();
  // Collecting the alphanumeric runs directly is the same as splitting on the
  // non-alphanumeric runs and discarding the empty pieces.
  const tokens = lowered.match(/[a-z0-9]+/g);
  return tokens === null ? [] : [...tokens];
};
|
|
30870
30959
|
/**
 * Reports whether at least one entry of `values` is contained in `tokens`
 * (membership is decided by `tokens.includes`).
 *
 * @param {string[]} tokens - Collection to search in.
 * @param {string[]} values - Entries to look for.
 * @returns {boolean} `true` as soon as one value is found, otherwise `false`.
 */
var hasAnyToken2 = (tokens, values) => {
  const isPresent = (value) => tokens.includes(value);
  return values.some(isPresent);
};
|
|
@@ -32176,6 +32265,7 @@ var {S3Client } = globalThis.Bun;
|
|
|
32176
32265
|
import { createHash } from "crypto";
|
|
32177
32266
|
import { mkdir as mkdir2, readFile as readFile3, readdir as readdir2 } from "fs/promises";
|
|
32178
32267
|
import { basename as basename2, dirname as dirname3, extname as extname2, join as join2, relative as relative2, resolve as resolve2 } from "path";
|
|
32268
|
+
/**
 * Builds the extra `fetch` init used to request HTTP/2 for HTTPS targets.
 *
 * Accepts everything `fetch` accepts as its first argument: a string, a `URL`
 * (read via `href`) or a `Request` (read via `url`). The scheme comparison is
 * case-insensitive because URL schemes are case-insensitive.
 *
 * NOTE(review): `protocol: "http2"` is not a standard fetch option; presumably a
 * runtime-specific extension (the bundle references `globalThis.Bun`) - confirm.
 *
 * @param {string | URL | Request} url - Target that will be passed to `fetch`.
 * @returns {{ protocol: "http2" } | {}} Init fragment; empty for non-HTTPS or unreadable targets.
 */
var h2IfHttps2 = (url) => {
  // Non-string inputs previously crashed on `url.startsWith`; read their URL text instead.
  const target = typeof url === "string" ? url : String(url?.href ?? url?.url ?? "");
  return /^https:\/\//i.test(target) ? { protocol: "http2" } : {};
};
|
|
32179
32269
|
/**
 * Turns a caught value into a message string: the `message` of an `Error`
 * instance, or the `String(...)` conversion of anything else.
 *
 * @param {unknown} caught - Value received in a `catch` clause.
 * @returns {string} Text describing the failure.
 */
var toSyncError = (caught) => {
  if (caught instanceof Error) {
    return caught.message;
  }
  return String(caught);
};
|
|
32180
32270
|
var wait = async (delayMs) => {
|
|
32181
32271
|
if (!(delayMs > 0)) {
|
|
@@ -32988,7 +33078,7 @@ var isFeedDocument = (value) => {
|
|
|
32988
33078
|
return trimmed.includes("<rss") || trimmed.includes("<channel") || trimmed.includes("<feed") || trimmed.includes("<entry");
|
|
32989
33079
|
};
|
|
32990
33080
|
var discoverFeedsFromHTML = async (feed) => {
|
|
32991
|
-
const response = await fetch(feed.url);
|
|
33081
|
+
const response = await fetch(feed.url, h2IfHttps2(feed.url));
|
|
32992
33082
|
if (!response.ok) {
|
|
32993
33083
|
return [];
|
|
32994
33084
|
}
|
|
@@ -33034,7 +33124,7 @@ var discoverFeedsFromHTML = async (feed) => {
|
|
|
33034
33124
|
}
|
|
33035
33125
|
const validated = [];
|
|
33036
33126
|
for (const candidate of discovered.values()) {
|
|
33037
|
-
const candidateResponse = await fetch(candidate.url);
|
|
33127
|
+
const candidateResponse = await fetch(candidate.url, h2IfHttps2(candidate.url));
|
|
33038
33128
|
if (!candidateResponse.ok) {
|
|
33039
33129
|
continue;
|
|
33040
33130
|
}
|
|
@@ -33098,7 +33188,7 @@ var parseSitemapEntries = (sitemap, value) => {
|
|
|
33098
33188
|
};
|
|
33099
33189
|
var discoverSitemapsFromRobots = async (sitemap) => {
|
|
33100
33190
|
const robotsURL = resolveSiblingURL(sitemap.url, "/robots.txt");
|
|
33101
|
-
const response = await fetch(robotsURL);
|
|
33191
|
+
const response = await fetch(robotsURL, h2IfHttps2(robotsURL));
|
|
33102
33192
|
if (!response.ok) {
|
|
33103
33193
|
return [];
|
|
33104
33194
|
}
|
|
@@ -33119,7 +33209,7 @@ var discoverSitemapsFromRobots = async (sitemap) => {
|
|
|
33119
33209
|
};
|
|
33120
33210
|
var loadRobotsDisallowRules = async (siteURL) => {
|
|
33121
33211
|
const robotsURL = resolveSiblingURL(siteURL, "/robots.txt");
|
|
33122
|
-
const response = await fetch(robotsURL);
|
|
33212
|
+
const response = await fetch(robotsURL, h2IfHttps2(robotsURL));
|
|
33123
33213
|
if (!response.ok) {
|
|
33124
33214
|
return [];
|
|
33125
33215
|
}
|
|
@@ -33160,7 +33250,7 @@ var discoverRecursiveSitemapURLs = async (input) => {
|
|
|
33160
33250
|
}
|
|
33161
33251
|
seen.add(current.sitemap.url);
|
|
33162
33252
|
resolved.push(current.sitemap);
|
|
33163
|
-
const response = await fetch(current.sitemap.url);
|
|
33253
|
+
const response = await fetch(current.sitemap.url, h2IfHttps2(current.sitemap.url));
|
|
33164
33254
|
if (!response.ok) {
|
|
33165
33255
|
throw new Error(`Failed to load sitemap ${current.sitemap.url}: ${response.status} ${response.statusText}`);
|
|
33166
33256
|
}
|
|
@@ -33311,7 +33401,7 @@ var discoverLinkedPagesFromHTML = async (input) => {
|
|
|
33311
33401
|
pruneCounts.robotsBlockedCount += 1;
|
|
33312
33402
|
continue;
|
|
33313
33403
|
}
|
|
33314
|
-
const response = await fetch(current.url);
|
|
33404
|
+
const response = await fetch(current.url, h2IfHttps2(current.url));
|
|
33315
33405
|
if (!response.ok) {
|
|
33316
33406
|
continue;
|
|
33317
33407
|
}
|
|
@@ -33566,6 +33656,7 @@ var loadDiscoveredGitHubRepositoryFiles = async (input) => {
|
|
|
33566
33656
|
repo: input.repo
|
|
33567
33657
|
});
|
|
33568
33658
|
const response = await fetch(requestURL, {
|
|
33659
|
+
...h2IfHttps2(requestURL),
|
|
33569
33660
|
headers: input.requestHeaders
|
|
33570
33661
|
});
|
|
33571
33662
|
if (!response.ok) {
|
|
@@ -33729,7 +33820,7 @@ var createRAGFeedSyncSource = (options) => ({
|
|
|
33729
33820
|
}
|
|
33730
33821
|
}
|
|
33731
33822
|
const discoveredEntries = (await Promise.all([...feedMap.values()].map(async (feed) => {
|
|
33732
|
-
const response = await fetch(feed.url);
|
|
33823
|
+
const response = await fetch(feed.url, h2IfHttps2(feed.url));
|
|
33733
33824
|
if (!response.ok) {
|
|
33734
33825
|
throw new Error(`Failed to load feed ${feed.url}: ${response.status} ${response.statusText}`);
|
|
33735
33826
|
}
|
|
@@ -33803,7 +33894,7 @@ var createRAGSitemapSyncSource = (options) => ({
|
|
|
33803
33894
|
}
|
|
33804
33895
|
}
|
|
33805
33896
|
const discoveredEntries = (await Promise.all([...resolvedSitemapMap.values()].map(async (sitemap) => {
|
|
33806
|
-
const response = await fetch(sitemap.url);
|
|
33897
|
+
const response = await fetch(sitemap.url, h2IfHttps2(sitemap.url));
|
|
33807
33898
|
if (!response.ok) {
|
|
33808
33899
|
throw new Error(`Failed to load sitemap ${sitemap.url}: ${response.status} ${response.statusText}`);
|
|
33809
33900
|
}
|
|
@@ -33868,7 +33959,7 @@ var createRAGSiteDiscoverySyncSource = (options) => ({
|
|
|
33868
33959
|
}
|
|
33869
33960
|
}
|
|
33870
33961
|
const feedEntries = (await Promise.all([...feedMap.values()].map(async (feed) => {
|
|
33871
|
-
const response = await fetch(feed.url);
|
|
33962
|
+
const response = await fetch(feed.url, h2IfHttps2(feed.url));
|
|
33872
33963
|
if (!response.ok) {
|
|
33873
33964
|
throw new Error(`Failed to load feed ${feed.url}: ${response.status} ${response.statusText}`);
|
|
33874
33965
|
}
|
|
@@ -33918,7 +34009,7 @@ var createRAGSiteDiscoverySyncSource = (options) => ({
|
|
|
33918
34009
|
sitemap
|
|
33919
34010
|
})))).flat();
|
|
33920
34011
|
const sitemapEntries = (await Promise.all(resolvedSitemaps.map(async (sitemap) => {
|
|
33921
|
-
const response = await fetch(sitemap.url);
|
|
34012
|
+
const response = await fetch(sitemap.url, h2IfHttps2(sitemap.url));
|
|
33922
34013
|
if (!response.ok) {
|
|
33923
34014
|
throw new Error(`Failed to load sitemap ${sitemap.url}: ${response.status} ${response.statusText}`);
|
|
33924
34015
|
}
|
|
@@ -37325,6 +37416,7 @@ export {
|
|
|
37325
37416
|
evaluateRAGAnswerGroundingCase,
|
|
37326
37417
|
evaluateRAGAnswerGrounding,
|
|
37327
37418
|
deepseekEmbeddings,
|
|
37419
|
+
createVoyageRAGReranker,
|
|
37328
37420
|
createTextFileExtractor,
|
|
37329
37421
|
createSQLiteRAGStore,
|
|
37330
37422
|
createRAGVector,
|
|
@@ -37422,12 +37514,14 @@ export {
|
|
|
37422
37514
|
createPDFFileExtractor,
|
|
37423
37515
|
createOfficeDocumentExtractor,
|
|
37424
37516
|
createLegacyDocumentExtractor,
|
|
37517
|
+
createJinaRAGReranker,
|
|
37425
37518
|
createInMemoryRAGStore,
|
|
37426
37519
|
createHeuristicRAGRetrievalStrategy,
|
|
37427
37520
|
createHeuristicRAGReranker,
|
|
37428
37521
|
createHeuristicRAGQueryTransform,
|
|
37429
37522
|
createEmailExtractor,
|
|
37430
37523
|
createEPUBExtractor,
|
|
37524
|
+
createCohereRAGReranker,
|
|
37431
37525
|
createBuiltinArchiveExpander,
|
|
37432
37526
|
compareRAGRetrievalTraceSummaries,
|
|
37433
37527
|
compareRAGRetrievalStrategies,
|
|
@@ -37511,5 +37605,5 @@ export {
|
|
|
37511
37605
|
addRAGEvaluationSuiteCase
|
|
37512
37606
|
};
|
|
37513
37607
|
|
|
37514
|
-
//# debugId=
|
|
37608
|
+
//# debugId=20E465BEF0B96FD164756E2164756E21
|
|
37515
37609
|
//# sourceMappingURL=index.js.map
|