@absolutejs/absolute 0.19.0-beta.638 → 0.19.0-beta.639
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.js +267 -2
- package/dist/ai/index.js.map +5 -5
- package/dist/src/ai/index.d.ts +2 -2
- package/dist/src/ai/rag/index.d.ts +2 -2
- package/dist/src/ai/rag/sync.d.ts +2 -1
- package/dist/src/ai/rag/types.d.ts +1 -1
- package/dist/types/ai.d.ts +28 -0
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -26430,6 +26430,8 @@ var DEFAULT_DIRECTORY_EXTENSIONS2 = [
|
|
|
26430
26430
|
".yml",
|
|
26431
26431
|
".pdf"
|
|
26432
26432
|
];
|
|
26433
|
+
// Extension list used by buildGitHubExtensionSet when the caller supplies no usable extensions.
// This is an alias of DEFAULT_DIRECTORY_EXTENSIONS2 (the same array instance, not a copy).
var DEFAULT_GITHUB_EXTENSION_FILTER = DEFAULT_DIRECTORY_EXTENSIONS2;
|
|
26434
|
+
// Directory recursion depth used by createRAGGitHubSyncSource when options.maxDepth is not provided.
var DEFAULT_GITHUB_MAX_DEPTH = 12;
|
|
26433
26435
|
/**
 * Reports whether a sync error message describes a content-extraction failure.
 *
 * @param {string} message - Error text to classify.
 * @returns {boolean} True when the message starts with one of the known
 *   extractor prefixes or contains one of the known extraction-failure phrases.
 */
var isSyncExtractionFailure = (message) => {
  // Markers that must appear at the very start of the message.
  const leadingMarkers = ["No RAG file extractor matched", "RAG extractor "];
  // Markers that may appear anywhere in the message.
  const embeddedMarkers = [
    "could not extract readable text from this PDF",
    "detected malformed JSONL",
    "detected malformed CSV",
    "detected malformed TSV",
    "detected malformed XML",
    "detected malformed YAML",
    "extract failed"
  ];
  return leadingMarkers.some((marker) => message.startsWith(marker)) || embeddedMarkers.some((marker) => message.includes(marker));
};
|
|
26434
26436
|
var inferSyncExtractionRemediation = (message) => {
|
|
26435
26437
|
if (message.includes("could not extract readable text from this PDF")) {
|
|
@@ -27671,6 +27673,248 @@ var loadDiscoveredURLDocuments = async (input) => {
|
|
|
27671
27673
|
}
|
|
27672
27674
|
};
|
|
27673
27675
|
};
|
|
27676
|
+
/**
 * Canonicalises a repository-relative path.
 *
 * Trims surrounding whitespace, drops leading slashes and backslashes, converts
 * backslash runs to "/", collapses repeated "/" and removes one trailing "/".
 *
 * @param {string | null | undefined} path - Raw path.
 * @returns {string | undefined} The normalised path, or undefined when `path`
 *   is null or undefined.
 */
var normalizeGitHubPath = (path) => {
  if (path === null || path === undefined) {
    return undefined;
  }
  const withoutLeadingSeparators = path.trim().replace(/^[\\/]+/g, "");
  const forwardSlashesOnly = withoutLeadingSeparators.replace(/[\\]+/g, "/");
  const singleSeparators = forwardSlashesOnly.replace(/\/+/g, "/");
  return singleSeparators.replace(/\/$/, "");
};
|
|
27677
|
+
/**
 * Normalises a path filter: same canonical form as normalizeGitHubPath, lower-cased.
 *
 * @param {string | null | undefined} path - Raw filter pattern.
 * @returns {string | undefined} Lower-cased normalised pattern, or undefined
 *   when normalizeGitHubPath yields no value.
 */
var normalizeGitHubPathFilter = (path) => {
  const normalized = normalizeGitHubPath(path);
  if (normalized === undefined || normalized === null) {
    return undefined;
  }
  return normalized.toLowerCase();
};
|
|
27678
|
+
/**
 * Tests whether a repository path matches a user-supplied path filter.
 *
 * Matching is case-insensitive and works on normalised paths. A pattern that
 * ends in "/" or "\" is a directory pattern: it matches the directory itself
 * and any path containing "<directory>/". Any other pattern matches on exact
 * equality or as a substring of the path.
 *
 * @param {string | null | undefined} path - Path to test.
 * @param {string | null | undefined} pattern - Filter pattern.
 * @returns {boolean} True when the path matches; false when either argument
 *   normalises to an empty or missing value.
 */
var matchesPathFilter = (path, pattern) => {
  const normalizedPath = normalizeGitHubPath(path)?.toLowerCase();
  const normalizedPattern = normalizeGitHubPathFilter(pattern);
  if (!normalizedPath || !normalizedPattern) {
    return false;
  }
  // normalizeGitHubPathFilter strips the trailing separator, so the directory
  // marker has to be read from the raw pattern. Testing the normalised value
  // (as before) could never be true and silently turned "docs/" into a
  // substring match on "docs".
  const isDirectory = /[\\/]$/.test(pattern.trim());
  if (normalizedPath === normalizedPattern) {
    return true;
  }
  if (!isDirectory) {
    return normalizedPath.includes(normalizedPattern);
  }
  // Directory pattern: require the separator after the directory name. This
  // also covers the "path starts with <directory>/" case.
  return normalizedPath.includes(`${normalizedPattern}/`);
};
|
|
27694
|
+
/**
 * Decides whether a repository file passes the extension and path filters.
 *
 * @param {string} path - Repository path of the file.
 * @param {object} input - Filter settings.
 * @param {Set<string>} input.includeExtensions - Allowed extensions; the
 *   lookup key is the lower-cased text from the last "." of the path, or ""
 *   when the path has no ".".
 * @param {string[]} [input.includePaths] - When non-empty, the path must match
 *   at least one of these patterns.
 * @param {string[]} [input.excludePaths] - When non-empty, the path must match
 *   none of these patterns.
 * @returns {boolean} True when the file should be collected.
 */
var shouldIncludeGitHubPath = (path, input) => {
  const candidate = normalizeGitHubPath(path)?.toLowerCase();
  if (!candidate) {
    return false;
  }
  const lastDot = candidate.lastIndexOf(".");
  const extension = lastDot === -1 ? "" : candidate.slice(lastDot);
  if (!input.includeExtensions.has(extension)) {
    return false;
  }
  const matchesAny = (patterns) => patterns.some((pattern) => matchesPathFilter(candidate, pattern));
  const includePatterns = input.includePaths ?? [];
  if (includePatterns.length > 0 && !matchesAny(includePatterns)) {
    return false;
  }
  const excludePatterns = input.excludePaths ?? [];
  return !(excludePatterns.length > 0 && matchesAny(excludePatterns));
};
|
|
27716
|
+
/**
 * Builds the request headers for authenticated GitHub API calls.
 *
 * @param {string | undefined} token - Access token.
 * @returns {Record<string, string> | undefined} Authorization, Accept and
 *   API-version headers when a token is supplied; undefined when the token is
 *   missing or empty.
 */
var buildGitHubHeaders = (token) => token ? {
  Authorization: `Bearer ${token}`,
  Accept: "application/vnd.github+json",
  "X-GitHub-Api-Version": "2022-11-28"
} : undefined;
|
|
27726
|
+
/**
 * Builds the "repository contents" API URL for a path inside a repository.
 *
 * @param {object} input
 * @param {string} input.apiBaseURL - API root; may carry its own path prefix
 *   (for example an enterprise-style "https://host/api/v3").
 * @param {{ owner: string, repo: string }} input.repo - Repository coordinates.
 * @param {string} [input.path] - Repository-relative path; empty or missing
 *   means the repository root.
 * @param {string} [input.branch] - Ref sent as the `ref` query parameter when
 *   truthy.
 * @returns {string} Absolute URL with `per_page=100` appended.
 */
var buildGitHubContentsURL = (input) => {
  const apiBase = input.apiBaseURL.replace(/\/$/, "");
  // Encode each segment separately so "/" stays a separator.
  const encodedPath = normalizeGitHubPath(input.path)?.split("/").filter(Boolean).map((segment) => encodeURIComponent(segment)).join("/") ?? "";
  // The endpoint must be relative (no leading "/"). A path-absolute reference
  // makes `new URL` discard any path on the base, which turned
  // "https://host/api/v3" into "https://host/repos/...".
  const endpoint = `repos/${encodeURIComponent(input.repo.owner)}/${encodeURIComponent(input.repo.repo)}/contents`;
  const url = new URL(encodedPath ? `${endpoint}/${encodedPath}` : endpoint, `${apiBase}/`);
  if (input.branch) {
    url.searchParams.set("ref", input.branch);
  }
  url.searchParams.set("per_page", "100");
  return url.toString();
};
|
|
27738
|
+
/**
 * Reads a contents-API response body and returns it as a list of entries.
 *
 * @param {{ json: () => Promise<unknown> }} response - Response whose JSON
 *   body is read.
 * @param {string} path - Identifier included in the error message.
 * @returns {Promise<object[]>} The body itself when it is an array, or a
 *   one-element array when it is an object with a string `type`.
 * @throws {Error} When the body has neither shape.
 */
var parseGitHubContents = async (response, path) => {
  const payload = await response.json();
  if (Array.isArray(payload)) {
    return payload;
  }
  const isSingleEntry = Boolean(payload) && typeof payload === "object" && typeof payload.type === "string";
  if (!isSingleEntry) {
    throw new Error(`Unexpected GitHub contents response at ${path}`);
  }
  return [payload];
};
|
|
27748
|
+
/**
 * Picks the download URL for a repository file.
 *
 * @param {object} input
 * @param {unknown} [input.fallbackDownloadURL] - Returned unchanged when it is
 *   a non-empty string.
 * @param {{ owner: string, repo: string }} input.repo - Repository coordinates.
 * @param {string} [input.branch] - Branch name; "main" is used when null or
 *   undefined.
 * @param {string} [input.path] - Repository-relative file path.
 * @returns {string} The supplied download URL, otherwise a
 *   raw.githubusercontent.com URL built from owner, repo, branch and path.
 */
var buildGitHubRawURL = (input) => {
  const { fallbackDownloadURL } = input;
  if (typeof fallbackDownloadURL === "string" && fallbackDownloadURL.length > 0) {
    return fallbackDownloadURL;
  }
  const branch = input.branch ?? "main";
  const segments = (normalizeGitHubPath(input.path) ?? "").split("/").filter(Boolean);
  const encodedPath = segments.map((segment) => encodeURIComponent(segment)).join("/");
  const owner = encodeURIComponent(input.repo.owner);
  const repo = encodeURIComponent(input.repo.repo);
  return `https://raw.githubusercontent.com/${owner}/${repo}/${encodeURIComponent(branch)}/${encodedPath}`;
};
|
|
27756
|
+
/**
 * Walks a repository breadth-first through the contents API and collects a
 * record for every file that passes the extension and path filters.
 *
 * @param {object} input
 * @param {string} input.apiBaseURL - API root passed to buildGitHubContentsURL.
 * @param {string} [input.branch] - Ref to list; falls back to `input.repo.branch`.
 * @param {Set<string>} input.includeExtensions - Allowed file extensions.
 * @param {number} input.maxDepth - Directories are queued only while the
 *   current depth is below this value (the starting path is depth 0).
 * @param {number} [input.maxFilesPerRepo] - Stop once this many files are
 *   collected; a value of 0 or less collects nothing.
 * @param {object} input.repo - Repository descriptor (`owner`, `repo`, and
 *   optional `branch`, `pathPrefix`, `includePaths`, `excludePaths`, `metadata`).
 * @param {object} [input.requestHeaders] - Headers sent with every listing request.
 * @param {*} input.source - Value copied onto every record as `source`.
 * @param {{ repoMetadata?: object }} [input.defaults] - Metadata merged first,
 *   so the computed repo fields and `input.repo.metadata` override it.
 * @returns {Promise<object[]>} Collected file records in discovery order.
 * @throws {Error} When a listing request returns a non-OK status or an
 *   unexpected body.
 */
var loadDiscoveredGitHubRepositoryFiles = async (input) => {
  const fileLimit = typeof input.maxFilesPerRepo === "number" ? input.maxFilesPerRepo : undefined;
  // The limit used to be checked only after a file had been pushed, so a limit
  // of 0 still issued a request and returned one file.
  if (fileLimit !== undefined && fileLimit <= 0) {
    return [];
  }
  // Resolve the branch once. Listing and records previously used opposite
  // precedence, so a record could name a branch other than the one listed.
  const repoBranch = input.branch ?? input.repo.branch;
  const fileRepo = `${input.repo.owner}/${input.repo.repo}`;
  const queue = [
    { depth: 0, path: normalizeGitHubPath(input.repo.pathPrefix) }
  ];
  const seen = new Set();
  const collected = [];
  while (queue.length > 0) {
    const current = queue.shift();
    const currentPath = normalizeGitHubPath(current.path) ?? "";
    if (seen.has(currentPath)) {
      continue;
    }
    seen.add(currentPath);
    const requestURL = buildGitHubContentsURL({
      apiBaseURL: input.apiBaseURL,
      branch: repoBranch,
      path: currentPath,
      repo: input.repo
    });
    const response = await fetch(requestURL, {
      headers: input.requestHeaders
    });
    if (!response.ok) {
      throw new Error(`Failed to list GitHub repo contents at ${currentPath || fileRepo}: ${response.status} ${response.statusText}`);
    }
    const entries = await parseGitHubContents(response, requestURL);
    for (const entry of entries) {
      if (typeof entry.path !== "string" || typeof entry.type !== "string") {
        continue;
      }
      if (entry.type === "dir") {
        if (current.depth < input.maxDepth) {
          queue.push({ depth: current.depth + 1, path: entry.path });
        }
        continue;
      }
      // Any other non-file entry type is ignored.
      if (entry.type !== "file") {
        continue;
      }
      const included = shouldIncludeGitHubPath(entry.path, {
        excludePaths: input.repo.excludePaths,
        includeExtensions: input.includeExtensions,
        includePaths: input.repo.includePaths
      });
      if (!included) {
        continue;
      }
      const fileURL = buildGitHubRawURL({
        repo: input.repo,
        branch: repoBranch,
        fallbackDownloadURL: entry.download_url,
        path: entry.path
      });
      collected.push({
        repository: fileRepo,
        repoBranch,
        // Directory that was listed, not the file's own path.
        repoPath: currentPath,
        metadata: {
          ...input.defaults?.repoMetadata ?? {},
          repo: fileRepo,
          repoBranch,
          repoName: input.repo.repo,
          repoOwner: input.repo.owner,
          repoPath: entry.path,
          ...input.repo.metadata ?? {},
          source: input.source
        },
        source: input.source,
        path: entry.path,
        title: `${fileRepo}:${entry.path}`,
        url: fileURL
      });
      if (fileLimit !== undefined && collected.length >= fileLimit) {
        return collected;
      }
    }
  }
  return collected;
};
|
|
27836
|
+
/**
 * Builds the set of allowed file extensions for a GitHub sync.
 *
 * Each string entry is trimmed, lower-cased and given a leading "." when it
 * lacks one; non-string and blank entries are ignored. When `value` is
 * undefined, empty, or yields no usable entry, the entries of
 * DEFAULT_GITHUB_EXTENSION_FILTER are used.
 *
 * @param {string[] | undefined} value - Caller-supplied extensions.
 * @returns {Set<string>} Allowed extensions.
 */
var buildGitHubExtensionSet = (value) => {
  const toExtension = (raw) => {
    if (typeof raw !== "string") {
      return undefined;
    }
    const trimmed = raw.trim();
    if (trimmed.length === 0) {
      return undefined;
    }
    const lowered = trimmed.toLowerCase();
    return trimmed.startsWith(".") ? lowered : `.${lowered}`;
  };
  const useDefaults = value === undefined || value.length === 0;
  const candidates = useDefaults ? DEFAULT_GITHUB_EXTENSION_FILTER : value;
  const extensions = new Set();
  for (const raw of candidates) {
    const extension = toExtension(raw);
    if (extension) {
      extensions.add(extension);
    }
  }
  if (extensions.size > 0) {
    return extensions;
  }
  // Nothing usable was supplied: fall back to the defaults exactly as listed.
  for (const fallback of DEFAULT_GITHUB_EXTENSION_FILTER) {
    extensions.add(fallback);
  }
  return extensions;
};
|
|
27852
|
+
/**
 * Creates a sync source that discovers files in one or more GitHub
 * repositories and loads them through loadDiscoveredURLDocuments.
 *
 * @param {object} options - Source configuration. Fields read here:
 *   `description`, `id`, `label`, `metadata`, `retryAttempts`, `retryDelayMs`,
 *   `repos`, `token`, `includeExtensions`, `apiBaseUrl`, `maxDepth`,
 *   `maxFilesPerRepo`, `baseMetadata`, `chunkingRegistry`, `defaultChunking`,
 *   `extractorRegistry` and `extractors`.
 * @returns {object} Source descriptor with `kind: "url"`, a `target` label
 *   ("owner/repo" for a single repository, otherwise "<n> repos") and an
 *   async `sync` function.
 */
var createRAGGitHubSyncSource = (options) => {
  const describeTarget = () => {
    if (options.repos.length !== 1) {
      return `${options.repos.length} repos`;
    }
    const onlyRepo = options.repos[0];
    return `${onlyRepo?.owner ?? "unknown"}/${onlyRepo?.repo ?? "repo"}`;
  };
  // Discovers every repository's files, hands them to the URL loader, and
  // adds discovery counts to the loader's result metadata.
  const sync = async ({ collection, deleteDocument, listDocuments }) => {
    const requestHeaders = buildGitHubHeaders(options.token);
    const extensionFilter = buildGitHubExtensionSet(options.includeExtensions);
    const apiBaseURL = options.apiBaseUrl?.trim().replace(/\/$/, "") || "https://api.github.com";
    const requestedDepth = options.maxDepth ?? DEFAULT_GITHUB_MAX_DEPTH;
    // Clamp the depth to the range 0..64.
    const maxDepth = Math.max(0, Math.min(requestedDepth, 64));
    const discoverRepo = async (repo) => loadDiscoveredGitHubRepositoryFiles({
      branch: repo.branch,
      apiBaseURL,
      includeExtensions: extensionFilter,
      maxDepth,
      maxFilesPerRepo: options.maxFilesPerRepo,
      repo,
      requestHeaders,
      source: options.label,
      defaults: {
        repoMetadata: {
          repoOwner: repo.owner,
          repoName: repo.repo,
          repoBranch: repo.branch,
          repoPrefix: repo.pathPrefix ?? ""
        }
      }
    });
    const perRepoFiles = await Promise.all(options.repos.map(discoverRepo));
    const discoveredFiles = perRepoFiles.flat();
    const toURLEntry = (entry) => ({
      metadata: {
        ...entry.metadata,
        repoPath: entry.path,
        repoBranch: entry.repoBranch,
        repo: entry.repository,
        sourcePath: entry.path
      },
      title: entry.title,
      url: entry.url
    });
    const result = await loadDiscoveredURLDocuments({
      baseMetadata: options.baseMetadata,
      chunkingRegistry: options.chunkingRegistry,
      collection,
      defaultChunking: options.defaultChunking,
      deleteDocument,
      extractorRegistry: options.extractorRegistry,
      extractors: options.extractors,
      listDocuments,
      sourceId: options.id,
      urlEntries: discoveredFiles.map((entry) => toURLEntry(entry))
    });
    return {
      ...result,
      metadata: {
        ...result.metadata ?? {},
        discoveredFileCount: discoveredFiles.length,
        repoCount: options.repos.length
      }
    };
  };
  return {
    description: options.description,
    id: options.id,
    kind: "url",
    label: options.label,
    metadata: options.metadata,
    retryAttempts: options.retryAttempts,
    retryDelayMs: options.retryDelayMs,
    target: describeTarget(),
    sync
  };
};
|
|
27674
27918
|
var createRAGFeedSyncSource = (options) => ({
|
|
27675
27919
|
description: options.description,
|
|
27676
27920
|
id: options.id,
|
|
@@ -29065,6 +29309,26 @@ var buildPostgresFilterPlan = (filter, startIndex = 0) => {
|
|
|
29065
29309
|
const comparison = operator === "$gt" ? ">" : operator === "$gte" ? ">=" : operator === "$lt" ? "<" : "<=";
|
|
29066
29310
|
return `((${actualSql}) ~ '^-?[0-9]+(\\.[0-9]+)?$' AND (${actualSql})::double precision ${comparison} ${bind(expected)})`;
|
|
29067
29311
|
}
|
|
29312
|
+
case "$contains":
|
|
29313
|
+
if (isScalarField) {
|
|
29314
|
+
return null;
|
|
29315
|
+
}
|
|
29316
|
+
if (toPostgresFilterBinding(expected) === undefined) {
|
|
29317
|
+
return null;
|
|
29318
|
+
}
|
|
29319
|
+
return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ? ${bind(String(expected))})`;
|
|
29320
|
+
case "$containsAny":
|
|
29321
|
+
case "$containsAll": {
|
|
29322
|
+
if (isScalarField || !Array.isArray(expected)) {
|
|
29323
|
+
return null;
|
|
29324
|
+
}
|
|
29325
|
+
const values = expected.map((entry2) => toPostgresFilterBinding(entry2)).filter((entry2) => entry2 !== undefined);
|
|
29326
|
+
if (values.length === 0 || values.length !== expected.length) {
|
|
29327
|
+
return null;
|
|
29328
|
+
}
|
|
29329
|
+
const sqlArray = `ARRAY[${values.map((value2) => bind(String(value2))).join(", ")}]::text[]`;
|
|
29330
|
+
return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ${operator === "$containsAny" ? "?|" : "?&"} ${sqlArray})`;
|
|
29331
|
+
}
|
|
29068
29332
|
default:
|
|
29069
29333
|
return null;
|
|
29070
29334
|
}
|
|
@@ -29105,7 +29369,7 @@ var buildPostgresPushdownFilter = (filter) => {
|
|
|
29105
29369
|
}
|
|
29106
29370
|
continue;
|
|
29107
29371
|
}
|
|
29108
|
-
if (Array.isArray(value) || isOperatorFilterRecord(value) && Object.keys(value).some((operator) => operator === "$contains" || operator === "$containsAny" || operator === "$containsAll")) {
|
|
29372
|
+
if (Array.isArray(value) || isOperatorFilterRecord(value) && Object.keys(value).some((operator) => !(operator === "$exists" || operator === "$in" || operator === "$contains" || operator === "$containsAny" || operator === "$containsAll" || operator === "$ne" || operator === "$gt" || operator === "$gte" || operator === "$lt" || operator === "$lte"))) {
|
|
29109
29373
|
continue;
|
|
29110
29374
|
}
|
|
29111
29375
|
const isScalarColumnKey = ["chunkId", "source", "title"].includes(key);
|
|
@@ -32619,6 +32883,7 @@ export {
|
|
|
32619
32883
|
createRAGHTMXConfig,
|
|
32620
32884
|
createRAGGraphEmailSyncClient,
|
|
32621
32885
|
createRAGGmailEmailSyncClient,
|
|
32886
|
+
createRAGGitHubSyncSource,
|
|
32622
32887
|
createRAGFileSyncStateStore,
|
|
32623
32888
|
createRAGFileSearchTraceStore,
|
|
32624
32889
|
createRAGFileSearchTracePruneHistoryStore,
|
|
@@ -32704,5 +32969,5 @@ export {
|
|
|
32704
32969
|
addRAGEvaluationSuiteCase
|
|
32705
32970
|
};
|
|
32706
32971
|
|
|
32707
|
-
//# debugId=
|
|
32972
|
+
//# debugId=7B7D71C6EA35C8D164756E2164756E21
|
|
32708
32973
|
//# sourceMappingURL=index.js.map
|