@absolutejs/absolute 0.19.0-beta.638 → 0.19.0-beta.639

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -26430,6 +26430,8 @@ var DEFAULT_DIRECTORY_EXTENSIONS2 = [
26430
26430
  ".yml",
26431
26431
  ".pdf"
26432
26432
  ];
26433
+ var DEFAULT_GITHUB_EXTENSION_FILTER = DEFAULT_DIRECTORY_EXTENSIONS2; // Alias (same array object, not a copy) of the directory extension list; used when no GitHub extension filter is supplied.
26434
+ var DEFAULT_GITHUB_MAX_DEPTH = 12; // Fallback directory depth for GitHub discovery when options.maxDepth is not provided.
26433
26435
  var isSyncExtractionFailure = (message) => message.startsWith("No RAG file extractor matched") || message.includes("could not extract readable text from this PDF") || message.includes("detected malformed JSONL") || message.includes("detected malformed CSV") || message.includes("detected malformed TSV") || message.includes("detected malformed XML") || message.includes("detected malformed YAML") || message.startsWith("RAG extractor ") || message.includes("extract failed");
26434
26436
  var inferSyncExtractionRemediation = (message) => {
26435
26437
  if (message.includes("could not extract readable text from this PDF")) {
@@ -27671,6 +27673,248 @@ var loadDiscoveredURLDocuments = async (input) => {
27671
27673
  }
27672
27674
  };
27673
27675
  };
27676
/**
 * Normalizes a repository-relative path: trims surrounding whitespace, drops
 * leading slashes/backslashes, converts backslashes to "/", collapses runs of
 * "/" and removes a single trailing "/".
 *
 * @param {string | null | undefined} path - Raw path or path prefix.
 * @returns {string | undefined} The normalized path, or undefined when `path`
 *   is null/undefined.
 */
var normalizeGitHubPath = (path) =>
  path
    ?.trim()
    .replace(/^[\\/]+/g, "")
    .replace(/[\\]+/g, "/")
    .replace(/\/+/g, "/")
    .replace(/\/$/, "");

/**
 * Normalizes a path filter pattern the same way as a path and lower-cases it.
 * Note that the trailing "/" of a directory-style pattern is removed here.
 *
 * @param {string | null | undefined} path - Raw filter pattern.
 * @returns {string | undefined} Lower-cased normalized pattern.
 */
var normalizeGitHubPathFilter = (path) => normalizeGitHubPath(path)?.toLowerCase();

/**
 * Case-insensitively tests a repository path against an include/exclude
 * pattern.
 *
 * - A pattern ending in "/" (or "\") is a directory pattern: it matches the
 *   directory itself and any path containing `<pattern>/`.
 * - Any other pattern matches on equality or as a plain substring.
 *
 * The directory intent is read from the raw pattern because normalization
 * strips the trailing slash; checking the normalized value would never detect
 * it and "docs/" would degrade to the substring "docs".
 *
 * @param {string | null | undefined} path - Repository-relative file path.
 * @param {string | null | undefined} pattern - Filter pattern.
 * @returns {boolean} True when the path matches the pattern; false when either
 *   value is empty after normalization.
 */
var matchesPathFilter = (path, pattern) => {
  const normalizedPath = normalizeGitHubPath(path)?.toLowerCase();
  const normalizedPattern = normalizeGitHubPathFilter(pattern);
  if (!normalizedPath || !normalizedPattern) {
    return false;
  }
  if (normalizedPath === normalizedPattern) {
    return true;
  }
  // `pattern` is a non-empty string here, otherwise normalization above would
  // have produced an empty/undefined value and we would have returned already.
  const isDirectory = /[\\/]\s*$/.test(pattern);
  if (isDirectory) {
    // Covers both the "starts with <dir>/" case and nested occurrences.
    return normalizedPath.includes(`${normalizedPattern}/`);
  }
  return normalizedPath.includes(normalizedPattern);
};
27694
/**
 * Decides whether a repository file path passes the extension allow-list and
 * the optional include/exclude path filters.
 *
 * @param {string} path - Repository-relative file path.
 * @param {{ includeExtensions: Set<string>, includePaths?: string[], excludePaths?: string[] }} input
 *   Filter configuration; extensions are compared in lower case with a leading dot.
 * @returns {boolean} True when the file should be collected.
 */
var shouldIncludeGitHubPath = (path, input) => {
  const candidate = normalizeGitHubPath(path)?.toLowerCase();
  if (!candidate) {
    return false;
  }

  // Extension is everything from the last "." of the whole path ("" if none).
  const dotIndex = candidate.lastIndexOf(".");
  const extension = dotIndex === -1 ? "" : candidate.slice(dotIndex);
  if (!input.includeExtensions.has(extension)) {
    return false;
  }

  const matchesAny = (patterns) => patterns.some((pattern) => matchesPathFilter(candidate, pattern));
  const { includePaths, excludePaths } = input;

  // When include filters exist, at least one of them must match.
  const hasIncludeFilters = (includePaths?.length ?? 0) > 0;
  if (hasIncludeFilters && !matchesAny(includePaths)) {
    return false;
  }

  // Any matching exclude filter rejects the file.
  const hasExcludeFilters = (excludePaths?.length ?? 0) > 0;
  return !(hasExcludeFilters && matchesAny(excludePaths));
};
27716
/**
 * Builds the request headers for authenticated GitHub API calls.
 *
 * @param {string | undefined} token - GitHub access token.
 * @returns {{ Authorization: string, Accept: string, "X-GitHub-Api-Version": string } | undefined}
 *   The header map, or undefined when no token is given (no custom headers are sent).
 */
var buildGitHubHeaders = (token) =>
  token
    ? {
        Authorization: `Bearer ${token}`,
        Accept: "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28"
      }
    : undefined;
27726
/**
 * Builds the "repository contents" API URL for a path inside a repository.
 *
 * The endpoint is resolved as a RELATIVE reference against `<apiBase>/` so
 * that a path on the API base (for example a GitHub Enterprise base such as
 * `https://host/api/v3`) is preserved. A reference starting with "/" would
 * replace the base path entirely.
 *
 * @param {{ apiBaseURL: string, repo: { owner: string, repo: string }, path?: string, branch?: string }} input
 *   API base URL, repository coordinates, optional repository-relative path
 *   and optional ref sent as the `ref` query parameter.
 * @returns {string} Absolute URL including `ref` (when a branch is given) and `per_page=100`.
 */
var buildGitHubContentsURL = (input) => {
  const apiBase = input.apiBaseURL.replace(/\/$/, "");
  const encodedPath = (normalizeGitHubPath(input.path) ?? "")
    .split("/")
    .filter(Boolean)
    .map((segment) => encodeURIComponent(segment))
    .join("/");
  // No leading "/" on purpose: keeps the resolution relative to the base path.
  const endpoint = `repos/${encodeURIComponent(input.repo.owner)}/${encodeURIComponent(input.repo.repo)}/contents`;
  const url = new URL(encodedPath ? `${endpoint}/${encodedPath}` : endpoint, `${apiBase}/`);
  if (input.branch) {
    url.searchParams.set("ref", input.branch);
  }
  url.searchParams.set("per_page", "100");
  return url.toString();
};
27738
/**
 * Reads a contents response body and returns it as a list of entries.
 *
 * @param {{ json: () => Promise<unknown> }} response - Response whose body is parsed as JSON.
 * @param {string} path - Identifier included in the error message.
 * @returns {Promise<object[]>} The array body as-is, or a one-element array
 *   wrapping a single object that has a string `type`.
 * @throws {Error} When the body is neither an array nor such an object.
 */
var parseGitHubContents = async (response, path) => {
  const payload = await response.json();
  if (Array.isArray(payload)) {
    return payload;
  }
  const isSingleEntry = payload !== null && typeof payload === "object" && typeof payload.type === "string";
  if (!isSingleEntry) {
    throw new Error(`Unexpected GitHub contents response at ${path}`);
  }
  return [payload];
};
27748
/**
 * Resolves the download URL for a repository file.
 *
 * @param {{ fallbackDownloadURL?: unknown, branch?: string, path?: string, repo: { owner: string, repo: string } }} input
 *   `fallbackDownloadURL` wins when it is a non-empty string; otherwise a
 *   raw.githubusercontent.com URL is assembled, using "main" when no branch is given.
 * @returns {string} The URL to download the file from.
 */
var buildGitHubRawURL = (input) => {
  const { fallbackDownloadURL } = input;
  if (typeof fallbackDownloadURL === "string" && fallbackDownloadURL) {
    return fallbackDownloadURL;
  }

  const normalizedPath = normalizeGitHubPath(input.path);
  let encodedPath = "";
  if (normalizedPath !== undefined) {
    // Encode each segment separately so the "/" separators survive.
    encodedPath = normalizedPath
      .split("/")
      .filter(Boolean)
      .map((segment) => encodeURIComponent(segment))
      .join("/");
  }

  const parts = [
    encodeURIComponent(input.repo.owner),
    encodeURIComponent(input.repo.repo),
    encodeURIComponent(input.branch ?? "main"),
    encodedPath
  ];
  return `https://raw.githubusercontent.com/${parts.join("/")}`;
};
27756
/**
 * Walks a repository breadth-first through the contents API and collects the
 * files that pass the extension and path filters.
 *
 * The branch is resolved once (`input.branch`, falling back to
 * `input.repo.branch`) and used for the listing request, the raw URL fallback
 * and the reported metadata, so the reported branch always matches the ref
 * that was actually listed.
 *
 * @param {object} input
 * @param {string} input.apiBaseURL - API base URL passed to the contents URL builder.
 * @param {string} [input.branch] - Ref to list; falls back to `input.repo.branch`.
 * @param {Set<string>} input.includeExtensions - Allowed lower-case extensions.
 * @param {number} input.maxDepth - Directories are followed only while the
 *   current depth is below this value (the start path has depth 0).
 * @param {number} [input.maxFilesPerRepo] - Stop once this many files are collected.
 * @param {object} input.repo - Repository descriptor (`owner`, `repo`, and
 *   optional `branch`, `pathPrefix`, `includePaths`, `excludePaths`, `metadata`).
 * @param {object} [input.requestHeaders] - Headers sent with each listing request.
 * @param {unknown} input.source - Value stored as `source` on every entry.
 * @param {{ repoMetadata?: object }} [input.defaults] - Base metadata merged first.
 * @returns {Promise<object[]>} Collected file descriptors.
 * @throws {Error} When a listing request does not return an OK status, or the
 *   response body has an unexpected shape.
 */
var loadDiscoveredGitHubRepositoryFiles = async (input) => {
  const { repo } = input;
  const repository = `${repo.owner}/${repo.repo}`;
  const branch = input.branch ?? repo.branch;
  // `pending` is consumed with a cursor instead of shift() to keep dequeue O(1).
  const pending = [{ depth: 0, path: normalizeGitHubPath(repo.pathPrefix) }];
  const seen = new Set();
  const collected = [];

  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const current = pending[cursor];
    const currentPath = normalizeGitHubPath(current.path) ?? "";
    if (seen.has(currentPath)) {
      continue;
    }
    seen.add(currentPath);

    const requestURL = buildGitHubContentsURL({
      apiBaseURL: input.apiBaseURL,
      branch,
      path: currentPath,
      repo
    });
    const response = await fetch(requestURL, {
      headers: input.requestHeaders
    });
    if (!response.ok) {
      throw new Error(`Failed to list GitHub repo contents at ${currentPath || repository}: ${response.status} ${response.statusText}`);
    }

    const entries = await parseGitHubContents(response, requestURL);
    for (const entry of entries) {
      if (typeof entry.path !== "string" || typeof entry.type !== "string") {
        continue;
      }
      if (entry.type === "dir") {
        if (current.depth < input.maxDepth) {
          pending.push({ depth: current.depth + 1, path: entry.path });
        }
        continue;
      }
      // Anything that is neither a directory nor a file is ignored.
      if (entry.type !== "file") {
        continue;
      }
      const isWanted = shouldIncludeGitHubPath(entry.path, {
        excludePaths: repo.excludePaths,
        includeExtensions: input.includeExtensions,
        includePaths: repo.includePaths
      });
      if (!isWanted) {
        continue;
      }

      const fileURL = buildGitHubRawURL({
        repo,
        branch,
        fallbackDownloadURL: entry.download_url,
        path: entry.path
      });
      collected.push({
        repository,
        repoBranch: branch,
        // NOTE(review): this is the listed directory path, while
        // metadata.repoPath below is the file path — confirm which is intended.
        repoPath: currentPath,
        metadata: {
          ...input.defaults?.repoMetadata ?? {},
          repo: repository,
          repoBranch: branch,
          repoName: repo.repo,
          repoOwner: repo.owner,
          repoPath: entry.path,
          // Per-repo metadata may override the derived fields above, but not `source`.
          ...repo.metadata ?? {},
          source: input.source
        },
        source: input.source,
        path: entry.path,
        title: `${repo.owner}/${repo.repo}:${entry.path}`,
        url: fileURL
      });
      if (typeof input.maxFilesPerRepo === "number" && collected.length >= input.maxFilesPerRepo) {
        return collected;
      }
    }
  }
  return collected;
};
27836
/**
 * Turns a user-supplied extension list into a normalized Set.
 *
 * Each string entry is trimmed, lower-cased and given a leading "." when it
 * lacks one; non-string and blank entries are dropped. The default GitHub
 * extension filter is used when the list is undefined, empty, or yields no
 * usable entries.
 *
 * @param {string[] | undefined} value - Requested extensions.
 * @returns {Set<string>} Normalized extensions.
 */
var buildGitHubExtensionSet = (value) => {
  const useDefaults = value === undefined || value.length === 0;
  const candidates = useDefaults ? DEFAULT_GITHUB_EXTENSION_FILTER : value;
  const extensions = new Set();
  for (const candidate of candidates) {
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed.length === 0) {
      continue;
    }
    const lowered = trimmed.toLowerCase();
    extensions.add(lowered.startsWith(".") ? lowered : `.${lowered}`);
  }
  if (extensions.size > 0) {
    return extensions;
  }
  // Nothing usable was supplied: fall back to the defaults verbatim.
  return new Set(DEFAULT_GITHUB_EXTENSION_FILTER);
};
27852
/**
 * Creates a sync source (kind "url") that discovers files in one or more
 * GitHub repositories and hands them to the URL document loader.
 *
 * @param {object} options - Source configuration. Read here: `description`,
 *   `id`, `label`, `metadata`, `retryAttempts`, `retryDelayMs`, `repos`,
 *   `token`, `includeExtensions`, `apiBaseUrl`, `maxDepth`, `maxFilesPerRepo`,
 *   `baseMetadata`, `chunkingRegistry`, `defaultChunking`, `extractorRegistry`
 *   and `extractors`.
 * @returns {object} Source descriptor whose `sync` resolves to the loader
 *   result with `discoveredFileCount` and `repoCount` added to its metadata.
 */
var createRAGGitHubSyncSource = (options) => {
  // "owner/repo" for a single repository, otherwise "<n> repos".
  const describeTarget = () => {
    if (options.repos.length !== 1) {
      return `${options.repos.length} repos`;
    }
    return `${options.repos[0]?.owner ?? "unknown"}/${options.repos[0]?.repo ?? "repo"}`;
  };

  const sync = async ({ collection, deleteDocument, listDocuments }) => {
    const requestHeaders = buildGitHubHeaders(options.token);
    const extensionFilter = buildGitHubExtensionSet(options.includeExtensions);
    const apiBaseURL = options.apiBaseUrl?.trim().replace(/\/$/, "") || "https://api.github.com";
    // Requested depth is clamped to the range 0..64.
    const requestedDepth = options.maxDepth ?? DEFAULT_GITHUB_MAX_DEPTH;
    const maxDepth = Math.max(0, Math.min(requestedDepth, 64));

    const discoverRepo = async (repo) =>
      loadDiscoveredGitHubRepositoryFiles({
        branch: repo.branch,
        apiBaseURL,
        includeExtensions: extensionFilter,
        maxDepth,
        maxFilesPerRepo: options.maxFilesPerRepo,
        repo,
        requestHeaders,
        source: options.label,
        defaults: {
          repoMetadata: {
            repoOwner: repo.owner,
            repoName: repo.repo,
            repoBranch: repo.branch,
            repoPrefix: repo.pathPrefix ?? ""
          }
        }
      });
    // All repositories are listed concurrently; results keep repo order.
    const filesPerRepo = await Promise.all(options.repos.map(discoverRepo));
    const discoveredFiles = filesPerRepo.flat();

    const urlEntries = discoveredFiles.map((entry) => {
      const metadata = {
        ...entry.metadata,
        repoPath: entry.path,
        repoBranch: entry.repoBranch,
        repo: entry.repository,
        sourcePath: entry.path
      };
      return { metadata, title: entry.title, url: entry.url };
    });

    const result = await loadDiscoveredURLDocuments({
      baseMetadata: options.baseMetadata,
      chunkingRegistry: options.chunkingRegistry,
      collection,
      defaultChunking: options.defaultChunking,
      deleteDocument,
      extractorRegistry: options.extractorRegistry,
      extractors: options.extractors,
      listDocuments,
      sourceId: options.id,
      urlEntries
    });

    return {
      ...result,
      metadata: {
        ...result.metadata ?? {},
        discoveredFileCount: discoveredFiles.length,
        repoCount: options.repos.length
      }
    };
  };

  return {
    description: options.description,
    id: options.id,
    kind: "url",
    label: options.label,
    metadata: options.metadata,
    retryAttempts: options.retryAttempts,
    retryDelayMs: options.retryDelayMs,
    target: describeTarget(),
    sync
  };
};
27674
27918
  var createRAGFeedSyncSource = (options) => ({
27675
27919
  description: options.description,
27676
27920
  id: options.id,
@@ -29065,6 +29309,26 @@ var buildPostgresFilterPlan = (filter, startIndex = 0) => {
29065
29309
  const comparison = operator === "$gt" ? ">" : operator === "$gte" ? ">=" : operator === "$lt" ? "<" : "<=";
29066
29310
  return `((${actualSql}) ~ '^-?[0-9]+(\\.[0-9]+)?$' AND (${actualSql})::double precision ${comparison} ${bind(expected)})`;
29067
29311
  }
29312
+ case "$contains":
29313
+ if (isScalarField) {
29314
+ return null;
29315
+ }
29316
+ if (toPostgresFilterBinding(expected) === undefined) {
29317
+ return null;
29318
+ }
29319
+ return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ? ${bind(String(expected))})`;
29320
+ case "$containsAny":
29321
+ case "$containsAll": {
29322
+ if (isScalarField || !Array.isArray(expected)) {
29323
+ return null;
29324
+ }
29325
+ const values = expected.map((entry2) => toPostgresFilterBinding(entry2)).filter((entry2) => entry2 !== undefined);
29326
+ if (values.length === 0 || values.length !== expected.length) {
29327
+ return null;
29328
+ }
29329
+ const sqlArray = `ARRAY[${values.map((value2) => bind(String(value2))).join(", ")}]::text[]`;
29330
+ return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ${operator === "$containsAny" ? "?|" : "?&"} ${sqlArray})`;
29331
+ }
29068
29332
  default:
29069
29333
  return null;
29070
29334
  }
@@ -29105,7 +29369,7 @@ var buildPostgresPushdownFilter = (filter) => {
29105
29369
  }
29106
29370
  continue;
29107
29371
  }
29108
- if (Array.isArray(value) || isOperatorFilterRecord(value) && Object.keys(value).some((operator) => operator === "$contains" || operator === "$containsAny" || operator === "$containsAll")) {
29372
+ if (Array.isArray(value) || isOperatorFilterRecord(value) && Object.keys(value).some((operator) => !(operator === "$exists" || operator === "$in" || operator === "$contains" || operator === "$containsAny" || operator === "$containsAll" || operator === "$ne" || operator === "$gt" || operator === "$gte" || operator === "$lt" || operator === "$lte"))) {
29109
29373
  continue;
29110
29374
  }
29111
29375
  const isScalarColumnKey = ["chunkId", "source", "title"].includes(key);
@@ -32619,6 +32883,7 @@ export {
32619
32883
  createRAGHTMXConfig,
32620
32884
  createRAGGraphEmailSyncClient,
32621
32885
  createRAGGmailEmailSyncClient,
32886
+ createRAGGitHubSyncSource,
32622
32887
  createRAGFileSyncStateStore,
32623
32888
  createRAGFileSearchTraceStore,
32624
32889
  createRAGFileSearchTracePruneHistoryStore,
@@ -32704,5 +32969,5 @@ export {
32704
32969
  addRAGEvaluationSuiteCase
32705
32970
  };
32706
32971
 
32707
- //# debugId=BF014C168CA3DA5064756E2164756E21
32972
+ //# debugId=7B7D71C6EA35C8D164756E2164756E21
32708
32973
  //# sourceMappingURL=index.js.map