@librechat/agents 2.4.30 → 2.4.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/cjs/common/enum.cjs +1 -0
  2. package/dist/cjs/common/enum.cjs.map +1 -1
  3. package/dist/cjs/main.cjs +2 -0
  4. package/dist/cjs/main.cjs.map +1 -1
  5. package/dist/cjs/tools/search/firecrawl.cjs +149 -0
  6. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -0
  7. package/dist/cjs/tools/search/format.cjs +116 -0
  8. package/dist/cjs/tools/search/format.cjs.map +1 -0
  9. package/dist/cjs/tools/search/highlights.cjs +194 -0
  10. package/dist/cjs/tools/search/highlights.cjs.map +1 -0
  11. package/dist/cjs/tools/search/rerankers.cjs +187 -0
  12. package/dist/cjs/tools/search/rerankers.cjs.map +1 -0
  13. package/dist/cjs/tools/search/search.cjs +410 -0
  14. package/dist/cjs/tools/search/search.cjs.map +1 -0
  15. package/dist/cjs/tools/search/tool.cjs +103 -0
  16. package/dist/cjs/tools/search/tool.cjs.map +1 -0
  17. package/dist/esm/common/enum.mjs +1 -0
  18. package/dist/esm/common/enum.mjs.map +1 -1
  19. package/dist/esm/main.mjs +1 -0
  20. package/dist/esm/main.mjs.map +1 -1
  21. package/dist/esm/tools/search/firecrawl.mjs +145 -0
  22. package/dist/esm/tools/search/firecrawl.mjs.map +1 -0
  23. package/dist/esm/tools/search/format.mjs +114 -0
  24. package/dist/esm/tools/search/format.mjs.map +1 -0
  25. package/dist/esm/tools/search/highlights.mjs +192 -0
  26. package/dist/esm/tools/search/highlights.mjs.map +1 -0
  27. package/dist/esm/tools/search/rerankers.mjs +181 -0
  28. package/dist/esm/tools/search/rerankers.mjs.map +1 -0
  29. package/dist/esm/tools/search/search.mjs +407 -0
  30. package/dist/esm/tools/search/search.mjs.map +1 -0
  31. package/dist/esm/tools/search/tool.mjs +101 -0
  32. package/dist/esm/tools/search/tool.mjs.map +1 -0
  33. package/dist/types/common/enum.d.ts +1 -0
  34. package/dist/types/index.d.ts +1 -0
  35. package/dist/types/scripts/search.d.ts +1 -0
  36. package/dist/types/tools/search/firecrawl.d.ts +117 -0
  37. package/dist/types/tools/search/format.d.ts +2 -0
  38. package/dist/types/tools/search/highlights.d.ts +13 -0
  39. package/dist/types/tools/search/index.d.ts +2 -0
  40. package/dist/types/tools/search/rerankers.d.ts +32 -0
  41. package/dist/types/tools/search/search.d.ts +9 -0
  42. package/dist/types/tools/search/tool.d.ts +12 -0
  43. package/dist/types/tools/search/types.d.ts +150 -0
  44. package/package.json +2 -1
  45. package/src/common/enum.ts +1 -0
  46. package/src/index.ts +1 -0
  47. package/src/scripts/search.ts +141 -0
  48. package/src/tools/search/firecrawl.ts +270 -0
  49. package/src/tools/search/format.ts +121 -0
  50. package/src/tools/search/highlights.ts +238 -0
  51. package/src/tools/search/index.ts +2 -0
  52. package/src/tools/search/rerankers.ts +248 -0
  53. package/src/tools/search/search.ts +567 -0
  54. package/src/tools/search/tool.ts +151 -0
  55. package/src/tools/search/types.ts +179 -0
package/src/tools/search/firecrawl.ts
@@ -0,0 +1,270 @@
+/* eslint-disable no-console */
+import axios from 'axios';
+
+export interface FirecrawlScrapeOptions {
+  formats?: string[];
+  includeTags?: string[];
+  excludeTags?: string[];
+  headers?: Record<string, string>;
+  waitFor?: number;
+  timeout?: number;
+}
+
+interface ScrapeMetadata {
+  // Core source information
+  sourceURL?: string;
+  url?: string;
+  scrapeId?: string;
+  statusCode?: number;
+  // Basic metadata
+  title?: string;
+  description?: string;
+  language?: string;
+  favicon?: string;
+  viewport?: string;
+  robots?: string;
+  'theme-color'?: string;
+  // Open Graph metadata
+  'og:url'?: string;
+  'og:title'?: string;
+  'og:description'?: string;
+  'og:type'?: string;
+  'og:image'?: string;
+  'og:image:width'?: string;
+  'og:image:height'?: string;
+  'og:site_name'?: string;
+  ogUrl?: string;
+  ogTitle?: string;
+  ogDescription?: string;
+  ogImage?: string;
+  ogSiteName?: string;
+  // Article metadata
+  'article:author'?: string;
+  'article:published_time'?: string;
+  'article:modified_time'?: string;
+  'article:section'?: string;
+  'article:tag'?: string;
+  'article:publisher'?: string;
+  publishedTime?: string;
+  modifiedTime?: string;
+  // Twitter metadata
+  'twitter:site'?: string;
+  'twitter:creator'?: string;
+  'twitter:card'?: string;
+  'twitter:image'?: string;
+  'twitter:dnt'?: string;
+  'twitter:app:name:iphone'?: string;
+  'twitter:app:id:iphone'?: string;
+  'twitter:app:url:iphone'?: string;
+  'twitter:app:name:ipad'?: string;
+  'twitter:app:id:ipad'?: string;
+  'twitter:app:url:ipad'?: string;
+  'twitter:app:name:googleplay'?: string;
+  'twitter:app:id:googleplay'?: string;
+  'twitter:app:url:googleplay'?: string;
+  // Facebook metadata
+  'fb:app_id'?: string;
+  // App links
+  'al:ios:url'?: string;
+  'al:ios:app_name'?: string;
+  'al:ios:app_store_id'?: string;
+  // Allow for additional properties that might be present
+  [key: string]: string | number | boolean | null | undefined;
+}
+
+export interface FirecrawlScrapeResponse {
+  success: boolean;
+  data?: {
+    markdown?: string;
+    html?: string;
+    rawHtml?: string;
+    screenshot?: string;
+    links?: string[];
+    metadata?: ScrapeMetadata;
+  };
+  error?: string;
+}
+
+export interface FirecrawlScraperConfig {
+  apiKey?: string;
+  apiUrl?: string;
+  formats?: string[];
+  timeout?: number;
+}
+const getDomainName = (
+  link: string,
+  metadata?: ScrapeMetadata
+): string | undefined => {
+  try {
+    const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
+    const domain = new URL(url).hostname.replace(/^www\./, '');
+    if (domain) {
+      return domain;
+    }
+  } catch (e) {
+    // URL parsing failed
+    console.error('Error parsing URL:', e);
+  }
+
+  return;
+};
+
+export function getAttribution(
+  link: string,
+  metadata?: ScrapeMetadata
+): string | undefined {
+  if (!metadata) return getDomainName(link, metadata);
+
+  const possibleAttributions = [
+    metadata.ogSiteName,
+    metadata['og:site_name'],
+    metadata.title?.split('|').pop()?.trim(),
+    metadata['twitter:site']?.replace(/^@/, ''),
+  ];
+
+  const attribution = possibleAttributions.find(
+    (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''
+  );
+  if (attribution != null) {
+    return attribution;
+  }
+
+  return getDomainName(link, metadata);
+}
+
+/**
+ * Firecrawl scraper implementation
+ * Uses the Firecrawl API to scrape web pages
+ */
+export class FirecrawlScraper {
+  private apiKey: string;
+  private apiUrl: string;
+  private defaultFormats: string[];
+  private timeout: number;
+
+  constructor(config: FirecrawlScraperConfig = {}) {
+    this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
+
+    const baseUrl =
+      config.apiUrl ??
+      process.env.FIRECRAWL_BASE_URL ??
+      'https://api.firecrawl.dev';
+    this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
+
+    this.defaultFormats = config.formats ?? ['markdown', 'html'];
+    this.timeout = config.timeout ?? 15000;
+
+    if (!this.apiKey) {
+      console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
+    }
+
+    console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);
+  }
+
+  /**
+   * Scrape a single URL
+   * @param url URL to scrape
+   * @param options Scrape options
+   * @returns Scrape response
+   */
+  async scrapeUrl(
+    url: string,
+    options: FirecrawlScrapeOptions = {}
+  ): Promise<[string, FirecrawlScrapeResponse]> {
+    if (!this.apiKey) {
+      return [
+        url,
+        {
+          success: false,
+          error: 'FIRECRAWL_API_KEY is not set',
+        },
+      ];
+    }
+
+    try {
+      const response = await axios.post(
+        this.apiUrl,
+        {
+          url,
+          formats: options.formats || this.defaultFormats,
+          includeTags: options.includeTags,
+          excludeTags: options.excludeTags,
+          headers: options.headers,
+          waitFor: options.waitFor,
+          timeout: options.timeout ?? this.timeout,
+        },
+        {
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${this.apiKey}`,
+          },
+          timeout: this.timeout,
+        }
+      );
+
+      return [url, response.data];
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      return [
+        url,
+        {
+          success: false,
+          error: `Firecrawl API request failed: ${errorMessage}`,
+        },
+      ];
+    }
+  }
+
+  /**
+   * Extract content from scrape response
+   * @param response Scrape response
+   * @returns Extracted content or empty string if not available
+   */
+  extractContent(response: FirecrawlScrapeResponse): string {
+    if (!response.success || !response.data) {
+      return '';
+    }
+
+    // Prefer markdown content if available
+    if (response.data.markdown != null) {
+      return response.data.markdown;
+    }
+
+    // Fall back to HTML content
+    if (response.data.html != null) {
+      return response.data.html;
+    }
+
+    // Fall back to raw HTML content
+    if (response.data.rawHtml != null) {
+      return response.data.rawHtml;
+    }
+
+    return '';
+  }
+
+  /**
+   * Extract metadata from scrape response
+   * @param response Scrape response
+   * @returns Metadata object
+   */
+  extractMetadata(response: FirecrawlScrapeResponse): ScrapeMetadata {
+    if (!response.success || !response.data || !response.data.metadata) {
+      return {};
+    }
+
+    return response.data.metadata;
+  }
+}
+
+/**
+ * Create a Firecrawl scraper instance
+ * @param config Scraper configuration
+ * @returns Firecrawl scraper instance
+ */
+export const createFirecrawlScraper = (
+  config: FirecrawlScraperConfig = {}
+): FirecrawlScraper => {
+  return new FirecrawlScraper(config);
+};
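
For quick orientation on the scraper added above: FirecrawlScraper resolves its API key and base URL from the passed config or the FIRECRAWL_API_KEY / FIRECRAWL_BASE_URL environment variables, and scrapeUrl never throws; failures come back as [url, { success: false, error }] tuples. A minimal usage sketch, assuming a direct relative import from the new module and a hypothetical demo wrapper (neither is prescribed by the package):

import { createFirecrawlScraper } from './firecrawl';

async function demo(): Promise<void> {
  // Falls back to FIRECRAWL_API_KEY / FIRECRAWL_BASE_URL when not passed explicitly.
  const scraper = createFirecrawlScraper({ formats: ['markdown'] });

  // scrapeUrl does not throw; failures are returned as { success: false, error }.
  const [url, response] = await scraper.scrapeUrl('https://example.com', { waitFor: 500 });

  if (response.success) {
    const content = scraper.extractContent(response); // markdown, then html, then rawHtml
    const metadata = scraper.extractMetadata(response);
    console.log(url, metadata.title, content.slice(0, 200));
  } else {
    console.error(`Scrape failed for ${url}: ${response.error}`);
  }
}

void demo();

Returning a tuple rather than throwing presumably keeps batch scraping simple: one failed URL yields an error entry instead of aborting the whole run.
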
package/src/tools/search/format.ts
@@ -0,0 +1,121 @@
+import type * as t from './types';
+
+export function formatResultsForLLM(results: t.SearchResultData): string {
+  let output = '';
+
+  const addSection = (title: string): void => {
+    output += `\n=== ${title} ===\n`;
+  };
+
+  // Organic (web) results
+  const organic = results.organic ?? [];
+  if (organic.length) {
+    addSection('Web Results');
+    organic.forEach((r, i) => {
+      output += [
+        `Source ${i}: ${r.title ?? '(no title)'}`,
+        `Citation Anchor: \\ue202turn0search${i}`,
+        `URL: ${r.link}`,
+        r.snippet != null ? `Summary: ${r.snippet}` : '',
+        r.date != null ? `Date: ${r.date}` : '',
+        r.attribution != null ? `Source: ${r.attribution}` : '',
+        '',
+        '--- Content Highlights ---',
+        ...(r.highlights ?? [])
+          .filter((h) => h.text.trim().length > 0)
+          .map((h) => `[Relevance: ${h.score.toFixed(2)}]\n${h.text.trim()}`),
+        '',
+      ]
+        .filter(Boolean)
+        .join('\n');
+    });
+  }
+
+  // Ignoring these sections for now
+  // // Top stories (news)
+  // const topStores = results.topStories ?? [];
+  // if (topStores.length) {
+  //   addSection('News Results');
+  //   topStores.forEach((r, i) => {
+  //     output += [
+  //       `Anchor: \ue202turn0news${i}`,
+  //       `Title: ${r.title ?? '(no title)'}`,
+  //       `URL: ${r.link}`,
+  //       r.snippet != null ? `Snippet: ${r.snippet}` : '',
+  //       r.date != null ? `Date: ${r.date}` : '',
+  //       r.attribution != null ? `Source: ${r.attribution}` : '',
+  //       ''
+  //     ].filter(Boolean).join('\n');
+  //   });
+  // }
+
+  // // Images
+  // const images = results.images ?? [];
+  // if (images.length) {
+  //   addSection('Image Results');
+  //   images.forEach((img, i) => {
+  //     output += [
+  //       `Anchor: \ue202turn0image${i}`,
+  //       `Title: ${img.title ?? '(no title)'}`,
+  //       `Image URL: ${img.imageUrl}`,
+  //       ''
+  //     ].join('\n');
+  //   });
+  // }
+
+  // Knowledge Graph
+  if (results.knowledgeGraph != null) {
+    addSection('Knowledge Graph');
+    output += [
+      `Title: ${results.knowledgeGraph.title ?? '(no title)'}`,
+      results.knowledgeGraph.description != null
+        ? `Description: ${results.knowledgeGraph.description}`
+        : '',
+      results.knowledgeGraph.type != null
+        ? `Type: ${results.knowledgeGraph.type}`
+        : '',
+      results.knowledgeGraph.imageUrl != null
+        ? `Image URL: ${results.knowledgeGraph.imageUrl}`
+        : '',
+      results.knowledgeGraph.attributes != null
+        ? `Attributes: ${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}`
+        : '',
+      '',
+    ]
+      .filter(Boolean)
+      .join('\n');
+  }
+
+  // Answer Box
+  if (results.answerBox != null) {
+    addSection('Answer Box');
+    output += [
+      results.answerBox.title != null
+        ? `Title: ${results.answerBox.title}`
+        : '',
+      results.answerBox.answer != null
+        ? `Answer: ${results.answerBox.answer}`
+        : '',
+      results.answerBox.snippet != null
+        ? `Snippet: ${results.answerBox.snippet}`
+        : '',
+      results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',
+      '',
+    ]
+      .filter(Boolean)
+      .join('\n');
+  }
+
+  // People also ask
+  const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
+  if (peopleAlsoAsk.length) {
+    addSection('People Also Ask');
+    peopleAlsoAsk.forEach((p, _i) => {
+      output += [`Q: ${p.question}`, `A: ${p.answer}`, '']
+        .filter(Boolean)
+        .join('\n');
+    });
+  }
+
+  return output.trim();
+}
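
A rough sketch of how the formatter above might be called. The full SearchResultData type lives in types.ts (not included in this excerpt), so the hand-built object below only fills the fields the function actually reads, and the cast is purely illustrative:

import { formatResultsForLLM } from './format';
import type * as t from './types';

// Hypothetical minimal result set; only fields read by the formatter above.
const results = {
  organic: [
    {
      title: 'Example Domain',
      link: 'https://example.com',
      snippet: 'Illustrative example page.',
      attribution: 'example.com',
      highlights: [{ text: 'This domain is for use in examples.', score: 0.91 }],
    },
  ],
  peopleAlsoAsk: [{ question: 'What is example.com?', answer: 'A reserved example domain.' }],
} as t.SearchResultData;

// Yields sectioned plain text: "=== Web Results ===", per-result citation anchors,
// relevance-scored highlights, and a "People Also Ask" section.
console.log(formatResultsForLLM(results));
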
package/src/tools/search/highlights.ts
@@ -0,0 +1,238 @@
+import type * as t from './types';
+
+// 2. Pre-compile all regular expressions (only do this once)
+// Group patterns by priority for early returns
+const priorityPatterns = [
+  // High priority patterns (structural)
+  [
+    { regex: /\n\n/g }, // Double newline (paragraph break)
+    { regex: /\n/g }, // Single newline
+    { regex: /={3,}\s*\n|-{3,}\s*\n/g }, // Section separators
+  ],
+  // Medium priority (semantic)
+  [
+    { regex: /[.!?][")\]]?\s/g }, // End of sentence
+    { regex: /;\s/g }, // Semicolon
+    { regex: /:\s/g }, // Colon
+  ],
+  // Low priority (any breaks)
+  [
+    { regex: /,\s/g }, // Comma
+    { regex: /\s-\s/g }, // Dash surrounded by spaces
+    { regex: /\s/g }, // Any space
+  ],
+];
+
+function findFirstMatch(text: string, regex: RegExp): number {
+  // Reset regex
+  regex.lastIndex = 0;
+
+  // For very long texts, try chunking
+  if (text.length > 10000) {
+    const chunkSize = 2000;
+    let position = 0;
+
+    while (position < text.length) {
+      const chunk = text.substring(position, position + chunkSize);
+      regex.lastIndex = 0;
+
+      const match = regex.exec(chunk);
+      if (match) {
+        return position + match.index;
+      }
+
+      // Move to next chunk with some overlap
+      position += chunkSize - 100;
+      if (position >= text.length) break;
+    }
+    return -1;
+  }
+
+  // For shorter texts, normal regex search
+  const match = regex.exec(text);
+  return match ? match.index : -1;
+}
+
+// 3. Optimized boundary finding functions
+function findLastMatch(text: string, regex: RegExp): number {
+  // Reset regex state
+  regex.lastIndex = 0;
+
+  let lastIndex = -1;
+  let lastLength = 0;
+  let match;
+
+  // For very long texts, use a different approach to avoid regex engine slowdowns
+  if (text.length > 10000) {
+    // Try dividing the text into chunks for faster processing
+    const chunkSize = 2000;
+    let startPosition = Math.max(0, text.length - chunkSize);
+
+    while (startPosition >= 0) {
+      const chunk = text.substring(startPosition, startPosition + chunkSize);
+      regex.lastIndex = 0;
+
+      let chunkLastIndex = -1;
+      let chunkLastLength = 0;
+
+      while ((match = regex.exec(chunk)) !== null) {
+        chunkLastIndex = match.index;
+        chunkLastLength = match[0].length;
+      }
+
+      if (chunkLastIndex !== -1) {
+        return startPosition + chunkLastIndex + chunkLastLength;
+      }
+
+      // Move to previous chunk with some overlap
+      startPosition = Math.max(0, startPosition - chunkSize + 100) - 1;
+      if (startPosition <= 0) break;
+    }
+    return -1;
+  }
+
+  // For shorter texts, normal regex search
+  while ((match = regex.exec(text)) !== null) {
+    lastIndex = match.index;
+    lastLength = match[0].length;
+  }
+
+  return lastIndex === -1 ? -1 : lastIndex + lastLength;
+}
+
+// 4. Find the best boundary with priority groups
+function findBestBoundary(text: string, direction = 'backward'): number {
+  if (!text || text.length === 0) return 0;
+
+  // Try each priority group
+  for (const patternGroup of priorityPatterns) {
+    for (const pattern of patternGroup) {
+      const position =
+        direction === 'backward'
+          ? findLastMatch(text, pattern.regex)
+          : findFirstMatch(text, pattern.regex);
+
+      if (position !== -1) {
+        return position;
+      }
+    }
+  }
+
+  // No match found, use character boundary
+  return direction === 'backward' ? text.length : 0;
+}
+
+/**
+ * Expand highlights in search results using smart boundary detection.
+ *
+ * This implementation finds natural text boundaries like paragraphs, sentences,
+ * and phrases to provide context while maintaining readability.
+ *
+ * @param searchResults - Search results object
+ * @param mainExpandBy - Primary expansion size on each side (default: 300)
+ * @param separatorExpandBy - Additional range to look for separators (default: 150)
+ * @returns Copy of search results with expanded highlights
+ */
+export function expandHighlights(
+  searchResults: t.SearchResultData,
+  mainExpandBy = 300,
+  separatorExpandBy = 150
+): t.SearchResultData {
+  // 1. Avoid full deep copy - only copy what we modify
+  const resultCopy = { ...searchResults };
+
+  // Only deep copy the relevant arrays
+  if (resultCopy.organic) {
+    resultCopy.organic = [...resultCopy.organic];
+  }
+  if (resultCopy.topStories) {
+    resultCopy.topStories = [...resultCopy.topStories];
+  }
+
+  // 5. Process the results efficiently
+  const processResultTypes = ['organic', 'topStories'] as const;
+
+  for (const resultType of processResultTypes) {
+    if (!resultCopy[resultType as 'organic' | 'topStories']) continue;
+
+    // Map results to new array with modified highlights
+    resultCopy[resultType] = resultCopy[resultType]?.map((result) => {
+      if (
+        result.content == null ||
+        result.content === '' ||
+        !result.highlights ||
+        result.highlights.length === 0
+      ) {
+        return result; // No modification needed
+      }
+
+      // Create a shallow copy with expanded highlights
+      const resultCopy = { ...result };
+      const content = result.content;
+      const highlights = [];
+
+      // Process each highlight
+      for (const highlight of result.highlights) {
+        const highlightText = highlight.text;
+
+        let startPos = content.indexOf(highlightText);
+        let highlightLen = highlightText.length;
+
+        if (startPos === -1) {
+          // Try with stripped whitespace
+          const strippedHighlight = highlightText.trim();
+          startPos = content.indexOf(strippedHighlight);
+
+          if (startPos === -1) {
+            highlights.push({
+              text: highlight.text,
+              score: highlight.score,
+            });
+            continue;
+          }
+          highlightLen = strippedHighlight.length;
+        }
+
+        // Calculate boundaries
+        const mainStart = Math.max(0, startPos - mainExpandBy);
+        const mainEnd = Math.min(
+          content.length,
+          startPos + highlightLen + mainExpandBy
+        );
+
+        const separatorStart = Math.max(0, mainStart - separatorExpandBy);
+        const separatorEnd = Math.min(
+          content.length,
+          mainEnd + separatorExpandBy
+        );
+
+        // Extract text segments
+        const headText = content.substring(separatorStart, mainStart);
+        const tailText = content.substring(mainEnd, separatorEnd);
+
+        // Find natural boundaries
+        const bestHeadBoundary = findBestBoundary(headText, 'backward');
+        const bestTailBoundary = findBestBoundary(tailText, 'forward');
+
+        // Calculate final positions
+        const finalStart = separatorStart + bestHeadBoundary;
+        const finalEnd = mainEnd + bestTailBoundary;
+
+        // Extract the expanded highlight
+        const expandedHighlightText = content
+          .substring(finalStart, finalEnd)
+          .trim();
+        highlights.push({
+          text: expandedHighlightText,
+          score: highlight.score,
+        });
+      }
+
+      delete resultCopy.content;
+      resultCopy.highlights = highlights;
+      return resultCopy;
+    });
+  }
+
+  return resultCopy;
+}
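
In short, expandHighlights expects each organic / topStories entry to carry the full scraped content alongside its short highlights; it widens each highlight by up to mainExpandBy characters per side, snaps the edges to the nearest paragraph, sentence, or word break found within a further separatorExpandBy characters, and deletes content from the returned copy. A hedged sketch (again casting a hand-built object, since the full types are not shown in this excerpt):

import { expandHighlights } from './highlights';
import type * as t from './types';

const raw = {
  organic: [
    {
      title: 'Docs',
      link: 'https://example.com/docs',
      // Full scraped text; removed from the returned copy once highlights are expanded.
      content:
        'Intro paragraph.\n\nThe key fact appears here, in the middle of a longer section. More detail follows.\n\nClosing notes.',
      highlights: [{ text: 'The key fact appears here', score: 0.88 }],
    },
  ],
} as t.SearchResultData;

// Expand up to 300 chars per side, then snap to natural boundaries within another 150 chars.
// For content this short, the window covers the whole text, so the expanded highlight
// is simply the trimmed content.
const expanded = expandHighlights(raw, 300, 150);
console.log(expanded.organic?.[0].highlights?.[0].text);
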
package/src/tools/search/index.ts
@@ -0,0 +1,2 @@
+export * from './tool';
+export type * from './types';