@librechat/agents 2.4.22 → 2.4.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/dist/cjs/common/enum.cjs +1 -0
  2. package/dist/cjs/common/enum.cjs.map +1 -1
  3. package/dist/cjs/llm/anthropic/index.cjs +1 -1
  4. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  5. package/dist/cjs/llm/anthropic/types.cjs +50 -0
  6. package/dist/cjs/llm/anthropic/types.cjs.map +1 -0
  7. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +227 -21
  8. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +1 -0
  10. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
  11. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  12. package/dist/cjs/main.cjs +2 -0
  13. package/dist/cjs/main.cjs.map +1 -1
  14. package/dist/cjs/run.cjs.map +1 -1
  15. package/dist/cjs/tools/search/firecrawl.cjs +149 -0
  16. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -0
  17. package/dist/cjs/tools/search/format.cjs +116 -0
  18. package/dist/cjs/tools/search/format.cjs.map +1 -0
  19. package/dist/cjs/tools/search/highlights.cjs +193 -0
  20. package/dist/cjs/tools/search/highlights.cjs.map +1 -0
  21. package/dist/cjs/tools/search/rerankers.cjs +187 -0
  22. package/dist/cjs/tools/search/rerankers.cjs.map +1 -0
  23. package/dist/cjs/tools/search/search.cjs +410 -0
  24. package/dist/cjs/tools/search/search.cjs.map +1 -0
  25. package/dist/cjs/tools/search/tool.cjs +103 -0
  26. package/dist/cjs/tools/search/tool.cjs.map +1 -0
  27. package/dist/esm/common/enum.mjs +1 -0
  28. package/dist/esm/common/enum.mjs.map +1 -1
  29. package/dist/esm/llm/anthropic/index.mjs +1 -1
  30. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  31. package/dist/esm/llm/anthropic/types.mjs +48 -0
  32. package/dist/esm/llm/anthropic/types.mjs.map +1 -0
  33. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +228 -22
  34. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  35. package/dist/esm/llm/anthropic/utils/message_outputs.mjs +1 -0
  36. package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
  37. package/dist/esm/llm/openai/index.mjs.map +1 -1
  38. package/dist/esm/main.mjs +1 -0
  39. package/dist/esm/main.mjs.map +1 -1
  40. package/dist/esm/run.mjs.map +1 -1
  41. package/dist/esm/tools/search/firecrawl.mjs +145 -0
  42. package/dist/esm/tools/search/firecrawl.mjs.map +1 -0
  43. package/dist/esm/tools/search/format.mjs +114 -0
  44. package/dist/esm/tools/search/format.mjs.map +1 -0
  45. package/dist/esm/tools/search/highlights.mjs +191 -0
  46. package/dist/esm/tools/search/highlights.mjs.map +1 -0
  47. package/dist/esm/tools/search/rerankers.mjs +181 -0
  48. package/dist/esm/tools/search/rerankers.mjs.map +1 -0
  49. package/dist/esm/tools/search/search.mjs +407 -0
  50. package/dist/esm/tools/search/search.mjs.map +1 -0
  51. package/dist/esm/tools/search/tool.mjs +101 -0
  52. package/dist/esm/tools/search/tool.mjs.map +1 -0
  53. package/dist/types/common/enum.d.ts +1 -0
  54. package/dist/types/index.d.ts +1 -0
  55. package/dist/types/llm/anthropic/index.d.ts +3 -4
  56. package/dist/types/llm/anthropic/types.d.ts +4 -35
  57. package/dist/types/llm/anthropic/utils/message_inputs.d.ts +2 -2
  58. package/dist/types/llm/anthropic/utils/message_outputs.d.ts +1 -3
  59. package/dist/types/llm/anthropic/utils/output_parsers.d.ts +22 -0
  60. package/dist/types/llm/openai/index.d.ts +3 -2
  61. package/dist/types/scripts/search.d.ts +1 -0
  62. package/dist/types/tools/example.d.ts +21 -3
  63. package/dist/types/tools/search/firecrawl.d.ts +117 -0
  64. package/dist/types/tools/search/format.d.ts +2 -0
  65. package/dist/types/tools/search/highlights.d.ts +13 -0
  66. package/dist/types/tools/search/index.d.ts +2 -0
  67. package/dist/types/tools/search/rerankers.d.ts +32 -0
  68. package/dist/types/tools/search/search.d.ts +9 -0
  69. package/dist/types/tools/search/tool.d.ts +12 -0
  70. package/dist/types/tools/search/types.d.ts +150 -0
  71. package/package.json +10 -9
  72. package/src/common/enum.ts +1 -0
  73. package/src/index.ts +1 -0
  74. package/src/llm/anthropic/index.ts +6 -5
  75. package/src/llm/anthropic/llm.spec.ts +176 -179
  76. package/src/llm/anthropic/types.ts +64 -39
  77. package/src/llm/anthropic/utils/message_inputs.ts +275 -37
  78. package/src/llm/anthropic/utils/message_outputs.ts +4 -21
  79. package/src/llm/anthropic/utils/output_parsers.ts +114 -0
  80. package/src/llm/openai/index.ts +7 -6
  81. package/src/run.ts +1 -1
  82. package/src/scripts/search.ts +141 -0
  83. package/src/tools/search/firecrawl.ts +270 -0
  84. package/src/tools/search/format.ts +121 -0
  85. package/src/tools/search/highlights.ts +237 -0
  86. package/src/tools/search/index.ts +2 -0
  87. package/src/tools/search/rerankers.ts +248 -0
  88. package/src/tools/search/search.ts +567 -0
  89. package/src/tools/search/tool.ts +151 -0
  90. package/src/tools/search/types.ts +179 -0
@@ -0,0 +1,114 @@
1
+ /* eslint-disable @typescript-eslint/explicit-function-return-type */
2
+ /* eslint-disable @typescript-eslint/no-empty-object-type */
3
+ import { z } from 'zod';
4
+ import {
5
+ BaseLLMOutputParser,
6
+ OutputParserException,
7
+ } from '@langchain/core/output_parsers';
8
+ import { JsonOutputKeyToolsParserParams } from '@langchain/core/output_parsers/openai_tools';
9
+ import { ChatGeneration } from '@langchain/core/outputs';
10
+ import { ToolCall } from '@langchain/core/messages/tool';
11
+
12
/**
 * Constructor options accepted by `AnthropicToolsOutputParser`.
 * Adds nothing on top of the JSON key-tools parser params it is derived from.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type AnthropicToolsOutputParserParams<T extends Record<string, any>> =
  JsonOutputKeyToolsParserParams<T>;
15
+
16
/**
 * Output parser that takes the first Anthropic `tool_use` block found in the
 * supplied chat generations and returns its arguments, validated against an
 * optional Zod schema.
 *
 * `keyName`, `returnSingle` and `returnId` are stored from the constructor
 * params but are not consulted by the methods defined in this class.
 */
export class AnthropicToolsOutputParser<
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  T extends Record<string, any> = Record<string, any>,
> extends BaseLLMOutputParser<T> {
  static lc_name() {
    return 'AnthropicToolsOutputParser';
  }

  lc_namespace = ['langchain', 'anthropic', 'output_parsers'];

  returnId = false;

  /** The type of tool calls to return. */
  keyName: string;

  /** Whether to return only the first tool call. */
  returnSingle = false;

  /** Optional schema the extracted tool arguments must satisfy. */
  zodSchema?: z.ZodType<T>;

  constructor(params: AnthropicToolsOutputParserParams<T>) {
    super(params);
    this.keyName = params.keyName;
    this.returnSingle = params.returnSingle ?? this.returnSingle;
    this.zodSchema = params.zodSchema;
  }

  /**
   * Parses (when given a JSON string) and validates a tool-call payload.
   *
   * @param result Tool arguments, either already an object or a JSON string.
   * @returns The payload, run through `zodSchema` when one is configured.
   * @throws OutputParserException when the string is not valid JSON or the
   *   payload fails schema validation.
   */
  protected async _validateResult(result: unknown): Promise<T> {
    let parsedResult = result;
    if (typeof result === 'string') {
      try {
        parsedResult = JSON.parse(result);
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
      } catch (e: any) {
        throw new OutputParserException(
          `Failed to parse. Text: "${JSON.stringify(
            result,
            null,
            2
          )}". Error: ${JSON.stringify(e.message)}`,
          result
        );
      }
    }

    if (this.zodSchema === undefined) {
      return parsedResult as T;
    }

    const zodParsedResult = await this.zodSchema.safeParseAsync(parsedResult);
    if (zodParsedResult.success) {
      return zodParsedResult.data;
    }
    throw new OutputParserException(
      `Failed to parse. Text: "${JSON.stringify(
        result,
        null,
        2
      )}". Error: ${JSON.stringify(zodParsedResult.error.errors)}`,
      JSON.stringify(parsedResult, null, 2)
    );
  }

  /**
   * Returns the validated arguments of the first tool call found.
   *
   * Generations whose content is not an array, or that contain no `tool_use`
   * block, are skipped so that a later generation can still supply the call.
   *
   * @param generations Chat generations to inspect, in order.
   * @throws Error when no generation contains a `tool_use` block.
   */
  async parseResult(generations: ChatGeneration[]): Promise<T> {
    let tool: ToolCall | undefined;
    for (const { message } of generations) {
      if (!Array.isArray(message.content)) {
        continue;
      }
      [tool] = extractToolCalls(message.content);
      if (tool !== undefined) {
        break;
      }
    }

    if (tool === undefined) {
      throw new Error(
        'No parseable tool calls provided to AnthropicToolsOutputParser.'
      );
    }
    return this._validateResult(tool.args);
  }
}
99
+
100
/**
 * Converts every `tool_use` content block into a `ToolCall` record.
 *
 * @param content Message content blocks; blocks of any other type are ignored.
 * @returns One `ToolCall` per `tool_use` block, in their original order.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function extractToolCalls(content: Record<string, any>[]) {
  return content
    .filter((part) => part.type === 'tool_use')
    .map(
      (part): ToolCall => ({
        name: part.name,
        args: part.input,
        id: part.id,
        type: 'tool_call',
      })
    );
}
@@ -7,6 +7,7 @@ import {
7
7
  ChatOpenAI as OriginalChatOpenAI,
8
8
  AzureChatOpenAI as OriginalAzureChatOpenAI,
9
9
  } from '@langchain/openai';
10
+ import type { OpenAICoreRequestOptions } from 'node_modules/@langchain/deepseek/node_modules/@langchain/openai';
10
11
  import type * as t from '@langchain/openai';
11
12
 
12
13
  function createAbortHandler(controller: AbortController): () => void {
@@ -191,8 +192,8 @@ export class ChatDeepSeek extends OriginalChatDeepSeek {
191
192
  return this.client;
192
193
  }
193
194
  protected _getClientOptions(
194
- options?: t.OpenAICoreRequestOptions
195
- ): t.OpenAICoreRequestOptions {
195
+ options?: OpenAICoreRequestOptions
196
+ ): OpenAICoreRequestOptions {
196
197
  if (!(this.client as OpenAIClient | undefined)) {
197
198
  const openAIEndpointConfig: t.OpenAIEndpointConfig = {
198
199
  baseURL: this.clientConfig.baseURL,
@@ -214,7 +215,7 @@ export class ChatDeepSeek extends OriginalChatDeepSeek {
214
215
  const requestOptions = {
215
216
  ...this.clientConfig,
216
217
  ...options,
217
- } as t.OpenAICoreRequestOptions;
218
+ } as OpenAICoreRequestOptions;
218
219
  return requestOptions;
219
220
  }
220
221
  }
@@ -224,8 +225,8 @@ export class ChatXAI extends OriginalChatXAI {
224
225
  return this.client;
225
226
  }
226
227
  protected _getClientOptions(
227
- options?: t.OpenAICoreRequestOptions
228
- ): t.OpenAICoreRequestOptions {
228
+ options?: OpenAICoreRequestOptions
229
+ ): OpenAICoreRequestOptions {
229
230
  if (!(this.client as OpenAIClient | undefined)) {
230
231
  const openAIEndpointConfig: t.OpenAIEndpointConfig = {
231
232
  baseURL: this.clientConfig.baseURL,
@@ -247,7 +248,7 @@ export class ChatXAI extends OriginalChatXAI {
247
248
  const requestOptions = {
248
249
  ...this.clientConfig,
249
250
  ...options,
250
- } as t.OpenAICoreRequestOptions;
251
+ } as OpenAICoreRequestOptions;
251
252
  return requestOptions;
252
253
  }
253
254
  }
package/src/run.ts CHANGED
@@ -141,7 +141,7 @@ export class Run<T extends t.BaseGraphState> {
141
141
  }
142
142
 
143
143
  const jsonSchema = zodToJsonSchema(
144
- tool.schema.describe(tool.description ?? ''),
144
+ (tool.schema as t.ZodObjectAny).describe(tool.description ?? ''),
145
145
  tool.name
146
146
  );
147
147
  return (
@@ -0,0 +1,141 @@
1
+ /* eslint-disable no-console */
2
+ // src/scripts/search.ts
3
+ import { config } from 'dotenv';
4
+ config();
5
+ import { HumanMessage, BaseMessage } from '@langchain/core/messages';
6
+ import type * as t from '@/types';
7
+ import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
8
+ import { ToolEndHandler, ModelEndHandler } from '@/events';
9
+ import { createSearchTool } from '@/tools/search';
10
+
11
+ import { getArgs } from '@/scripts/args';
12
+ import { Run } from '@/run';
13
+ import { GraphEvents, Callback } from '@/common';
14
+ import { getLLMConfig } from '@/utils/llmConfig';
15
+
16
/** Messages exchanged so far; dumped on failure to aid debugging. */
const conversationHistory: BaseMessage[] = [];

/**
 * Manual smoke test for the search tool.
 *
 * Creates a standard graph run whose only tool is the search tool, streams a
 * single hard-coded user question through it while logging the graph events,
 * and appends the messages produced by the run to `conversationHistory`.
 */
async function testStandardStreaming(): Promise<void> {
  const { userName, location, provider } = await getArgs();
  const { aggregateContent } = createContentAggregator();
  const customHandlers = {
    [GraphEvents.TOOL_END]: new ToolEndHandler(),
    [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
    [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
    [GraphEvents.ON_RUN_STEP_COMPLETED]: {
      handle: (
        event: GraphEvents.ON_RUN_STEP_COMPLETED,
        data: t.StreamEventData
      ): void => {
        console.log('====== ON_RUN_STEP_COMPLETED ======');
        aggregateContent({
          event,
          data: data as unknown as { result: t.ToolEndEvent },
        });
      },
    },
    [GraphEvents.ON_RUN_STEP]: {
      handle: (
        event: GraphEvents.ON_RUN_STEP,
        data: t.StreamEventData
      ): void => {
        console.log('====== ON_RUN_STEP ======');
        console.dir(data, { depth: null });
        aggregateContent({ event, data: data as t.RunStep });
      },
    },
    [GraphEvents.ON_RUN_STEP_DELTA]: {
      handle: (
        event: GraphEvents.ON_RUN_STEP_DELTA,
        data: t.StreamEventData
      ): void => {
        console.log('====== ON_RUN_STEP_DELTA ======');
        console.dir(data, { depth: null });
        aggregateContent({ event, data: data as t.RunStepDeltaEvent });
      },
    },
    [GraphEvents.ON_MESSAGE_DELTA]: {
      handle: (
        event: GraphEvents.ON_MESSAGE_DELTA,
        data: t.StreamEventData
      ): void => {
        console.log('====== ON_MESSAGE_DELTA ======');
        console.dir(data, { depth: null });
        aggregateContent({ event, data: data as t.MessageDeltaEvent });
      },
    },
    [GraphEvents.TOOL_START]: {
      // Only announces the event; the payload is intentionally not printed.
      handle: (): void => {
        console.log('====== TOOL_START ======');
      },
    },
  };

  const llmConfig = getLLMConfig(provider);

  const run = await Run.create<t.IState>({
    runId: 'test-run-id',
    graphConfig: {
      type: 'standard',
      llmConfig,
      tools: [createSearchTool()],
      instructions:
        'You are a friendly AI assistant. Always address the user by their name.',
      additional_instructions: `The user's name is ${userName} and they are located in ${location}.`,
    },
    returnContent: true,
    customHandlers,
  });

  const config = {
    configurable: {
      provider,
      thread_id: 'conversation-num-1',
    },
    streamMode: 'values',
    version: 'v2' as const,
  };

  console.log('Test 1: Weather query (content parts test)');

  // Fixed prompt used for this smoke test.
  const userMessage = 'Are massage guns good?';

  conversationHistory.push(new HumanMessage(userMessage));

  const inputs = {
    messages: conversationHistory,
  };
  await run.processStream(inputs, config);
  const finalMessages = run.getRunMessages();
  if (finalMessages) {
    conversationHistory.push(...finalMessages);
    console.dir(conversationHistory, { depth: null });
  }
  console.log('\n\n====================\n\n');
}
129
+
130
/**
 * Prints the conversation collected so far and terminates the process with a
 * failure exit code. Shared by both failure paths below so they report the
 * same information.
 */
function dumpHistoryAndExit(): never {
  console.log('Conversation history:');
  console.dir(conversationHistory, { depth: null });
  process.exit(1);
}

// Rejections that escape every promise chain still get the history dump.
process.on('unhandledRejection', (reason, promise) => {
  console.error('Unhandled Rejection at:', promise, 'reason:', reason);
  dumpHistoryAndExit();
});

// Script entry point.
testStandardStreaming().catch((err) => {
  console.error(err);
  dumpHistoryAndExit();
});
@@ -0,0 +1,270 @@
1
+ /* eslint-disable no-console */
2
+ import axios from 'axios';
3
+
4
/**
 * Per-request options for `FirecrawlScraper.scrapeUrl`.
 * Each field is forwarded in the JSON body of the scrape request.
 */
export interface FirecrawlScrapeOptions {
  /** Output formats to request; the scraper's default formats are used when omitted. */
  formats?: string[];
  includeTags?: string[];
  excludeTags?: string[];
  headers?: Record<string, string>;
  waitFor?: number;
  /** Overrides the scraper's default timeout for this request. */
  timeout?: number;
}
12
+
13
/**
 * Page metadata found under `data.metadata` of a scrape response.
 * Every known field is optional; unknown keys are permitted by the index
 * signature at the end.
 */
interface ScrapeMetadata {
  // --- Core source information ---
  sourceURL?: string;
  url?: string;
  scrapeId?: string;
  statusCode?: number;

  // --- Basic metadata ---
  title?: string;
  description?: string;
  language?: string;
  favicon?: string;
  viewport?: string;
  robots?: string;
  'theme-color'?: string;

  // --- Open Graph metadata (raw tag names and camelCase variants) ---
  'og:url'?: string;
  'og:title'?: string;
  'og:description'?: string;
  'og:type'?: string;
  'og:image'?: string;
  'og:image:width'?: string;
  'og:image:height'?: string;
  'og:site_name'?: string;
  ogUrl?: string;
  ogTitle?: string;
  ogDescription?: string;
  ogImage?: string;
  ogSiteName?: string;

  // --- Article metadata ---
  'article:author'?: string;
  'article:published_time'?: string;
  'article:modified_time'?: string;
  'article:section'?: string;
  'article:tag'?: string;
  'article:publisher'?: string;
  publishedTime?: string;
  modifiedTime?: string;

  // --- Twitter metadata ---
  'twitter:site'?: string;
  'twitter:creator'?: string;
  'twitter:card'?: string;
  'twitter:image'?: string;
  'twitter:dnt'?: string;
  'twitter:app:name:iphone'?: string;
  'twitter:app:id:iphone'?: string;
  'twitter:app:url:iphone'?: string;
  'twitter:app:name:ipad'?: string;
  'twitter:app:id:ipad'?: string;
  'twitter:app:url:ipad'?: string;
  'twitter:app:name:googleplay'?: string;
  'twitter:app:id:googleplay'?: string;
  'twitter:app:url:googleplay'?: string;

  // --- Facebook metadata ---
  'fb:app_id'?: string;

  // --- App links ---
  'al:ios:url'?: string;
  'al:ios:app_name'?: string;
  'al:ios:app_store_id'?: string;

  /** Any additional properties that might be present. */
  [key: string]: string | number | boolean | null | undefined;
}
74
+
75
/**
 * Result of a scrape request as returned by `FirecrawlScraper.scrapeUrl`.
 * Failures produced locally by the scraper set `success: false` and `error`.
 */
export interface FirecrawlScrapeResponse {
  success: boolean;
  /** Scraped payload; `extractContent` prefers `markdown`, then `html`, then `rawHtml`. */
  data?: {
    markdown?: string;
    html?: string;
    rawHtml?: string;
    screenshot?: string;
    links?: string[];
    metadata?: ScrapeMetadata;
  };
  /** Human-readable failure description. */
  error?: string;
}
87
+
88
/** Construction options for `FirecrawlScraper`; every field has a fallback. */
export interface FirecrawlScraperConfig {
  /** API key; falls back to the `FIRECRAWL_API_KEY` environment variable. */
  apiKey?: string;
  /** Base URL; falls back to `FIRECRAWL_BASE_URL`, then `https://api.firecrawl.dev`. */
  apiUrl?: string;
  /** Default output formats; `['markdown', 'html']` when omitted. */
  formats?: string[];
  /** Default timeout; `30000` when omitted. */
  timeout?: number;
}
94
/**
 * Derives a display domain (hostname without a leading `www.`) for a result.
 *
 * Candidates are tried in order: `metadata.sourceURL`, `metadata.url`, then
 * `link`. Empty or unparsable candidates are skipped so that a bad metadata
 * value does not prevent falling back to a usable `link`.
 *
 * @param link URL of the search result.
 * @param metadata Optional scrape metadata that may carry the source URL.
 * @returns The domain, or `undefined` when no candidate yields a hostname.
 */
const getDomainName = (
  link: string,
  metadata?: ScrapeMetadata
): string | undefined => {
  const candidates = [metadata?.sourceURL, metadata?.url, link];
  let lastError: unknown;

  for (const candidate of candidates) {
    if (candidate == null || candidate === '') {
      continue;
    }
    try {
      const domain = new URL(candidate).hostname.replace(/^www\./, '');
      if (domain) {
        return domain;
      }
    } catch (e) {
      // Remember the failure but keep trying the remaining candidates.
      lastError = e;
    }
  }

  if (lastError !== undefined) {
    console.error('Error parsing URL:', lastError);
  }
  return undefined;
};
111
+
112
/**
 * Picks a human-readable source name for a result.
 *
 * Preference order: `ogSiteName`, `og:site_name`, the last `|`-separated
 * segment of the title, the Twitter site handle without its leading `@`.
 * When none is a non-blank string (or there is no metadata), the domain name
 * is used instead.
 *
 * @param link URL of the search result.
 * @param metadata Optional scrape metadata.
 * @returns The attribution, or `undefined` if not even a domain is available.
 */
export function getAttribution(
  link: string,
  metadata?: ScrapeMetadata
): string | undefined {
  const candidates = metadata
    ? [
      metadata.ogSiteName,
      metadata['og:site_name'],
      metadata.title?.split('|').pop()?.trim(),
      metadata['twitter:site']?.replace(/^@/, ''),
    ]
    : [];

  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate.trim() !== '') {
      return candidate;
    }
  }

  return getDomainName(link, metadata);
}
134
+
135
/**
 * Firecrawl scraper implementation.
 * Uses the Firecrawl API to scrape web pages.
 */
export class FirecrawlScraper {
  private apiKey: string;
  private apiUrl: string;
  private defaultFormats: string[];
  private timeout: number;

  /**
   * @param config Optional overrides. The API key and base URL fall back to
   *   the `FIRECRAWL_API_KEY` / `FIRECRAWL_BASE_URL` environment variables.
   */
  constructor(config: FirecrawlScraperConfig = {}) {
    this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';

    const baseUrl =
      config.apiUrl ??
      process.env.FIRECRAWL_BASE_URL ??
      'https://api.firecrawl.dev';
    // Strip trailing slashes so the endpoint never contains `//v1`.
    this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;

    this.defaultFormats = config.formats ?? ['markdown', 'html'];
    this.timeout = config.timeout ?? 30000;

    if (!this.apiKey) {
      console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
    }

    console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);
  }

  /**
   * Scrape a single URL.
   *
   * Never rejects: a missing API key or a failed request is reported as a
   * `{ success: false, error }` response.
   *
   * @param url URL to scrape
   * @param options Scrape options
   * @returns Tuple of the requested URL and the scrape response
   */
  async scrapeUrl(
    url: string,
    options: FirecrawlScrapeOptions = {}
  ): Promise<[string, FirecrawlScrapeResponse]> {
    if (!this.apiKey) {
      return [
        url,
        {
          success: false,
          error: 'FIRECRAWL_API_KEY is not set',
        },
      ];
    }

    // The same effective timeout is sent to Firecrawl and applied to the HTTP
    // call, so a per-request override is not cut short by the instance default.
    const timeout = options.timeout ?? this.timeout;

    try {
      const response = await axios.post(
        this.apiUrl,
        {
          url,
          formats: options.formats ?? this.defaultFormats,
          includeTags: options.includeTags,
          excludeTags: options.excludeTags,
          headers: options.headers,
          waitFor: options.waitFor,
          timeout,
        },
        {
          headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${this.apiKey}`,
          },
          timeout,
        }
      );

      return [url, response.data];
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      return [
        url,
        {
          success: false,
          error: `Firecrawl API request failed: ${errorMessage}`,
        },
      ];
    }
  }

  /**
   * Extract content from scrape response.
   * @param response Scrape response
   * @returns Markdown if present, else HTML, else raw HTML; empty string when
   *   the response failed or carries none of them
   */
  extractContent(response: FirecrawlScrapeResponse): string {
    if (!response.success || !response.data) {
      return '';
    }

    const { markdown, html, rawHtml } = response.data;
    return markdown ?? html ?? rawHtml ?? '';
  }

  /**
   * Extract metadata from scrape response.
   * @param response Scrape response
   * @returns Metadata object, or an empty object when unavailable
   */
  extractMetadata(response: FirecrawlScrapeResponse): ScrapeMetadata {
    if (!response.success || !response.data || !response.data.metadata) {
      return {};
    }

    return response.data.metadata;
  }
}
260
+
261
/**
 * Factory for `FirecrawlScraper`.
 * @param config Scraper configuration, passed to the constructor unchanged
 * @returns A new Firecrawl scraper instance
 */
export const createFirecrawlScraper = (
  config: FirecrawlScraperConfig = {}
): FirecrawlScraper => new FirecrawlScraper(config);
@@ -0,0 +1,121 @@
1
+ import type * as t from './types';
2
+
3
/**
 * Renders search results as plain text for inclusion in an LLM prompt.
 *
 * Sections are emitted in this order when present: web results, knowledge
 * graph, answer box, people-also-ask. Top stories (anchor
 * `\ue202turn0news<n>`) and images (anchor `\ue202turn0image<n>`) are
 * intentionally not rendered for now.
 *
 * Every entry ends with a newline so consecutive entries never run together
 * on one line.
 *
 * @param results Search result data to format.
 * @returns The formatted text with leading/trailing whitespace trimmed.
 */
export function formatResultsForLLM(results: t.SearchResultData): string {
  let output = '';

  const addSection = (title: string): void => {
    output += `\n=== ${title} ===\n`;
  };

  // Drops absent optional lines ('') and terminates the entry with a newline.
  const renderLines = (lines: string[]): string =>
    `${lines.filter((line) => line !== '').join('\n')}\n`;

  // Organic (web) results
  const organic = results.organic ?? [];
  if (organic.length) {
    addSection('Web Results');
    organic.forEach((r, i) => {
      const highlights = (r.highlights ?? [])
        .filter((h) => h.text.trim().length > 0)
        .map((h) => `[Relevance: ${h.score.toFixed(2)}]\n${h.text.trim()}`);

      output += renderLines([
        `Source ${i + 1}: ${r.title ?? '(no title)'}`,
        `Citation Anchor: \\ue202turn0search${i + 1}`,
        `URL: ${r.link}`,
        r.snippet != null ? `Summary: ${r.snippet}` : '',
        r.date != null ? `Date: ${r.date}` : '',
        r.attribution != null ? `Source: ${r.attribution}` : '',
      ]);
      // Blank line between the source header and its highlights.
      output += '\n';
      output += renderLines(['--- Content Highlights ---', ...highlights]);
    });
  }

  // Knowledge Graph
  if (results.knowledgeGraph != null) {
    const kg = results.knowledgeGraph;
    addSection('Knowledge Graph');
    output += renderLines([
      `Title: ${kg.title ?? '(no title)'}`,
      kg.description != null ? `Description: ${kg.description}` : '',
      kg.type != null ? `Type: ${kg.type}` : '',
      kg.imageUrl != null ? `Image URL: ${kg.imageUrl}` : '',
      kg.attributes != null
        ? `Attributes: ${JSON.stringify(kg.attributes, null, 2)}`
        : '',
    ]);
  }

  // Answer Box
  if (results.answerBox != null) {
    const box = results.answerBox;
    addSection('Answer Box');
    output += renderLines([
      box.title != null ? `Title: ${box.title}` : '',
      box.answer != null ? `Answer: ${box.answer}` : '',
      box.snippet != null ? `Snippet: ${box.snippet}` : '',
      box.date != null ? `Date: ${box.date}` : '',
    ]);
  }

  // People also ask
  const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
  if (peopleAlsoAsk.length) {
    addSection('People Also Ask');
    peopleAlsoAsk.forEach((p) => {
      output += renderLines([`Q: ${p.question}`, `A: ${p.answer}`]);
    });
  }

  return output.trim();
}