crawlforge-mcp-server 3.3.1 → 3.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlforge-mcp-server",
3
- "version": "3.3.1",
3
+ "version": "3.4.0",
4
4
  "description": "CrawlForge MCP Server - Professional Model Context Protocol server with 21 comprehensive web scraping, crawling, and content processing tools.",
5
5
  "main": "server.js",
6
6
  "bin": {
package/server.js CHANGED
@@ -395,15 +395,15 @@ server.registerTool("extract_structured", {
395
395
 
396
396
  // Tool: extract_with_llm
397
397
  server.registerTool("extract_with_llm", {
398
- description: "Extract structured data from a URL or text using a natural-language prompt, powered by OpenAI or Anthropic. Requires OPENAI_API_KEY or ANTHROPIC_API_KEY in the environment.",
398
+ description: "Extract structured data from a URL or text using a natural-language prompt. Supports OpenAI, Anthropic, or a local Ollama model. Cloud providers require OPENAI_API_KEY or ANTHROPIC_API_KEY; Ollama requires no key (set provider: \"ollama\" with a running `ollama serve` on http://localhost:11434).",
399
399
  annotations: { title: "Extract With LLM", readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
400
400
  inputSchema: {
401
401
  url: z.string().url().optional().describe("URL to fetch and extract from (one of url/content required)"),
402
402
  content: z.string().optional().describe("Pre-fetched text to extract from (one of url/content required)"),
403
403
  prompt: z.string().describe("Natural-language extraction instruction"),
404
- schema: z.record(z.unknown()).optional().describe("Optional JSON-schema-like hint for output shape"),
405
- provider: z.enum(["openai", "anthropic", "auto"]).optional().default("auto").describe("LLM provider"),
406
- model: z.string().optional().describe("Override default model"),
404
+ schema: z.record(z.unknown()).optional().describe("Optional JSON-schema for output shape (used as Ollama structured-outputs format when provider is 'ollama')"),
405
+ provider: z.enum(["openai", "anthropic", "ollama", "auto"]).optional().default("auto").describe("LLM provider. Use 'ollama' for a local model on http://localhost:11434"),
406
+ model: z.string().optional().describe("Override default model (e.g. 'llama3.2' for ollama)"),
407
407
  maxTokens: z.number().optional().default(4096).describe("Maximum output tokens")
408
408
  }
409
409
  }, withAuth("extract_with_llm", async (params) => {
@@ -1,10 +1,10 @@
1
1
  /**
2
2
  * Extract With LLM MCP Tool
3
- * Natural-language extraction powered by OpenAI or Anthropic.
3
+ * Natural-language extraction powered by OpenAI, Anthropic, or a local Ollama model.
4
4
  * Mirrors ScrapeGraphAI positioning: describe what you want, get structured JSON back.
5
5
  *
6
- * Requires OPENAI_API_KEY or ANTHROPIC_API_KEY in environment.
7
- * Gate: tool throws a clear error when neither key is present.
6
+ * Cloud providers require OPENAI_API_KEY or ANTHROPIC_API_KEY in environment.
7
+ * Ollama requires no API key — just a running `ollama serve` on http://localhost:11434.
8
8
  */
9
9
 
10
10
  import { fetchAndParse } from './_fetchAndParse.js';
@@ -15,6 +15,7 @@ const MAX_INPUT_CHARS = 50_000;
15
15
 
16
16
  const OPENAI_DEFAULT_MODEL = 'gpt-4o-mini';
17
17
  const ANTHROPIC_DEFAULT_MODEL = 'claude-haiku-4-5-20251001';
18
+ const OLLAMA_DEFAULT_MODEL = 'llama3.2';
18
19
 
19
20
  // Support test-time overrides so the test suite can stub endpoints.
20
21
  function openaiBaseUrl() {
@@ -23,23 +24,29 @@ function openaiBaseUrl() {
23
24
  function anthropicBaseUrl() {
24
25
  return (process.env.ANTHROPIC_BASE_URL || 'https://api.anthropic.com').replace(/\/$/, '');
25
26
  }
27
+ function ollamaBaseUrl() {
28
+ return (process.env.OLLAMA_BASE_URL || 'http://localhost:11434').replace(/\/$/, '');
29
+ }
26
30
 
27
31
  // ── Helpers ───────────────────────────────────────────────────────────────────
28
32
 
29
33
  /**
30
34
  * Resolve which provider to use.
31
- * @param {'openai'|'anthropic'|'auto'} provider
32
- * @returns {{ provider: 'openai'|'anthropic', apiKey: string }}
35
+ * @param {'openai'|'anthropic'|'ollama'|'auto'} provider
36
+ * @returns {{ provider: 'openai'|'anthropic'|'ollama', apiKey: string|null }}
33
37
  */
34
38
  function resolveProvider(provider) {
35
39
  const anthropicKey = process.env.ANTHROPIC_API_KEY;
36
40
  const openaiKey = process.env.OPENAI_API_KEY;
41
+ const ollamaOptIn = !!process.env.OLLAMA_BASE_URL;
37
42
 
38
43
  if (provider === 'auto') {
39
44
  if (anthropicKey) return { provider: 'anthropic', apiKey: anthropicKey };
40
45
  if (openaiKey) return { provider: 'openai', apiKey: openaiKey };
46
+ if (ollamaOptIn) return { provider: 'ollama', apiKey: null };
41
47
  throw new Error(
42
- 'extract_with_llm requires OPENAI_API_KEY or ANTHROPIC_API_KEY in environment'
48
+ 'extract_with_llm requires OPENAI_API_KEY, ANTHROPIC_API_KEY, or OLLAMA_BASE_URL in environment ' +
49
+ '(or pass provider: "ollama" explicitly to use a local Ollama server)'
43
50
  );
44
51
  }
45
52
 
@@ -53,6 +60,10 @@ function resolveProvider(provider) {
53
60
  return { provider: 'openai', apiKey: openaiKey };
54
61
  }
55
62
 
63
+ if (provider === 'ollama') {
64
+ return { provider: 'ollama', apiKey: null };
65
+ }
66
+
56
67
  throw new Error(`extract_with_llm: unknown provider "${provider}"`);
57
68
  }
58
69
 
@@ -157,12 +168,68 @@ async function callAnthropic({ apiKey, model, systemMessage, userMessage, maxTok
157
168
  return { rawText: content, usage, model: json.model || model };
158
169
  }
159
170
 
171
+ // ── Ollama call ───────────────────────────────────────────────────────────────
172
+
173
+ async function callOllama({ model, systemMessage, userMessage, maxTokens, schema }) {
174
+ const url = `${ollamaBaseUrl()}/api/chat`;
175
+ const body = {
176
+ model,
177
+ messages: [
178
+ { role: 'system', content: systemMessage },
179
+ { role: 'user', content: userMessage }
180
+ ],
181
+ stream: false,
182
+ options: { num_predict: maxTokens, temperature: 0 },
183
+ format: (schema && Object.keys(schema).length > 0) ? schema : 'json'
184
+ };
185
+
186
+ let response;
187
+ try {
188
+ response = await fetch(url, {
189
+ method: 'POST',
190
+ headers: { 'Content-Type': 'application/json' },
191
+ body: JSON.stringify(body),
192
+ signal: AbortSignal.timeout(120_000)
193
+ });
194
+ } catch (err) {
195
+ const code = err?.cause?.code;
196
+ if (code === 'ECONNREFUSED' || code === 'ENOTFOUND' || /ECONNREFUSED|ENOTFOUND|fetch failed/i.test(err.message || '')) {
197
+ throw new Error(
198
+ `Ollama is not running at ${ollamaBaseUrl()}. ` +
199
+ `Start it with "ollama serve" and pull a model: "ollama pull ${model}".`
200
+ );
201
+ }
202
+ throw err;
203
+ }
204
+
205
+ if (!response.ok) {
206
+ const errText = await response.text().catch(() => '');
207
+ if (response.status === 404 && /model.*not found|pull/i.test(errText)) {
208
+ throw new Error(
209
+ `Ollama model "${model}" is not pulled. Run: "ollama pull ${model}"`
210
+ );
211
+ }
212
+ throw new Error(`Ollama API error ${response.status}: ${errText.slice(0, 200)}`);
213
+ }
214
+
215
+ const json = await response.json();
216
+ const content = json.message?.content ?? '';
217
+ const usage = {
218
+ input_tokens: json.prompt_eval_count ?? 0,
219
+ output_tokens: json.eval_count ?? 0
220
+ };
221
+ return { rawText: content, usage, model: json.model || model };
222
+ }
223
+
160
224
  // ── LLM dispatch ─────────────────────────────────────────────────────────────
161
225
 
162
- async function callLLM({ provider, apiKey, model, systemMessage, userMessage, maxTokens }) {
226
+ async function callLLM({ provider, apiKey, model, systemMessage, userMessage, maxTokens, schema }) {
163
227
  if (provider === 'openai') {
164
228
  return callOpenAI({ apiKey, model, systemMessage, userMessage, maxTokens });
165
229
  }
230
+ if (provider === 'ollama') {
231
+ return callOllama({ model, systemMessage, userMessage, maxTokens, schema });
232
+ }
166
233
  return callAnthropic({ apiKey, model, systemMessage, userMessage, maxTokens });
167
234
  }
168
235
 
@@ -216,7 +283,10 @@ export class ExtractWithLlm {
216
283
  }
217
284
 
218
285
  const { provider, apiKey } = resolved;
219
- const defaultModel = provider === 'openai' ? OPENAI_DEFAULT_MODEL : ANTHROPIC_DEFAULT_MODEL;
286
+ const defaultModel =
287
+ provider === 'openai' ? OPENAI_DEFAULT_MODEL :
288
+ provider === 'ollama' ? (process.env.OLLAMA_DEFAULT_MODEL || OLLAMA_DEFAULT_MODEL) :
289
+ ANTHROPIC_DEFAULT_MODEL;
220
290
  const model = modelParam || defaultModel;
221
291
 
222
292
  // Step 1: Get text to extract from
@@ -241,7 +311,7 @@ export class ExtractWithLlm {
241
311
  let rawText, usage;
242
312
  try {
243
313
  ({ rawText, usage } = await callLLM({
244
- provider, apiKey, model, systemMessage, userMessage, maxTokens
314
+ provider, apiKey, model, systemMessage, userMessage, maxTokens, schema
245
315
  }));
246
316
  } catch (llmErr) {
247
317
  return { success: false, error: `LLM call failed: ${llmErr.message}` };
@@ -260,7 +330,7 @@ export class ExtractWithLlm {
260
330
  try {
261
331
  ({ rawText: retryRaw, usage: retryUsage } = await callLLM({
262
332
  provider, apiKey, model, systemMessage,
263
- userMessage: retryUserMessage, maxTokens
333
+ userMessage: retryUserMessage, maxTokens, schema
264
334
  }));
265
335
  // Merge usage
266
336
  usage = {