scrapex 0.5.3 → 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +551 -145
- package/dist/enhancer-ByjRD-t5.mjs +769 -0
- package/dist/enhancer-ByjRD-t5.mjs.map +1 -0
- package/dist/enhancer-j0xqKDJm.cjs +847 -0
- package/dist/enhancer-j0xqKDJm.cjs.map +1 -0
- package/dist/index-CDgcRnig.d.cts +268 -0
- package/dist/index-CDgcRnig.d.cts.map +1 -0
- package/dist/index-piS5wtki.d.mts +268 -0
- package/dist/index-piS5wtki.d.mts.map +1 -0
- package/dist/index.cjs +2007 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +580 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +580 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +1956 -0
- package/dist/index.mjs.map +1 -0
- package/dist/llm/index.cjs +334 -0
- package/dist/llm/index.cjs.map +1 -0
- package/dist/llm/index.d.cts +258 -0
- package/dist/llm/index.d.cts.map +1 -0
- package/dist/llm/index.d.mts +258 -0
- package/dist/llm/index.d.mts.map +1 -0
- package/dist/llm/index.mjs +317 -0
- package/dist/llm/index.mjs.map +1 -0
- package/dist/parsers/index.cjs +11 -0
- package/dist/parsers/index.d.cts +2 -0
- package/dist/parsers/index.d.mts +2 -0
- package/dist/parsers/index.mjs +3 -0
- package/dist/parsers-Bneuws8x.cjs +569 -0
- package/dist/parsers-Bneuws8x.cjs.map +1 -0
- package/dist/parsers-CwkYnyWY.mjs +482 -0
- package/dist/parsers-CwkYnyWY.mjs.map +1 -0
- package/dist/types-CadAXrme.d.mts +674 -0
- package/dist/types-CadAXrme.d.mts.map +1 -0
- package/dist/types-DPEtPihB.d.cts +674 -0
- package/dist/types-DPEtPihB.d.cts.map +1 -0
- package/package.json +79 -100
- package/dist/index.d.ts +0 -45
- package/dist/index.js +0 -8
- package/dist/scrapex.cjs.development.js +0 -1130
- package/dist/scrapex.cjs.development.js.map +0 -1
- package/dist/scrapex.cjs.production.min.js +0 -2
- package/dist/scrapex.cjs.production.min.js.map +0 -1
- package/dist/scrapex.esm.js +0 -1122
- package/dist/scrapex.esm.js.map +0 -1
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
const require_parsers = require('../parsers-Bneuws8x.cjs');
|
|
2
|
+
const require_enhancer = require('../enhancer-j0xqKDJm.cjs');
|
|
3
|
+
let zod = require("zod");
|
|
4
|
+
|
|
5
|
+
//#region src/llm/http.ts
|
|
6
|
+
/**
|
|
7
|
+
* HTTP-based LLM Provider using native fetch.
|
|
8
|
+
* Provides a unified interface for any REST-based LLM API.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Merge the OpenAI-style JSON-mode flag into a request body.
 *
 * @param {unknown} body - Request body produced by a request builder.
 * @param {boolean} enabled - Whether JSON mode is switched on for the provider.
 * @returns {unknown} A shallow copy of `body` with `response_format` added, or
 *   `body` itself when JSON mode is off or `body` is not a non-null object
 *   (there is nothing to merge the flag into in that case).
 */
function applyJsonResponseFormat(body, enabled) {
	if (!enabled || typeof body !== "object" || body === null) return body;
	return {
		...body,
		response_format: { type: "json_object" }
	};
}

/**
 * Default response mapper: pull the completion text out of a response payload.
 *
 * Understands two shapes:
 * - `{ choices: [{ message: { content: "..." } }] }` (OpenAI-style)
 * - `{ content: [{ type: "text", text: "..." }] }` (Anthropic-style)
 *
 * Every property access is null-safe so that a malformed payload (null, a
 * primitive, null array entries) surfaces as the ScrapeError below rather
 * than as an unrelated TypeError.
 *
 * @param {unknown} response - Parsed response payload.
 * @returns {string} The first non-empty completion text found.
 * @throws {ScrapeError} `VALIDATION_ERROR` when neither shape yields text.
 */
function extractCompletionText(response) {
	const firstChoice = Array.isArray(response?.choices) ? response.choices[0] : void 0;
	if (firstChoice?.message?.content) return firstChoice.message.content;
	if (Array.isArray(response?.content)) {
		const textBlock = response.content.find((part) => part?.type === "text");
		if (textBlock?.text) return textBlock.text;
	}
	throw new require_enhancer.ScrapeError("Unable to parse LLM response. Provide a custom responseMapper.", "VALIDATION_ERROR");
}

/**
 * Send a request body through a provider and return the mapped completion text.
 * Shared by `complete` and `completeJSON` so both apply JSON mode, mapping and
 * the empty-response check identically.
 *
 * @param {HttpLLMProvider} provider - Provider whose `fetch`, `baseUrl`,
 *   `jsonMode` and `responseMapper` are used (`fetch`/`baseUrl` come from the
 *   base class, which is defined outside this file).
 * @param {unknown} body - Request body produced by the provider's request builder.
 * @returns {Promise<string>} Non-empty completion text.
 * @throws {ScrapeError} `LLM_ERROR` when the mapper returns a falsy value.
 */
async function requestCompletionText(provider, body) {
	const payload = applyJsonResponseFormat(body, provider.jsonMode);
	const { data } = await provider.fetch(provider.baseUrl, { body: payload });
	const content = provider.responseMapper(data);
	if (!content) throw new require_enhancer.ScrapeError("Empty response from LLM", "LLM_ERROR");
	return content;
}

/**
 * HTTP-based LLM provider.
 * Works with any REST API using native fetch.
 *
 * The request body and the response parsing are pluggable through
 * `config.requestBuilder` and `config.responseMapper`. Without them the
 * provider sends an OpenAI-style chat body (`model` + `messages`) and reads
 * OpenAI- or Anthropic-style responses.
 */
var HttpLLMProvider = class extends require_enhancer.BaseHttpProvider {
	name;
	requestBuilder;
	responseMapper;
	jsonMode;
	/**
	 * @param {object} config - Provider configuration; forwarded to the base class.
	 * @param {Function} [config.requestBuilder] - `(prompt, options) => body`.
	 * @param {Function} [config.responseMapper] - `(response) => string`.
	 * @param {boolean} [config.jsonMode=false] - Add `response_format: { type: "json_object" }`
	 *   to object request bodies.
	 */
	constructor(config) {
		super(config);
		this.name = "http-llm";
		this.jsonMode = config.jsonMode ?? false;
		// Default builder: chat-completions body. Optional fields are only set
		// when the caller supplied them so the API's own defaults apply otherwise.
		this.requestBuilder = config.requestBuilder ?? ((prompt, opts) => {
			const messages = [];
			if (opts.systemPrompt) messages.push({
				role: "system",
				content: opts.systemPrompt
			});
			messages.push({
				role: "user",
				content: prompt
			});
			const request = {
				model: this.model,
				messages
			};
			if (opts.temperature !== void 0) request.temperature = opts.temperature;
			if (opts.maxTokens !== void 0) request.max_tokens = opts.maxTokens;
			return request;
		});
		this.responseMapper = config.responseMapper ?? extractCompletionText;
	}
	/**
	 * Generate a text completion.
	 *
	 * @param {string} prompt - User prompt.
	 * @param {object} [options] - Completion options passed to the request builder.
	 * @returns {Promise<string>} Completion text.
	 * @throws {ScrapeError} `LLM_ERROR` on an empty completion.
	 */
	async complete(prompt, options = {}) {
		return requestCompletionText(this, this.requestBuilder(prompt, options));
	}
	/**
	 * Generate a structured JSON completion with Zod validation.
	 *
	 * The schema is rendered as JSON Schema and appended to the prompt. The
	 * reply is parsed as JSON directly, and if that fails the outermost
	 * `{...}` / `[...]` span is extracted from the reply and parsed instead.
	 *
	 * @param {string} prompt - User prompt.
	 * @param {object} schema - Zod schema the parsed reply must satisfy.
	 * @param {object} [options] - Completion options; `systemPrompt` defaults to a JSON-only instruction.
	 * @returns {Promise<unknown>} The value returned by `schema.parse`.
	 * @throws {ScrapeError} `LLM_ERROR` on an empty completion, `VALIDATION_ERROR`
	 *   when no JSON can be parsed or the schema rejects it.
	 */
	async completeJSON(prompt, schema, options = {}) {
		const jsonPrompt = `${prompt}\n\nRespond ONLY with valid JSON matching this schema:\n${JSON.stringify(zodToJsonSchema(schema), null, 2)}\n\nDo not include any explanation or markdown formatting. Just the JSON object.`;
		const body = this.requestBuilder(jsonPrompt, {
			...options,
			systemPrompt: options.systemPrompt ?? "You are a helpful assistant that responds only with valid JSON."
		});
		const content = await requestCompletionText(this, body);
		try {
			const trimmed = content.trim();
			try {
				return schema.parse(JSON.parse(trimmed));
			} catch {
				// Deliberate best-effort: fall back to extracting JSON from
				// markdown fences or surrounding prose below.
			}
			const jsonMatch = content.match(/[[{][\s\S]*[\]}]/);
			if (!jsonMatch) throw new Error("No JSON found in response");
			return schema.parse(JSON.parse(jsonMatch[0]));
		} catch (error) {
			throw new require_enhancer.ScrapeError(`Failed to parse LLM response as JSON: ${error instanceof Error ? error.message : String(error)}`, "VALIDATION_ERROR", void 0, error instanceof Error ? error : void 0);
		}
	}
};
|
|
104
|
+
/**
 * Produce a JSON Schema description of a Zod schema for use in LLM prompts.
 *
 * When the installed Zod exposes `z.toJSONSchema`, its output is returned
 * with the `$schema` key stripped. Otherwise a minimal converter keyed on
 * `schema._def.type` is used; it handles object, array, string, number,
 * boolean, enum and optional, and describes anything else as a string.
 *
 * @param {object} schema - Zod schema to convert.
 * @returns {object} JSON Schema object.
 */
function zodToJsonSchema(schema) {
	if (typeof zod.z.toJSONSchema === "function") {
		const generated = zod.z.toJSONSchema(schema);
		// `$schema` is pulled out only to omit it from the returned object.
		const { $schema, ...withoutMeta } = generated;
		return withoutMeta;
	}
	const def = schema._def;
	const kind = def.type;
	if (kind === "object") {
		const properties = {};
		const required = [];
		Object.entries(schema.shape).forEach(([key, field]) => {
			properties[key] = zodToJsonSchema(field);
			// Only fields wrapped directly in `optional` are left out of `required`.
			if (field._def.type !== "optional") required.push(key);
		});
		return {
			type: "object",
			properties,
			required
		};
	}
	if (kind === "array") {
		return {
			type: "array",
			items: zodToJsonSchema(def.element)
		};
	}
	if (kind === "optional") return zodToJsonSchema(def.innerType);
	if (kind === "enum") {
		return {
			type: "string",
			enum: Object.values(def.entries)
		};
	}
	if (kind === "number" || kind === "boolean") return { type: kind };
	// "string" and every unrecognised kind share the same description.
	return { type: "string" };
}
|
|
148
|
+
|
|
149
|
+
//#endregion
|
|
150
|
+
//#region src/llm/presets.ts
|
|
151
|
+
/**
|
|
152
|
+
* Preset factory functions for common LLM providers.
|
|
153
|
+
* All presets use the HttpLLMProvider with appropriate configuration.
|
|
154
|
+
*/
|
|
155
|
+
/**
 * Build an LLM provider configured for the OpenAI chat-completions endpoint.
 *
 * @example
 * ```ts
 * const provider = createOpenAI({ apiKey: 'sk-...' });
 * const result = await scrape(url, { llm: provider, enhance: ['summarize'] });
 * ```
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Falls back to the OPENAI_API_KEY env var.
 * @param {string} [options.model] - Defaults to "gpt-4o-mini".
 * @param {string} [options.baseUrl] - Overrides the default endpoint URL.
 * @returns {HttpLLMProvider} Provider with JSON mode enabled.
 * @throws {Error} When no API key is available.
 */
function createOpenAI(options) {
	const key = options?.apiKey ?? process.env.OPENAI_API_KEY;
	if (!key) {
		throw new Error("OpenAI API key required. Set OPENAI_API_KEY env var or pass apiKey option.");
	}
	const endpoint = options?.baseUrl ?? "https://api.openai.com/v1/chat/completions";
	const modelId = options?.model ?? "gpt-4o-mini";
	return new HttpLLMProvider({
		baseUrl: endpoint,
		model: modelId,
		headers: { Authorization: `Bearer ${key}` },
		jsonMode: true
	});
}
|
|
174
|
+
/**
 * Create an Anthropic Claude LLM provider.
 *
 * @example
 * ```ts
 * const provider = createAnthropic({ apiKey: 'sk-...' });
 * const result = await scrape(url, { llm: provider, enhance: ['summarize'] });
 * ```
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Falls back to the ANTHROPIC_API_KEY env var.
 * @param {string} [options.model] - Defaults to "claude-3-5-haiku-20241022".
 * @returns {HttpLLMProvider} Provider using a Messages-API request builder and response mapper.
 * @throws {Error} When no API key is available.
 */
function createAnthropic(options) {
	const apiKey = options?.apiKey ?? process.env.ANTHROPIC_API_KEY;
	if (!apiKey) throw new Error("Anthropic API key required. Set ANTHROPIC_API_KEY env var or pass apiKey option.");
	const model = options?.model ?? "claude-3-5-haiku-20241022";
	return new HttpLLMProvider({
		baseUrl: "https://api.anthropic.com/v1/messages",
		model,
		headers: {
			"x-api-key": apiKey,
			"anthropic-version": "2023-06-01"
		},
		// The system prompt goes in a top-level `system` field rather than in
		// `messages`; `max_tokens` is always sent, defaulting to 1024.
		requestBuilder: (prompt, opts) => ({
			model,
			max_tokens: opts.maxTokens ?? 1024,
			messages: [{
				role: "user",
				content: prompt
			}],
			...opts.systemPrompt && { system: opts.systemPrompt },
			...opts.temperature !== void 0 && { temperature: opts.temperature }
		}),
		// Return "" for any payload without a text block (missing/non-array
		// `content`, null entries) so the caller sees the provider's
		// empty-response error instead of a TypeError from this mapper.
		responseMapper: (res) => {
			const blocks = Array.isArray(res?.content) ? res.content : [];
			return blocks.find((item) => item?.type === "text")?.text ?? "";
		}
	});
}
|
|
207
|
+
/**
 * Build an LLM provider for Groq's OpenAI-compatible chat-completions endpoint.
 *
 * @example
 * ```ts
 * const provider = createGroq({ model: 'llama-3.1-70b-versatile' });
 * ```
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Falls back to the GROQ_API_KEY env var.
 * @param {string} [options.model] - Defaults to "llama-3.1-70b-versatile".
 * @returns {HttpLLMProvider} Provider with JSON mode enabled.
 * @throws {Error} When no API key is available.
 */
function createGroq(options) {
	const key = options?.apiKey ?? process.env.GROQ_API_KEY;
	if (!key) {
		throw new Error("Groq API key required. Set GROQ_API_KEY env var or pass apiKey option.");
	}
	// NOTE(review): confirm this default model id is still served by Groq.
	const modelId = options?.model ?? "llama-3.1-70b-versatile";
	return new HttpLLMProvider({
		baseUrl: "https://api.groq.com/openai/v1/chat/completions",
		model: modelId,
		headers: { Authorization: `Bearer ${key}` },
		jsonMode: true
	});
}
|
|
226
|
+
/**
 * Build an LLM provider for a local Ollama server.
 *
 * @example
 * ```ts
 * const provider = createOllama({ model: 'llama3.2' });
 * ```
 *
 * @param {object} options
 * @param {string} options.model - Model name sent with each request.
 * @param {string} [options.baseUrl] - Defaults to the localhost endpoint on port 11434.
 * @returns {HttpLLMProvider} Provider configured with `requireHttps: false`
 *   and `allowPrivate: true` so the plain-HTTP local address is accepted.
 */
function createOllama(options) {
	const { baseUrl, model } = options;
	const endpoint = baseUrl ?? "http://localhost:11434/v1/chat/completions";
	return new HttpLLMProvider({
		baseUrl: endpoint,
		model,
		requireHttps: false,
		allowPrivate: true
	});
}
|
|
242
|
+
/**
 * Build an LLM provider for a local LM Studio server.
 *
 * @example
 * ```ts
 * const provider = createLMStudio({ model: 'local-model' });
 * ```
 *
 * @param {object} options
 * @param {string} options.model - Model name sent with each request.
 * @param {string} [options.baseUrl] - Defaults to the localhost endpoint on port 1234.
 * @returns {HttpLLMProvider} Provider configured with `requireHttps: false`
 *   and `allowPrivate: true` so the plain-HTTP local address is accepted.
 */
function createLMStudio(options) {
	const { baseUrl, model } = options;
	const endpoint = baseUrl ?? "http://localhost:1234/v1/chat/completions";
	return new HttpLLMProvider({
		baseUrl: endpoint,
		model,
		requireHttps: false,
		allowPrivate: true
	});
}
|
|
258
|
+
/**
 * Build an LLM provider for the Together AI chat-completions endpoint.
 *
 * @example
 * ```ts
 * const provider = createTogether({ model: 'meta-llama/Llama-3.2-3B-Instruct-Turbo' });
 * ```
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Falls back to the TOGETHER_API_KEY env var.
 * @param {string} [options.model] - Defaults to "meta-llama/Llama-3.2-3B-Instruct-Turbo".
 * @returns {HttpLLMProvider} Provider with JSON mode enabled.
 * @throws {Error} When no API key is available.
 */
function createTogether(options) {
	const key = options?.apiKey ?? process.env.TOGETHER_API_KEY;
	if (!key) {
		throw new Error("Together API key required. Set TOGETHER_API_KEY env var or pass apiKey option.");
	}
	const modelId = options?.model ?? "meta-llama/Llama-3.2-3B-Instruct-Turbo";
	return new HttpLLMProvider({
		baseUrl: "https://api.together.xyz/v1/chat/completions",
		model: modelId,
		headers: { Authorization: `Bearer ${key}` },
		jsonMode: true
	});
}
|
|
276
|
+
/**
 * Build an LLM provider for the OpenRouter chat-completions endpoint.
 * OpenRouter provides access to many models through a unified API.
 *
 * @example
 * ```ts
 * const provider = createOpenRouter({
 *   model: 'anthropic/claude-3.5-sonnet',
 * });
 * ```
 *
 * @param {object} options
 * @param {string} options.model - Model identifier sent with each request.
 * @param {string} [options.apiKey] - Falls back to the OPENROUTER_API_KEY env var.
 * @param {string} [options.siteUrl] - Sent as the `HTTP-Referer` header when set.
 * @param {string} [options.siteName] - Sent as the `X-Title` header when set.
 * @returns {HttpLLMProvider} Configured provider (JSON mode is not enabled here).
 * @throws {Error} When no API key is available.
 */
function createOpenRouter(options) {
	const key = options.apiKey ?? process.env.OPENROUTER_API_KEY;
	if (!key) {
		throw new Error("OpenRouter API key required. Set OPENROUTER_API_KEY env var or pass apiKey option.");
	}
	// Attribution headers are included only when the caller provided a value.
	const requestHeaders = {
		Authorization: `Bearer ${key}`,
		...(options.siteUrl ? { "HTTP-Referer": options.siteUrl } : {}),
		...(options.siteName ? { "X-Title": options.siteName } : {})
	};
	return new HttpLLMProvider({
		baseUrl: "https://openrouter.ai/api/v1/chat/completions",
		model: options.model,
		headers: requestHeaders
	});
}
|
|
299
|
+
/**
 * Build a generic HTTP LLM provider from a caller-supplied configuration.
 * Use this for any OpenAI-compatible API.
 *
 * @example
 * ```ts
 * const provider = createHttpLLM({
 *   baseUrl: 'https://my-api.com/v1/chat/completions',
 *   model: 'my-model',
 *   headers: { Authorization: 'Bearer ...' },
 * });
 * ```
 *
 * @param {object} config - Passed unchanged to the HttpLLMProvider constructor.
 * @returns {HttpLLMProvider} The newly constructed provider.
 */
function createHttpLLM(config) {
	const provider = new HttpLLMProvider(config);
	return provider;
}
|
|
315
|
+
|
|
316
|
+
//#endregion
|
|
317
|
+
exports.ClassifySchema = require_enhancer.ClassifySchema;
|
|
318
|
+
exports.EntitiesSchema = require_enhancer.EntitiesSchema;
|
|
319
|
+
exports.HttpLLMProvider = HttpLLMProvider;
|
|
320
|
+
exports.SummarySchema = require_enhancer.SummarySchema;
|
|
321
|
+
exports.TagsSchema = require_enhancer.TagsSchema;
|
|
322
|
+
exports.ask = require_enhancer.ask;
|
|
323
|
+
exports.createAnthropic = createAnthropic;
|
|
324
|
+
exports.createGroq = createGroq;
|
|
325
|
+
exports.createHttpLLM = createHttpLLM;
|
|
326
|
+
exports.createLMStudio = createLMStudio;
|
|
327
|
+
exports.createOllama = createOllama;
|
|
328
|
+
exports.createOpenAI = createOpenAI;
|
|
329
|
+
exports.createOpenRouter = createOpenRouter;
|
|
330
|
+
exports.createTogether = createTogether;
|
|
331
|
+
exports.enhance = require_enhancer.enhance;
|
|
332
|
+
exports.extract = require_enhancer.extract;
|
|
333
|
+
exports.zodToJsonSchema = zodToJsonSchema;
|
|
334
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.cjs","names":["BaseHttpProvider","messages: Array<{ role: string; content: string }>","request: Record<string, unknown>","ScrapeError","z","properties: Record<string, object>","required: string[]","headers: Record<string, string>"],"sources":["../../src/llm/http.ts","../../src/llm/presets.ts"],"sourcesContent":["/**\n * HTTP-based LLM Provider using native fetch.\n * Provides a unified interface for any REST-based LLM API.\n */\n\nimport { z } from 'zod';\nimport { type BaseHttpConfig, BaseHttpProvider } from '../common/http-base.js';\nimport { ScrapeError } from '../core/errors.js';\nimport type { CompletionOptions, LLMProvider } from './types.js';\n\n// ─────────────────────────────────────────────────────────────\n// Types\n// ─────────────────────────────────────────────────────────────\n\n/**\n * HTTP LLM provider configuration.\n */\nexport interface HttpLLMConfig<TRequest = unknown, TResponse = unknown, TError = unknown>\n extends BaseHttpConfig<TError> {\n /**\n * Build request body from prompt and options.\n * @default OpenAI-compatible format with messages array\n */\n requestBuilder?: (prompt: string, options: CompletionOptions) => TRequest;\n /**\n * Extract completion text from response.\n * @default (res) => res.choices[0].message.content\n */\n responseMapper?: (response: TResponse) => string;\n /**\n * Enable JSON mode - adds response_format to request.\n * For OpenAI-compatible APIs, this adds { response_format: { type: \"json_object\" } }\n */\n jsonMode?: boolean;\n}\n\n// ─────────────────────────────────────────────────────────────\n// HTTP LLM Provider\n// ─────────────────────────────────────────────────────────────\n\n/**\n * HTTP-based LLM provider.\n * Works with any REST API using native fetch.\n */\nexport class HttpLLMProvider<TRequest = unknown, TResponse = unknown, TError = unknown>\n extends BaseHttpProvider<TError>\n implements LLMProvider\n{\n readonly name: string;\n\n private readonly requestBuilder: 
(prompt: string, options: CompletionOptions) => TRequest;\n private readonly responseMapper: (response: TResponse) => string;\n private readonly jsonMode: boolean;\n\n constructor(config: HttpLLMConfig<TRequest, TResponse, TError>) {\n super(config);\n this.name = 'http-llm';\n this.jsonMode = config.jsonMode ?? false;\n\n // Default request builder: OpenAI-compatible format\n this.requestBuilder =\n config.requestBuilder ??\n ((prompt: string, opts: CompletionOptions) => {\n const messages: Array<{ role: string; content: string }> = [];\n\n if (opts.systemPrompt) {\n messages.push({ role: 'system', content: opts.systemPrompt });\n }\n messages.push({ role: 'user', content: prompt });\n\n const request: Record<string, unknown> = {\n model: this.model,\n messages,\n };\n\n if (opts.temperature !== undefined) {\n request.temperature = opts.temperature;\n }\n if (opts.maxTokens !== undefined) {\n request.max_tokens = opts.maxTokens;\n }\n\n return request as TRequest;\n });\n\n // Default response mapper: OpenAI-compatible format\n this.responseMapper =\n config.responseMapper ??\n ((response: TResponse) => {\n const resp = response as Record<string, unknown>;\n\n // OpenAI format: { choices: [{ message: { content: \"...\" } }] }\n if (Array.isArray(resp.choices) && resp.choices.length > 0) {\n const choice = resp.choices[0] as { message?: { content?: string } };\n if (choice.message?.content) {\n return choice.message.content;\n }\n }\n\n // Anthropic format: { content: [{ type: \"text\", text: \"...\" }] }\n if (Array.isArray(resp.content)) {\n const textBlock = resp.content.find((c: { type?: string }) => c.type === 'text') as\n | { text?: string }\n | undefined;\n if (textBlock?.text) {\n return textBlock.text;\n }\n }\n\n throw new ScrapeError(\n 'Unable to parse LLM response. 
Provide a custom responseMapper.',\n 'VALIDATION_ERROR'\n );\n });\n }\n\n /**\n * Generate a text completion.\n */\n async complete(prompt: string, options: CompletionOptions = {}): Promise<string> {\n let body = this.requestBuilder(prompt, options);\n\n // Add JSON mode if enabled\n if (this.jsonMode && typeof body === 'object' && body !== null) {\n body = {\n ...body,\n response_format: { type: 'json_object' },\n } as TRequest;\n }\n\n const { data } = await this.fetch<TResponse>(this.baseUrl, { body });\n\n const content = this.responseMapper(data);\n if (!content) {\n throw new ScrapeError('Empty response from LLM', 'LLM_ERROR');\n }\n\n return content;\n }\n\n /**\n * Generate a structured JSON completion with Zod validation.\n */\n async completeJSON<T>(\n prompt: string,\n schema: z.ZodType<T>,\n options: CompletionOptions = {}\n ): Promise<T> {\n // Build a prompt that requests JSON output\n const jsonPrompt = `${prompt}\n\nRespond ONLY with valid JSON matching this schema:\n${JSON.stringify(zodToJsonSchema(schema), null, 2)}\n\nDo not include any explanation or markdown formatting. Just the JSON object.`;\n\n // Use JSON mode if available\n const useJsonMode = this.jsonMode;\n let body = this.requestBuilder(jsonPrompt, {\n ...options,\n systemPrompt:\n options.systemPrompt ?? 
'You are a helpful assistant that responds only with valid JSON.',\n });\n\n if (useJsonMode && typeof body === 'object' && body !== null) {\n body = {\n ...body,\n response_format: { type: 'json_object' },\n } as TRequest;\n }\n\n const { data } = await this.fetch<TResponse>(this.baseUrl, { body });\n const content = this.responseMapper(data);\n\n if (!content) {\n throw new ScrapeError('Empty response from LLM', 'LLM_ERROR');\n }\n\n try {\n const trimmed = content.trim();\n try {\n return schema.parse(JSON.parse(trimmed));\n } catch {\n // Fall back to extracting JSON from markdown or surrounding text\n }\n\n const jsonMatch = content.match(/[[{][\\s\\S]*[\\]}]/);\n if (!jsonMatch) {\n throw new Error('No JSON found in response');\n }\n\n const parsed = JSON.parse(jsonMatch[0]);\n return schema.parse(parsed);\n } catch (error) {\n throw new ScrapeError(\n `Failed to parse LLM response as JSON: ${error instanceof Error ? error.message : String(error)}`,\n 'VALIDATION_ERROR',\n undefined,\n error instanceof Error ? 
error : undefined\n );\n }\n }\n}\n\n// ─────────────────────────────────────────────────────────────\n// Utilities\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Convert a Zod schema to a JSON Schema representation.\n * Uses Zod's built-in toJSONSchema method (Zod 4+).\n * Used for prompting LLMs to return structured data.\n */\nexport function zodToJsonSchema(schema: z.ZodType<unknown>): object {\n // Zod 4+ has built-in static toJSONSchema method\n if (typeof z.toJSONSchema === 'function') {\n const jsonSchema = z.toJSONSchema(schema);\n // Remove $schema key as it's not needed for LLM prompting\n const { $schema, ...rest } = jsonSchema as { $schema?: string; [key: string]: unknown };\n return rest;\n }\n\n // Fallback for older Zod versions using _def.type\n const def = (schema as z.ZodType<unknown> & { _def: { type: string } })._def;\n const type = def.type;\n\n switch (type) {\n case 'object': {\n const shape = (schema as z.ZodObject<z.ZodRawShape>).shape;\n const properties: Record<string, object> = {};\n const required: string[] = [];\n\n for (const [key, value] of Object.entries(shape)) {\n properties[key] = zodToJsonSchema(value as z.ZodType<unknown>);\n const valueDef = (value as z.ZodType<unknown> & { _def: { type: string } })._def;\n if (valueDef.type !== 'optional') {\n required.push(key);\n }\n }\n return { type: 'object', properties, required };\n }\n case 'array': {\n const arrayDef = def as unknown as { element: z.ZodType<unknown> };\n return { type: 'array', items: zodToJsonSchema(arrayDef.element) };\n }\n case 'string':\n return { type: 'string' };\n case 'number':\n return { type: 'number' };\n case 'boolean':\n return { type: 'boolean' };\n case 'enum': {\n const enumDef = def as unknown as { entries: Record<string, string> };\n return { type: 'string', enum: Object.values(enumDef.entries) };\n }\n case 'optional': {\n const optionalDef = def as unknown as { innerType: z.ZodType<unknown> };\n return 
zodToJsonSchema(optionalDef.innerType);\n }\n default:\n return { type: 'string' };\n }\n}\n\n// Re-export types for convenience\nexport type { ZodType } from 'zod';\n","/**\n * Preset factory functions for common LLM providers.\n * All presets use the HttpLLMProvider with appropriate configuration.\n */\n\nimport { type HttpLLMConfig, HttpLLMProvider } from './http.js';\nimport type { LLMProvider } from './types.js';\n\n// ─────────────────────────────────────────────────────────────\n// OpenAI\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create an OpenAI LLM provider.\n *\n * @example\n * ```ts\n * const provider = createOpenAI({ apiKey: 'sk-...' });\n * const result = await scrape(url, { llm: provider, enhance: ['summarize'] });\n * ```\n */\nexport function createOpenAI(options?: {\n apiKey?: string;\n model?: string;\n baseUrl?: string;\n}): LLMProvider {\n const apiKey = options?.apiKey ?? process.env.OPENAI_API_KEY;\n if (!apiKey) {\n throw new Error('OpenAI API key required. Set OPENAI_API_KEY env var or pass apiKey option.');\n }\n\n return new HttpLLMProvider({\n baseUrl: options?.baseUrl ?? 'https://api.openai.com/v1/chat/completions',\n model: options?.model ?? 'gpt-4o-mini',\n headers: { Authorization: `Bearer ${apiKey}` },\n jsonMode: true,\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Anthropic\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Anthropic API response shape.\n */\ninterface AnthropicResponse {\n content: Array<{ type: string; text?: string }>;\n}\n\n/**\n * Create an Anthropic Claude LLM provider.\n *\n * @example\n * ```ts\n * const provider = createAnthropic({ apiKey: 'sk-...' });\n * const result = await scrape(url, { llm: provider, enhance: ['summarize'] });\n * ```\n */\nexport function createAnthropic(options?: { apiKey?: string; model?: string }): LLMProvider {\n const apiKey = options?.apiKey ?? 
process.env.ANTHROPIC_API_KEY;\n if (!apiKey) {\n throw new Error(\n 'Anthropic API key required. Set ANTHROPIC_API_KEY env var or pass apiKey option.'\n );\n }\n\n const model = options?.model ?? 'claude-3-5-haiku-20241022';\n\n return new HttpLLMProvider<unknown, AnthropicResponse>({\n baseUrl: 'https://api.anthropic.com/v1/messages',\n model,\n headers: {\n 'x-api-key': apiKey,\n 'anthropic-version': '2023-06-01',\n },\n requestBuilder: (prompt, opts) => ({\n model,\n max_tokens: opts.maxTokens ?? 1024,\n messages: [{ role: 'user', content: prompt }],\n ...(opts.systemPrompt && { system: opts.systemPrompt }),\n ...(opts.temperature !== undefined && { temperature: opts.temperature }),\n }),\n responseMapper: (res) => res.content.find((item) => item.type === 'text')?.text ?? '',\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Groq\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create a Groq LLM provider.\n * Groq provides fast inference for open-source models.\n *\n * @example\n * ```ts\n * const provider = createGroq({ model: 'llama-3.1-70b-versatile' });\n * ```\n */\nexport function createGroq(options?: { apiKey?: string; model?: string }): LLMProvider {\n const apiKey = options?.apiKey ?? process.env.GROQ_API_KEY;\n if (!apiKey) {\n throw new Error('Groq API key required. Set GROQ_API_KEY env var or pass apiKey option.');\n }\n\n return new HttpLLMProvider({\n baseUrl: 'https://api.groq.com/openai/v1/chat/completions',\n model: options?.model ?? 
'llama-3.1-70b-versatile',\n headers: { Authorization: `Bearer ${apiKey}` },\n jsonMode: true,\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Ollama (Local)\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create an Ollama LLM provider for local models.\n *\n * @example\n * ```ts\n * const provider = createOllama({ model: 'llama3.2' });\n * ```\n */\nexport function createOllama(options: { model: string; baseUrl?: string }): LLMProvider {\n return new HttpLLMProvider({\n baseUrl: options.baseUrl ?? 'http://localhost:11434/v1/chat/completions',\n model: options.model,\n requireHttps: false,\n allowPrivate: true,\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// LM Studio (Local)\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create an LM Studio LLM provider for local models.\n *\n * @example\n * ```ts\n * const provider = createLMStudio({ model: 'local-model' });\n * ```\n */\nexport function createLMStudio(options: { model: string; baseUrl?: string }): LLMProvider {\n return new HttpLLMProvider({\n baseUrl: options.baseUrl ?? 'http://localhost:1234/v1/chat/completions',\n model: options.model,\n requireHttps: false,\n allowPrivate: true,\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Together AI\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create a Together AI LLM provider.\n *\n * @example\n * ```ts\n * const provider = createTogether({ model: 'meta-llama/Llama-3.2-3B-Instruct-Turbo' });\n * ```\n */\nexport function createTogether(options?: { apiKey?: string; model?: string }): LLMProvider {\n const apiKey = options?.apiKey ?? process.env.TOGETHER_API_KEY;\n if (!apiKey) {\n throw new Error(\n 'Together API key required. 
Set TOGETHER_API_KEY env var or pass apiKey option.'\n );\n }\n\n return new HttpLLMProvider({\n baseUrl: 'https://api.together.xyz/v1/chat/completions',\n model: options?.model ?? 'meta-llama/Llama-3.2-3B-Instruct-Turbo',\n headers: { Authorization: `Bearer ${apiKey}` },\n jsonMode: true,\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// OpenRouter\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create an OpenRouter LLM provider.\n * OpenRouter provides access to many models through a unified API.\n *\n * @example\n * ```ts\n * const provider = createOpenRouter({\n * model: 'anthropic/claude-3.5-sonnet',\n * });\n * ```\n */\nexport function createOpenRouter(options: {\n apiKey?: string;\n model: string;\n siteUrl?: string;\n siteName?: string;\n}): LLMProvider {\n const apiKey = options.apiKey ?? process.env.OPENROUTER_API_KEY;\n if (!apiKey) {\n throw new Error(\n 'OpenRouter API key required. Set OPENROUTER_API_KEY env var or pass apiKey option.'\n );\n }\n\n const headers: Record<string, string> = {\n Authorization: `Bearer ${apiKey}`,\n };\n\n if (options.siteUrl) {\n headers['HTTP-Referer'] = options.siteUrl;\n }\n if (options.siteName) {\n headers['X-Title'] = options.siteName;\n }\n\n return new HttpLLMProvider({\n baseUrl: 'https://openrouter.ai/api/v1/chat/completions',\n model: options.model,\n headers,\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Generic HTTP Provider\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create a generic HTTP LLM provider.\n * Use this for any OpenAI-compatible API.\n *\n * @example\n * ```ts\n * const provider = createHttpLLM({\n * baseUrl: 'https://my-api.com/v1/chat/completions',\n * model: 'my-model',\n * headers: { Authorization: 'Bearer ...' 
},\n * });\n * ```\n */\nexport function createHttpLLM<TRequest = unknown, TResponse = unknown, TError = unknown>(\n config: HttpLLMConfig<TRequest, TResponse, TError>\n): LLMProvider {\n return new HttpLLMProvider(config);\n}\n"],"mappings":";;;;;;;;;;;;;AA4CA,IAAa,kBAAb,cACUA,kCAEV;CACE,AAAS;CAET,AAAiB;CACjB,AAAiB;CACjB,AAAiB;CAEjB,YAAY,QAAoD;AAC9D,QAAM,OAAO;AACb,OAAK,OAAO;AACZ,OAAK,WAAW,OAAO,YAAY;AAGnC,OAAK,iBACH,OAAO,oBACL,QAAgB,SAA4B;GAC5C,MAAMC,WAAqD,EAAE;AAE7D,OAAI,KAAK,aACP,UAAS,KAAK;IAAE,MAAM;IAAU,SAAS,KAAK;IAAc,CAAC;AAE/D,YAAS,KAAK;IAAE,MAAM;IAAQ,SAAS;IAAQ,CAAC;GAEhD,MAAMC,UAAmC;IACvC,OAAO,KAAK;IACZ;IACD;AAED,OAAI,KAAK,gBAAgB,OACvB,SAAQ,cAAc,KAAK;AAE7B,OAAI,KAAK,cAAc,OACrB,SAAQ,aAAa,KAAK;AAG5B,UAAO;;AAIX,OAAK,iBACH,OAAO,oBACL,aAAwB;GACxB,MAAM,OAAO;AAGb,OAAI,MAAM,QAAQ,KAAK,QAAQ,IAAI,KAAK,QAAQ,SAAS,GAAG;IAC1D,MAAM,SAAS,KAAK,QAAQ;AAC5B,QAAI,OAAO,SAAS,QAClB,QAAO,OAAO,QAAQ;;AAK1B,OAAI,MAAM,QAAQ,KAAK,QAAQ,EAAE;IAC/B,MAAM,YAAY,KAAK,QAAQ,MAAM,MAAyB,EAAE,SAAS,OAAO;AAGhF,QAAI,WAAW,KACb,QAAO,UAAU;;AAIrB,SAAM,IAAIC,6BACR,kEACA,mBACD;;;;;;CAOP,MAAM,SAAS,QAAgB,UAA6B,EAAE,EAAmB;EAC/E,IAAI,OAAO,KAAK,eAAe,QAAQ,QAAQ;AAG/C,MAAI,KAAK,YAAY,OAAO,SAAS,YAAY,SAAS,KACxD,QAAO;GACL,GAAG;GACH,iBAAiB,EAAE,MAAM,eAAe;GACzC;EAGH,MAAM,EAAE,SAAS,MAAM,KAAK,MAAiB,KAAK,SAAS,EAAE,MAAM,CAAC;EAEpE,MAAM,UAAU,KAAK,eAAe,KAAK;AACzC,MAAI,CAAC,QACH,OAAM,IAAIA,6BAAY,2BAA2B,YAAY;AAG/D,SAAO;;;;;CAMT,MAAM,aACJ,QACA,QACA,UAA6B,EAAE,EACnB;EAEZ,MAAM,aAAa,GAAG,OAAO;;;EAG/B,KAAK,UAAU,gBAAgB,OAAO,EAAE,MAAM,EAAE,CAAC;;;EAK/C,MAAM,cAAc,KAAK;EACzB,IAAI,OAAO,KAAK,eAAe,YAAY;GACzC,GAAG;GACH,cACE,QAAQ,gBAAgB;GAC3B,CAAC;AAEF,MAAI,eAAe,OAAO,SAAS,YAAY,SAAS,KACtD,QAAO;GACL,GAAG;GACH,iBAAiB,EAAE,MAAM,eAAe;GACzC;EAGH,MAAM,EAAE,SAAS,MAAM,KAAK,MAAiB,KAAK,SAAS,EAAE,MAAM,CAAC;EACpE,MAAM,UAAU,KAAK,eAAe,KAAK;AAEzC,MAAI,CAAC,QACH,OAAM,IAAIA,6BAAY,2BAA2B,YAAY;AAG/D,MAAI;GACF,MAAM,UAAU,QAAQ,MAAM;AAC9B,OAAI;AACF,WAAO,OAAO,MAAM,KAAK,MAAM,QAAQ,CAAC;WAClC;GAIR,MAAM,YAAY,QAAQ,MAAM,mBAAmB;AACnD,OAAI,CAAC,UACH,OAAM,IAAI,MAAM,4BAA4B;GAG9C,MA
AM,SAAS,KAAK,MAAM,UAAU,GAAG;AACvC,UAAO,OAAO,MAAM,OAAO;WACpB,OAAO;AACd,SAAM,IAAIA,6BACR,yCAAyC,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM,IAC/F,oBACA,QACA,iBAAiB,QAAQ,QAAQ,OAClC;;;;;;;;;AAcP,SAAgB,gBAAgB,QAAoC;AAElE,KAAI,OAAOC,MAAE,iBAAiB,YAAY;EAGxC,MAAM,EAAE,SAAS,GAAG,SAFDA,MAAE,aAAa,OAAO;AAGzC,SAAO;;CAIT,MAAM,MAAO,OAA2D;AAGxE,SAFa,IAAI,MAEjB;EACE,KAAK,UAAU;GACb,MAAM,QAAS,OAAsC;GACrD,MAAMC,aAAqC,EAAE;GAC7C,MAAMC,WAAqB,EAAE;AAE7B,QAAK,MAAM,CAAC,KAAK,UAAU,OAAO,QAAQ,MAAM,EAAE;AAChD,eAAW,OAAO,gBAAgB,MAA4B;AAE9D,QADkB,MAA0D,KAC/D,SAAS,WACpB,UAAS,KAAK,IAAI;;AAGtB,UAAO;IAAE,MAAM;IAAU;IAAY;IAAU;;EAEjD,KAAK,QAEH,QAAO;GAAE,MAAM;GAAS,OAAO,gBADd,IACuC,QAAQ;GAAE;EAEpE,KAAK,SACH,QAAO,EAAE,MAAM,UAAU;EAC3B,KAAK,SACH,QAAO,EAAE,MAAM,UAAU;EAC3B,KAAK,UACH,QAAO,EAAE,MAAM,WAAW;EAC5B,KAAK,QAAQ;GACX,MAAM,UAAU;AAChB,UAAO;IAAE,MAAM;IAAU,MAAM,OAAO,OAAO,QAAQ,QAAQ;IAAE;;EAEjE,KAAK,WAEH,QAAO,gBADa,IACe,UAAU;EAE/C,QACE,QAAO,EAAE,MAAM,UAAU;;;;;;;;;;;;;;;;;;;AC/O/B,SAAgB,aAAa,SAIb;CACd,MAAM,SAAS,SAAS,UAAU,QAAQ,IAAI;AAC9C,KAAI,CAAC,OACH,OAAM,IAAI,MAAM,6EAA6E;AAG/F,QAAO,IAAI,gBAAgB;EACzB,SAAS,SAAS,WAAW;EAC7B,OAAO,SAAS,SAAS;EACzB,SAAS,EAAE,eAAe,UAAU,UAAU;EAC9C,UAAU;EACX,CAAC;;;;;;;;;;;AAuBJ,SAAgB,gBAAgB,SAA4D;CAC1F,MAAM,SAAS,SAAS,UAAU,QAAQ,IAAI;AAC9C,KAAI,CAAC,OACH,OAAM,IAAI,MACR,mFACD;CAGH,MAAM,QAAQ,SAAS,SAAS;AAEhC,QAAO,IAAI,gBAA4C;EACrD,SAAS;EACT;EACA,SAAS;GACP,aAAa;GACb,qBAAqB;GACtB;EACD,iBAAiB,QAAQ,UAAU;GACjC;GACA,YAAY,KAAK,aAAa;GAC9B,UAAU,CAAC;IAAE,MAAM;IAAQ,SAAS;IAAQ,CAAC;GAC7C,GAAI,KAAK,gBAAgB,EAAE,QAAQ,KAAK,cAAc;GACtD,GAAI,KAAK,gBAAgB,UAAa,EAAE,aAAa,KAAK,aAAa;GACxE;EACD,iBAAiB,QAAQ,IAAI,QAAQ,MAAM,SAAS,KAAK,SAAS,OAAO,EAAE,QAAQ;EACpF,CAAC;;;;;;;;;;;AAgBJ,SAAgB,WAAW,SAA4D;CACrF,MAAM,SAAS,SAAS,UAAU,QAAQ,IAAI;AAC9C,KAAI,CAAC,OACH,OAAM,IAAI,MAAM,yEAAyE;AAG3F,QAAO,IAAI,gBAAgB;EACzB,SAAS;EACT,OAAO,SAAS,SAAS;EACzB,SAAS,EAAE,eAAe,UAAU,UAAU;EAC9C,UAAU;EACX,CAAC;;;;;;;;;;AAeJ,SAAgB,aAAa,SAA2D;AACtF,QAAO,IAAI,gBAAgB;EACzB,SAAS,QAAQ,WAAW;EAC5B,OAAO,QAAQ;EACf,cAAc;EACd,cAAc;EACf,CAAC;;;;;;;;;;AAeJ,SAAgB,eAAe,SAA2D;AACx
F,QAAO,IAAI,gBAAgB;EACzB,SAAS,QAAQ,WAAW;EAC5B,OAAO,QAAQ;EACf,cAAc;EACd,cAAc;EACf,CAAC;;;;;;;;;;AAeJ,SAAgB,eAAe,SAA4D;CACzF,MAAM,SAAS,SAAS,UAAU,QAAQ,IAAI;AAC9C,KAAI,CAAC,OACH,OAAM,IAAI,MACR,iFACD;AAGH,QAAO,IAAI,gBAAgB;EACzB,SAAS;EACT,OAAO,SAAS,SAAS;EACzB,SAAS,EAAE,eAAe,UAAU,UAAU;EAC9C,UAAU;EACX,CAAC;;;;;;;;;;;;;AAkBJ,SAAgB,iBAAiB,SAKjB;CACd,MAAM,SAAS,QAAQ,UAAU,QAAQ,IAAI;AAC7C,KAAI,CAAC,OACH,OAAM,IAAI,MACR,qFACD;CAGH,MAAMC,UAAkC,EACtC,eAAe,UAAU,UAC1B;AAED,KAAI,QAAQ,QACV,SAAQ,kBAAkB,QAAQ;AAEpC,KAAI,QAAQ,SACV,SAAQ,aAAa,QAAQ;AAG/B,QAAO,IAAI,gBAAgB;EACzB,SAAS;EACT,OAAO,QAAQ;EACf;EACD,CAAC;;;;;;;;;;;;;;;AAoBJ,SAAgB,cACd,QACa;AACb,QAAO,IAAI,gBAAgB,OAAO"}
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import { B as BaseHttpProvider, h as ScrapedData, r as EnhancementType, s as ExtractionSchema, z as BaseHttpConfig } from "../types-DPEtPihB.cjs";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
//#region src/llm/types.d.ts
|
|
5
|
+
/**
|
|
6
|
+
* LLM completion options
|
|
7
|
+
*/
|
|
8
|
+
interface CompletionOptions {
|
|
9
|
+
maxTokens?: number;
|
|
10
|
+
temperature?: number;
|
|
11
|
+
systemPrompt?: string;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* LLM Provider interface - implemented by all providers
|
|
15
|
+
*/
|
|
16
|
+
interface LLMProvider {
|
|
17
|
+
readonly name: string;
|
|
18
|
+
/**
|
|
19
|
+
* Generate a text completion
|
|
20
|
+
*/
|
|
21
|
+
complete(prompt: string, options?: CompletionOptions): Promise<string>;
|
|
22
|
+
/**
|
|
23
|
+
* Generate a structured JSON completion with Zod validation
|
|
24
|
+
*/
|
|
25
|
+
completeJSON<T>(prompt: string, schema: z.ZodType<T>, options?: CompletionOptions): Promise<T>;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Enhancement result types
|
|
29
|
+
*/
|
|
30
|
+
interface SummaryResult {
|
|
31
|
+
summary: string;
|
|
32
|
+
}
|
|
33
|
+
interface TagsResult {
|
|
34
|
+
tags: string[];
|
|
35
|
+
}
|
|
36
|
+
interface EntitiesResult {
|
|
37
|
+
people: string[];
|
|
38
|
+
organizations: string[];
|
|
39
|
+
technologies: string[];
|
|
40
|
+
locations: string[];
|
|
41
|
+
concepts: string[];
|
|
42
|
+
}
|
|
43
|
+
interface ClassifyResult {
|
|
44
|
+
contentType: string;
|
|
45
|
+
confidence: number;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Zod schemas for LLM outputs
|
|
49
|
+
*/
|
|
50
|
+
declare const SummarySchema: z.ZodObject<{
|
|
51
|
+
summary: z.ZodString;
|
|
52
|
+
}, z.core.$strip>;
|
|
53
|
+
declare const TagsSchema: z.ZodObject<{
|
|
54
|
+
tags: z.ZodArray<z.ZodString>;
|
|
55
|
+
}, z.core.$strip>;
|
|
56
|
+
declare const EntitiesSchema: z.ZodObject<{
|
|
57
|
+
people: z.ZodArray<z.ZodString>;
|
|
58
|
+
organizations: z.ZodArray<z.ZodString>;
|
|
59
|
+
technologies: z.ZodArray<z.ZodString>;
|
|
60
|
+
locations: z.ZodArray<z.ZodString>;
|
|
61
|
+
concepts: z.ZodArray<z.ZodString>;
|
|
62
|
+
}, z.core.$strip>;
|
|
63
|
+
declare const ClassifySchema: z.ZodObject<{
|
|
64
|
+
contentType: z.ZodEnum<{
|
|
65
|
+
unknown: "unknown";
|
|
66
|
+
article: "article";
|
|
67
|
+
repo: "repo";
|
|
68
|
+
docs: "docs";
|
|
69
|
+
package: "package";
|
|
70
|
+
video: "video";
|
|
71
|
+
tool: "tool";
|
|
72
|
+
product: "product";
|
|
73
|
+
}>;
|
|
74
|
+
confidence: z.ZodNumber;
|
|
75
|
+
}, z.core.$strip>;
|
|
76
|
+
//#endregion
|
|
77
|
+
//#region src/llm/enhancer.d.ts
|
|
78
|
+
/**
|
|
79
|
+
* Enhance scraped data with LLM-powered features
|
|
80
|
+
*/
|
|
81
|
+
declare function enhance(data: ScrapedData, provider: LLMProvider, types: EnhancementType[]): Promise<Partial<ScrapedData>>;
|
|
82
|
+
/**
|
|
83
|
+
* Options for the ask() function
|
|
84
|
+
*/
|
|
85
|
+
interface AskOptions {
|
|
86
|
+
/** Key under which the result is stored in the `custom` field */
|
|
87
|
+
key?: string;
|
|
88
|
+
/** Schema for structured response */
|
|
89
|
+
schema?: ExtractionSchema;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Ask a custom question about the scraped content.
|
|
93
|
+
* Results are stored in the `custom` field of ScrapedData
|
|
94
|
+
*/
|
|
95
|
+
declare function ask(data: ScrapedData, provider: LLMProvider, prompt: string, options?: AskOptions): Promise<Partial<ScrapedData>>;
|
|
96
|
+
/**
|
|
97
|
+
* Extract structured data using LLM and a custom schema
|
|
98
|
+
*/
|
|
99
|
+
declare function extract<T>(data: ScrapedData, provider: LLMProvider, schema: ExtractionSchema, promptTemplate?: string): Promise<T>;
|
|
100
|
+
//#endregion
|
|
101
|
+
//#region src/llm/http.d.ts
|
|
102
|
+
/**
|
|
103
|
+
* HTTP LLM provider configuration.
|
|
104
|
+
*/
|
|
105
|
+
interface HttpLLMConfig<TRequest = unknown, TResponse = unknown, TError = unknown> extends BaseHttpConfig<TError> {
|
|
106
|
+
/**
|
|
107
|
+
* Build request body from prompt and options.
|
|
108
|
+
* @default OpenAI-compatible format with messages array
|
|
109
|
+
*/
|
|
110
|
+
requestBuilder?: (prompt: string, options: CompletionOptions) => TRequest;
|
|
111
|
+
/**
|
|
112
|
+
* Extract completion text from response.
|
|
113
|
+
* @default (res) => res.choices[0].message.content
|
|
114
|
+
*/
|
|
115
|
+
responseMapper?: (response: TResponse) => string;
|
|
116
|
+
/**
|
|
117
|
+
* Enable JSON mode - adds response_format to request.
|
|
118
|
+
* For OpenAI-compatible APIs, this adds { response_format: { type: "json_object" } }
|
|
119
|
+
*/
|
|
120
|
+
jsonMode?: boolean;
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* HTTP-based LLM provider.
|
|
124
|
+
* Works with any REST API using native fetch.
|
|
125
|
+
*/
|
|
126
|
+
declare class HttpLLMProvider<TRequest = unknown, TResponse = unknown, TError = unknown> extends BaseHttpProvider<TError> implements LLMProvider {
|
|
127
|
+
readonly name: string;
|
|
128
|
+
private readonly requestBuilder;
|
|
129
|
+
private readonly responseMapper;
|
|
130
|
+
private readonly jsonMode;
|
|
131
|
+
constructor(config: HttpLLMConfig<TRequest, TResponse, TError>);
|
|
132
|
+
/**
|
|
133
|
+
* Generate a text completion.
|
|
134
|
+
*/
|
|
135
|
+
complete(prompt: string, options?: CompletionOptions): Promise<string>;
|
|
136
|
+
/**
|
|
137
|
+
* Generate a structured JSON completion with Zod validation.
|
|
138
|
+
*/
|
|
139
|
+
completeJSON<T>(prompt: string, schema: z.ZodType<T>, options?: CompletionOptions): Promise<T>;
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Convert a Zod schema to a JSON Schema representation.
|
|
143
|
+
* Uses Zod's built-in toJSONSchema method (Zod 4+).
|
|
144
|
+
* Used for prompting LLMs to return structured data.
|
|
145
|
+
*/
|
|
146
|
+
declare function zodToJsonSchema(schema: z.ZodType<unknown>): object;
|
|
147
|
+
//#endregion
|
|
148
|
+
//#region src/llm/presets.d.ts
|
|
149
|
+
/**
|
|
150
|
+
* Create an OpenAI LLM provider.
|
|
151
|
+
*
|
|
152
|
+
* @example
|
|
153
|
+
* ```ts
|
|
154
|
+
* const provider = createOpenAI({ apiKey: 'sk-...' });
|
|
155
|
+
* const result = await scrape(url, { llm: provider, enhance: ['summarize'] });
|
|
156
|
+
* ```
|
|
157
|
+
*/
|
|
158
|
+
declare function createOpenAI(options?: {
|
|
159
|
+
apiKey?: string;
|
|
160
|
+
model?: string;
|
|
161
|
+
baseUrl?: string;
|
|
162
|
+
}): LLMProvider;
|
|
163
|
+
/**
|
|
164
|
+
* Create an Anthropic Claude LLM provider.
|
|
165
|
+
*
|
|
166
|
+
* @example
|
|
167
|
+
* ```ts
|
|
168
|
+
* const provider = createAnthropic({ apiKey: 'sk-...' });
|
|
169
|
+
* const result = await scrape(url, { llm: provider, enhance: ['summarize'] });
|
|
170
|
+
* ```
|
|
171
|
+
*/
|
|
172
|
+
declare function createAnthropic(options?: {
|
|
173
|
+
apiKey?: string;
|
|
174
|
+
model?: string;
|
|
175
|
+
}): LLMProvider;
|
|
176
|
+
/**
|
|
177
|
+
* Create a Groq LLM provider.
|
|
178
|
+
* Groq provides fast inference for open-source models.
|
|
179
|
+
*
|
|
180
|
+
* @example
|
|
181
|
+
* ```ts
|
|
182
|
+
* const provider = createGroq({ model: 'llama-3.1-70b-versatile' });
|
|
183
|
+
* ```
|
|
184
|
+
*/
|
|
185
|
+
declare function createGroq(options?: {
|
|
186
|
+
apiKey?: string;
|
|
187
|
+
model?: string;
|
|
188
|
+
}): LLMProvider;
|
|
189
|
+
/**
|
|
190
|
+
* Create an Ollama LLM provider for local models.
|
|
191
|
+
*
|
|
192
|
+
* @example
|
|
193
|
+
* ```ts
|
|
194
|
+
* const provider = createOllama({ model: 'llama3.2' });
|
|
195
|
+
* ```
|
|
196
|
+
*/
|
|
197
|
+
declare function createOllama(options: {
|
|
198
|
+
model: string;
|
|
199
|
+
baseUrl?: string;
|
|
200
|
+
}): LLMProvider;
|
|
201
|
+
/**
|
|
202
|
+
* Create an LM Studio LLM provider for local models.
|
|
203
|
+
*
|
|
204
|
+
* @example
|
|
205
|
+
* ```ts
|
|
206
|
+
* const provider = createLMStudio({ model: 'local-model' });
|
|
207
|
+
* ```
|
|
208
|
+
*/
|
|
209
|
+
declare function createLMStudio(options: {
|
|
210
|
+
model: string;
|
|
211
|
+
baseUrl?: string;
|
|
212
|
+
}): LLMProvider;
|
|
213
|
+
/**
|
|
214
|
+
* Create a Together AI LLM provider.
|
|
215
|
+
*
|
|
216
|
+
* @example
|
|
217
|
+
* ```ts
|
|
218
|
+
* const provider = createTogether({ model: 'meta-llama/Llama-3.2-3B-Instruct-Turbo' });
|
|
219
|
+
* ```
|
|
220
|
+
*/
|
|
221
|
+
declare function createTogether(options?: {
|
|
222
|
+
apiKey?: string;
|
|
223
|
+
model?: string;
|
|
224
|
+
}): LLMProvider;
|
|
225
|
+
/**
|
|
226
|
+
* Create an OpenRouter LLM provider.
|
|
227
|
+
* OpenRouter provides access to many models through a unified API.
|
|
228
|
+
*
|
|
229
|
+
* @example
|
|
230
|
+
* ```ts
|
|
231
|
+
* const provider = createOpenRouter({
|
|
232
|
+
* model: 'anthropic/claude-3.5-sonnet',
|
|
233
|
+
* });
|
|
234
|
+
* ```
|
|
235
|
+
*/
|
|
236
|
+
declare function createOpenRouter(options: {
|
|
237
|
+
apiKey?: string;
|
|
238
|
+
model: string;
|
|
239
|
+
siteUrl?: string;
|
|
240
|
+
siteName?: string;
|
|
241
|
+
}): LLMProvider;
|
|
242
|
+
/**
|
|
243
|
+
* Create a generic HTTP LLM provider.
|
|
244
|
+
* Use this for any OpenAI-compatible API.
|
|
245
|
+
*
|
|
246
|
+
* @example
|
|
247
|
+
* ```ts
|
|
248
|
+
* const provider = createHttpLLM({
|
|
249
|
+
* baseUrl: 'https://my-api.com/v1/chat/completions',
|
|
250
|
+
* model: 'my-model',
|
|
251
|
+
* headers: { Authorization: 'Bearer ...' },
|
|
252
|
+
* });
|
|
253
|
+
* ```
|
|
254
|
+
*/
|
|
255
|
+
declare function createHttpLLM<TRequest = unknown, TResponse = unknown, TError = unknown>(config: HttpLLMConfig<TRequest, TResponse, TError>): LLMProvider;
|
|
256
|
+
//#endregion
|
|
257
|
+
export { type AskOptions, type ClassifyResult, ClassifySchema, type CompletionOptions, type EntitiesResult, EntitiesSchema, type HttpLLMConfig, HttpLLMProvider, type LLMProvider, type SummaryResult, SummarySchema, type TagsResult, TagsSchema, ask, createAnthropic, createGroq, createHttpLLM, createLMStudio, createOllama, createOpenAI, createOpenRouter, createTogether, enhance, extract, zodToJsonSchema };
|
|
258
|
+
//# sourceMappingURL=index.d.cts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.cts","names":[],"sources":["../../src/llm/types.ts","../../src/llm/enhancer.ts","../../src/llm/http.ts","../../src/llm/presets.ts"],"sourcesContent":[],"mappings":";;;;;;;AAKiB,UAAA,iBAAA,CAAiB;EASjB,SAAA,CAAA,EAAA,MAAW;EAMS,WAAA,CAAA,EAAA,MAAA;EAAoB,YAAA,CAAA,EAAA,MAAA;;;;;AAK6B,UAXrE,WAAA,CAWqE;EAAO,SAAA,IAAA,EAAA,MAAA;EAM5E;AAIjB;AAIA;EAQiB,QAAA,CAAA,MAAA,EAAc,MAAA,EAAA,OAAA,CAAA,EA3BM,iBA2BN,CAAA,EA3B0B,OA2B1B,CAAA,MAAA,CAAA;EAQlB;;;EAAa,YAAA,CAAA,CAAA,CAAA,CAAA,MAAA,EAAA,MAAA,EAAA,MAAA,EA9BgB,CAAA,CAAE,OA8BlB,CA9B0B,CA8B1B,CAAA,EAAA,OAAA,CAAA,EA9BwC,iBA8BxC,CAAA,EA9B4D,OA8B5D,CA9BoE,CA8BpE,CAAA;;AAI1B;;;UA5BiB,aAAA;EA4BM,OAAA,EAAA,MAAA;;AAIV,UA5BI,UAAA,CAkCf;;;UA9Be,cAAA;;;;;;;UAQA,cAAA;;EAgBU,UAAA,EAAA,MAAA;;AAQ3B;;;cAhBa,eAAa,CAAA,CAAA;EAgBC,OAAA,aAAA;CAAA,eAAA,CAAA;cAZd,YAAU,CAAA,CAAA;;;AC9CD,cDkDT,cClDgB,EDkDF,CAAA,CAAA,SClDE,CAAA;EACrB,MAAA,YAAA,YAAA,CAAA;EACI,aAAA,YAAA,YAAA,CAAA;EACH,YAAA,YAAA,YAAA,CAAA;EACU,SAAA,YAAA,YAAA,CAAA;EAAR,QAAA,YAAA,YAAA,CAAA;CAAR,eAAA,CAAA;AAAO,cDsDG,cCtDH,EDsDiB,CAAA,CAAA,SCtDjB,CAAA;EAoDO,WAAA,WAAU,CAAA;IAWL,OAAG,EAAA,SAAA;IACjB,OAAA,EAAA,SAAA;IACI,IAAA,EAAA,MAAA;IAEA,IAAA,EAAA,MAAA;IACO,OAAA,EAAA,SAAA;IAAR,KAAA,EAAA,OAAA;IAAR,IAAA,EAAA,MAAA;IAAO,OAAA,EAAA,SAAA;EA8CY,CAAA,CAAA;EACd,UAAA,aAAA;CACI,eAAA,CAAA;;;;;ADhIZ;AASiB,iBCDK,OAAA,CDCM,IAAA,ECApB,WDAoB,EAAA,QAAA,ECChB,WDDgB,EAAA,KAAA,ECEnB,eDFmB,EAAA,CAAA,ECGzB,ODHyB,CCGjB,ODHiB,CCGT,WDHS,CAAA,CAAA;;;;AAWgB,UC4C3B,UAAA,CD5C2B;EAAsB;EAA4B,GAAA,CAAA,EAAA,MAAA;EAAR;EAAO,MAAA,CAAA,ECgDlF,gBDhDkF;AAM7F;AAIA;AAIA;AAQA;AAQA;iBCyBsB,GAAA,OACd,uBACI,uCAEA,aACT,QAAQ,QAAQ;;;;AD1BN,iBCwES,ODtEpB,CAAA,CAAA,CAAA,CAAA,IAAA,ECuEM,WDvEN,EAAA,QAAA,ECwEU,WDxEV,EAAA,MAAA,ECyEQ,gBDzER,EAAA,cAAA,CAAA,EAAA,MAAA,CAAA,EC2EC,OD3ED,CC2ES,CD3ET,CAAA;;;;;;AApCgE,UERjD,aFQiD,CAAA,WAAA,OAAA,EAAA,YAAA,OAAA,EAAA,SAAA,OAAA,CAAA,SEPxD,cFOwD,CEPzC,MFOyC,CAAA,CAAA;EAA4B;;;AAM9F;EAIiB,cAAU,CAAA,EAAA,CAAA,MAAA,EAAA,MAAA,EAAA,OAAA,EEZkB,iBFYlB,EAAA,GEZwC,QFYxC;EAIV;AAQjB;AAQA;;8BE3B8B
;EF2BJ;;AAI1B;;;;;;AAIA;;cEnBa,mFACH,iBAAiB,mBACd;;;;;sBAQS,cAAc,UAAU,WAAW;;;;qCAiEf,oBAAyB;EFxDxC;;AAQ3B;0CE0EY,CAAA,CAAE,QAAQ,cACT,oBACR,QAAQ;;;;;;;iBAkEG,eAAA,SAAwB,CAAA,CAAE;;;;;;;;;;;AFtL1C;AAIiB,iBGdD,YAAA,CHcW,QAAA,EAAA;EAIV,MAAA,CAAA,EAAA,MAAA;EAQA,KAAA,CAAA,EAAA,MAAA;EAQJ,OAAA,CAAA,EAAA,MAEX;IGhCE;;;;AHkCJ;;;;;;AAIa,iBGJG,eAAA,CHUd,QAAA,EAAA;;;IGV8E;;;;;;;;;;AHIrD,iBGqCX,UAAA,CHrCW,QAAA,EAAA;EAQd,MAAA,CAAA,EAAA,MAAA;;IG6B8D;;;;;;;AFvF3E;;AAEY,iBE+GI,YAAA,CF/GJ,OAAA,EAAA;EACH,KAAA,EAAA,MAAA;EACU,OAAA,CAAA,EAAA,MAAA;CAAR,CAAA,EE6GiE,WF7GjE;;;AAoDX;AAWA;;;;;AAKW,iBE8DK,cAAA,CF9DL,OAAA,EAAA;EAAR,KAAA,EAAA,MAAA;EAAO,OAAA,CAAA,EAAA,MAAA;AA8CV,CAAA,CAAA,EEgB8E,WFhBjD;;;;;;;;;iBEqCb,cAAA,QDjJ6B;EAN5B,MAAA,CAAA,EAAA,MAAA;EACQ,KAAA,CAAA,EAAA,MAAA;CAKoB,CAAA,ECiJkC,WDjJlC;;;;;AAqB7C;;;;;;;AA2EmE,iBCgFnD,gBAAA,CDhFmD,OAAA,EAAA;EA0B7C,MAAA,CAAA,EAAA,MAAA;EAAR,KAAA,EAAA,MAAA;EACD,OAAA,CAAA,EAAA,MAAA;EACA,QAAA,CAAA,EAAA,MAAA;CAAR,CAAA,ECyDD,WDzDC;;;;AAkEL;;;;AChMA;AAsCA;AAyCA;AA0BA;AAqBA;AAqBA;AA+BgB,iBAgDA,aA3CZ,CAAA,WAAW,OAAA,EAAA,YAAA,OAAA,EAAA,SAAA,OAAA,CAAA,CAAA,MAAA,EA4CL,aA5CK,CA4CS,QA5CT,EA4CmB,SA5CnB,EA4C8B,MA5C9B,CAAA,CAAA,EA6CZ,WA7CY"}
|