webpeel 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +39 -4
  2. package/dist/cli-auth.d.ts +6 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js.map +1 -1
  5. package/dist/cli.js +463 -22
  6. package/dist/cli.js.map +1 -1
  7. package/dist/core/challenge-detection.d.ts.map +1 -1
  8. package/dist/core/challenge-detection.js +39 -6
  9. package/dist/core/challenge-detection.js.map +1 -1
  10. package/dist/core/extract-listings.d.ts.map +1 -1
  11. package/dist/core/extract-listings.js +167 -36
  12. package/dist/core/extract-listings.js.map +1 -1
  13. package/dist/core/fetcher.d.ts +6 -0
  14. package/dist/core/fetcher.d.ts.map +1 -1
  15. package/dist/core/fetcher.js +147 -11
  16. package/dist/core/fetcher.js.map +1 -1
  17. package/dist/core/hotel-search.d.ts +121 -0
  18. package/dist/core/hotel-search.d.ts.map +1 -0
  19. package/dist/core/hotel-search.js +381 -0
  20. package/dist/core/hotel-search.js.map +1 -0
  21. package/dist/core/llm-extract.d.ts +42 -0
  22. package/dist/core/llm-extract.d.ts.map +1 -0
  23. package/dist/core/llm-extract.js +144 -0
  24. package/dist/core/llm-extract.js.map +1 -0
  25. package/dist/core/profiles.d.ts +48 -0
  26. package/dist/core/profiles.d.ts.map +1 -0
  27. package/dist/core/profiles.js +211 -0
  28. package/dist/core/profiles.js.map +1 -0
  29. package/dist/core/schema-extraction.d.ts +67 -0
  30. package/dist/core/schema-extraction.d.ts.map +1 -0
  31. package/dist/core/schema-extraction.js +353 -0
  32. package/dist/core/schema-extraction.js.map +1 -0
  33. package/dist/core/strategies.d.ts +5 -0
  34. package/dist/core/strategies.d.ts.map +1 -1
  35. package/dist/core/strategies.js +9 -2
  36. package/dist/core/strategies.js.map +1 -1
  37. package/dist/index.d.ts.map +1 -1
  38. package/dist/index.js +2 -1
  39. package/dist/index.js.map +1 -1
  40. package/dist/types.d.ts +6 -0
  41. package/dist/types.d.ts.map +1 -1
  42. package/dist/types.js.map +1 -1
  43. package/package.json +1 -1
@@ -0,0 +1,381 @@
1
+ /**
2
+ * Hotel search module — searches multiple travel sites and returns sorted hotel listings.
3
+ *
4
+ * Sources: Kayak, Booking.com, Google Travel, Expedia
5
+ * All sources are fetched in parallel; failures are captured per-source without
6
+ * crashing the overall search.
7
+ */
8
+ import { peel } from '../index.js';
9
+ import { extractListings } from './extract-listings.js';
10
+ import { findSchemaForUrl, extractWithSchema } from './schema-extraction.js';
11
+ // ── Date Parsing ──────────────────────────────────────────────────────────────
12
/**
 * Parse a date string (ISO or relative) into an ISO date string (YYYY-MM-DD).
 *
 * Supported formats (keywords are matched case-insensitively after trimming):
 *   - "today"            → the base date
 *   - "tomorrow"         → base date + 1 day
 *   - "next <weekday>"   → next occurrence of that weekday strictly after the
 *                          base date (the same weekday resolves to +7 days)
 *   - ISO "2026-02-20"   → returned as-is once verified to be a real calendar date
 *   - anything else      → handed to the `Date` constructor and formatted in local time
 *
 * @param input - Date expression to resolve.
 * @param baseDate - Reference date for relative expressions; defaults to now.
 * @returns The resolved date formatted as YYYY-MM-DD.
 * @throws Error when the input is an impossible calendar date (e.g. "2026-02-30")
 *         or cannot be parsed at all.
 */
export function parseDate(input, baseDate) {
    const base = baseDate ?? new Date();
    const trimmed = input.trim();
    const normalised = trimmed.toLowerCase();
    if (normalised === 'today') {
        return toIsoDate(base);
    }
    if (normalised === 'tomorrow') {
        // Copy first so the caller's base date is never mutated.
        const d = new Date(base);
        d.setDate(d.getDate() + 1);
        return toIsoDate(d);
    }
    // "next <weekday>"
    const nextMatch = normalised.match(/^next\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)$/);
    if (nextMatch) {
        // Index order matches Date#getDay() (0 = Sunday).
        const weekdays = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday'];
        const targetDay = weekdays.indexOf(nextMatch[1]);
        const d = new Date(base);
        let daysUntil = targetDay - d.getDay();
        if (daysUntil <= 0) {
            daysUntil += 7;
        }
        d.setDate(d.getDate() + daysUntil);
        return toIsoDate(d);
    }
    // ISO date (YYYY-MM-DD): accept only dates that exist on the calendar.
    const isoMatch = trimmed.match(/^(\d{4})-(\d{2})-(\d{2})$/);
    if (isoMatch) {
        const year = Number(isoMatch[1]);
        const month = Number(isoMatch[2]);
        const day = Number(isoMatch[3]);
        // setUTCFullYear avoids the two-digit-year remapping of Date.UTC; an
        // out-of-range month/day rolls over, which the comparison below detects.
        const probe = new Date(0);
        probe.setUTCFullYear(year, month - 1, day);
        const isRealDate = probe.getUTCFullYear() === year &&
            probe.getUTCMonth() === month - 1 &&
            probe.getUTCDate() === day;
        if (!isRealDate) {
            throw new Error(`Invalid calendar date: "${input}"`);
        }
        return trimmed;
    }
    // Fallback: try to parse as a generic date string
    const parsed = new Date(input);
    if (!isNaN(parsed.getTime())) {
        return toIsoDate(parsed);
    }
    throw new Error(`Unrecognized date format: "${input}"`);
}
/** Format a Date as YYYY-MM-DD using its local-time year, month and day. */
function toIsoDate(d) {
    const yyyy = d.getFullYear();
    const mm = String(d.getMonth() + 1).padStart(2, '0');
    const dd = String(d.getDate()).padStart(2, '0');
    return `${yyyy}-${mm}-${dd}`;
}
62
/**
 * Shift an ISO date string (YYYY-MM-DD) by a number of days.
 *
 * The date is interpreted at 12:00 UTC and all arithmetic and formatting use
 * the UTC getters/setters, so the result does not depend on the local zone.
 *
 * @param isoDate - Start date as YYYY-MM-DD.
 * @param days - Days to add (negative values move backwards).
 * @returns The shifted date as YYYY-MM-DD.
 */
export function addDays(isoDate, days) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    const shifted = new Date(isoDate + 'T12:00:00Z');
    shifted.setUTCDate(shifted.getUTCDate() + days);
    const parts = [
        shifted.getUTCFullYear(),
        twoDigits(shifted.getUTCMonth() + 1),
        twoDigits(shifted.getUTCDate()),
    ];
    return parts.join('-');
}
71
+ // ── URL Builders ──────────────────────────────────────────────────────────────
72
/**
 * Convert a destination name to a Kayak-friendly slug.
 * Each comma-separated part is trimmed and its whitespace runs become hyphens.
 *   e.g. "Manhattan, New York" → "Manhattan,New-York"
 *   e.g. "Long Island City" → "Long-Island-City"
 *
 * @param destination - Free-text destination name.
 * @returns The slug; it is not percent-encoded.
 */
export function toKayakSlug(destination) {
    return destination
        .split(',')
        .map(part => part.trim().replace(/\s+/g, '-'))
        .join(',');
}
/**
 * Build the search URL for each source (kayak, booking, google, expedia).
 *
 * The destination is percent-encoded wherever it is embedded. Booking and
 * Expedia receive it as a query parameter; Kayak and Google receive it as a
 * path segment, where commas stay literal and (for Google) spaces become "+",
 * so plain place names produce the same URLs as before while characters such
 * as "#", "?" and "/" can no longer break the path.
 *
 * @param destination - Free-text destination name.
 * @param checkin - Check-in date, YYYY-MM-DD.
 * @param checkout - Check-out date, YYYY-MM-DD.
 * @returns One `{ name, url }` entry per source, in a fixed order.
 */
export function buildSourceUrls(destination, checkin, checkout) {
    // Percent-encode a path segment but keep commas readable.
    const encodeSegment = (segment) => encodeURIComponent(segment).replace(/%2C/g, ',');
    const kayakSlug = encodeSegment(toKayakSlug(destination));
    const bookingDest = encodeURIComponent(destination);
    // Collapse whitespace runs first so each run maps to a single "+".
    const googleDest = encodeSegment(destination.replace(/\s+/g, ' ')).replace(/%20/g, '+');
    const expediaDest = encodeURIComponent(destination);
    return [
        {
            name: 'kayak',
            url: `https://www.kayak.com/hotels/${kayakSlug}/${checkin}/${checkout}?sort=price_a`,
        },
        {
            name: 'booking',
            url: `https://www.booking.com/searchresults.html?ss=${bookingDest}&checkin=${checkin}&checkout=${checkout}&order=price`,
        },
        {
            name: 'google',
            url: `https://www.google.com/travel/hotels/${googleDest}`,
        },
        {
            name: 'expedia',
            url: `https://www.expedia.com/Hotel-Search?destination=${expediaDest}&startDate=${checkin}&endDate=${checkout}&sort=PRICE_LOW_TO_HIGH`,
        },
    ];
}
110
+ // ── Price & Rating Parsers ────────────────────────────────────────────────────
111
/**
 * Turn a displayed price into a plain number.
 *
 * Currency markers ("US$", $, £, €, ¥, ₹) and thousands commas are dropped,
 * then the first number in what remains is used. No currency conversion is
 * performed: "£85" and "€95" simply become 85 and 95.
 *
 * Examples:
 *   "$119"   → 119
 *   "$1,299" → 1299
 *   "US$200" → 200
 *
 * @param raw - Price text as shown on the page.
 * @returns The numeric amount, or null when the text is empty or has no number.
 */
export function parsePrice(raw) {
    if (!raw) {
        return null;
    }
    const stripped = raw
        .replace(/US\$|[$£€¥₹]/g, '')
        .replace(/,/g, '')
        .trim();
    const firstNumber = /(\d+(?:\.\d+)?)/.exec(stripped);
    if (firstNumber === null) {
        return null;
    }
    const amount = parseFloat(firstNumber[1]);
    if (isNaN(amount)) {
        return null;
    }
    return amount;
}
134
/**
 * Parse a rating string into a numeric value.
 * Returns null if unparseable.
 *
 * Examples:
 *   "Scored 8.4"                   → 8.4
 *   "4.2/5"                        → 4.2
 *   "4.2/5 (1.4K)"                 → 4.2
 *   "8.3"                          → 8.3
 *   "Very Good 8.6"                → 8.6
 *   "8.6 Fabulous (1,234 reviews)" → 8.6 (parenthesised review counts are ignored)
 *
 * @param raw - Rating text as shown on the page.
 * @returns The rating, or null when the text is empty or holds no usable number.
 */
export function parseRating(raw) {
    if (!raw)
        return null;
    // Fraction form "N/M" (e.g. "4.2/5"): the numerator is the rating.
    const fractionMatch = raw.match(/(\d+(?:\.\d+)?)\s*\/\s*\d/);
    if (fractionMatch) {
        const n = parseFloat(fractionMatch[1]);
        return isNaN(n) ? null : n;
    }
    // Last number within the 0–10 range; handles "Scored 8.4", "Very Good 8.6"
    // and a standalone "8.3".
    const lastRatingIn = (text) => {
        const numbers = text.match(/\d+(?:\.\d+)?/g);
        if (!numbers)
            return null;
        for (let i = numbers.length - 1; i >= 0; i--) {
            const n = parseFloat(numbers[i]);
            if (!isNaN(n) && n >= 0 && n <= 10)
                return n;
        }
        return null;
    };
    // Parenthesised groups usually carry review counts ("(2 reviews)", "(1,234)")
    // whose digits would otherwise be mistaken for the rating. Look outside them
    // first and fall back to the full text so "Excellent (9.1)" still parses.
    const outsideParens = raw.replace(/\([^)]*\)/g, ' ');
    return lastRatingIn(outsideParens) ?? lastRatingIn(raw);
}
166
+ // ── Result Normalisation ──────────────────────────────────────────────────────
167
/**
 * Convert one extracted listing into a hotel result tagged with its source.
 *
 * Listings without a non-blank title are rejected. The raw price and rating
 * text is kept alongside the parsed numbers, and the listing's description is
 * reused as the hotel's location.
 *
 * @param item - Extracted listing (title, price, rating, link, description, image).
 * @param sourceName - Name of the site the listing came from.
 * @returns The hotel result, or null when the listing has no usable title.
 */
function normaliseToHotelResult(item, sourceName) {
    const hotelName = item.title?.trim();
    if (!hotelName) {
        return null;
    }
    const priceText = item.price ?? '';
    const ratingText = item.rating ?? '';
    const result = {
        name: hotelName,
        price: parsePrice(priceText),
        priceDisplay: priceText,
        rating: parseRating(ratingText),
        ratingDisplay: ratingText,
        source: sourceName,
        link: item.link ?? '',
        // Blank or missing values collapse to undefined.
        location: item.description?.trim() || undefined,
        image: item.image || undefined,
    };
    return result;
}
188
+ // ── Deduplication ─────────────────────────────────────────────────────────────
189
/**
 * Collapse hotels that share a name, keeping one entry per name.
 *
 * Names are compared case-insensitively with whitespace runs collapsed. Among
 * duplicates the entry with more known fields (price, rating) wins; on a tie a
 * later entry only replaces the kept one if it has a price and that price is
 * lower than (or the kept one lacks) a price. Results keep first-seen order.
 *
 * @param hotels - Hotel results, possibly from several sources.
 * @returns A new array with one entry per distinct hotel name.
 */
export function deduplicateHotels(hotels) {
    // Number of non-null data fields (0, 1 or 2).
    const completeness = (h) => (h.price !== null ? 1 : 0) + (h.rating !== null ? 1 : 0);
    const keptByKey = new Map();
    for (const candidate of hotels) {
        const key = candidate.name.toLowerCase().replace(/\s+/g, ' ').trim();
        const kept = keptByKey.get(key);
        if (!kept) {
            keptByKey.set(key, candidate);
            continue;
        }
        const gain = completeness(candidate) - completeness(kept);
        let replace = gain > 0;
        if (gain === 0) {
            replace = candidate.price !== null &&
                (kept.price === null || candidate.price < kept.price);
        }
        if (replace) {
            keptByKey.set(key, candidate);
        }
    }
    return [...keptByKey.values()];
}
218
+ // ── Sorting ───────────────────────────────────────────────────────────────────
219
/**
 * Return a sorted copy of the hotel list; the input array is not modified.
 *
 *   - 'price'  → cheapest first
 *   - 'rating' → best rated first
 *   - 'value'  → highest rating-per-price first (hotels with no price, a zero
 *                price or no rating have no value)
 *
 * In every mode hotels lacking the sort key are placed last. Any other `sort`
 * value yields an unsorted copy.
 *
 * @param hotels - Hotels to order.
 * @param sort - Sort mode: 'price', 'rating' or 'value'.
 * @returns A new array in the requested order.
 */
export function sortHotels(hotels, sort) {
    // Builds a comparator on a derived key where null keys always sort last.
    const nullsLast = (keyOf, descending) => (a, b) => {
        const ka = keyOf(a);
        const kb = keyOf(b);
        if (ka === null) {
            return kb === null ? 0 : 1;
        }
        if (kb === null) {
            return -1;
        }
        return descending ? kb - ka : ka - kb;
    };
    const valueOf = (h) => {
        const incomplete = h.price === null || h.price === 0 || h.rating === null;
        return incomplete ? null : h.rating / h.price;
    };
    let compare;
    if (sort === 'price') {
        compare = nullsLast((h) => h.price, false);
    }
    else if (sort === 'rating') {
        compare = nullsLast((h) => h.rating, true);
    }
    else if (sort === 'value') {
        compare = nullsLast(valueOf, true);
    }
    const ordered = [...hotels];
    if (compare) {
        ordered.sort(compare);
    }
    return ordered;
}
272
+ // ── Main Function ─────────────────────────────────────────────────────────────
273
// Sources queried when the caller does not restrict them.
const DEFAULT_SOURCES = ['kayak', 'booking', 'google', 'expedia'];
// Timeouts handed to peel(): plain fetch, rendered fetch, and Expedia.
// NOTE(review): presumably milliseconds — confirm against peel()'s options.
const SIMPLE_TIMEOUT = 15_000;
const BROWSER_TIMEOUT = 30_000;
const EXPEDIA_TIMEOUT = 60_000;
/**
 * Query the enabled travel sites in parallel and merge what they return.
 *
 * A missing checkout defaults to one night after checkin. Every source is
 * fetched independently; a source that fails is recorded in `sources` with
 * status 'blocked' or 'error' and does not abort the search. Successful
 * listings are deduplicated, sorted (default 'price') and cut to `limit`
 * (default 20).
 *
 * @param options - destination, checkin, and optional checkout, sort, limit,
 *                  sources and stealth.
 * @returns destination, resolved dates, the limited result list with its
 *          length as `totalResults`, per-source stats and elapsed time (ms).
 * @throws Error when a date cannot be parsed (raised by parseDate).
 */
export async function searchHotels(options) {
    const startedAt = Date.now();
    // ── Resolve inputs ─────────────────────────────────────────────────────────
    const checkin = parseDate(options.checkin);
    const requestedCheckout = options.checkout;
    let checkout;
    if (requestedCheckout) {
        checkout = parseDate(requestedCheckout);
    }
    else {
        checkout = addDays(checkin, 1);
    }
    const destination = options.destination;
    const sort = options.sort ?? 'price';
    const limit = options.limit ?? 20;
    const enabled = new Set((options.sources ?? DEFAULT_SOURCES).map(s => s.toLowerCase()));
    const useGlobalStealth = options.stealth ?? false;
    const targets = buildSourceUrls(destination, checkin, checkout).filter(t => enabled.has(t.name));
    const stringOrUndefined = (value) => (typeof value === 'string' ? value : undefined);
    // ── Fetch and extract one source ───────────────────────────────────────────
    const scrape = async (src) => {
        const isExpedia = src.name === 'expedia';
        // Kayak and Expedia always use stealth; Booking is rendered without it.
        const useStealth = useGlobalStealth || src.name === 'kayak' || isExpedia;
        const useRender = useStealth || src.name === 'booking';
        let timeout = SIMPLE_TIMEOUT;
        if (isExpedia) {
            timeout = EXPEDIA_TIMEOUT;
        }
        else if (useRender) {
            timeout = BROWSER_TIMEOUT;
        }
        // Expedia is a SPA — wait for property listings to appear before extracting
        const expediaExtras = isExpedia
            ? { actions: [{ type: 'waitForSelector', selector: "[data-stid='property-listing'], li.uitk-spacing" }] }
            : {};
        const page = await peel(src.url, {
            format: 'html',
            render: useRender,
            stealth: useStealth,
            timeout,
            ...expediaExtras,
        });
        const hotels = [];
        const keep = (listing) => {
            const hotel = normaliseToHotelResult(listing, src.name);
            if (hotel) {
                hotels.push(hotel);
            }
        };
        // Prefer CSS schema extraction when a schema is available for this source
        const schema = findSchemaForUrl(src.url);
        if (schema) {
            for (const item of extractWithSchema(page.content, schema, src.url)) {
                keep({
                    title: stringOrUndefined(item.title),
                    price: stringOrUndefined(item.price),
                    rating: stringOrUndefined(item.rating),
                    link: stringOrUndefined(item.link),
                    image: stringOrUndefined(item.image),
                    description: stringOrUndefined(item.location),
                });
            }
        }
        // Fall back to generic extraction if schema yielded nothing
        if (hotels.length === 0) {
            for (const item of extractListings(page.content, src.url)) {
                keep(item);
            }
        }
        return { name: src.name, hotels };
    };
    const settled = await Promise.allSettled(targets.map(t => scrape(t)));
    // ── Collect per-source status and results ──────────────────────────────────
    const sourceStats = [];
    const collected = [];
    targets.forEach((src, index) => {
        const outcome = settled[index];
        if (outcome.status === 'fulfilled') {
            const found = outcome.value.hotels;
            sourceStats.push({ name: src.name, count: found.length, status: 'ok' });
            collected.push(...found);
            return;
        }
        const reason = outcome.reason;
        const errMsg = reason instanceof Error ? reason.message : String(reason);
        const lowered = errMsg.toLowerCase();
        const isBlocked = ['blocked', '403', 'cloudflare'].some(marker => lowered.includes(marker));
        sourceStats.push({
            name: src.name,
            count: 0,
            status: isBlocked ? 'blocked' : 'error',
            error: errMsg,
        });
    });
    // ── Deduplicate, sort, limit ───────────────────────────────────────────────
    const results = sortHotels(deduplicateHotels(collected), sort).slice(0, limit);
    const elapsed = Date.now() - startedAt;
    return {
        destination,
        checkin,
        checkout,
        totalResults: results.length,
        results,
        sources: sourceStats,
        elapsed,
    };
}
381
+ //# sourceMappingURL=hotel-search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hotel-search.js","sourceRoot":"","sources":["../../src/core/hotel-search.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AACnC,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAkD7E,iFAAiF;AAEjF;;;;;;;GAOG;AACH,MAAM,UAAU,SAAS,CAAC,KAAa,EAAE,QAAe;IACtD,MAAM,IAAI,GAAG,QAAQ,IAAI,IAAI,IAAI,EAAE,CAAC;IAEpC,YAAY;IACZ,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE9C,IAAI,UAAU,KAAK,OAAO,EAAE,CAAC;QAC3B,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED,IAAI,UAAU,KAAK,UAAU,EAAE,CAAC;QAC9B,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC;QACzB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;QAC3B,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,mBAAmB;IACnB,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,qEAAqE,CAAC,CAAC;IAC1G,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,QAAQ,GAAG,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChG,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,CAAC;QAClD,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC;QACzB,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC;QAC9B,IAAI,SAAS,GAAG,SAAS,GAAG,UAAU,CAAC;QACvC,IAAI,SAAS,IAAI,CAAC;YAAE,SAAS,IAAI,CAAC,CAAC;QACnC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,CAAC;QACnC,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,4BAA4B;IAC5B,IAAI,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;QAC7C,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IACtB,CAAC;IAED,kDAAkD;IAClD,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC;QAC7B,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;IAC3B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,GAAG,CAAC,CAAC;AAC1D,CAAC;AAED,SAAS,SAAS,CAAC,CAAO;IACxB,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;IAC7B,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACrD,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAChD,OAAO,GAAG,IAAI,IAAI,EAAE,IAAI,EAAE,EA
AE,CAAC;AAC/B,CAAC;AAED,2EAA2E;AAC3E,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAAY;IACnD,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,OAAO,GAAG,YAAY,CAAC,CAAC;IAC3C,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,CAAC,CAAC,cAAc,EAAE,CAAC;IAChC,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACxD,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACnD,OAAO,GAAG,IAAI,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC;AAC/B,CAAC;AAED,iFAAiF;AAEjF;;;;GAIG;AACH,MAAM,UAAU,WAAW,CAAC,WAAmB;IAC7C,OAAO,WAAW;SACf,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;SAC7C,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAOD;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,WAAmB,EACnB,OAAe,EACf,QAAgB;IAEhB,MAAM,SAAS,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IACpD,MAAM,UAAU,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAEpD,MAAM,WAAW,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IAEpD,OAAO;QACL;YACE,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,gCAAgC,SAAS,IAAI,OAAO,IAAI,QAAQ,eAAe;SACrF;QACD;YACE,IAAI,EAAE,SAAS;YACf,GAAG,EAAE,iDAAiD,WAAW,YAAY,OAAO,aAAa,QAAQ,cAAc;SACxH;QACD;YACE,IAAI,EAAE,QAAQ;YACd,GAAG,EAAE,wCAAwC,UAAU,EAAE;SAC1D;QACD;YACE,IAAI,EAAE,SAAS;YACf,GAAG,EAAE,oDAAoD,WAAW,cAAc,OAAO,YAAY,QAAQ,yBAAyB;SACvI;KACF,CAAC;AACJ,CAAC;AAED,iFAAiF;AAEjF;;;;;;;;;;GAUG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,+DAA+D;IAC/D,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1E,uBAAuB;IACvB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAC/C,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,CAAC;IAChC,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,uCAAuC;IACvC,MAAM,WAAW,GAAG,GAAG,CAAC,KAAK,CAAC,2BA
A2B,CAAC,CAAC;IAC3D,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,CAAC,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC,CAAC;QACtC,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IAED,gFAAgF;IAChF,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IAC/C,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,0EAA0E;IAC1E,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAC;QACnC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,iFAAiF;AAEjF;;GAEG;AACH,SAAS,sBAAsB,CAC7B,IAAiJ,EACjJ,UAAkB;IAElB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;IACtC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;IAExC,OAAO;QACL,IAAI;QACJ,KAAK,EAAE,UAAU,CAAC,YAAY,CAAC;QAC/B,YAAY;QACZ,MAAM,EAAE,WAAW,CAAC,aAAa,CAAC;QAClC,aAAa;QACb,MAAM,EAAE,UAAU;QAClB,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;QACrB,QAAQ,EAAE,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS;QAC/C,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;KAC/B,CAAC;AACJ,CAAC;AAED,iFAAiF;AAEjF;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAAqB;IACrD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuB,CAAC;IAE9C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACjE,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACvB,SAAS;QACX,CAAC;QAED,yCAAyC;QACzC,MAAM,QAAQ,GAAG,CAAC,KAAK,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAClF,MAAM,QAAQ,GAAG,CAAC,QAAQ,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAExF,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;YACxB,MAAM,CAAC,GAAG,C
AAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACzB,CAAC;aAAM,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACjC,wEAAwE;YACxE,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtF,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;AACrC,CAAC;AAED,iFAAiF;AAEjF;;;;;GAKG;AACH,MAAM,UAAU,UAAU,CAAC,MAAqB,EAAE,IAAkC;IAClF,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;IAE3B,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,OAAO;YACV,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACnB,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBACnD,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBAC/B,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC,CAAC;gBAChC,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;YAC3B,CAAC,CAAC,CAAC;YACH,MAAM;QAER,KAAK,QAAQ;YACX,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACnB,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBACrD,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBAChC,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC,CAAC;gBACjC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;YAC7B,CAAC,CAAC,CAAC;YACH,MAAM;QAER,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,OAAO,GAAG,CAAC,CAAc,EAAiB,EAAE;gBAChD,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,IAAI,CAAC;gBACxE,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC;YAC5B,CAAC,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACnB,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACtB,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,EAAE,KAAK,IAAI,IAAI,EAAE,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBACzC,IAAI,EAAE,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBAC1B,IAAI,EAAE,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC,CAAC;gBAC3B,OAAO,EAAE,GAAG,EAAE,CAAC;YACjB,CAAC,CAAC,CAAC;YACH,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,iFAAiF;AAEjF,MAAM,eAAe,GAAG,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC,
CAAC;AAClE,MAAM,cAAc,GAAG,MAAM,CAAC;AAC9B,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,OAA2B;IAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,8EAA8E;IAC9E,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC;IACrC,MAAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAE5E,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IACxC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAC/F,MAAM,gBAAgB,GAAG,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC;IAElD,8EAA8E;IAC9E,MAAM,aAAa,GAAG,eAAe,CAAC,WAAW,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAC/E,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAC3B,CAAC;IAEF,8EAA8E;IAC9E,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,aAAa,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC9B,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,KAAK,OAAO,CAAC;QACrC,MAAM,SAAS,GAAG,GAAG,CAAC,IAAI,KAAK,SAAS,CAAC;QACzC,MAAM,SAAS,GAAG,GAAG,CAAC,IAAI,KAAK,SAAS,CAAC;QAEzC,MAAM,UAAU,GAAG,gBAAgB,IAAI,OAAO,IAAI,SAAS,CAAC;QAC5D,MAAM,SAAS,GAAG,UAAU,IAAI,SAAS,CAAC;QAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;QAE7F,4EAA4E;QAC5E,MAAM,OAAO,GAA6B,SAAS;YACjD,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,QAAQ,EAAE,iDAAiD,EAAE,CAAC;YAC5F,CAAC,CAAC,SAAS,CAAC;QAEd,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE;YACjC,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,UAAU;YACnB,OAAO;YACP,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAChC,CAAC,CAAC;QAEH,0EAA0E;QAC1E,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACzC,MAAM,MAAM,GAAkB,EAAE,CAAC;QAEjC,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,iBAAiB,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;YACvE,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,C
AAC;gBAC/B,MAAM,MAAM,GAAG;oBACb,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBAC9D,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBAC9D,MAAM,EAAE,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;oBACjE,IAAI,EAAE,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;oBAC3D,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBAC9D,WAAW,EAAE,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;iBAC3E,CAAC;gBACF,MAAM,KAAK,GAAG,sBAAsB,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;gBACvD,IAAI,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;QAED,4DAA4D;QAC5D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1D,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;gBAC5B,MAAM,KAAK,GAAG,sBAAsB,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;gBACrD,IAAI,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;IACpC,CAAC,CAAC,CACH,CAAC;IAEF,8EAA8E;IAC9E,MAAM,WAAW,GAAiC,EAAE,CAAC;IACrD,MAAM,SAAS,GAAkB,EAAE,CAAC;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,GAAG,GAAG,aAAa,CAAC,CAAC,CAAE,CAAC;QAC9B,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAE,CAAC;QAE5B,IAAI,OAAO,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YACnC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC;YACjC,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YACzE,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,YAAY,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACjG,MAAM,SAAS,GACb,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC;gBACxC,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC;gBACpC,MAAM,CAAC,WAAW,EAAE,CAAC,Q
AAQ,CAAC,YAAY,CAAC,CAAC;YAC9C,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO;gBACvC,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACxC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAEvC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;IAEvC,OAAO;QACL,WAAW;QACX,OAAO;QACP,QAAQ;QACR,YAAY,EAAE,OAAO,CAAC,MAAM;QAC5B,OAAO;QACP,OAAO,EAAE,WAAW;QACpB,OAAO;KACR,CAAC;AACJ,CAAC"}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * LLM-based extraction: sends markdown/text content to an LLM
3
+ * with instructions to extract structured data.
4
+ *
5
+ * Supports OpenAI-compatible APIs (OpenAI, Anthropic via proxy, local models).
6
+ */
7
+ export interface LLMExtractionOptions { // Options accepted by extractWithLLM.
8
+ content: string; // Markdown/text content the data is extracted from.
9
+ instruction?: string; // Extra extraction instruction — presumably forwarded to buildUserMessage; confirm in the implementation.
10
+ schema?: object; // Desired output schema — presumably forwarded to buildUserMessage; confirm in the implementation.
11
+ apiKey?: string; // NOTE(review): presumably the key for the OpenAI-compatible API; any fallback is not visible here.
12
+ baseUrl?: string; // NOTE(review): presumably the API base URL; the default is not visible here.
13
+ model?: string; // NOTE(review): presumably the model identifier; the default is not visible here.
14
+ maxTokens?: number; // NOTE(review): presumably caps the response length — confirm.
15
+ }
16
+ export interface LLMExtractionResult { // Value resolved by extractWithLLM.
17
+ items: Array<Record<string, any>>; // Extracted objects; the keys depend on the content and instructions.
18
+ tokensUsed: { // Token usage, split into input and output counts.
19
+ input: number;
20
+ output: number;
21
+ };
22
+ model: string; // NOTE(review): presumably the model that served the request — confirm.
23
+ cost?: number; // NOTE(review): presumably the USD estimate from estimateCost, absent for unknown models — confirm.
24
+ }
25
+ /**
26
+ * Build the user message from content + optional instruction + optional schema.
+ * Over-long content is truncated before being embedded; the schema (if any)
+ * is appended before the instruction (if any).
27
+ */
28
+ export declare function buildUserMessage(content: string, instruction?: string, schema?: object): string;
29
+ /**
30
+ * Calculate estimated cost in USD for a given model and token counts.
+ * Returns undefined when the model matches no known pricing entry.
31
+ */
32
+ export declare function estimateCost(model: string, inputTokens: number, outputTokens: number): number | undefined;
33
+ /**
34
+ * Parse the LLM response text into an items array.
35
+ * Handles both `{ "items": [...] }` and `[...]` formats.
36
+ */
37
+ export declare function parseItems(text: string): Array<Record<string, any>>;
38
+ /**
39
+ * Extract structured data from content using an LLM.
40
+ */
41
+ export declare function extractWithLLM(options: LLMExtractionOptions): Promise<LLMExtractionResult>;
42
+ //# sourceMappingURL=llm-extract.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-extract.d.ts","sourceRoot":"","sources":["../../src/core/llm-extract.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,MAAM,WAAW,oBAAoB;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC;IAClC,UAAU,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9C,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAcD;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAe/F;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAMzG;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAqCnE;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAsEhG"}
@@ -0,0 +1,144 @@
1
+ /**
2
+ * LLM-based extraction: sends markdown/text content to an LLM
3
+ * with instructions to extract structured data.
4
+ *
5
+ * Supports OpenAI-compatible APIs (OpenAI, Anthropic via proxy, local models).
6
+ */
7
+ // Cost in USD per 1M tokens as [input, output], keyed by model-name prefix (estimateCost matches keys with startsWith)
8
+ const MODEL_COSTS = {
9
+ 'gpt-4o-mini': [0.15, 0.60],
10
+ 'gpt-4o': [2.50, 10.0],
11
+ };
12
/**
 * System prompt sent with every extraction request.
 *
 * The request is issued with `response_format: { type: 'json_object' }`, so the
 * model is asked for a JSON *object* with a fixed `items` key rather than a bare
 * array. `parseItems` unwraps `items`; a bare array or an object using an
 * arbitrary key would not round-trip reliably in JSON-object mode.
 */
const SYSTEM_PROMPT = `You are a data extraction assistant. Extract structured data from the provided web content.
Return a JSON object of the form {"items": [...]}. Each element of "items" is an object representing one item/listing found on the page.
Always include these fields when available: title, price, link, rating, description, image.
If the user provides additional instructions, follow them.
Return ONLY valid JSON — no markdown, no explanation, just the object.`;
17
/**
 * Build the user message sent to the LLM.
 *
 * The message always starts with the page content, followed (when provided) by
 * the target schema and then the free-form instruction.
 *
 * @param content     Markdown/text content of the page.
 * @param instruction Optional extra instruction appended at the end.
 * @param schema      Optional schema object, serialized as pretty-printed JSON.
 * @returns The assembled user message.
 */
export function buildUserMessage(content, instruction, schema) {
    // Cap the content at the same size that triggers truncation. Previously,
    // content just over the limit was cut to half of it, so a slightly longer
    // page lost far more text than a page exactly at the limit.
    const maxContentChars = 100_000;
    const truncated = content.length > maxContentChars ? content.slice(0, maxContentChars) : content;
    const sections = [`Here is the web content to extract data from:\n\n${truncated}`];
    if (schema) {
        sections.push(`Extract data matching this schema: ${JSON.stringify(schema, null, 2)}`);
    }
    if (instruction) {
        sections.push(`Additional instruction: ${instruction}`);
    }
    return sections.join('\n\n');
}
32
/**
 * Calculate the estimated cost in USD for a given model and token counts.
 *
 * The model name is matched against the known price table by prefix, so dated
 * variants such as `gpt-4o-mini-2024-07-18` resolve to their base model.
 *
 * @param model        Model identifier as reported by the API or requested by the caller.
 * @param inputTokens  Number of prompt tokens.
 * @param outputTokens Number of completion tokens.
 * @returns Estimated cost in USD, or `undefined` when the model is not in the price table.
 */
export function estimateCost(model, inputTokens, outputTokens) {
    if (typeof model !== 'string')
        return undefined;
    // Choose the longest matching prefix: 'gpt-4o' is itself a prefix of
    // 'gpt-4o-mini', so a first-match lookup would depend on table order.
    let bestKey;
    for (const candidate of Object.keys(MODEL_COSTS)) {
        if (model.startsWith(candidate) && (bestKey === undefined || candidate.length > bestKey.length)) {
            bestKey = candidate;
        }
    }
    if (bestKey === undefined)
        return undefined;
    const [inputRate, outputRate] = MODEL_COSTS[bestKey];
    // Rates are expressed per one million tokens.
    return (inputTokens / 1_000_000) * inputRate + (outputTokens / 1_000_000) * outputRate;
}
43
/**
 * Parse the LLM response text into an items array.
 *
 * Accepted shapes:
 *  - a bare array: `[...]`
 *  - a wrapper object: `{ "items": [...] }`, `{ "data": [...] }` or `{ "results": [...] }`
 *  - a single object, which is wrapped in a one-element array
 *
 * If the text as a whole is not valid JSON (for example the model added a
 * preamble), the outermost `{...}` and `[...]` spans are tried in the order
 * they appear in the text; the first one that parses wins.
 *
 * @param text Raw response text from the LLM.
 * @returns The extracted items; an empty array when the JSON is a primitive.
 * @throws Error when no JSON value can be recovered from the text.
 */
export function parseItems(text) {
    const trimmed = text.trim();
    let parsed;
    try {
        // Try to parse as-is first
        parsed = JSON.parse(trimmed);
    }
    catch {
        // Sometimes LLMs add a preamble despite instructions. Collect both
        // candidate spans and try the one that starts first, so an object that
        // merely contains arrays is not mistaken for a top-level array, and a
        // candidate that fails to parse does not prevent trying the other.
        const candidates = [trimmed.match(/\{[\s\S]*\}/), trimmed.match(/\[[\s\S]*\]/)]
            .filter((match) => match !== null)
            .sort((a, b) => a.index - b.index);
        let recovered = false;
        for (const candidate of candidates) {
            try {
                parsed = JSON.parse(candidate[0]);
                recovered = true;
                break;
            }
            catch { /* try the next candidate */ }
        }
        if (!recovered) {
            throw new Error(`Failed to parse LLM response as JSON: ${trimmed.slice(0, 200)}`);
        }
    }
    // Handle bare array
    if (Array.isArray(parsed)) {
        return parsed;
    }
    // Handle { items: [...] } or { data: [...] } or { results: [...] }
    if (parsed && typeof parsed === 'object') {
        for (const wrapperKey of ['items', 'data', 'results']) {
            if (Array.isArray(parsed[wrapperKey]))
                return parsed[wrapperKey];
        }
        // Single object — wrap in array
        return [parsed];
    }
    // Primitive JSON value (number, string, boolean, null): nothing to extract
    return [];
}
92
/**
 * Extract structured data from content using an LLM.
 *
 * Sends the content to an OpenAI-compatible `/chat/completions` endpoint and
 * parses the reply into an array of items.
 *
 * @param options Extraction options: `content` plus optional `instruction`,
 *   `schema`, `apiKey` (falls back to the OPENAI_API_KEY environment variable),
 *   `baseUrl`, `model` and `maxTokens`.
 * @returns The parsed items, token usage, the model that served the request and
 *   the estimated cost (undefined when the model's pricing is unknown).
 * @throws Error when no API key is available, when the API responds with a
 *   non-2xx status, or when the reply cannot be parsed as JSON.
 */
export async function extractWithLLM(options) {
    const { content, instruction, schema, baseUrl = 'https://api.openai.com/v1', model = 'gpt-4o-mini', maxTokens = 4000, } = options;
    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
    if (!apiKey) {
        throw new Error('LLM extraction requires an API key.\n' +
            'Set OPENAI_API_KEY environment variable or use --llm-key <key>');
    }
    // Tolerate a trailing slash on the base URL so the path never contains '//'.
    const endpoint = `${baseUrl.replace(/\/+$/, '')}/chat/completions`;
    const userMessage = buildUserMessage(content, instruction, schema);
    const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
            model,
            messages: [
                { role: 'system', content: SYSTEM_PROMPT },
                { role: 'user', content: userMessage },
            ],
            temperature: 0,
            max_tokens: maxTokens,
            response_format: { type: 'json_object' },
        }),
    });
    if (!response.ok) {
        if (response.status === 401) {
            throw new Error(`LLM API authentication failed (401). Check your API key.`);
        }
        if (response.status === 429) {
            throw new Error(`LLM API rate limit exceeded (429). Please wait and retry.`);
        }
        // Only the generic error reports the body, so read it only here.
        // Best-effort: a failed read must not hide the HTTP status.
        const body = await response.text().catch(() => '');
        throw new Error(`LLM API error: HTTP ${response.status}${body ? ` — ${body.slice(0, 200)}` : ''}`);
    }
    const data = await response.json();
    // Optional chaining on `data` itself guards against a literal `null` body.
    const rawText = data?.choices?.[0]?.message?.content ?? '';
    const items = parseItems(rawText);
    const inputTokens = data?.usage?.prompt_tokens ?? 0;
    const outputTokens = data?.usage?.completion_tokens ?? 0;
    // Prefer the model name reported by the API over the requested one.
    const resolvedModel = data?.model ?? model;
    const cost = estimateCost(resolvedModel, inputTokens, outputTokens);
    return {
        items,
        tokensUsed: { input: inputTokens, output: outputTokens },
        model: resolvedModel,
        cost,
    };
}
144
+ //# sourceMappingURL=llm-extract.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-extract.js","sourceRoot":"","sources":["../../src/core/llm-extract.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAmBH,sDAAsD;AACtD,MAAM,WAAW,GAAqC;IACpD,aAAa,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;IAC3B,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;CACvB,CAAC;AAEF,MAAM,aAAa,GAAG;;;;sEAIgD,CAAC;AAEvE;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,WAAoB,EAAE,MAAe;IACrF,sCAAsC;IACtC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IAEhF,IAAI,GAAG,GAAG,oDAAoD,SAAS,EAAE,CAAC;IAE1E,IAAI,MAAM,EAAE,CAAC;QACX,GAAG,IAAI,0CAA0C,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;IACrF,CAAC;IAED,IAAI,WAAW,EAAE,CAAC;QAChB,GAAG,IAAI,+BAA+B,WAAW,EAAE,CAAC;IACtD,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,KAAa,EAAE,WAAmB,EAAE,YAAoB;IACnF,6EAA6E;IAC7E,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC;IACnF,IAAI,CAAC,GAAG;QAAE,OAAO,SAAS,CAAC;IAC3B,MAAM,CAAC,SAAS,EAAE,UAAU,CAAC,GAAG,WAAW,CAAC,GAAG,CAAE,CAAC;IAClD,OAAO,CAAC,WAAW,GAAG,SAAS,CAAC,GAAG,SAAS,GAAG,CAAC,YAAY,GAAG,SAAS,CAAC,GAAG,UAAU,CAAC;AACzF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,2BAA2B;IAC3B,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,uFAAuF;QACvF,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAChD,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC9C,IAAI,UAAU,EAAE,CAAC;YACf,IAAI,CAAC;gBAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,kBAAkB,CAAC,CAAC;QAC1E,CAAC;aAAM,IAAI,QAAQ,EAAE,CAAC;YACpB,IAAI,CAAC;gBAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,kBAAkB,CAAC,CAAC;QACxE,CAAC;QACD,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,yCAAyC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACpF,CAA
C;IACH,CAAC;IAED,mEAAmE;IACnE,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACnE,MAAM,GAAG,GAAG,MAA6B,CAAC;QAC1C,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YAAE,OAAO,GAAG,CAAC,OAAO,CAAC,CAAC;QACrD,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAAE,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC;QACnD,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAAE,OAAO,GAAG,CAAC,SAAS,CAAC,CAAC;QACzD,gCAAgC;QAChC,OAAO,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IAED,oBAAoB;IACpB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,OAA6B;IAChE,MAAM,EACJ,OAAO,EACP,WAAW,EACX,MAAM,EACN,OAAO,GAAG,2BAA2B,EACrC,KAAK,GAAG,aAAa,EACrB,SAAS,GAAG,IAAI,GACjB,GAAG,OAAO,CAAC;IAEZ,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;IAE5D,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CACb,uCAAuC;YACvC,gEAAgE,CACjE,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,gBAAgB,CAAC,OAAO,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IAEnE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,mBAAmB,EAAE;QAC1D,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,eAAe,EAAE,UAAU,MAAM,EAAE;SACpC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,KAAK;YACL,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;gBAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE;aACvC;YACD,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,SAAS;YACrB,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;SACzC,CAAC;KACH,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QACnD,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;QAC9E,CAAC;QACD,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAC/E,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACrG,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAI/B,CAAC;IAEF,MAAM,
OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;IAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;IAElC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC,CAAC;IACnD,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC,CAAC;IACxD,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC;IAC1C,MAAM,IAAI,GAAG,YAAY,CAAC,aAAa,EAAE,WAAW,EAAE,YAAY,CAAC,CAAC;IAEpE,OAAO;QACL,KAAK;QACL,UAAU,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE;QACxD,KAAK,EAAE,aAAa;QACpB,IAAI;KACL,CAAC;AACJ,CAAC"}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * WebPeel Profile Management
3
+ *
4
+ * Manages named browser profiles stored in ~/.webpeel/profiles/<name>/
5
+ * Each profile contains:
6
+ * - storage-state.json (Playwright storage state: cookies, localStorage, origins)
7
+ * - metadata.json (name, created, lastUsed, domains, description)
8
+ */
9
+ export interface ProfileMetadata {
10
+ name: string;
11
+ created: string;
12
+ lastUsed: string;
13
+ domains: string[];
14
+ description?: string;
15
+ }
16
+ /**
17
+ * Valid profile names: letters, digits, hyphens only. No spaces or special chars.
18
+ */
19
+ export declare function isValidProfileName(name: string): boolean;
20
+ /**
21
+ * Get the directory path for a named profile, or null if it doesn't exist.
22
+ */
23
+ export declare function getProfilePath(name: string): string | null;
24
+ /**
25
+ * Load the Playwright storage state (cookies + localStorage) for a named profile.
26
+ * Returns null if the profile or storage-state.json doesn't exist.
27
+ */
28
+ export declare function loadStorageState(name: string): any | null;
29
+ /**
30
+ * Update the lastUsed timestamp for a profile.
31
+ */
32
+ export declare function touchProfile(name: string): void;
33
+ /**
34
+ * List all profiles, sorted by lastUsed descending.
35
+ */
36
+ export declare function listProfiles(): ProfileMetadata[];
37
+ /**
38
+ * Delete a named profile. Returns true if deleted, false if not found.
39
+ */
40
+ export declare function deleteProfile(name: string): boolean;
41
+ /**
42
+ * Interactively create a new profile:
43
+ * 1. Launches a VISIBLE (headed) Chromium browser
44
+ * 2. User navigates and logs into sites
45
+ * 3. On browser close or Ctrl+C, captures storage state and saves the profile
46
+ */
47
+ export declare function createProfile(name: string, description?: string): Promise<void>;
48
+ //# sourceMappingURL=profiles.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"profiles.d.ts","sourceRoot":"","sources":["../../src/core/profiles.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAgBH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAcD;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAExD;AAID;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAM1D;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,GAAG,IAAI,CAQzD;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAU/C;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,eAAe,EAAE,CAsBhD;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CASnD;AAID;;;;;GAKG;AACH,wBAAsB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAqHrF"}