webpeel 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +39 -4
  2. package/dist/cli-auth.d.ts +6 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js.map +1 -1
  5. package/dist/cli.js +506 -23
  6. package/dist/cli.js.map +1 -1
  7. package/dist/core/challenge-detection.d.ts.map +1 -1
  8. package/dist/core/challenge-detection.js +39 -6
  9. package/dist/core/challenge-detection.js.map +1 -1
  10. package/dist/core/extract-listings.d.ts.map +1 -1
  11. package/dist/core/extract-listings.js +167 -36
  12. package/dist/core/extract-listings.js.map +1 -1
  13. package/dist/core/fetcher.d.ts +14 -1
  14. package/dist/core/fetcher.d.ts.map +1 -1
  15. package/dist/core/fetcher.js +176 -14
  16. package/dist/core/fetcher.js.map +1 -1
  17. package/dist/core/hotel-search.d.ts +123 -0
  18. package/dist/core/hotel-search.d.ts.map +1 -0
  19. package/dist/core/hotel-search.js +383 -0
  20. package/dist/core/hotel-search.js.map +1 -0
  21. package/dist/core/llm-extract.d.ts +56 -0
  22. package/dist/core/llm-extract.d.ts.map +1 -0
  23. package/dist/core/llm-extract.js +264 -0
  24. package/dist/core/llm-extract.js.map +1 -0
  25. package/dist/core/profiles.d.ts +48 -0
  26. package/dist/core/profiles.d.ts.map +1 -0
  27. package/dist/core/profiles.js +211 -0
  28. package/dist/core/profiles.js.map +1 -0
  29. package/dist/core/schema-extraction.d.ts +67 -0
  30. package/dist/core/schema-extraction.d.ts.map +1 -0
  31. package/dist/core/schema-extraction.js +353 -0
  32. package/dist/core/schema-extraction.js.map +1 -0
  33. package/dist/core/strategies.d.ts +11 -0
  34. package/dist/core/strategies.d.ts.map +1 -1
  35. package/dist/core/strategies.js +17 -5
  36. package/dist/core/strategies.js.map +1 -1
  37. package/dist/index.d.ts.map +1 -1
  38. package/dist/index.js +3 -1
  39. package/dist/index.js.map +1 -1
  40. package/dist/mcp/server.js +47 -3
  41. package/dist/mcp/server.js.map +1 -1
  42. package/dist/types.d.ts +16 -0
  43. package/dist/types.d.ts.map +1 -1
  44. package/dist/types.js.map +1 -1
  45. package/package.json +1 -1
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Hotel search module — searches multiple travel sites and returns sorted hotel listings.
3
+ *
4
+ * Sources: Kayak, Booking.com, Google Travel, Expedia
5
+ * All sources are fetched in parallel; failures are captured per-source without
6
+ * crashing the overall search.
7
+ */
8
+ export interface HotelSearchOptions {
9
+ /** Destination name, e.g. "Manhattan" or "Long Island City, New York" */
10
+ destination: string;
11
+ /** ISO date "2026-02-20" or relative string like "tomorrow" or "next friday" */
12
+ checkin: string;
13
+ /** ISO date or relative string. Defaults to checkin + 1 day if omitted. */
14
+ checkout?: string;
15
+ /** Sort order: price (default), rating, or value (rating/price ratio) */
16
+ sort?: 'price' | 'rating' | 'value';
17
+ /** Max results to return. Default: 20 */
18
+ limit?: number;
19
+ /** Specific sources to use. Default: all (kayak, booking, google, expedia) */
20
+ sources?: string[];
21
+ /** Use stealth mode for all sources */
22
+ stealth?: boolean;
23
+ /** Suppress progress output */
24
+ silent?: boolean;
25
+ /** Proxy URL for requests (http://host:port, socks5://user:pass@host:port) */
26
+ proxy?: string;
27
+ }
28
+ export interface HotelResult {
29
+ name: string;
30
+ /** Numeric price in USD (null if unknown) */
31
+ price: number | null;
32
+ /** "$119" as shown on the source */
33
+ priceDisplay: string;
34
+ /** Numeric rating (null if unknown) */
35
+ rating: number | null;
36
+ /** "8.4" or "4.2/5" as shown on the source */
37
+ ratingDisplay: string;
38
+ source: string;
39
+ link: string;
40
+ location?: string;
41
+ image?: string;
42
+ }
43
+ export interface HotelSearchResult {
44
+ destination: string;
45
+ checkin: string;
46
+ checkout: string;
47
+ totalResults: number;
48
+ results: HotelResult[];
49
+ sources: {
50
+ name: string;
51
+ count: number;
52
+ status: 'ok' | 'blocked' | 'error';
53
+ error?: string;
54
+ }[];
55
+ elapsed: number;
56
+ }
57
+ /**
58
+ * Parse a date string (ISO or relative) into an ISO date string (YYYY-MM-DD).
59
+ *
60
+ * Supported relative formats:
61
+ * - "today" / "tomorrow" → the base date / the base date + 1 day
62
+ * - "next <weekday>" → next occurrence of that weekday
63
+ * - ISO date "2026-02-20" → returned as-is
64
+ */
65
+ export declare function parseDate(input: string, baseDate?: Date): string;
66
+ /** Add N days to an ISO date string and return the new ISO date string. */
67
+ export declare function addDays(isoDate: string, days: number): string;
68
+ /**
69
+ * Convert a destination name to a Kayak-friendly slug.
70
+ * e.g. "Manhattan, New York" → "Manhattan,New-York"
71
+ * e.g. "Long Island City" → "Long-Island-City"
72
+ */
73
+ export declare function toKayakSlug(destination: string): string;
74
+ export interface SourceUrl {
75
+ name: string;
76
+ url: string;
77
+ }
78
+ /**
79
+ * Build the search URL for each source.
80
+ */
81
+ export declare function buildSourceUrls(destination: string, checkin: string, checkout: string): SourceUrl[];
82
+ /**
83
+ * Parse a price display string into a numeric USD value.
84
+ * Returns null if unparseable.
85
+ *
86
+ * Examples:
87
+ * "$119" → 119
88
+ * "$1,299" → 1299
89
+ * "£85" → 85 (GBP treated as USD approximation)
90
+ * "€95" → 95
91
+ * "US$200" → 200
92
+ */
93
+ export declare function parsePrice(raw: string): number | null;
94
+ /**
95
+ * Parse a rating string into a numeric value.
96
+ * Returns null if unparseable.
97
+ *
98
+ * Examples:
99
+ * "Scored 8.4" → 8.4
100
+ * "4.2/5" → 4.2
101
+ * "4.2/5 (1.4K)" → 4.2
102
+ * "8.3" → 8.3
103
+ * "Very Good 8.6" → 8.6
104
+ */
105
+ export declare function parseRating(raw: string): number | null;
106
+ /**
107
+ * Deduplicate hotel results by name (case-insensitive).
108
+ * When duplicates exist, keep the one with the most data (price + rating),
109
+ * with lowest price as a tiebreaker.
110
+ */
111
+ export declare function deduplicateHotels(hotels: HotelResult[]): HotelResult[];
112
+ /**
113
+ * Sort hotel results.
114
+ * - price: ascending, nulls last
115
+ * - rating: descending, nulls last
116
+ * - value: rating/price ratio, descending, nulls last
117
+ */
118
+ export declare function sortHotels(hotels: HotelResult[], sort: 'price' | 'rating' | 'value'): HotelResult[];
119
+ /**
120
+ * Search multiple travel sites for hotels and return sorted, deduplicated results.
121
+ */
122
+ export declare function searchHotels(options: HotelSearchOptions): Promise<HotelSearchResult>;
123
+ //# sourceMappingURL=hotel-search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hotel-search.d.ts","sourceRoot":"","sources":["../../src/core/hotel-search.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AASH,MAAM,WAAW,kBAAkB;IACjC,yEAAyE;IACzE,WAAW,EAAE,MAAM,CAAC;IACpB,gFAAgF;IAChF,OAAO,EAAE,MAAM,CAAC;IAChB,2EAA2E;IAC3E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,yEAAyE;IACzE,IAAI,CAAC,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,CAAC;IACpC,yCAAyC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qEAAqE;IACrE,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,uCAAuC;IACvC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,+BAA+B;IAC/B,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,8EAA8E;IAC9E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;IACrB,uCAAuC;IACvC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,8CAA8C;IAC9C,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,IAAI,GAAG,SAAS,GAAG,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC/F,OAAO,EAAE,MAAM,CAAC;CACjB;AAID;;;;;;;GAOG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,IAAI,GAAG,MAAM,CAyChE;AASD,2EAA2E;AAC3E,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAO7D;AAID;;;;GAIG;AACH,wBAAgB,WAAW,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAKvD;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GACf,SAAS,EAAE,CAyBb;AAID;;;;;;;;;;GAUG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CASrD;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAqBtD;AAgCD;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,WAAW,EAAE,GAAG,WAAW,EAAE,CA2BtE;AAID;;;;;GAKG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,EA
AE,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,WAAW,EAAE,CAwCnG;AASD;;GAEG;AACH,wBAAsB,YAAY,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAyH1F"}
@@ -0,0 +1,383 @@
1
+ /**
2
+ * Hotel search module — searches multiple travel sites and returns sorted hotel listings.
3
+ *
4
+ * Sources: Kayak, Booking.com, Google Travel, Expedia
5
+ * All sources are fetched in parallel; failures are captured per-source without
6
+ * crashing the overall search.
7
+ */
8
+ import { peel } from '../index.js';
9
+ import { extractListings } from './extract-listings.js';
10
+ import { findSchemaForUrl, extractWithSchema } from './schema-extraction.js';
11
+ // ── Date Parsing ──────────────────────────────────────────────────────────────
12
/**
 * Parse a date string (ISO or relative) into an ISO date string (YYYY-MM-DD).
 *
 * Supported inputs (relative keywords are matched case-insensitively, and
 * surrounding whitespace is ignored):
 * - "today"           → the base date
 * - "tomorrow"        → base date + 1 day
 * - "next <weekday>"  → next occurrence of that weekday strictly after the base date
 * - ISO "2026-02-20"  → returned as-is, after checking it names a real calendar date
 * - anything else     → handed to the `Date` constructor as a last resort
 *
 * Relative and fallback results are formatted from the local-time calendar fields.
 *
 * @param input    Date expression to parse.
 * @param baseDate Reference point for relative expressions; defaults to the current time.
 * @returns The resolved date as YYYY-MM-DD.
 * @throws Error when the input cannot be interpreted as a date, including
 *         ISO-shaped strings that name an impossible date such as "2026-02-30".
 */
export function parseDate(input, baseDate) {
    const base = baseDate ?? new Date();
    const trimmed = input.trim();
    const normalised = trimmed.toLowerCase();
    if (normalised === 'today') {
        return toIsoDate(base);
    }
    if (normalised === 'tomorrow') {
        const d = new Date(base);
        d.setDate(d.getDate() + 1);
        return toIsoDate(d);
    }
    // "next <weekday>"
    const nextMatch = normalised.match(/^next\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)$/);
    if (nextMatch) {
        // Index order matches Date#getDay(), where 0 is Sunday.
        const weekdays = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday'];
        const targetDay = weekdays.indexOf(nextMatch[1]);
        const d = new Date(base);
        let daysUntil = targetDay - d.getDay();
        // Same weekday as the base date means a full week ahead, never "today".
        if (daysUntil <= 0)
            daysUntil += 7;
        d.setDate(d.getDate() + daysUntil);
        return toIsoDate(d);
    }
    // ISO date (YYYY-MM-DD): the shape alone is not enough, "2026-13-45" also matches it.
    const isoMatch = /^(\d{4})-(\d{2})-(\d{2})$/.exec(trimmed);
    if (isoMatch) {
        const year = Number(isoMatch[1]);
        const month = Number(isoMatch[2]);
        const day = Number(isoMatch[3]);
        // Round-trip through a UTC date: out-of-range fields roll over and no longer match.
        const probe = new Date(0);
        probe.setUTCFullYear(year, month - 1, day);
        const isRealDate = probe.getUTCFullYear() === year &&
            probe.getUTCMonth() === month - 1 &&
            probe.getUTCDate() === day;
        if (!isRealDate) {
            throw new Error(`Invalid calendar date: "${input}"`);
        }
        return trimmed;
    }
    // Fallback: try to parse as a generic date string
    const parsed = new Date(input);
    if (!isNaN(parsed.getTime())) {
        return toIsoDate(parsed);
    }
    throw new Error(`Unrecognized date format: "${input}"`);
}
56
/**
 * Format a Date as YYYY-MM-DD using its local-time calendar fields.
 *
 * @param d Date to format.
 * @returns Zero-padded ISO date string; the year is padded to four digits.
 * @throws Error if `d` is an Invalid Date, rather than returning "NaN-NaN-NaN".
 */
function toIsoDate(d) {
    if (Number.isNaN(d.getTime())) {
        throw new Error('Cannot format an invalid date');
    }
    const yyyy = String(d.getFullYear()).padStart(4, '0');
    // getMonth() is zero-based.
    const mm = String(d.getMonth() + 1).padStart(2, '0');
    const dd = String(d.getDate()).padStart(2, '0');
    return `${yyyy}-${mm}-${dd}`;
}
62
/**
 * Add N days to an ISO date string and return the new ISO date string.
 *
 * The date is anchored at 12:00 UTC and all arithmetic uses the UTC accessors,
 * so the result does not depend on the local timezone.
 *
 * @param isoDate Date in YYYY-MM-DD form.
 * @param days    Number of days to add; negative values move backwards.
 * @returns The shifted date as YYYY-MM-DD.
 * @throws Error if `isoDate` (or the shifted result) is not a valid date,
 *         instead of returning "NaN-NaN-NaN".
 */
export function addDays(isoDate, days) {
    const d = new Date(isoDate + 'T12:00:00Z');
    if (Number.isNaN(d.getTime())) {
        throw new Error(`Invalid ISO date: "${isoDate}"`);
    }
    // setUTCDate rolls over month and year boundaries on its own.
    d.setUTCDate(d.getUTCDate() + days);
    if (Number.isNaN(d.getTime())) {
        throw new Error(`Cannot add ${days} days to "${isoDate}"`);
    }
    const yyyy = d.getUTCFullYear();
    const mm = String(d.getUTCMonth() + 1).padStart(2, '0');
    const dd = String(d.getUTCDate()).padStart(2, '0');
    return `${yyyy}-${mm}-${dd}`;
}
71
+ // ── URL Builders ──────────────────────────────────────────────────────────────
72
/**
 * Convert a destination name to a Kayak-friendly slug.
 *
 * Each comma-separated part is trimmed and its whitespace runs become single
 * hyphens; the parts are then re-joined with commas. Empty parts (from leading,
 * trailing or doubled commas) are dropped so the slug never carries a dangling comma.
 *
 * e.g. "Manhattan, New York" → "Manhattan,New-York"
 * e.g. "Long Island City" → "Long-Island-City"
 * e.g. "Paris, " → "Paris"
 *
 * @param destination Free-text destination name.
 * @returns The slug; an empty string if the destination has no non-blank part.
 */
export function toKayakSlug(destination) {
    return destination
        .split(',')
        .map(part => part.trim().replace(/\s+/g, '-'))
        .filter(part => part.length > 0)
        .join(',');
}
83
/**
 * Build the search URL for each source (kayak, booking, google, expedia).
 *
 * The destination is percent-encoded for every URL so that characters such as
 * "/", "?", "#" or "&" cannot change the URL structure. For plain
 * letters/spaces/commas the Kayak and Google URLs keep their readable form
 * ("Manhattan,New-York", "Manhattan,+New+York").
 *
 * Note: the Google Travel URL carries only the destination; the dates are not
 * part of it.
 *
 * @param destination Free-text destination name.
 * @param checkin     Check-in date, inserted verbatim (expected YYYY-MM-DD).
 * @param checkout    Check-out date, inserted verbatim (expected YYYY-MM-DD).
 * @returns One `{ name, url }` entry per source, in a fixed order.
 */
export function buildSourceUrls(destination, checkin, checkout) {
    // Kayak path segment: keep the literal commas that separate the slug parts.
    const kayakSlug = encodeURIComponent(toKayakSlug(destination)).replace(/%2C/g, ',');
    // Query-string value shared by Booking.com and Expedia.
    const queryDest = encodeURIComponent(destination);
    // Google path segment: whitespace runs become "+", commas stay literal.
    const googleDest = encodeURIComponent(destination.replace(/\s+/g, ' '))
        .replace(/%20/g, '+')
        .replace(/%2C/g, ',');
    return [
        {
            name: 'kayak',
            url: `https://www.kayak.com/hotels/${kayakSlug}/${checkin}/${checkout}?sort=price_a`,
        },
        {
            name: 'booking',
            url: `https://www.booking.com/searchresults.html?ss=${queryDest}&checkin=${checkin}&checkout=${checkout}&order=price`,
        },
        {
            name: 'google',
            url: `https://www.google.com/travel/hotels/${googleDest}`,
        },
        {
            name: 'expedia',
            url: `https://www.expedia.com/Hotel-Search?destination=${queryDest}&startDate=${checkin}&endDate=${checkout}&sort=PRICE_LOW_TO_HIGH`,
        },
    ];
}
110
+ // ── Price & Rating Parsers ────────────────────────────────────────────────────
111
/**
 * Parse a price display string into a numeric value.
 * Returns null if unparseable.
 *
 * No currency conversion is performed: the number is returned as shown,
 * whatever the currency symbol. Thousands separators (commas) are removed.
 * A number that directly follows a currency symbol is preferred, so text such
 * as "2 nights from $119" yields 119 rather than 2; otherwise the first number
 * in the string is used.
 *
 * Examples:
 * "$119" → 119
 * "$1,299" → 1299
 * "£85" → 85 (GBP treated as USD approximation)
 * "€95" → 95
 * "US$200" → 200
 * "2 nights from $119" → 119
 *
 * @param raw Price text as shown on the source.
 * @returns The parsed amount, or null when `raw` is empty or contains no number.
 */
export function parsePrice(raw) {
    if (!raw)
        return null;
    // Drop thousands separators so "1,299" reads as one number.
    const text = String(raw).replace(/,/g, '');
    // Prefer the amount attached to a currency symbol.
    const anchored = text.match(/(?:US\$|[$£€¥₹])\s*(\d+(?:\.\d+)?)/);
    // Otherwise fall back to the first number anywhere in the text (e.g. "95 €").
    const match = anchored ?? text.match(/(\d+(?:\.\d+)?)/);
    if (!match)
        return null;
    const n = parseFloat(match[1]);
    return Number.isNaN(n) ? null : n;
}
134
/**
 * Parse a rating string into a numeric value.
 * Returns null if unparseable.
 *
 * The value is returned on whatever scale the source shows it ("4.2/5" → 4.2,
 * "8.4" → 8.4); scales are not normalised.
 *
 * Strategy:
 * 1. "N/M" fraction form → N.
 * 2. Otherwise the last number in the 0–10 range. Parenthesised text (typically
 *    a review count such as "(1.4K)") and comma-grouped numbers ("1,234") are
 *    not mistaken for the rating. If the only numbers are inside parentheses,
 *    they are considered as a fallback.
 *
 * Examples:
 * "Scored 8.4" → 8.4
 * "4.2/5" → 4.2
 * "4.2/5 (1.4K)" → 4.2
 * "4.5 (1.4K)" → 4.5
 * "8.3" → 8.3
 * "Very Good 8.6" → 8.6
 *
 * @param raw Rating text as shown on the source.
 * @returns The parsed rating, or null when no rating-like number is present.
 */
export function parseRating(raw) {
    if (!raw)
        return null;
    const text = String(raw);
    // Fraction form such as "4.2/5": the numerator is the rating.
    const fraction = text.match(/(\d+(?:\.\d+)?)\s*\/\s*\d/);
    if (fraction) {
        const n = parseFloat(fraction[1]);
        return Number.isNaN(n) ? null : n;
    }
    // Scan numbers from the end and return the first one that fits a 0–10 scale.
    // "1,234" is read as a single token (1234) so it is rejected as a whole.
    const lastOnScale = (candidateText) => {
        const tokens = candidateText.match(/\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?/g);
        if (!tokens)
            return null;
        for (let i = tokens.length - 1; i >= 0; i--) {
            const n = parseFloat(tokens[i].replace(/,/g, ''));
            if (!Number.isNaN(n) && n >= 0 && n <= 10)
                return n;
        }
        return null;
    };
    // Look outside parentheses first; fall back to the full text ("Excellent (9.1)").
    const outsideParens = text.replace(/\([^)]*\)/g, ' ');
    return lastOnScale(outsideParens) ?? lastOnScale(text);
}
166
+ // ── Result Normalisation ──────────────────────────────────────────────────────
167
/**
 * Convert one extracted listing item into a HotelResult tagged with its source.
 *
 * The raw price/rating strings are kept for display and also parsed into
 * numbers via parsePrice / parseRating. The item's `description` is reused as
 * the hotel's `location`.
 *
 * @param item       Listing with optional title, price, rating, link, description and image.
 * @param sourceName Name of the source the item came from.
 * @returns The normalised hotel, or null when the item has no non-blank title.
 */
function normaliseToHotelResult(item, sourceName) {
    const title = item.title?.trim();
    if (!title) {
        return null;
    }
    const shownPrice = item.price ?? '';
    const shownRating = item.rating ?? '';
    const trimmedDescription = item.description?.trim();
    return {
        name: title,
        price: parsePrice(shownPrice),
        priceDisplay: shownPrice,
        rating: parseRating(shownRating),
        ratingDisplay: shownRating,
        source: sourceName,
        link: item.link ?? '',
        // Empty strings collapse to undefined so the optional fields are simply absent.
        location: trimmedDescription || undefined,
        image: item.image || undefined,
    };
}
188
+ // ── Deduplication ─────────────────────────────────────────────────────────────
189
/**
 * Collapse hotels that share a name, compared case-insensitively with
 * whitespace runs treated as a single space.
 *
 * Among duplicates the entry with more known fields (price, rating) wins.
 * On a tie, an entry with a strictly lower price, or with a price when the
 * kept one has none, replaces the kept one; otherwise the earlier entry stays.
 *
 * @param hotels Hotels to deduplicate; the input array is not modified.
 * @returns One hotel per distinct name, in order of first appearance.
 */
export function deduplicateHotels(hotels) {
    // Number of known data fields: 0, 1 or 2.
    const completeness = (h) => (h.price !== null ? 1 : 0) + (h.rating !== null ? 1 : 0);
    const bestByName = new Map();
    for (const candidate of hotels) {
        const nameKey = candidate.name.toLowerCase().replace(/\s+/g, ' ').trim();
        const kept = bestByName.get(nameKey);
        if (!kept) {
            bestByName.set(nameKey, candidate);
            continue;
        }
        const candidateScore = completeness(candidate);
        const keptScore = completeness(kept);
        if (candidateScore < keptScore) {
            continue;
        }
        // Richer entries always win; on equal richness the cheaper one wins.
        const isCheaper = candidate.price !== null &&
            (kept.price === null || candidate.price < kept.price);
        if (candidateScore > keptScore || isCheaper) {
            bestByName.set(nameKey, candidate);
        }
    }
    return [...bestByName.values()];
}
218
+ // ── Sorting ───────────────────────────────────────────────────────────────────
219
/**
 * Return a sorted copy of the hotel list; the input array is left untouched.
 *
 * - price: ascending, hotels without a price last
 * - rating: descending, hotels without a rating last
 * - value: rating/price ratio, descending; hotels lacking a rating or a
 *   non-zero price last
 *
 * Any other `sort` value returns the copy in its original order.
 *
 * @param hotels Hotels to sort.
 * @param sort   Ordering to apply.
 * @returns A new array in the requested order.
 */
export function sortHotels(hotels, sort) {
    // Builds a comparator on `metric` that always pushes null metrics to the end.
    // `direction` is 1 for ascending and -1 for descending.
    const nullsLast = (metric, direction) => (left, right) => {
        const l = metric(left);
        const r = metric(right);
        if (l === null)
            return r === null ? 0 : 1;
        if (r === null)
            return -1;
        return direction * (l - r);
    };
    const ratingPerPrice = (h) => (h.price === null || h.price === 0 || h.rating === null)
        ? null
        : h.rating / h.price;
    let comparator;
    switch (sort) {
        case 'price':
            comparator = nullsLast((h) => h.price, 1);
            break;
        case 'rating':
            comparator = nullsLast((h) => h.rating, -1);
            break;
        case 'value':
            comparator = nullsLast(ratingPerPrice, -1);
            break;
    }
    const copy = hotels.slice();
    if (comparator) {
        copy.sort(comparator);
    }
    return copy;
}
272
+ // ── Main Function ─────────────────────────────────────────────────────────────
273
/** Sources queried when the caller does not pass `options.sources`. */
const DEFAULT_SOURCES = ['kayak', 'booking', 'google', 'expedia'];
// Timeouts handed to peel(); presumably milliseconds — confirm against peel's option docs.
/** Used for sources fetched without rendering or stealth. */
const SIMPLE_TIMEOUT = 15000;
/** Used for sources fetched with rendering (or stealth) enabled. */
const BROWSER_TIMEOUT = 30000;
/** Used for Expedia, which additionally waits for a listing selector. */
const EXPEDIA_TIMEOUT = 60000;
277
/**
 * Search multiple travel sites for hotels and return sorted, deduplicated results.
 *
 * Flow: resolve the dates, build one search URL per requested source, fetch all
 * requested sources in parallel, then deduplicate, sort and truncate the
 * combined listings. A failing source never rejects the whole search; it is
 * reported in `sources` with status 'blocked' or 'error'. Requested source
 * names that match no known source are reported there as errors too.
 *
 * NOTE(review): `options.silent` is not read by this function.
 *
 * @param options Search parameters (destination, checkin, optional checkout,
 *                sort, limit, sources, stealth, proxy).
 * @returns Destination, resolved dates, the limited result list,
 *          `totalResults` (the number of hotels returned after the limit is
 *          applied), per-source status and elapsed wall-clock milliseconds.
 * @throws Error if `checkin` or `checkout` cannot be parsed (from parseDate).
 */
export async function searchHotels(options) {
    const startTime = Date.now();
    // ── Parse dates ────────────────────────────────────────────────────────────
    const checkin = parseDate(options.checkin);
    // Checkout defaults to a one-night stay.
    const checkout = options.checkout ? parseDate(options.checkout) : addDays(checkin, 1);
    const destination = options.destination;
    const sort = options.sort ?? 'price';
    // A negative limit would make slice() drop results from the end; treat it as 0.
    const limit = Math.max(0, options.limit ?? 20);
    const allowedSources = new Set((options.sources ?? DEFAULT_SOURCES).map(s => s.toLowerCase()));
    const useGlobalStealth = options.stealth ?? false;
    const proxyUrl = options.proxy;
    // ── Build source URLs ──────────────────────────────────────────────────────
    const knownSourceUrls = buildSourceUrls(destination, checkin, checkout);
    const knownNames = new Set(knownSourceUrls.map(s => s.name));
    const allSourceUrls = knownSourceUrls.filter(s => allowedSources.has(s.name));
    const unknownSources = [...allowedSources].filter(name => !knownNames.has(name));
    // ── Fetch all sources in parallel ──────────────────────────────────────────
    const settled = await Promise.allSettled(allSourceUrls.map(src => fetchHotelsFromSource(src, useGlobalStealth, proxyUrl)));
    // ── Collect per-source status and results ──────────────────────────────────
    const sourceStats = [];
    const allHotels = [];
    settled.forEach((outcome, i) => {
        // allSettled preserves input order, so index i maps back to its source.
        const src = allSourceUrls[i];
        if (outcome.status === 'fulfilled') {
            const { hotels } = outcome.value;
            sourceStats.push({ name: src.name, count: hotels.length, status: 'ok' });
            allHotels.push(...hotels);
        }
        else {
            sourceStats.push({ name: src.name, count: 0, ...describeSourceFailure(outcome.reason) });
        }
    });
    // Surface requested-but-unknown sources instead of dropping them silently.
    for (const name of unknownSources) {
        sourceStats.push({ name, count: 0, status: 'error', error: `Unknown source: "${name}"` });
    }
    // ── Deduplicate, sort, limit ───────────────────────────────────────────────
    const results = sortHotels(deduplicateHotels(allHotels), sort).slice(0, limit);
    return {
        destination,
        checkin,
        checkout,
        totalResults: results.length,
        results,
        sources: sourceStats,
        elapsed: Date.now() - startTime,
    };
}
/**
 * Fetch one source's search page and extract its hotels.
 * Kayak and Expedia always use stealth; Booking is rendered; other sources are
 * fetched plainly unless global stealth is requested.
 */
async function fetchHotelsFromSource(src, useGlobalStealth, proxyUrl) {
    const isExpedia = src.name === 'expedia';
    const useStealth = useGlobalStealth || src.name === 'kayak' || isExpedia;
    const useRender = useStealth || src.name === 'booking';
    const timeout = isExpedia ? EXPEDIA_TIMEOUT : (useRender ? BROWSER_TIMEOUT : SIMPLE_TIMEOUT);
    // Expedia is a SPA — wait for property listings to appear before extracting
    const actions = isExpedia
        ? [{ type: 'waitForSelector', selector: "[data-stid='property-listing'], li.uitk-spacing" }]
        : undefined;
    const result = await peel(src.url, {
        format: 'html',
        render: useRender,
        stealth: useStealth,
        timeout,
        ...(actions ? { actions } : {}),
        ...(proxyUrl ? { proxy: proxyUrl } : {}),
    });
    const hotels = [];
    // Prefer CSS schema extraction when a schema is available for this source
    const schema = findSchemaForUrl(src.url);
    if (schema) {
        // Schema fields are untyped; keep only string values.
        const asString = (value) => (typeof value === 'string' ? value : undefined);
        for (const item of extractWithSchema(result.content, schema, src.url)) {
            const hotel = normaliseToHotelResult({
                title: asString(item.title),
                price: asString(item.price),
                rating: asString(item.rating),
                link: asString(item.link),
                image: asString(item.image),
                // The schema's `location` field travels as the listing description.
                description: asString(item.location),
            }, src.name);
            if (hotel)
                hotels.push(hotel);
        }
    }
    // Fall back to generic extraction if schema yielded nothing
    if (hotels.length === 0) {
        for (const item of extractListings(result.content, src.url)) {
            const hotel = normaliseToHotelResult(item, src.name);
            if (hotel)
                hotels.push(hotel);
        }
    }
    return { name: src.name, hotels };
}
/**
 * Turn a rejection reason into a `{ status, error }` pair: 'blocked' when the
 * message mentions "blocked", "403" or "cloudflare", otherwise 'error'.
 */
function describeSourceFailure(reason) {
    const error = reason instanceof Error ? reason.message : String(reason);
    const haystack = error.toLowerCase();
    const isBlocked = ['blocked', '403', 'cloudflare'].some(marker => haystack.includes(marker));
    return { status: isBlocked ? 'blocked' : 'error', error };
}
383
+ //# sourceMappingURL=hotel-search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hotel-search.js","sourceRoot":"","sources":["../../src/core/hotel-search.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AACnC,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAoD7E,iFAAiF;AAEjF;;;;;;;GAOG;AACH,MAAM,UAAU,SAAS,CAAC,KAAa,EAAE,QAAe;IACtD,MAAM,IAAI,GAAG,QAAQ,IAAI,IAAI,IAAI,EAAE,CAAC;IAEpC,YAAY;IACZ,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE9C,IAAI,UAAU,KAAK,OAAO,EAAE,CAAC;QAC3B,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED,IAAI,UAAU,KAAK,UAAU,EAAE,CAAC;QAC9B,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC;QACzB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;QAC3B,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,mBAAmB;IACnB,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,qEAAqE,CAAC,CAAC;IAC1G,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,QAAQ,GAAG,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChG,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,CAAC;QAClD,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC;QACzB,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC;QAC9B,IAAI,SAAS,GAAG,SAAS,GAAG,UAAU,CAAC;QACvC,IAAI,SAAS,IAAI,CAAC;YAAE,SAAS,IAAI,CAAC,CAAC;QACnC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,CAAC;QACnC,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,4BAA4B;IAC5B,IAAI,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;QAC7C,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IACtB,CAAC;IAED,kDAAkD;IAClD,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC;QAC7B,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;IAC3B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,GAAG,CAAC,CAAC;AAC1D,CAAC;AAED,SAAS,SAAS,CAAC,CAAO;IACxB,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;IAC7B,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACrD,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAChD,OAAO,GAAG,IAAI,IAAI,EAAE,IAAI,EAAE,EA
AE,CAAC;AAC/B,CAAC;AAED,2EAA2E;AAC3E,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAAY;IACnD,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,OAAO,GAAG,YAAY,CAAC,CAAC;IAC3C,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,CAAC,CAAC,cAAc,EAAE,CAAC;IAChC,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACxD,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACnD,OAAO,GAAG,IAAI,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC;AAC/B,CAAC;AAED,iFAAiF;AAEjF;;;;GAIG;AACH,MAAM,UAAU,WAAW,CAAC,WAAmB;IAC7C,OAAO,WAAW;SACf,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;SAC7C,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAOD;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,WAAmB,EACnB,OAAe,EACf,QAAgB;IAEhB,MAAM,SAAS,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IACpD,MAAM,UAAU,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAEpD,MAAM,WAAW,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IAEpD,OAAO;QACL;YACE,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,gCAAgC,SAAS,IAAI,OAAO,IAAI,QAAQ,eAAe;SACrF;QACD;YACE,IAAI,EAAE,SAAS;YACf,GAAG,EAAE,iDAAiD,WAAW,YAAY,OAAO,aAAa,QAAQ,cAAc;SACxH;QACD;YACE,IAAI,EAAE,QAAQ;YACd,GAAG,EAAE,wCAAwC,UAAU,EAAE;SAC1D;QACD;YACE,IAAI,EAAE,SAAS;YACf,GAAG,EAAE,oDAAoD,WAAW,cAAc,OAAO,YAAY,QAAQ,yBAAyB;SACvI;KACF,CAAC;AACJ,CAAC;AAED,iFAAiF;AAEjF;;;;;;;;;;GAUG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,+DAA+D;IAC/D,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1E,uBAAuB;IACvB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAC/C,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,CAAC;IAChC,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,uCAAuC;IACvC,MAAM,WAAW,GAAG,GAAG,CAAC,KAAK,CAAC,2BA
A2B,CAAC,CAAC;IAC3D,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,CAAC,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC,CAAC;QACtC,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IAED,gFAAgF;IAChF,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IAC/C,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,0EAA0E;IAC1E,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAC;QACnC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,iFAAiF;AAEjF;;GAEG;AACH,SAAS,sBAAsB,CAC7B,IAAiJ,EACjJ,UAAkB;IAElB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;IACtC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;IAExC,OAAO;QACL,IAAI;QACJ,KAAK,EAAE,UAAU,CAAC,YAAY,CAAC;QAC/B,YAAY;QACZ,MAAM,EAAE,WAAW,CAAC,aAAa,CAAC;QAClC,aAAa;QACb,MAAM,EAAE,UAAU;QAClB,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;QACrB,QAAQ,EAAE,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS;QAC/C,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;KAC/B,CAAC;AACJ,CAAC;AAED,iFAAiF;AAEjF;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAAqB;IACrD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuB,CAAC;IAE9C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACjE,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACvB,SAAS;QACX,CAAC;QAED,yCAAyC;QACzC,MAAM,QAAQ,GAAG,CAAC,KAAK,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAClF,MAAM,QAAQ,GAAG,CAAC,QAAQ,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAExF,IAAI,QAAQ,GAAG,QAAQ,EAAE,CAAC;YACxB,MAAM,CAAC,GAAG,C
AAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACzB,CAAC;aAAM,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACjC,wEAAwE;YACxE,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtF,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;AACrC,CAAC;AAED,iFAAiF;AAEjF;;;;;GAKG;AACH,MAAM,UAAU,UAAU,CAAC,MAAqB,EAAE,IAAkC;IAClF,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;IAE3B,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,OAAO;YACV,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACnB,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBACnD,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBAC/B,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC,CAAC;gBAChC,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;YAC3B,CAAC,CAAC,CAAC;YACH,MAAM;QAER,KAAK,QAAQ;YACX,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACnB,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBACrD,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBAChC,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC,CAAC;gBACjC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;YAC7B,CAAC,CAAC,CAAC;YACH,MAAM;QAER,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,OAAO,GAAG,CAAC,CAAc,EAAiB,EAAE;gBAChD,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,IAAI;oBAAE,OAAO,IAAI,CAAC;gBACxE,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC;YAC5B,CAAC,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACnB,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACtB,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,EAAE,KAAK,IAAI,IAAI,EAAE,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBACzC,IAAI,EAAE,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC;gBAC1B,IAAI,EAAE,KAAK,IAAI;oBAAE,OAAO,CAAC,CAAC,CAAC;gBAC3B,OAAO,EAAE,GAAG,EAAE,CAAC;YACjB,CAAC,CAAC,CAAC;YACH,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,iFAAiF;AAEjF,MAAM,eAAe,GAAG,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC,
CAAC;AAClE,MAAM,cAAc,GAAG,MAAM,CAAC;AAC9B,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,OAA2B;IAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,8EAA8E;IAC9E,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC;IACrC,MAAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAE5E,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IACxC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAC/F,MAAM,gBAAgB,GAAG,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC;IAE/B,8EAA8E;IAC9E,MAAM,aAAa,GAAG,eAAe,CAAC,WAAW,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAC/E,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAC3B,CAAC;IAEF,8EAA8E;IAC9E,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,aAAa,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC9B,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,KAAK,OAAO,CAAC;QACrC,MAAM,SAAS,GAAG,GAAG,CAAC,IAAI,KAAK,SAAS,CAAC;QACzC,MAAM,SAAS,GAAG,GAAG,CAAC,IAAI,KAAK,SAAS,CAAC;QAEzC,MAAM,UAAU,GAAG,gBAAgB,IAAI,OAAO,IAAI,SAAS,CAAC;QAC5D,MAAM,SAAS,GAAG,UAAU,IAAI,SAAS,CAAC;QAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;QAE7F,4EAA4E;QAC5E,MAAM,OAAO,GAA6B,SAAS;YACjD,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,QAAQ,EAAE,iDAAiD,EAAE,CAAC;YAC5F,CAAC,CAAC,SAAS,CAAC;QAEd,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE;YACjC,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,UAAU;YACnB,OAAO;YACP,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/B,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzC,CAAC,CAAC;QAEH,0EAA0E;QAC1E,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACzC,MAAM,MAAM,GAAkB,EAAE,CAAC;QAEjC,IAAI,MAAM,EAAE,CA
AC;YACX,MAAM,WAAW,GAAG,iBAAiB,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;YACvE,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;gBAC/B,MAAM,MAAM,GAAG;oBACb,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBAC9D,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBAC9D,MAAM,EAAE,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;oBACjE,IAAI,EAAE,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;oBAC3D,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBAC9D,WAAW,EAAE,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;iBAC3E,CAAC;gBACF,MAAM,KAAK,GAAG,sBAAsB,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;gBACvD,IAAI,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;QAED,4DAA4D;QAC5D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1D,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;gBAC5B,MAAM,KAAK,GAAG,sBAAsB,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;gBACrD,IAAI,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;IACpC,CAAC,CAAC,CACH,CAAC;IAEF,8EAA8E;IAC9E,MAAM,WAAW,GAAiC,EAAE,CAAC;IACrD,MAAM,SAAS,GAAkB,EAAE,CAAC;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,GAAG,GAAG,aAAa,CAAC,CAAC,CAAE,CAAC;QAC9B,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAE,CAAC;QAE5B,IAAI,OAAO,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YACnC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC;YACjC,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YACzE,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,YAAY,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACjG,MAAM,SAAS,GACb,MAA
M,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC;gBACxC,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC;gBACpC,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YAC9C,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO;gBACvC,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACxC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAEvC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;IAEvC,OAAO;QACL,WAAW;QACX,OAAO;QACP,QAAQ;QACR,YAAY,EAAE,OAAO,CAAC,MAAM;QAC5B,OAAO;QACP,OAAO,EAAE,WAAW;QACpB,OAAO;KACR,CAAC;AACJ,CAAC"}
@@ -0,0 +1,56 @@
1
+ /**
2
+ * LLM-based extraction: sends markdown/text content to an LLM
3
+ * with instructions to extract structured data.
4
+ *
5
+ * Supports OpenAI-compatible APIs (OpenAI, Anthropic via proxy, local models).
6
+ */
7
+ export interface LLMExtractionOptions {
8
+ content: string;
9
+ instruction?: string;
10
+ schema?: object;
11
+ apiKey?: string;
12
+ baseUrl?: string;
13
+ model?: string;
14
+ maxTokens?: number;
15
+ }
16
+ export interface LLMExtractionResult {
17
+ items: Array<Record<string, any>>;
18
+ tokensUsed: {
19
+ input: number;
20
+ output: number;
21
+ };
22
+ model: string;
23
+ cost?: number;
24
+ }
25
+ /**
26
+ * Detect if schema is a "full" JSON Schema (has type:"object" and properties).
27
+ */
28
+ export declare function isFullJsonSchema(schema: object): boolean;
29
+ /**
30
+ * Convert a simple example object to a proper JSON Schema.
31
+ *
32
+ * Supports:
33
+ * - Primitive values: "" → { type: "string" }, 0 → { type: "number" }
34
+ * - Arrays of objects: [{name:"", price:""}] → { type: "array", items: { type: "object", properties: {...} } }
35
+ * - Nested objects
36
+ */
37
+ export declare function convertSimpleToJsonSchema(example: object): object;
38
+ /**
39
+ * Build the user message from content + optional instruction + optional schema.
40
+ */
41
+ export declare function buildUserMessage(content: string, instruction?: string, schema?: object): string;
42
+ /**
43
+ * Calculate estimated cost in USD for a given model and token counts.
44
+ */
45
+ export declare function estimateCost(model: string, inputTokens: number, outputTokens: number): number | undefined;
46
+ /**
47
+ * Parse the LLM response text into an items array.
48
+ * Handles both `{ "items": [...] }` and `[...]` formats.
49
+ * When a schema is provided, also handles single-object responses.
50
+ */
51
+ export declare function parseItems(text: string, _schema?: object): Array<Record<string, any>>;
52
+ /**
53
+ * Extract structured data from content using an LLM.
54
+ */
55
+ export declare function extractWithLLM(options: LLMExtractionOptions): Promise<LLMExtractionResult>;
56
+ //# sourceMappingURL=llm-extract.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-extract.d.ts","sourceRoot":"","sources":["../../src/core/llm-extract.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,MAAM,WAAW,oBAAoB;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC;IAClC,UAAU,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9C,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAmBD;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAGxD;AAED;;;;;;;GAOG;AACH,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAEjE;AA2CD;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAe/F;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAMzG;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAqCrF;AA2DD;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAqFhG"}