@galihvsx/gmr-scraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,724 @@
1
+ 'use strict';
2
+
3
// src/cache.ts
// Bounded in-memory TTL cache. Entries expire `ttl` ms after being set;
// when full, the oldest-inserted key is evicted (FIFO).
var Cache = class {
  cache = /* @__PURE__ */ new Map();
  ttl;      // entry lifetime in milliseconds
  maxSize;  // maximum number of entries retained
  constructor(options = {}) {
    this.ttl = options.ttl ?? 3e5;         // default: 5 minutes
    this.maxSize = options.maxSize ?? 100;
  }
  // Store `value` under `key` with an absolute expiry timestamp.
  // Fixes: overwriting an existing key no longer evicts an unrelated entry
  // (a Map.set on an existing key does not grow the map), expired entries
  // are purged before resorting to FIFO eviction, and falsy-but-valid keys
  // (e.g. "") are no longer skipped by the eviction guard.
  set(key, value) {
    if (!this.cache.has(key) && this.cache.size >= this.maxSize) {
      this.cleanup();
      if (this.cache.size >= this.maxSize) {
        const firstKey = this.cache.keys().next().value;
        if (firstKey !== void 0) {
          this.cache.delete(firstKey);
        }
      }
    }
    this.cache.set(key, {
      value,
      expiresAt: Date.now() + this.ttl
    });
  }
  // Return the cached value, or undefined when absent or expired.
  // Expired entries are deleted lazily here.
  get(key) {
    const entry = this.cache.get(key);
    if (!entry) {
      return void 0;
    }
    if (Date.now() > entry.expiresAt) {
      this.cache.delete(key);
      return void 0;
    }
    return entry.value;
  }
  // True when `key` holds a live (non-expired) entry.
  has(key) {
    return this.get(key) !== void 0;
  }
  delete(key) {
    this.cache.delete(key);
  }
  clear() {
    this.cache.clear();
  }
  // Number of live entries (expired ones are purged first).
  size() {
    this.cleanup();
    return this.cache.size;
  }
  // Remove every expired entry.
  cleanup() {
    const now = Date.now();
    for (const [key, entry] of this.cache.entries()) {
      if (now > entry.expiresAt) {
        this.cache.delete(key);
      }
    }
  }
};
57
// Compose the cache key from every request-shaping parameter.
// String() matches template-literal coercion (undefined -> "undefined").
function createCacheKey(placeId, sort, page, query, lang) {
  return [placeId, sort, page, query, lang].map(String).join(":");
}
60
+
61
// src/rate-limiter.ts
// Token-bucket rate limiter: short bursts up to `burstSize`, then a
// sustained rate of `requestsPerSecond`.
var RateLimiter = class {
  tokens;             // tokens currently available
  lastRefill;         // timestamp (ms) of the last refill
  requestsPerSecond;  // sustained request rate
  burstSize;          // bucket capacity
  refillRate;         // milliseconds between token refills
  constructor(options = {}) {
    this.requestsPerSecond = options.requestsPerSecond ?? 2;
    this.burstSize = options.burstSize ?? 5;
    this.tokens = this.burstSize;
    this.lastRefill = Date.now();
    this.refillRate = 1e3 / this.requestsPerSecond;
  }
  // Top up the bucket with tokens earned since the last refill.
  refill() {
    const now = Date.now();
    const timePassed = now - this.lastRefill;
    const tokensToAdd = Math.floor(timePassed / this.refillRate);
    if (tokensToAdd > 0) {
      this.tokens = Math.min(this.burstSize, this.tokens + tokensToAdd);
      this.lastRefill = now;
    }
  }
  /**
   * Resolve once a token has actually been consumed.
   * Fix: the original could fall through and resolve WITHOUT taking a
   * token when the computed wait time was <= 0, letting callers exceed
   * the configured rate. This loop always waits for and consumes a token.
   */
  async acquire() {
    for (;;) {
      this.refill();
      if (this.tokens > 0) {
        this.tokens--;
        return;
      }
      // Sleep until the next token is due; at least 1ms to avoid a hot loop.
      const waitTime = Math.max(
        1,
        this.refillRate - (Date.now() - this.lastRefill)
      );
      await new Promise((resolve) => setTimeout(resolve, waitTime));
    }
  }
  // Run `fn` once a rate-limit token is available.
  async execute(fn) {
    await this.acquire();
    return fn();
  }
};
101
+
102
// src/types.ts
// Bidirectional enum: sort name -> numeric RPC code, and code -> name.
var SortEnum = /* @__PURE__ */ ((SortEnum3) => {
  const entries = [
    ["relevant", 1],
    ["newest", 2],
    ["highest_rating", 3],
    ["lowest_rating", 4]
  ];
  for (const [label, code] of entries) {
    SortEnum3[SortEnum3[label] = code] = label;
  }
  return SortEnum3;
})(SortEnum || {});
110
+
111
// src/errors.ts
// Error hierarchy for the scraper. Every error carries a stable,
// machine-readable `code` so callers can branch without string-matching.
var ScraperError = class extends Error {
  /**
   * @param {string} message - human-readable description
   * @param {string} code - stable machine-readable error code
   */
  constructor(message, code) {
    super(message);
    this.code = code;
    this.name = "ScraperError";
    // Error.captureStackTrace is a V8-only API; guarded so non-V8
    // runtimes (Firefox, Safari, older JSC) don't throw here.
    if (typeof Error.captureStackTrace === "function") {
      Error.captureStackTrace(this, this.constructor);
    }
  }
};
// Thrown for missing/invalid URLs and invalid user parameters.
var InvalidUrlError = class extends ScraperError {
  constructor(message) {
    super(message, "INVALID_URL");
    this.name = "InvalidUrlError";
  }
};
// Thrown for HTTP failures; carries the status code and raw response.
var FetchError = class extends ScraperError {
  constructor(message, statusCode, response) {
    super(message, "FETCH_ERROR");
    this.statusCode = statusCode;
    this.response = response;
    this.name = "FetchError";
  }
};
// Thrown when a payload cannot be interpreted; carries the offending data.
var ParseError = class extends ScraperError {
  constructor(message, data) {
    super(message, "PARSE_ERROR");
    this.data = data;
    this.name = "ParseError";
  }
};
// Thrown when the rate limit is exhausted; optional retry-after hint.
// NOTE(review): retryAfter units are not established by this file — confirm.
var RateLimitError = class extends ScraperError {
  constructor(message = "Rate limit exceeded", retryAfter) {
    super(message, "RATE_LIMIT");
    this.retryAfter = retryAfter;
    this.name = "RateLimitError";
  }
};
// Thrown by withTimeout when an operation exceeds its deadline.
var TimeoutError = class extends ScraperError {
  constructor(message = "Request timeout") {
    super(message, "TIMEOUT");
    this.name = "TimeoutError";
  }
};
154
+
155
// src/extractors.ts
/**
 * Expand Google short links (goo.gl / maps.app.goo.gl) to their final
 * Maps URL via a redirect-following HEAD request; any other URL is
 * returned untouched.
 */
async function resolveUrl(url) {
  const isShortLink = url.includes("goo.gl") || url.includes("maps.app.goo.gl");
  if (!isShortLink) {
    return url;
  }
  const response = await fetch(url, { redirect: "follow", method: "HEAD" });
  return response.url;
}
163
/**
 * Pull the place identifier out of a full Google Maps URL.
 * Tries the canonical "0x…:0x…" hex-pair form first, then a looser
 * token form.
 * @throws {Error} when no recognizable Place ID is present.
 */
function extractPlaceId(url) {
  const patterns = [
    /!1s(0x[0-9a-fA-F]+:0x[0-9a-fA-F]+)/, // canonical hex pair
    /!1s([a-zA-Z0-9_:]+)!/                // fallback token form
  ];
  for (const pattern of patterns) {
    const found = url.match(pattern);
    if (found && found[1]) {
      return found[1];
    }
  }
  throw new Error(
    "Could not extract Place ID from URL. Please ensure it is a valid Google Maps Place URL."
  );
}
176
// Convert a 0x-prefixed hex string to its decimal string representation.
function hexToDec(hex) {
  return String(BigInt(hex));
}
/**
 * Build the internal `listentitiesreviews` RPC URL for one page of reviews.
 *
 * @param placeId        hex pair "0x…:0x…" from extractPlaceId
 * @param sort           numeric SortEnum value (the !3e segment)
 * @param nextPageToken  opaque pagination cursor ("" for the first page)
 * @param searchQuery    accepted for signature parity; not used in the URL
 * @param lang           value for the `hl` query parameter
 * @throws {Error} when placeId is not a two-part hex pair
 */
function buildRpcUrl(placeId, sort, nextPageToken = "", searchQuery = "", lang = "en") {
  const [hexA, hexB, ...extra] = placeId.split(":");
  if (hexB === void 0 || extra.length > 0) {
    throw new Error(
      `Invalid Place ID format for listentitiesreviews: ${placeId}`
    );
  }
  const y1 = hexToDec(hexA);
  const y2 = hexToDec(hexB);
  const constantToken = "dzvaXrvAMImImAXHsLPICA";
  // 10 reviews per page; the !3s segment carries the pagination cursor.
  const paginationBlock = nextPageToken ? `!2m2!2i10!3s${nextPageToken}` : `!2m1!2i10`;
  return `https://www.google.com/maps/preview/review/listentitiesreviews?authuser=0&hl=${lang}&gl=in&pb=!1m2!1y${y1}!2y${y2}${paginationBlock}!3e${sort}!4m5!3b1!4b1!5b1!6b1!7b1!5m2!1s${constantToken}!7e81`;
}
192
+
193
// src/parser.ts
/**
 * Map raw review rows from the RPC payload into the public review shape.
 * Index positions follow the observed listentitiesreviews response layout.
 * NOTE(review): indices are reverse-engineered — verify against live data.
 *
 * Fix: parseText was called twice per review; it is now called once and
 * both halves of its result are reused.
 */
function parseReviews(reviews) {
  return reviews.map((review) => {
    const authorInfo = review[0] || [];
    const parsedText = parseText(review[3]);
    const images = null;    // image extraction not implemented
    const response = null;  // owner-response extraction not implemented
    return {
      review_id: review[10],
      time: {
        published: review[1],
        last_edited: null
      },
      author: {
        name: authorInfo[1],
        profile_url: authorInfo[0],
        url: authorInfo[0],
        id: review[6]
      },
      review: {
        rating: review[4],
        text: parsedText.text,
        original_text: parsedText.original_text,
        language: review[32]
      },
      images,
      source: review[10],
      response
    };
  });
}
223
/**
 * Split a Google-translated review body into translated and original parts.
 * Translated reviews look like:
 *   "(Translated by Google) <text>\n\n(Original)\n<original>"
 * Untranslated text is returned as-is with original_text = null.
 */
function parseText(text) {
  if (!text) return { text: null, original_text: null };
  // Separator between the translated body and the "(Original)" section.
  const separator = /\n\n\((.+?)\)\n/g.exec(text);
  if (!separator) {
    return { text, original_text: null };
  }
  const translatedPart = text.substring(0, separator.index);
  const originalPart = text.substring(separator.index + separator[0].length);
  // Strip a leading "(Translated by Google) "-style marker if present.
  const marker = translatedPart.match(/^\((.+?)\) /);
  const cleanTranslated = marker ? translatedPart.substring(marker[0].length) : translatedPart;
  return {
    text: cleanTranslated,
    original_text: originalPart
  };
}
244
+
245
// src/retry.ts
// Baseline retry policy: 3 attempts, exponential backoff 1s -> 10s cap,
// 30s per-attempt timeout. Invalid-URL and parse failures are permanent,
// so they are never retried.
var DEFAULT_RETRY_OPTIONS = {
  maxAttempts: 3,
  initialDelay: 1e3,
  maxDelay: 1e4,
  backoffMultiplier: 2,
  timeout: 3e4,
  retryCondition: (error) => error.name !== "InvalidUrlError" && error.name !== "ParseError"
};
258
// Promise-based delay helper: resolve after `ms` milliseconds.
async function sleep(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
}
261
// Exponential backoff: initialDelay * multiplier^(attempt-1), capped at maxDelay.
function calculateDelay(attempt, initialDelay, maxDelay, backoffMultiplier) {
  const exponential = initialDelay * backoffMultiplier ** (attempt - 1);
  return exponential > maxDelay ? maxDelay : exponential;
}
265
/**
 * Run `fn` with retry and exponential backoff.
 * Merges `options` over DEFAULT_RETRY_OPTIONS; each attempt is wrapped in
 * a timeout when one is configured. Errors rejected by retryCondition and
 * the final failed attempt are rethrown; intermediate failures are logged.
 */
async function withRetry(fn, options = {}) {
  const opts = { ...DEFAULT_RETRY_OPTIONS, ...options };
  let lastError;
  for (let attempt = 1; attempt <= opts.maxAttempts; attempt++) {
    try {
      const pending = fn();
      return opts.timeout ? await withTimeout(pending, opts.timeout) : await pending;
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
      const outOfAttempts = attempt === opts.maxAttempts;
      if (outOfAttempts || !opts.retryCondition(lastError)) {
        throw lastError;
      }
      const delay = calculateDelay(
        attempt,
        opts.initialDelay,
        opts.maxDelay,
        opts.backoffMultiplier
      );
      console.warn(
        `Attempt ${attempt}/${opts.maxAttempts} failed: ${lastError.message}. Retrying in ${delay}ms...`
      );
      await sleep(delay);
    }
  }
  // Unreachable in practice: the loop either returns or rethrows.
  throw lastError;
}
293
/**
 * Race `promise` against a deadline; rejects with TimeoutError after
 * `timeoutMs` milliseconds.
 *
 * Fix: the timeout timer is now cleared once the race settles. The
 * original leaked the setTimeout handle, keeping the Node event loop
 * alive for up to `timeoutMs` after every successful call.
 */
async function withTimeout(promise, timeoutMs) {
  let timer;
  const timeoutPromise = new Promise((_, reject) => {
    timer = setTimeout(() => {
      reject(new TimeoutError(`Operation timed out after ${timeoutMs}ms`));
    }, timeoutMs);
  });
  try {
    return await Promise.race([promise, timeoutPromise]);
  } finally {
    clearTimeout(timer);
  }
}
301
+
302
// src/utils.ts
/**
 * Validate user-facing scraper parameters before any network work.
 * @throws {InvalidUrlError} on a missing URL, unknown sort type, or a bad
 *         pages value. `clean` is accepted for signature parity only.
 */
function validateParams(url, sortType, pages, clean) {
  if (!url) {
    throw new InvalidUrlError("URL is required");
  }
  if (sortType && !SortEnum[sortType]) {
    const validOptions = Object.keys(SortEnum).filter((k) => isNaN(Number(k))).join(", ");
    throw new InvalidUrlError(
      `Invalid sort type: ${sortType}. Valid options: ${validOptions}`
    );
  }
  const pagesIsValid = pages === void 0 || pages === "max" || typeof pages === "number" && pages >= 1;
  if (!pagesIsValid) {
    throw new InvalidUrlError("Pages must be 'max' or a positive number");
  }
}
316
/**
 * Fetch a single page of raw reviews from the `listentitiesreviews` RPC.
 *
 * @param placeId       hex pair "0x…:0x…" identifying the place
 * @param sort          numeric SortEnum value
 * @param nextPageToken opaque pagination cursor ("" for the first page)
 * @param searchQuery   forwarded to buildRpcUrl (not embedded in the URL)
 * @param lang          response language
 * @param cache         optional Cache; hits bypass the network entirely
 * @param rateLimiter   optional RateLimiter pacing the actual fetch
 * @param retryOptions  when set, the whole fetch is wrapped in withRetry
 * @returns {Promise<{data: any[], nextPageToken: string|null}>}
 * @throws {FetchError} on a non-2xx response or an unparsable body
 */
async function fetchReviews(placeId, sort, nextPageToken = "", searchQuery = "", lang = "en", cache, rateLimiter, retryOptions) {
  // The cache key covers every argument that shapes the request.
  const cacheKey = cache ? createCacheKey(placeId, sort, nextPageToken, searchQuery, lang) : "";
  if (cache && cacheKey) {
    const cached = cache.get(cacheKey);
    if (cached) {
      return cached;
    }
  }
  const fetchFn = async () => {
    const apiUrl = buildRpcUrl(placeId, sort, nextPageToken, searchQuery, lang);
    const actualFetch = async () => {
      // Desktop-browser UA; presumably needed for the endpoint to respond
      // normally — TODO confirm whether it is actually required.
      const response = await fetch(apiUrl, {
        headers: {
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
      });
      if (!response.ok) {
        throw new FetchError(
          `Failed to fetch reviews: ${response.statusText}`,
          response.status
        );
      }
      const textData = await response.text();
      // Strip Google's ")]}'" response prefix before JSON parsing.
      const cleanJson = textData.replace(/^\)\]\}'/, "");
      let parsedData;
      try {
        parsedData = JSON.parse(cleanJson);
      } catch (e) {
        console.error("Failed to parse JSON:", cleanJson.substring(0, 100));
        throw new FetchError("Failed to parse API response");
      }
      // Defensive: tolerate unexpected payload shapes by returning an
      // empty page instead of crashing.
      if (!parsedData || !Array.isArray(parsedData)) {
        console.warn("Unexpected response structure:", parsedData);
        return { data: [], nextPageToken: null };
      }
      // Observed payload layout: [1] = next-page token, [2] = review rows.
      const reviews = parsedData[2] || [];
      const nextToken = parsedData[1] || null;
      return { data: reviews, nextPageToken: nextToken };
    };
    // Route through the rate limiter when one is configured.
    if (rateLimiter) {
      return rateLimiter.execute(actualFetch);
    }
    return actualFetch();
  };
  const result = retryOptions ? await withRetry(fetchFn, retryOptions) : await fetchFn();
  // Only successful results reach this point, so only those are cached.
  if (cache && cacheKey) {
    cache.set(cacheKey, result);
  }
  return result;
}
366
/**
 * Resolve a Maps URL, then fetch review pages sequentially until `pages`
 * (or "max") pages are collected or pagination ends.
 *
 * Error policy: a failure on page 1 propagates; failures on later pages
 * are logged and the partial result is returned (best-effort pagination).
 *
 * @returns raw review rows, or parsed review objects when `clean` is true
 */
async function paginateReviews(url, sort, pages, searchQuery, clean, lang = "en", cache, rateLimiter, retryOptions, onProgress) {
  const resolvedUrl = await resolveUrl(url);
  const placeId = extractPlaceId(resolvedUrl);
  const initial = await fetchReviews(
    placeId,
    sort,
    "",
    searchQuery,
    lang,
    cache,
    rateLimiter,
    retryOptions
  );
  let allReviews = [...initial.data];
  let nextPageToken = initial.nextPageToken;
  if (onProgress) {
    onProgress(1, pages);
  }
  // Stop early when only one page was requested or there is no next page.
  if (pages === 1 || !nextPageToken) {
    return clean ? parseReviews(allReviews) : allReviews;
  }
  let currentPage = 2;
  const maxPages = pages === "max" ? Infinity : pages;
  while (nextPageToken && currentPage <= maxPages) {
    // Politeness delay between pages when no rate limiter is configured
    // (a configured rate limiter already paces requests itself).
    if (!rateLimiter) {
      await new Promise((resolve) => setTimeout(resolve, 1e3));
    }
    try {
      const nextBatch = await fetchReviews(
        placeId,
        sort,
        nextPageToken,
        searchQuery,
        lang,
        cache,
        rateLimiter,
        retryOptions
      );
      allReviews = [...allReviews, ...nextBatch.data];
      nextPageToken = nextBatch.nextPageToken;
      if (onProgress) {
        onProgress(currentPage, pages);
      }
      // Defensive stop: empty page with no further token.
      if (!nextBatch.data.length && !nextPageToken) {
        break;
      }
    } catch (e) {
      // Best-effort: keep what we have instead of failing the whole scrape.
      console.error(`Error scraping page ${currentPage}:`, e);
      break;
    }
    currentPage++;
  }
  return clean ? parseReviews(allReviews) : allReviews;
}
420
+
421
// src/analytics.ts
/**
 * Aggregate statistics over a list of cleaned reviews: totals, a 1-5 star
 * distribution, and counts of reviews that have text, images, or an owner
 * response. averageRating is rounded to 2 decimals (0 for an empty list).
 */
function calculateAnalytics(reviews) {
  const ratingDistribution = { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0 };
  let totalRating = 0;
  let reviewsWithText = 0;
  let reviewsWithImages = 0;
  let reviewsWithResponse = 0;
  reviews.forEach((item) => {
    const stars = item.review.rating;
    totalRating += stars;
    ratingDistribution[stars] = (ratingDistribution[stars] || 0) + 1;
    if (item.review.text) reviewsWithText += 1;
    if (item.images && item.images.length > 0) reviewsWithImages += 1;
    if (item.response) reviewsWithResponse += 1;
  });
  const totalReviews = reviews.length;
  const averageRating = totalReviews > 0 ? Number((totalRating / totalReviews).toFixed(2)) : 0;
  return {
    totalReviews,
    averageRating,
    ratingDistribution,
    reviewsWithText,
    reviewsWithImages,
    reviewsWithResponse
  };
}
452
// Top `limit` reviews by star rating, descending; the input is not mutated.
function getTopRatedReviews(reviews, limit = 10) {
  const byRatingDesc = (a, b) => b.review.rating - a.review.rating;
  return reviews.slice().sort(byRatingDesc).slice(0, limit);
}
455
/**
 * Most recent `limit` reviews by published timestamp, newest first.
 * NOTE(review): relies on `new Date(...)` parsing of `time.published`;
 * relative strings like "2 months ago" parse as NaN and make the order
 * undefined — confirm the upstream date format.
 */
function getRecentReviews(reviews, limit = 10) {
  const publishedAt = (r) => new Date(r.time.published).getTime();
  return reviews.slice().sort((a, b) => publishedAt(b) - publishedAt(a)).slice(0, limit);
}
462
// Bucket reviews by star rating; ratings outside 1-5 are silently dropped.
function groupByRating(reviews) {
  const grouped = { 1: [], 2: [], 3: [], 4: [], 5: [] };
  for (const item of reviews) {
    const bucket = grouped[item.review.rating];
    if (bucket) {
      bucket.push(item);
    }
  }
  return grouped;
}
478
+
479
// src/batch.ts
/**
 * Scrape several place URLs with a small worker pool.
 *
 * @param urls    list of Google Maps place URLs
 * @param options scraper options plus:
 *   - concurrency: number of parallel workers (default 3)
 *   - onProgress(completed, total, url): called after each URL settles
 *   - includeAnalytics: attach analytics when `clean` results are requested
 * @returns one result object per URL; failures are captured in
 *          `result.error` (never thrown), with `reviews: []`.
 */
async function batchScraper(urls, options = {}) {
  const {
    concurrency = 3,
    onProgress,
    includeAnalytics = false,
    ...scraperOptions
  } = options;
  const results = [];
  const queue = [...urls];
  let completed = 0;
  // Process one URL; never throws — errors fold into the result object.
  const processBatch = async (url) => {
    try {
      // Resolve/extract up front so an invalid URL fails before scraping.
      // NOTE(review): scraper() resolves the URL again internally, so
      // short links incur a second HEAD request here.
      const resolvedUrl = await resolveUrl(url);
      const placeId = extractPlaceId(resolvedUrl);
      const reviews = await scraper(url, scraperOptions);
      const result = {
        url,
        placeId,
        reviews
      };
      // Analytics require the cleaned review shape, hence the `clean` gate.
      if (includeAnalytics && scraperOptions.clean) {
        result.analytics = calculateAnalytics2(reviews);
      }
      completed++;
      if (onProgress) {
        onProgress(completed, urls.length, url);
      }
      return result;
    } catch (error) {
      completed++;
      if (onProgress) {
        onProgress(completed, urls.length, url);
      }
      return {
        url,
        reviews: [],
        error: error instanceof Error ? error : new Error(String(error))
      };
    }
  };
  // Fixed-size worker pool: each worker drains the shared queue. The
  // single-threaded event loop makes the queue.shift() race-free.
  const workers = [];
  for (let i = 0; i < concurrency; i++) {
    workers.push(
      (async () => {
        while (queue.length > 0) {
          const url = queue.shift();
          if (url) {
            const result = await processBatch(url);
            results.push(result);
          }
        }
      })()
    );
  }
  await Promise.all(workers);
  return results;
}
537
/**
 * Per-place analytics used by batchScraper (a bundler-duplicated copy of
 * calculateAnalytics). Fix: averageRating is now rounded to 2 decimals,
 * matching the exported calculateAnalytics so batch and single-place
 * results agree.
 */
function calculateAnalytics2(reviews) {
  const totalReviews = reviews.length;
  const ratingDistribution = {
    1: 0,
    2: 0,
    3: 0,
    4: 0,
    5: 0
  };
  let totalRating = 0;
  let reviewsWithText = 0;
  let reviewsWithImages = 0;
  let reviewsWithResponse = 0;
  for (const review of reviews) {
    const rating = review.review.rating;
    totalRating += rating;
    ratingDistribution[rating] = (ratingDistribution[rating] || 0) + 1;
    if (review.review.text) reviewsWithText++;
    if (review.images && review.images.length > 0) reviewsWithImages++;
    if (review.response) reviewsWithResponse++;
  }
  return {
    totalReviews,
    // Rounded for consistency with calculateAnalytics.
    averageRating: totalReviews > 0 ? Number((totalRating / totalReviews).toFixed(2)) : 0,
    ratingDistribution,
    reviewsWithText,
    reviewsWithImages,
    reviewsWithResponse
  };
}
567
+
568
// src/filters.ts
/**
 * Filter cleaned reviews by rating bounds, presence of text/images/response,
 * and keyword matches (case-insensitive; any keyword suffices).
 * Criteria left undefined are ignored.
 */
function filterReviews(reviews, filters) {
  const { minRating, maxRating, hasText, hasImages, hasResponse, keywords } = filters;
  return reviews.filter((review) => {
    const rating = review.review.rating;
    if (minRating !== void 0 && rating < minRating) return false;
    if (maxRating !== void 0 && rating > maxRating) return false;
    if (hasText !== void 0 && hasText !== !!review.review.text) return false;
    if (hasImages !== void 0 && hasImages !== (!!review.images && review.images.length > 0)) return false;
    if (hasResponse !== void 0 && hasResponse !== !!review.response) return false;
    if (keywords && keywords.length > 0) {
      const text = (review.review.text || "").toLowerCase();
      const matchesAny = keywords.some((keyword) => text.includes(keyword.toLowerCase()));
      if (!matchesAny) return false;
    }
    return true;
  });
}
607
/**
 * Case-insensitive search across review text and author names.
 * Fix: guards against a missing author name (parseReviews can emit
 * `undefined` when the author row lacks a name), which previously
 * crashed with "Cannot read properties of undefined".
 */
function searchReviews(reviews, query) {
  const lowerQuery = query.toLowerCase();
  return reviews.filter((review) => {
    const text = (review.review.text || "").toLowerCase();
    const authorName = (review.author?.name || "").toLowerCase();
    return text.includes(lowerQuery) || authorName.includes(lowerQuery);
  });
}
615
+
616
// src/stream.ts
/**
 * Async generator that yields reviews one at a time as pages are fetched.
 * Takes the same options as `scraper`, but results stream instead of
 * accumulating in memory. Yields raw rows, or parsed review objects when
 * `clean` is true.
 */
async function* scrapeStream(url, options = {}) {
  const {
    sort_type = "relevant",
    search_query = "",
    pages = "max",
    clean = false,
    lang = "en",
    retry,
    cache: cacheOptions,
    rateLimit
  } = options;
  validateParams(url, sort_type, pages);
  const sort = SortEnum[sort_type];
  const resolvedUrl = await resolveUrl(url);
  const placeId = extractPlaceId(resolvedUrl);
  // Optional cache / rate limiter, scoped to this stream invocation.
  const cache = cacheOptions?.enabled ? new Cache(cacheOptions) : void 0;
  const rateLimiter = rateLimit ? new RateLimiter(rateLimit) : void 0;
  const retryOptions = {
    ...retry
  };
  let nextPageToken = "";
  let currentPage = 1;
  const maxPages = pages === "max" ? Infinity : pages;
  while (currentPage <= maxPages) {
    const result = await fetchReviews(
      placeId,
      sort,
      nextPageToken || "",
      search_query,
      lang,
      cache,
      rateLimiter,
      retryOptions
    );
    const reviews = clean ? parseReviews(result.data) : result.data;
    for (const review of reviews) {
      yield review;
    }
    nextPageToken = result.nextPageToken;
    // Stop when pagination ends or the page came back empty.
    if (!nextPageToken || !result.data.length) {
      break;
    }
    // Politeness delay between pages when no rate limiter paces requests.
    if (!rateLimiter && currentPage < maxPages) {
      await new Promise((resolve) => setTimeout(resolve, 1e3));
    }
    currentPage++;
  }
}
665
+
666
// src/index.ts
/**
 * Scrape Google Maps reviews for a single place URL.
 *
 * @param url     Google Maps place URL (goo.gl short links are resolved)
 * @param options sort_type, search_query, pages ("max" or a count), clean,
 *                lang, retry, cache, rateLimit, timeout, onProgress
 * @returns raw review rows, or parsed review objects when `clean` is true
 */
async function scraper(url, options = {}) {
  const {
    sort_type = "relevant",
    search_query = "",
    pages = "max",
    clean = false,
    lang = "en",
    retry,
    cache: cacheOptions,
    rateLimit,
    timeout,
    onProgress
  } = options;
  validateParams(url, sort_type, pages);
  const sort = SortEnum[sort_type];
  // Optional per-call cache and rate limiter.
  const cache = cacheOptions?.enabled ? new Cache(cacheOptions) : void 0;
  const rateLimiter = rateLimit ? new RateLimiter(rateLimit) : void 0;
  // A top-level `timeout` seeds the retry options; explicit retry wins.
  const retryOptions = { timeout, ...retry };
  try {
    return await paginateReviews(
      url,
      sort,
      pages,
      search_query,
      clean,
      lang,
      cache,
      rateLimiter,
      retryOptions,
      onProgress
    );
  } catch (e) {
    console.error("Scraper Error:", e);
    throw e;
  }
}
706
+
707
// Public API surface of the package (CommonJS exports).
// Fix: removed the bundler-duplicated sourceMappingURL directive — tools
// honor only one, and the duplicate was emitted twice.
exports.FetchError = FetchError;
exports.InvalidUrlError = InvalidUrlError;
exports.ParseError = ParseError;
exports.RateLimitError = RateLimitError;
exports.ScraperError = ScraperError;
exports.SortEnum = SortEnum;
exports.TimeoutError = TimeoutError;
exports.batchScraper = batchScraper;
exports.calculateAnalytics = calculateAnalytics;
exports.filterReviews = filterReviews;
exports.getRecentReviews = getRecentReviews;
exports.getTopRatedReviews = getTopRatedReviews;
exports.groupByRating = groupByRating;
exports.scrapeStream = scrapeStream;
exports.scraper = scraper;
exports.searchReviews = searchReviews;
//# sourceMappingURL=index.cjs.map