hotelzero 1.12.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -39,6 +39,7 @@ export const ErrorCodes = {
39
39
  NETWORK_ERROR: "NETWORK_ERROR",
40
40
  TIMEOUT: "TIMEOUT",
41
41
  BLOCKED: "BLOCKED",
42
+ INVALID_PARAMS: "INVALID_PARAMS",
42
43
  };
43
44
  const DEFAULT_RETRY_CONFIG = {
44
45
  maxRetries: 3,
@@ -74,7 +75,8 @@ const USER_AGENTS = [
74
75
  * Get a random user agent from the pool
75
76
  */
76
77
  function getRandomUserAgent() {
77
- return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
78
+ const index = Math.floor(Math.random() * USER_AGENTS.length);
79
+ return USER_AGENTS[index] ?? USER_AGENTS[0];
78
80
  }
79
81
  // Retry with exponential backoff
80
82
  async function retryWithBackoff(fn, config = DEFAULT_RETRY_CONFIG, onRetry) {
@@ -762,8 +764,15 @@ export class HotelBrowser {
762
764
  // Scroll to load more results (pass limit to control how many to load)
763
765
  const targetResults = params.limit || 25;
764
766
  await this.scrollToLoadMore(targetResults);
765
- // Extract detailed hotel info
766
- let hotels = await this.extractHotelDetails();
767
+ // Try API-based extraction first (more reliable), fall back to DOM scraping
768
+ let hotels = await this.extractHotelsFromAPI();
769
+ if (hotels.length === 0) {
770
+ logger.debug("API extraction returned no results, falling back to DOM scraping");
771
+ hotels = await this.extractHotelDetails();
772
+ }
773
+ else {
774
+ logger.debug({ hotelCount: hotels.length }, "Hotels extracted from API cache");
775
+ }
767
776
  logger.debug({ hotelCount: hotels.length }, "Hotels extracted from page");
768
777
  // Apply limit to cap results
769
778
  if (params.limit && params.limit > 0) {
@@ -785,6 +794,287 @@ export class HotelBrowser {
785
794
  logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Search attempt failed, retrying");
786
795
  });
787
796
  }
797
+ /**
798
+ * Search for a specific hotel's rate via the search API.
799
+ * This is 100% API-based (no HTML scraping) and returns detailed rate info
800
+ * including room type, meal plan, cancellation policy, and bed configuration.
801
+ *
802
+ * The method searches for the hotel by name and extracts rate details from
803
+ * the Apollo cache's `blocks` array and `matchingUnitConfigurations`.
804
+ *
805
+ * @param hotelUrl - The hotel's URL or name/slug (e.g., "la-sanguine" or full URL)
806
+ * @param checkIn - Check-in date (YYYY-MM-DD)
807
+ * @param checkOut - Check-out date (YYYY-MM-DD)
808
+ * @param guests - Number of guests
809
+ * @param rooms - Number of rooms
810
+ * @param filters - Optional rate filters (breakfast, free cancellation, bed type)
811
+ * @returns Rate details or null if hotel not found in results
812
+ */
813
+ async searchHotelRates(hotelUrl, checkIn, checkOut, guests = 2, rooms = 1, filters) {
814
+ if (!this.page) {
815
+ throw new HotelSearchError("Browser not initialized. Call init() first.", ErrorCodes.BROWSER_NOT_INITIALIZED, false);
816
+ }
817
+ // Extract hotel name from URL for search query
818
+ const hotelName = this.extractHotelNameFromUrl(hotelUrl);
819
+ if (!hotelName) {
820
+ throw new HotelSearchError("Could not extract hotel name from URL", ErrorCodes.INVALID_PARAMS, false);
821
+ }
822
+ logger.info({ hotelName, checkIn, checkOut, guests, rooms, hasFilters: !!filters }, "Searching for hotel rate via API");
823
+ // Build search URL with hotel name as destination
824
+ const searchFilters = {};
825
+ // Apply rate-specific filters
826
+ if (filters?.breakfast) {
827
+ searchFilters.breakfast = true;
828
+ }
829
+ if (filters?.freeCancellation) {
830
+ searchFilters.freeCancellation = true;
831
+ }
832
+ const searchParams = {
833
+ destination: hotelName.replace(/-/g, " "), // "la-sanguine" -> "la sanguine"
834
+ checkIn,
835
+ checkOut,
836
+ guests,
837
+ rooms,
838
+ limit: 10, // Small limit since we're looking for a specific hotel
839
+ };
840
+ const url = this.buildBookingUrl(searchParams, searchFilters);
841
+ logger.debug({ url }, "Hotel rate search URL");
842
+ return await retryWithBackoff(async () => {
843
+ await this.enforceRateLimit();
844
+ try {
845
+ await this.page.goto(url, {
846
+ waitUntil: "networkidle",
847
+ timeout: 30000,
848
+ });
849
+ }
850
+ catch (error) {
851
+ const err = error;
852
+ if (err.message.includes("timeout") || err.message.includes("Timeout")) {
853
+ throw new HotelSearchError("Page load timed out. The server may be slow or unavailable.", ErrorCodes.TIMEOUT, true);
854
+ }
855
+ throw new HotelSearchError(`Navigation failed: ${err.message}`, ErrorCodes.NAVIGATION_FAILED, true);
856
+ }
857
+ await this.page.waitForTimeout(2000);
858
+ await this.checkForBlocking();
859
+ await this.dismissPopups();
860
+ // Extract rate details from Apollo cache
861
+ const rateResult = await this.extractHotelRateFromAPI(hotelName, filters);
862
+ if (rateResult) {
863
+ // Populate search params in result
864
+ rateResult.checkIn = checkIn;
865
+ rateResult.checkOut = checkOut;
866
+ rateResult.guests = guests;
867
+ rateResult.rooms = rooms;
868
+ logger.info({ hotelName: rateResult.hotelName, price: rateResult.price, roomName: rateResult.roomName }, "Hotel rate found via API");
869
+ await this.saveSession();
870
+ return rateResult;
871
+ }
872
+ logger.warn({ hotelName }, "Hotel not found in search results");
873
+ return null;
874
+ }, DEFAULT_RETRY_CONFIG, (attempt, error, delayMs) => {
875
+ logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Hotel rate search failed, retrying");
876
+ });
877
+ }
878
+ /**
879
+ * Extract hotel name/slug from a Booking.com URL.
880
+ * Handles formats like:
881
+ * - https://www.booking.com/hotel/fr/la-sanguine.html
882
+ * - /hotel/fr/la-sanguine.html
883
+ * - la-sanguine
884
+ */
885
+ extractHotelNameFromUrl(urlOrName) {
886
+ // If it's just a name/slug (no slashes), return as-is
887
+ if (!urlOrName.includes("/")) {
888
+ return urlOrName.replace(/\.html$/, "");
889
+ }
890
+ // Extract from URL pattern: /hotel/{country}/{name}.html
891
+ const match = urlOrName.match(/\/hotel\/[a-z]{2}\/([^/.]+)/i);
892
+ if (match && match[1]) {
893
+ return match[1];
894
+ }
895
+ // Fallback: try to get the last path segment
896
+ const parts = urlOrName.split("/").filter(Boolean);
897
+ const lastPart = parts[parts.length - 1];
898
+ return lastPart?.replace(/\.html$/, "") || null;
899
+ }
900
+ /**
901
+ * Extract hotel rate details from Apollo cache.
902
+ * Finds the hotel matching the given name and extracts rate info from
903
+ * the `blocks` array and `matchingUnitConfigurations`.
904
+ */
905
+ async extractHotelRateFromAPI(hotelSlug, filters) {
906
+ if (!this.page)
907
+ return null;
908
+ // Bed type mapping for filter matching
909
+ const bedTypeMap = {
910
+ single: 1,
911
+ twin: 2,
912
+ double: 3,
913
+ queen: 5,
914
+ king: 6,
915
+ };
916
+ const targetBedType = filters?.bedType ? bedTypeMap[filters.bedType] : undefined;
917
+ return await this.page.evaluate(({ hotelSlug, targetBedType }) => {
918
+ try {
919
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
920
+ const w = window;
921
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
922
+ if (!cache)
923
+ return null;
924
+ const rootQuery = cache["ROOT_QUERY"];
925
+ if (!rootQuery)
926
+ return null;
927
+ const searchQueries = rootQuery.searchQueries;
928
+ if (!searchQueries)
929
+ return null;
930
+ const searchKey = Object.keys(searchQueries).find((k) => k.startsWith("search("));
931
+ if (!searchKey)
932
+ return null;
933
+ const searchOutput = searchQueries[searchKey];
934
+ const searchResults = searchOutput?.results;
935
+ if (!searchResults || !Array.isArray(searchResults))
936
+ return null;
937
+ // Find hotel matching the slug (check pageName)
938
+ const normalizedSlug = hotelSlug.toLowerCase().replace(/-/g, "");
939
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
940
+ const hotel = searchResults.find((h) => {
941
+ if (!h)
942
+ return false;
943
+ const pageName = h.basicPropertyData?.pageName?.toLowerCase()?.replace(/-/g, "") || "";
944
+ const displayName = h.displayName?.text?.toLowerCase()?.replace(/\s+/g, "") || "";
945
+ return pageName.includes(normalizedSlug) ||
946
+ normalizedSlug.includes(pageName) ||
947
+ displayName.includes(normalizedSlug) ||
948
+ normalizedSlug.includes(displayName);
949
+ });
950
+ if (!hotel)
951
+ return null;
952
+ // Extract basic hotel info
953
+ const hotelName = hotel.displayName?.text || hotel.basicPropertyData?.pageName || "Unknown";
954
+ const pageName = hotel.basicPropertyData?.pageName || "";
955
+ const countryCode = hotel.basicPropertyData?.location?.countryCode || "";
956
+ const hotelId = hotel.basicPropertyData?.id?.toString() || "";
957
+ const hotelUrl = countryCode && pageName
958
+ ? `https://www.booking.com/hotel/${countryCode}/${pageName}.html`
959
+ : "";
960
+ // Get price info
961
+ const priceInfo = hotel.priceDisplayInfoIrene;
962
+ const displayPrice = priceInfo?.displayPrice?.amountPerStay;
963
+ const price = displayPrice?.amountUnformatted ?? 0;
964
+ const priceDisplay = displayPrice?.amountRounded || displayPrice?.amount || "$0";
965
+ const currency = displayPrice?.currency || "USD";
966
+ const pricePerNight = priceInfo?.averagePricePerNight?.amountUnformatted ?? 0;
967
+ // Get blocks array (rate options)
968
+ const blocks = hotel.blocks;
969
+ if (!blocks || !Array.isArray(blocks) || blocks.length === 0) {
970
+ // No blocks, return basic info without detailed rate
971
+ return {
972
+ hotelName,
973
+ hotelId,
974
+ hotelUrl,
975
+ checkIn: "",
976
+ checkOut: "",
977
+ guests: 0,
978
+ rooms: 0,
979
+ roomName: "Unknown",
980
+ roomId: "",
981
+ price,
982
+ priceDisplay,
983
+ pricePerNight,
984
+ currency,
985
+ mealPlan: "Unknown",
986
+ cancellationPolicy: "Unknown",
987
+ freeCancellationUntil: null,
988
+ bedType: "Unknown",
989
+ bedCount: 0,
990
+ };
991
+ }
992
+ // Get the first (cheapest/best match) block
993
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
994
+ const block = blocks[0];
995
+ const blockId = block.blockId || {};
996
+ const roomId = blockId.roomId?.toString() || "";
997
+ const mealPlanId = blockId.mealPlanId;
998
+ // Meal plan mapping
999
+ const mealPlanNames = {
1000
+ 0: "Room only",
1001
+ 1: "Breakfast included",
1002
+ 2: "Half board",
1003
+ 3: "Full board",
1004
+ 4: "All-inclusive",
1005
+ };
1006
+ const mealPlan = mealPlanNames[mealPlanId] || "Room only";
1007
+ // Cancellation policy
1008
+ const freeCancellationUntil = block.freeCancellationUntil || null;
1009
+ const cancellationPolicy = freeCancellationUntil
1010
+ ? `Free cancellation until ${freeCancellationUntil}`
1011
+ : "Non-refundable";
1012
+ // Get room name and bed configuration from matchingUnitConfigurations
1013
+ let roomName = "Standard Room";
1014
+ let bedType = "Unknown";
1015
+ let bedCount = 0;
1016
+ const unitConfigs = hotel.matchingUnitConfigurations?.unitConfigurations;
1017
+ if (unitConfigs && Array.isArray(unitConfigs)) {
1018
+ // If filtering by bed type, try to find matching config
1019
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1020
+ let matchingConfig = unitConfigs[0];
1021
+ if (targetBedType !== undefined) {
1022
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1023
+ const bedMatch = unitConfigs.find((config) => {
1024
+ const beds = config.bedConfigurations?.[0]?.beds || [];
1025
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1026
+ return beds.some((bed) => bed.type === targetBedType);
1027
+ });
1028
+ if (bedMatch) {
1029
+ matchingConfig = bedMatch;
1030
+ }
1031
+ }
1032
+ if (matchingConfig) {
1033
+ roomName = matchingConfig.name || roomName;
1034
+ const beds = matchingConfig.bedConfigurations?.[0]?.beds;
1035
+ if (beds && beds.length > 0) {
1036
+ const firstBed = beds[0];
1037
+ bedCount = firstBed.count || 1;
1038
+ // Reverse map bed type
1039
+ const bedTypeNames = {
1040
+ 1: "Single",
1041
+ 2: "Twin",
1042
+ 3: "Double",
1043
+ 4: "Large Double",
1044
+ 5: "Queen",
1045
+ 6: "King",
1046
+ 7: "Super King",
1047
+ };
1048
+ bedType = bedTypeNames[firstBed.type] || "Unknown";
1049
+ }
1050
+ }
1051
+ }
1052
+ return {
1053
+ hotelName,
1054
+ hotelId,
1055
+ hotelUrl,
1056
+ checkIn: "", // Will be filled by caller
1057
+ checkOut: "",
1058
+ guests: 0,
1059
+ rooms: 0,
1060
+ roomName,
1061
+ roomId,
1062
+ price,
1063
+ priceDisplay,
1064
+ pricePerNight,
1065
+ currency,
1066
+ mealPlan,
1067
+ cancellationPolicy,
1068
+ freeCancellationUntil,
1069
+ bedType,
1070
+ bedCount,
1071
+ };
1072
+ }
1073
+ catch {
1074
+ return null;
1075
+ }
1076
+ }, { hotelSlug, targetBedType });
1077
+ }
788
1078
  async dismissPopups() {
789
1079
  if (!this.page)
790
1080
  return;
@@ -879,10 +1169,11 @@ export class HotelBrowser {
879
1169
  let priceText = "";
880
1170
  let price = null;
881
1171
  // First price element is usually per night
882
- if (allPriceEls.length > 0) {
883
- priceText = allPriceEls[0].textContent?.trim() || "";
1172
+ const firstPriceEl = allPriceEls[0];
1173
+ if (firstPriceEl) {
1174
+ priceText = firstPriceEl.textContent?.trim() || "";
884
1175
  const priceMatch = priceText.match(/\$?([\d,]+)/);
885
- price = priceMatch ? parseInt(priceMatch[1].replace(",", "")) : null;
1176
+ price = priceMatch?.[1] ? parseInt(priceMatch[1].replace(",", "")) : null;
886
1177
  }
887
1178
  // Rating - look for the numeric score
888
1179
  const ratingScoreEl = card.querySelector('[data-testid="review-score"] .dff2e52086');
@@ -895,7 +1186,7 @@ export class HotelBrowser {
895
1186
  const reviewCountEl = card.querySelector('[data-testid="review-score"] .fb14de7f14');
896
1187
  const reviewText = reviewCountEl?.textContent || "";
897
1188
  const reviewMatch = reviewText.match(/([\d,]+)/);
898
- const reviewCount = reviewMatch ? parseInt(reviewMatch[1].replace(",", "")) : null;
1189
+ const reviewCount = reviewMatch?.[1] ? parseInt(reviewMatch[1].replace(",", "")) : null;
899
1190
  // Distance to center
900
1191
  const distanceEl = card.querySelector('[data-testid="distance"]');
901
1192
  const distanceToCenter = distanceEl?.textContent?.trim() || "";
@@ -987,6 +1278,796 @@ export class HotelBrowser {
987
1278
  return results;
988
1279
  });
989
1280
  }
1281
+ /**
1282
+ * Extract hotel data from Booking.com's Apollo GraphQL cache.
1283
+ * This is more reliable than DOM scraping as it uses structured data.
1284
+ * Falls back gracefully if the cache structure changes.
1285
+ */
1286
+ async extractHotelsFromAPI() {
1287
+ if (!this.page)
1288
+ return [];
1289
+ return await this.page.evaluate(() => {
1290
+ try {
1291
+ // Access the Apollo cache embedded in the page
1292
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1293
+ const w = window;
1294
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1295
+ if (!cache)
1296
+ return [];
1297
+ const rootQuery = cache['ROOT_QUERY'];
1298
+ if (!rootQuery)
1299
+ return [];
1300
+ // searchQueries contains the search results
1301
+ const searchQueries = rootQuery.searchQueries;
1302
+ if (!searchQueries)
1303
+ return [];
1304
+ // Find the search key (complex key with query parameters)
1305
+ const searchKey = Object.keys(searchQueries).find(k => k.startsWith('search('));
1306
+ if (!searchKey)
1307
+ return [];
1308
+ const searchOutput = searchQueries[searchKey];
1309
+ if (!searchOutput)
1310
+ return [];
1311
+ // Get the results array
1312
+ const searchResults = searchOutput.results;
1313
+ if (!searchResults || !Array.isArray(searchResults))
1314
+ return [];
1315
+ const results = [];
1316
+ for (const hotel of searchResults) {
1317
+ if (!hotel)
1318
+ continue;
1319
+ // Skip sponsored/native ad listings
1320
+ const persuasion = hotel.persuasion;
1321
+ if (persuasion?.showNativeAdLabel || persuasion?.nativeAdId) {
1322
+ continue;
1323
+ }
1324
+ // Extract name
1325
+ const name = hotel.displayName?.text || hotel.basicPropertyData?.pageName || 'Unknown';
1326
+ // Extract price
1327
+ let price = null;
1328
+ let priceDisplay = 'Price not shown';
1329
+ const priceInfo = hotel.priceDisplayInfoIrene?.displayPrice?.amountPerStay;
1330
+ if (priceInfo) {
1331
+ priceDisplay = priceInfo.amountRounded || priceInfo.amount || priceDisplay;
1332
+ price = typeof priceInfo.amountUnformatted === 'number' ? priceInfo.amountUnformatted : null;
1333
+ }
1334
+ // Extract rating and reviews from basicPropertyData.reviews
1335
+ let rating = null;
1336
+ let ratingText = '';
1337
+ let reviewCount = null;
1338
+ const reviews = hotel.basicPropertyData?.reviews;
1339
+ if (reviews) {
1340
+ rating = typeof reviews.totalScore === 'number' ? reviews.totalScore : null;
1341
+ ratingText = reviews.totalScoreTextTag?.translation || '';
1342
+ reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
1343
+ }
1344
+ // Extract location
1345
+ const location = hotel.location?.displayLocation || '';
1346
+ const distanceToCenter = hotel.location?.mainDistance || '';
1347
+ // Build thumbnail URL
1348
+ let thumbnailUrl = null;
1349
+ const mainPhoto = hotel.basicPropertyData?.photos?.main;
1350
+ if (mainPhoto) {
1351
+ const relativeUrl = mainPhoto.highResJpegUrl?.relativeUrl ||
1352
+ mainPhoto.highResUrl?.relativeUrl ||
1353
+ mainPhoto.lowResJpegUrl?.relativeUrl;
1354
+ if (relativeUrl) {
1355
+ thumbnailUrl = `https://cf.bstatic.com${relativeUrl}`;
1356
+ }
1357
+ }
1358
+ // Build link with country code (required for API data to load on detail page)
1359
+ let link = '';
1360
+ const pageName = hotel.basicPropertyData?.pageName;
1361
+ const countryCode = hotel.basicPropertyData?.location?.countryCode;
1362
+ if (pageName && countryCode) {
1363
+ link = `https://www.booking.com/hotel/${countryCode}/${pageName}.html`;
1364
+ }
1365
+ else if (pageName) {
1366
+ // Fallback without country code (less reliable for API extraction)
1367
+ link = `https://www.booking.com/hotel/${pageName}.html`;
1368
+ }
1369
+ // Extract amenities and highlights
1370
+ const amenities = [];
1371
+ const highlights = [];
1372
+ // Sustainability
1373
+ if (hotel.propertySustainability?.isSustainable) {
1374
+ amenities.push('Sustainable');
1375
+ }
1376
+ // Policies
1377
+ const policies = hotel.policies;
1378
+ if (policies?.showFreeCancellation) {
1379
+ highlights.push('Free Cancellation');
1380
+ }
1381
+ if (policies?.showNoPrepayment) {
1382
+ highlights.push('No Prepayment');
1383
+ }
1384
+ if (policies?.showPetsAllowedForFree) {
1385
+ amenities.push('Pet Friendly');
1386
+ }
1387
+ // Meal plan
1388
+ if (hotel.mealPlanIncluded?.mealPlanType) {
1389
+ amenities.push('Breakfast Included');
1390
+ }
1391
+ // Extract availability info
1392
+ let availability = null;
1393
+ const soldOutInfo = hotel.soldOutInfo;
1394
+ if (soldOutInfo?.messages && soldOutInfo.messages.length > 0) {
1395
+ const msg = soldOutInfo.messages[0];
1396
+ if (msg?.text) {
1397
+ availability = msg.text;
1398
+ }
1399
+ }
1400
+ results.push({
1401
+ name,
1402
+ price,
1403
+ priceDisplay,
1404
+ rating,
1405
+ ratingText,
1406
+ reviewCount,
1407
+ location,
1408
+ distanceToCenter,
1409
+ amenities,
1410
+ highlights,
1411
+ link,
1412
+ thumbnailUrl,
1413
+ availability,
1414
+ });
1415
+ }
1416
+ return results;
1417
+ }
1418
+ catch {
1419
+ // If anything goes wrong with API extraction, return empty to trigger fallback
1420
+ return [];
1421
+ }
1422
+ });
1423
+ }
1424
+ /**
1425
+ * Extract hotel details from Booking.com's Apollo GraphQL cache on a hotel detail page.
1426
+ * This is more reliable than DOM scraping as it uses structured data.
1427
+ * Returns null if extraction fails (triggering DOM fallback).
1428
+ */
1429
+ async extractHotelDetailsFromAPI() {
1430
+ if (!this.page)
1431
+ return null;
1432
+ return await this.page.evaluate(() => {
1433
+ try {
1434
+ // Access the Apollo cache embedded in the page
1435
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1436
+ const w = window;
1437
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1438
+ if (!cache)
1439
+ return null;
1440
+ // Helper to resolve __ref pointers
1441
+ const resolveRef = (ref) => {
1442
+ if (ref && typeof ref === 'object' && '__ref' in ref) {
1443
+ return cache[ref.__ref];
1444
+ }
1445
+ return ref;
1446
+ };
1447
+ // Find the Property entry - it has a key like 'Property:{"id":6523595}'
1448
+ const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
1449
+ if (!propertyKey)
1450
+ return null;
1451
+ const property = cache[propertyKey];
1452
+ if (!property)
1453
+ return null;
1454
+ // Extract hotel ID from the property key
1455
+ const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
1456
+ const hotelId = idMatch ? idMatch[1] : null;
1457
+ // Get BasicPropertyData for address and location
1458
+ const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
1459
+ const basicData = basicDataKey ? cache[basicDataKey] : null;
1460
+ // Extract name
1461
+ const name = property.name || basicData?.name || 'Unknown';
1462
+ // Extract rating and reviews from property.reviews
1463
+ let rating = null;
1464
+ let ratingText = '';
1465
+ let reviewCount = null;
1466
+ const reviews = property.reviews;
1467
+ if (reviews) {
1468
+ reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
1469
+ // Find the total score from questions array
1470
+ const questions = reviews.questions;
1471
+ if (Array.isArray(questions)) {
1472
+ const totalQ = questions.find((q) => q?.name === 'total');
1473
+ if (totalQ && typeof totalQ.score === 'number') {
1474
+ const score = totalQ.score;
1475
+ rating = score;
1476
+ // Generate rating text based on score
1477
+ if (score >= 9)
1478
+ ratingText = 'Superb';
1479
+ else if (score >= 8)
1480
+ ratingText = 'Very Good';
1481
+ else if (score >= 7)
1482
+ ratingText = 'Good';
1483
+ else if (score >= 6)
1484
+ ratingText = 'Pleasant';
1485
+ else
1486
+ ratingText = 'Review score';
1487
+ }
1488
+ }
1489
+ }
1490
+ // Extract address from BasicPropertyData
1491
+ const address = basicData?.location?.formattedAddress ||
1492
+ basicData?.location?.formattedAddressShort || '';
1493
+ // Extract star rating from accommodation type
1494
+ let starRating = null;
1495
+ const accomType = resolveRef(property.accommodationType);
1496
+ if (accomType && typeof accomType === 'object' && 'starRating' in accomType) {
1497
+ starRating = accomType.starRating || null;
1498
+ }
1499
+ // Extract check-in/out times from houseRules
1500
+ let checkInTime = '';
1501
+ let checkOutTime = '';
1502
+ const houseRules = property.houseRules;
1503
+ if (houseRules?.checkinCheckoutTimes) {
1504
+ const times = houseRules.checkinCheckoutTimes;
1505
+ if (times.checkinTimeRange) {
1506
+ const from = times.checkinTimeRange.fromFormatted;
1507
+ const until = times.checkinTimeRange.untilFormatted;
1508
+ if (from && until) {
1509
+ checkInTime = `${from} - ${until}`;
1510
+ }
1511
+ else if (from) {
1512
+ checkInTime = `From ${from}`;
1513
+ }
1514
+ else if (until) {
1515
+ checkInTime = `Until ${until}`;
1516
+ }
1517
+ }
1518
+ if (times.checkoutTimeRange) {
1519
+ const from = times.checkoutTimeRange.fromFormatted;
1520
+ const until = times.checkoutTimeRange.untilFormatted;
1521
+ if (from && until) {
1522
+ checkOutTime = `${from} - ${until}`;
1523
+ }
1524
+ else if (until) {
1525
+ checkOutTime = `Until ${until}`;
1526
+ }
1527
+ else if (from) {
1528
+ checkOutTime = `From ${from}`;
1529
+ }
1530
+ }
1531
+ }
1532
+ // Extract popular facilities from accommodationHighlights
1533
+ const popularFacilities = [];
1534
+ const highlightKeys = Object.keys(property).filter(k => k.startsWith('accommodationHighlights('));
1535
+ for (const key of highlightKeys) {
1536
+ const highlights = property[key];
1537
+ if (Array.isArray(highlights)) {
1538
+ for (const item of highlights) {
1539
+ const entities = item?.entities;
1540
+ if (Array.isArray(entities)) {
1541
+ for (const entity of entities) {
1542
+ // Direct title (like BreakfastHighlight)
1543
+ if (entity?.title) {
1544
+ popularFacilities.push(entity.title);
1545
+ }
1546
+ // Resolve __ref for GenericFacilityHighlight, WifiFacilityHighlight, etc.
1547
+ const resolved = resolveRef(entity);
1548
+ if (resolved && typeof resolved === 'object' && 'title' in resolved) {
1549
+ const title = resolved.title;
1550
+ if (title && !popularFacilities.includes(title)) {
1551
+ popularFacilities.push(title);
1552
+ }
1553
+ }
1554
+ }
1555
+ }
1556
+ }
1557
+ }
1558
+ }
1559
+ // Extract all facilities from highlights (popularity based)
1560
+ const allFacilities = [];
1561
+ const facilityKeys = Object.keys(property).filter(k => k.startsWith('highlights('));
1562
+ for (const key of facilityKeys) {
1563
+ const highlightData = property[key];
1564
+ const entities = highlightData?.entities;
1565
+ if (Array.isArray(entities)) {
1566
+ for (const entity of entities) {
1567
+ // Skip Meal type entries
1568
+ if (entity?.__typename === 'Meal')
1569
+ continue;
1570
+ const resolved = resolveRef(entity);
1571
+ if (resolved && typeof resolved === 'object') {
1572
+ // For BaseFacility, look at instances
1573
+ const instances = resolved.instances;
1574
+ if (Array.isArray(instances)) {
1575
+ for (const inst of instances) {
1576
+ const resolvedInst = resolveRef(inst);
1577
+ if (resolvedInst && typeof resolvedInst === 'object' && 'title' in resolvedInst) {
1578
+ const title = resolvedInst.title;
1579
+ if (title && !allFacilities.includes(title)) {
1580
+ allFacilities.push(title);
1581
+ }
1582
+ }
1583
+ }
1584
+ }
1585
+ }
1586
+ }
1587
+ }
1588
+ }
1589
+ // Extract photos from propertyGallery
1590
+ const photos = [];
1591
+ const galleryKeys = Object.keys(property).filter(k => k.startsWith('propertyGallery('));
1592
+ for (const key of galleryKeys) {
1593
+ const gallery = property[key];
1594
+ // Main photo
1595
+ if (gallery?.mainPhoto) {
1596
+ const mainPhoto = resolveRef(gallery.mainPhoto);
1597
+ if (mainPhoto && typeof mainPhoto === 'object') {
1598
+ // Look for resource with max500 or max1024x768
1599
+ const photoObj = mainPhoto;
1600
+ const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
1601
+ if (resourceKey) {
1602
+ const resource = photoObj[resourceKey];
1603
+ if (resource?.absoluteUrl) {
1604
+ photos.push(resource.absoluteUrl);
1605
+ }
1606
+ }
1607
+ }
1608
+ }
1609
+ // Room photos
1610
+ const roomPhotos = gallery?.roomPhotos;
1611
+ if (Array.isArray(roomPhotos)) {
1612
+ for (const room of roomPhotos) {
1613
+ const roomPhotosList = room?.photos;
1614
+ if (Array.isArray(roomPhotosList) && photos.length < 5) {
1615
+ for (const photoRef of roomPhotosList) {
1616
+ if (photos.length >= 5)
1617
+ break;
1618
+ const photo = resolveRef(photoRef);
1619
+ if (photo && typeof photo === 'object') {
1620
+ const photoObj = photo;
1621
+ const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
1622
+ if (resourceKey) {
1623
+ const resource = photoObj[resourceKey];
1624
+ if (resource?.absoluteUrl && !photos.includes(resource.absoluteUrl)) {
1625
+ photos.push(resource.absoluteUrl);
1626
+ }
1627
+ }
1628
+ }
1629
+ }
1630
+ }
1631
+ }
1632
+ }
1633
+ }
1634
+ // Extract room types from property.rooms
1635
+ const roomTypes = [];
1636
+ const rooms = property.rooms;
1637
+ if (Array.isArray(rooms)) {
1638
+ for (const roomRef of rooms) {
1639
+ const room = resolveRef(roomRef);
1640
+ if (room && typeof room === 'object') {
1641
+ const roomObj = room;
1642
+ const roomName = roomObj.name || roomObj.description;
1643
+ if (roomName && !roomTypes.includes(roomName)) {
1644
+ roomTypes.push(roomName);
1645
+ }
1646
+ }
1647
+ }
1648
+ }
1649
+ // Extract location info
1650
+ let locationInfo = '';
1651
+ if (basicData?.location) {
1652
+ const loc = basicData.location;
1653
+ const parts = [];
1654
+ if (loc.city)
1655
+ parts.push(loc.city);
1656
+ if (loc.countryCode)
1657
+ parts.push(loc.countryCode.toUpperCase());
1658
+ locationInfo = parts.join(', ');
1659
+ if (loc.latitude && loc.longitude) {
1660
+ locationInfo += ` (${loc.latitude.toFixed(4)}, ${loc.longitude.toFixed(4)})`;
1661
+ }
1662
+ }
1663
+ // Extract review category scores for highlights
1664
+ const guestReviewHighlights = [];
1665
+ if (reviews?.questions && Array.isArray(reviews.questions)) {
1666
+ const categoryNames = {
1667
+ 'hotel_staff': 'Staff',
1668
+ 'hotel_location': 'Location',
1669
+ 'hotel_clean': 'Cleanliness',
1670
+ 'hotel_comfort': 'Comfort',
1671
+ 'hotel_value': 'Value for money',
1672
+ 'hotel_services': 'Facilities',
1673
+ 'hotel_free_wifi': 'Free WiFi'
1674
+ };
1675
+ for (const q of reviews.questions) {
1676
+ if (q?.name && q.name !== 'total' && typeof q.score === 'number') {
1677
+ const displayName = categoryNames[q.name] || q.name;
1678
+ if (categoryNames[q.name]) {
1679
+ guestReviewHighlights.push(`${displayName}: ${q.score.toFixed(1)}`);
1680
+ }
1681
+ }
1682
+ }
1683
+ }
1684
+ // Validate we have meaningful data before returning
1685
+ // Name should be a proper hotel name (at least 3 chars, not 'Unknown')
1686
+ if (!name || name === 'Unknown' || name.length < 3) {
1687
+ return null; // Trigger DOM fallback
1688
+ }
1689
+ // Note: Description, pricePerNight, totalPrice, nearbyAttractions may need DOM fallback
1690
+ // as they're not consistently in the Apollo cache or are dynamic
1691
+ return {
1692
+ name,
1693
+ rating,
1694
+ ratingText,
1695
+ reviewCount,
1696
+ starRating,
1697
+ address,
1698
+ description: '', // Not typically in cache, will need DOM fallback if needed
1699
+ highlights: popularFacilities.slice(0, 5).join(', '),
1700
+ pricePerNight: null, // Dynamic, not in cache
1701
+ priceDisplay: '',
1702
+ totalPrice: '',
1703
+ checkInTime,
1704
+ checkOutTime,
1705
+ popularFacilities: popularFacilities.slice(0, 15),
1706
+ allFacilities: allFacilities.slice(0, 30),
1707
+ roomTypes: roomTypes.slice(0, 5),
1708
+ photos: photos.slice(0, 5),
1709
+ nearbyAttractions: [], // Would need propertySurroundings query
1710
+ guestReviewHighlights: guestReviewHighlights.slice(0, 7),
1711
+ locationInfo
1712
+ };
1713
+ }
1714
+ catch {
1715
+ // If anything goes wrong with API extraction, return null to trigger fallback
1716
+ return null;
1717
+ }
1718
+ });
1719
+ }
1720
+ /**
1721
+ * Fetch room facilities via Booking.com's GraphQL API.
1722
+ * This provides detailed amenities for each room type (AC, TV, bathroom details, etc.)
1723
+ * Must be called when already on a hotel page with an active session.
1724
+ *
1725
+ * @param hotelId - The numeric hotel ID (e.g., 6523595)
1726
+ * @param checkIn - Check-in date in YYYY-MM-DD format
1727
+ * @param checkOut - Check-out date in YYYY-MM-DD format
1728
+ * @returns Map of roomId to array of amenity categories
1729
+ */
1730
+ async fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut) {
1731
+ if (!this.page)
1732
+ return new Map();
1733
+ try {
1734
+ const result = await this.page.evaluate(async ({ hotelId, checkIn, checkOut }) => {
1735
+ const query = `
1736
+ query RoomPageDesktopRDS($rdsInput: RDSRoomDetailQueryInput!) {
1737
+ roomDetail(roomDetailQueryInput: $rdsInput) {
1738
+ categorizedFacilitiesForAllRooms {
1739
+ roomId
1740
+ categorizedFacilities {
1741
+ category
1742
+ facilities {
1743
+ name
1744
+ id
1745
+ }
1746
+ }
1747
+ }
1748
+ }
1749
+ }
1750
+ `;
1751
+ const variables = {
1752
+ rdsInput: {
1753
+ hotelId: String(hotelId),
1754
+ searchConfig: {
1755
+ searchConfigDate: {
1756
+ checkin: checkIn,
1757
+ checkout: checkOut,
1758
+ },
1759
+ nbRooms: 1,
1760
+ nbAdults: 2,
1761
+ nbChildren: 0,
1762
+ childrenAges: [],
1763
+ },
1764
+ highlightedBlocks: [],
1765
+ selectedFilters: '',
1766
+ travelReason: 'LEISURE',
1767
+ },
1768
+ };
1769
+ try {
1770
+ const response = await fetch('/dml/graphql', {
1771
+ method: 'POST',
1772
+ headers: {
1773
+ 'Content-Type': 'application/json',
1774
+ 'x-booking-topic': 'capla_browser_b-property-web-property-page',
1775
+ 'x-booking-context-action-name': 'hotel',
1776
+ 'apollographql-client-name': 'b-property-web-property-page_rust',
1777
+ },
1778
+ body: JSON.stringify({
1779
+ operationName: 'RoomPageDesktopRDS',
1780
+ variables,
1781
+ query,
1782
+ }),
1783
+ });
1784
+ if (!response.ok) {
1785
+ return { error: `HTTP ${response.status}` };
1786
+ }
1787
+ const data = await response.json();
1788
+ return data;
1789
+ }
1790
+ catch (e) {
1791
+ return { error: e instanceof Error ? e.message : 'Unknown error' };
1792
+ }
1793
+ }, { hotelId, checkIn, checkOut });
1794
+ if ('error' in result) {
1795
+ logger.debug({ error: result.error }, 'GraphQL room facilities fetch failed');
1796
+ return new Map();
1797
+ }
1798
+ // Parse the response into our map structure
1799
+ const facilitiesMap = new Map();
1800
+ const roomData = result?.data?.roomDetail?.categorizedFacilitiesForAllRooms || [];
1801
+ for (const room of roomData) {
1802
+ const roomId = String(room.roomId);
1803
+ const categories = [];
1804
+ for (const cat of room.categorizedFacilities || []) {
1805
+ categories.push({
1806
+ category: cat.category || 'General',
1807
+ items: (cat.facilities || []).map((f) => f.name || '').filter(Boolean),
1808
+ });
1809
+ }
1810
+ if (categories.length > 0) {
1811
+ facilitiesMap.set(roomId, categories);
1812
+ }
1813
+ }
1814
+ logger.debug({ roomCount: facilitiesMap.size }, 'Fetched room facilities via GraphQL');
1815
+ return facilitiesMap;
1816
+ }
1817
+ catch (error) {
1818
+ logger.debug({ error }, 'Failed to fetch room facilities via GraphQL');
1819
+ return new Map();
1820
+ }
1821
+ }
1822
+ /**
1823
+ * Extract hotel ID from the current page URL or DOM.
1824
+ * Booking.com hotel IDs are typically in the URL path or data attributes.
1825
+ */
1826
+ async extractHotelId() {
1827
+ if (!this.page)
1828
+ return null;
1829
+ return await this.page.evaluate(() => {
1830
+ // Try to get from URL path (e.g., /hotel/fr/hotel-name.html?... contains ID in data)
1831
+ // Actually, the ID is often in data attributes or Apollo cache
1832
+ // Method 1: Look for data-hotel-id attribute
1833
+ const hotelIdEl = document.querySelector('[data-hotel-id]');
1834
+ if (hotelIdEl) {
1835
+ return hotelIdEl.getAttribute('data-hotel-id');
1836
+ }
1837
+ // Method 2: Look in Apollo cache
1838
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1839
+ const w = window;
1840
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1841
+ if (cache) {
1842
+ const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
1843
+ if (propertyKey) {
1844
+ const match = propertyKey.match(/Property:\{"id":(\d+)\}/);
1845
+ if (match && match[1])
1846
+ return match[1];
1847
+ }
1848
+ }
1849
+ // Method 3: Look for form inputs with hotel_id
1850
+ const hotelInput = document.querySelector('input[name="hotel_id"]');
1851
+ if (hotelInput?.value)
1852
+ return hotelInput.value;
1853
+ // Method 4: Look in data-block-id attributes (format: roomTypeId_policyId_hotelId_...)
1854
+ const blockEl = document.querySelector('[data-block-id]');
1855
+ if (blockEl) {
1856
+ const blockId = blockEl.getAttribute('data-block-id') || '';
1857
+ const parts = blockId.split('_');
1858
+ // Hotel ID is typically in position 2 (after roomTypeId and policyId)
1859
+ const potentialHotelId = parts[2];
1860
+ if (parts.length >= 3 && potentialHotelId && /^\d{5,}$/.test(potentialHotelId)) {
1861
+ return potentialHotelId;
1862
+ }
1863
+ }
1864
+ return null;
1865
+ });
1866
+ }
1867
+ /**
1868
+ * Extract reviews data from Booking.com's Apollo GraphQL cache.
1869
+ * Returns null if extraction fails (triggering DOM fallback).
1870
+ */
1871
+ async extractReviewsFromAPI() {
1872
+ if (!this.page)
1873
+ return null;
1874
+ return await this.page.evaluate(() => {
1875
+ try {
1876
+ // Access the Apollo cache embedded in the page
1877
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1878
+ const w = window;
1879
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1880
+ if (!cache)
1881
+ return null;
1882
+ // Helper to resolve __ref pointers
1883
+ const resolveRef = (ref) => {
1884
+ if (ref && typeof ref === 'object' && '__ref' in ref) {
1885
+ return cache[ref.__ref];
1886
+ }
1887
+ return ref;
1888
+ };
1889
+ // Find the Property entry - it has a key like 'Property:{"id":6523595}'
1890
+ const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
1891
+ if (!propertyKey)
1892
+ return null;
1893
+ const property = cache[propertyKey];
1894
+ if (!property)
1895
+ return null;
1896
+ // Extract hotel ID from the property key
1897
+ const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
1898
+ const hotelId = idMatch ? idMatch[1] : null;
1899
+ // Get BasicPropertyData for hotel name
1900
+ const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
1901
+ const basicData = basicDataKey ? cache[basicDataKey] : null;
1902
+ // Extract hotel name
1903
+ const hotelName = property.name || basicData?.name || '';
1904
+ // Extract overall rating, total reviews, and rating breakdown from property.reviews
1905
+ let overallRating = null;
1906
+ let totalReviews = 0;
1907
+ const ratingBreakdown = {
1908
+ staff: null,
1909
+ facilities: null,
1910
+ cleanliness: null,
1911
+ comfort: null,
1912
+ valueForMoney: null,
1913
+ location: null,
1914
+ freeWifi: null,
1915
+ };
1916
+ const reviewsData = property.reviews;
1917
+ if (reviewsData) {
1918
+ totalReviews = typeof reviewsData.reviewsCount === 'number' ? reviewsData.reviewsCount : 0;
1919
+ // Map question names to breakdown fields
1920
+ const questionMap = {
1921
+ 'hotel_staff': 'staff',
1922
+ 'hotel_services': 'facilities',
1923
+ 'hotel_clean': 'cleanliness',
1924
+ 'hotel_comfort': 'comfort',
1925
+ 'hotel_value': 'valueForMoney',
1926
+ 'hotel_location': 'location',
1927
+ 'hotel_free_wifi': 'freeWifi',
1928
+ };
1929
+ const questions = reviewsData.questions;
1930
+ if (Array.isArray(questions)) {
1931
+ for (const q of questions) {
1932
+ if (!q?.name || typeof q.score !== 'number')
1933
+ continue;
1934
+ if (q.name === 'total') {
1935
+ overallRating = q.score;
1936
+ }
1937
+ else {
1938
+ const breakdownKey = questionMap[q.name];
1939
+ if (breakdownKey) {
1940
+ ratingBreakdown[breakdownKey] = q.score;
1941
+ }
1942
+ }
1943
+ }
1944
+ }
1945
+ }
1946
+ // Extract individual reviews from FeaturedReview entries
1947
+ const reviews = [];
1948
+ // Map customer types to display names
1949
+ const customerTypeMap = {
1950
+ 'SOLO_TRAVELLER': 'Solo traveler',
1951
+ 'YOUNG_COUPLE': 'Couple',
1952
+ 'MATURE_COUPLE': 'Couple',
1953
+ 'FAMILY_WITH_YOUNG_CHILDREN': 'Family with young children',
1954
+ 'FAMILY_WITH_OLDER_CHILDREN': 'Family with older children',
1955
+ 'WITH_FRIENDS': 'Group of friends',
1956
+ 'BUSINESS': 'Business traveler',
1957
+ };
1958
+ // Map country codes to names
1959
+ const countryCodeMap = {
1960
+ 'us': 'United States',
1961
+ 'gb': 'United Kingdom',
1962
+ 'fr': 'France',
1963
+ 'de': 'Germany',
1964
+ 'es': 'Spain',
1965
+ 'it': 'Italy',
1966
+ 'nl': 'Netherlands',
1967
+ 'be': 'Belgium',
1968
+ 'ch': 'Switzerland',
1969
+ 'au': 'Australia',
1970
+ 'ca': 'Canada',
1971
+ 'jp': 'Japan',
1972
+ 'cn': 'China',
1973
+ 'kr': 'South Korea',
1974
+ 'br': 'Brazil',
1975
+ 'mx': 'Mexico',
1976
+ 'in': 'India',
1977
+ 'ru': 'Russia',
1978
+ 'pl': 'Poland',
1979
+ 'se': 'Sweden',
1980
+ 'no': 'Norway',
1981
+ 'dk': 'Denmark',
1982
+ 'fi': 'Finland',
1983
+ 'at': 'Austria',
1984
+ 'pt': 'Portugal',
1985
+ 'gr': 'Greece',
1986
+ 'tr': 'Turkey',
1987
+ 'ie': 'Ireland',
1988
+ 'nz': 'New Zealand',
1989
+ 'za': 'South Africa',
1990
+ 'ar': 'Argentina',
1991
+ 'cl': 'Chile',
1992
+ 'co': 'Colombia',
1993
+ 'th': 'Thailand',
1994
+ 'sg': 'Singapore',
1995
+ 'my': 'Malaysia',
1996
+ 'id': 'Indonesia',
1997
+ 'ph': 'Philippines',
1998
+ 'vn': 'Vietnam',
1999
+ 'ae': 'United Arab Emirates',
2000
+ 'sa': 'Saudi Arabia',
2001
+ 'eg': 'Egypt',
2002
+ 'il': 'Israel',
2003
+ 'cz': 'Czech Republic',
2004
+ 'hu': 'Hungary',
2005
+ 'ro': 'Romania',
2006
+ };
2007
+ // Find all FeaturedReview entries
2008
+ const reviewKeys = Object.keys(cache).filter(k => k.startsWith('FeaturedReview:'));
2009
+ for (const key of reviewKeys) {
2010
+ const review = cache[key];
2011
+ if (!review)
2012
+ continue;
2013
+ // Format the date from Unix timestamp
2014
+ let dateStr = '';
2015
+ if (typeof review.completed === 'number') {
2016
+ const date = new Date(review.completed * 1000);
2017
+ dateStr = date.toLocaleDateString('en-US', {
2018
+ year: 'numeric',
2019
+ month: 'long',
2020
+ day: 'numeric'
2021
+ });
2022
+ }
2023
+ // Get room type from ref
2024
+ let roomType = '';
2025
+ const roomRef = resolveRef(review.roomType);
2026
+ if (roomRef && typeof roomRef === 'object' && 'name' in roomRef) {
2027
+ roomType = roomRef.name || '';
2028
+ }
2029
+ // Get country name from code
2030
+ const countryCode = (review.guestCountryCode || '').toLowerCase();
2031
+ const country = countryCodeMap[countryCode] || countryCode.toUpperCase();
2032
+ // Get traveler type display name
2033
+ const travelerType = customerTypeMap[review.customerType] || review.customerType || '';
2034
+ reviews.push({
2035
+ title: review.title || '',
2036
+ rating: typeof review.averageScore === 'number' ? review.averageScore : null,
2037
+ date: dateStr,
2038
+ travelerType,
2039
+ stayDate: '', // Not available in FeaturedReview
2040
+ roomType,
2041
+ nightsStayed: '', // Not available in FeaturedReview
2042
+ positive: review.positiveText || '',
2043
+ negative: review.negativeText || '',
2044
+ country,
2045
+ });
2046
+ }
2047
+ // Sort reviews by date (newest first - higher timestamp = newer)
2048
+ reviews.sort((a, b) => {
2049
+ // Parse dates back for comparison
2050
+ const dateA = new Date(a.date).getTime() || 0;
2051
+ const dateB = new Date(b.date).getTime() || 0;
2052
+ return dateB - dateA;
2053
+ });
2054
+ // Validate we have meaningful data
2055
+ if (!hotelName || hotelName.length < 3) {
2056
+ return null;
2057
+ }
2058
+ return {
2059
+ hotelName,
2060
+ overallRating,
2061
+ totalReviews,
2062
+ ratingBreakdown,
2063
+ reviews,
2064
+ };
2065
+ }
2066
+ catch {
2067
+ return null;
2068
+ }
2069
+ });
2070
+ }
990
2071
  scoreAndFilterHotels(hotels, filters) {
991
2072
  return hotels
992
2073
  .map((hotel) => {
@@ -1190,6 +2271,16 @@ export class HotelBrowser {
1190
2271
  await this.page.waitForTimeout(2000);
1191
2272
  await this.checkForBlocking();
1192
2273
  await this.dismissPopups();
2274
+ // Try API extraction first (more reliable structured data)
2275
+ const apiDetails = await this.extractHotelDetailsFromAPI();
2276
+ if (apiDetails) {
2277
+ logger.debug("Successfully extracted hotel details from API cache");
2278
+ return {
2279
+ ...apiDetails,
2280
+ url: hotelUrl,
2281
+ };
2282
+ }
2283
+ logger.debug("API extraction returned no results, falling back to DOM scraping");
1193
2284
  // Extract comprehensive hotel details using evaluate with string to avoid __name compilation issues
1194
2285
  const details = await this.page.evaluate(`
1195
2286
  (function() {
@@ -1473,7 +2564,7 @@ export class HotelBrowser {
1473
2564
  await this.page.waitForTimeout(2000);
1474
2565
  await this.checkForBlocking();
1475
2566
  await this.dismissPopups();
1476
- // Extract room availability using string-based evaluate
2567
+ // Extract room availability using data attributes (primary) with DOM fallback
1477
2568
  const result = await this.page.evaluate(`
1478
2569
  (function() {
1479
2570
  function getText(selector) {
@@ -1485,152 +2576,292 @@ export class HotelBrowser {
1485
2576
  var hotelName = getText('h2') || getText('h1').split('(')[0].trim() || "Unknown Hotel";
1486
2577
 
1487
2578
  var roomOptions = [];
1488
- var seenRooms = {};
1489
2579
 
1490
- // Strategy 1: Look for room type links (most reliable on Booking.com)
1491
- var roomTypeLinks = document.querySelectorAll('.hprt-roomtype-link, a[class*="hprt-roomtype"]');
2580
+ // ============================================================
2581
+ // STRATEGY 1: Extract from data-* attributes (most reliable)
2582
+ // Uses data-block-id, data-hotel-rounded-price, and data-fltrs
2583
+ // ============================================================
2584
+
2585
+ // First, build maps of room type IDs to room names and bed types from header rows
2586
+ var roomNameMap = {};
2587
+ var bedTypeMap = {};
2588
+ var roomTypeHeaders = document.querySelectorAll('.hprt-roomtype-link');
2589
+ for (var h = 0; h < roomTypeHeaders.length; h++) {
2590
+ var header = roomTypeHeaders[h];
2591
+ var headerRow = header.closest('tr');
2592
+ var headerBlockId = headerRow ? headerRow.getAttribute('data-block-id') : null;
2593
+ if (headerBlockId && headerBlockId.indexOf('_') > 0) {
2594
+ var headerRoomTypeId = headerBlockId.split('_')[0];
2595
+ var headerRoomName = header.textContent ? header.textContent.trim() : '';
2596
+ if (headerRoomName) {
2597
+ roomNameMap[headerRoomTypeId] = headerRoomName;
2598
+ }
2599
+ // Also capture bed type from header row
2600
+ var bedEl = headerRow.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
2601
+ if (bedEl) {
2602
+ var bedText = bedEl.textContent || '';
2603
+ var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
2604
+ for (var b = 0; b < bedLines.length; b++) {
2605
+ if (bedLines[b].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
2606
+ bedTypeMap[headerRoomTypeId] = bedLines[b];
2607
+ break;
2608
+ }
2609
+ }
2610
+ }
2611
+ }
2612
+ }
2613
+
2614
+ // Extract all room blocks with data-hotel-rounded-price attribute
2615
+ // Returns ALL rate options (room + meal plan + cancellation combinations)
2616
+ var dataRows = document.querySelectorAll('tr[data-block-id][data-hotel-rounded-price]');
2617
+ var seenBlockIds = {}; // Track exact block IDs to avoid true duplicates
1492
2618
 
1493
- for (var i = 0; i < roomTypeLinks.length && roomOptions.length < 10; i++) {
1494
- var roomLink = roomTypeLinks[i];
1495
- var name = roomLink.textContent.trim();
2619
+ for (var i = 0; i < dataRows.length && roomOptions.length < 30; i++) {
2620
+ var row = dataRows[i];
2621
+ var blockId = row.getAttribute('data-block-id') || '';
2622
+ var parts = blockId.split('_');
2623
+ if (parts.length < 2) continue;
1496
2624
 
1497
- if (!name || name.length < 3 || seenRooms[name]) continue;
1498
- seenRooms[name] = true;
2625
+ // Skip exact duplicate block IDs
2626
+ if (seenBlockIds[blockId]) continue;
2627
+ seenBlockIds[blockId] = true;
1499
2628
 
1500
- // Find the containing row to get price and details
1501
- var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
1502
- var rowText = row ? row.textContent || "" : "";
2629
+ var roomTypeId = parts[0];
1503
2630
 
1504
- // Try to find price in the same row or nearby
1505
- var price = null;
1506
- var priceDisplay = "";
2631
+ // Get price from data attribute (more reliable than DOM text)
2632
+ var roundedPrice = row.getAttribute('data-hotel-rounded-price');
2633
+ var price = roundedPrice ? parseInt(roundedPrice, 10) : null;
1507
2634
 
1508
- // Look for price cell in this row or next siblings
1509
- var priceCell = row ? row.querySelector('.hprt-table-cell-price, [class*="price-block"], [class*="bui-price"]') : null;
1510
- if (priceCell) {
1511
- priceDisplay = priceCell.textContent.trim();
1512
- var match = priceDisplay.match(/[\\$€£¥]\\s*([\\d,]+)/);
1513
- if (match) {
1514
- price = parseInt(match[1].replace(/,/g, ""));
1515
- // Clean up price display
1516
- var perNightMatch = priceDisplay.match(/[\\$€£¥]\\s*[\\d,]+/);
1517
- priceDisplay = perNightMatch ? perNightMatch[0] : priceDisplay.split('\\n')[0];
1518
- }
2635
+ // Get price display from DOM
2636
+ var priceDisplay = '';
2637
+ var priceEl = row.querySelector('.bui-price-display__value');
2638
+ if (priceEl) {
2639
+ var displayMatch = (priceEl.textContent || '').match(/[\\$€£¥][\\d,]+/);
2640
+ priceDisplay = displayMatch ? displayMatch[0] : '';
1519
2641
  }
1520
2642
 
1521
- // If no price found in row, search in sibling rows with same room type
1522
- if (!price) {
1523
- var allPriceCells = document.querySelectorAll('.hprt-table-cell-price');
1524
- for (var j = 0; j < allPriceCells.length && !price; j++) {
1525
- var cellText = allPriceCells[j].textContent || "";
1526
- var match = cellText.match(/[\\$€£¥]\\s*([\\d,]+)/);
1527
- if (match) {
1528
- price = parseInt(match[1].replace(/,/g, ""));
1529
- priceDisplay = match[0];
1530
- break;
1531
- }
1532
- }
2643
+ // Get room name from our map
2644
+ var roomName = roomNameMap[roomTypeId] || '';
2645
+
2646
+ // If no name in map, try to find it in the row
2647
+ if (!roomName) {
2648
+ var roomLink = row.querySelector('.hprt-roomtype-link, a[class*="room"]');
2649
+ roomName = roomLink ? (roomLink.textContent || '').trim() : '';
1533
2650
  }
1534
2651
 
1535
- // Bed type - clean up multiline text
1536
- var bedType = "";
1537
- var bedEl = row ? row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]') : null;
2652
+ // Still no name? Use a generic one
2653
+ if (!roomName) {
2654
+ roomName = 'Room Type ' + roomTypeId;
2655
+ }
2656
+
2657
+ // Parse data-fltrs for structured info (breakfast, beds)
2658
+ var fltrs = row.getAttribute('data-fltrs');
2659
+ var breakfastIncluded = false;
2660
+ var bedCount = [];
2661
+
2662
+ if (fltrs) {
2663
+ try {
2664
+ var fltrData = JSON.parse(fltrs.replace(/\\n/g, ''));
2665
+ breakfastIncluded = fltrData.breakfast_included === 1;
2666
+ bedCount = fltrData.bed_count || [];
2667
+ } catch (e) {}
2668
+ }
2669
+
2670
+ // Get bed type from DOM (for display)
2671
+ var bedType = '';
2672
+ var bedEl = row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
1538
2673
  if (bedEl) {
1539
- // Get first meaningful line
1540
- var bedText = bedEl.textContent || "";
2674
+ var bedText = bedEl.textContent || '';
1541
2675
  var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
1542
- // Find line with bed info
1543
2676
  for (var k = 0; k < bedLines.length; k++) {
1544
2677
  if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
1545
2678
  bedType = bedLines[k];
1546
2679
  break;
1547
2680
  }
1548
2681
  }
1549
- if (!bedType && bedLines.length > 0) {
1550
- bedType = bedLines[0];
1551
- }
2682
+ }
2683
+ // Fallback 1: use bed type from our map (captured from header rows)
2684
+ if (!bedType && bedTypeMap[roomTypeId]) {
2685
+ bedType = bedTypeMap[roomTypeId];
2686
+ }
2687
+ // Fallback 2: use bed count from data-fltrs
2688
+ if (!bedType && bedCount.length > 0) {
2689
+ bedType = bedCount.length === 1 ? bedCount[0] + ' bed' : bedCount.join(' or ') + ' beds';
1552
2690
  }
1553
2691
 
1554
- // Cancellation
1555
- var cancellation = "";
1556
- if (rowText.toLowerCase().indexOf("free cancellation") >= 0) {
1557
- cancellation = "Free cancellation";
1558
- } else if (rowText.toLowerCase().indexOf("non-refundable") >= 0) {
1559
- cancellation = "Non-refundable";
2692
+ // Get cancellation policy from row text
2693
+ var rowText = row.textContent || '';
2694
+ var rowTextLower = rowText.toLowerCase();
2695
+ var cancellation = '';
2696
+ if (rowTextLower.indexOf('free cancellation') >= 0) {
2697
+ cancellation = 'Free cancellation';
2698
+ } else if (rowTextLower.indexOf('non-refundable') >= 0) {
2699
+ cancellation = 'Non-refundable';
1560
2700
  }
1561
2701
 
1562
- // Breakfast
1563
- var breakfast = "";
1564
- if (rowText.toLowerCase().indexOf("breakfast included") >= 0) {
1565
- breakfast = "Breakfast included";
1566
- } else if (rowText.toLowerCase().indexOf("room only") >= 0) {
1567
- breakfast = "Room only";
2702
+ // Get breakfast info (prefer data-fltrs, fallback to DOM text)
2703
+ var breakfast = '';
2704
+ if (breakfastIncluded) {
2705
+ breakfast = 'Breakfast included';
2706
+ } else if (rowTextLower.indexOf('breakfast included') >= 0) {
2707
+ breakfast = 'Breakfast included';
2708
+ } else if (rowTextLower.indexOf('room only') >= 0) {
2709
+ breakfast = 'Room only';
1568
2710
  }
1569
2711
 
1570
- // Occupancy
2712
+ // Get occupancy
1571
2713
  var sleeps = null;
1572
- var occupancyEl = row ? row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info') : null;
2714
+ var occupancyEl = row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info');
1573
2715
  if (occupancyEl) {
1574
- var occMatch = occupancyEl.textContent.match(/(\\d+)/);
1575
- sleeps = occMatch ? parseInt(occMatch[1]) : null;
2716
+ var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
2717
+ sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
1576
2718
  }
1577
2719
 
2720
+ // Build features array
2721
+ var features = [];
2722
+ if (breakfast) features.push(breakfast);
2723
+ if (cancellation) features.push(cancellation);
2724
+
1578
2725
  roomOptions.push({
1579
- name: name,
2726
+ name: roomName,
1580
2727
  price: price,
1581
2728
  priceDisplay: priceDisplay,
1582
2729
  sleeps: sleeps,
1583
- features: [],
2730
+ features: features,
1584
2731
  bedType: bedType,
1585
2732
  cancellation: cancellation,
1586
- breakfast: breakfast
2733
+ breakfast: breakfast,
2734
+ roomTypeId: roomTypeId
1587
2735
  });
1588
2736
  }
1589
2737
 
1590
- // Strategy 2: If no rooms found, try data-block-id elements
2738
+ // ============================================================
2739
+ // STRATEGY 2: Fallback to DOM scraping if data attributes failed
2740
+ // ============================================================
1591
2741
  if (roomOptions.length === 0) {
2742
+ var seenRooms = {};
2743
+ var roomTypeLinks = document.querySelectorAll('.hprt-roomtype-link, a[class*="hprt-roomtype"]');
2744
+
2745
+ for (var i = 0; i < roomTypeLinks.length && roomOptions.length < 10; i++) {
2746
+ var roomLink = roomTypeLinks[i];
2747
+ var name = roomLink.textContent ? roomLink.textContent.trim() : '';
2748
+
2749
+ if (!name || name.length < 3 || seenRooms[name]) continue;
2750
+ seenRooms[name] = true;
2751
+
2752
+ var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
2753
+ var rowText = row ? row.textContent || '' : '';
2754
+
2755
+ // Try to find price
2756
+ var price = null;
2757
+ var priceDisplay = '';
2758
+ var priceCell = row ? row.querySelector('.hprt-table-cell-price, [class*="price-block"], [class*="bui-price"]') : null;
2759
+ if (priceCell) {
2760
+ var match = (priceCell.textContent || '').match(/[\\$€£¥]\\s*([\\d,]+)/);
2761
+ if (match) {
2762
+ price = parseInt(match[1].replace(/,/g, ''), 10);
2763
+ priceDisplay = match[0];
2764
+ }
2765
+ }
2766
+
2767
+ // Bed type
2768
+ var bedType = '';
2769
+ var bedEl = row ? row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]') : null;
2770
+ if (bedEl) {
2771
+ var bedText = bedEl.textContent || '';
2772
+ var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
2773
+ for (var k = 0; k < bedLines.length; k++) {
2774
+ if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
2775
+ bedType = bedLines[k];
2776
+ break;
2777
+ }
2778
+ }
2779
+ }
2780
+
2781
+ // Cancellation and breakfast from text
2782
+ var rowTextLower = rowText.toLowerCase();
2783
+ var cancellation = '';
2784
+ if (rowTextLower.indexOf('free cancellation') >= 0) {
2785
+ cancellation = 'Free cancellation';
2786
+ } else if (rowTextLower.indexOf('non-refundable') >= 0) {
2787
+ cancellation = 'Non-refundable';
2788
+ }
2789
+
2790
+ var breakfast = '';
2791
+ if (rowTextLower.indexOf('breakfast included') >= 0) {
2792
+ breakfast = 'Breakfast included';
2793
+ } else if (rowTextLower.indexOf('room only') >= 0) {
2794
+ breakfast = 'Room only';
2795
+ }
2796
+
2797
+ // Occupancy
2798
+ var sleeps = null;
2799
+ var occupancyEl = row ? row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info') : null;
2800
+ if (occupancyEl) {
2801
+ var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
2802
+ sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
2803
+ }
2804
+
2805
+ roomOptions.push({
2806
+ name: name,
2807
+ price: price,
2808
+ priceDisplay: priceDisplay,
2809
+ sleeps: sleeps,
2810
+ features: [],
2811
+ bedType: bedType,
2812
+ cancellation: cancellation,
2813
+ breakfast: breakfast
2814
+ });
2815
+ }
2816
+ }
2817
+
2818
+ // ============================================================
2819
+ // STRATEGY 3: Last resort - look for any data-block-id elements
2820
+ // ============================================================
2821
+ if (roomOptions.length === 0) {
2822
+ var seenBlocks = {};
1592
2823
  var blocks = document.querySelectorAll('[data-block-id]');
1593
2824
  for (var i = 0; i < blocks.length && roomOptions.length < 10; i++) {
1594
2825
  var block = blocks[i];
1595
- var blockText = block.textContent || "";
2826
+ var blockId = block.getAttribute('data-block-id') || '';
2827
+ if (!blockId || blockId === 'header_survey') continue;
1596
2828
 
1597
- // Look for any room name pattern
2829
+ var blockText = block.textContent || '';
1598
2830
  var nameEl = block.querySelector('a[class*="room"], span[class*="room-name"]');
1599
- var name = nameEl ? nameEl.textContent.trim() : "";
2831
+ var name = nameEl ? (nameEl.textContent || '').trim() : '';
1600
2832
 
1601
2833
  if (!name) {
1602
- // Try to extract from block text
1603
2834
  var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
1604
- name = lines[0] ? lines[0].trim().slice(0, 50) : "";
2835
+ name = lines[0] ? lines[0].trim().slice(0, 50) : '';
1605
2836
  }
1606
2837
 
1607
- if (!name || name.length < 3 || seenRooms[name]) continue;
1608
- seenRooms[name] = true;
2838
+ if (!name || name.length < 3 || seenBlocks[name]) continue;
2839
+ seenBlocks[name] = true;
1609
2840
 
1610
2841
  var priceMatch = blockText.match(/[\\$€£¥]\\s*([\\d,]+)/);
1611
- var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g, "")) : null;
2842
+ var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g, ''), 10) : null;
1612
2843
 
1613
2844
  roomOptions.push({
1614
2845
  name: name,
1615
2846
  price: price,
1616
- priceDisplay: priceMatch ? priceMatch[0] : "",
2847
+ priceDisplay: priceMatch ? priceMatch[0] : '',
1617
2848
  sleeps: null,
1618
2849
  features: [],
1619
- bedType: "",
1620
- cancellation: "",
1621
- breakfast: ""
2850
+ bedType: '',
2851
+ cancellation: '',
2852
+ breakfast: ''
1622
2853
  });
1623
2854
  }
1624
2855
  }
1625
2856
 
1626
2857
  // Check for "no availability" message
1627
- var bodyText = document.body.textContent || "";
2858
+ var bodyText = document.body.textContent || '';
1628
2859
  var noAvailability =
1629
- bodyText.indexOf("no availability") >= 0 ||
1630
- bodyText.indexOf("sold out") >= 0 ||
1631
- bodyText.indexOf("no rooms available") >= 0 ||
1632
- bodyText.indexOf("fully booked") >= 0 ||
1633
- bodyText.indexOf("We have no availability") >= 0;
2860
+ bodyText.indexOf('no availability') >= 0 ||
2861
+ bodyText.indexOf('sold out') >= 0 ||
2862
+ bodyText.indexOf('no rooms available') >= 0 ||
2863
+ bodyText.indexOf('fully booked') >= 0 ||
2864
+ bodyText.indexOf('We have no availability') >= 0;
1634
2865
 
1635
2866
  return {
1636
2867
  hotelName: hotelName,
@@ -1639,6 +2870,45 @@ export class HotelBrowser {
1639
2870
  };
1640
2871
  })()
1641
2872
  `);
2873
+ // Enrich room options with facilities from GraphQL API
2874
+ // This provides detailed amenities (AC, TV, bathroom, etc.) per room type
2875
+ if (result.roomOptions.length > 0) {
2876
+ try {
2877
+ const hotelId = await this.extractHotelId();
2878
+ if (hotelId) {
2879
+ const facilitiesMap = await this.fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut);
2880
+ if (facilitiesMap.size > 0) {
2881
+ // Merge facilities into room options based on roomTypeId
2882
+ // Room type IDs are the first 9 digits of the full room ID (e.g., 652359501 -> 652359501)
2883
+ for (const room of result.roomOptions) {
2884
+ if (room.roomTypeId) {
2885
+ // Try exact match first
2886
+ let facilities = facilitiesMap.get(room.roomTypeId);
2887
+ // If not found, the GraphQL returns full room IDs (e.g., 652359501)
2888
+ // while our roomTypeId might be just the prefix
2889
+ if (!facilities) {
2890
+ // Find a matching key that starts with our roomTypeId
2891
+ for (const [key, value] of facilitiesMap) {
2892
+ if (key.startsWith(room.roomTypeId) || room.roomTypeId.startsWith(key)) {
2893
+ facilities = value;
2894
+ break;
2895
+ }
2896
+ }
2897
+ }
2898
+ if (facilities) {
2899
+ room.amenities = facilities;
2900
+ }
2901
+ }
2902
+ }
2903
+ logger.debug({ enrichedRooms: result.roomOptions.filter(r => r.amenities).length }, 'Enriched room options with GraphQL facilities');
2904
+ }
2905
+ }
2906
+ }
2907
+ catch (error) {
2908
+ // Non-fatal: continue without facilities enrichment
2909
+ logger.debug({ error }, 'Failed to enrich rooms with GraphQL facilities');
2910
+ }
2911
+ }
1642
2912
  // Determine availability and lowest price
1643
2913
  const available = result.roomOptions.length > 0 && !result.noAvailabilityDetected;
1644
2914
  const prices = result.roomOptions
@@ -1686,7 +2956,8 @@ export class HotelBrowser {
1686
2956
  if (!this.page)
1687
2957
  throw new Error("Browser not initialized");
1688
2958
  // Navigate to hotel page
1689
- const cleanUrl = hotelUrl.split("?")[0].split("#")[0];
2959
+ const urlParts = hotelUrl.split("?")[0];
2960
+ const cleanUrl = urlParts?.split("#")[0] ?? hotelUrl;
1690
2961
  await this.page.goto(cleanUrl, {
1691
2962
  waitUntil: "domcontentloaded",
1692
2963
  timeout: 60000,
@@ -1705,52 +2976,112 @@ export class HotelBrowser {
1705
2976
  catch { }
1706
2977
  await this.page.keyboard.press("Escape");
1707
2978
  await this.page.waitForTimeout(500);
1708
- // Get overall rating info from main page before opening modal
1709
- const mainPageData = await this.page.evaluate(`
1710
- (function() {
1711
- var results = { hotelName: '', overallRating: null, totalReviews: 0, breakdown: {} };
1712
-
1713
- // Hotel name
1714
- var nameEl = document.querySelector('h2[class*="pp-header__title"], [data-testid="PropertyHeaderDesktop-wrapper"] h2, h2.d2fee87262');
1715
- results.hotelName = nameEl?.textContent?.trim() || '';
1716
-
1717
- // Overall rating and total reviews from review-score-component
1718
- var scoreComponent = document.querySelector('[data-testid="review-score-component"]');
1719
- if (scoreComponent) {
1720
- var text = scoreComponent.textContent || '';
1721
- // Extract score (e.g., "Scored 9.1 9.1..." -> 9.1)
1722
- var scoreMatch = text.match(/Scored\\s+([\\d.]+)/);
1723
- if (scoreMatch) {
1724
- results.overallRating = parseFloat(scoreMatch[1]);
1725
- }
1726
- // Extract total reviews (e.g., "1,043 reviews")
1727
- var reviewCountMatch = text.match(/([\\d,]+)\\s+reviews?/);
1728
- if (reviewCountMatch) {
1729
- results.totalReviews = parseInt(reviewCountMatch[1].replace(/,/g, ''));
1730
- }
1731
- }
1732
-
1733
- // Rating breakdown categories
1734
- var breakdownEls = document.querySelectorAll('[data-testid="review-subscore"]');
1735
- breakdownEls.forEach(function(el) {
1736
- var text = el.textContent?.trim() || '';
1737
- var parts = text.split(/\\s+/);
1738
- if (parts.length >= 2) {
1739
- var score = parseFloat(parts[parts.length - 1]);
1740
- var category = parts.slice(0, -1).join(' ').toLowerCase();
1741
- if (category.includes('staff')) results.breakdown.staff = score;
1742
- else if (category.includes('facilities')) results.breakdown.facilities = score;
1743
- else if (category.includes('cleanliness')) results.breakdown.cleanliness = score;
1744
- else if (category.includes('comfort')) results.breakdown.comfort = score;
1745
- else if (category.includes('value') || category.includes('money')) results.breakdown.valueForMoney = score;
1746
- else if (category.includes('location')) results.breakdown.location = score;
1747
- else if (category.includes('wifi') || category.includes('wi-fi')) results.breakdown.freeWifi = score;
2979
+ // Try API extraction first for basic review data
2980
+ // API provides: hotel name, overall rating, rating breakdown, and featured reviews
2981
+ // Note: API reviews are limited to what's in cache (~6-10 reviews), sorted by newest
2982
+ const apiData = await this.extractReviewsFromAPI();
2983
+ // Determine if we can use API data directly or need DOM fallback
2984
+ // Use API if: we have enough reviews AND no special sorting/filtering is requested
2985
+ const canUseApiOnly = apiData &&
2986
+ apiData.reviews.length >= limit &&
2987
+ sortBy === "recent" &&
2988
+ !filterBy;
2989
+ if (canUseApiOnly) {
2990
+ logger.debug("Using API extraction for reviews (sufficient data, no filters)");
2991
+ const reviewsResult = {
2992
+ hotelName: apiData.hotelName,
2993
+ overallRating: apiData.overallRating,
2994
+ totalReviews: apiData.totalReviews,
2995
+ ratingBreakdown: apiData.ratingBreakdown,
2996
+ reviews: apiData.reviews.slice(0, limit),
2997
+ url: cleanUrl,
2998
+ };
2999
+ await this.saveSession();
3000
+ return reviewsResult;
3001
+ }
3002
+ // Use API data for metadata if available, but get reviews from DOM
3003
+ // This gives us accurate rating breakdown from API + more reviews from DOM
3004
+ const baseData = apiData || {
3005
+ hotelName: '',
3006
+ overallRating: null,
3007
+ totalReviews: 0,
3008
+ ratingBreakdown: {
3009
+ staff: null,
3010
+ facilities: null,
3011
+ cleanliness: null,
3012
+ comfort: null,
3013
+ valueForMoney: null,
3014
+ location: null,
3015
+ freeWifi: null,
3016
+ },
3017
+ };
3018
+ // If API didn't give us a hotel name, read hotel info from the DOM and fill in only the fields still missing
3019
+ if (!baseData.hotelName) {
3020
+ const mainPageData = await this.page.evaluate(`
3021
+ (function() {
3022
+ var results = { hotelName: '', overallRating: null, totalReviews: 0, breakdown: {} };
3023
+
3024
+ // Hotel name
3025
+ var nameEl = document.querySelector('h2[class*="pp-header__title"], [data-testid="PropertyHeaderDesktop-wrapper"] h2, h2.d2fee87262');
3026
+ results.hotelName = nameEl?.textContent?.trim() || '';
3027
+
3028
+ // Overall rating and total reviews from review-score-component
3029
+ var scoreComponent = document.querySelector('[data-testid="review-score-component"]');
3030
+ if (scoreComponent) {
3031
+ var text = scoreComponent.textContent || '';
3032
+ var scoreMatch = text.match(/Scored\\s+([\\d.]+)/);
3033
+ if (scoreMatch) {
3034
+ results.overallRating = parseFloat(scoreMatch[1]);
3035
+ }
3036
+ var reviewCountMatch = text.match(/([\\d,]+)\\s+reviews?/);
3037
+ if (reviewCountMatch) {
3038
+ results.totalReviews = parseInt(reviewCountMatch[1].replace(/,/g, ''));
3039
+ }
1748
3040
  }
1749
- });
1750
-
1751
- return results;
1752
- })()
1753
- `);
3041
+
3042
+ // Rating breakdown categories
3043
+ var breakdownEls = document.querySelectorAll('[data-testid="review-subscore"]');
3044
+ breakdownEls.forEach(function(el) {
3045
+ var text = el.textContent?.trim() || '';
3046
+ var parts = text.split(/\\s+/);
3047
+ if (parts.length >= 2) {
3048
+ var score = parseFloat(parts[parts.length - 1]);
3049
+ var category = parts.slice(0, -1).join(' ').toLowerCase();
3050
+ if (category.includes('staff')) results.breakdown.staff = score;
3051
+ else if (category.includes('facilities')) results.breakdown.facilities = score;
3052
+ else if (category.includes('cleanliness')) results.breakdown.cleanliness = score;
3053
+ else if (category.includes('comfort')) results.breakdown.comfort = score;
3054
+ else if (category.includes('value') || category.includes('money')) results.breakdown.valueForMoney = score;
3055
+ else if (category.includes('location')) results.breakdown.location = score;
3056
+ else if (category.includes('wifi') || category.includes('wi-fi')) results.breakdown.freeWifi = score;
3057
+ }
3058
+ });
3059
+
3060
+ return results;
3061
+ })()
3062
+ `);
3063
+ baseData.hotelName = mainPageData.hotelName;
3064
+ if (baseData.overallRating === null)
3065
+ baseData.overallRating = mainPageData.overallRating;
3066
+ if (baseData.totalReviews === 0)
3067
+ baseData.totalReviews = mainPageData.totalReviews;
3068
+ // Fill in missing rating breakdown from DOM
3069
+ if (baseData.ratingBreakdown.staff === null)
3070
+ baseData.ratingBreakdown.staff = mainPageData.breakdown.staff ?? null;
3071
+ if (baseData.ratingBreakdown.facilities === null)
3072
+ baseData.ratingBreakdown.facilities = mainPageData.breakdown.facilities ?? null;
3073
+ if (baseData.ratingBreakdown.cleanliness === null)
3074
+ baseData.ratingBreakdown.cleanliness = mainPageData.breakdown.cleanliness ?? null;
3075
+ if (baseData.ratingBreakdown.comfort === null)
3076
+ baseData.ratingBreakdown.comfort = mainPageData.breakdown.comfort ?? null;
3077
+ if (baseData.ratingBreakdown.valueForMoney === null)
3078
+ baseData.ratingBreakdown.valueForMoney = mainPageData.breakdown.valueForMoney ?? null;
3079
+ if (baseData.ratingBreakdown.location === null)
3080
+ baseData.ratingBreakdown.location = mainPageData.breakdown.location ?? null;
3081
+ if (baseData.ratingBreakdown.freeWifi === null)
3082
+ baseData.ratingBreakdown.freeWifi = mainPageData.breakdown.freeWifi ?? null;
3083
+ }
3084
+ logger.debug("Using DOM extraction for reviews (need more reviews or filters)");
1754
3085
  // Click "Read all reviews" button to open reviews modal
1755
3086
  const readAllBtn = await this.page.$('[data-testid="fr-read-all-reviews"], [data-testid="review-score-read-all"]');
1756
3087
  if (!readAllBtn) {
@@ -1919,20 +3250,20 @@ export class HotelBrowser {
1919
3250
  return reviews;
1920
3251
  })()
1921
3252
  `);
1922
- // Build rating breakdown with proper null handling
3253
+ // Build rating breakdown from baseData (populated from API or DOM)
1923
3254
  const ratingBreakdown = {
1924
- staff: mainPageData.breakdown.staff ?? null,
1925
- facilities: mainPageData.breakdown.facilities ?? null,
1926
- cleanliness: mainPageData.breakdown.cleanliness ?? null,
1927
- comfort: mainPageData.breakdown.comfort ?? null,
1928
- valueForMoney: mainPageData.breakdown.valueForMoney ?? null,
1929
- location: mainPageData.breakdown.location ?? null,
1930
- freeWifi: mainPageData.breakdown.freeWifi ?? null,
3255
+ staff: baseData.ratingBreakdown.staff,
3256
+ facilities: baseData.ratingBreakdown.facilities,
3257
+ cleanliness: baseData.ratingBreakdown.cleanliness,
3258
+ comfort: baseData.ratingBreakdown.comfort,
3259
+ valueForMoney: baseData.ratingBreakdown.valueForMoney,
3260
+ location: baseData.ratingBreakdown.location,
3261
+ freeWifi: baseData.ratingBreakdown.freeWifi,
1931
3262
  };
1932
3263
  const reviewsResult = {
1933
- hotelName: mainPageData.hotelName,
1934
- overallRating: mainPageData.overallRating,
1935
- totalReviews: mainPageData.totalReviews,
3264
+ hotelName: baseData.hotelName,
3265
+ overallRating: baseData.overallRating,
3266
+ totalReviews: baseData.totalReviews,
1936
3267
  ratingBreakdown,
1937
3268
  reviews: reviews.slice(0, limit),
1938
3269
  url: cleanUrl,
@@ -1962,13 +3293,18 @@ export class HotelBrowser {
1962
3293
  checkIn.setDate(checkIn.getDate() + i);
1963
3294
  const checkOut = new Date(checkIn);
1964
3295
  checkOut.setDate(checkOut.getDate() + 1);
1965
- dates.push({
1966
- checkIn: checkIn.toISOString().split("T")[0],
1967
- checkOut: checkOut.toISOString().split("T")[0],
1968
- });
3296
+ const checkInStr = checkIn.toISOString().split("T")[0];
3297
+ const checkOutStr = checkOut.toISOString().split("T")[0];
3298
+ if (checkInStr && checkOutStr) {
3299
+ dates.push({
3300
+ checkIn: checkInStr,
3301
+ checkOut: checkOutStr,
3302
+ });
3303
+ }
1969
3304
  }
1970
3305
  // Clean the hotel URL
1971
- const cleanUrl = hotelUrl.split("?")[0].split("#")[0];
3306
+ const urlParts = hotelUrl.split("?")[0];
3307
+ const cleanUrl = urlParts?.split("#")[0] ?? hotelUrl;
1972
3308
  // Collect prices for each date
1973
3309
  const prices = [];
1974
3310
  let hotelName = "";
@@ -2109,10 +3445,11 @@ export class HotelBrowser {
2109
3445
  // Calculate end date
2110
3446
  const endDate = new Date(start);
2111
3447
  endDate.setDate(endDate.getDate() + actualNights - 1);
3448
+ const endDateStr = endDate.toISOString().split("T")[0] ?? startDate;
2112
3449
  const priceCalendarResult = {
2113
3450
  hotelName,
2114
3451
  startDate,
2115
- endDate: endDate.toISOString().split("T")[0],
3452
+ endDate: endDateStr,
2116
3453
  nights: actualNights,
2117
3454
  currency,
2118
3455
  prices,