hotelzero 1.13.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -39,6 +39,7 @@ export const ErrorCodes = {
39
39
  NETWORK_ERROR: "NETWORK_ERROR",
40
40
  TIMEOUT: "TIMEOUT",
41
41
  BLOCKED: "BLOCKED",
42
+ INVALID_PARAMS: "INVALID_PARAMS",
42
43
  };
43
44
  const DEFAULT_RETRY_CONFIG = {
44
45
  maxRetries: 3,
@@ -763,8 +764,15 @@ export class HotelBrowser {
763
764
  // Scroll to load more results (pass limit to control how many to load)
764
765
  const targetResults = params.limit || 25;
765
766
  await this.scrollToLoadMore(targetResults);
766
- // Extract detailed hotel info
767
- let hotels = await this.extractHotelDetails();
767
+ // Try API-based extraction first (more reliable), fall back to DOM scraping
768
+ let hotels = await this.extractHotelsFromAPI();
769
+ if (hotels.length === 0) {
770
+ logger.debug("API extraction returned no results, falling back to DOM scraping");
771
+ hotels = await this.extractHotelDetails();
772
+ }
773
+ else {
774
+ logger.debug({ hotelCount: hotels.length }, "Hotels extracted from API cache");
775
+ }
768
776
  logger.debug({ hotelCount: hotels.length }, "Hotels extracted from page");
769
777
  // Apply limit to cap results
770
778
  if (params.limit && params.limit > 0) {
@@ -786,6 +794,287 @@ export class HotelBrowser {
786
794
  logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Search attempt failed, retrying");
787
795
  });
788
796
  }
797
+ /**
798
+ * Search for a specific hotel's rate via the search API.
799
+ * This is 100% API-based (no HTML scraping) and returns detailed rate info
800
+ * including room type, meal plan, cancellation policy, and bed configuration.
801
+ *
802
+ * The method searches for the hotel by name and extracts rate details from
803
+ * the Apollo cache's `blocks` array and `matchingUnitConfigurations`.
804
+ *
805
+ * @param hotelUrl - The hotel's URL or name/slug (e.g., "la-sanguine" or full URL)
806
+ * @param checkIn - Check-in date (YYYY-MM-DD)
807
+ * @param checkOut - Check-out date (YYYY-MM-DD)
808
+ * @param guests - Number of guests
809
+ * @param rooms - Number of rooms
810
+ * @param filters - Optional rate filters (breakfast, free cancellation, bed type)
811
+ * @returns Rate details or null if hotel not found in results
812
+ */
813
+ async searchHotelRates(hotelUrl, checkIn, checkOut, guests = 2, rooms = 1, filters) {
814
+ if (!this.page) {
815
+ throw new HotelSearchError("Browser not initialized. Call init() first.", ErrorCodes.BROWSER_NOT_INITIALIZED, false);
816
+ }
817
+ // Extract hotel name from URL for search query
818
+ const hotelName = this.extractHotelNameFromUrl(hotelUrl);
819
+ if (!hotelName) {
820
+ throw new HotelSearchError("Could not extract hotel name from URL", ErrorCodes.INVALID_PARAMS, false);
821
+ }
822
+ logger.info({ hotelName, checkIn, checkOut, guests, rooms, hasFilters: !!filters }, "Searching for hotel rate via API");
823
+ // Build search URL with hotel name as destination
824
+ const searchFilters = {};
825
+ // Apply rate-specific filters
826
+ if (filters?.breakfast) {
827
+ searchFilters.breakfast = true;
828
+ }
829
+ if (filters?.freeCancellation) {
830
+ searchFilters.freeCancellation = true;
831
+ }
832
+ const searchParams = {
833
+ destination: hotelName.replace(/-/g, " "), // "la-sanguine" -> "la sanguine"
834
+ checkIn,
835
+ checkOut,
836
+ guests,
837
+ rooms,
838
+ limit: 10, // Small limit since we're looking for a specific hotel
839
+ };
840
+ const url = this.buildBookingUrl(searchParams, searchFilters);
841
+ logger.debug({ url }, "Hotel rate search URL");
842
+ return await retryWithBackoff(async () => {
843
+ await this.enforceRateLimit();
844
+ try {
845
+ await this.page.goto(url, {
846
+ waitUntil: "networkidle",
847
+ timeout: 30000,
848
+ });
849
+ }
850
+ catch (error) {
851
+ const err = error;
852
+ if (err.message.includes("timeout") || err.message.includes("Timeout")) {
853
+ throw new HotelSearchError("Page load timed out. The server may be slow or unavailable.", ErrorCodes.TIMEOUT, true);
854
+ }
855
+ throw new HotelSearchError(`Navigation failed: ${err.message}`, ErrorCodes.NAVIGATION_FAILED, true);
856
+ }
857
+ await this.page.waitForTimeout(2000);
858
+ await this.checkForBlocking();
859
+ await this.dismissPopups();
860
+ // Extract rate details from Apollo cache
861
+ const rateResult = await this.extractHotelRateFromAPI(hotelName, filters);
862
+ if (rateResult) {
863
+ // Populate search params in result
864
+ rateResult.checkIn = checkIn;
865
+ rateResult.checkOut = checkOut;
866
+ rateResult.guests = guests;
867
+ rateResult.rooms = rooms;
868
+ logger.info({ hotelName: rateResult.hotelName, price: rateResult.price, roomName: rateResult.roomName }, "Hotel rate found via API");
869
+ await this.saveSession();
870
+ return rateResult;
871
+ }
872
+ logger.warn({ hotelName }, "Hotel not found in search results");
873
+ return null;
874
+ }, DEFAULT_RETRY_CONFIG, (attempt, error, delayMs) => {
875
+ logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Hotel rate search failed, retrying");
876
+ });
877
+ }
878
+ /**
879
+ * Extract hotel name/slug from a Booking.com URL.
880
+ * Handles formats like:
881
+ * - https://www.booking.com/hotel/fr/la-sanguine.html
882
+ * - /hotel/fr/la-sanguine.html
883
+ * - la-sanguine
884
+ */
885
+ extractHotelNameFromUrl(urlOrName) {
886
+ // If it's just a name/slug (no slashes), return as-is
887
+ if (!urlOrName.includes("/")) {
888
+ return urlOrName.replace(/\.html$/, "");
889
+ }
890
+ // Extract from URL pattern: /hotel/{country}/{name}.html
891
+ const match = urlOrName.match(/\/hotel\/[a-z]{2}\/([^/.]+)/i);
892
+ if (match && match[1]) {
893
+ return match[1];
894
+ }
895
+ // Fallback: try to get the last path segment
896
+ const parts = urlOrName.split("/").filter(Boolean);
897
+ const lastPart = parts[parts.length - 1];
898
+ return lastPart?.replace(/\.html$/, "") || null;
899
+ }
900
+ /**
901
+ * Extract hotel rate details from Apollo cache.
902
+ * Finds the hotel matching the given name and extracts rate info from
903
+ * the `blocks` array and `matchingUnitConfigurations`.
904
+ */
905
+ async extractHotelRateFromAPI(hotelSlug, filters) {
906
+ if (!this.page)
907
+ return null;
908
+ // Bed type mapping for filter matching
909
+ const bedTypeMap = {
910
+ single: 1,
911
+ twin: 2,
912
+ double: 3,
913
+ queen: 5,
914
+ king: 6,
915
+ };
916
+ const targetBedType = filters?.bedType ? bedTypeMap[filters.bedType] : undefined;
917
+ return await this.page.evaluate(({ hotelSlug, targetBedType }) => {
918
+ try {
919
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
920
+ const w = window;
921
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
922
+ if (!cache)
923
+ return null;
924
+ const rootQuery = cache["ROOT_QUERY"];
925
+ if (!rootQuery)
926
+ return null;
927
+ const searchQueries = rootQuery.searchQueries;
928
+ if (!searchQueries)
929
+ return null;
930
+ const searchKey = Object.keys(searchQueries).find((k) => k.startsWith("search("));
931
+ if (!searchKey)
932
+ return null;
933
+ const searchOutput = searchQueries[searchKey];
934
+ const searchResults = searchOutput?.results;
935
+ if (!searchResults || !Array.isArray(searchResults))
936
+ return null;
937
+ // Find hotel matching the slug (check pageName)
938
+ const normalizedSlug = hotelSlug.toLowerCase().replace(/-/g, "");
939
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
940
+ const hotel = searchResults.find((h) => {
941
+ if (!h)
942
+ return false;
943
+ const pageName = h.basicPropertyData?.pageName?.toLowerCase()?.replace(/-/g, "") || "";
944
+ const displayName = h.displayName?.text?.toLowerCase()?.replace(/\s+/g, "") || "";
945
+ return pageName.includes(normalizedSlug) ||
946
+ normalizedSlug.includes(pageName) ||
947
+ displayName.includes(normalizedSlug) ||
948
+ normalizedSlug.includes(displayName);
949
+ });
950
+ if (!hotel)
951
+ return null;
952
+ // Extract basic hotel info
953
+ const hotelName = hotel.displayName?.text || hotel.basicPropertyData?.pageName || "Unknown";
954
+ const pageName = hotel.basicPropertyData?.pageName || "";
955
+ const countryCode = hotel.basicPropertyData?.location?.countryCode || "";
956
+ const hotelId = hotel.basicPropertyData?.id?.toString() || "";
957
+ const hotelUrl = countryCode && pageName
958
+ ? `https://www.booking.com/hotel/${countryCode}/${pageName}.html`
959
+ : "";
960
+ // Get price info
961
+ const priceInfo = hotel.priceDisplayInfoIrene;
962
+ const displayPrice = priceInfo?.displayPrice?.amountPerStay;
963
+ const price = displayPrice?.amountUnformatted ?? 0;
964
+ const priceDisplay = displayPrice?.amountRounded || displayPrice?.amount || "$0";
965
+ const currency = displayPrice?.currency || "USD";
966
+ const pricePerNight = priceInfo?.averagePricePerNight?.amountUnformatted ?? 0;
967
+ // Get blocks array (rate options)
968
+ const blocks = hotel.blocks;
969
+ if (!blocks || !Array.isArray(blocks) || blocks.length === 0) {
970
+ // No blocks, return basic info without detailed rate
971
+ return {
972
+ hotelName,
973
+ hotelId,
974
+ hotelUrl,
975
+ checkIn: "",
976
+ checkOut: "",
977
+ guests: 0,
978
+ rooms: 0,
979
+ roomName: "Unknown",
980
+ roomId: "",
981
+ price,
982
+ priceDisplay,
983
+ pricePerNight,
984
+ currency,
985
+ mealPlan: "Unknown",
986
+ cancellationPolicy: "Unknown",
987
+ freeCancellationUntil: null,
988
+ bedType: "Unknown",
989
+ bedCount: 0,
990
+ };
991
+ }
992
+ // Get the first (cheapest/best match) block
993
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
994
+ const block = blocks[0];
995
+ const blockId = block.blockId || {};
996
+ const roomId = blockId.roomId?.toString() || "";
997
+ const mealPlanId = blockId.mealPlanId;
998
+ // Meal plan mapping
999
+ const mealPlanNames = {
1000
+ 0: "Room only",
1001
+ 1: "Breakfast included",
1002
+ 2: "Half board",
1003
+ 3: "Full board",
1004
+ 4: "All-inclusive",
1005
+ };
1006
+ const mealPlan = mealPlanNames[mealPlanId] || "Room only";
1007
+ // Cancellation policy
1008
+ const freeCancellationUntil = block.freeCancellationUntil || null;
1009
+ const cancellationPolicy = freeCancellationUntil
1010
+ ? `Free cancellation until ${freeCancellationUntil}`
1011
+ : "Non-refundable";
1012
+ // Get room name and bed configuration from matchingUnitConfigurations
1013
+ let roomName = "Standard Room";
1014
+ let bedType = "Unknown";
1015
+ let bedCount = 0;
1016
+ const unitConfigs = hotel.matchingUnitConfigurations?.unitConfigurations;
1017
+ if (unitConfigs && Array.isArray(unitConfigs)) {
1018
+ // If filtering by bed type, try to find matching config
1019
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1020
+ let matchingConfig = unitConfigs[0];
1021
+ if (targetBedType !== undefined) {
1022
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1023
+ const bedMatch = unitConfigs.find((config) => {
1024
+ const beds = config.bedConfigurations?.[0]?.beds || [];
1025
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1026
+ return beds.some((bed) => bed.type === targetBedType);
1027
+ });
1028
+ if (bedMatch) {
1029
+ matchingConfig = bedMatch;
1030
+ }
1031
+ }
1032
+ if (matchingConfig) {
1033
+ roomName = matchingConfig.name || roomName;
1034
+ const beds = matchingConfig.bedConfigurations?.[0]?.beds;
1035
+ if (beds && beds.length > 0) {
1036
+ const firstBed = beds[0];
1037
+ bedCount = firstBed.count || 1;
1038
+ // Reverse map bed type
1039
+ const bedTypeNames = {
1040
+ 1: "Single",
1041
+ 2: "Twin",
1042
+ 3: "Double",
1043
+ 4: "Large Double",
1044
+ 5: "Queen",
1045
+ 6: "King",
1046
+ 7: "Super King",
1047
+ };
1048
+ bedType = bedTypeNames[firstBed.type] || "Unknown";
1049
+ }
1050
+ }
1051
+ }
1052
+ return {
1053
+ hotelName,
1054
+ hotelId,
1055
+ hotelUrl,
1056
+ checkIn: "", // Will be filled by caller
1057
+ checkOut: "",
1058
+ guests: 0,
1059
+ rooms: 0,
1060
+ roomName,
1061
+ roomId,
1062
+ price,
1063
+ priceDisplay,
1064
+ pricePerNight,
1065
+ currency,
1066
+ mealPlan,
1067
+ cancellationPolicy,
1068
+ freeCancellationUntil,
1069
+ bedType,
1070
+ bedCount,
1071
+ };
1072
+ }
1073
+ catch {
1074
+ return null;
1075
+ }
1076
+ }, { hotelSlug, targetBedType });
1077
+ }
789
1078
  async dismissPopups() {
790
1079
  if (!this.page)
791
1080
  return;
@@ -989,6 +1278,796 @@ export class HotelBrowser {
989
1278
  return results;
990
1279
  });
991
1280
  }
1281
+ /**
1282
+ * Extract hotel data from Booking.com's Apollo GraphQL cache.
1283
+ * This is more reliable than DOM scraping as it uses structured data.
1284
+ * Falls back gracefully if the cache structure changes.
1285
+ */
1286
+ async extractHotelsFromAPI() {
1287
+ if (!this.page)
1288
+ return [];
1289
+ return await this.page.evaluate(() => {
1290
+ try {
1291
+ // Access the Apollo cache embedded in the page
1292
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1293
+ const w = window;
1294
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1295
+ if (!cache)
1296
+ return [];
1297
+ const rootQuery = cache['ROOT_QUERY'];
1298
+ if (!rootQuery)
1299
+ return [];
1300
+ // searchQueries contains the search results
1301
+ const searchQueries = rootQuery.searchQueries;
1302
+ if (!searchQueries)
1303
+ return [];
1304
+ // Find the search key (complex key with query parameters)
1305
+ const searchKey = Object.keys(searchQueries).find(k => k.startsWith('search('));
1306
+ if (!searchKey)
1307
+ return [];
1308
+ const searchOutput = searchQueries[searchKey];
1309
+ if (!searchOutput)
1310
+ return [];
1311
+ // Get the results array
1312
+ const searchResults = searchOutput.results;
1313
+ if (!searchResults || !Array.isArray(searchResults))
1314
+ return [];
1315
+ const results = [];
1316
+ for (const hotel of searchResults) {
1317
+ if (!hotel)
1318
+ continue;
1319
+ // Skip sponsored/native ad listings
1320
+ const persuasion = hotel.persuasion;
1321
+ if (persuasion?.showNativeAdLabel || persuasion?.nativeAdId) {
1322
+ continue;
1323
+ }
1324
+ // Extract name
1325
+ const name = hotel.displayName?.text || hotel.basicPropertyData?.pageName || 'Unknown';
1326
+ // Extract price
1327
+ let price = null;
1328
+ let priceDisplay = 'Price not shown';
1329
+ const priceInfo = hotel.priceDisplayInfoIrene?.displayPrice?.amountPerStay;
1330
+ if (priceInfo) {
1331
+ priceDisplay = priceInfo.amountRounded || priceInfo.amount || priceDisplay;
1332
+ price = typeof priceInfo.amountUnformatted === 'number' ? priceInfo.amountUnformatted : null;
1333
+ }
1334
+ // Extract rating and reviews from basicPropertyData.reviews
1335
+ let rating = null;
1336
+ let ratingText = '';
1337
+ let reviewCount = null;
1338
+ const reviews = hotel.basicPropertyData?.reviews;
1339
+ if (reviews) {
1340
+ rating = typeof reviews.totalScore === 'number' ? reviews.totalScore : null;
1341
+ ratingText = reviews.totalScoreTextTag?.translation || '';
1342
+ reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
1343
+ }
1344
+ // Extract location
1345
+ const location = hotel.location?.displayLocation || '';
1346
+ const distanceToCenter = hotel.location?.mainDistance || '';
1347
+ // Build thumbnail URL
1348
+ let thumbnailUrl = null;
1349
+ const mainPhoto = hotel.basicPropertyData?.photos?.main;
1350
+ if (mainPhoto) {
1351
+ const relativeUrl = mainPhoto.highResJpegUrl?.relativeUrl ||
1352
+ mainPhoto.highResUrl?.relativeUrl ||
1353
+ mainPhoto.lowResJpegUrl?.relativeUrl;
1354
+ if (relativeUrl) {
1355
+ thumbnailUrl = `https://cf.bstatic.com${relativeUrl}`;
1356
+ }
1357
+ }
1358
+ // Build link with country code (required for API data to load on detail page)
1359
+ let link = '';
1360
+ const pageName = hotel.basicPropertyData?.pageName;
1361
+ const countryCode = hotel.basicPropertyData?.location?.countryCode;
1362
+ if (pageName && countryCode) {
1363
+ link = `https://www.booking.com/hotel/${countryCode}/${pageName}.html`;
1364
+ }
1365
+ else if (pageName) {
1366
+ // Fallback without country code (less reliable for API extraction)
1367
+ link = `https://www.booking.com/hotel/${pageName}.html`;
1368
+ }
1369
+ // Extract amenities and highlights
1370
+ const amenities = [];
1371
+ const highlights = [];
1372
+ // Sustainability
1373
+ if (hotel.propertySustainability?.isSustainable) {
1374
+ amenities.push('Sustainable');
1375
+ }
1376
+ // Policies
1377
+ const policies = hotel.policies;
1378
+ if (policies?.showFreeCancellation) {
1379
+ highlights.push('Free Cancellation');
1380
+ }
1381
+ if (policies?.showNoPrepayment) {
1382
+ highlights.push('No Prepayment');
1383
+ }
1384
+ if (policies?.showPetsAllowedForFree) {
1385
+ amenities.push('Pet Friendly');
1386
+ }
1387
+ // Meal plan
1388
+ if (hotel.mealPlanIncluded?.mealPlanType) {
1389
+ amenities.push('Breakfast Included');
1390
+ }
1391
+ // Extract availability info
1392
+ let availability = null;
1393
+ const soldOutInfo = hotel.soldOutInfo;
1394
+ if (soldOutInfo?.messages && soldOutInfo.messages.length > 0) {
1395
+ const msg = soldOutInfo.messages[0];
1396
+ if (msg?.text) {
1397
+ availability = msg.text;
1398
+ }
1399
+ }
1400
+ results.push({
1401
+ name,
1402
+ price,
1403
+ priceDisplay,
1404
+ rating,
1405
+ ratingText,
1406
+ reviewCount,
1407
+ location,
1408
+ distanceToCenter,
1409
+ amenities,
1410
+ highlights,
1411
+ link,
1412
+ thumbnailUrl,
1413
+ availability,
1414
+ });
1415
+ }
1416
+ return results;
1417
+ }
1418
+ catch {
1419
+ // If anything goes wrong with API extraction, return empty to trigger fallback
1420
+ return [];
1421
+ }
1422
+ });
1423
+ }
1424
+ /**
1425
+ * Extract hotel details from Booking.com's Apollo GraphQL cache on a hotel detail page.
1426
+ * This is more reliable than DOM scraping as it uses structured data.
1427
+ * Returns null if extraction fails (triggering DOM fallback).
1428
+ */
1429
+ async extractHotelDetailsFromAPI() {
1430
+ if (!this.page)
1431
+ return null;
1432
+ return await this.page.evaluate(() => {
1433
+ try {
1434
+ // Access the Apollo cache embedded in the page
1435
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1436
+ const w = window;
1437
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1438
+ if (!cache)
1439
+ return null;
1440
+ // Helper to resolve __ref pointers
1441
+ const resolveRef = (ref) => {
1442
+ if (ref && typeof ref === 'object' && '__ref' in ref) {
1443
+ return cache[ref.__ref];
1444
+ }
1445
+ return ref;
1446
+ };
1447
+ // Find the Property entry - it has a key like 'Property:{"id":6523595}'
1448
+ const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
1449
+ if (!propertyKey)
1450
+ return null;
1451
+ const property = cache[propertyKey];
1452
+ if (!property)
1453
+ return null;
1454
+ // Extract hotel ID from the property key
1455
+ const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
1456
+ const hotelId = idMatch ? idMatch[1] : null;
1457
+ // Get BasicPropertyData for address and location
1458
+ const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
1459
+ const basicData = basicDataKey ? cache[basicDataKey] : null;
1460
+ // Extract name
1461
+ const name = property.name || basicData?.name || 'Unknown';
1462
+ // Extract rating and reviews from property.reviews
1463
+ let rating = null;
1464
+ let ratingText = '';
1465
+ let reviewCount = null;
1466
+ const reviews = property.reviews;
1467
+ if (reviews) {
1468
+ reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
1469
+ // Find the total score from questions array
1470
+ const questions = reviews.questions;
1471
+ if (Array.isArray(questions)) {
1472
+ const totalQ = questions.find((q) => q?.name === 'total');
1473
+ if (totalQ && typeof totalQ.score === 'number') {
1474
+ const score = totalQ.score;
1475
+ rating = score;
1476
+ // Generate rating text based on score
1477
+ if (score >= 9)
1478
+ ratingText = 'Superb';
1479
+ else if (score >= 8)
1480
+ ratingText = 'Very Good';
1481
+ else if (score >= 7)
1482
+ ratingText = 'Good';
1483
+ else if (score >= 6)
1484
+ ratingText = 'Pleasant';
1485
+ else
1486
+ ratingText = 'Review score';
1487
+ }
1488
+ }
1489
+ }
1490
+ // Extract address from BasicPropertyData
1491
+ const address = basicData?.location?.formattedAddress ||
1492
+ basicData?.location?.formattedAddressShort || '';
1493
+ // Extract star rating from accommodation type
1494
+ let starRating = null;
1495
+ const accomType = resolveRef(property.accommodationType);
1496
+ if (accomType && typeof accomType === 'object' && 'starRating' in accomType) {
1497
+ starRating = accomType.starRating || null;
1498
+ }
1499
+ // Extract check-in/out times from houseRules
1500
+ let checkInTime = '';
1501
+ let checkOutTime = '';
1502
+ const houseRules = property.houseRules;
1503
+ if (houseRules?.checkinCheckoutTimes) {
1504
+ const times = houseRules.checkinCheckoutTimes;
1505
+ if (times.checkinTimeRange) {
1506
+ const from = times.checkinTimeRange.fromFormatted;
1507
+ const until = times.checkinTimeRange.untilFormatted;
1508
+ if (from && until) {
1509
+ checkInTime = `${from} - ${until}`;
1510
+ }
1511
+ else if (from) {
1512
+ checkInTime = `From ${from}`;
1513
+ }
1514
+ else if (until) {
1515
+ checkInTime = `Until ${until}`;
1516
+ }
1517
+ }
1518
+ if (times.checkoutTimeRange) {
1519
+ const from = times.checkoutTimeRange.fromFormatted;
1520
+ const until = times.checkoutTimeRange.untilFormatted;
1521
+ if (from && until) {
1522
+ checkOutTime = `${from} - ${until}`;
1523
+ }
1524
+ else if (until) {
1525
+ checkOutTime = `Until ${until}`;
1526
+ }
1527
+ else if (from) {
1528
+ checkOutTime = `From ${from}`;
1529
+ }
1530
+ }
1531
+ }
1532
+ // Extract popular facilities from accommodationHighlights
1533
+ const popularFacilities = [];
1534
+ const highlightKeys = Object.keys(property).filter(k => k.startsWith('accommodationHighlights('));
1535
+ for (const key of highlightKeys) {
1536
+ const highlights = property[key];
1537
+ if (Array.isArray(highlights)) {
1538
+ for (const item of highlights) {
1539
+ const entities = item?.entities;
1540
+ if (Array.isArray(entities)) {
1541
+ for (const entity of entities) {
1542
+ // Direct title (like BreakfastHighlight)
1543
+ if (entity?.title) {
1544
+ popularFacilities.push(entity.title);
1545
+ }
1546
+ // Resolve __ref for GenericFacilityHighlight, WifiFacilityHighlight, etc.
1547
+ const resolved = resolveRef(entity);
1548
+ if (resolved && typeof resolved === 'object' && 'title' in resolved) {
1549
+ const title = resolved.title;
1550
+ if (title && !popularFacilities.includes(title)) {
1551
+ popularFacilities.push(title);
1552
+ }
1553
+ }
1554
+ }
1555
+ }
1556
+ }
1557
+ }
1558
+ }
1559
+ // Extract all facilities from highlights (popularity based)
1560
+ const allFacilities = [];
1561
+ const facilityKeys = Object.keys(property).filter(k => k.startsWith('highlights('));
1562
+ for (const key of facilityKeys) {
1563
+ const highlightData = property[key];
1564
+ const entities = highlightData?.entities;
1565
+ if (Array.isArray(entities)) {
1566
+ for (const entity of entities) {
1567
+ // Skip Meal type entries
1568
+ if (entity?.__typename === 'Meal')
1569
+ continue;
1570
+ const resolved = resolveRef(entity);
1571
+ if (resolved && typeof resolved === 'object') {
1572
+ // For BaseFacility, look at instances
1573
+ const instances = resolved.instances;
1574
+ if (Array.isArray(instances)) {
1575
+ for (const inst of instances) {
1576
+ const resolvedInst = resolveRef(inst);
1577
+ if (resolvedInst && typeof resolvedInst === 'object' && 'title' in resolvedInst) {
1578
+ const title = resolvedInst.title;
1579
+ if (title && !allFacilities.includes(title)) {
1580
+ allFacilities.push(title);
1581
+ }
1582
+ }
1583
+ }
1584
+ }
1585
+ }
1586
+ }
1587
+ }
1588
+ }
1589
+ // Extract photos from propertyGallery
1590
+ const photos = [];
1591
+ const galleryKeys = Object.keys(property).filter(k => k.startsWith('propertyGallery('));
1592
+ for (const key of galleryKeys) {
1593
+ const gallery = property[key];
1594
+ // Main photo
1595
+ if (gallery?.mainPhoto) {
1596
+ const mainPhoto = resolveRef(gallery.mainPhoto);
1597
+ if (mainPhoto && typeof mainPhoto === 'object') {
1598
+ // Look for resource with max500 or max1024x768
1599
+ const photoObj = mainPhoto;
1600
+ const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
1601
+ if (resourceKey) {
1602
+ const resource = photoObj[resourceKey];
1603
+ if (resource?.absoluteUrl) {
1604
+ photos.push(resource.absoluteUrl);
1605
+ }
1606
+ }
1607
+ }
1608
+ }
1609
+ // Room photos
1610
+ const roomPhotos = gallery?.roomPhotos;
1611
+ if (Array.isArray(roomPhotos)) {
1612
+ for (const room of roomPhotos) {
1613
+ const roomPhotosList = room?.photos;
1614
+ if (Array.isArray(roomPhotosList) && photos.length < 5) {
1615
+ for (const photoRef of roomPhotosList) {
1616
+ if (photos.length >= 5)
1617
+ break;
1618
+ const photo = resolveRef(photoRef);
1619
+ if (photo && typeof photo === 'object') {
1620
+ const photoObj = photo;
1621
+ const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
1622
+ if (resourceKey) {
1623
+ const resource = photoObj[resourceKey];
1624
+ if (resource?.absoluteUrl && !photos.includes(resource.absoluteUrl)) {
1625
+ photos.push(resource.absoluteUrl);
1626
+ }
1627
+ }
1628
+ }
1629
+ }
1630
+ }
1631
+ }
1632
+ }
1633
+ }
1634
+ // Extract room types from property.rooms
1635
+ const roomTypes = [];
1636
+ const rooms = property.rooms;
1637
+ if (Array.isArray(rooms)) {
1638
+ for (const roomRef of rooms) {
1639
+ const room = resolveRef(roomRef);
1640
+ if (room && typeof room === 'object') {
1641
+ const roomObj = room;
1642
+ const roomName = roomObj.name || roomObj.description;
1643
+ if (roomName && !roomTypes.includes(roomName)) {
1644
+ roomTypes.push(roomName);
1645
+ }
1646
+ }
1647
+ }
1648
+ }
1649
+ // Extract location info
1650
+ let locationInfo = '';
1651
+ if (basicData?.location) {
1652
+ const loc = basicData.location;
1653
+ const parts = [];
1654
+ if (loc.city)
1655
+ parts.push(loc.city);
1656
+ if (loc.countryCode)
1657
+ parts.push(loc.countryCode.toUpperCase());
1658
+ locationInfo = parts.join(', ');
1659
+ if (loc.latitude && loc.longitude) {
1660
+ locationInfo += ` (${loc.latitude.toFixed(4)}, ${loc.longitude.toFixed(4)})`;
1661
+ }
1662
+ }
1663
+ // Extract review category scores for highlights
1664
+ const guestReviewHighlights = [];
1665
+ if (reviews?.questions && Array.isArray(reviews.questions)) {
1666
+ const categoryNames = {
1667
+ 'hotel_staff': 'Staff',
1668
+ 'hotel_location': 'Location',
1669
+ 'hotel_clean': 'Cleanliness',
1670
+ 'hotel_comfort': 'Comfort',
1671
+ 'hotel_value': 'Value for money',
1672
+ 'hotel_services': 'Facilities',
1673
+ 'hotel_free_wifi': 'Free WiFi'
1674
+ };
1675
+ for (const q of reviews.questions) {
1676
+ if (q?.name && q.name !== 'total' && typeof q.score === 'number') {
1677
+ const displayName = categoryNames[q.name] || q.name;
1678
+ if (categoryNames[q.name]) {
1679
+ guestReviewHighlights.push(`${displayName}: ${q.score.toFixed(1)}`);
1680
+ }
1681
+ }
1682
+ }
1683
+ }
1684
+ // Validate we have meaningful data before returning
1685
+ // Name should be a proper hotel name (at least 3 chars, not 'Unknown')
1686
+ if (!name || name === 'Unknown' || name.length < 3) {
1687
+ return null; // Trigger DOM fallback
1688
+ }
1689
+ // Note: Description, pricePerNight, totalPrice, nearbyAttractions may need DOM fallback
1690
+ // as they're not consistently in the Apollo cache or are dynamic
1691
+ return {
1692
+ name,
1693
+ rating,
1694
+ ratingText,
1695
+ reviewCount,
1696
+ starRating,
1697
+ address,
1698
+ description: '', // Not typically in cache, will need DOM fallback if needed
1699
+ highlights: popularFacilities.slice(0, 5).join(', '),
1700
+ pricePerNight: null, // Dynamic, not in cache
1701
+ priceDisplay: '',
1702
+ totalPrice: '',
1703
+ checkInTime,
1704
+ checkOutTime,
1705
+ popularFacilities: popularFacilities.slice(0, 15),
1706
+ allFacilities: allFacilities.slice(0, 30),
1707
+ roomTypes: roomTypes.slice(0, 5),
1708
+ photos: photos.slice(0, 5),
1709
+ nearbyAttractions: [], // Would need propertySurroundings query
1710
+ guestReviewHighlights: guestReviewHighlights.slice(0, 7),
1711
+ locationInfo
1712
+ };
1713
+ }
1714
+ catch {
1715
+ // If anything goes wrong with API extraction, return null to trigger fallback
1716
+ return null;
1717
+ }
1718
+ });
1719
+ }
1720
+ /**
1721
+ * Fetch room facilities via Booking.com's GraphQL API.
1722
+ * This provides detailed amenities for each room type (AC, TV, bathroom details, etc.)
1723
+ * Must be called when already on a hotel page with an active session.
1724
+ *
1725
+ * @param hotelId - The numeric hotel ID (e.g., 6523595)
1726
+ * @param checkIn - Check-in date in YYYY-MM-DD format
1727
+ * @param checkOut - Check-out date in YYYY-MM-DD format
1728
+ * @returns Map of roomId to array of amenity categories
1729
+ */
1730
+ async fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut) {
1731
+ if (!this.page)
1732
+ return new Map();
1733
+ try {
1734
+ const result = await this.page.evaluate(async ({ hotelId, checkIn, checkOut }) => {
1735
+ const query = `
1736
+ query RoomPageDesktopRDS($rdsInput: RDSRoomDetailQueryInput!) {
1737
+ roomDetail(roomDetailQueryInput: $rdsInput) {
1738
+ categorizedFacilitiesForAllRooms {
1739
+ roomId
1740
+ categorizedFacilities {
1741
+ category
1742
+ facilities {
1743
+ name
1744
+ id
1745
+ }
1746
+ }
1747
+ }
1748
+ }
1749
+ }
1750
+ `;
1751
+ const variables = {
1752
+ rdsInput: {
1753
+ hotelId: String(hotelId),
1754
+ searchConfig: {
1755
+ searchConfigDate: {
1756
+ checkin: checkIn,
1757
+ checkout: checkOut,
1758
+ },
1759
+ nbRooms: 1,
1760
+ nbAdults: 2,
1761
+ nbChildren: 0,
1762
+ childrenAges: [],
1763
+ },
1764
+ highlightedBlocks: [],
1765
+ selectedFilters: '',
1766
+ travelReason: 'LEISURE',
1767
+ },
1768
+ };
1769
+ try {
1770
+ const response = await fetch('/dml/graphql', {
1771
+ method: 'POST',
1772
+ headers: {
1773
+ 'Content-Type': 'application/json',
1774
+ 'x-booking-topic': 'capla_browser_b-property-web-property-page',
1775
+ 'x-booking-context-action-name': 'hotel',
1776
+ 'apollographql-client-name': 'b-property-web-property-page_rust',
1777
+ },
1778
+ body: JSON.stringify({
1779
+ operationName: 'RoomPageDesktopRDS',
1780
+ variables,
1781
+ query,
1782
+ }),
1783
+ });
1784
+ if (!response.ok) {
1785
+ return { error: `HTTP ${response.status}` };
1786
+ }
1787
+ const data = await response.json();
1788
+ return data;
1789
+ }
1790
+ catch (e) {
1791
+ return { error: e instanceof Error ? e.message : 'Unknown error' };
1792
+ }
1793
+ }, { hotelId, checkIn, checkOut });
1794
+ if ('error' in result) {
1795
+ logger.debug({ error: result.error }, 'GraphQL room facilities fetch failed');
1796
+ return new Map();
1797
+ }
1798
+ // Parse the response into our map structure
1799
+ const facilitiesMap = new Map();
1800
+ const roomData = result?.data?.roomDetail?.categorizedFacilitiesForAllRooms || [];
1801
+ for (const room of roomData) {
1802
+ const roomId = String(room.roomId);
1803
+ const categories = [];
1804
+ for (const cat of room.categorizedFacilities || []) {
1805
+ categories.push({
1806
+ category: cat.category || 'General',
1807
+ items: (cat.facilities || []).map((f) => f.name || '').filter(Boolean),
1808
+ });
1809
+ }
1810
+ if (categories.length > 0) {
1811
+ facilitiesMap.set(roomId, categories);
1812
+ }
1813
+ }
1814
+ logger.debug({ roomCount: facilitiesMap.size }, 'Fetched room facilities via GraphQL');
1815
+ return facilitiesMap;
1816
+ }
1817
+ catch (error) {
1818
+ logger.debug({ error }, 'Failed to fetch room facilities via GraphQL');
1819
+ return new Map();
1820
+ }
1821
+ }
1822
+ /**
1823
+ * Extract hotel ID from the current page URL or DOM.
1824
+ * Booking.com hotel IDs are typically in the URL path or data attributes.
1825
+ */
1826
+ async extractHotelId() {
1827
+ if (!this.page)
1828
+ return null;
1829
+ return await this.page.evaluate(() => {
1830
+ // Try to get from URL path (e.g., /hotel/fr/hotel-name.html?... contains ID in data)
1831
+ // Actually, the ID is often in data attributes or Apollo cache
1832
+ // Method 1: Look for data-hotel-id attribute
1833
+ const hotelIdEl = document.querySelector('[data-hotel-id]');
1834
+ if (hotelIdEl) {
1835
+ return hotelIdEl.getAttribute('data-hotel-id');
1836
+ }
1837
+ // Method 2: Look in Apollo cache
1838
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1839
+ const w = window;
1840
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1841
+ if (cache) {
1842
+ const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
1843
+ if (propertyKey) {
1844
+ const match = propertyKey.match(/Property:\{"id":(\d+)\}/);
1845
+ if (match && match[1])
1846
+ return match[1];
1847
+ }
1848
+ }
1849
+ // Method 3: Look for form inputs with hotel_id
1850
+ const hotelInput = document.querySelector('input[name="hotel_id"]');
1851
+ if (hotelInput?.value)
1852
+ return hotelInput.value;
1853
+ // Method 4: Look in data-block-id attributes (format: roomTypeId_policyId_hotelId_...)
1854
+ const blockEl = document.querySelector('[data-block-id]');
1855
+ if (blockEl) {
1856
+ const blockId = blockEl.getAttribute('data-block-id') || '';
1857
+ const parts = blockId.split('_');
1858
+ // Hotel ID is typically in position 2 (after roomTypeId and policyId)
1859
+ const potentialHotelId = parts[2];
1860
+ if (parts.length >= 3 && potentialHotelId && /^\d{5,}$/.test(potentialHotelId)) {
1861
+ return potentialHotelId;
1862
+ }
1863
+ }
1864
+ return null;
1865
+ });
1866
+ }
1867
+ /**
1868
+ * Extract reviews data from Booking.com's Apollo GraphQL cache.
1869
+ * Returns null if extraction fails (triggering DOM fallback).
1870
+ */
1871
+ async extractReviewsFromAPI() {
1872
+ if (!this.page)
1873
+ return null;
1874
+ return await this.page.evaluate(() => {
1875
+ try {
1876
+ // Access the Apollo cache embedded in the page
1877
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1878
+ const w = window;
1879
+ const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
1880
+ if (!cache)
1881
+ return null;
1882
+ // Helper to resolve __ref pointers
1883
+ const resolveRef = (ref) => {
1884
+ if (ref && typeof ref === 'object' && '__ref' in ref) {
1885
+ return cache[ref.__ref];
1886
+ }
1887
+ return ref;
1888
+ };
1889
+ // Find the Property entry - it has a key like 'Property:{"id":6523595}'
1890
+ const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
1891
+ if (!propertyKey)
1892
+ return null;
1893
+ const property = cache[propertyKey];
1894
+ if (!property)
1895
+ return null;
1896
+ // Extract hotel ID from the property key
1897
+ const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
1898
+ const hotelId = idMatch ? idMatch[1] : null;
1899
+ // Get BasicPropertyData for hotel name
1900
+ const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
1901
+ const basicData = basicDataKey ? cache[basicDataKey] : null;
1902
+ // Extract hotel name
1903
+ const hotelName = property.name || basicData?.name || '';
1904
+ // Extract overall rating, total reviews, and rating breakdown from property.reviews
1905
+ let overallRating = null;
1906
+ let totalReviews = 0;
1907
+ const ratingBreakdown = {
1908
+ staff: null,
1909
+ facilities: null,
1910
+ cleanliness: null,
1911
+ comfort: null,
1912
+ valueForMoney: null,
1913
+ location: null,
1914
+ freeWifi: null,
1915
+ };
1916
+ const reviewsData = property.reviews;
1917
+ if (reviewsData) {
1918
+ totalReviews = typeof reviewsData.reviewsCount === 'number' ? reviewsData.reviewsCount : 0;
1919
+ // Map question names to breakdown fields
1920
+ const questionMap = {
1921
+ 'hotel_staff': 'staff',
1922
+ 'hotel_services': 'facilities',
1923
+ 'hotel_clean': 'cleanliness',
1924
+ 'hotel_comfort': 'comfort',
1925
+ 'hotel_value': 'valueForMoney',
1926
+ 'hotel_location': 'location',
1927
+ 'hotel_free_wifi': 'freeWifi',
1928
+ };
1929
+ const questions = reviewsData.questions;
1930
+ if (Array.isArray(questions)) {
1931
+ for (const q of questions) {
1932
+ if (!q?.name || typeof q.score !== 'number')
1933
+ continue;
1934
+ if (q.name === 'total') {
1935
+ overallRating = q.score;
1936
+ }
1937
+ else {
1938
+ const breakdownKey = questionMap[q.name];
1939
+ if (breakdownKey) {
1940
+ ratingBreakdown[breakdownKey] = q.score;
1941
+ }
1942
+ }
1943
+ }
1944
+ }
1945
+ }
1946
+ // Extract individual reviews from FeaturedReview entries
1947
+ const reviews = [];
1948
+ // Map customer types to display names
1949
+ const customerTypeMap = {
1950
+ 'SOLO_TRAVELLER': 'Solo traveler',
1951
+ 'YOUNG_COUPLE': 'Couple',
1952
+ 'MATURE_COUPLE': 'Couple',
1953
+ 'FAMILY_WITH_YOUNG_CHILDREN': 'Family with young children',
1954
+ 'FAMILY_WITH_OLDER_CHILDREN': 'Family with older children',
1955
+ 'WITH_FRIENDS': 'Group of friends',
1956
+ 'BUSINESS': 'Business traveler',
1957
+ };
1958
+ // Map country codes to names
1959
+ const countryCodeMap = {
1960
+ 'us': 'United States',
1961
+ 'gb': 'United Kingdom',
1962
+ 'fr': 'France',
1963
+ 'de': 'Germany',
1964
+ 'es': 'Spain',
1965
+ 'it': 'Italy',
1966
+ 'nl': 'Netherlands',
1967
+ 'be': 'Belgium',
1968
+ 'ch': 'Switzerland',
1969
+ 'au': 'Australia',
1970
+ 'ca': 'Canada',
1971
+ 'jp': 'Japan',
1972
+ 'cn': 'China',
1973
+ 'kr': 'South Korea',
1974
+ 'br': 'Brazil',
1975
+ 'mx': 'Mexico',
1976
+ 'in': 'India',
1977
+ 'ru': 'Russia',
1978
+ 'pl': 'Poland',
1979
+ 'se': 'Sweden',
1980
+ 'no': 'Norway',
1981
+ 'dk': 'Denmark',
1982
+ 'fi': 'Finland',
1983
+ 'at': 'Austria',
1984
+ 'pt': 'Portugal',
1985
+ 'gr': 'Greece',
1986
+ 'tr': 'Turkey',
1987
+ 'ie': 'Ireland',
1988
+ 'nz': 'New Zealand',
1989
+ 'za': 'South Africa',
1990
+ 'ar': 'Argentina',
1991
+ 'cl': 'Chile',
1992
+ 'co': 'Colombia',
1993
+ 'th': 'Thailand',
1994
+ 'sg': 'Singapore',
1995
+ 'my': 'Malaysia',
1996
+ 'id': 'Indonesia',
1997
+ 'ph': 'Philippines',
1998
+ 'vn': 'Vietnam',
1999
+ 'ae': 'United Arab Emirates',
2000
+ 'sa': 'Saudi Arabia',
2001
+ 'eg': 'Egypt',
2002
+ 'il': 'Israel',
2003
+ 'cz': 'Czech Republic',
2004
+ 'hu': 'Hungary',
2005
+ 'ro': 'Romania',
2006
+ };
2007
+ // Find all FeaturedReview entries
2008
+ const reviewKeys = Object.keys(cache).filter(k => k.startsWith('FeaturedReview:'));
2009
+ for (const key of reviewKeys) {
2010
+ const review = cache[key];
2011
+ if (!review)
2012
+ continue;
2013
+ // Format the date from Unix timestamp
2014
+ let dateStr = '';
2015
+ if (typeof review.completed === 'number') {
2016
+ const date = new Date(review.completed * 1000);
2017
+ dateStr = date.toLocaleDateString('en-US', {
2018
+ year: 'numeric',
2019
+ month: 'long',
2020
+ day: 'numeric'
2021
+ });
2022
+ }
2023
+ // Get room type from ref
2024
+ let roomType = '';
2025
+ const roomRef = resolveRef(review.roomType);
2026
+ if (roomRef && typeof roomRef === 'object' && 'name' in roomRef) {
2027
+ roomType = roomRef.name || '';
2028
+ }
2029
+ // Get country name from code
2030
+ const countryCode = (review.guestCountryCode || '').toLowerCase();
2031
+ const country = countryCodeMap[countryCode] || countryCode.toUpperCase();
2032
+ // Get traveler type display name
2033
+ const travelerType = customerTypeMap[review.customerType] || review.customerType || '';
2034
+ reviews.push({
2035
+ title: review.title || '',
2036
+ rating: typeof review.averageScore === 'number' ? review.averageScore : null,
2037
+ date: dateStr,
2038
+ travelerType,
2039
+ stayDate: '', // Not available in FeaturedReview
2040
+ roomType,
2041
+ nightsStayed: '', // Not available in FeaturedReview
2042
+ positive: review.positiveText || '',
2043
+ negative: review.negativeText || '',
2044
+ country,
2045
+ });
2046
+ }
2047
+ // Sort reviews by date (newest first - higher timestamp = newer)
2048
+ reviews.sort((a, b) => {
2049
+ // Parse dates back for comparison
2050
+ const dateA = new Date(a.date).getTime() || 0;
2051
+ const dateB = new Date(b.date).getTime() || 0;
2052
+ return dateB - dateA;
2053
+ });
2054
+ // Validate we have meaningful data
2055
+ if (!hotelName || hotelName.length < 3) {
2056
+ return null;
2057
+ }
2058
+ return {
2059
+ hotelName,
2060
+ overallRating,
2061
+ totalReviews,
2062
+ ratingBreakdown,
2063
+ reviews,
2064
+ };
2065
+ }
2066
+ catch {
2067
+ return null;
2068
+ }
2069
+ });
2070
+ }
992
2071
  scoreAndFilterHotels(hotels, filters) {
993
2072
  return hotels
994
2073
  .map((hotel) => {
@@ -1192,6 +2271,16 @@ export class HotelBrowser {
1192
2271
  await this.page.waitForTimeout(2000);
1193
2272
  await this.checkForBlocking();
1194
2273
  await this.dismissPopups();
2274
+ // Try API extraction first (more reliable structured data)
2275
+ const apiDetails = await this.extractHotelDetailsFromAPI();
2276
+ if (apiDetails) {
2277
+ logger.debug("Successfully extracted hotel details from API cache");
2278
+ return {
2279
+ ...apiDetails,
2280
+ url: hotelUrl,
2281
+ };
2282
+ }
2283
+ logger.debug("API extraction returned no results, falling back to DOM scraping");
1195
2284
  // Extract comprehensive hotel details using evaluate with string to avoid __name compilation issues
1196
2285
  const details = await this.page.evaluate(`
1197
2286
  (function() {
@@ -1475,7 +2564,7 @@ export class HotelBrowser {
1475
2564
  await this.page.waitForTimeout(2000);
1476
2565
  await this.checkForBlocking();
1477
2566
  await this.dismissPopups();
1478
- // Extract room availability using string-based evaluate
2567
+ // Extract room availability using data attributes (primary) with DOM fallback
1479
2568
  const result = await this.page.evaluate(`
1480
2569
  (function() {
1481
2570
  function getText(selector) {
@@ -1487,152 +2576,292 @@ export class HotelBrowser {
1487
2576
  var hotelName = getText('h2') || getText('h1').split('(')[0].trim() || "Unknown Hotel";
1488
2577
 
1489
2578
  var roomOptions = [];
1490
- var seenRooms = {};
1491
2579
 
1492
- // Strategy 1: Look for room type links (most reliable on Booking.com)
1493
- var roomTypeLinks = document.querySelectorAll('.hprt-roomtype-link, a[class*="hprt-roomtype"]');
2580
+ // ============================================================
2581
+ // STRATEGY 1: Extract from data-* attributes (most reliable)
2582
+ // Uses data-block-id, data-hotel-rounded-price, and data-fltrs
2583
+ // ============================================================
2584
+
2585
+ // First, build maps of room type IDs to room names and bed types from header rows
2586
+ var roomNameMap = {};
2587
+ var bedTypeMap = {};
2588
+ var roomTypeHeaders = document.querySelectorAll('.hprt-roomtype-link');
2589
+ for (var h = 0; h < roomTypeHeaders.length; h++) {
2590
+ var header = roomTypeHeaders[h];
2591
+ var headerRow = header.closest('tr');
2592
+ var headerBlockId = headerRow ? headerRow.getAttribute('data-block-id') : null;
2593
+ if (headerBlockId && headerBlockId.indexOf('_') > 0) {
2594
+ var headerRoomTypeId = headerBlockId.split('_')[0];
2595
+ var headerRoomName = header.textContent ? header.textContent.trim() : '';
2596
+ if (headerRoomName) {
2597
+ roomNameMap[headerRoomTypeId] = headerRoomName;
2598
+ }
2599
+ // Also capture bed type from header row
2600
+ var bedEl = headerRow.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
2601
+ if (bedEl) {
2602
+ var bedText = bedEl.textContent || '';
2603
+ var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
2604
+ for (var b = 0; b < bedLines.length; b++) {
2605
+ if (bedLines[b].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
2606
+ bedTypeMap[headerRoomTypeId] = bedLines[b];
2607
+ break;
2608
+ }
2609
+ }
2610
+ }
2611
+ }
2612
+ }
2613
+
2614
+ // Extract all room blocks with data-hotel-rounded-price attribute
2615
+ // Returns ALL rate options (room + meal plan + cancellation combinations)
2616
+ var dataRows = document.querySelectorAll('tr[data-block-id][data-hotel-rounded-price]');
2617
+ var seenBlockIds = {}; // Track exact block IDs to avoid true duplicates
1494
2618
 
1495
- for (var i = 0; i < roomTypeLinks.length && roomOptions.length < 10; i++) {
1496
- var roomLink = roomTypeLinks[i];
1497
- var name = roomLink.textContent.trim();
2619
+ for (var i = 0; i < dataRows.length && roomOptions.length < 30; i++) {
2620
+ var row = dataRows[i];
2621
+ var blockId = row.getAttribute('data-block-id') || '';
2622
+ var parts = blockId.split('_');
2623
+ if (parts.length < 2) continue;
1498
2624
 
1499
- if (!name || name.length < 3 || seenRooms[name]) continue;
1500
- seenRooms[name] = true;
2625
+ // Skip exact duplicate block IDs
2626
+ if (seenBlockIds[blockId]) continue;
2627
+ seenBlockIds[blockId] = true;
1501
2628
 
1502
- // Find the containing row to get price and details
1503
- var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
1504
- var rowText = row ? row.textContent || "" : "";
2629
+ var roomTypeId = parts[0];
1505
2630
 
1506
- // Try to find price in the same row or nearby
1507
- var price = null;
1508
- var priceDisplay = "";
2631
+ // Get price from data attribute (more reliable than DOM text)
2632
+ var roundedPrice = row.getAttribute('data-hotel-rounded-price');
2633
+ var price = roundedPrice ? parseInt(roundedPrice, 10) : null;
1509
2634
 
1510
- // Look for price cell in this row or next siblings
1511
- var priceCell = row ? row.querySelector('.hprt-table-cell-price, [class*="price-block"], [class*="bui-price"]') : null;
1512
- if (priceCell) {
1513
- priceDisplay = priceCell.textContent.trim();
1514
- var match = priceDisplay.match(/[\\$€£¥]\\s*([\\d,]+)/);
1515
- if (match) {
1516
- price = parseInt(match[1].replace(/,/g, ""));
1517
- // Clean up price display
1518
- var perNightMatch = priceDisplay.match(/[\\$€£¥]\\s*[\\d,]+/);
1519
- priceDisplay = perNightMatch ? perNightMatch[0] : priceDisplay.split('\\n')[0];
1520
- }
2635
+ // Get price display from DOM
2636
+ var priceDisplay = '';
2637
+ var priceEl = row.querySelector('.bui-price-display__value');
2638
+ if (priceEl) {
2639
+ var displayMatch = (priceEl.textContent || '').match(/[\\$€£¥][\\d,]+/);
2640
+ priceDisplay = displayMatch ? displayMatch[0] : '';
1521
2641
  }
1522
2642
 
1523
- // If no price found in row, search in sibling rows with same room type
1524
- if (!price) {
1525
- var allPriceCells = document.querySelectorAll('.hprt-table-cell-price');
1526
- for (var j = 0; j < allPriceCells.length && !price; j++) {
1527
- var cellText = allPriceCells[j].textContent || "";
1528
- var match = cellText.match(/[\\$€£¥]\\s*([\\d,]+)/);
1529
- if (match) {
1530
- price = parseInt(match[1].replace(/,/g, ""));
1531
- priceDisplay = match[0];
1532
- break;
1533
- }
1534
- }
2643
+ // Get room name from our map
2644
+ var roomName = roomNameMap[roomTypeId] || '';
2645
+
2646
+ // If no name in map, try to find it in the row
2647
+ if (!roomName) {
2648
+ var roomLink = row.querySelector('.hprt-roomtype-link, a[class*="room"]');
2649
+ roomName = roomLink ? (roomLink.textContent || '').trim() : '';
2650
+ }
2651
+
2652
+ // Still no name? Use a generic one
2653
+ if (!roomName) {
2654
+ roomName = 'Room Type ' + roomTypeId;
2655
+ }
2656
+
2657
+ // Parse data-fltrs for structured info (breakfast, beds)
2658
+ var fltrs = row.getAttribute('data-fltrs');
2659
+ var breakfastIncluded = false;
2660
+ var bedCount = [];
2661
+
2662
+ if (fltrs) {
2663
+ try {
2664
+ var fltrData = JSON.parse(fltrs.replace(/\\n/g, ''));
2665
+ breakfastIncluded = fltrData.breakfast_included === 1;
2666
+ bedCount = fltrData.bed_count || [];
2667
+ } catch (e) {}
1535
2668
  }
1536
2669
 
1537
- // Bed type - clean up multiline text
1538
- var bedType = "";
1539
- var bedEl = row ? row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]') : null;
2670
+ // Get bed type from DOM (for display)
2671
+ var bedType = '';
2672
+ var bedEl = row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
1540
2673
  if (bedEl) {
1541
- // Get first meaningful line
1542
- var bedText = bedEl.textContent || "";
2674
+ var bedText = bedEl.textContent || '';
1543
2675
  var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
1544
- // Find line with bed info
1545
2676
  for (var k = 0; k < bedLines.length; k++) {
1546
2677
  if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
1547
2678
  bedType = bedLines[k];
1548
2679
  break;
1549
2680
  }
1550
2681
  }
1551
- if (!bedType && bedLines.length > 0) {
1552
- bedType = bedLines[0];
1553
- }
2682
+ }
2683
+ // Fallback 1: use bed type from our map (captured from header rows)
2684
+ if (!bedType && bedTypeMap[roomTypeId]) {
2685
+ bedType = bedTypeMap[roomTypeId];
2686
+ }
2687
+ // Fallback 2: use bed count from data-fltrs
2688
+ if (!bedType && bedCount.length > 0) {
2689
+ bedType = bedCount.length === 1 ? bedCount[0] + ' bed' : bedCount.join(' or ') + ' beds';
1554
2690
  }
1555
2691
 
1556
- // Cancellation
1557
- var cancellation = "";
1558
- if (rowText.toLowerCase().indexOf("free cancellation") >= 0) {
1559
- cancellation = "Free cancellation";
1560
- } else if (rowText.toLowerCase().indexOf("non-refundable") >= 0) {
1561
- cancellation = "Non-refundable";
2692
+ // Get cancellation policy from row text
2693
+ var rowText = row.textContent || '';
2694
+ var rowTextLower = rowText.toLowerCase();
2695
+ var cancellation = '';
2696
+ if (rowTextLower.indexOf('free cancellation') >= 0) {
2697
+ cancellation = 'Free cancellation';
2698
+ } else if (rowTextLower.indexOf('non-refundable') >= 0) {
2699
+ cancellation = 'Non-refundable';
1562
2700
  }
1563
2701
 
1564
- // Breakfast
1565
- var breakfast = "";
1566
- if (rowText.toLowerCase().indexOf("breakfast included") >= 0) {
1567
- breakfast = "Breakfast included";
1568
- } else if (rowText.toLowerCase().indexOf("room only") >= 0) {
1569
- breakfast = "Room only";
2702
+ // Get breakfast info (prefer data-fltrs, fallback to DOM text)
2703
+ var breakfast = '';
2704
+ if (breakfastIncluded) {
2705
+ breakfast = 'Breakfast included';
2706
+ } else if (rowTextLower.indexOf('breakfast included') >= 0) {
2707
+ breakfast = 'Breakfast included';
2708
+ } else if (rowTextLower.indexOf('room only') >= 0) {
2709
+ breakfast = 'Room only';
1570
2710
  }
1571
2711
 
1572
- // Occupancy
2712
+ // Get occupancy
1573
2713
  var sleeps = null;
1574
- var occupancyEl = row ? row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info') : null;
2714
+ var occupancyEl = row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info');
1575
2715
  if (occupancyEl) {
1576
- var occMatch = occupancyEl.textContent.match(/(\\d+)/);
1577
- sleeps = occMatch ? parseInt(occMatch[1]) : null;
2716
+ var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
2717
+ sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
1578
2718
  }
1579
2719
 
2720
+ // Build features array
2721
+ var features = [];
2722
+ if (breakfast) features.push(breakfast);
2723
+ if (cancellation) features.push(cancellation);
2724
+
1580
2725
  roomOptions.push({
1581
- name: name,
2726
+ name: roomName,
1582
2727
  price: price,
1583
2728
  priceDisplay: priceDisplay,
1584
2729
  sleeps: sleeps,
1585
- features: [],
2730
+ features: features,
1586
2731
  bedType: bedType,
1587
2732
  cancellation: cancellation,
1588
- breakfast: breakfast
2733
+ breakfast: breakfast,
2734
+ roomTypeId: roomTypeId
1589
2735
  });
1590
2736
  }
1591
2737
 
1592
- // Strategy 2: If no rooms found, try data-block-id elements
2738
+ // ============================================================
2739
+ // STRATEGY 2: Fallback to DOM scraping if data attributes failed
2740
+ // ============================================================
2741
+ if (roomOptions.length === 0) {
2742
+ var seenRooms = {};
2743
+ var roomTypeLinks = document.querySelectorAll('.hprt-roomtype-link, a[class*="hprt-roomtype"]');
2744
+
2745
+ for (var i = 0; i < roomTypeLinks.length && roomOptions.length < 10; i++) {
2746
+ var roomLink = roomTypeLinks[i];
2747
+ var name = roomLink.textContent ? roomLink.textContent.trim() : '';
2748
+
2749
+ if (!name || name.length < 3 || seenRooms[name]) continue;
2750
+ seenRooms[name] = true;
2751
+
2752
+ var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
2753
+ var rowText = row ? row.textContent || '' : '';
2754
+
2755
+ // Try to find price
2756
+ var price = null;
2757
+ var priceDisplay = '';
2758
+ var priceCell = row ? row.querySelector('.hprt-table-cell-price, [class*="price-block"], [class*="bui-price"]') : null;
2759
+ if (priceCell) {
2760
+ var match = (priceCell.textContent || '').match(/[\\$€£¥]\\s*([\\d,]+)/);
2761
+ if (match) {
2762
+ price = parseInt(match[1].replace(/,/g, ''), 10);
2763
+ priceDisplay = match[0];
2764
+ }
2765
+ }
2766
+
2767
+ // Bed type
2768
+ var bedType = '';
2769
+ var bedEl = row ? row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]') : null;
2770
+ if (bedEl) {
2771
+ var bedText = bedEl.textContent || '';
2772
+ var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
2773
+ for (var k = 0; k < bedLines.length; k++) {
2774
+ if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
2775
+ bedType = bedLines[k];
2776
+ break;
2777
+ }
2778
+ }
2779
+ }
2780
+
2781
+ // Cancellation and breakfast from text
2782
+ var rowTextLower = rowText.toLowerCase();
2783
+ var cancellation = '';
2784
+ if (rowTextLower.indexOf('free cancellation') >= 0) {
2785
+ cancellation = 'Free cancellation';
2786
+ } else if (rowTextLower.indexOf('non-refundable') >= 0) {
2787
+ cancellation = 'Non-refundable';
2788
+ }
2789
+
2790
+ var breakfast = '';
2791
+ if (rowTextLower.indexOf('breakfast included') >= 0) {
2792
+ breakfast = 'Breakfast included';
2793
+ } else if (rowTextLower.indexOf('room only') >= 0) {
2794
+ breakfast = 'Room only';
2795
+ }
2796
+
2797
+ // Occupancy
2798
+ var sleeps = null;
2799
+ var occupancyEl = row ? row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info') : null;
2800
+ if (occupancyEl) {
2801
+ var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
2802
+ sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
2803
+ }
2804
+
2805
+ roomOptions.push({
2806
+ name: name,
2807
+ price: price,
2808
+ priceDisplay: priceDisplay,
2809
+ sleeps: sleeps,
2810
+ features: [],
2811
+ bedType: bedType,
2812
+ cancellation: cancellation,
2813
+ breakfast: breakfast
2814
+ });
2815
+ }
2816
+ }
2817
+
2818
+ // ============================================================
2819
+ // STRATEGY 3: Last resort - look for any data-block-id elements
2820
+ // ============================================================
1593
2821
  if (roomOptions.length === 0) {
2822
+ var seenBlocks = {};
1594
2823
  var blocks = document.querySelectorAll('[data-block-id]');
1595
2824
  for (var i = 0; i < blocks.length && roomOptions.length < 10; i++) {
1596
2825
  var block = blocks[i];
1597
- var blockText = block.textContent || "";
2826
+ var blockId = block.getAttribute('data-block-id') || '';
2827
+ if (!blockId || blockId === 'header_survey') continue;
1598
2828
 
1599
- // Look for any room name pattern
2829
+ var blockText = block.textContent || '';
1600
2830
  var nameEl = block.querySelector('a[class*="room"], span[class*="room-name"]');
1601
- var name = nameEl ? nameEl.textContent.trim() : "";
2831
+ var name = nameEl ? (nameEl.textContent || '').trim() : '';
1602
2832
 
1603
2833
  if (!name) {
1604
- // Try to extract from block text
1605
2834
  var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
1606
- name = lines[0] ? lines[0].trim().slice(0, 50) : "";
2835
+ name = lines[0] ? lines[0].trim().slice(0, 50) : '';
1607
2836
  }
1608
2837
 
1609
- if (!name || name.length < 3 || seenRooms[name]) continue;
1610
- seenRooms[name] = true;
2838
+ if (!name || name.length < 3 || seenBlocks[name]) continue;
2839
+ seenBlocks[name] = true;
1611
2840
 
1612
2841
  var priceMatch = blockText.match(/[\\$€£¥]\\s*([\\d,]+)/);
1613
- var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g, "")) : null;
2842
+ var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g, ''), 10) : null;
1614
2843
 
1615
2844
  roomOptions.push({
1616
2845
  name: name,
1617
2846
  price: price,
1618
- priceDisplay: priceMatch ? priceMatch[0] : "",
2847
+ priceDisplay: priceMatch ? priceMatch[0] : '',
1619
2848
  sleeps: null,
1620
2849
  features: [],
1621
- bedType: "",
1622
- cancellation: "",
1623
- breakfast: ""
2850
+ bedType: '',
2851
+ cancellation: '',
2852
+ breakfast: ''
1624
2853
  });
1625
2854
  }
1626
2855
  }
1627
2856
 
1628
2857
  // Check for "no availability" message
1629
- var bodyText = document.body.textContent || "";
2858
+ var bodyText = document.body.textContent || '';
1630
2859
  var noAvailability =
1631
- bodyText.indexOf("no availability") >= 0 ||
1632
- bodyText.indexOf("sold out") >= 0 ||
1633
- bodyText.indexOf("no rooms available") >= 0 ||
1634
- bodyText.indexOf("fully booked") >= 0 ||
1635
- bodyText.indexOf("We have no availability") >= 0;
2860
+ bodyText.indexOf('no availability') >= 0 ||
2861
+ bodyText.indexOf('sold out') >= 0 ||
2862
+ bodyText.indexOf('no rooms available') >= 0 ||
2863
+ bodyText.indexOf('fully booked') >= 0 ||
2864
+ bodyText.indexOf('We have no availability') >= 0;
1636
2865
 
1637
2866
  return {
1638
2867
  hotelName: hotelName,
@@ -1641,6 +2870,45 @@ export class HotelBrowser {
1641
2870
  };
1642
2871
  })()
1643
2872
  `);
2873
+ // Enrich room options with facilities from GraphQL API
2874
+ // This provides detailed amenities (AC, TV, bathroom, etc.) per room type
2875
+ if (result.roomOptions.length > 0) {
2876
+ try {
2877
+ const hotelId = await this.extractHotelId();
2878
+ if (hotelId) {
2879
+ const facilitiesMap = await this.fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut);
2880
+ if (facilitiesMap.size > 0) {
2881
+ // Merge facilities into room options based on roomTypeId
2882
+ // Room type IDs are the first 9 digits of the full room ID (e.g., 652359501 -> 652359501)
2883
+ for (const room of result.roomOptions) {
2884
+ if (room.roomTypeId) {
2885
+ // Try exact match first
2886
+ let facilities = facilitiesMap.get(room.roomTypeId);
2887
+ // If not found, the GraphQL returns full room IDs (e.g., 652359501)
2888
+ // while our roomTypeId might be just the prefix
2889
+ if (!facilities) {
2890
+ // Find a matching key that starts with our roomTypeId
2891
+ for (const [key, value] of facilitiesMap) {
2892
+ if (key.startsWith(room.roomTypeId) || room.roomTypeId.startsWith(key)) {
2893
+ facilities = value;
2894
+ break;
2895
+ }
2896
+ }
2897
+ }
2898
+ if (facilities) {
2899
+ room.amenities = facilities;
2900
+ }
2901
+ }
2902
+ }
2903
+ logger.debug({ enrichedRooms: result.roomOptions.filter(r => r.amenities).length }, 'Enriched room options with GraphQL facilities');
2904
+ }
2905
+ }
2906
+ }
2907
+ catch (error) {
2908
+ // Non-fatal: continue without facilities enrichment
2909
+ logger.debug({ error }, 'Failed to enrich rooms with GraphQL facilities');
2910
+ }
2911
+ }
1644
2912
  // Determine availability and lowest price
1645
2913
  const available = result.roomOptions.length > 0 && !result.noAvailabilityDetected;
1646
2914
  const prices = result.roomOptions
@@ -1708,52 +2976,112 @@ export class HotelBrowser {
1708
2976
  catch { }
1709
2977
  await this.page.keyboard.press("Escape");
1710
2978
  await this.page.waitForTimeout(500);
1711
- // Get overall rating info from main page before opening modal
1712
- const mainPageData = await this.page.evaluate(`
1713
- (function() {
1714
- var results = { hotelName: '', overallRating: null, totalReviews: 0, breakdown: {} };
1715
-
1716
- // Hotel name
1717
- var nameEl = document.querySelector('h2[class*="pp-header__title"], [data-testid="PropertyHeaderDesktop-wrapper"] h2, h2.d2fee87262');
1718
- results.hotelName = nameEl?.textContent?.trim() || '';
1719
-
1720
- // Overall rating and total reviews from review-score-component
1721
- var scoreComponent = document.querySelector('[data-testid="review-score-component"]');
1722
- if (scoreComponent) {
1723
- var text = scoreComponent.textContent || '';
1724
- // Extract score (e.g., "Scored 9.1 9.1..." -> 9.1)
1725
- var scoreMatch = text.match(/Scored\\s+([\\d.]+)/);
1726
- if (scoreMatch) {
1727
- results.overallRating = parseFloat(scoreMatch[1]);
1728
- }
1729
- // Extract total reviews (e.g., "1,043 reviews")
1730
- var reviewCountMatch = text.match(/([\\d,]+)\\s+reviews?/);
1731
- if (reviewCountMatch) {
1732
- results.totalReviews = parseInt(reviewCountMatch[1].replace(/,/g, ''));
1733
- }
1734
- }
1735
-
1736
- // Rating breakdown categories
1737
- var breakdownEls = document.querySelectorAll('[data-testid="review-subscore"]');
1738
- breakdownEls.forEach(function(el) {
1739
- var text = el.textContent?.trim() || '';
1740
- var parts = text.split(/\\s+/);
1741
- if (parts.length >= 2) {
1742
- var score = parseFloat(parts[parts.length - 1]);
1743
- var category = parts.slice(0, -1).join(' ').toLowerCase();
1744
- if (category.includes('staff')) results.breakdown.staff = score;
1745
- else if (category.includes('facilities')) results.breakdown.facilities = score;
1746
- else if (category.includes('cleanliness')) results.breakdown.cleanliness = score;
1747
- else if (category.includes('comfort')) results.breakdown.comfort = score;
1748
- else if (category.includes('value') || category.includes('money')) results.breakdown.valueForMoney = score;
1749
- else if (category.includes('location')) results.breakdown.location = score;
1750
- else if (category.includes('wifi') || category.includes('wi-fi')) results.breakdown.freeWifi = score;
2979
+ // Try API extraction first for basic review data
2980
+ // API provides: hotel name, overall rating, rating breakdown, and featured reviews
2981
+ // Note: API reviews are limited to what's in cache (~6-10 reviews), sorted by newest
2982
+ const apiData = await this.extractReviewsFromAPI();
2983
+ // Determine if we can use API data directly or need DOM fallback
2984
+ // Use API if: we have enough reviews AND no special sorting/filtering is requested
2985
+ const canUseApiOnly = apiData &&
2986
+ apiData.reviews.length >= limit &&
2987
+ sortBy === "recent" &&
2988
+ !filterBy;
2989
+ if (canUseApiOnly) {
2990
+ logger.debug("Using API extraction for reviews (sufficient data, no filters)");
2991
+ const reviewsResult = {
2992
+ hotelName: apiData.hotelName,
2993
+ overallRating: apiData.overallRating,
2994
+ totalReviews: apiData.totalReviews,
2995
+ ratingBreakdown: apiData.ratingBreakdown,
2996
+ reviews: apiData.reviews.slice(0, limit),
2997
+ url: cleanUrl,
2998
+ };
2999
+ await this.saveSession();
3000
+ return reviewsResult;
3001
+ }
3002
+ // Use API data for metadata if available, but get reviews from DOM
3003
+ // This gives us accurate rating breakdown from API + more reviews from DOM
3004
+ const baseData = apiData || {
3005
+ hotelName: '',
3006
+ overallRating: null,
3007
+ totalReviews: 0,
3008
+ ratingBreakdown: {
3009
+ staff: null,
3010
+ facilities: null,
3011
+ cleanliness: null,
3012
+ comfort: null,
3013
+ valueForMoney: null,
3014
+ location: null,
3015
+ freeWifi: null,
3016
+ },
3017
+ };
3018
+ // If API didn't give us hotel info, get it from DOM
3019
+ if (!baseData.hotelName) {
3020
+ const mainPageData = await this.page.evaluate(`
3021
+ (function() {
3022
+ var results = { hotelName: '', overallRating: null, totalReviews: 0, breakdown: {} };
3023
+
3024
+ // Hotel name
3025
+ var nameEl = document.querySelector('h2[class*="pp-header__title"], [data-testid="PropertyHeaderDesktop-wrapper"] h2, h2.d2fee87262');
3026
+ results.hotelName = nameEl?.textContent?.trim() || '';
3027
+
3028
+ // Overall rating and total reviews from review-score-component
3029
+ var scoreComponent = document.querySelector('[data-testid="review-score-component"]');
3030
+ if (scoreComponent) {
3031
+ var text = scoreComponent.textContent || '';
3032
+ var scoreMatch = text.match(/Scored\\s+([\\d.]+)/);
3033
+ if (scoreMatch) {
3034
+ results.overallRating = parseFloat(scoreMatch[1]);
3035
+ }
3036
+ var reviewCountMatch = text.match(/([\\d,]+)\\s+reviews?/);
3037
+ if (reviewCountMatch) {
3038
+ results.totalReviews = parseInt(reviewCountMatch[1].replace(/,/g, ''));
3039
+ }
1751
3040
  }
1752
- });
1753
-
1754
- return results;
1755
- })()
1756
- `);
3041
+
3042
+ // Rating breakdown categories
3043
+ var breakdownEls = document.querySelectorAll('[data-testid="review-subscore"]');
3044
+ breakdownEls.forEach(function(el) {
3045
+ var text = el.textContent?.trim() || '';
3046
+ var parts = text.split(/\\s+/);
3047
+ if (parts.length >= 2) {
3048
+ var score = parseFloat(parts[parts.length - 1]);
3049
+ var category = parts.slice(0, -1).join(' ').toLowerCase();
3050
+ if (category.includes('staff')) results.breakdown.staff = score;
3051
+ else if (category.includes('facilities')) results.breakdown.facilities = score;
3052
+ else if (category.includes('cleanliness')) results.breakdown.cleanliness = score;
3053
+ else if (category.includes('comfort')) results.breakdown.comfort = score;
3054
+ else if (category.includes('value') || category.includes('money')) results.breakdown.valueForMoney = score;
3055
+ else if (category.includes('location')) results.breakdown.location = score;
3056
+ else if (category.includes('wifi') || category.includes('wi-fi')) results.breakdown.freeWifi = score;
3057
+ }
3058
+ });
3059
+
3060
+ return results;
3061
+ })()
3062
+ `);
3063
+ baseData.hotelName = mainPageData.hotelName;
3064
+ if (baseData.overallRating === null)
3065
+ baseData.overallRating = mainPageData.overallRating;
3066
+ if (baseData.totalReviews === 0)
3067
+ baseData.totalReviews = mainPageData.totalReviews;
3068
+ // Fill in missing rating breakdown from DOM
3069
+ if (baseData.ratingBreakdown.staff === null)
3070
+ baseData.ratingBreakdown.staff = mainPageData.breakdown.staff ?? null;
3071
+ if (baseData.ratingBreakdown.facilities === null)
3072
+ baseData.ratingBreakdown.facilities = mainPageData.breakdown.facilities ?? null;
3073
+ if (baseData.ratingBreakdown.cleanliness === null)
3074
+ baseData.ratingBreakdown.cleanliness = mainPageData.breakdown.cleanliness ?? null;
3075
+ if (baseData.ratingBreakdown.comfort === null)
3076
+ baseData.ratingBreakdown.comfort = mainPageData.breakdown.comfort ?? null;
3077
+ if (baseData.ratingBreakdown.valueForMoney === null)
3078
+ baseData.ratingBreakdown.valueForMoney = mainPageData.breakdown.valueForMoney ?? null;
3079
+ if (baseData.ratingBreakdown.location === null)
3080
+ baseData.ratingBreakdown.location = mainPageData.breakdown.location ?? null;
3081
+ if (baseData.ratingBreakdown.freeWifi === null)
3082
+ baseData.ratingBreakdown.freeWifi = mainPageData.breakdown.freeWifi ?? null;
3083
+ }
3084
+ logger.debug("Using DOM extraction for reviews (need more reviews or filters)");
1757
3085
  // Click "Read all reviews" button to open reviews modal
1758
3086
  const readAllBtn = await this.page.$('[data-testid="fr-read-all-reviews"], [data-testid="review-score-read-all"]');
1759
3087
  if (!readAllBtn) {
@@ -1922,20 +3250,20 @@ export class HotelBrowser {
1922
3250
  return reviews;
1923
3251
  })()
1924
3252
  `);
1925
- // Build rating breakdown with proper null handling
3253
+ // Build rating breakdown from baseData (populated from API or DOM)
1926
3254
  const ratingBreakdown = {
1927
- staff: mainPageData.breakdown.staff ?? null,
1928
- facilities: mainPageData.breakdown.facilities ?? null,
1929
- cleanliness: mainPageData.breakdown.cleanliness ?? null,
1930
- comfort: mainPageData.breakdown.comfort ?? null,
1931
- valueForMoney: mainPageData.breakdown.valueForMoney ?? null,
1932
- location: mainPageData.breakdown.location ?? null,
1933
- freeWifi: mainPageData.breakdown.freeWifi ?? null,
3255
+ staff: baseData.ratingBreakdown.staff,
3256
+ facilities: baseData.ratingBreakdown.facilities,
3257
+ cleanliness: baseData.ratingBreakdown.cleanliness,
3258
+ comfort: baseData.ratingBreakdown.comfort,
3259
+ valueForMoney: baseData.ratingBreakdown.valueForMoney,
3260
+ location: baseData.ratingBreakdown.location,
3261
+ freeWifi: baseData.ratingBreakdown.freeWifi,
1934
3262
  };
1935
3263
  const reviewsResult = {
1936
- hotelName: mainPageData.hotelName,
1937
- overallRating: mainPageData.overallRating,
1938
- totalReviews: mainPageData.totalReviews,
3264
+ hotelName: baseData.hotelName,
3265
+ overallRating: baseData.overallRating,
3266
+ totalReviews: baseData.totalReviews,
1939
3267
  ratingBreakdown,
1940
3268
  reviews: reviews.slice(0, limit),
1941
3269
  url: cleanUrl,