hotelzero 1.13.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +247 -10
- package/dist/browser.d.ts +96 -0
- package/dist/browser.js +1471 -143
- package/dist/debug-checkin.d.ts +1 -0
- package/dist/debug-checkin.js +31 -0
- package/dist/debug-extraction.d.ts +1 -0
- package/dist/debug-extraction.js +49 -0
- package/dist/debug-search.d.ts +1 -0
- package/dist/debug-search.js +47 -0
- package/dist/explore-cache.d.ts +1 -0
- package/dist/explore-cache.js +78 -0
- package/dist/index.js +2 -2
- package/dist/intercept-test.d.ts +1 -0
- package/dist/intercept-test.js +161 -0
- package/dist/verify-api-extraction.d.ts +1 -0
- package/dist/verify-api-extraction.js +116 -0
- package/package.json +1 -1
package/dist/browser.js
CHANGED
|
@@ -39,6 +39,7 @@ export const ErrorCodes = {
|
|
|
39
39
|
NETWORK_ERROR: "NETWORK_ERROR",
|
|
40
40
|
TIMEOUT: "TIMEOUT",
|
|
41
41
|
BLOCKED: "BLOCKED",
|
|
42
|
+
INVALID_PARAMS: "INVALID_PARAMS",
|
|
42
43
|
};
|
|
43
44
|
const DEFAULT_RETRY_CONFIG = {
|
|
44
45
|
maxRetries: 3,
|
|
@@ -763,8 +764,15 @@ export class HotelBrowser {
|
|
|
763
764
|
// Scroll to load more results (pass limit to control how many to load)
|
|
764
765
|
const targetResults = params.limit || 25;
|
|
765
766
|
await this.scrollToLoadMore(targetResults);
|
|
766
|
-
//
|
|
767
|
-
let hotels = await this.
|
|
767
|
+
// Try API-based extraction first (more reliable), fall back to DOM scraping
|
|
768
|
+
let hotels = await this.extractHotelsFromAPI();
|
|
769
|
+
if (hotels.length === 0) {
|
|
770
|
+
logger.debug("API extraction returned no results, falling back to DOM scraping");
|
|
771
|
+
hotels = await this.extractHotelDetails();
|
|
772
|
+
}
|
|
773
|
+
else {
|
|
774
|
+
logger.debug({ hotelCount: hotels.length }, "Hotels extracted from API cache");
|
|
775
|
+
}
|
|
768
776
|
logger.debug({ hotelCount: hotels.length }, "Hotels extracted from page");
|
|
769
777
|
// Apply limit to cap results
|
|
770
778
|
if (params.limit && params.limit > 0) {
|
|
@@ -786,6 +794,287 @@ export class HotelBrowser {
|
|
|
786
794
|
logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Search attempt failed, retrying");
|
|
787
795
|
});
|
|
788
796
|
}
|
|
797
|
+
/**
|
|
798
|
+
* Search for a specific hotel's rate via the search API.
|
|
799
|
+
* This is 100% API-based (no HTML scraping) and returns detailed rate info
|
|
800
|
+
* including room type, meal plan, cancellation policy, and bed configuration.
|
|
801
|
+
*
|
|
802
|
+
* The method searches for the hotel by name and extracts rate details from
|
|
803
|
+
* the Apollo cache's `blocks` array and `matchingUnitConfigurations`.
|
|
804
|
+
*
|
|
805
|
+
* @param hotelUrl - The hotel's URL or name/slug (e.g., "la-sanguine" or full URL)
|
|
806
|
+
* @param checkIn - Check-in date (YYYY-MM-DD)
|
|
807
|
+
* @param checkOut - Check-out date (YYYY-MM-DD)
|
|
808
|
+
* @param guests - Number of guests
|
|
809
|
+
* @param rooms - Number of rooms
|
|
810
|
+
* @param filters - Optional rate filters (breakfast, free cancellation, bed type)
|
|
811
|
+
* @returns Rate details or null if hotel not found in results
|
|
812
|
+
*/
|
|
813
|
+
async searchHotelRates(hotelUrl, checkIn, checkOut, guests = 2, rooms = 1, filters) {
|
|
814
|
+
if (!this.page) {
|
|
815
|
+
throw new HotelSearchError("Browser not initialized. Call init() first.", ErrorCodes.BROWSER_NOT_INITIALIZED, false);
|
|
816
|
+
}
|
|
817
|
+
// Extract hotel name from URL for search query
|
|
818
|
+
const hotelName = this.extractHotelNameFromUrl(hotelUrl);
|
|
819
|
+
if (!hotelName) {
|
|
820
|
+
throw new HotelSearchError("Could not extract hotel name from URL", ErrorCodes.INVALID_PARAMS, false);
|
|
821
|
+
}
|
|
822
|
+
logger.info({ hotelName, checkIn, checkOut, guests, rooms, hasFilters: !!filters }, "Searching for hotel rate via API");
|
|
823
|
+
// Build search URL with hotel name as destination
|
|
824
|
+
const searchFilters = {};
|
|
825
|
+
// Apply rate-specific filters
|
|
826
|
+
if (filters?.breakfast) {
|
|
827
|
+
searchFilters.breakfast = true;
|
|
828
|
+
}
|
|
829
|
+
if (filters?.freeCancellation) {
|
|
830
|
+
searchFilters.freeCancellation = true;
|
|
831
|
+
}
|
|
832
|
+
const searchParams = {
|
|
833
|
+
destination: hotelName.replace(/-/g, " "), // "la-sanguine" -> "la sanguine"
|
|
834
|
+
checkIn,
|
|
835
|
+
checkOut,
|
|
836
|
+
guests,
|
|
837
|
+
rooms,
|
|
838
|
+
limit: 10, // Small limit since we're looking for a specific hotel
|
|
839
|
+
};
|
|
840
|
+
const url = this.buildBookingUrl(searchParams, searchFilters);
|
|
841
|
+
logger.debug({ url }, "Hotel rate search URL");
|
|
842
|
+
return await retryWithBackoff(async () => {
|
|
843
|
+
await this.enforceRateLimit();
|
|
844
|
+
try {
|
|
845
|
+
await this.page.goto(url, {
|
|
846
|
+
waitUntil: "networkidle",
|
|
847
|
+
timeout: 30000,
|
|
848
|
+
});
|
|
849
|
+
}
|
|
850
|
+
catch (error) {
|
|
851
|
+
const err = error;
|
|
852
|
+
if (err.message.includes("timeout") || err.message.includes("Timeout")) {
|
|
853
|
+
throw new HotelSearchError("Page load timed out. The server may be slow or unavailable.", ErrorCodes.TIMEOUT, true);
|
|
854
|
+
}
|
|
855
|
+
throw new HotelSearchError(`Navigation failed: ${err.message}`, ErrorCodes.NAVIGATION_FAILED, true);
|
|
856
|
+
}
|
|
857
|
+
await this.page.waitForTimeout(2000);
|
|
858
|
+
await this.checkForBlocking();
|
|
859
|
+
await this.dismissPopups();
|
|
860
|
+
// Extract rate details from Apollo cache
|
|
861
|
+
const rateResult = await this.extractHotelRateFromAPI(hotelName, filters);
|
|
862
|
+
if (rateResult) {
|
|
863
|
+
// Populate search params in result
|
|
864
|
+
rateResult.checkIn = checkIn;
|
|
865
|
+
rateResult.checkOut = checkOut;
|
|
866
|
+
rateResult.guests = guests;
|
|
867
|
+
rateResult.rooms = rooms;
|
|
868
|
+
logger.info({ hotelName: rateResult.hotelName, price: rateResult.price, roomName: rateResult.roomName }, "Hotel rate found via API");
|
|
869
|
+
await this.saveSession();
|
|
870
|
+
return rateResult;
|
|
871
|
+
}
|
|
872
|
+
logger.warn({ hotelName }, "Hotel not found in search results");
|
|
873
|
+
return null;
|
|
874
|
+
}, DEFAULT_RETRY_CONFIG, (attempt, error, delayMs) => {
|
|
875
|
+
logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Hotel rate search failed, retrying");
|
|
876
|
+
});
|
|
877
|
+
}
|
|
878
|
+
/**
|
|
879
|
+
* Extract hotel name/slug from a Booking.com URL.
|
|
880
|
+
* Handles formats like:
|
|
881
|
+
* - https://www.booking.com/hotel/fr/la-sanguine.html
|
|
882
|
+
* - /hotel/fr/la-sanguine.html
|
|
883
|
+
* - la-sanguine
|
|
884
|
+
*/
|
|
885
|
+
extractHotelNameFromUrl(urlOrName) {
|
|
886
|
+
// If it's just a name/slug (no slashes), return as-is
|
|
887
|
+
if (!urlOrName.includes("/")) {
|
|
888
|
+
return urlOrName.replace(/\.html$/, "");
|
|
889
|
+
}
|
|
890
|
+
// Extract from URL pattern: /hotel/{country}/{name}.html
|
|
891
|
+
const match = urlOrName.match(/\/hotel\/[a-z]{2}\/([^/.]+)/i);
|
|
892
|
+
if (match && match[1]) {
|
|
893
|
+
return match[1];
|
|
894
|
+
}
|
|
895
|
+
// Fallback: try to get the last path segment
|
|
896
|
+
const parts = urlOrName.split("/").filter(Boolean);
|
|
897
|
+
const lastPart = parts[parts.length - 1];
|
|
898
|
+
return lastPart?.replace(/\.html$/, "") || null;
|
|
899
|
+
}
|
|
900
|
+
/**
|
|
901
|
+
* Extract hotel rate details from Apollo cache.
|
|
902
|
+
* Finds the hotel matching the given name and extracts rate info from
|
|
903
|
+
* the `blocks` array and `matchingUnitConfigurations`.
|
|
904
|
+
*/
|
|
905
|
+
async extractHotelRateFromAPI(hotelSlug, filters) {
|
|
906
|
+
if (!this.page)
|
|
907
|
+
return null;
|
|
908
|
+
// Bed type mapping for filter matching
|
|
909
|
+
const bedTypeMap = {
|
|
910
|
+
single: 1,
|
|
911
|
+
twin: 2,
|
|
912
|
+
double: 3,
|
|
913
|
+
queen: 5,
|
|
914
|
+
king: 6,
|
|
915
|
+
};
|
|
916
|
+
const targetBedType = filters?.bedType ? bedTypeMap[filters.bedType] : undefined;
|
|
917
|
+
return await this.page.evaluate(({ hotelSlug, targetBedType }) => {
|
|
918
|
+
try {
|
|
919
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
920
|
+
const w = window;
|
|
921
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
922
|
+
if (!cache)
|
|
923
|
+
return null;
|
|
924
|
+
const rootQuery = cache["ROOT_QUERY"];
|
|
925
|
+
if (!rootQuery)
|
|
926
|
+
return null;
|
|
927
|
+
const searchQueries = rootQuery.searchQueries;
|
|
928
|
+
if (!searchQueries)
|
|
929
|
+
return null;
|
|
930
|
+
const searchKey = Object.keys(searchQueries).find((k) => k.startsWith("search("));
|
|
931
|
+
if (!searchKey)
|
|
932
|
+
return null;
|
|
933
|
+
const searchOutput = searchQueries[searchKey];
|
|
934
|
+
const searchResults = searchOutput?.results;
|
|
935
|
+
if (!searchResults || !Array.isArray(searchResults))
|
|
936
|
+
return null;
|
|
937
|
+
// Find hotel matching the slug (check pageName)
|
|
938
|
+
const normalizedSlug = hotelSlug.toLowerCase().replace(/-/g, "");
|
|
939
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
940
|
+
const hotel = searchResults.find((h) => {
|
|
941
|
+
if (!h)
|
|
942
|
+
return false;
|
|
943
|
+
const pageName = h.basicPropertyData?.pageName?.toLowerCase()?.replace(/-/g, "") || "";
|
|
944
|
+
const displayName = h.displayName?.text?.toLowerCase()?.replace(/\s+/g, "") || "";
|
|
945
|
+
return pageName.includes(normalizedSlug) ||
|
|
946
|
+
normalizedSlug.includes(pageName) ||
|
|
947
|
+
displayName.includes(normalizedSlug) ||
|
|
948
|
+
normalizedSlug.includes(displayName);
|
|
949
|
+
});
|
|
950
|
+
if (!hotel)
|
|
951
|
+
return null;
|
|
952
|
+
// Extract basic hotel info
|
|
953
|
+
const hotelName = hotel.displayName?.text || hotel.basicPropertyData?.pageName || "Unknown";
|
|
954
|
+
const pageName = hotel.basicPropertyData?.pageName || "";
|
|
955
|
+
const countryCode = hotel.basicPropertyData?.location?.countryCode || "";
|
|
956
|
+
const hotelId = hotel.basicPropertyData?.id?.toString() || "";
|
|
957
|
+
const hotelUrl = countryCode && pageName
|
|
958
|
+
? `https://www.booking.com/hotel/${countryCode}/${pageName}.html`
|
|
959
|
+
: "";
|
|
960
|
+
// Get price info
|
|
961
|
+
const priceInfo = hotel.priceDisplayInfoIrene;
|
|
962
|
+
const displayPrice = priceInfo?.displayPrice?.amountPerStay;
|
|
963
|
+
const price = displayPrice?.amountUnformatted ?? 0;
|
|
964
|
+
const priceDisplay = displayPrice?.amountRounded || displayPrice?.amount || "$0";
|
|
965
|
+
const currency = displayPrice?.currency || "USD";
|
|
966
|
+
const pricePerNight = priceInfo?.averagePricePerNight?.amountUnformatted ?? 0;
|
|
967
|
+
// Get blocks array (rate options)
|
|
968
|
+
const blocks = hotel.blocks;
|
|
969
|
+
if (!blocks || !Array.isArray(blocks) || blocks.length === 0) {
|
|
970
|
+
// No blocks, return basic info without detailed rate
|
|
971
|
+
return {
|
|
972
|
+
hotelName,
|
|
973
|
+
hotelId,
|
|
974
|
+
hotelUrl,
|
|
975
|
+
checkIn: "",
|
|
976
|
+
checkOut: "",
|
|
977
|
+
guests: 0,
|
|
978
|
+
rooms: 0,
|
|
979
|
+
roomName: "Unknown",
|
|
980
|
+
roomId: "",
|
|
981
|
+
price,
|
|
982
|
+
priceDisplay,
|
|
983
|
+
pricePerNight,
|
|
984
|
+
currency,
|
|
985
|
+
mealPlan: "Unknown",
|
|
986
|
+
cancellationPolicy: "Unknown",
|
|
987
|
+
freeCancellationUntil: null,
|
|
988
|
+
bedType: "Unknown",
|
|
989
|
+
bedCount: 0,
|
|
990
|
+
};
|
|
991
|
+
}
|
|
992
|
+
// Get the first (cheapest/best match) block
|
|
993
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
994
|
+
const block = blocks[0];
|
|
995
|
+
const blockId = block.blockId || {};
|
|
996
|
+
const roomId = blockId.roomId?.toString() || "";
|
|
997
|
+
const mealPlanId = blockId.mealPlanId;
|
|
998
|
+
// Meal plan mapping
|
|
999
|
+
const mealPlanNames = {
|
|
1000
|
+
0: "Room only",
|
|
1001
|
+
1: "Breakfast included",
|
|
1002
|
+
2: "Half board",
|
|
1003
|
+
3: "Full board",
|
|
1004
|
+
4: "All-inclusive",
|
|
1005
|
+
};
|
|
1006
|
+
const mealPlan = mealPlanNames[mealPlanId] || "Room only";
|
|
1007
|
+
// Cancellation policy
|
|
1008
|
+
const freeCancellationUntil = block.freeCancellationUntil || null;
|
|
1009
|
+
const cancellationPolicy = freeCancellationUntil
|
|
1010
|
+
? `Free cancellation until ${freeCancellationUntil}`
|
|
1011
|
+
: "Non-refundable";
|
|
1012
|
+
// Get room name and bed configuration from matchingUnitConfigurations
|
|
1013
|
+
let roomName = "Standard Room";
|
|
1014
|
+
let bedType = "Unknown";
|
|
1015
|
+
let bedCount = 0;
|
|
1016
|
+
const unitConfigs = hotel.matchingUnitConfigurations?.unitConfigurations;
|
|
1017
|
+
if (unitConfigs && Array.isArray(unitConfigs)) {
|
|
1018
|
+
// If filtering by bed type, try to find matching config
|
|
1019
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1020
|
+
let matchingConfig = unitConfigs[0];
|
|
1021
|
+
if (targetBedType !== undefined) {
|
|
1022
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1023
|
+
const bedMatch = unitConfigs.find((config) => {
|
|
1024
|
+
const beds = config.bedConfigurations?.[0]?.beds || [];
|
|
1025
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1026
|
+
return beds.some((bed) => bed.type === targetBedType);
|
|
1027
|
+
});
|
|
1028
|
+
if (bedMatch) {
|
|
1029
|
+
matchingConfig = bedMatch;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
if (matchingConfig) {
|
|
1033
|
+
roomName = matchingConfig.name || roomName;
|
|
1034
|
+
const beds = matchingConfig.bedConfigurations?.[0]?.beds;
|
|
1035
|
+
if (beds && beds.length > 0) {
|
|
1036
|
+
const firstBed = beds[0];
|
|
1037
|
+
bedCount = firstBed.count || 1;
|
|
1038
|
+
// Reverse map bed type
|
|
1039
|
+
const bedTypeNames = {
|
|
1040
|
+
1: "Single",
|
|
1041
|
+
2: "Twin",
|
|
1042
|
+
3: "Double",
|
|
1043
|
+
4: "Large Double",
|
|
1044
|
+
5: "Queen",
|
|
1045
|
+
6: "King",
|
|
1046
|
+
7: "Super King",
|
|
1047
|
+
};
|
|
1048
|
+
bedType = bedTypeNames[firstBed.type] || "Unknown";
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
1052
|
+
return {
|
|
1053
|
+
hotelName,
|
|
1054
|
+
hotelId,
|
|
1055
|
+
hotelUrl,
|
|
1056
|
+
checkIn: "", // Will be filled by caller
|
|
1057
|
+
checkOut: "",
|
|
1058
|
+
guests: 0,
|
|
1059
|
+
rooms: 0,
|
|
1060
|
+
roomName,
|
|
1061
|
+
roomId,
|
|
1062
|
+
price,
|
|
1063
|
+
priceDisplay,
|
|
1064
|
+
pricePerNight,
|
|
1065
|
+
currency,
|
|
1066
|
+
mealPlan,
|
|
1067
|
+
cancellationPolicy,
|
|
1068
|
+
freeCancellationUntil,
|
|
1069
|
+
bedType,
|
|
1070
|
+
bedCount,
|
|
1071
|
+
};
|
|
1072
|
+
}
|
|
1073
|
+
catch {
|
|
1074
|
+
return null;
|
|
1075
|
+
}
|
|
1076
|
+
}, { hotelSlug, targetBedType });
|
|
1077
|
+
}
|
|
789
1078
|
async dismissPopups() {
|
|
790
1079
|
if (!this.page)
|
|
791
1080
|
return;
|
|
@@ -989,6 +1278,796 @@ export class HotelBrowser {
|
|
|
989
1278
|
return results;
|
|
990
1279
|
});
|
|
991
1280
|
}
|
|
1281
|
+
/**
|
|
1282
|
+
* Extract hotel data from Booking.com's Apollo GraphQL cache.
|
|
1283
|
+
* This is more reliable than DOM scraping as it uses structured data.
|
|
1284
|
+
* Falls back gracefully if the cache structure changes.
|
|
1285
|
+
*/
|
|
1286
|
+
async extractHotelsFromAPI() {
|
|
1287
|
+
if (!this.page)
|
|
1288
|
+
return [];
|
|
1289
|
+
return await this.page.evaluate(() => {
|
|
1290
|
+
try {
|
|
1291
|
+
// Access the Apollo cache embedded in the page
|
|
1292
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1293
|
+
const w = window;
|
|
1294
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1295
|
+
if (!cache)
|
|
1296
|
+
return [];
|
|
1297
|
+
const rootQuery = cache['ROOT_QUERY'];
|
|
1298
|
+
if (!rootQuery)
|
|
1299
|
+
return [];
|
|
1300
|
+
// searchQueries contains the search results
|
|
1301
|
+
const searchQueries = rootQuery.searchQueries;
|
|
1302
|
+
if (!searchQueries)
|
|
1303
|
+
return [];
|
|
1304
|
+
// Find the search key (complex key with query parameters)
|
|
1305
|
+
const searchKey = Object.keys(searchQueries).find(k => k.startsWith('search('));
|
|
1306
|
+
if (!searchKey)
|
|
1307
|
+
return [];
|
|
1308
|
+
const searchOutput = searchQueries[searchKey];
|
|
1309
|
+
if (!searchOutput)
|
|
1310
|
+
return [];
|
|
1311
|
+
// Get the results array
|
|
1312
|
+
const searchResults = searchOutput.results;
|
|
1313
|
+
if (!searchResults || !Array.isArray(searchResults))
|
|
1314
|
+
return [];
|
|
1315
|
+
const results = [];
|
|
1316
|
+
for (const hotel of searchResults) {
|
|
1317
|
+
if (!hotel)
|
|
1318
|
+
continue;
|
|
1319
|
+
// Skip sponsored/native ad listings
|
|
1320
|
+
const persuasion = hotel.persuasion;
|
|
1321
|
+
if (persuasion?.showNativeAdLabel || persuasion?.nativeAdId) {
|
|
1322
|
+
continue;
|
|
1323
|
+
}
|
|
1324
|
+
// Extract name
|
|
1325
|
+
const name = hotel.displayName?.text || hotel.basicPropertyData?.pageName || 'Unknown';
|
|
1326
|
+
// Extract price
|
|
1327
|
+
let price = null;
|
|
1328
|
+
let priceDisplay = 'Price not shown';
|
|
1329
|
+
const priceInfo = hotel.priceDisplayInfoIrene?.displayPrice?.amountPerStay;
|
|
1330
|
+
if (priceInfo) {
|
|
1331
|
+
priceDisplay = priceInfo.amountRounded || priceInfo.amount || priceDisplay;
|
|
1332
|
+
price = typeof priceInfo.amountUnformatted === 'number' ? priceInfo.amountUnformatted : null;
|
|
1333
|
+
}
|
|
1334
|
+
// Extract rating and reviews from basicPropertyData.reviews
|
|
1335
|
+
let rating = null;
|
|
1336
|
+
let ratingText = '';
|
|
1337
|
+
let reviewCount = null;
|
|
1338
|
+
const reviews = hotel.basicPropertyData?.reviews;
|
|
1339
|
+
if (reviews) {
|
|
1340
|
+
rating = typeof reviews.totalScore === 'number' ? reviews.totalScore : null;
|
|
1341
|
+
ratingText = reviews.totalScoreTextTag?.translation || '';
|
|
1342
|
+
reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
|
|
1343
|
+
}
|
|
1344
|
+
// Extract location
|
|
1345
|
+
const location = hotel.location?.displayLocation || '';
|
|
1346
|
+
const distanceToCenter = hotel.location?.mainDistance || '';
|
|
1347
|
+
// Build thumbnail URL
|
|
1348
|
+
let thumbnailUrl = null;
|
|
1349
|
+
const mainPhoto = hotel.basicPropertyData?.photos?.main;
|
|
1350
|
+
if (mainPhoto) {
|
|
1351
|
+
const relativeUrl = mainPhoto.highResJpegUrl?.relativeUrl ||
|
|
1352
|
+
mainPhoto.highResUrl?.relativeUrl ||
|
|
1353
|
+
mainPhoto.lowResJpegUrl?.relativeUrl;
|
|
1354
|
+
if (relativeUrl) {
|
|
1355
|
+
thumbnailUrl = `https://cf.bstatic.com${relativeUrl}`;
|
|
1356
|
+
}
|
|
1357
|
+
}
|
|
1358
|
+
// Build link with country code (required for API data to load on detail page)
|
|
1359
|
+
let link = '';
|
|
1360
|
+
const pageName = hotel.basicPropertyData?.pageName;
|
|
1361
|
+
const countryCode = hotel.basicPropertyData?.location?.countryCode;
|
|
1362
|
+
if (pageName && countryCode) {
|
|
1363
|
+
link = `https://www.booking.com/hotel/${countryCode}/${pageName}.html`;
|
|
1364
|
+
}
|
|
1365
|
+
else if (pageName) {
|
|
1366
|
+
// Fallback without country code (less reliable for API extraction)
|
|
1367
|
+
link = `https://www.booking.com/hotel/${pageName}.html`;
|
|
1368
|
+
}
|
|
1369
|
+
// Extract amenities and highlights
|
|
1370
|
+
const amenities = [];
|
|
1371
|
+
const highlights = [];
|
|
1372
|
+
// Sustainability
|
|
1373
|
+
if (hotel.propertySustainability?.isSustainable) {
|
|
1374
|
+
amenities.push('Sustainable');
|
|
1375
|
+
}
|
|
1376
|
+
// Policies
|
|
1377
|
+
const policies = hotel.policies;
|
|
1378
|
+
if (policies?.showFreeCancellation) {
|
|
1379
|
+
highlights.push('Free Cancellation');
|
|
1380
|
+
}
|
|
1381
|
+
if (policies?.showNoPrepayment) {
|
|
1382
|
+
highlights.push('No Prepayment');
|
|
1383
|
+
}
|
|
1384
|
+
if (policies?.showPetsAllowedForFree) {
|
|
1385
|
+
amenities.push('Pet Friendly');
|
|
1386
|
+
}
|
|
1387
|
+
// Meal plan
|
|
1388
|
+
if (hotel.mealPlanIncluded?.mealPlanType) {
|
|
1389
|
+
amenities.push('Breakfast Included');
|
|
1390
|
+
}
|
|
1391
|
+
// Extract availability info
|
|
1392
|
+
let availability = null;
|
|
1393
|
+
const soldOutInfo = hotel.soldOutInfo;
|
|
1394
|
+
if (soldOutInfo?.messages && soldOutInfo.messages.length > 0) {
|
|
1395
|
+
const msg = soldOutInfo.messages[0];
|
|
1396
|
+
if (msg?.text) {
|
|
1397
|
+
availability = msg.text;
|
|
1398
|
+
}
|
|
1399
|
+
}
|
|
1400
|
+
results.push({
|
|
1401
|
+
name,
|
|
1402
|
+
price,
|
|
1403
|
+
priceDisplay,
|
|
1404
|
+
rating,
|
|
1405
|
+
ratingText,
|
|
1406
|
+
reviewCount,
|
|
1407
|
+
location,
|
|
1408
|
+
distanceToCenter,
|
|
1409
|
+
amenities,
|
|
1410
|
+
highlights,
|
|
1411
|
+
link,
|
|
1412
|
+
thumbnailUrl,
|
|
1413
|
+
availability,
|
|
1414
|
+
});
|
|
1415
|
+
}
|
|
1416
|
+
return results;
|
|
1417
|
+
}
|
|
1418
|
+
catch {
|
|
1419
|
+
// If anything goes wrong with API extraction, return empty to trigger fallback
|
|
1420
|
+
return [];
|
|
1421
|
+
}
|
|
1422
|
+
});
|
|
1423
|
+
}
|
|
1424
|
+
/**
|
|
1425
|
+
* Extract hotel details from Booking.com's Apollo GraphQL cache on a hotel detail page.
|
|
1426
|
+
* This is more reliable than DOM scraping as it uses structured data.
|
|
1427
|
+
* Returns null if extraction fails (triggering DOM fallback).
|
|
1428
|
+
*/
|
|
1429
|
+
async extractHotelDetailsFromAPI() {
|
|
1430
|
+
if (!this.page)
|
|
1431
|
+
return null;
|
|
1432
|
+
return await this.page.evaluate(() => {
|
|
1433
|
+
try {
|
|
1434
|
+
// Access the Apollo cache embedded in the page
|
|
1435
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1436
|
+
const w = window;
|
|
1437
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1438
|
+
if (!cache)
|
|
1439
|
+
return null;
|
|
1440
|
+
// Helper to resolve __ref pointers
|
|
1441
|
+
const resolveRef = (ref) => {
|
|
1442
|
+
if (ref && typeof ref === 'object' && '__ref' in ref) {
|
|
1443
|
+
return cache[ref.__ref];
|
|
1444
|
+
}
|
|
1445
|
+
return ref;
|
|
1446
|
+
};
|
|
1447
|
+
// Find the Property entry - it has a key like 'Property:{"id":6523595}'
|
|
1448
|
+
const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
|
|
1449
|
+
if (!propertyKey)
|
|
1450
|
+
return null;
|
|
1451
|
+
const property = cache[propertyKey];
|
|
1452
|
+
if (!property)
|
|
1453
|
+
return null;
|
|
1454
|
+
// Extract hotel ID from the property key
|
|
1455
|
+
const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
|
|
1456
|
+
const hotelId = idMatch ? idMatch[1] : null;
|
|
1457
|
+
// Get BasicPropertyData for address and location
|
|
1458
|
+
const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
|
|
1459
|
+
const basicData = basicDataKey ? cache[basicDataKey] : null;
|
|
1460
|
+
// Extract name
|
|
1461
|
+
const name = property.name || basicData?.name || 'Unknown';
|
|
1462
|
+
// Extract rating and reviews from property.reviews
|
|
1463
|
+
let rating = null;
|
|
1464
|
+
let ratingText = '';
|
|
1465
|
+
let reviewCount = null;
|
|
1466
|
+
const reviews = property.reviews;
|
|
1467
|
+
if (reviews) {
|
|
1468
|
+
reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
|
|
1469
|
+
// Find the total score from questions array
|
|
1470
|
+
const questions = reviews.questions;
|
|
1471
|
+
if (Array.isArray(questions)) {
|
|
1472
|
+
const totalQ = questions.find((q) => q?.name === 'total');
|
|
1473
|
+
if (totalQ && typeof totalQ.score === 'number') {
|
|
1474
|
+
const score = totalQ.score;
|
|
1475
|
+
rating = score;
|
|
1476
|
+
// Generate rating text based on score
|
|
1477
|
+
if (score >= 9)
|
|
1478
|
+
ratingText = 'Superb';
|
|
1479
|
+
else if (score >= 8)
|
|
1480
|
+
ratingText = 'Very Good';
|
|
1481
|
+
else if (score >= 7)
|
|
1482
|
+
ratingText = 'Good';
|
|
1483
|
+
else if (score >= 6)
|
|
1484
|
+
ratingText = 'Pleasant';
|
|
1485
|
+
else
|
|
1486
|
+
ratingText = 'Review score';
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
}
|
|
1490
|
+
// Extract address from BasicPropertyData
|
|
1491
|
+
const address = basicData?.location?.formattedAddress ||
|
|
1492
|
+
basicData?.location?.formattedAddressShort || '';
|
|
1493
|
+
// Extract star rating from accommodation type
|
|
1494
|
+
let starRating = null;
|
|
1495
|
+
const accomType = resolveRef(property.accommodationType);
|
|
1496
|
+
if (accomType && typeof accomType === 'object' && 'starRating' in accomType) {
|
|
1497
|
+
starRating = accomType.starRating || null;
|
|
1498
|
+
}
|
|
1499
|
+
// Extract check-in/out times from houseRules
|
|
1500
|
+
let checkInTime = '';
|
|
1501
|
+
let checkOutTime = '';
|
|
1502
|
+
const houseRules = property.houseRules;
|
|
1503
|
+
if (houseRules?.checkinCheckoutTimes) {
|
|
1504
|
+
const times = houseRules.checkinCheckoutTimes;
|
|
1505
|
+
if (times.checkinTimeRange) {
|
|
1506
|
+
const from = times.checkinTimeRange.fromFormatted;
|
|
1507
|
+
const until = times.checkinTimeRange.untilFormatted;
|
|
1508
|
+
if (from && until) {
|
|
1509
|
+
checkInTime = `${from} - ${until}`;
|
|
1510
|
+
}
|
|
1511
|
+
else if (from) {
|
|
1512
|
+
checkInTime = `From ${from}`;
|
|
1513
|
+
}
|
|
1514
|
+
else if (until) {
|
|
1515
|
+
checkInTime = `Until ${until}`;
|
|
1516
|
+
}
|
|
1517
|
+
}
|
|
1518
|
+
if (times.checkoutTimeRange) {
|
|
1519
|
+
const from = times.checkoutTimeRange.fromFormatted;
|
|
1520
|
+
const until = times.checkoutTimeRange.untilFormatted;
|
|
1521
|
+
if (from && until) {
|
|
1522
|
+
checkOutTime = `${from} - ${until}`;
|
|
1523
|
+
}
|
|
1524
|
+
else if (until) {
|
|
1525
|
+
checkOutTime = `Until ${until}`;
|
|
1526
|
+
}
|
|
1527
|
+
else if (from) {
|
|
1528
|
+
checkOutTime = `From ${from}`;
|
|
1529
|
+
}
|
|
1530
|
+
}
|
|
1531
|
+
}
|
|
1532
|
+
// Extract popular facilities from accommodationHighlights
|
|
1533
|
+
const popularFacilities = [];
|
|
1534
|
+
const highlightKeys = Object.keys(property).filter(k => k.startsWith('accommodationHighlights('));
|
|
1535
|
+
for (const key of highlightKeys) {
|
|
1536
|
+
const highlights = property[key];
|
|
1537
|
+
if (Array.isArray(highlights)) {
|
|
1538
|
+
for (const item of highlights) {
|
|
1539
|
+
const entities = item?.entities;
|
|
1540
|
+
if (Array.isArray(entities)) {
|
|
1541
|
+
for (const entity of entities) {
|
|
1542
|
+
// Direct title (like BreakfastHighlight)
|
|
1543
|
+
if (entity?.title) {
|
|
1544
|
+
popularFacilities.push(entity.title);
|
|
1545
|
+
}
|
|
1546
|
+
// Resolve __ref for GenericFacilityHighlight, WifiFacilityHighlight, etc.
|
|
1547
|
+
const resolved = resolveRef(entity);
|
|
1548
|
+
if (resolved && typeof resolved === 'object' && 'title' in resolved) {
|
|
1549
|
+
const title = resolved.title;
|
|
1550
|
+
if (title && !popularFacilities.includes(title)) {
|
|
1551
|
+
popularFacilities.push(title);
|
|
1552
|
+
}
|
|
1553
|
+
}
|
|
1554
|
+
}
|
|
1555
|
+
}
|
|
1556
|
+
}
|
|
1557
|
+
}
|
|
1558
|
+
}
|
|
1559
|
+
// Extract all facilities from highlights (popularity based)
|
|
1560
|
+
const allFacilities = [];
|
|
1561
|
+
const facilityKeys = Object.keys(property).filter(k => k.startsWith('highlights('));
|
|
1562
|
+
for (const key of facilityKeys) {
|
|
1563
|
+
const highlightData = property[key];
|
|
1564
|
+
const entities = highlightData?.entities;
|
|
1565
|
+
if (Array.isArray(entities)) {
|
|
1566
|
+
for (const entity of entities) {
|
|
1567
|
+
// Skip Meal type entries
|
|
1568
|
+
if (entity?.__typename === 'Meal')
|
|
1569
|
+
continue;
|
|
1570
|
+
const resolved = resolveRef(entity);
|
|
1571
|
+
if (resolved && typeof resolved === 'object') {
|
|
1572
|
+
// For BaseFacility, look at instances
|
|
1573
|
+
const instances = resolved.instances;
|
|
1574
|
+
if (Array.isArray(instances)) {
|
|
1575
|
+
for (const inst of instances) {
|
|
1576
|
+
const resolvedInst = resolveRef(inst);
|
|
1577
|
+
if (resolvedInst && typeof resolvedInst === 'object' && 'title' in resolvedInst) {
|
|
1578
|
+
const title = resolvedInst.title;
|
|
1579
|
+
if (title && !allFacilities.includes(title)) {
|
|
1580
|
+
allFacilities.push(title);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1584
|
+
}
|
|
1585
|
+
}
|
|
1586
|
+
}
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1589
|
+
// Extract photos from propertyGallery
|
|
1590
|
+
const photos = [];
|
|
1591
|
+
const galleryKeys = Object.keys(property).filter(k => k.startsWith('propertyGallery('));
|
|
1592
|
+
for (const key of galleryKeys) {
|
|
1593
|
+
const gallery = property[key];
|
|
1594
|
+
// Main photo
|
|
1595
|
+
if (gallery?.mainPhoto) {
|
|
1596
|
+
const mainPhoto = resolveRef(gallery.mainPhoto);
|
|
1597
|
+
if (mainPhoto && typeof mainPhoto === 'object') {
|
|
1598
|
+
// Look for resource with max500 or max1024x768
|
|
1599
|
+
const photoObj = mainPhoto;
|
|
1600
|
+
const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
|
|
1601
|
+
if (resourceKey) {
|
|
1602
|
+
const resource = photoObj[resourceKey];
|
|
1603
|
+
if (resource?.absoluteUrl) {
|
|
1604
|
+
photos.push(resource.absoluteUrl);
|
|
1605
|
+
}
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
// Room photos
|
|
1610
|
+
const roomPhotos = gallery?.roomPhotos;
|
|
1611
|
+
if (Array.isArray(roomPhotos)) {
|
|
1612
|
+
for (const room of roomPhotos) {
|
|
1613
|
+
const roomPhotosList = room?.photos;
|
|
1614
|
+
if (Array.isArray(roomPhotosList) && photos.length < 5) {
|
|
1615
|
+
for (const photoRef of roomPhotosList) {
|
|
1616
|
+
if (photos.length >= 5)
|
|
1617
|
+
break;
|
|
1618
|
+
const photo = resolveRef(photoRef);
|
|
1619
|
+
if (photo && typeof photo === 'object') {
|
|
1620
|
+
const photoObj = photo;
|
|
1621
|
+
const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
|
|
1622
|
+
if (resourceKey) {
|
|
1623
|
+
const resource = photoObj[resourceKey];
|
|
1624
|
+
if (resource?.absoluteUrl && !photos.includes(resource.absoluteUrl)) {
|
|
1625
|
+
photos.push(resource.absoluteUrl);
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1628
|
+
}
|
|
1629
|
+
}
|
|
1630
|
+
}
|
|
1631
|
+
}
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
// Extract room types from property.rooms
|
|
1635
|
+
const roomTypes = [];
|
|
1636
|
+
const rooms = property.rooms;
|
|
1637
|
+
if (Array.isArray(rooms)) {
|
|
1638
|
+
for (const roomRef of rooms) {
|
|
1639
|
+
const room = resolveRef(roomRef);
|
|
1640
|
+
if (room && typeof room === 'object') {
|
|
1641
|
+
const roomObj = room;
|
|
1642
|
+
const roomName = roomObj.name || roomObj.description;
|
|
1643
|
+
if (roomName && !roomTypes.includes(roomName)) {
|
|
1644
|
+
roomTypes.push(roomName);
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
// Extract location info
|
|
1650
|
+
let locationInfo = '';
|
|
1651
|
+
if (basicData?.location) {
|
|
1652
|
+
const loc = basicData.location;
|
|
1653
|
+
const parts = [];
|
|
1654
|
+
if (loc.city)
|
|
1655
|
+
parts.push(loc.city);
|
|
1656
|
+
if (loc.countryCode)
|
|
1657
|
+
parts.push(loc.countryCode.toUpperCase());
|
|
1658
|
+
locationInfo = parts.join(', ');
|
|
1659
|
+
if (loc.latitude && loc.longitude) {
|
|
1660
|
+
locationInfo += ` (${loc.latitude.toFixed(4)}, ${loc.longitude.toFixed(4)})`;
|
|
1661
|
+
}
|
|
1662
|
+
}
|
|
1663
|
+
// Extract review category scores for highlights
|
|
1664
|
+
const guestReviewHighlights = [];
|
|
1665
|
+
if (reviews?.questions && Array.isArray(reviews.questions)) {
|
|
1666
|
+
const categoryNames = {
|
|
1667
|
+
'hotel_staff': 'Staff',
|
|
1668
|
+
'hotel_location': 'Location',
|
|
1669
|
+
'hotel_clean': 'Cleanliness',
|
|
1670
|
+
'hotel_comfort': 'Comfort',
|
|
1671
|
+
'hotel_value': 'Value for money',
|
|
1672
|
+
'hotel_services': 'Facilities',
|
|
1673
|
+
'hotel_free_wifi': 'Free WiFi'
|
|
1674
|
+
};
|
|
1675
|
+
for (const q of reviews.questions) {
|
|
1676
|
+
if (q?.name && q.name !== 'total' && typeof q.score === 'number') {
|
|
1677
|
+
const displayName = categoryNames[q.name] || q.name;
|
|
1678
|
+
if (categoryNames[q.name]) {
|
|
1679
|
+
guestReviewHighlights.push(`${displayName}: ${q.score.toFixed(1)}`);
|
|
1680
|
+
}
|
|
1681
|
+
}
|
|
1682
|
+
}
|
|
1683
|
+
}
|
|
1684
|
+
// Validate we have meaningful data before returning
|
|
1685
|
+
// Name should be a proper hotel name (at least 3 chars, not 'Unknown')
|
|
1686
|
+
if (!name || name === 'Unknown' || name.length < 3) {
|
|
1687
|
+
return null; // Trigger DOM fallback
|
|
1688
|
+
}
|
|
1689
|
+
// Note: Description, pricePerNight, totalPrice, nearbyAttractions may need DOM fallback
|
|
1690
|
+
// as they're not consistently in the Apollo cache or are dynamic
|
|
1691
|
+
return {
|
|
1692
|
+
name,
|
|
1693
|
+
rating,
|
|
1694
|
+
ratingText,
|
|
1695
|
+
reviewCount,
|
|
1696
|
+
starRating,
|
|
1697
|
+
address,
|
|
1698
|
+
description: '', // Not typically in cache, will need DOM fallback if needed
|
|
1699
|
+
highlights: popularFacilities.slice(0, 5).join(', '),
|
|
1700
|
+
pricePerNight: null, // Dynamic, not in cache
|
|
1701
|
+
priceDisplay: '',
|
|
1702
|
+
totalPrice: '',
|
|
1703
|
+
checkInTime,
|
|
1704
|
+
checkOutTime,
|
|
1705
|
+
popularFacilities: popularFacilities.slice(0, 15),
|
|
1706
|
+
allFacilities: allFacilities.slice(0, 30),
|
|
1707
|
+
roomTypes: roomTypes.slice(0, 5),
|
|
1708
|
+
photos: photos.slice(0, 5),
|
|
1709
|
+
nearbyAttractions: [], // Would need propertySurroundings query
|
|
1710
|
+
guestReviewHighlights: guestReviewHighlights.slice(0, 7),
|
|
1711
|
+
locationInfo
|
|
1712
|
+
};
|
|
1713
|
+
}
|
|
1714
|
+
catch {
|
|
1715
|
+
// If anything goes wrong with API extraction, return null to trigger fallback
|
|
1716
|
+
return null;
|
|
1717
|
+
}
|
|
1718
|
+
});
|
|
1719
|
+
}
|
|
1720
|
+
/**
|
|
1721
|
+
* Fetch room facilities via Booking.com's GraphQL API.
|
|
1722
|
+
* This provides detailed amenities for each room type (AC, TV, bathroom details, etc.)
|
|
1723
|
+
* Must be called when already on a hotel page with an active session.
|
|
1724
|
+
*
|
|
1725
|
+
* @param hotelId - The numeric hotel ID (e.g., 6523595)
|
|
1726
|
+
* @param checkIn - Check-in date in YYYY-MM-DD format
|
|
1727
|
+
* @param checkOut - Check-out date in YYYY-MM-DD format
|
|
1728
|
+
* @returns Map of roomId to array of amenity categories
|
|
1729
|
+
*/
|
|
1730
|
+
async fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut) {
|
|
1731
|
+
if (!this.page)
|
|
1732
|
+
return new Map();
|
|
1733
|
+
try {
|
|
1734
|
+
const result = await this.page.evaluate(async ({ hotelId, checkIn, checkOut }) => {
|
|
1735
|
+
const query = `
|
|
1736
|
+
query RoomPageDesktopRDS($rdsInput: RDSRoomDetailQueryInput!) {
|
|
1737
|
+
roomDetail(roomDetailQueryInput: $rdsInput) {
|
|
1738
|
+
categorizedFacilitiesForAllRooms {
|
|
1739
|
+
roomId
|
|
1740
|
+
categorizedFacilities {
|
|
1741
|
+
category
|
|
1742
|
+
facilities {
|
|
1743
|
+
name
|
|
1744
|
+
id
|
|
1745
|
+
}
|
|
1746
|
+
}
|
|
1747
|
+
}
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
`;
|
|
1751
|
+
const variables = {
|
|
1752
|
+
rdsInput: {
|
|
1753
|
+
hotelId: String(hotelId),
|
|
1754
|
+
searchConfig: {
|
|
1755
|
+
searchConfigDate: {
|
|
1756
|
+
checkin: checkIn,
|
|
1757
|
+
checkout: checkOut,
|
|
1758
|
+
},
|
|
1759
|
+
nbRooms: 1,
|
|
1760
|
+
nbAdults: 2,
|
|
1761
|
+
nbChildren: 0,
|
|
1762
|
+
childrenAges: [],
|
|
1763
|
+
},
|
|
1764
|
+
highlightedBlocks: [],
|
|
1765
|
+
selectedFilters: '',
|
|
1766
|
+
travelReason: 'LEISURE',
|
|
1767
|
+
},
|
|
1768
|
+
};
|
|
1769
|
+
try {
|
|
1770
|
+
const response = await fetch('/dml/graphql', {
|
|
1771
|
+
method: 'POST',
|
|
1772
|
+
headers: {
|
|
1773
|
+
'Content-Type': 'application/json',
|
|
1774
|
+
'x-booking-topic': 'capla_browser_b-property-web-property-page',
|
|
1775
|
+
'x-booking-context-action-name': 'hotel',
|
|
1776
|
+
'apollographql-client-name': 'b-property-web-property-page_rust',
|
|
1777
|
+
},
|
|
1778
|
+
body: JSON.stringify({
|
|
1779
|
+
operationName: 'RoomPageDesktopRDS',
|
|
1780
|
+
variables,
|
|
1781
|
+
query,
|
|
1782
|
+
}),
|
|
1783
|
+
});
|
|
1784
|
+
if (!response.ok) {
|
|
1785
|
+
return { error: `HTTP ${response.status}` };
|
|
1786
|
+
}
|
|
1787
|
+
const data = await response.json();
|
|
1788
|
+
return data;
|
|
1789
|
+
}
|
|
1790
|
+
catch (e) {
|
|
1791
|
+
return { error: e instanceof Error ? e.message : 'Unknown error' };
|
|
1792
|
+
}
|
|
1793
|
+
}, { hotelId, checkIn, checkOut });
|
|
1794
|
+
if ('error' in result) {
|
|
1795
|
+
logger.debug({ error: result.error }, 'GraphQL room facilities fetch failed');
|
|
1796
|
+
return new Map();
|
|
1797
|
+
}
|
|
1798
|
+
// Parse the response into our map structure
|
|
1799
|
+
const facilitiesMap = new Map();
|
|
1800
|
+
const roomData = result?.data?.roomDetail?.categorizedFacilitiesForAllRooms || [];
|
|
1801
|
+
for (const room of roomData) {
|
|
1802
|
+
const roomId = String(room.roomId);
|
|
1803
|
+
const categories = [];
|
|
1804
|
+
for (const cat of room.categorizedFacilities || []) {
|
|
1805
|
+
categories.push({
|
|
1806
|
+
category: cat.category || 'General',
|
|
1807
|
+
items: (cat.facilities || []).map((f) => f.name || '').filter(Boolean),
|
|
1808
|
+
});
|
|
1809
|
+
}
|
|
1810
|
+
if (categories.length > 0) {
|
|
1811
|
+
facilitiesMap.set(roomId, categories);
|
|
1812
|
+
}
|
|
1813
|
+
}
|
|
1814
|
+
logger.debug({ roomCount: facilitiesMap.size }, 'Fetched room facilities via GraphQL');
|
|
1815
|
+
return facilitiesMap;
|
|
1816
|
+
}
|
|
1817
|
+
catch (error) {
|
|
1818
|
+
logger.debug({ error }, 'Failed to fetch room facilities via GraphQL');
|
|
1819
|
+
return new Map();
|
|
1820
|
+
}
|
|
1821
|
+
}
|
|
1822
|
+
/**
|
|
1823
|
+
* Extract hotel ID from the current page URL or DOM.
|
|
1824
|
+
* Booking.com hotel IDs are typically in the URL path or data attributes.
|
|
1825
|
+
*/
|
|
1826
|
+
async extractHotelId() {
|
|
1827
|
+
if (!this.page)
|
|
1828
|
+
return null;
|
|
1829
|
+
return await this.page.evaluate(() => {
|
|
1830
|
+
// Try to get from URL path (e.g., /hotel/fr/hotel-name.html?... contains ID in data)
|
|
1831
|
+
// Actually, the ID is often in data attributes or Apollo cache
|
|
1832
|
+
// Method 1: Look for data-hotel-id attribute
|
|
1833
|
+
const hotelIdEl = document.querySelector('[data-hotel-id]');
|
|
1834
|
+
if (hotelIdEl) {
|
|
1835
|
+
return hotelIdEl.getAttribute('data-hotel-id');
|
|
1836
|
+
}
|
|
1837
|
+
// Method 2: Look in Apollo cache
|
|
1838
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1839
|
+
const w = window;
|
|
1840
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1841
|
+
if (cache) {
|
|
1842
|
+
const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
|
|
1843
|
+
if (propertyKey) {
|
|
1844
|
+
const match = propertyKey.match(/Property:\{"id":(\d+)\}/);
|
|
1845
|
+
if (match && match[1])
|
|
1846
|
+
return match[1];
|
|
1847
|
+
}
|
|
1848
|
+
}
|
|
1849
|
+
// Method 3: Look for form inputs with hotel_id
|
|
1850
|
+
const hotelInput = document.querySelector('input[name="hotel_id"]');
|
|
1851
|
+
if (hotelInput?.value)
|
|
1852
|
+
return hotelInput.value;
|
|
1853
|
+
// Method 4: Look in data-block-id attributes (format: roomTypeId_policyId_hotelId_...)
|
|
1854
|
+
const blockEl = document.querySelector('[data-block-id]');
|
|
1855
|
+
if (blockEl) {
|
|
1856
|
+
const blockId = blockEl.getAttribute('data-block-id') || '';
|
|
1857
|
+
const parts = blockId.split('_');
|
|
1858
|
+
// Hotel ID is typically in position 2 (after roomTypeId and policyId)
|
|
1859
|
+
const potentialHotelId = parts[2];
|
|
1860
|
+
if (parts.length >= 3 && potentialHotelId && /^\d{5,}$/.test(potentialHotelId)) {
|
|
1861
|
+
return potentialHotelId;
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1864
|
+
return null;
|
|
1865
|
+
});
|
|
1866
|
+
}
|
|
1867
|
+
/**
|
|
1868
|
+
* Extract reviews data from Booking.com's Apollo GraphQL cache.
|
|
1869
|
+
* Returns null if extraction fails (triggering DOM fallback).
|
|
1870
|
+
*/
|
|
1871
|
+
async extractReviewsFromAPI() {
|
|
1872
|
+
if (!this.page)
|
|
1873
|
+
return null;
|
|
1874
|
+
return await this.page.evaluate(() => {
|
|
1875
|
+
try {
|
|
1876
|
+
// Access the Apollo cache embedded in the page
|
|
1877
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1878
|
+
const w = window;
|
|
1879
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1880
|
+
if (!cache)
|
|
1881
|
+
return null;
|
|
1882
|
+
// Helper to resolve __ref pointers
|
|
1883
|
+
const resolveRef = (ref) => {
|
|
1884
|
+
if (ref && typeof ref === 'object' && '__ref' in ref) {
|
|
1885
|
+
return cache[ref.__ref];
|
|
1886
|
+
}
|
|
1887
|
+
return ref;
|
|
1888
|
+
};
|
|
1889
|
+
// Find the Property entry - it has a key like 'Property:{"id":6523595}'
|
|
1890
|
+
const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
|
|
1891
|
+
if (!propertyKey)
|
|
1892
|
+
return null;
|
|
1893
|
+
const property = cache[propertyKey];
|
|
1894
|
+
if (!property)
|
|
1895
|
+
return null;
|
|
1896
|
+
// Extract hotel ID from the property key
|
|
1897
|
+
const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
|
|
1898
|
+
const hotelId = idMatch ? idMatch[1] : null;
|
|
1899
|
+
// Get BasicPropertyData for hotel name
|
|
1900
|
+
const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
|
|
1901
|
+
const basicData = basicDataKey ? cache[basicDataKey] : null;
|
|
1902
|
+
// Extract hotel name
|
|
1903
|
+
const hotelName = property.name || basicData?.name || '';
|
|
1904
|
+
// Extract overall rating, total reviews, and rating breakdown from property.reviews
|
|
1905
|
+
let overallRating = null;
|
|
1906
|
+
let totalReviews = 0;
|
|
1907
|
+
const ratingBreakdown = {
|
|
1908
|
+
staff: null,
|
|
1909
|
+
facilities: null,
|
|
1910
|
+
cleanliness: null,
|
|
1911
|
+
comfort: null,
|
|
1912
|
+
valueForMoney: null,
|
|
1913
|
+
location: null,
|
|
1914
|
+
freeWifi: null,
|
|
1915
|
+
};
|
|
1916
|
+
const reviewsData = property.reviews;
|
|
1917
|
+
if (reviewsData) {
|
|
1918
|
+
totalReviews = typeof reviewsData.reviewsCount === 'number' ? reviewsData.reviewsCount : 0;
|
|
1919
|
+
// Map question names to breakdown fields
|
|
1920
|
+
const questionMap = {
|
|
1921
|
+
'hotel_staff': 'staff',
|
|
1922
|
+
'hotel_services': 'facilities',
|
|
1923
|
+
'hotel_clean': 'cleanliness',
|
|
1924
|
+
'hotel_comfort': 'comfort',
|
|
1925
|
+
'hotel_value': 'valueForMoney',
|
|
1926
|
+
'hotel_location': 'location',
|
|
1927
|
+
'hotel_free_wifi': 'freeWifi',
|
|
1928
|
+
};
|
|
1929
|
+
const questions = reviewsData.questions;
|
|
1930
|
+
if (Array.isArray(questions)) {
|
|
1931
|
+
for (const q of questions) {
|
|
1932
|
+
if (!q?.name || typeof q.score !== 'number')
|
|
1933
|
+
continue;
|
|
1934
|
+
if (q.name === 'total') {
|
|
1935
|
+
overallRating = q.score;
|
|
1936
|
+
}
|
|
1937
|
+
else {
|
|
1938
|
+
const breakdownKey = questionMap[q.name];
|
|
1939
|
+
if (breakdownKey) {
|
|
1940
|
+
ratingBreakdown[breakdownKey] = q.score;
|
|
1941
|
+
}
|
|
1942
|
+
}
|
|
1943
|
+
}
|
|
1944
|
+
}
|
|
1945
|
+
}
|
|
1946
|
+
// Extract individual reviews from FeaturedReview entries
|
|
1947
|
+
const reviews = [];
|
|
1948
|
+
// Map customer types to display names
|
|
1949
|
+
const customerTypeMap = {
|
|
1950
|
+
'SOLO_TRAVELLER': 'Solo traveler',
|
|
1951
|
+
'YOUNG_COUPLE': 'Couple',
|
|
1952
|
+
'MATURE_COUPLE': 'Couple',
|
|
1953
|
+
'FAMILY_WITH_YOUNG_CHILDREN': 'Family with young children',
|
|
1954
|
+
'FAMILY_WITH_OLDER_CHILDREN': 'Family with older children',
|
|
1955
|
+
'WITH_FRIENDS': 'Group of friends',
|
|
1956
|
+
'BUSINESS': 'Business traveler',
|
|
1957
|
+
};
|
|
1958
|
+
// Map country codes to names
|
|
1959
|
+
const countryCodeMap = {
|
|
1960
|
+
'us': 'United States',
|
|
1961
|
+
'gb': 'United Kingdom',
|
|
1962
|
+
'fr': 'France',
|
|
1963
|
+
'de': 'Germany',
|
|
1964
|
+
'es': 'Spain',
|
|
1965
|
+
'it': 'Italy',
|
|
1966
|
+
'nl': 'Netherlands',
|
|
1967
|
+
'be': 'Belgium',
|
|
1968
|
+
'ch': 'Switzerland',
|
|
1969
|
+
'au': 'Australia',
|
|
1970
|
+
'ca': 'Canada',
|
|
1971
|
+
'jp': 'Japan',
|
|
1972
|
+
'cn': 'China',
|
|
1973
|
+
'kr': 'South Korea',
|
|
1974
|
+
'br': 'Brazil',
|
|
1975
|
+
'mx': 'Mexico',
|
|
1976
|
+
'in': 'India',
|
|
1977
|
+
'ru': 'Russia',
|
|
1978
|
+
'pl': 'Poland',
|
|
1979
|
+
'se': 'Sweden',
|
|
1980
|
+
'no': 'Norway',
|
|
1981
|
+
'dk': 'Denmark',
|
|
1982
|
+
'fi': 'Finland',
|
|
1983
|
+
'at': 'Austria',
|
|
1984
|
+
'pt': 'Portugal',
|
|
1985
|
+
'gr': 'Greece',
|
|
1986
|
+
'tr': 'Turkey',
|
|
1987
|
+
'ie': 'Ireland',
|
|
1988
|
+
'nz': 'New Zealand',
|
|
1989
|
+
'za': 'South Africa',
|
|
1990
|
+
'ar': 'Argentina',
|
|
1991
|
+
'cl': 'Chile',
|
|
1992
|
+
'co': 'Colombia',
|
|
1993
|
+
'th': 'Thailand',
|
|
1994
|
+
'sg': 'Singapore',
|
|
1995
|
+
'my': 'Malaysia',
|
|
1996
|
+
'id': 'Indonesia',
|
|
1997
|
+
'ph': 'Philippines',
|
|
1998
|
+
'vn': 'Vietnam',
|
|
1999
|
+
'ae': 'United Arab Emirates',
|
|
2000
|
+
'sa': 'Saudi Arabia',
|
|
2001
|
+
'eg': 'Egypt',
|
|
2002
|
+
'il': 'Israel',
|
|
2003
|
+
'cz': 'Czech Republic',
|
|
2004
|
+
'hu': 'Hungary',
|
|
2005
|
+
'ro': 'Romania',
|
|
2006
|
+
};
|
|
2007
|
+
// Find all FeaturedReview entries
|
|
2008
|
+
const reviewKeys = Object.keys(cache).filter(k => k.startsWith('FeaturedReview:'));
|
|
2009
|
+
for (const key of reviewKeys) {
|
|
2010
|
+
const review = cache[key];
|
|
2011
|
+
if (!review)
|
|
2012
|
+
continue;
|
|
2013
|
+
// Format the date from Unix timestamp
|
|
2014
|
+
let dateStr = '';
|
|
2015
|
+
if (typeof review.completed === 'number') {
|
|
2016
|
+
const date = new Date(review.completed * 1000);
|
|
2017
|
+
dateStr = date.toLocaleDateString('en-US', {
|
|
2018
|
+
year: 'numeric',
|
|
2019
|
+
month: 'long',
|
|
2020
|
+
day: 'numeric'
|
|
2021
|
+
});
|
|
2022
|
+
}
|
|
2023
|
+
// Get room type from ref
|
|
2024
|
+
let roomType = '';
|
|
2025
|
+
const roomRef = resolveRef(review.roomType);
|
|
2026
|
+
if (roomRef && typeof roomRef === 'object' && 'name' in roomRef) {
|
|
2027
|
+
roomType = roomRef.name || '';
|
|
2028
|
+
}
|
|
2029
|
+
// Get country name from code
|
|
2030
|
+
const countryCode = (review.guestCountryCode || '').toLowerCase();
|
|
2031
|
+
const country = countryCodeMap[countryCode] || countryCode.toUpperCase();
|
|
2032
|
+
// Get traveler type display name
|
|
2033
|
+
const travelerType = customerTypeMap[review.customerType] || review.customerType || '';
|
|
2034
|
+
reviews.push({
|
|
2035
|
+
title: review.title || '',
|
|
2036
|
+
rating: typeof review.averageScore === 'number' ? review.averageScore : null,
|
|
2037
|
+
date: dateStr,
|
|
2038
|
+
travelerType,
|
|
2039
|
+
stayDate: '', // Not available in FeaturedReview
|
|
2040
|
+
roomType,
|
|
2041
|
+
nightsStayed: '', // Not available in FeaturedReview
|
|
2042
|
+
positive: review.positiveText || '',
|
|
2043
|
+
negative: review.negativeText || '',
|
|
2044
|
+
country,
|
|
2045
|
+
});
|
|
2046
|
+
}
|
|
2047
|
+
// Sort reviews by date (newest first - higher timestamp = newer)
|
|
2048
|
+
reviews.sort((a, b) => {
|
|
2049
|
+
// Parse dates back for comparison
|
|
2050
|
+
const dateA = new Date(a.date).getTime() || 0;
|
|
2051
|
+
const dateB = new Date(b.date).getTime() || 0;
|
|
2052
|
+
return dateB - dateA;
|
|
2053
|
+
});
|
|
2054
|
+
// Validate we have meaningful data
|
|
2055
|
+
if (!hotelName || hotelName.length < 3) {
|
|
2056
|
+
return null;
|
|
2057
|
+
}
|
|
2058
|
+
return {
|
|
2059
|
+
hotelName,
|
|
2060
|
+
overallRating,
|
|
2061
|
+
totalReviews,
|
|
2062
|
+
ratingBreakdown,
|
|
2063
|
+
reviews,
|
|
2064
|
+
};
|
|
2065
|
+
}
|
|
2066
|
+
catch {
|
|
2067
|
+
return null;
|
|
2068
|
+
}
|
|
2069
|
+
});
|
|
2070
|
+
}
|
|
992
2071
|
scoreAndFilterHotels(hotels, filters) {
|
|
993
2072
|
return hotels
|
|
994
2073
|
.map((hotel) => {
|
|
@@ -1192,6 +2271,16 @@ export class HotelBrowser {
|
|
|
1192
2271
|
await this.page.waitForTimeout(2000);
|
|
1193
2272
|
await this.checkForBlocking();
|
|
1194
2273
|
await this.dismissPopups();
|
|
2274
|
+
// Try API extraction first (more reliable structured data)
|
|
2275
|
+
const apiDetails = await this.extractHotelDetailsFromAPI();
|
|
2276
|
+
if (apiDetails) {
|
|
2277
|
+
logger.debug("Successfully extracted hotel details from API cache");
|
|
2278
|
+
return {
|
|
2279
|
+
...apiDetails,
|
|
2280
|
+
url: hotelUrl,
|
|
2281
|
+
};
|
|
2282
|
+
}
|
|
2283
|
+
logger.debug("API extraction returned no results, falling back to DOM scraping");
|
|
1195
2284
|
// Extract comprehensive hotel details using evaluate with string to avoid __name compilation issues
|
|
1196
2285
|
const details = await this.page.evaluate(`
|
|
1197
2286
|
(function() {
|
|
@@ -1475,7 +2564,7 @@ export class HotelBrowser {
|
|
|
1475
2564
|
await this.page.waitForTimeout(2000);
|
|
1476
2565
|
await this.checkForBlocking();
|
|
1477
2566
|
await this.dismissPopups();
|
|
1478
|
-
// Extract room availability using
|
|
2567
|
+
// Extract room availability using data attributes (primary) with DOM fallback
|
|
1479
2568
|
const result = await this.page.evaluate(`
|
|
1480
2569
|
(function() {
|
|
1481
2570
|
function getText(selector) {
|
|
@@ -1487,152 +2576,292 @@ export class HotelBrowser {
|
|
|
1487
2576
|
var hotelName = getText('h2') || getText('h1').split('(')[0].trim() || "Unknown Hotel";
|
|
1488
2577
|
|
|
1489
2578
|
var roomOptions = [];
|
|
1490
|
-
var seenRooms = {};
|
|
1491
2579
|
|
|
1492
|
-
//
|
|
1493
|
-
|
|
2580
|
+
// ============================================================
|
|
2581
|
+
// STRATEGY 1: Extract from data-* attributes (most reliable)
|
|
2582
|
+
// Uses data-block-id, data-hotel-rounded-price, and data-fltrs
|
|
2583
|
+
// ============================================================
|
|
2584
|
+
|
|
2585
|
+
// First, build maps of room type IDs to room names and bed types from header rows
|
|
2586
|
+
var roomNameMap = {};
|
|
2587
|
+
var bedTypeMap = {};
|
|
2588
|
+
var roomTypeHeaders = document.querySelectorAll('.hprt-roomtype-link');
|
|
2589
|
+
for (var h = 0; h < roomTypeHeaders.length; h++) {
|
|
2590
|
+
var header = roomTypeHeaders[h];
|
|
2591
|
+
var headerRow = header.closest('tr');
|
|
2592
|
+
var headerBlockId = headerRow ? headerRow.getAttribute('data-block-id') : null;
|
|
2593
|
+
if (headerBlockId && headerBlockId.indexOf('_') > 0) {
|
|
2594
|
+
var headerRoomTypeId = headerBlockId.split('_')[0];
|
|
2595
|
+
var headerRoomName = header.textContent ? header.textContent.trim() : '';
|
|
2596
|
+
if (headerRoomName) {
|
|
2597
|
+
roomNameMap[headerRoomTypeId] = headerRoomName;
|
|
2598
|
+
}
|
|
2599
|
+
// Also capture bed type from header row
|
|
2600
|
+
var bedEl = headerRow.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
|
|
2601
|
+
if (bedEl) {
|
|
2602
|
+
var bedText = bedEl.textContent || '';
|
|
2603
|
+
var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
|
|
2604
|
+
for (var b = 0; b < bedLines.length; b++) {
|
|
2605
|
+
if (bedLines[b].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
|
|
2606
|
+
bedTypeMap[headerRoomTypeId] = bedLines[b];
|
|
2607
|
+
break;
|
|
2608
|
+
}
|
|
2609
|
+
}
|
|
2610
|
+
}
|
|
2611
|
+
}
|
|
2612
|
+
}
|
|
2613
|
+
|
|
2614
|
+
// Extract all room blocks with data-hotel-rounded-price attribute
|
|
2615
|
+
// Returns ALL rate options (room + meal plan + cancellation combinations)
|
|
2616
|
+
var dataRows = document.querySelectorAll('tr[data-block-id][data-hotel-rounded-price]');
|
|
2617
|
+
var seenBlockIds = {}; // Track exact block IDs to avoid true duplicates
|
|
1494
2618
|
|
|
1495
|
-
for (var i = 0; i <
|
|
1496
|
-
var
|
|
1497
|
-
var
|
|
2619
|
+
for (var i = 0; i < dataRows.length && roomOptions.length < 30; i++) {
|
|
2620
|
+
var row = dataRows[i];
|
|
2621
|
+
var blockId = row.getAttribute('data-block-id') || '';
|
|
2622
|
+
var parts = blockId.split('_');
|
|
2623
|
+
if (parts.length < 2) continue;
|
|
1498
2624
|
|
|
1499
|
-
|
|
1500
|
-
|
|
2625
|
+
// Skip exact duplicate block IDs
|
|
2626
|
+
if (seenBlockIds[blockId]) continue;
|
|
2627
|
+
seenBlockIds[blockId] = true;
|
|
1501
2628
|
|
|
1502
|
-
|
|
1503
|
-
var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
|
|
1504
|
-
var rowText = row ? row.textContent || "" : "";
|
|
2629
|
+
var roomTypeId = parts[0];
|
|
1505
2630
|
|
|
1506
|
-
//
|
|
1507
|
-
var
|
|
1508
|
-
var
|
|
2631
|
+
// Get price from data attribute (more reliable than DOM text)
|
|
2632
|
+
var roundedPrice = row.getAttribute('data-hotel-rounded-price');
|
|
2633
|
+
var price = roundedPrice ? parseInt(roundedPrice, 10) : null;
|
|
1509
2634
|
|
|
1510
|
-
//
|
|
1511
|
-
var
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
var
|
|
1515
|
-
|
|
1516
|
-
price = parseInt(match[1].replace(/,/g, ""));
|
|
1517
|
-
// Clean up price display
|
|
1518
|
-
var perNightMatch = priceDisplay.match(/[\\$€£¥]\\s*[\\d,]+/);
|
|
1519
|
-
priceDisplay = perNightMatch ? perNightMatch[0] : priceDisplay.split('\\n')[0];
|
|
1520
|
-
}
|
|
2635
|
+
// Get price display from DOM
|
|
2636
|
+
var priceDisplay = '';
|
|
2637
|
+
var priceEl = row.querySelector('.bui-price-display__value');
|
|
2638
|
+
if (priceEl) {
|
|
2639
|
+
var displayMatch = (priceEl.textContent || '').match(/[\\$€£¥][\\d,]+/);
|
|
2640
|
+
priceDisplay = displayMatch ? displayMatch[0] : '';
|
|
1521
2641
|
}
|
|
1522
2642
|
|
|
1523
|
-
//
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
2643
|
+
// Get room name from our map
|
|
2644
|
+
var roomName = roomNameMap[roomTypeId] || '';
|
|
2645
|
+
|
|
2646
|
+
// If no name in map, try to find it in the row
|
|
2647
|
+
if (!roomName) {
|
|
2648
|
+
var roomLink = row.querySelector('.hprt-roomtype-link, a[class*="room"]');
|
|
2649
|
+
roomName = roomLink ? (roomLink.textContent || '').trim() : '';
|
|
2650
|
+
}
|
|
2651
|
+
|
|
2652
|
+
// Still no name? Use a generic one
|
|
2653
|
+
if (!roomName) {
|
|
2654
|
+
roomName = 'Room Type ' + roomTypeId;
|
|
2655
|
+
}
|
|
2656
|
+
|
|
2657
|
+
// Parse data-fltrs for structured info (breakfast, beds)
|
|
2658
|
+
var fltrs = row.getAttribute('data-fltrs');
|
|
2659
|
+
var breakfastIncluded = false;
|
|
2660
|
+
var bedCount = [];
|
|
2661
|
+
|
|
2662
|
+
if (fltrs) {
|
|
2663
|
+
try {
|
|
2664
|
+
var fltrData = JSON.parse(fltrs.replace(/\\n/g, ''));
|
|
2665
|
+
breakfastIncluded = fltrData.breakfast_included === 1;
|
|
2666
|
+
bedCount = fltrData.bed_count || [];
|
|
2667
|
+
} catch (e) {}
|
|
1535
2668
|
}
|
|
1536
2669
|
|
|
1537
|
-
//
|
|
1538
|
-
var bedType =
|
|
1539
|
-
var bedEl = row
|
|
2670
|
+
// Get bed type from DOM (for display)
|
|
2671
|
+
var bedType = '';
|
|
2672
|
+
var bedEl = row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
|
|
1540
2673
|
if (bedEl) {
|
|
1541
|
-
|
|
1542
|
-
var bedText = bedEl.textContent || "";
|
|
2674
|
+
var bedText = bedEl.textContent || '';
|
|
1543
2675
|
var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
|
|
1544
|
-
// Find line with bed info
|
|
1545
2676
|
for (var k = 0; k < bedLines.length; k++) {
|
|
1546
2677
|
if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
|
|
1547
2678
|
bedType = bedLines[k];
|
|
1548
2679
|
break;
|
|
1549
2680
|
}
|
|
1550
2681
|
}
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
2682
|
+
}
|
|
2683
|
+
// Fallback 1: use bed type from our map (captured from header rows)
|
|
2684
|
+
if (!bedType && bedTypeMap[roomTypeId]) {
|
|
2685
|
+
bedType = bedTypeMap[roomTypeId];
|
|
2686
|
+
}
|
|
2687
|
+
// Fallback 2: use bed count from data-fltrs
|
|
2688
|
+
if (!bedType && bedCount.length > 0) {
|
|
2689
|
+
bedType = bedCount.length === 1 ? bedCount[0] + ' bed' : bedCount.join(' or ') + ' beds';
|
|
1554
2690
|
}
|
|
1555
2691
|
|
|
1556
|
-
//
|
|
1557
|
-
var
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
cancellation =
|
|
2692
|
+
// Get cancellation policy from row text
|
|
2693
|
+
var rowText = row.textContent || '';
|
|
2694
|
+
var rowTextLower = rowText.toLowerCase();
|
|
2695
|
+
var cancellation = '';
|
|
2696
|
+
if (rowTextLower.indexOf('free cancellation') >= 0) {
|
|
2697
|
+
cancellation = 'Free cancellation';
|
|
2698
|
+
} else if (rowTextLower.indexOf('non-refundable') >= 0) {
|
|
2699
|
+
cancellation = 'Non-refundable';
|
|
1562
2700
|
}
|
|
1563
2701
|
|
|
1564
|
-
//
|
|
1565
|
-
var breakfast =
|
|
1566
|
-
if (
|
|
1567
|
-
breakfast =
|
|
1568
|
-
} else if (
|
|
1569
|
-
breakfast =
|
|
2702
|
+
// Get breakfast info (prefer data-fltrs, fallback to DOM text)
|
|
2703
|
+
var breakfast = '';
|
|
2704
|
+
if (breakfastIncluded) {
|
|
2705
|
+
breakfast = 'Breakfast included';
|
|
2706
|
+
} else if (rowTextLower.indexOf('breakfast included') >= 0) {
|
|
2707
|
+
breakfast = 'Breakfast included';
|
|
2708
|
+
} else if (rowTextLower.indexOf('room only') >= 0) {
|
|
2709
|
+
breakfast = 'Room only';
|
|
1570
2710
|
}
|
|
1571
2711
|
|
|
1572
|
-
//
|
|
2712
|
+
// Get occupancy
|
|
1573
2713
|
var sleeps = null;
|
|
1574
|
-
var occupancyEl = row
|
|
2714
|
+
var occupancyEl = row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info');
|
|
1575
2715
|
if (occupancyEl) {
|
|
1576
|
-
var occMatch = occupancyEl.textContent.match(/(\\d+)/);
|
|
1577
|
-
sleeps = occMatch ? parseInt(occMatch[1]) : null;
|
|
2716
|
+
var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
|
|
2717
|
+
sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
|
|
1578
2718
|
}
|
|
1579
2719
|
|
|
2720
|
+
// Build features array
|
|
2721
|
+
var features = [];
|
|
2722
|
+
if (breakfast) features.push(breakfast);
|
|
2723
|
+
if (cancellation) features.push(cancellation);
|
|
2724
|
+
|
|
1580
2725
|
roomOptions.push({
|
|
1581
|
-
name:
|
|
2726
|
+
name: roomName,
|
|
1582
2727
|
price: price,
|
|
1583
2728
|
priceDisplay: priceDisplay,
|
|
1584
2729
|
sleeps: sleeps,
|
|
1585
|
-
features:
|
|
2730
|
+
features: features,
|
|
1586
2731
|
bedType: bedType,
|
|
1587
2732
|
cancellation: cancellation,
|
|
1588
|
-
breakfast: breakfast
|
|
2733
|
+
breakfast: breakfast,
|
|
2734
|
+
roomTypeId: roomTypeId
|
|
1589
2735
|
});
|
|
1590
2736
|
}
|
|
1591
2737
|
|
|
1592
|
-
//
|
|
2738
|
+
// ============================================================
|
|
2739
|
+
// STRATEGY 2: Fallback to DOM scraping if data attributes failed
|
|
2740
|
+
// ============================================================
|
|
2741
|
+
if (roomOptions.length === 0) {
|
|
2742
|
+
var seenRooms = {};
|
|
2743
|
+
var roomTypeLinks = document.querySelectorAll('.hprt-roomtype-link, a[class*="hprt-roomtype"]');
|
|
2744
|
+
|
|
2745
|
+
for (var i = 0; i < roomTypeLinks.length && roomOptions.length < 10; i++) {
|
|
2746
|
+
var roomLink = roomTypeLinks[i];
|
|
2747
|
+
var name = roomLink.textContent ? roomLink.textContent.trim() : '';
|
|
2748
|
+
|
|
2749
|
+
if (!name || name.length < 3 || seenRooms[name]) continue;
|
|
2750
|
+
seenRooms[name] = true;
|
|
2751
|
+
|
|
2752
|
+
var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
|
|
2753
|
+
var rowText = row ? row.textContent || '' : '';
|
|
2754
|
+
|
|
2755
|
+
// Try to find price
|
|
2756
|
+
var price = null;
|
|
2757
|
+
var priceDisplay = '';
|
|
2758
|
+
var priceCell = row ? row.querySelector('.hprt-table-cell-price, [class*="price-block"], [class*="bui-price"]') : null;
|
|
2759
|
+
if (priceCell) {
|
|
2760
|
+
var match = (priceCell.textContent || '').match(/[\\$€£¥]\\s*([\\d,]+)/);
|
|
2761
|
+
if (match) {
|
|
2762
|
+
price = parseInt(match[1].replace(/,/g, ''), 10);
|
|
2763
|
+
priceDisplay = match[0];
|
|
2764
|
+
}
|
|
2765
|
+
}
|
|
2766
|
+
|
|
2767
|
+
// Bed type
|
|
2768
|
+
var bedType = '';
|
|
2769
|
+
var bedEl = row ? row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]') : null;
|
|
2770
|
+
if (bedEl) {
|
|
2771
|
+
var bedText = bedEl.textContent || '';
|
|
2772
|
+
var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
|
|
2773
|
+
for (var k = 0; k < bedLines.length; k++) {
|
|
2774
|
+
if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
|
|
2775
|
+
bedType = bedLines[k];
|
|
2776
|
+
break;
|
|
2777
|
+
}
|
|
2778
|
+
}
|
|
2779
|
+
}
|
|
2780
|
+
|
|
2781
|
+
// Cancellation and breakfast from text
|
|
2782
|
+
var rowTextLower = rowText.toLowerCase();
|
|
2783
|
+
var cancellation = '';
|
|
2784
|
+
if (rowTextLower.indexOf('free cancellation') >= 0) {
|
|
2785
|
+
cancellation = 'Free cancellation';
|
|
2786
|
+
} else if (rowTextLower.indexOf('non-refundable') >= 0) {
|
|
2787
|
+
cancellation = 'Non-refundable';
|
|
2788
|
+
}
|
|
2789
|
+
|
|
2790
|
+
var breakfast = '';
|
|
2791
|
+
if (rowTextLower.indexOf('breakfast included') >= 0) {
|
|
2792
|
+
breakfast = 'Breakfast included';
|
|
2793
|
+
} else if (rowTextLower.indexOf('room only') >= 0) {
|
|
2794
|
+
breakfast = 'Room only';
|
|
2795
|
+
}
|
|
2796
|
+
|
|
2797
|
+
// Occupancy
|
|
2798
|
+
var sleeps = null;
|
|
2799
|
+
var occupancyEl = row ? row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info') : null;
|
|
2800
|
+
if (occupancyEl) {
|
|
2801
|
+
var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
|
|
2802
|
+
sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
|
|
2803
|
+
}
|
|
2804
|
+
|
|
2805
|
+
roomOptions.push({
|
|
2806
|
+
name: name,
|
|
2807
|
+
price: price,
|
|
2808
|
+
priceDisplay: priceDisplay,
|
|
2809
|
+
sleeps: sleeps,
|
|
2810
|
+
features: [],
|
|
2811
|
+
bedType: bedType,
|
|
2812
|
+
cancellation: cancellation,
|
|
2813
|
+
breakfast: breakfast
|
|
2814
|
+
});
|
|
2815
|
+
}
|
|
2816
|
+
}
|
|
2817
|
+
|
|
2818
|
+
// ============================================================
|
|
2819
|
+
// STRATEGY 3: Last resort - look for any data-block-id elements
|
|
2820
|
+
// ============================================================
|
|
1593
2821
|
if (roomOptions.length === 0) {
|
|
2822
|
+
var seenBlocks = {};
|
|
1594
2823
|
var blocks = document.querySelectorAll('[data-block-id]');
|
|
1595
2824
|
for (var i = 0; i < blocks.length && roomOptions.length < 10; i++) {
|
|
1596
2825
|
var block = blocks[i];
|
|
1597
|
-
var
|
|
2826
|
+
var blockId = block.getAttribute('data-block-id') || '';
|
|
2827
|
+
if (!blockId || blockId === 'header_survey') continue;
|
|
1598
2828
|
|
|
1599
|
-
|
|
2829
|
+
var blockText = block.textContent || '';
|
|
1600
2830
|
var nameEl = block.querySelector('a[class*="room"], span[class*="room-name"]');
|
|
1601
|
-
var name = nameEl ? nameEl.textContent.trim() :
|
|
2831
|
+
var name = nameEl ? (nameEl.textContent || '').trim() : '';
|
|
1602
2832
|
|
|
1603
2833
|
if (!name) {
|
|
1604
|
-
// Try to extract from block text
|
|
1605
2834
|
var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
|
|
1606
|
-
name = lines[0] ? lines[0].trim().slice(0, 50) :
|
|
2835
|
+
name = lines[0] ? lines[0].trim().slice(0, 50) : '';
|
|
1607
2836
|
}
|
|
1608
2837
|
|
|
1609
|
-
if (!name || name.length < 3 ||
|
|
1610
|
-
|
|
2838
|
+
if (!name || name.length < 3 || seenBlocks[name]) continue;
|
|
2839
|
+
seenBlocks[name] = true;
|
|
1611
2840
|
|
|
1612
2841
|
var priceMatch = blockText.match(/[\\$€£¥]\\s*([\\d,]+)/);
|
|
1613
|
-
var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g,
|
|
2842
|
+
var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g, ''), 10) : null;
|
|
1614
2843
|
|
|
1615
2844
|
roomOptions.push({
|
|
1616
2845
|
name: name,
|
|
1617
2846
|
price: price,
|
|
1618
|
-
priceDisplay: priceMatch ? priceMatch[0] :
|
|
2847
|
+
priceDisplay: priceMatch ? priceMatch[0] : '',
|
|
1619
2848
|
sleeps: null,
|
|
1620
2849
|
features: [],
|
|
1621
|
-
bedType:
|
|
1622
|
-
cancellation:
|
|
1623
|
-
breakfast:
|
|
2850
|
+
bedType: '',
|
|
2851
|
+
cancellation: '',
|
|
2852
|
+
breakfast: ''
|
|
1624
2853
|
});
|
|
1625
2854
|
}
|
|
1626
2855
|
}
|
|
1627
2856
|
|
|
1628
2857
|
// Check for "no availability" message
|
|
1629
|
-
var bodyText = document.body.textContent ||
|
|
2858
|
+
var bodyText = document.body.textContent || '';
|
|
1630
2859
|
var noAvailability =
|
|
1631
|
-
bodyText.indexOf(
|
|
1632
|
-
bodyText.indexOf(
|
|
1633
|
-
bodyText.indexOf(
|
|
1634
|
-
bodyText.indexOf(
|
|
1635
|
-
bodyText.indexOf(
|
|
2860
|
+
bodyText.indexOf('no availability') >= 0 ||
|
|
2861
|
+
bodyText.indexOf('sold out') >= 0 ||
|
|
2862
|
+
bodyText.indexOf('no rooms available') >= 0 ||
|
|
2863
|
+
bodyText.indexOf('fully booked') >= 0 ||
|
|
2864
|
+
bodyText.indexOf('We have no availability') >= 0;
|
|
1636
2865
|
|
|
1637
2866
|
return {
|
|
1638
2867
|
hotelName: hotelName,
|
|
@@ -1641,6 +2870,45 @@ export class HotelBrowser {
|
|
|
1641
2870
|
};
|
|
1642
2871
|
})()
|
|
1643
2872
|
`);
|
|
2873
|
+
// Enrich room options with facilities from GraphQL API
|
|
2874
|
+
// This provides detailed amenities (AC, TV, bathroom, etc.) per room type
|
|
2875
|
+
if (result.roomOptions.length > 0) {
|
|
2876
|
+
try {
|
|
2877
|
+
const hotelId = await this.extractHotelId();
|
|
2878
|
+
if (hotelId) {
|
|
2879
|
+
const facilitiesMap = await this.fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut);
|
|
2880
|
+
if (facilitiesMap.size > 0) {
|
|
2881
|
+
// Merge facilities into room options based on roomTypeId
|
|
2882
|
+
// Room type IDs are the first 9 digits of the full room ID (e.g., 652359501 -> 652359501) - NOTE(review): this example maps an ID to itself; confirm the full-ID format
|
|
2883
|
+
for (const room of result.roomOptions) {
|
|
2884
|
+
if (room.roomTypeId) {
|
|
2885
|
+
// Try exact match first
|
|
2886
|
+
let facilities = facilitiesMap.get(room.roomTypeId);
|
|
2887
|
+
// If there is no exact match: the GraphQL API returns full room IDs (e.g., 652359501)
|
|
2888
|
+
// while our roomTypeId might be just the prefix
|
|
2889
|
+
if (!facilities) {
|
|
2890
|
+
// Find a key that starts with our roomTypeId, or that our roomTypeId starts with (prefix match in either direction)
|
|
2891
|
+
for (const [key, value] of facilitiesMap) {
|
|
2892
|
+
if (key.startsWith(room.roomTypeId) || room.roomTypeId.startsWith(key)) {
|
|
2893
|
+
facilities = value;
|
|
2894
|
+
break;
|
|
2895
|
+
}
|
|
2896
|
+
}
|
|
2897
|
+
}
|
|
2898
|
+
if (facilities) {
|
|
2899
|
+
room.amenities = facilities;
|
|
2900
|
+
}
|
|
2901
|
+
}
|
|
2902
|
+
}
|
|
2903
|
+
logger.debug({ enrichedRooms: result.roomOptions.filter(r => r.amenities).length }, 'Enriched room options with GraphQL facilities');
|
|
2904
|
+
}
|
|
2905
|
+
}
|
|
2906
|
+
}
|
|
2907
|
+
catch (error) {
|
|
2908
|
+
// Non-fatal: continue without facilities enrichment
|
|
2909
|
+
logger.debug({ error }, 'Failed to enrich rooms with GraphQL facilities');
|
|
2910
|
+
}
|
|
2911
|
+
}
|
|
1644
2912
|
// Determine availability and lowest price
|
|
1645
2913
|
const available = result.roomOptions.length > 0 && !result.noAvailabilityDetected;
|
|
1646
2914
|
const prices = result.roomOptions
|
|
@@ -1708,52 +2976,112 @@ export class HotelBrowser {
|
|
|
1708
2976
|
catch { }
|
|
1709
2977
|
await this.page.keyboard.press("Escape");
|
|
1710
2978
|
await this.page.waitForTimeout(500);
|
|
1711
|
-
//
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
//
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
2979
|
+
// Try API extraction first for basic review data
|
|
2980
|
+
// API provides: hotel name, overall rating, rating breakdown, and featured reviews
|
|
2981
|
+
// Note: API reviews are limited to what's in cache (~6-10 reviews), sorted by newest
|
|
2982
|
+
const apiData = await this.extractReviewsFromAPI();
|
|
2983
|
+
// Determine if we can use API data directly or need DOM fallback
|
|
2984
|
+
// Use API if: we have enough reviews AND no special sorting/filtering is requested
|
|
2985
|
+
const canUseApiOnly = apiData &&
|
|
2986
|
+
apiData.reviews.length >= limit &&
|
|
2987
|
+
sortBy === "recent" &&
|
|
2988
|
+
!filterBy;
|
|
2989
|
+
if (canUseApiOnly) {
|
|
2990
|
+
logger.debug("Using API extraction for reviews (sufficient data, no filters)");
|
|
2991
|
+
const reviewsResult = {
|
|
2992
|
+
hotelName: apiData.hotelName,
|
|
2993
|
+
overallRating: apiData.overallRating,
|
|
2994
|
+
totalReviews: apiData.totalReviews,
|
|
2995
|
+
ratingBreakdown: apiData.ratingBreakdown,
|
|
2996
|
+
reviews: apiData.reviews.slice(0, limit),
|
|
2997
|
+
url: cleanUrl,
|
|
2998
|
+
};
|
|
2999
|
+
await this.saveSession();
|
|
3000
|
+
return reviewsResult;
|
|
3001
|
+
}
|
|
3002
|
+
// Use API data for metadata if available, but get reviews from DOM
|
|
3003
|
+
// This gives us accurate rating breakdown from API + more reviews from DOM
|
|
3004
|
+
const baseData = apiData || {
|
|
3005
|
+
hotelName: '',
|
|
3006
|
+
overallRating: null,
|
|
3007
|
+
totalReviews: 0,
|
|
3008
|
+
ratingBreakdown: {
|
|
3009
|
+
staff: null,
|
|
3010
|
+
facilities: null,
|
|
3011
|
+
cleanliness: null,
|
|
3012
|
+
comfort: null,
|
|
3013
|
+
valueForMoney: null,
|
|
3014
|
+
location: null,
|
|
3015
|
+
freeWifi: null,
|
|
3016
|
+
},
|
|
3017
|
+
};
|
|
3018
|
+
// If API didn't give us hotel info, get it from DOM
|
|
3019
|
+
if (!baseData.hotelName) {
|
|
3020
|
+
const mainPageData = await this.page.evaluate(`
|
|
3021
|
+
(function() {
|
|
3022
|
+
var results = { hotelName: '', overallRating: null, totalReviews: 0, breakdown: {} };
|
|
3023
|
+
|
|
3024
|
+
// Hotel name
|
|
3025
|
+
var nameEl = document.querySelector('h2[class*="pp-header__title"], [data-testid="PropertyHeaderDesktop-wrapper"] h2, h2.d2fee87262');
|
|
3026
|
+
results.hotelName = nameEl?.textContent?.trim() || '';
|
|
3027
|
+
|
|
3028
|
+
// Overall rating and total reviews from review-score-component
|
|
3029
|
+
var scoreComponent = document.querySelector('[data-testid="review-score-component"]');
|
|
3030
|
+
if (scoreComponent) {
|
|
3031
|
+
var text = scoreComponent.textContent || '';
|
|
3032
|
+
var scoreMatch = text.match(/Scored\\s+([\\d.]+)/);
|
|
3033
|
+
if (scoreMatch) {
|
|
3034
|
+
results.overallRating = parseFloat(scoreMatch[1]);
|
|
3035
|
+
}
|
|
3036
|
+
var reviewCountMatch = text.match(/([\\d,]+)\\s+reviews?/);
|
|
3037
|
+
if (reviewCountMatch) {
|
|
3038
|
+
results.totalReviews = parseInt(reviewCountMatch[1].replace(/,/g, ''));
|
|
3039
|
+
}
|
|
1751
3040
|
}
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
3041
|
+
|
|
3042
|
+
// Rating breakdown categories
|
|
3043
|
+
var breakdownEls = document.querySelectorAll('[data-testid="review-subscore"]');
|
|
3044
|
+
breakdownEls.forEach(function(el) {
|
|
3045
|
+
var text = el.textContent?.trim() || '';
|
|
3046
|
+
var parts = text.split(/\\s+/);
|
|
3047
|
+
if (parts.length >= 2) {
|
|
3048
|
+
var score = parseFloat(parts[parts.length - 1]);
|
|
3049
|
+
var category = parts.slice(0, -1).join(' ').toLowerCase();
|
|
3050
|
+
if (category.includes('staff')) results.breakdown.staff = score;
|
|
3051
|
+
else if (category.includes('facilities')) results.breakdown.facilities = score;
|
|
3052
|
+
else if (category.includes('cleanliness')) results.breakdown.cleanliness = score;
|
|
3053
|
+
else if (category.includes('comfort')) results.breakdown.comfort = score;
|
|
3054
|
+
else if (category.includes('value') || category.includes('money')) results.breakdown.valueForMoney = score;
|
|
3055
|
+
else if (category.includes('location')) results.breakdown.location = score;
|
|
3056
|
+
else if (category.includes('wifi') || category.includes('wi-fi')) results.breakdown.freeWifi = score;
|
|
3057
|
+
}
|
|
3058
|
+
});
|
|
3059
|
+
|
|
3060
|
+
return results;
|
|
3061
|
+
})()
|
|
3062
|
+
`);
|
|
3063
|
+
baseData.hotelName = mainPageData.hotelName;
|
|
3064
|
+
if (baseData.overallRating === null)
|
|
3065
|
+
baseData.overallRating = mainPageData.overallRating;
|
|
3066
|
+
if (baseData.totalReviews === 0)
|
|
3067
|
+
baseData.totalReviews = mainPageData.totalReviews;
|
|
3068
|
+
// Fill in missing rating breakdown from DOM
|
|
3069
|
+
if (baseData.ratingBreakdown.staff === null)
|
|
3070
|
+
baseData.ratingBreakdown.staff = mainPageData.breakdown.staff ?? null;
|
|
3071
|
+
if (baseData.ratingBreakdown.facilities === null)
|
|
3072
|
+
baseData.ratingBreakdown.facilities = mainPageData.breakdown.facilities ?? null;
|
|
3073
|
+
if (baseData.ratingBreakdown.cleanliness === null)
|
|
3074
|
+
baseData.ratingBreakdown.cleanliness = mainPageData.breakdown.cleanliness ?? null;
|
|
3075
|
+
if (baseData.ratingBreakdown.comfort === null)
|
|
3076
|
+
baseData.ratingBreakdown.comfort = mainPageData.breakdown.comfort ?? null;
|
|
3077
|
+
if (baseData.ratingBreakdown.valueForMoney === null)
|
|
3078
|
+
baseData.ratingBreakdown.valueForMoney = mainPageData.breakdown.valueForMoney ?? null;
|
|
3079
|
+
if (baseData.ratingBreakdown.location === null)
|
|
3080
|
+
baseData.ratingBreakdown.location = mainPageData.breakdown.location ?? null;
|
|
3081
|
+
if (baseData.ratingBreakdown.freeWifi === null)
|
|
3082
|
+
baseData.ratingBreakdown.freeWifi = mainPageData.breakdown.freeWifi ?? null;
|
|
3083
|
+
}
|
|
3084
|
+
logger.debug("Using DOM extraction for reviews (need more reviews or filters)");
|
|
1757
3085
|
// Click "Read all reviews" button to open reviews modal
|
|
1758
3086
|
const readAllBtn = await this.page.$('[data-testid="fr-read-all-reviews"], [data-testid="review-score-read-all"]');
|
|
1759
3087
|
if (!readAllBtn) {
|
|
@@ -1922,20 +3250,20 @@ export class HotelBrowser {
|
|
|
1922
3250
|
return reviews;
|
|
1923
3251
|
})()
|
|
1924
3252
|
`);
|
|
1925
|
-
// Build rating breakdown
|
|
3253
|
+
// Build rating breakdown from baseData (populated from API or DOM)
|
|
1926
3254
|
const ratingBreakdown = {
|
|
1927
|
-
staff:
|
|
1928
|
-
facilities:
|
|
1929
|
-
cleanliness:
|
|
1930
|
-
comfort:
|
|
1931
|
-
valueForMoney:
|
|
1932
|
-
location:
|
|
1933
|
-
freeWifi:
|
|
3255
|
+
staff: baseData.ratingBreakdown.staff,
|
|
3256
|
+
facilities: baseData.ratingBreakdown.facilities,
|
|
3257
|
+
cleanliness: baseData.ratingBreakdown.cleanliness,
|
|
3258
|
+
comfort: baseData.ratingBreakdown.comfort,
|
|
3259
|
+
valueForMoney: baseData.ratingBreakdown.valueForMoney,
|
|
3260
|
+
location: baseData.ratingBreakdown.location,
|
|
3261
|
+
freeWifi: baseData.ratingBreakdown.freeWifi,
|
|
1934
3262
|
};
|
|
1935
3263
|
const reviewsResult = {
|
|
1936
|
-
hotelName:
|
|
1937
|
-
overallRating:
|
|
1938
|
-
totalReviews:
|
|
3264
|
+
hotelName: baseData.hotelName,
|
|
3265
|
+
overallRating: baseData.overallRating,
|
|
3266
|
+
totalReviews: baseData.totalReviews,
|
|
1939
3267
|
ratingBreakdown,
|
|
1940
3268
|
reviews: reviews.slice(0, limit),
|
|
1941
3269
|
url: cleanUrl,
|