hotelzero 1.12.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +247 -10
- package/dist/browser.d.ts +96 -0
- package/dist/browser.js +1492 -155
- package/dist/debug-checkin.d.ts +1 -0
- package/dist/debug-checkin.js +31 -0
- package/dist/debug-extraction.d.ts +1 -0
- package/dist/debug-extraction.js +49 -0
- package/dist/debug-search.d.ts +1 -0
- package/dist/debug-search.js +47 -0
- package/dist/explore-cache.d.ts +1 -0
- package/dist/explore-cache.js +78 -0
- package/dist/index.js +10 -7
- package/dist/intercept-test.d.ts +1 -0
- package/dist/intercept-test.js +161 -0
- package/dist/verify-api-extraction.d.ts +1 -0
- package/dist/verify-api-extraction.js +116 -0
- package/package.json +1 -1
package/dist/browser.js
CHANGED
|
@@ -39,6 +39,7 @@ export const ErrorCodes = {
|
|
|
39
39
|
NETWORK_ERROR: "NETWORK_ERROR",
|
|
40
40
|
TIMEOUT: "TIMEOUT",
|
|
41
41
|
BLOCKED: "BLOCKED",
|
|
42
|
+
INVALID_PARAMS: "INVALID_PARAMS",
|
|
42
43
|
};
|
|
43
44
|
const DEFAULT_RETRY_CONFIG = {
|
|
44
45
|
maxRetries: 3,
|
|
@@ -74,7 +75,8 @@ const USER_AGENTS = [
|
|
|
74
75
|
* Get a random user agent from the pool
|
|
75
76
|
*/
|
|
76
77
|
function getRandomUserAgent() {
    // Pick a uniformly random slot; fall back to the first entry if the slot is empty.
    const candidate = USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
    return candidate ?? USER_AGENTS[0];
}
|
|
79
81
|
// Retry with exponential backoff
|
|
80
82
|
async function retryWithBackoff(fn, config = DEFAULT_RETRY_CONFIG, onRetry) {
|
|
@@ -762,8 +764,15 @@ export class HotelBrowser {
|
|
|
762
764
|
// Scroll to load more results (pass limit to control how many to load)
|
|
763
765
|
const targetResults = params.limit || 25;
|
|
764
766
|
await this.scrollToLoadMore(targetResults);
|
|
765
|
-
//
|
|
766
|
-
let hotels = await this.
|
|
767
|
+
// Try API-based extraction first (more reliable), fall back to DOM scraping
|
|
768
|
+
let hotels = await this.extractHotelsFromAPI();
|
|
769
|
+
if (hotels.length === 0) {
|
|
770
|
+
logger.debug("API extraction returned no results, falling back to DOM scraping");
|
|
771
|
+
hotels = await this.extractHotelDetails();
|
|
772
|
+
}
|
|
773
|
+
else {
|
|
774
|
+
logger.debug({ hotelCount: hotels.length }, "Hotels extracted from API cache");
|
|
775
|
+
}
|
|
767
776
|
logger.debug({ hotelCount: hotels.length }, "Hotels extracted from page");
|
|
768
777
|
// Apply limit to cap results
|
|
769
778
|
if (params.limit && params.limit > 0) {
|
|
@@ -785,6 +794,287 @@ export class HotelBrowser {
|
|
|
785
794
|
logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Search attempt failed, retrying");
|
|
786
795
|
});
|
|
787
796
|
}
|
|
797
|
+
/**
|
|
798
|
+
* Search for a specific hotel's rate via the search API.
|
|
799
|
+
* This is 100% API-based (no HTML scraping) and returns detailed rate info
|
|
800
|
+
* including room type, meal plan, cancellation policy, and bed configuration.
|
|
801
|
+
*
|
|
802
|
+
* The method searches for the hotel by name and extracts rate details from
|
|
803
|
+
* the Apollo cache's `blocks` array and `matchingUnitConfigurations`.
|
|
804
|
+
*
|
|
805
|
+
* @param hotelUrl - The hotel's URL or name/slug (e.g., "la-sanguine" or full URL)
|
|
806
|
+
* @param checkIn - Check-in date (YYYY-MM-DD)
|
|
807
|
+
* @param checkOut - Check-out date (YYYY-MM-DD)
|
|
808
|
+
* @param guests - Number of guests
|
|
809
|
+
* @param rooms - Number of rooms
|
|
810
|
+
* @param filters - Optional rate filters (breakfast, free cancellation, bed type)
|
|
811
|
+
* @returns Rate details or null if hotel not found in results
|
|
812
|
+
*/
|
|
813
|
+
async searchHotelRates(hotelUrl, checkIn, checkOut, guests = 2, rooms = 1, filters) {
|
|
814
|
+
if (!this.page) {
|
|
815
|
+
throw new HotelSearchError("Browser not initialized. Call init() first.", ErrorCodes.BROWSER_NOT_INITIALIZED, false);
|
|
816
|
+
}
|
|
817
|
+
// Extract hotel name from URL for search query
|
|
818
|
+
const hotelName = this.extractHotelNameFromUrl(hotelUrl);
|
|
819
|
+
if (!hotelName) {
|
|
820
|
+
throw new HotelSearchError("Could not extract hotel name from URL", ErrorCodes.INVALID_PARAMS, false);
|
|
821
|
+
}
|
|
822
|
+
logger.info({ hotelName, checkIn, checkOut, guests, rooms, hasFilters: !!filters }, "Searching for hotel rate via API");
|
|
823
|
+
// Build search URL with hotel name as destination
|
|
824
|
+
const searchFilters = {};
|
|
825
|
+
// Apply rate-specific filters
|
|
826
|
+
if (filters?.breakfast) {
|
|
827
|
+
searchFilters.breakfast = true;
|
|
828
|
+
}
|
|
829
|
+
if (filters?.freeCancellation) {
|
|
830
|
+
searchFilters.freeCancellation = true;
|
|
831
|
+
}
|
|
832
|
+
const searchParams = {
|
|
833
|
+
destination: hotelName.replace(/-/g, " "), // "la-sanguine" -> "la sanguine"
|
|
834
|
+
checkIn,
|
|
835
|
+
checkOut,
|
|
836
|
+
guests,
|
|
837
|
+
rooms,
|
|
838
|
+
limit: 10, // Small limit since we're looking for a specific hotel
|
|
839
|
+
};
|
|
840
|
+
const url = this.buildBookingUrl(searchParams, searchFilters);
|
|
841
|
+
logger.debug({ url }, "Hotel rate search URL");
|
|
842
|
+
return await retryWithBackoff(async () => {
|
|
843
|
+
await this.enforceRateLimit();
|
|
844
|
+
try {
|
|
845
|
+
await this.page.goto(url, {
|
|
846
|
+
waitUntil: "networkidle",
|
|
847
|
+
timeout: 30000,
|
|
848
|
+
});
|
|
849
|
+
}
|
|
850
|
+
catch (error) {
|
|
851
|
+
const err = error;
|
|
852
|
+
if (err.message.includes("timeout") || err.message.includes("Timeout")) {
|
|
853
|
+
throw new HotelSearchError("Page load timed out. The server may be slow or unavailable.", ErrorCodes.TIMEOUT, true);
|
|
854
|
+
}
|
|
855
|
+
throw new HotelSearchError(`Navigation failed: ${err.message}`, ErrorCodes.NAVIGATION_FAILED, true);
|
|
856
|
+
}
|
|
857
|
+
await this.page.waitForTimeout(2000);
|
|
858
|
+
await this.checkForBlocking();
|
|
859
|
+
await this.dismissPopups();
|
|
860
|
+
// Extract rate details from Apollo cache
|
|
861
|
+
const rateResult = await this.extractHotelRateFromAPI(hotelName, filters);
|
|
862
|
+
if (rateResult) {
|
|
863
|
+
// Populate search params in result
|
|
864
|
+
rateResult.checkIn = checkIn;
|
|
865
|
+
rateResult.checkOut = checkOut;
|
|
866
|
+
rateResult.guests = guests;
|
|
867
|
+
rateResult.rooms = rooms;
|
|
868
|
+
logger.info({ hotelName: rateResult.hotelName, price: rateResult.price, roomName: rateResult.roomName }, "Hotel rate found via API");
|
|
869
|
+
await this.saveSession();
|
|
870
|
+
return rateResult;
|
|
871
|
+
}
|
|
872
|
+
logger.warn({ hotelName }, "Hotel not found in search results");
|
|
873
|
+
return null;
|
|
874
|
+
}, DEFAULT_RETRY_CONFIG, (attempt, error, delayMs) => {
|
|
875
|
+
logger.warn({ attempt, error: error.message, retryInMs: delayMs }, "Hotel rate search failed, retrying");
|
|
876
|
+
});
|
|
877
|
+
}
|
|
878
|
+
/**
|
|
879
|
+
* Extract hotel name/slug from a Booking.com URL.
|
|
880
|
+
* Handles formats like:
|
|
881
|
+
* - https://www.booking.com/hotel/fr/la-sanguine.html
|
|
882
|
+
* - /hotel/fr/la-sanguine.html
|
|
883
|
+
* - la-sanguine
|
|
884
|
+
*/
|
|
885
|
+
extractHotelNameFromUrl(urlOrName) {
|
|
886
|
+
// If it's just a name/slug (no slashes), return as-is
|
|
887
|
+
if (!urlOrName.includes("/")) {
|
|
888
|
+
return urlOrName.replace(/\.html$/, "");
|
|
889
|
+
}
|
|
890
|
+
// Extract from URL pattern: /hotel/{country}/{name}.html
|
|
891
|
+
const match = urlOrName.match(/\/hotel\/[a-z]{2}\/([^/.]+)/i);
|
|
892
|
+
if (match && match[1]) {
|
|
893
|
+
return match[1];
|
|
894
|
+
}
|
|
895
|
+
// Fallback: try to get the last path segment
|
|
896
|
+
const parts = urlOrName.split("/").filter(Boolean);
|
|
897
|
+
const lastPart = parts[parts.length - 1];
|
|
898
|
+
return lastPart?.replace(/\.html$/, "") || null;
|
|
899
|
+
}
|
|
900
|
+
/**
|
|
901
|
+
* Extract hotel rate details from Apollo cache.
|
|
902
|
+
* Finds the hotel matching the given name and extracts rate info from
|
|
903
|
+
* the `blocks` array and `matchingUnitConfigurations`.
|
|
904
|
+
*/
|
|
905
|
+
async extractHotelRateFromAPI(hotelSlug, filters) {
|
|
906
|
+
if (!this.page)
|
|
907
|
+
return null;
|
|
908
|
+
// Bed type mapping for filter matching
|
|
909
|
+
const bedTypeMap = {
|
|
910
|
+
single: 1,
|
|
911
|
+
twin: 2,
|
|
912
|
+
double: 3,
|
|
913
|
+
queen: 5,
|
|
914
|
+
king: 6,
|
|
915
|
+
};
|
|
916
|
+
const targetBedType = filters?.bedType ? bedTypeMap[filters.bedType] : undefined;
|
|
917
|
+
return await this.page.evaluate(({ hotelSlug, targetBedType }) => {
|
|
918
|
+
try {
|
|
919
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
920
|
+
const w = window;
|
|
921
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
922
|
+
if (!cache)
|
|
923
|
+
return null;
|
|
924
|
+
const rootQuery = cache["ROOT_QUERY"];
|
|
925
|
+
if (!rootQuery)
|
|
926
|
+
return null;
|
|
927
|
+
const searchQueries = rootQuery.searchQueries;
|
|
928
|
+
if (!searchQueries)
|
|
929
|
+
return null;
|
|
930
|
+
const searchKey = Object.keys(searchQueries).find((k) => k.startsWith("search("));
|
|
931
|
+
if (!searchKey)
|
|
932
|
+
return null;
|
|
933
|
+
const searchOutput = searchQueries[searchKey];
|
|
934
|
+
const searchResults = searchOutput?.results;
|
|
935
|
+
if (!searchResults || !Array.isArray(searchResults))
|
|
936
|
+
return null;
|
|
937
|
+
// Find hotel matching the slug (check pageName)
|
|
938
|
+
const normalizedSlug = hotelSlug.toLowerCase().replace(/-/g, "");
|
|
939
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
940
|
+
const hotel = searchResults.find((h) => {
|
|
941
|
+
if (!h)
|
|
942
|
+
return false;
|
|
943
|
+
const pageName = h.basicPropertyData?.pageName?.toLowerCase()?.replace(/-/g, "") || "";
|
|
944
|
+
const displayName = h.displayName?.text?.toLowerCase()?.replace(/\s+/g, "") || "";
|
|
945
|
+
return pageName.includes(normalizedSlug) ||
|
|
946
|
+
normalizedSlug.includes(pageName) ||
|
|
947
|
+
displayName.includes(normalizedSlug) ||
|
|
948
|
+
normalizedSlug.includes(displayName);
|
|
949
|
+
});
|
|
950
|
+
if (!hotel)
|
|
951
|
+
return null;
|
|
952
|
+
// Extract basic hotel info
|
|
953
|
+
const hotelName = hotel.displayName?.text || hotel.basicPropertyData?.pageName || "Unknown";
|
|
954
|
+
const pageName = hotel.basicPropertyData?.pageName || "";
|
|
955
|
+
const countryCode = hotel.basicPropertyData?.location?.countryCode || "";
|
|
956
|
+
const hotelId = hotel.basicPropertyData?.id?.toString() || "";
|
|
957
|
+
const hotelUrl = countryCode && pageName
|
|
958
|
+
? `https://www.booking.com/hotel/${countryCode}/${pageName}.html`
|
|
959
|
+
: "";
|
|
960
|
+
// Get price info
|
|
961
|
+
const priceInfo = hotel.priceDisplayInfoIrene;
|
|
962
|
+
const displayPrice = priceInfo?.displayPrice?.amountPerStay;
|
|
963
|
+
const price = displayPrice?.amountUnformatted ?? 0;
|
|
964
|
+
const priceDisplay = displayPrice?.amountRounded || displayPrice?.amount || "$0";
|
|
965
|
+
const currency = displayPrice?.currency || "USD";
|
|
966
|
+
const pricePerNight = priceInfo?.averagePricePerNight?.amountUnformatted ?? 0;
|
|
967
|
+
// Get blocks array (rate options)
|
|
968
|
+
const blocks = hotel.blocks;
|
|
969
|
+
if (!blocks || !Array.isArray(blocks) || blocks.length === 0) {
|
|
970
|
+
// No blocks, return basic info without detailed rate
|
|
971
|
+
return {
|
|
972
|
+
hotelName,
|
|
973
|
+
hotelId,
|
|
974
|
+
hotelUrl,
|
|
975
|
+
checkIn: "",
|
|
976
|
+
checkOut: "",
|
|
977
|
+
guests: 0,
|
|
978
|
+
rooms: 0,
|
|
979
|
+
roomName: "Unknown",
|
|
980
|
+
roomId: "",
|
|
981
|
+
price,
|
|
982
|
+
priceDisplay,
|
|
983
|
+
pricePerNight,
|
|
984
|
+
currency,
|
|
985
|
+
mealPlan: "Unknown",
|
|
986
|
+
cancellationPolicy: "Unknown",
|
|
987
|
+
freeCancellationUntil: null,
|
|
988
|
+
bedType: "Unknown",
|
|
989
|
+
bedCount: 0,
|
|
990
|
+
};
|
|
991
|
+
}
|
|
992
|
+
// Get the first (cheapest/best match) block
|
|
993
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
994
|
+
const block = blocks[0];
|
|
995
|
+
const blockId = block.blockId || {};
|
|
996
|
+
const roomId = blockId.roomId?.toString() || "";
|
|
997
|
+
const mealPlanId = blockId.mealPlanId;
|
|
998
|
+
// Meal plan mapping
|
|
999
|
+
const mealPlanNames = {
|
|
1000
|
+
0: "Room only",
|
|
1001
|
+
1: "Breakfast included",
|
|
1002
|
+
2: "Half board",
|
|
1003
|
+
3: "Full board",
|
|
1004
|
+
4: "All-inclusive",
|
|
1005
|
+
};
|
|
1006
|
+
const mealPlan = mealPlanNames[mealPlanId] || "Room only";
|
|
1007
|
+
// Cancellation policy
|
|
1008
|
+
const freeCancellationUntil = block.freeCancellationUntil || null;
|
|
1009
|
+
const cancellationPolicy = freeCancellationUntil
|
|
1010
|
+
? `Free cancellation until ${freeCancellationUntil}`
|
|
1011
|
+
: "Non-refundable";
|
|
1012
|
+
// Get room name and bed configuration from matchingUnitConfigurations
|
|
1013
|
+
let roomName = "Standard Room";
|
|
1014
|
+
let bedType = "Unknown";
|
|
1015
|
+
let bedCount = 0;
|
|
1016
|
+
const unitConfigs = hotel.matchingUnitConfigurations?.unitConfigurations;
|
|
1017
|
+
if (unitConfigs && Array.isArray(unitConfigs)) {
|
|
1018
|
+
// If filtering by bed type, try to find matching config
|
|
1019
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1020
|
+
let matchingConfig = unitConfigs[0];
|
|
1021
|
+
if (targetBedType !== undefined) {
|
|
1022
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1023
|
+
const bedMatch = unitConfigs.find((config) => {
|
|
1024
|
+
const beds = config.bedConfigurations?.[0]?.beds || [];
|
|
1025
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1026
|
+
return beds.some((bed) => bed.type === targetBedType);
|
|
1027
|
+
});
|
|
1028
|
+
if (bedMatch) {
|
|
1029
|
+
matchingConfig = bedMatch;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
if (matchingConfig) {
|
|
1033
|
+
roomName = matchingConfig.name || roomName;
|
|
1034
|
+
const beds = matchingConfig.bedConfigurations?.[0]?.beds;
|
|
1035
|
+
if (beds && beds.length > 0) {
|
|
1036
|
+
const firstBed = beds[0];
|
|
1037
|
+
bedCount = firstBed.count || 1;
|
|
1038
|
+
// Reverse map bed type
|
|
1039
|
+
const bedTypeNames = {
|
|
1040
|
+
1: "Single",
|
|
1041
|
+
2: "Twin",
|
|
1042
|
+
3: "Double",
|
|
1043
|
+
4: "Large Double",
|
|
1044
|
+
5: "Queen",
|
|
1045
|
+
6: "King",
|
|
1046
|
+
7: "Super King",
|
|
1047
|
+
};
|
|
1048
|
+
bedType = bedTypeNames[firstBed.type] || "Unknown";
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
1052
|
+
return {
|
|
1053
|
+
hotelName,
|
|
1054
|
+
hotelId,
|
|
1055
|
+
hotelUrl,
|
|
1056
|
+
checkIn: "", // Will be filled by caller
|
|
1057
|
+
checkOut: "",
|
|
1058
|
+
guests: 0,
|
|
1059
|
+
rooms: 0,
|
|
1060
|
+
roomName,
|
|
1061
|
+
roomId,
|
|
1062
|
+
price,
|
|
1063
|
+
priceDisplay,
|
|
1064
|
+
pricePerNight,
|
|
1065
|
+
currency,
|
|
1066
|
+
mealPlan,
|
|
1067
|
+
cancellationPolicy,
|
|
1068
|
+
freeCancellationUntil,
|
|
1069
|
+
bedType,
|
|
1070
|
+
bedCount,
|
|
1071
|
+
};
|
|
1072
|
+
}
|
|
1073
|
+
catch {
|
|
1074
|
+
return null;
|
|
1075
|
+
}
|
|
1076
|
+
}, { hotelSlug, targetBedType });
|
|
1077
|
+
}
|
|
788
1078
|
async dismissPopups() {
|
|
789
1079
|
if (!this.page)
|
|
790
1080
|
return;
|
|
@@ -879,10 +1169,11 @@ export class HotelBrowser {
|
|
|
879
1169
|
let priceText = "";
|
|
880
1170
|
let price = null;
|
|
881
1171
|
// First price element is usually per night
|
|
882
|
-
|
|
883
|
-
|
|
1172
|
+
const firstPriceEl = allPriceEls[0];
|
|
1173
|
+
if (firstPriceEl) {
|
|
1174
|
+
priceText = firstPriceEl.textContent?.trim() || "";
|
|
884
1175
|
const priceMatch = priceText.match(/\$?([\d,]+)/);
|
|
885
|
-
price = priceMatch ? parseInt(priceMatch[1].replace(",", "")) : null;
|
|
1176
|
+
price = priceMatch?.[1] ? parseInt(priceMatch[1].replace(",", "")) : null;
|
|
886
1177
|
}
|
|
887
1178
|
// Rating - look for the numeric score
|
|
888
1179
|
const ratingScoreEl = card.querySelector('[data-testid="review-score"] .dff2e52086');
|
|
@@ -895,7 +1186,7 @@ export class HotelBrowser {
|
|
|
895
1186
|
const reviewCountEl = card.querySelector('[data-testid="review-score"] .fb14de7f14');
|
|
896
1187
|
const reviewText = reviewCountEl?.textContent || "";
|
|
897
1188
|
const reviewMatch = reviewText.match(/([\d,]+)/);
|
|
898
|
-
const reviewCount = reviewMatch ? parseInt(reviewMatch[1].replace(",", "")) : null;
|
|
1189
|
+
const reviewCount = reviewMatch?.[1] ? parseInt(reviewMatch[1].replace(",", "")) : null;
|
|
899
1190
|
// Distance to center
|
|
900
1191
|
const distanceEl = card.querySelector('[data-testid="distance"]');
|
|
901
1192
|
const distanceToCenter = distanceEl?.textContent?.trim() || "";
|
|
@@ -987,6 +1278,796 @@ export class HotelBrowser {
|
|
|
987
1278
|
return results;
|
|
988
1279
|
});
|
|
989
1280
|
}
|
|
1281
|
+
/**
|
|
1282
|
+
* Extract hotel data from Booking.com's Apollo GraphQL cache.
|
|
1283
|
+
* This is more reliable than DOM scraping as it uses structured data.
|
|
1284
|
+
* Falls back gracefully if the cache structure changes.
|
|
1285
|
+
*/
|
|
1286
|
+
async extractHotelsFromAPI() {
|
|
1287
|
+
if (!this.page)
|
|
1288
|
+
return [];
|
|
1289
|
+
return await this.page.evaluate(() => {
|
|
1290
|
+
try {
|
|
1291
|
+
// Access the Apollo cache embedded in the page
|
|
1292
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1293
|
+
const w = window;
|
|
1294
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1295
|
+
if (!cache)
|
|
1296
|
+
return [];
|
|
1297
|
+
const rootQuery = cache['ROOT_QUERY'];
|
|
1298
|
+
if (!rootQuery)
|
|
1299
|
+
return [];
|
|
1300
|
+
// searchQueries contains the search results
|
|
1301
|
+
const searchQueries = rootQuery.searchQueries;
|
|
1302
|
+
if (!searchQueries)
|
|
1303
|
+
return [];
|
|
1304
|
+
// Find the search key (complex key with query parameters)
|
|
1305
|
+
const searchKey = Object.keys(searchQueries).find(k => k.startsWith('search('));
|
|
1306
|
+
if (!searchKey)
|
|
1307
|
+
return [];
|
|
1308
|
+
const searchOutput = searchQueries[searchKey];
|
|
1309
|
+
if (!searchOutput)
|
|
1310
|
+
return [];
|
|
1311
|
+
// Get the results array
|
|
1312
|
+
const searchResults = searchOutput.results;
|
|
1313
|
+
if (!searchResults || !Array.isArray(searchResults))
|
|
1314
|
+
return [];
|
|
1315
|
+
const results = [];
|
|
1316
|
+
for (const hotel of searchResults) {
|
|
1317
|
+
if (!hotel)
|
|
1318
|
+
continue;
|
|
1319
|
+
// Skip sponsored/native ad listings
|
|
1320
|
+
const persuasion = hotel.persuasion;
|
|
1321
|
+
if (persuasion?.showNativeAdLabel || persuasion?.nativeAdId) {
|
|
1322
|
+
continue;
|
|
1323
|
+
}
|
|
1324
|
+
// Extract name
|
|
1325
|
+
const name = hotel.displayName?.text || hotel.basicPropertyData?.pageName || 'Unknown';
|
|
1326
|
+
// Extract price
|
|
1327
|
+
let price = null;
|
|
1328
|
+
let priceDisplay = 'Price not shown';
|
|
1329
|
+
const priceInfo = hotel.priceDisplayInfoIrene?.displayPrice?.amountPerStay;
|
|
1330
|
+
if (priceInfo) {
|
|
1331
|
+
priceDisplay = priceInfo.amountRounded || priceInfo.amount || priceDisplay;
|
|
1332
|
+
price = typeof priceInfo.amountUnformatted === 'number' ? priceInfo.amountUnformatted : null;
|
|
1333
|
+
}
|
|
1334
|
+
// Extract rating and reviews from basicPropertyData.reviews
|
|
1335
|
+
let rating = null;
|
|
1336
|
+
let ratingText = '';
|
|
1337
|
+
let reviewCount = null;
|
|
1338
|
+
const reviews = hotel.basicPropertyData?.reviews;
|
|
1339
|
+
if (reviews) {
|
|
1340
|
+
rating = typeof reviews.totalScore === 'number' ? reviews.totalScore : null;
|
|
1341
|
+
ratingText = reviews.totalScoreTextTag?.translation || '';
|
|
1342
|
+
reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
|
|
1343
|
+
}
|
|
1344
|
+
// Extract location
|
|
1345
|
+
const location = hotel.location?.displayLocation || '';
|
|
1346
|
+
const distanceToCenter = hotel.location?.mainDistance || '';
|
|
1347
|
+
// Build thumbnail URL
|
|
1348
|
+
let thumbnailUrl = null;
|
|
1349
|
+
const mainPhoto = hotel.basicPropertyData?.photos?.main;
|
|
1350
|
+
if (mainPhoto) {
|
|
1351
|
+
const relativeUrl = mainPhoto.highResJpegUrl?.relativeUrl ||
|
|
1352
|
+
mainPhoto.highResUrl?.relativeUrl ||
|
|
1353
|
+
mainPhoto.lowResJpegUrl?.relativeUrl;
|
|
1354
|
+
if (relativeUrl) {
|
|
1355
|
+
thumbnailUrl = `https://cf.bstatic.com${relativeUrl}`;
|
|
1356
|
+
}
|
|
1357
|
+
}
|
|
1358
|
+
// Build link with country code (required for API data to load on detail page)
|
|
1359
|
+
let link = '';
|
|
1360
|
+
const pageName = hotel.basicPropertyData?.pageName;
|
|
1361
|
+
const countryCode = hotel.basicPropertyData?.location?.countryCode;
|
|
1362
|
+
if (pageName && countryCode) {
|
|
1363
|
+
link = `https://www.booking.com/hotel/${countryCode}/${pageName}.html`;
|
|
1364
|
+
}
|
|
1365
|
+
else if (pageName) {
|
|
1366
|
+
// Fallback without country code (less reliable for API extraction)
|
|
1367
|
+
link = `https://www.booking.com/hotel/${pageName}.html`;
|
|
1368
|
+
}
|
|
1369
|
+
// Extract amenities and highlights
|
|
1370
|
+
const amenities = [];
|
|
1371
|
+
const highlights = [];
|
|
1372
|
+
// Sustainability
|
|
1373
|
+
if (hotel.propertySustainability?.isSustainable) {
|
|
1374
|
+
amenities.push('Sustainable');
|
|
1375
|
+
}
|
|
1376
|
+
// Policies
|
|
1377
|
+
const policies = hotel.policies;
|
|
1378
|
+
if (policies?.showFreeCancellation) {
|
|
1379
|
+
highlights.push('Free Cancellation');
|
|
1380
|
+
}
|
|
1381
|
+
if (policies?.showNoPrepayment) {
|
|
1382
|
+
highlights.push('No Prepayment');
|
|
1383
|
+
}
|
|
1384
|
+
if (policies?.showPetsAllowedForFree) {
|
|
1385
|
+
amenities.push('Pet Friendly');
|
|
1386
|
+
}
|
|
1387
|
+
// Meal plan
|
|
1388
|
+
if (hotel.mealPlanIncluded?.mealPlanType) {
|
|
1389
|
+
amenities.push('Breakfast Included');
|
|
1390
|
+
}
|
|
1391
|
+
// Extract availability info
|
|
1392
|
+
let availability = null;
|
|
1393
|
+
const soldOutInfo = hotel.soldOutInfo;
|
|
1394
|
+
if (soldOutInfo?.messages && soldOutInfo.messages.length > 0) {
|
|
1395
|
+
const msg = soldOutInfo.messages[0];
|
|
1396
|
+
if (msg?.text) {
|
|
1397
|
+
availability = msg.text;
|
|
1398
|
+
}
|
|
1399
|
+
}
|
|
1400
|
+
results.push({
|
|
1401
|
+
name,
|
|
1402
|
+
price,
|
|
1403
|
+
priceDisplay,
|
|
1404
|
+
rating,
|
|
1405
|
+
ratingText,
|
|
1406
|
+
reviewCount,
|
|
1407
|
+
location,
|
|
1408
|
+
distanceToCenter,
|
|
1409
|
+
amenities,
|
|
1410
|
+
highlights,
|
|
1411
|
+
link,
|
|
1412
|
+
thumbnailUrl,
|
|
1413
|
+
availability,
|
|
1414
|
+
});
|
|
1415
|
+
}
|
|
1416
|
+
return results;
|
|
1417
|
+
}
|
|
1418
|
+
catch {
|
|
1419
|
+
// If anything goes wrong with API extraction, return empty to trigger fallback
|
|
1420
|
+
return [];
|
|
1421
|
+
}
|
|
1422
|
+
});
|
|
1423
|
+
}
|
|
1424
|
+
/**
|
|
1425
|
+
* Extract hotel details from Booking.com's Apollo GraphQL cache on a hotel detail page.
|
|
1426
|
+
* This is more reliable than DOM scraping as it uses structured data.
|
|
1427
|
+
* Returns null if extraction fails (triggering DOM fallback).
|
|
1428
|
+
*/
|
|
1429
|
+
async extractHotelDetailsFromAPI() {
|
|
1430
|
+
if (!this.page)
|
|
1431
|
+
return null;
|
|
1432
|
+
return await this.page.evaluate(() => {
|
|
1433
|
+
try {
|
|
1434
|
+
// Access the Apollo cache embedded in the page
|
|
1435
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1436
|
+
const w = window;
|
|
1437
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1438
|
+
if (!cache)
|
|
1439
|
+
return null;
|
|
1440
|
+
// Helper to resolve __ref pointers
|
|
1441
|
+
const resolveRef = (ref) => {
|
|
1442
|
+
if (ref && typeof ref === 'object' && '__ref' in ref) {
|
|
1443
|
+
return cache[ref.__ref];
|
|
1444
|
+
}
|
|
1445
|
+
return ref;
|
|
1446
|
+
};
|
|
1447
|
+
// Find the Property entry - it has a key like 'Property:{"id":6523595}'
|
|
1448
|
+
const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
|
|
1449
|
+
if (!propertyKey)
|
|
1450
|
+
return null;
|
|
1451
|
+
const property = cache[propertyKey];
|
|
1452
|
+
if (!property)
|
|
1453
|
+
return null;
|
|
1454
|
+
// Extract hotel ID from the property key
|
|
1455
|
+
const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
|
|
1456
|
+
const hotelId = idMatch ? idMatch[1] : null;
|
|
1457
|
+
// Get BasicPropertyData for address and location
|
|
1458
|
+
const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
|
|
1459
|
+
const basicData = basicDataKey ? cache[basicDataKey] : null;
|
|
1460
|
+
// Extract name
|
|
1461
|
+
const name = property.name || basicData?.name || 'Unknown';
|
|
1462
|
+
// Extract rating and reviews from property.reviews
|
|
1463
|
+
let rating = null;
|
|
1464
|
+
let ratingText = '';
|
|
1465
|
+
let reviewCount = null;
|
|
1466
|
+
const reviews = property.reviews;
|
|
1467
|
+
if (reviews) {
|
|
1468
|
+
reviewCount = typeof reviews.reviewsCount === 'number' ? reviews.reviewsCount : null;
|
|
1469
|
+
// Find the total score from questions array
|
|
1470
|
+
const questions = reviews.questions;
|
|
1471
|
+
if (Array.isArray(questions)) {
|
|
1472
|
+
const totalQ = questions.find((q) => q?.name === 'total');
|
|
1473
|
+
if (totalQ && typeof totalQ.score === 'number') {
|
|
1474
|
+
const score = totalQ.score;
|
|
1475
|
+
rating = score;
|
|
1476
|
+
// Generate rating text based on score
|
|
1477
|
+
if (score >= 9)
|
|
1478
|
+
ratingText = 'Superb';
|
|
1479
|
+
else if (score >= 8)
|
|
1480
|
+
ratingText = 'Very Good';
|
|
1481
|
+
else if (score >= 7)
|
|
1482
|
+
ratingText = 'Good';
|
|
1483
|
+
else if (score >= 6)
|
|
1484
|
+
ratingText = 'Pleasant';
|
|
1485
|
+
else
|
|
1486
|
+
ratingText = 'Review score';
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
}
|
|
1490
|
+
// Extract address from BasicPropertyData
|
|
1491
|
+
const address = basicData?.location?.formattedAddress ||
|
|
1492
|
+
basicData?.location?.formattedAddressShort || '';
|
|
1493
|
+
// Extract star rating from accommodation type
|
|
1494
|
+
let starRating = null;
|
|
1495
|
+
const accomType = resolveRef(property.accommodationType);
|
|
1496
|
+
if (accomType && typeof accomType === 'object' && 'starRating' in accomType) {
|
|
1497
|
+
starRating = accomType.starRating || null;
|
|
1498
|
+
}
|
|
1499
|
+
// Extract check-in/out times from houseRules
|
|
1500
|
+
let checkInTime = '';
|
|
1501
|
+
let checkOutTime = '';
|
|
1502
|
+
const houseRules = property.houseRules;
|
|
1503
|
+
if (houseRules?.checkinCheckoutTimes) {
|
|
1504
|
+
const times = houseRules.checkinCheckoutTimes;
|
|
1505
|
+
if (times.checkinTimeRange) {
|
|
1506
|
+
const from = times.checkinTimeRange.fromFormatted;
|
|
1507
|
+
const until = times.checkinTimeRange.untilFormatted;
|
|
1508
|
+
if (from && until) {
|
|
1509
|
+
checkInTime = `${from} - ${until}`;
|
|
1510
|
+
}
|
|
1511
|
+
else if (from) {
|
|
1512
|
+
checkInTime = `From ${from}`;
|
|
1513
|
+
}
|
|
1514
|
+
else if (until) {
|
|
1515
|
+
checkInTime = `Until ${until}`;
|
|
1516
|
+
}
|
|
1517
|
+
}
|
|
1518
|
+
if (times.checkoutTimeRange) {
|
|
1519
|
+
const from = times.checkoutTimeRange.fromFormatted;
|
|
1520
|
+
const until = times.checkoutTimeRange.untilFormatted;
|
|
1521
|
+
if (from && until) {
|
|
1522
|
+
checkOutTime = `${from} - ${until}`;
|
|
1523
|
+
}
|
|
1524
|
+
else if (until) {
|
|
1525
|
+
checkOutTime = `Until ${until}`;
|
|
1526
|
+
}
|
|
1527
|
+
else if (from) {
|
|
1528
|
+
checkOutTime = `From ${from}`;
|
|
1529
|
+
}
|
|
1530
|
+
}
|
|
1531
|
+
}
|
|
1532
|
+
// Extract popular facilities from accommodationHighlights
|
|
1533
|
+
const popularFacilities = [];
|
|
1534
|
+
const highlightKeys = Object.keys(property).filter(k => k.startsWith('accommodationHighlights('));
|
|
1535
|
+
for (const key of highlightKeys) {
|
|
1536
|
+
const highlights = property[key];
|
|
1537
|
+
if (Array.isArray(highlights)) {
|
|
1538
|
+
for (const item of highlights) {
|
|
1539
|
+
const entities = item?.entities;
|
|
1540
|
+
if (Array.isArray(entities)) {
|
|
1541
|
+
for (const entity of entities) {
|
|
1542
|
+
// Direct title (like BreakfastHighlight)
|
|
1543
|
+
if (entity?.title) {
|
|
1544
|
+
popularFacilities.push(entity.title);
|
|
1545
|
+
}
|
|
1546
|
+
// Resolve __ref for GenericFacilityHighlight, WifiFacilityHighlight, etc.
|
|
1547
|
+
const resolved = resolveRef(entity);
|
|
1548
|
+
if (resolved && typeof resolved === 'object' && 'title' in resolved) {
|
|
1549
|
+
const title = resolved.title;
|
|
1550
|
+
if (title && !popularFacilities.includes(title)) {
|
|
1551
|
+
popularFacilities.push(title);
|
|
1552
|
+
}
|
|
1553
|
+
}
|
|
1554
|
+
}
|
|
1555
|
+
}
|
|
1556
|
+
}
|
|
1557
|
+
}
|
|
1558
|
+
}
|
|
1559
|
+
// Extract all facilities from highlights (popularity based)
|
|
1560
|
+
const allFacilities = [];
|
|
1561
|
+
const facilityKeys = Object.keys(property).filter(k => k.startsWith('highlights('));
|
|
1562
|
+
for (const key of facilityKeys) {
|
|
1563
|
+
const highlightData = property[key];
|
|
1564
|
+
const entities = highlightData?.entities;
|
|
1565
|
+
if (Array.isArray(entities)) {
|
|
1566
|
+
for (const entity of entities) {
|
|
1567
|
+
// Skip Meal type entries
|
|
1568
|
+
if (entity?.__typename === 'Meal')
|
|
1569
|
+
continue;
|
|
1570
|
+
const resolved = resolveRef(entity);
|
|
1571
|
+
if (resolved && typeof resolved === 'object') {
|
|
1572
|
+
// For BaseFacility, look at instances
|
|
1573
|
+
const instances = resolved.instances;
|
|
1574
|
+
if (Array.isArray(instances)) {
|
|
1575
|
+
for (const inst of instances) {
|
|
1576
|
+
const resolvedInst = resolveRef(inst);
|
|
1577
|
+
if (resolvedInst && typeof resolvedInst === 'object' && 'title' in resolvedInst) {
|
|
1578
|
+
const title = resolvedInst.title;
|
|
1579
|
+
if (title && !allFacilities.includes(title)) {
|
|
1580
|
+
allFacilities.push(title);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1584
|
+
}
|
|
1585
|
+
}
|
|
1586
|
+
}
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1589
|
+
// Extract photos from propertyGallery
|
|
1590
|
+
const photos = [];
|
|
1591
|
+
const galleryKeys = Object.keys(property).filter(k => k.startsWith('propertyGallery('));
|
|
1592
|
+
for (const key of galleryKeys) {
|
|
1593
|
+
const gallery = property[key];
|
|
1594
|
+
// Main photo
|
|
1595
|
+
if (gallery?.mainPhoto) {
|
|
1596
|
+
const mainPhoto = resolveRef(gallery.mainPhoto);
|
|
1597
|
+
if (mainPhoto && typeof mainPhoto === 'object') {
|
|
1598
|
+
// Look for resource with max500 or max1024x768
|
|
1599
|
+
const photoObj = mainPhoto;
|
|
1600
|
+
const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
|
|
1601
|
+
if (resourceKey) {
|
|
1602
|
+
const resource = photoObj[resourceKey];
|
|
1603
|
+
if (resource?.absoluteUrl) {
|
|
1604
|
+
photos.push(resource.absoluteUrl);
|
|
1605
|
+
}
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
// Room photos
|
|
1610
|
+
const roomPhotos = gallery?.roomPhotos;
|
|
1611
|
+
if (Array.isArray(roomPhotos)) {
|
|
1612
|
+
for (const room of roomPhotos) {
|
|
1613
|
+
const roomPhotosList = room?.photos;
|
|
1614
|
+
if (Array.isArray(roomPhotosList) && photos.length < 5) {
|
|
1615
|
+
for (const photoRef of roomPhotosList) {
|
|
1616
|
+
if (photos.length >= 5)
|
|
1617
|
+
break;
|
|
1618
|
+
const photo = resolveRef(photoRef);
|
|
1619
|
+
if (photo && typeof photo === 'object') {
|
|
1620
|
+
const photoObj = photo;
|
|
1621
|
+
const resourceKey = Object.keys(photoObj).find(k => k.includes('max500') || k.includes('max1024'));
|
|
1622
|
+
if (resourceKey) {
|
|
1623
|
+
const resource = photoObj[resourceKey];
|
|
1624
|
+
if (resource?.absoluteUrl && !photos.includes(resource.absoluteUrl)) {
|
|
1625
|
+
photos.push(resource.absoluteUrl);
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1628
|
+
}
|
|
1629
|
+
}
|
|
1630
|
+
}
|
|
1631
|
+
}
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
// Extract room types from property.rooms
|
|
1635
|
+
const roomTypes = [];
|
|
1636
|
+
const rooms = property.rooms;
|
|
1637
|
+
if (Array.isArray(rooms)) {
|
|
1638
|
+
for (const roomRef of rooms) {
|
|
1639
|
+
const room = resolveRef(roomRef);
|
|
1640
|
+
if (room && typeof room === 'object') {
|
|
1641
|
+
const roomObj = room;
|
|
1642
|
+
const roomName = roomObj.name || roomObj.description;
|
|
1643
|
+
if (roomName && !roomTypes.includes(roomName)) {
|
|
1644
|
+
roomTypes.push(roomName);
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
// Extract location info
|
|
1650
|
+
let locationInfo = '';
|
|
1651
|
+
if (basicData?.location) {
|
|
1652
|
+
const loc = basicData.location;
|
|
1653
|
+
const parts = [];
|
|
1654
|
+
if (loc.city)
|
|
1655
|
+
parts.push(loc.city);
|
|
1656
|
+
if (loc.countryCode)
|
|
1657
|
+
parts.push(loc.countryCode.toUpperCase());
|
|
1658
|
+
locationInfo = parts.join(', ');
|
|
1659
|
+
if (loc.latitude && loc.longitude) {
|
|
1660
|
+
locationInfo += ` (${loc.latitude.toFixed(4)}, ${loc.longitude.toFixed(4)})`;
|
|
1661
|
+
}
|
|
1662
|
+
}
|
|
1663
|
+
// Extract review category scores for highlights
|
|
1664
|
+
const guestReviewHighlights = [];
|
|
1665
|
+
if (reviews?.questions && Array.isArray(reviews.questions)) {
|
|
1666
|
+
const categoryNames = {
|
|
1667
|
+
'hotel_staff': 'Staff',
|
|
1668
|
+
'hotel_location': 'Location',
|
|
1669
|
+
'hotel_clean': 'Cleanliness',
|
|
1670
|
+
'hotel_comfort': 'Comfort',
|
|
1671
|
+
'hotel_value': 'Value for money',
|
|
1672
|
+
'hotel_services': 'Facilities',
|
|
1673
|
+
'hotel_free_wifi': 'Free WiFi'
|
|
1674
|
+
};
|
|
1675
|
+
for (const q of reviews.questions) {
|
|
1676
|
+
if (q?.name && q.name !== 'total' && typeof q.score === 'number') {
|
|
1677
|
+
const displayName = categoryNames[q.name] || q.name;
|
|
1678
|
+
if (categoryNames[q.name]) {
|
|
1679
|
+
guestReviewHighlights.push(`${displayName}: ${q.score.toFixed(1)}`);
|
|
1680
|
+
}
|
|
1681
|
+
}
|
|
1682
|
+
}
|
|
1683
|
+
}
|
|
1684
|
+
// Validate we have meaningful data before returning
|
|
1685
|
+
// Name should be a proper hotel name (at least 3 chars, not 'Unknown')
|
|
1686
|
+
if (!name || name === 'Unknown' || name.length < 3) {
|
|
1687
|
+
return null; // Trigger DOM fallback
|
|
1688
|
+
}
|
|
1689
|
+
// Note: Description, pricePerNight, totalPrice, nearbyAttractions may need DOM fallback
|
|
1690
|
+
// as they're not consistently in the Apollo cache or are dynamic
|
|
1691
|
+
return {
|
|
1692
|
+
name,
|
|
1693
|
+
rating,
|
|
1694
|
+
ratingText,
|
|
1695
|
+
reviewCount,
|
|
1696
|
+
starRating,
|
|
1697
|
+
address,
|
|
1698
|
+
description: '', // Not typically in cache, will need DOM fallback if needed
|
|
1699
|
+
highlights: popularFacilities.slice(0, 5).join(', '),
|
|
1700
|
+
pricePerNight: null, // Dynamic, not in cache
|
|
1701
|
+
priceDisplay: '',
|
|
1702
|
+
totalPrice: '',
|
|
1703
|
+
checkInTime,
|
|
1704
|
+
checkOutTime,
|
|
1705
|
+
popularFacilities: popularFacilities.slice(0, 15),
|
|
1706
|
+
allFacilities: allFacilities.slice(0, 30),
|
|
1707
|
+
roomTypes: roomTypes.slice(0, 5),
|
|
1708
|
+
photos: photos.slice(0, 5),
|
|
1709
|
+
nearbyAttractions: [], // Would need propertySurroundings query
|
|
1710
|
+
guestReviewHighlights: guestReviewHighlights.slice(0, 7),
|
|
1711
|
+
locationInfo
|
|
1712
|
+
};
|
|
1713
|
+
}
|
|
1714
|
+
catch {
|
|
1715
|
+
// If anything goes wrong with API extraction, return null to trigger fallback
|
|
1716
|
+
return null;
|
|
1717
|
+
}
|
|
1718
|
+
});
|
|
1719
|
+
}
|
|
1720
|
+
/**
|
|
1721
|
+
* Fetch room facilities via Booking.com's GraphQL API.
|
|
1722
|
+
* This provides detailed amenities for each room type (AC, TV, bathroom details, etc.)
|
|
1723
|
+
* Must be called when already on a hotel page with an active session.
|
|
1724
|
+
*
|
|
1725
|
+
* @param hotelId - The numeric hotel ID (e.g., 6523595)
|
|
1726
|
+
* @param checkIn - Check-in date in YYYY-MM-DD format
|
|
1727
|
+
* @param checkOut - Check-out date in YYYY-MM-DD format
|
|
1728
|
+
* @returns Map of roomId to array of amenity categories
|
|
1729
|
+
*/
|
|
1730
|
+
async fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut) {
|
|
1731
|
+
if (!this.page)
|
|
1732
|
+
return new Map();
|
|
1733
|
+
try {
|
|
1734
|
+
const result = await this.page.evaluate(async ({ hotelId, checkIn, checkOut }) => {
|
|
1735
|
+
const query = `
|
|
1736
|
+
query RoomPageDesktopRDS($rdsInput: RDSRoomDetailQueryInput!) {
|
|
1737
|
+
roomDetail(roomDetailQueryInput: $rdsInput) {
|
|
1738
|
+
categorizedFacilitiesForAllRooms {
|
|
1739
|
+
roomId
|
|
1740
|
+
categorizedFacilities {
|
|
1741
|
+
category
|
|
1742
|
+
facilities {
|
|
1743
|
+
name
|
|
1744
|
+
id
|
|
1745
|
+
}
|
|
1746
|
+
}
|
|
1747
|
+
}
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
`;
|
|
1751
|
+
const variables = {
|
|
1752
|
+
rdsInput: {
|
|
1753
|
+
hotelId: String(hotelId),
|
|
1754
|
+
searchConfig: {
|
|
1755
|
+
searchConfigDate: {
|
|
1756
|
+
checkin: checkIn,
|
|
1757
|
+
checkout: checkOut,
|
|
1758
|
+
},
|
|
1759
|
+
nbRooms: 1,
|
|
1760
|
+
nbAdults: 2,
|
|
1761
|
+
nbChildren: 0,
|
|
1762
|
+
childrenAges: [],
|
|
1763
|
+
},
|
|
1764
|
+
highlightedBlocks: [],
|
|
1765
|
+
selectedFilters: '',
|
|
1766
|
+
travelReason: 'LEISURE',
|
|
1767
|
+
},
|
|
1768
|
+
};
|
|
1769
|
+
try {
|
|
1770
|
+
const response = await fetch('/dml/graphql', {
|
|
1771
|
+
method: 'POST',
|
|
1772
|
+
headers: {
|
|
1773
|
+
'Content-Type': 'application/json',
|
|
1774
|
+
'x-booking-topic': 'capla_browser_b-property-web-property-page',
|
|
1775
|
+
'x-booking-context-action-name': 'hotel',
|
|
1776
|
+
'apollographql-client-name': 'b-property-web-property-page_rust',
|
|
1777
|
+
},
|
|
1778
|
+
body: JSON.stringify({
|
|
1779
|
+
operationName: 'RoomPageDesktopRDS',
|
|
1780
|
+
variables,
|
|
1781
|
+
query,
|
|
1782
|
+
}),
|
|
1783
|
+
});
|
|
1784
|
+
if (!response.ok) {
|
|
1785
|
+
return { error: `HTTP ${response.status}` };
|
|
1786
|
+
}
|
|
1787
|
+
const data = await response.json();
|
|
1788
|
+
return data;
|
|
1789
|
+
}
|
|
1790
|
+
catch (e) {
|
|
1791
|
+
return { error: e instanceof Error ? e.message : 'Unknown error' };
|
|
1792
|
+
}
|
|
1793
|
+
}, { hotelId, checkIn, checkOut });
|
|
1794
|
+
if ('error' in result) {
|
|
1795
|
+
logger.debug({ error: result.error }, 'GraphQL room facilities fetch failed');
|
|
1796
|
+
return new Map();
|
|
1797
|
+
}
|
|
1798
|
+
// Parse the response into our map structure
|
|
1799
|
+
const facilitiesMap = new Map();
|
|
1800
|
+
const roomData = result?.data?.roomDetail?.categorizedFacilitiesForAllRooms || [];
|
|
1801
|
+
for (const room of roomData) {
|
|
1802
|
+
const roomId = String(room.roomId);
|
|
1803
|
+
const categories = [];
|
|
1804
|
+
for (const cat of room.categorizedFacilities || []) {
|
|
1805
|
+
categories.push({
|
|
1806
|
+
category: cat.category || 'General',
|
|
1807
|
+
items: (cat.facilities || []).map((f) => f.name || '').filter(Boolean),
|
|
1808
|
+
});
|
|
1809
|
+
}
|
|
1810
|
+
if (categories.length > 0) {
|
|
1811
|
+
facilitiesMap.set(roomId, categories);
|
|
1812
|
+
}
|
|
1813
|
+
}
|
|
1814
|
+
logger.debug({ roomCount: facilitiesMap.size }, 'Fetched room facilities via GraphQL');
|
|
1815
|
+
return facilitiesMap;
|
|
1816
|
+
}
|
|
1817
|
+
catch (error) {
|
|
1818
|
+
logger.debug({ error }, 'Failed to fetch room facilities via GraphQL');
|
|
1819
|
+
return new Map();
|
|
1820
|
+
}
|
|
1821
|
+
}
|
|
1822
|
+
/**
|
|
1823
|
+
* Extract hotel ID from the current page URL or DOM.
|
|
1824
|
+
* Booking.com hotel IDs are typically in the URL path or data attributes.
|
|
1825
|
+
*/
|
|
1826
|
+
async extractHotelId() {
|
|
1827
|
+
if (!this.page)
|
|
1828
|
+
return null;
|
|
1829
|
+
return await this.page.evaluate(() => {
|
|
1830
|
+
// Try to get from URL path (e.g., /hotel/fr/hotel-name.html?... contains ID in data)
|
|
1831
|
+
// Actually, the ID is often in data attributes or Apollo cache
|
|
1832
|
+
// Method 1: Look for data-hotel-id attribute
|
|
1833
|
+
const hotelIdEl = document.querySelector('[data-hotel-id]');
|
|
1834
|
+
if (hotelIdEl) {
|
|
1835
|
+
return hotelIdEl.getAttribute('data-hotel-id');
|
|
1836
|
+
}
|
|
1837
|
+
// Method 2: Look in Apollo cache
|
|
1838
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1839
|
+
const w = window;
|
|
1840
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1841
|
+
if (cache) {
|
|
1842
|
+
const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
|
|
1843
|
+
if (propertyKey) {
|
|
1844
|
+
const match = propertyKey.match(/Property:\{"id":(\d+)\}/);
|
|
1845
|
+
if (match && match[1])
|
|
1846
|
+
return match[1];
|
|
1847
|
+
}
|
|
1848
|
+
}
|
|
1849
|
+
// Method 3: Look for form inputs with hotel_id
|
|
1850
|
+
const hotelInput = document.querySelector('input[name="hotel_id"]');
|
|
1851
|
+
if (hotelInput?.value)
|
|
1852
|
+
return hotelInput.value;
|
|
1853
|
+
// Method 4: Look in data-block-id attributes (format: roomTypeId_policyId_hotelId_...)
|
|
1854
|
+
const blockEl = document.querySelector('[data-block-id]');
|
|
1855
|
+
if (blockEl) {
|
|
1856
|
+
const blockId = blockEl.getAttribute('data-block-id') || '';
|
|
1857
|
+
const parts = blockId.split('_');
|
|
1858
|
+
// Hotel ID is typically in position 2 (after roomTypeId and policyId)
|
|
1859
|
+
const potentialHotelId = parts[2];
|
|
1860
|
+
if (parts.length >= 3 && potentialHotelId && /^\d{5,}$/.test(potentialHotelId)) {
|
|
1861
|
+
return potentialHotelId;
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1864
|
+
return null;
|
|
1865
|
+
});
|
|
1866
|
+
}
|
|
1867
|
+
/**
|
|
1868
|
+
* Extract reviews data from Booking.com's Apollo GraphQL cache.
|
|
1869
|
+
* Returns null if extraction fails (triggering DOM fallback).
|
|
1870
|
+
*/
|
|
1871
|
+
async extractReviewsFromAPI() {
|
|
1872
|
+
if (!this.page)
|
|
1873
|
+
return null;
|
|
1874
|
+
return await this.page.evaluate(() => {
|
|
1875
|
+
try {
|
|
1876
|
+
// Access the Apollo cache embedded in the page
|
|
1877
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1878
|
+
const w = window;
|
|
1879
|
+
const cache = w.__caplaDataStore?.apollo?.cache?.data?.data;
|
|
1880
|
+
if (!cache)
|
|
1881
|
+
return null;
|
|
1882
|
+
// Helper to resolve __ref pointers
|
|
1883
|
+
const resolveRef = (ref) => {
|
|
1884
|
+
if (ref && typeof ref === 'object' && '__ref' in ref) {
|
|
1885
|
+
return cache[ref.__ref];
|
|
1886
|
+
}
|
|
1887
|
+
return ref;
|
|
1888
|
+
};
|
|
1889
|
+
// Find the Property entry - it has a key like 'Property:{"id":6523595}'
|
|
1890
|
+
const propertyKey = Object.keys(cache).find(k => k.startsWith('Property:{"id":'));
|
|
1891
|
+
if (!propertyKey)
|
|
1892
|
+
return null;
|
|
1893
|
+
const property = cache[propertyKey];
|
|
1894
|
+
if (!property)
|
|
1895
|
+
return null;
|
|
1896
|
+
// Extract hotel ID from the property key
|
|
1897
|
+
const idMatch = propertyKey.match(/Property:\{"id":(\d+)\}/);
|
|
1898
|
+
const hotelId = idMatch ? idMatch[1] : null;
|
|
1899
|
+
// Get BasicPropertyData for hotel name
|
|
1900
|
+
const basicDataKey = hotelId ? `BasicPropertyData:${hotelId}` : null;
|
|
1901
|
+
const basicData = basicDataKey ? cache[basicDataKey] : null;
|
|
1902
|
+
// Extract hotel name
|
|
1903
|
+
const hotelName = property.name || basicData?.name || '';
|
|
1904
|
+
// Extract overall rating, total reviews, and rating breakdown from property.reviews
|
|
1905
|
+
let overallRating = null;
|
|
1906
|
+
let totalReviews = 0;
|
|
1907
|
+
const ratingBreakdown = {
|
|
1908
|
+
staff: null,
|
|
1909
|
+
facilities: null,
|
|
1910
|
+
cleanliness: null,
|
|
1911
|
+
comfort: null,
|
|
1912
|
+
valueForMoney: null,
|
|
1913
|
+
location: null,
|
|
1914
|
+
freeWifi: null,
|
|
1915
|
+
};
|
|
1916
|
+
const reviewsData = property.reviews;
|
|
1917
|
+
if (reviewsData) {
|
|
1918
|
+
totalReviews = typeof reviewsData.reviewsCount === 'number' ? reviewsData.reviewsCount : 0;
|
|
1919
|
+
// Map question names to breakdown fields
|
|
1920
|
+
const questionMap = {
|
|
1921
|
+
'hotel_staff': 'staff',
|
|
1922
|
+
'hotel_services': 'facilities',
|
|
1923
|
+
'hotel_clean': 'cleanliness',
|
|
1924
|
+
'hotel_comfort': 'comfort',
|
|
1925
|
+
'hotel_value': 'valueForMoney',
|
|
1926
|
+
'hotel_location': 'location',
|
|
1927
|
+
'hotel_free_wifi': 'freeWifi',
|
|
1928
|
+
};
|
|
1929
|
+
const questions = reviewsData.questions;
|
|
1930
|
+
if (Array.isArray(questions)) {
|
|
1931
|
+
for (const q of questions) {
|
|
1932
|
+
if (!q?.name || typeof q.score !== 'number')
|
|
1933
|
+
continue;
|
|
1934
|
+
if (q.name === 'total') {
|
|
1935
|
+
overallRating = q.score;
|
|
1936
|
+
}
|
|
1937
|
+
else {
|
|
1938
|
+
const breakdownKey = questionMap[q.name];
|
|
1939
|
+
if (breakdownKey) {
|
|
1940
|
+
ratingBreakdown[breakdownKey] = q.score;
|
|
1941
|
+
}
|
|
1942
|
+
}
|
|
1943
|
+
}
|
|
1944
|
+
}
|
|
1945
|
+
}
|
|
1946
|
+
// Extract individual reviews from FeaturedReview entries
|
|
1947
|
+
const reviews = [];
|
|
1948
|
+
// Map customer types to display names
|
|
1949
|
+
const customerTypeMap = {
|
|
1950
|
+
'SOLO_TRAVELLER': 'Solo traveler',
|
|
1951
|
+
'YOUNG_COUPLE': 'Couple',
|
|
1952
|
+
'MATURE_COUPLE': 'Couple',
|
|
1953
|
+
'FAMILY_WITH_YOUNG_CHILDREN': 'Family with young children',
|
|
1954
|
+
'FAMILY_WITH_OLDER_CHILDREN': 'Family with older children',
|
|
1955
|
+
'WITH_FRIENDS': 'Group of friends',
|
|
1956
|
+
'BUSINESS': 'Business traveler',
|
|
1957
|
+
};
|
|
1958
|
+
// Map country codes to names
|
|
1959
|
+
const countryCodeMap = {
|
|
1960
|
+
'us': 'United States',
|
|
1961
|
+
'gb': 'United Kingdom',
|
|
1962
|
+
'fr': 'France',
|
|
1963
|
+
'de': 'Germany',
|
|
1964
|
+
'es': 'Spain',
|
|
1965
|
+
'it': 'Italy',
|
|
1966
|
+
'nl': 'Netherlands',
|
|
1967
|
+
'be': 'Belgium',
|
|
1968
|
+
'ch': 'Switzerland',
|
|
1969
|
+
'au': 'Australia',
|
|
1970
|
+
'ca': 'Canada',
|
|
1971
|
+
'jp': 'Japan',
|
|
1972
|
+
'cn': 'China',
|
|
1973
|
+
'kr': 'South Korea',
|
|
1974
|
+
'br': 'Brazil',
|
|
1975
|
+
'mx': 'Mexico',
|
|
1976
|
+
'in': 'India',
|
|
1977
|
+
'ru': 'Russia',
|
|
1978
|
+
'pl': 'Poland',
|
|
1979
|
+
'se': 'Sweden',
|
|
1980
|
+
'no': 'Norway',
|
|
1981
|
+
'dk': 'Denmark',
|
|
1982
|
+
'fi': 'Finland',
|
|
1983
|
+
'at': 'Austria',
|
|
1984
|
+
'pt': 'Portugal',
|
|
1985
|
+
'gr': 'Greece',
|
|
1986
|
+
'tr': 'Turkey',
|
|
1987
|
+
'ie': 'Ireland',
|
|
1988
|
+
'nz': 'New Zealand',
|
|
1989
|
+
'za': 'South Africa',
|
|
1990
|
+
'ar': 'Argentina',
|
|
1991
|
+
'cl': 'Chile',
|
|
1992
|
+
'co': 'Colombia',
|
|
1993
|
+
'th': 'Thailand',
|
|
1994
|
+
'sg': 'Singapore',
|
|
1995
|
+
'my': 'Malaysia',
|
|
1996
|
+
'id': 'Indonesia',
|
|
1997
|
+
'ph': 'Philippines',
|
|
1998
|
+
'vn': 'Vietnam',
|
|
1999
|
+
'ae': 'United Arab Emirates',
|
|
2000
|
+
'sa': 'Saudi Arabia',
|
|
2001
|
+
'eg': 'Egypt',
|
|
2002
|
+
'il': 'Israel',
|
|
2003
|
+
'cz': 'Czech Republic',
|
|
2004
|
+
'hu': 'Hungary',
|
|
2005
|
+
'ro': 'Romania',
|
|
2006
|
+
};
|
|
2007
|
+
// Find all FeaturedReview entries
|
|
2008
|
+
const reviewKeys = Object.keys(cache).filter(k => k.startsWith('FeaturedReview:'));
|
|
2009
|
+
for (const key of reviewKeys) {
|
|
2010
|
+
const review = cache[key];
|
|
2011
|
+
if (!review)
|
|
2012
|
+
continue;
|
|
2013
|
+
// Format the date from Unix timestamp
|
|
2014
|
+
let dateStr = '';
|
|
2015
|
+
if (typeof review.completed === 'number') {
|
|
2016
|
+
const date = new Date(review.completed * 1000);
|
|
2017
|
+
dateStr = date.toLocaleDateString('en-US', {
|
|
2018
|
+
year: 'numeric',
|
|
2019
|
+
month: 'long',
|
|
2020
|
+
day: 'numeric'
|
|
2021
|
+
});
|
|
2022
|
+
}
|
|
2023
|
+
// Get room type from ref
|
|
2024
|
+
let roomType = '';
|
|
2025
|
+
const roomRef = resolveRef(review.roomType);
|
|
2026
|
+
if (roomRef && typeof roomRef === 'object' && 'name' in roomRef) {
|
|
2027
|
+
roomType = roomRef.name || '';
|
|
2028
|
+
}
|
|
2029
|
+
// Get country name from code
|
|
2030
|
+
const countryCode = (review.guestCountryCode || '').toLowerCase();
|
|
2031
|
+
const country = countryCodeMap[countryCode] || countryCode.toUpperCase();
|
|
2032
|
+
// Get traveler type display name
|
|
2033
|
+
const travelerType = customerTypeMap[review.customerType] || review.customerType || '';
|
|
2034
|
+
reviews.push({
|
|
2035
|
+
title: review.title || '',
|
|
2036
|
+
rating: typeof review.averageScore === 'number' ? review.averageScore : null,
|
|
2037
|
+
date: dateStr,
|
|
2038
|
+
travelerType,
|
|
2039
|
+
stayDate: '', // Not available in FeaturedReview
|
|
2040
|
+
roomType,
|
|
2041
|
+
nightsStayed: '', // Not available in FeaturedReview
|
|
2042
|
+
positive: review.positiveText || '',
|
|
2043
|
+
negative: review.negativeText || '',
|
|
2044
|
+
country,
|
|
2045
|
+
});
|
|
2046
|
+
}
|
|
2047
|
+
// Sort reviews by date (newest first - higher timestamp = newer)
|
|
2048
|
+
reviews.sort((a, b) => {
|
|
2049
|
+
// Parse dates back for comparison
|
|
2050
|
+
const dateA = new Date(a.date).getTime() || 0;
|
|
2051
|
+
const dateB = new Date(b.date).getTime() || 0;
|
|
2052
|
+
return dateB - dateA;
|
|
2053
|
+
});
|
|
2054
|
+
// Validate we have meaningful data
|
|
2055
|
+
if (!hotelName || hotelName.length < 3) {
|
|
2056
|
+
return null;
|
|
2057
|
+
}
|
|
2058
|
+
return {
|
|
2059
|
+
hotelName,
|
|
2060
|
+
overallRating,
|
|
2061
|
+
totalReviews,
|
|
2062
|
+
ratingBreakdown,
|
|
2063
|
+
reviews,
|
|
2064
|
+
};
|
|
2065
|
+
}
|
|
2066
|
+
catch {
|
|
2067
|
+
return null;
|
|
2068
|
+
}
|
|
2069
|
+
});
|
|
2070
|
+
}
|
|
990
2071
|
scoreAndFilterHotels(hotels, filters) {
|
|
991
2072
|
return hotels
|
|
992
2073
|
.map((hotel) => {
|
|
@@ -1190,6 +2271,16 @@ export class HotelBrowser {
|
|
|
1190
2271
|
await this.page.waitForTimeout(2000);
|
|
1191
2272
|
await this.checkForBlocking();
|
|
1192
2273
|
await this.dismissPopups();
|
|
2274
|
+
// Try API extraction first (more reliable structured data)
|
|
2275
|
+
const apiDetails = await this.extractHotelDetailsFromAPI();
|
|
2276
|
+
if (apiDetails) {
|
|
2277
|
+
logger.debug("Successfully extracted hotel details from API cache");
|
|
2278
|
+
return {
|
|
2279
|
+
...apiDetails,
|
|
2280
|
+
url: hotelUrl,
|
|
2281
|
+
};
|
|
2282
|
+
}
|
|
2283
|
+
logger.debug("API extraction returned no results, falling back to DOM scraping");
|
|
1193
2284
|
// Extract comprehensive hotel details using evaluate with string to avoid __name compilation issues
|
|
1194
2285
|
const details = await this.page.evaluate(`
|
|
1195
2286
|
(function() {
|
|
@@ -1473,7 +2564,7 @@ export class HotelBrowser {
|
|
|
1473
2564
|
await this.page.waitForTimeout(2000);
|
|
1474
2565
|
await this.checkForBlocking();
|
|
1475
2566
|
await this.dismissPopups();
|
|
1476
|
-
// Extract room availability using
|
|
2567
|
+
// Extract room availability using data attributes (primary) with DOM fallback
|
|
1477
2568
|
const result = await this.page.evaluate(`
|
|
1478
2569
|
(function() {
|
|
1479
2570
|
function getText(selector) {
|
|
@@ -1485,152 +2576,292 @@ export class HotelBrowser {
|
|
|
1485
2576
|
var hotelName = getText('h2') || getText('h1').split('(')[0].trim() || "Unknown Hotel";
|
|
1486
2577
|
|
|
1487
2578
|
var roomOptions = [];
|
|
1488
|
-
var seenRooms = {};
|
|
1489
2579
|
|
|
1490
|
-
//
|
|
1491
|
-
|
|
2580
|
+
// ============================================================
|
|
2581
|
+
// STRATEGY 1: Extract from data-* attributes (most reliable)
|
|
2582
|
+
// Uses data-block-id, data-hotel-rounded-price, and data-fltrs
|
|
2583
|
+
// ============================================================
|
|
2584
|
+
|
|
2585
|
+
// First, build maps of room type IDs to room names and bed types from header rows
|
|
2586
|
+
var roomNameMap = {};
|
|
2587
|
+
var bedTypeMap = {};
|
|
2588
|
+
var roomTypeHeaders = document.querySelectorAll('.hprt-roomtype-link');
|
|
2589
|
+
for (var h = 0; h < roomTypeHeaders.length; h++) {
|
|
2590
|
+
var header = roomTypeHeaders[h];
|
|
2591
|
+
var headerRow = header.closest('tr');
|
|
2592
|
+
var headerBlockId = headerRow ? headerRow.getAttribute('data-block-id') : null;
|
|
2593
|
+
if (headerBlockId && headerBlockId.indexOf('_') > 0) {
|
|
2594
|
+
var headerRoomTypeId = headerBlockId.split('_')[0];
|
|
2595
|
+
var headerRoomName = header.textContent ? header.textContent.trim() : '';
|
|
2596
|
+
if (headerRoomName) {
|
|
2597
|
+
roomNameMap[headerRoomTypeId] = headerRoomName;
|
|
2598
|
+
}
|
|
2599
|
+
// Also capture bed type from header row
|
|
2600
|
+
var bedEl = headerRow.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
|
|
2601
|
+
if (bedEl) {
|
|
2602
|
+
var bedText = bedEl.textContent || '';
|
|
2603
|
+
var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
|
|
2604
|
+
for (var b = 0; b < bedLines.length; b++) {
|
|
2605
|
+
if (bedLines[b].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
|
|
2606
|
+
bedTypeMap[headerRoomTypeId] = bedLines[b];
|
|
2607
|
+
break;
|
|
2608
|
+
}
|
|
2609
|
+
}
|
|
2610
|
+
}
|
|
2611
|
+
}
|
|
2612
|
+
}
|
|
2613
|
+
|
|
2614
|
+
// Extract all room blocks with data-hotel-rounded-price attribute
|
|
2615
|
+
// Returns ALL rate options (room + meal plan + cancellation combinations)
|
|
2616
|
+
var dataRows = document.querySelectorAll('tr[data-block-id][data-hotel-rounded-price]');
|
|
2617
|
+
var seenBlockIds = {}; // Track exact block IDs to avoid true duplicates
|
|
1492
2618
|
|
|
1493
|
-
for (var i = 0; i <
|
|
1494
|
-
var
|
|
1495
|
-
var
|
|
2619
|
+
for (var i = 0; i < dataRows.length && roomOptions.length < 30; i++) {
|
|
2620
|
+
var row = dataRows[i];
|
|
2621
|
+
var blockId = row.getAttribute('data-block-id') || '';
|
|
2622
|
+
var parts = blockId.split('_');
|
|
2623
|
+
if (parts.length < 2) continue;
|
|
1496
2624
|
|
|
1497
|
-
|
|
1498
|
-
|
|
2625
|
+
// Skip exact duplicate block IDs
|
|
2626
|
+
if (seenBlockIds[blockId]) continue;
|
|
2627
|
+
seenBlockIds[blockId] = true;
|
|
1499
2628
|
|
|
1500
|
-
|
|
1501
|
-
var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
|
|
1502
|
-
var rowText = row ? row.textContent || "" : "";
|
|
2629
|
+
var roomTypeId = parts[0];
|
|
1503
2630
|
|
|
1504
|
-
//
|
|
1505
|
-
var
|
|
1506
|
-
var
|
|
2631
|
+
// Get price from data attribute (more reliable than DOM text)
|
|
2632
|
+
var roundedPrice = row.getAttribute('data-hotel-rounded-price');
|
|
2633
|
+
var price = roundedPrice ? parseInt(roundedPrice, 10) : null;
|
|
1507
2634
|
|
|
1508
|
-
//
|
|
1509
|
-
var
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
var
|
|
1513
|
-
|
|
1514
|
-
price = parseInt(match[1].replace(/,/g, ""));
|
|
1515
|
-
// Clean up price display
|
|
1516
|
-
var perNightMatch = priceDisplay.match(/[\\$€£¥]\\s*[\\d,]+/);
|
|
1517
|
-
priceDisplay = perNightMatch ? perNightMatch[0] : priceDisplay.split('\\n')[0];
|
|
1518
|
-
}
|
|
2635
|
+
// Get price display from DOM
|
|
2636
|
+
var priceDisplay = '';
|
|
2637
|
+
var priceEl = row.querySelector('.bui-price-display__value');
|
|
2638
|
+
if (priceEl) {
|
|
2639
|
+
var displayMatch = (priceEl.textContent || '').match(/[\\$€£¥][\\d,]+/);
|
|
2640
|
+
priceDisplay = displayMatch ? displayMatch[0] : '';
|
|
1519
2641
|
}
|
|
1520
2642
|
|
|
1521
|
-
//
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
price = parseInt(match[1].replace(/,/g, ""));
|
|
1529
|
-
priceDisplay = match[0];
|
|
1530
|
-
break;
|
|
1531
|
-
}
|
|
1532
|
-
}
|
|
2643
|
+
// Get room name from our map
|
|
2644
|
+
var roomName = roomNameMap[roomTypeId] || '';
|
|
2645
|
+
|
|
2646
|
+
// If no name in map, try to find it in the row
|
|
2647
|
+
if (!roomName) {
|
|
2648
|
+
var roomLink = row.querySelector('.hprt-roomtype-link, a[class*="room"]');
|
|
2649
|
+
roomName = roomLink ? (roomLink.textContent || '').trim() : '';
|
|
1533
2650
|
}
|
|
1534
2651
|
|
|
1535
|
-
//
|
|
1536
|
-
|
|
1537
|
-
|
|
2652
|
+
// Still no name? Use a generic one
|
|
2653
|
+
if (!roomName) {
|
|
2654
|
+
roomName = 'Room Type ' + roomTypeId;
|
|
2655
|
+
}
|
|
2656
|
+
|
|
2657
|
+
// Parse data-fltrs for structured info (breakfast, beds)
|
|
2658
|
+
var fltrs = row.getAttribute('data-fltrs');
|
|
2659
|
+
var breakfastIncluded = false;
|
|
2660
|
+
var bedCount = [];
|
|
2661
|
+
|
|
2662
|
+
if (fltrs) {
|
|
2663
|
+
try {
|
|
2664
|
+
var fltrData = JSON.parse(fltrs.replace(/\\n/g, ''));
|
|
2665
|
+
breakfastIncluded = fltrData.breakfast_included === 1;
|
|
2666
|
+
bedCount = fltrData.bed_count || [];
|
|
2667
|
+
} catch (e) {}
|
|
2668
|
+
}
|
|
2669
|
+
|
|
2670
|
+
// Get bed type from DOM (for display)
|
|
2671
|
+
var bedType = '';
|
|
2672
|
+
var bedEl = row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]');
|
|
1538
2673
|
if (bedEl) {
|
|
1539
|
-
|
|
1540
|
-
var bedText = bedEl.textContent || "";
|
|
2674
|
+
var bedText = bedEl.textContent || '';
|
|
1541
2675
|
var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
|
|
1542
|
-
// Find line with bed info
|
|
1543
2676
|
for (var k = 0; k < bedLines.length; k++) {
|
|
1544
2677
|
if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
|
|
1545
2678
|
bedType = bedLines[k];
|
|
1546
2679
|
break;
|
|
1547
2680
|
}
|
|
1548
2681
|
}
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
2682
|
+
}
|
|
2683
|
+
// Fallback 1: use bed type from our map (captured from header rows)
|
|
2684
|
+
if (!bedType && bedTypeMap[roomTypeId]) {
|
|
2685
|
+
bedType = bedTypeMap[roomTypeId];
|
|
2686
|
+
}
|
|
2687
|
+
// Fallback 2: use bed count from data-fltrs
|
|
2688
|
+
if (!bedType && bedCount.length > 0) {
|
|
2689
|
+
bedType = bedCount.length === 1 ? bedCount[0] + ' bed' : bedCount.join(' or ') + ' beds';
|
|
1552
2690
|
}
|
|
1553
2691
|
|
|
1554
|
-
//
|
|
1555
|
-
var
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
cancellation =
|
|
2692
|
+
// Get cancellation policy from row text
|
|
2693
|
+
var rowText = row.textContent || '';
|
|
2694
|
+
var rowTextLower = rowText.toLowerCase();
|
|
2695
|
+
var cancellation = '';
|
|
2696
|
+
if (rowTextLower.indexOf('free cancellation') >= 0) {
|
|
2697
|
+
cancellation = 'Free cancellation';
|
|
2698
|
+
} else if (rowTextLower.indexOf('non-refundable') >= 0) {
|
|
2699
|
+
cancellation = 'Non-refundable';
|
|
1560
2700
|
}
|
|
1561
2701
|
|
|
1562
|
-
//
|
|
1563
|
-
var breakfast =
|
|
1564
|
-
if (
|
|
1565
|
-
breakfast =
|
|
1566
|
-
} else if (
|
|
1567
|
-
breakfast =
|
|
2702
|
+
// Get breakfast info (prefer data-fltrs, fallback to DOM text)
|
|
2703
|
+
var breakfast = '';
|
|
2704
|
+
if (breakfastIncluded) {
|
|
2705
|
+
breakfast = 'Breakfast included';
|
|
2706
|
+
} else if (rowTextLower.indexOf('breakfast included') >= 0) {
|
|
2707
|
+
breakfast = 'Breakfast included';
|
|
2708
|
+
} else if (rowTextLower.indexOf('room only') >= 0) {
|
|
2709
|
+
breakfast = 'Room only';
|
|
1568
2710
|
}
|
|
1569
2711
|
|
|
1570
|
-
//
|
|
2712
|
+
// Get occupancy
|
|
1571
2713
|
var sleeps = null;
|
|
1572
|
-
var occupancyEl = row
|
|
2714
|
+
var occupancyEl = row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info');
|
|
1573
2715
|
if (occupancyEl) {
|
|
1574
|
-
var occMatch = occupancyEl.textContent.match(/(\\d+)/);
|
|
1575
|
-
sleeps = occMatch ? parseInt(occMatch[1]) : null;
|
|
2716
|
+
var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
|
|
2717
|
+
sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
|
|
1576
2718
|
}
|
|
1577
2719
|
|
|
2720
|
+
// Build features array
|
|
2721
|
+
var features = [];
|
|
2722
|
+
if (breakfast) features.push(breakfast);
|
|
2723
|
+
if (cancellation) features.push(cancellation);
|
|
2724
|
+
|
|
1578
2725
|
roomOptions.push({
|
|
1579
|
-
name:
|
|
2726
|
+
name: roomName,
|
|
1580
2727
|
price: price,
|
|
1581
2728
|
priceDisplay: priceDisplay,
|
|
1582
2729
|
sleeps: sleeps,
|
|
1583
|
-
features:
|
|
2730
|
+
features: features,
|
|
1584
2731
|
bedType: bedType,
|
|
1585
2732
|
cancellation: cancellation,
|
|
1586
|
-
breakfast: breakfast
|
|
2733
|
+
breakfast: breakfast,
|
|
2734
|
+
roomTypeId: roomTypeId
|
|
1587
2735
|
});
|
|
1588
2736
|
}
|
|
1589
2737
|
|
|
1590
|
-
//
|
|
2738
|
+
// ============================================================
|
|
2739
|
+
// STRATEGY 2: Fallback to DOM scraping if data attributes failed
|
|
2740
|
+
// ============================================================
|
|
1591
2741
|
if (roomOptions.length === 0) {
|
|
2742
|
+
var seenRooms = {};
|
|
2743
|
+
var roomTypeLinks = document.querySelectorAll('.hprt-roomtype-link, a[class*="hprt-roomtype"]');
|
|
2744
|
+
|
|
2745
|
+
for (var i = 0; i < roomTypeLinks.length && roomOptions.length < 10; i++) {
|
|
2746
|
+
var roomLink = roomTypeLinks[i];
|
|
2747
|
+
var name = roomLink.textContent ? roomLink.textContent.trim() : '';
|
|
2748
|
+
|
|
2749
|
+
if (!name || name.length < 3 || seenRooms[name]) continue;
|
|
2750
|
+
seenRooms[name] = true;
|
|
2751
|
+
|
|
2752
|
+
var row = roomLink.closest('tr') || roomLink.closest('[data-block-id]') || roomLink.parentElement;
|
|
2753
|
+
var rowText = row ? row.textContent || '' : '';
|
|
2754
|
+
|
|
2755
|
+
// Try to find price
|
|
2756
|
+
var price = null;
|
|
2757
|
+
var priceDisplay = '';
|
|
2758
|
+
var priceCell = row ? row.querySelector('.hprt-table-cell-price, [class*="price-block"], [class*="bui-price"]') : null;
|
|
2759
|
+
if (priceCell) {
|
|
2760
|
+
var match = (priceCell.textContent || '').match(/[\\$€£¥]\\s*([\\d,]+)/);
|
|
2761
|
+
if (match) {
|
|
2762
|
+
price = parseInt(match[1].replace(/,/g, ''), 10);
|
|
2763
|
+
priceDisplay = match[0];
|
|
2764
|
+
}
|
|
2765
|
+
}
|
|
2766
|
+
|
|
2767
|
+
// Bed type
|
|
2768
|
+
var bedType = '';
|
|
2769
|
+
var bedEl = row ? row.querySelector('.hprt-roomtype-bed, [class*="bed-type"]') : null;
|
|
2770
|
+
if (bedEl) {
|
|
2771
|
+
var bedText = bedEl.textContent || '';
|
|
2772
|
+
var bedLines = bedText.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
|
|
2773
|
+
for (var k = 0; k < bedLines.length; k++) {
|
|
2774
|
+
if (bedLines[k].match(/(bed|queen|king|twin|double|single|sofa)/i)) {
|
|
2775
|
+
bedType = bedLines[k];
|
|
2776
|
+
break;
|
|
2777
|
+
}
|
|
2778
|
+
}
|
|
2779
|
+
}
|
|
2780
|
+
|
|
2781
|
+
// Cancellation and breakfast from text
|
|
2782
|
+
var rowTextLower = rowText.toLowerCase();
|
|
2783
|
+
var cancellation = '';
|
|
2784
|
+
if (rowTextLower.indexOf('free cancellation') >= 0) {
|
|
2785
|
+
cancellation = 'Free cancellation';
|
|
2786
|
+
} else if (rowTextLower.indexOf('non-refundable') >= 0) {
|
|
2787
|
+
cancellation = 'Non-refundable';
|
|
2788
|
+
}
|
|
2789
|
+
|
|
2790
|
+
var breakfast = '';
|
|
2791
|
+
if (rowTextLower.indexOf('breakfast included') >= 0) {
|
|
2792
|
+
breakfast = 'Breakfast included';
|
|
2793
|
+
} else if (rowTextLower.indexOf('room only') >= 0) {
|
|
2794
|
+
breakfast = 'Room only';
|
|
2795
|
+
}
|
|
2796
|
+
|
|
2797
|
+
// Occupancy
|
|
2798
|
+
var sleeps = null;
|
|
2799
|
+
var occupancyEl = row ? row.querySelector('[class*="occupancy"], .hprt-occupancy-occupancy-info') : null;
|
|
2800
|
+
if (occupancyEl) {
|
|
2801
|
+
var occMatch = (occupancyEl.textContent || '').match(/(\\d+)/);
|
|
2802
|
+
sleeps = occMatch ? parseInt(occMatch[1], 10) : null;
|
|
2803
|
+
}
|
|
2804
|
+
|
|
2805
|
+
roomOptions.push({
|
|
2806
|
+
name: name,
|
|
2807
|
+
price: price,
|
|
2808
|
+
priceDisplay: priceDisplay,
|
|
2809
|
+
sleeps: sleeps,
|
|
2810
|
+
features: [],
|
|
2811
|
+
bedType: bedType,
|
|
2812
|
+
cancellation: cancellation,
|
|
2813
|
+
breakfast: breakfast
|
|
2814
|
+
});
|
|
2815
|
+
}
|
|
2816
|
+
}
|
|
2817
|
+
|
|
2818
|
+
// ============================================================
|
|
2819
|
+
// STRATEGY 3: Last resort - look for any data-block-id elements
|
|
2820
|
+
// ============================================================
|
|
2821
|
+
if (roomOptions.length === 0) {
|
|
2822
|
+
var seenBlocks = {};
|
|
1592
2823
|
var blocks = document.querySelectorAll('[data-block-id]');
|
|
1593
2824
|
for (var i = 0; i < blocks.length && roomOptions.length < 10; i++) {
|
|
1594
2825
|
var block = blocks[i];
|
|
1595
|
-
var
|
|
2826
|
+
var blockId = block.getAttribute('data-block-id') || '';
|
|
2827
|
+
if (!blockId || blockId === 'header_survey') continue;
|
|
1596
2828
|
|
|
1597
|
-
|
|
2829
|
+
var blockText = block.textContent || '';
|
|
1598
2830
|
var nameEl = block.querySelector('a[class*="room"], span[class*="room-name"]');
|
|
1599
|
-
var name = nameEl ? nameEl.textContent.trim() :
|
|
2831
|
+
var name = nameEl ? (nameEl.textContent || '').trim() : '';
|
|
1600
2832
|
|
|
1601
2833
|
if (!name) {
|
|
1602
|
-
// Try to extract from block text
|
|
1603
2834
|
var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
|
|
1604
|
-
name = lines[0] ? lines[0].trim().slice(0, 50) :
|
|
2835
|
+
name = lines[0] ? lines[0].trim().slice(0, 50) : '';
|
|
1605
2836
|
}
|
|
1606
2837
|
|
|
1607
|
-
if (!name || name.length < 3 ||
|
|
1608
|
-
|
|
2838
|
+
if (!name || name.length < 3 || seenBlocks[name]) continue;
|
|
2839
|
+
seenBlocks[name] = true;
|
|
1609
2840
|
|
|
1610
2841
|
var priceMatch = blockText.match(/[\\$€£¥]\\s*([\\d,]+)/);
|
|
1611
|
-
var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g,
|
|
2842
|
+
var price = priceMatch ? parseInt(priceMatch[1].replace(/,/g, ''), 10) : null;
|
|
1612
2843
|
|
|
1613
2844
|
roomOptions.push({
|
|
1614
2845
|
name: name,
|
|
1615
2846
|
price: price,
|
|
1616
|
-
priceDisplay: priceMatch ? priceMatch[0] :
|
|
2847
|
+
priceDisplay: priceMatch ? priceMatch[0] : '',
|
|
1617
2848
|
sleeps: null,
|
|
1618
2849
|
features: [],
|
|
1619
|
-
bedType:
|
|
1620
|
-
cancellation:
|
|
1621
|
-
breakfast:
|
|
2850
|
+
bedType: '',
|
|
2851
|
+
cancellation: '',
|
|
2852
|
+
breakfast: ''
|
|
1622
2853
|
});
|
|
1623
2854
|
}
|
|
1624
2855
|
}
|
|
1625
2856
|
|
|
1626
2857
|
// Check for "no availability" message
|
|
1627
|
-
var bodyText = document.body.textContent ||
|
|
2858
|
+
var bodyText = document.body.textContent || '';
|
|
1628
2859
|
var noAvailability =
|
|
1629
|
-
bodyText.indexOf(
|
|
1630
|
-
bodyText.indexOf(
|
|
1631
|
-
bodyText.indexOf(
|
|
1632
|
-
bodyText.indexOf(
|
|
1633
|
-
bodyText.indexOf(
|
|
2860
|
+
bodyText.indexOf('no availability') >= 0 ||
|
|
2861
|
+
bodyText.indexOf('sold out') >= 0 ||
|
|
2862
|
+
bodyText.indexOf('no rooms available') >= 0 ||
|
|
2863
|
+
bodyText.indexOf('fully booked') >= 0 ||
|
|
2864
|
+
bodyText.indexOf('We have no availability') >= 0;
|
|
1634
2865
|
|
|
1635
2866
|
return {
|
|
1636
2867
|
hotelName: hotelName,
|
|
@@ -1639,6 +2870,45 @@ export class HotelBrowser {
|
|
|
1639
2870
|
};
|
|
1640
2871
|
})()
|
|
1641
2872
|
`);
|
|
2873
|
+
// Enrich room options with facilities from GraphQL API
|
|
2874
|
+
// This provides detailed amenities (AC, TV, bathroom, etc.) per room type
|
|
2875
|
+
if (result.roomOptions.length > 0) {
|
|
2876
|
+
try {
|
|
2877
|
+
const hotelId = await this.extractHotelId();
|
|
2878
|
+
if (hotelId) {
|
|
2879
|
+
const facilitiesMap = await this.fetchRoomFacilitiesGraphQL(hotelId, checkIn, checkOut);
|
|
2880
|
+
if (facilitiesMap.size > 0) {
|
|
2881
|
+
// Merge facilities into room options based on roomTypeId
|
|
2882
|
+
// Room type IDs are expected to be the first 9 digits of the full room ID (e.g., 652359501); try an exact match, then fall back to a prefix match
|
|
2883
|
+
for (const room of result.roomOptions) {
|
|
2884
|
+
if (room.roomTypeId) {
|
|
2885
|
+
// Try exact match first
|
|
2886
|
+
let facilities = facilitiesMap.get(room.roomTypeId);
|
|
2887
|
+
// If there is no exact match: the GraphQL API returns full room IDs (e.g., 652359501)
|
|
2888
|
+
// while our roomTypeId might be just the prefix
|
|
2889
|
+
if (!facilities) {
|
|
2890
|
+
// Find a key that starts with our roomTypeId, or that our roomTypeId starts with (prefix match in either direction)
|
|
2891
|
+
for (const [key, value] of facilitiesMap) {
|
|
2892
|
+
if (key.startsWith(room.roomTypeId) || room.roomTypeId.startsWith(key)) {
|
|
2893
|
+
facilities = value;
|
|
2894
|
+
break;
|
|
2895
|
+
}
|
|
2896
|
+
}
|
|
2897
|
+
}
|
|
2898
|
+
if (facilities) {
|
|
2899
|
+
room.amenities = facilities;
|
|
2900
|
+
}
|
|
2901
|
+
}
|
|
2902
|
+
}
|
|
2903
|
+
logger.debug({ enrichedRooms: result.roomOptions.filter(r => r.amenities).length }, 'Enriched room options with GraphQL facilities');
|
|
2904
|
+
}
|
|
2905
|
+
}
|
|
2906
|
+
}
|
|
2907
|
+
catch (error) {
|
|
2908
|
+
// Non-fatal: continue without facilities enrichment
|
|
2909
|
+
logger.debug({ error }, 'Failed to enrich rooms with GraphQL facilities');
|
|
2910
|
+
}
|
|
2911
|
+
}
|
|
1642
2912
|
// Determine availability and lowest price
|
|
1643
2913
|
const available = result.roomOptions.length > 0 && !result.noAvailabilityDetected;
|
|
1644
2914
|
const prices = result.roomOptions
|
|
@@ -1686,7 +2956,8 @@ export class HotelBrowser {
|
|
|
1686
2956
|
if (!this.page)
|
|
1687
2957
|
throw new Error("Browser not initialized");
|
|
1688
2958
|
// Navigate to hotel page
|
|
1689
|
-
const
|
|
2959
|
+
const urlParts = hotelUrl.split("?")[0];
|
|
2960
|
+
const cleanUrl = urlParts?.split("#")[0] ?? hotelUrl;
|
|
1690
2961
|
await this.page.goto(cleanUrl, {
|
|
1691
2962
|
waitUntil: "domcontentloaded",
|
|
1692
2963
|
timeout: 60000,
|
|
@@ -1705,52 +2976,112 @@ export class HotelBrowser {
|
|
|
1705
2976
|
catch { }
|
|
1706
2977
|
await this.page.keyboard.press("Escape");
|
|
1707
2978
|
await this.page.waitForTimeout(500);
|
|
1708
|
-
//
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
//
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
2979
|
+
// Try API extraction first for basic review data
|
|
2980
|
+
// API provides: hotel name, overall rating, rating breakdown, and featured reviews
|
|
2981
|
+
// Note: API reviews are limited to what's in cache (~6-10 reviews), sorted by newest
|
|
2982
|
+
const apiData = await this.extractReviewsFromAPI();
|
|
2983
|
+
// Determine if we can use API data directly or need DOM fallback
|
|
2984
|
+
// Use API if: we have enough reviews AND no special sorting/filtering is requested
|
|
2985
|
+
const canUseApiOnly = apiData &&
|
|
2986
|
+
apiData.reviews.length >= limit &&
|
|
2987
|
+
sortBy === "recent" &&
|
|
2988
|
+
!filterBy;
|
|
2989
|
+
if (canUseApiOnly) {
|
|
2990
|
+
logger.debug("Using API extraction for reviews (sufficient data, no filters)");
|
|
2991
|
+
const reviewsResult = {
|
|
2992
|
+
hotelName: apiData.hotelName,
|
|
2993
|
+
overallRating: apiData.overallRating,
|
|
2994
|
+
totalReviews: apiData.totalReviews,
|
|
2995
|
+
ratingBreakdown: apiData.ratingBreakdown,
|
|
2996
|
+
reviews: apiData.reviews.slice(0, limit),
|
|
2997
|
+
url: cleanUrl,
|
|
2998
|
+
};
|
|
2999
|
+
await this.saveSession();
|
|
3000
|
+
return reviewsResult;
|
|
3001
|
+
}
|
|
3002
|
+
// Use API data for metadata if available, but get reviews from DOM
|
|
3003
|
+
// This gives us accurate rating breakdown from API + more reviews from DOM
|
|
3004
|
+
const baseData = apiData || {
|
|
3005
|
+
hotelName: '',
|
|
3006
|
+
overallRating: null,
|
|
3007
|
+
totalReviews: 0,
|
|
3008
|
+
ratingBreakdown: {
|
|
3009
|
+
staff: null,
|
|
3010
|
+
facilities: null,
|
|
3011
|
+
cleanliness: null,
|
|
3012
|
+
comfort: null,
|
|
3013
|
+
valueForMoney: null,
|
|
3014
|
+
location: null,
|
|
3015
|
+
freeWifi: null,
|
|
3016
|
+
},
|
|
3017
|
+
};
|
|
3018
|
+
// If API didn't give us hotel info, get it from DOM
|
|
3019
|
+
if (!baseData.hotelName) {
|
|
3020
|
+
const mainPageData = await this.page.evaluate(`
|
|
3021
|
+
(function() {
|
|
3022
|
+
var results = { hotelName: '', overallRating: null, totalReviews: 0, breakdown: {} };
|
|
3023
|
+
|
|
3024
|
+
// Hotel name
|
|
3025
|
+
var nameEl = document.querySelector('h2[class*="pp-header__title"], [data-testid="PropertyHeaderDesktop-wrapper"] h2, h2.d2fee87262');
|
|
3026
|
+
results.hotelName = nameEl?.textContent?.trim() || '';
|
|
3027
|
+
|
|
3028
|
+
// Overall rating and total reviews from review-score-component
|
|
3029
|
+
var scoreComponent = document.querySelector('[data-testid="review-score-component"]');
|
|
3030
|
+
if (scoreComponent) {
|
|
3031
|
+
var text = scoreComponent.textContent || '';
|
|
3032
|
+
var scoreMatch = text.match(/Scored\\s+([\\d.]+)/);
|
|
3033
|
+
if (scoreMatch) {
|
|
3034
|
+
results.overallRating = parseFloat(scoreMatch[1]);
|
|
3035
|
+
}
|
|
3036
|
+
var reviewCountMatch = text.match(/([\\d,]+)\\s+reviews?/);
|
|
3037
|
+
if (reviewCountMatch) {
|
|
3038
|
+
results.totalReviews = parseInt(reviewCountMatch[1].replace(/,/g, ''));
|
|
3039
|
+
}
|
|
1748
3040
|
}
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
3041
|
+
|
|
3042
|
+
// Rating breakdown categories
|
|
3043
|
+
var breakdownEls = document.querySelectorAll('[data-testid="review-subscore"]');
|
|
3044
|
+
breakdownEls.forEach(function(el) {
|
|
3045
|
+
var text = el.textContent?.trim() || '';
|
|
3046
|
+
var parts = text.split(/\\s+/);
|
|
3047
|
+
if (parts.length >= 2) {
|
|
3048
|
+
var score = parseFloat(parts[parts.length - 1]);
|
|
3049
|
+
var category = parts.slice(0, -1).join(' ').toLowerCase();
|
|
3050
|
+
if (category.includes('staff')) results.breakdown.staff = score;
|
|
3051
|
+
else if (category.includes('facilities')) results.breakdown.facilities = score;
|
|
3052
|
+
else if (category.includes('cleanliness')) results.breakdown.cleanliness = score;
|
|
3053
|
+
else if (category.includes('comfort')) results.breakdown.comfort = score;
|
|
3054
|
+
else if (category.includes('value') || category.includes('money')) results.breakdown.valueForMoney = score;
|
|
3055
|
+
else if (category.includes('location')) results.breakdown.location = score;
|
|
3056
|
+
else if (category.includes('wifi') || category.includes('wi-fi')) results.breakdown.freeWifi = score;
|
|
3057
|
+
}
|
|
3058
|
+
});
|
|
3059
|
+
|
|
3060
|
+
return results;
|
|
3061
|
+
})()
|
|
3062
|
+
`);
|
|
3063
|
+
baseData.hotelName = mainPageData.hotelName;
|
|
3064
|
+
if (baseData.overallRating === null)
|
|
3065
|
+
baseData.overallRating = mainPageData.overallRating;
|
|
3066
|
+
if (baseData.totalReviews === 0)
|
|
3067
|
+
baseData.totalReviews = mainPageData.totalReviews;
|
|
3068
|
+
// Fill in missing rating breakdown from DOM
|
|
3069
|
+
if (baseData.ratingBreakdown.staff === null)
|
|
3070
|
+
baseData.ratingBreakdown.staff = mainPageData.breakdown.staff ?? null;
|
|
3071
|
+
if (baseData.ratingBreakdown.facilities === null)
|
|
3072
|
+
baseData.ratingBreakdown.facilities = mainPageData.breakdown.facilities ?? null;
|
|
3073
|
+
if (baseData.ratingBreakdown.cleanliness === null)
|
|
3074
|
+
baseData.ratingBreakdown.cleanliness = mainPageData.breakdown.cleanliness ?? null;
|
|
3075
|
+
if (baseData.ratingBreakdown.comfort === null)
|
|
3076
|
+
baseData.ratingBreakdown.comfort = mainPageData.breakdown.comfort ?? null;
|
|
3077
|
+
if (baseData.ratingBreakdown.valueForMoney === null)
|
|
3078
|
+
baseData.ratingBreakdown.valueForMoney = mainPageData.breakdown.valueForMoney ?? null;
|
|
3079
|
+
if (baseData.ratingBreakdown.location === null)
|
|
3080
|
+
baseData.ratingBreakdown.location = mainPageData.breakdown.location ?? null;
|
|
3081
|
+
if (baseData.ratingBreakdown.freeWifi === null)
|
|
3082
|
+
baseData.ratingBreakdown.freeWifi = mainPageData.breakdown.freeWifi ?? null;
|
|
3083
|
+
}
|
|
3084
|
+
logger.debug("Using DOM extraction for reviews (need more reviews or filters)");
|
|
1754
3085
|
// Click "Read all reviews" button to open reviews modal
|
|
1755
3086
|
const readAllBtn = await this.page.$('[data-testid="fr-read-all-reviews"], [data-testid="review-score-read-all"]');
|
|
1756
3087
|
if (!readAllBtn) {
|
|
@@ -1919,20 +3250,20 @@ export class HotelBrowser {
|
|
|
1919
3250
|
return reviews;
|
|
1920
3251
|
})()
|
|
1921
3252
|
`);
|
|
1922
|
-
// Build rating breakdown
|
|
3253
|
+
// Build rating breakdown from baseData (populated from API or DOM)
|
|
1923
3254
|
const ratingBreakdown = {
|
|
1924
|
-
staff:
|
|
1925
|
-
facilities:
|
|
1926
|
-
cleanliness:
|
|
1927
|
-
comfort:
|
|
1928
|
-
valueForMoney:
|
|
1929
|
-
location:
|
|
1930
|
-
freeWifi:
|
|
3255
|
+
staff: baseData.ratingBreakdown.staff,
|
|
3256
|
+
facilities: baseData.ratingBreakdown.facilities,
|
|
3257
|
+
cleanliness: baseData.ratingBreakdown.cleanliness,
|
|
3258
|
+
comfort: baseData.ratingBreakdown.comfort,
|
|
3259
|
+
valueForMoney: baseData.ratingBreakdown.valueForMoney,
|
|
3260
|
+
location: baseData.ratingBreakdown.location,
|
|
3261
|
+
freeWifi: baseData.ratingBreakdown.freeWifi,
|
|
1931
3262
|
};
|
|
1932
3263
|
const reviewsResult = {
|
|
1933
|
-
hotelName:
|
|
1934
|
-
overallRating:
|
|
1935
|
-
totalReviews:
|
|
3264
|
+
hotelName: baseData.hotelName,
|
|
3265
|
+
overallRating: baseData.overallRating,
|
|
3266
|
+
totalReviews: baseData.totalReviews,
|
|
1936
3267
|
ratingBreakdown,
|
|
1937
3268
|
reviews: reviews.slice(0, limit),
|
|
1938
3269
|
url: cleanUrl,
|
|
@@ -1962,13 +3293,18 @@ export class HotelBrowser {
|
|
|
1962
3293
|
checkIn.setDate(checkIn.getDate() + i);
|
|
1963
3294
|
const checkOut = new Date(checkIn);
|
|
1964
3295
|
checkOut.setDate(checkOut.getDate() + 1);
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
3296
|
+
const checkInStr = checkIn.toISOString().split("T")[0];
|
|
3297
|
+
const checkOutStr = checkOut.toISOString().split("T")[0];
|
|
3298
|
+
if (checkInStr && checkOutStr) {
|
|
3299
|
+
dates.push({
|
|
3300
|
+
checkIn: checkInStr,
|
|
3301
|
+
checkOut: checkOutStr,
|
|
3302
|
+
});
|
|
3303
|
+
}
|
|
1969
3304
|
}
|
|
1970
3305
|
// Clean the hotel URL
|
|
1971
|
-
const
|
|
3306
|
+
const urlParts = hotelUrl.split("?")[0];
|
|
3307
|
+
const cleanUrl = urlParts?.split("#")[0] ?? hotelUrl;
|
|
1972
3308
|
// Collect prices for each date
|
|
1973
3309
|
const prices = [];
|
|
1974
3310
|
let hotelName = "";
|
|
@@ -2109,10 +3445,11 @@ export class HotelBrowser {
|
|
|
2109
3445
|
// Calculate end date
|
|
2110
3446
|
const endDate = new Date(start);
|
|
2111
3447
|
endDate.setDate(endDate.getDate() + actualNights - 1);
|
|
3448
|
+
const endDateStr = endDate.toISOString().split("T")[0] ?? startDate;
|
|
2112
3449
|
const priceCalendarResult = {
|
|
2113
3450
|
hotelName,
|
|
2114
3451
|
startDate,
|
|
2115
|
-
endDate:
|
|
3452
|
+
endDate: endDateStr,
|
|
2116
3453
|
nights: actualNights,
|
|
2117
3454
|
currency,
|
|
2118
3455
|
prices,
|