@jackwener/opencli 1.7.17 → 1.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +10 -8
  2. package/README.zh-CN.md +9 -8
  3. package/cli-manifest.json +585 -9
  4. package/clis/ctrip/ctrip.test.js +486 -1
  5. package/clis/ctrip/flight.js +136 -0
  6. package/clis/ctrip/hotel-search.js +132 -0
  7. package/clis/ctrip/utils.js +298 -0
  8. package/clis/doubao/utils.js +17 -0
  9. package/clis/doubao/utils.test.js +61 -0
  10. package/clis/google/search.js +16 -6
  11. package/clis/google-scholar/search.js +20 -5
  12. package/clis/google-scholar/search.test.js +35 -2
  13. package/clis/reddit/home.js +117 -0
  14. package/clis/reddit/home.test.js +127 -0
  15. package/clis/reddit/read.js +400 -54
  16. package/clis/reddit/read.test.js +315 -12
  17. package/clis/reddit/reply.js +182 -0
  18. package/clis/reddit/reply.test.js +89 -0
  19. package/clis/reddit/subreddit-info.js +117 -0
  20. package/clis/reddit/subreddit-info.test.js +163 -0
  21. package/clis/reddit/whoami.js +84 -0
  22. package/clis/reddit/whoami.test.js +105 -0
  23. package/clis/rednote/comments.js +76 -0
  24. package/clis/rednote/download.js +59 -0
  25. package/clis/rednote/feed.js +95 -0
  26. package/clis/rednote/navigation.test.js +26 -0
  27. package/clis/rednote/note.js +68 -0
  28. package/clis/rednote/notifications.js +139 -0
  29. package/clis/rednote/rednote.test.js +157 -0
  30. package/clis/rednote/search.js +101 -0
  31. package/clis/rednote/user.js +55 -0
  32. package/clis/twitter/bookmark-folder.js +3 -1
  33. package/clis/twitter/bookmarks.js +3 -1
  34. package/clis/twitter/followers.js +20 -5
  35. package/clis/twitter/followers.test.js +44 -0
  36. package/clis/twitter/following.js +36 -20
  37. package/clis/twitter/following.test.js +60 -8
  38. package/clis/twitter/likes.js +28 -13
  39. package/clis/twitter/likes.test.js +111 -1
  40. package/clis/twitter/list-add.js +128 -204
  41. package/clis/twitter/list-add.test.js +97 -1
  42. package/clis/twitter/list-tweets.js +13 -4
  43. package/clis/twitter/list-tweets.test.js +48 -0
  44. package/clis/twitter/lists.js +5 -2
  45. package/clis/twitter/post.js +23 -4
  46. package/clis/twitter/post.test.js +30 -0
  47. package/clis/twitter/profile.js +16 -8
  48. package/clis/twitter/profile.test.js +39 -0
  49. package/clis/twitter/reply.js +133 -10
  50. package/clis/twitter/reply.test.js +55 -0
  51. package/clis/twitter/search.js +188 -170
  52. package/clis/twitter/search.test.js +96 -258
  53. package/clis/twitter/shared.js +167 -16
  54. package/clis/twitter/shared.test.js +102 -1
  55. package/clis/twitter/timeline.js +3 -1
  56. package/clis/twitter/tweets.js +147 -51
  57. package/clis/twitter/tweets.test.js +238 -1
  58. package/clis/xiaohongshu/comments.js +57 -26
  59. package/clis/xiaohongshu/comments.test.js +63 -1
  60. package/clis/xiaohongshu/download.js +32 -23
  61. package/clis/xiaohongshu/feed.js +23 -15
  62. package/clis/xiaohongshu/note-helpers.js +16 -6
  63. package/clis/xiaohongshu/note.js +26 -20
  64. package/clis/xiaohongshu/notifications.js +26 -19
  65. package/clis/xiaohongshu/search.js +201 -37
  66. package/clis/xiaohongshu/search.test.js +82 -8
  67. package/clis/xiaohongshu/user-helpers.js +13 -4
  68. package/clis/xiaohongshu/user-helpers.test.js +20 -0
  69. package/clis/xiaohongshu/user.js +9 -4
  70. package/clis/xueqiu/earnings-date.js +2 -2
  71. package/clis/xueqiu/kline.js +2 -2
  72. package/clis/xueqiu/utils.js +19 -0
  73. package/clis/xueqiu/utils.test.js +26 -0
  74. package/clis/youtube/transcript.js +28 -3
  75. package/clis/youtube/transcript.test.js +90 -1
  76. package/clis/zhihu/answer-detail.js +233 -0
  77. package/clis/zhihu/answer-detail.test.js +330 -0
  78. package/clis/zhihu/question.js +44 -10
  79. package/clis/zhihu/question.test.js +78 -1
  80. package/clis/zhihu/recommend.js +103 -0
  81. package/clis/zhihu/recommend.test.js +143 -0
  82. package/dist/src/browser/base-page.d.ts +3 -2
  83. package/dist/src/browser/base-page.test.js +2 -2
  84. package/dist/src/browser/cdp.js +3 -3
  85. package/dist/src/browser/page.d.ts +3 -2
  86. package/dist/src/browser/page.js +4 -4
  87. package/dist/src/browser/page.test.js +31 -0
  88. package/dist/src/browser/utils.d.ts +10 -0
  89. package/dist/src/browser/utils.js +37 -0
  90. package/dist/src/browser/utils.test.d.ts +1 -0
  91. package/dist/src/browser/utils.test.js +29 -0
  92. package/dist/src/cli-argv-preprocess.d.ts +37 -0
  93. package/dist/src/cli-argv-preprocess.js +131 -0
  94. package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
  95. package/dist/src/cli-argv-preprocess.test.js +130 -0
  96. package/dist/src/cli.js +123 -86
  97. package/dist/src/cli.test.js +32 -22
  98. package/dist/src/commands/daemon.js +6 -7
  99. package/dist/src/doctor.js +21 -17
  100. package/dist/src/doctor.test.js +2 -0
  101. package/dist/src/download/progress.js +15 -11
  102. package/dist/src/download/progress.test.d.ts +1 -0
  103. package/dist/src/download/progress.test.js +25 -0
  104. package/dist/src/execution.js +1 -3
  105. package/dist/src/execution.test.js +4 -16
  106. package/dist/src/help.d.ts +11 -0
  107. package/dist/src/help.js +46 -5
  108. package/dist/src/logger.js +8 -9
  109. package/dist/src/main.js +16 -0
  110. package/dist/src/output.js +4 -5
  111. package/dist/src/runtime-detect.d.ts +1 -1
  112. package/dist/src/runtime-detect.js +1 -1
  113. package/dist/src/runtime-detect.test.js +3 -2
  114. package/dist/src/tui.d.ts +0 -1
  115. package/dist/src/tui.js +9 -22
  116. package/dist/src/types.d.ts +3 -1
  117. package/dist/src/update-check.js +4 -5
  118. package/package.json +5 -4
@@ -0,0 +1,132 @@
1
+ /**
2
+ * 携程酒店 list — search hotels by city + date range.
3
+ *
4
+ * Reads `window.__NEXT_DATA__.props.pageProps.initListData.hotelList` directly
5
+ * from the SSR-rendered hotel listing page. Ctrip serves the first 13 hotels
6
+ * (10 organic + ~3 promoted) inline; `&pageSize=N` URL params are ignored
7
+ * server-side so we cap default limit accordingly (see
8
+ * `~/.opencli/sites/ctrip/notes.md`).
9
+ *
10
+ * Reuses the existing `mapHotelRow` + `pickHotelMapCoords` helpers from utils.js
11
+ * so the column shape stays consistent if future variants (hotel-detail) also
12
+ * project from the same `hotelInfo` shape.
13
+ *
14
+ * Anti-bot: not detected on first-page navigation (PR #1481 recon 2026-05-12).
15
+ */
16
+ import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
17
+ import { cli, Strategy } from '@jackwener/opencli/registry';
18
+ import { mapHotelRow, parseCityId, parseIsoDate } from './utils.js';
19
+
20
+ const MIN_LIMIT = 1;
21
+ const MAX_LIMIT = 30;
22
+ const DEFAULT_LIMIT = 10;
23
+
24
/**
 * Resolve the `--limit` argument to an integer in [MIN_LIMIT, MAX_LIMIT].
 *
 * @param {unknown} raw - Value supplied for `--limit`; `undefined`, `null`
 *   and the empty string select DEFAULT_LIMIT.
 * @returns {number} The validated limit.
 * @throws {ArgumentError} When the value is not an integer or is out of range.
 */
function parseHotelLimit(raw) {
  const isMissing = raw === undefined || raw === null || raw === '';
  if (isMissing) {
    return DEFAULT_LIMIT;
  }

  const limit = Number(raw);
  // Number.isInteger is already false for NaN and ±Infinity, so it covers
  // the non-finite case on its own.
  if (!Number.isInteger(limit)) {
    throw new ArgumentError(`--limit must be an integer between ${MIN_LIMIT} and ${MAX_LIMIT}, got ${JSON.stringify(raw)}`);
  }

  const withinBounds = limit >= MIN_LIMIT && limit <= MAX_LIMIT;
  if (!withinBounds) {
    throw new ArgumentError(`--limit must be between ${MIN_LIMIT} and ${MAX_LIMIT}, got ${limit}`);
  }
  return limit;
}
35
+
36
/**
 * Browser-context script: wait for SSR state to be populated, or detect a
 * login/captcha gate.
 *
 * Ctrip occasionally serves a captcha redirect (`/captcha`) when traffic
 * looks bot-like; we catch that as AuthRequired so the agent can pop a
 * human session instead of looping on an empty extract.
 *
 * The expression evaluates to a promise that resolves to one of:
 *   - 'captcha' — the URL path contains "captcha" or the body text matches
 *     one of the captcha phrases tested in `detect`;
 *   - 'content' — `__NEXT_DATA__.props.pageProps.initListData.hotelList`
 *     is an array;
 *   - 'timeout' — neither condition was observed within 5000 ms.
 *
 * `detect` runs once immediately and then again on every child-list mutation
 * under `document.documentElement`.
 */
const WAIT_FOR_SSR_JS = `
new Promise((resolve) => {
const detect = () => {
if (location.pathname.includes('captcha') || /验证码|verify the human/i.test(document.body?.innerText || '')) return 'captcha';
const hotels = window.__NEXT_DATA__?.props?.pageProps?.initListData?.hotelList;
if (Array.isArray(hotels)) return 'content';
return null;
};
const found = detect();
if (found) return resolve(found);
const observer = new MutationObserver(() => {
const result = detect();
if (result) { observer.disconnect(); resolve(result); }
});
observer.observe(document.documentElement, { childList: true, subtree: true });
setTimeout(() => { observer.disconnect(); resolve('timeout'); }, 5000);
})
`;
61
+
62
/**
 * Browser-context script: return the raw SSR hotel list.
 *
 * Evaluates to `window.__NEXT_DATA__.props.pageProps.initListData.hotelList`
 * when that value is an array, otherwise to `null`. No projection happens
 * here; rows are returned exactly as the page exposes them.
 */
const EXTRACT_HOTELS_JS = `
(() => {
const list = window.__NEXT_DATA__?.props?.pageProps?.initListData?.hotelList;
if (!Array.isArray(list)) return null;
return list;
})()
`;
69
+
70
/**
 * Guard that the stay starts strictly before it ends.
 *
 * Both arguments are expected to be `YYYY-MM-DD` strings; each is pinned to
 * UTC midnight before comparison so the check does not depend on the local
 * timezone.
 *
 * @param {string} checkin - Check-in day.
 * @param {string} checkout - Check-out day.
 * @throws {ArgumentError} When check-in is on or after check-out.
 */
function assertCheckinBeforeCheckout(checkin, checkout) {
  const atUtcMidnight = (isoDay) => Date.parse(`${isoDay}T00:00:00Z`);
  const startsAt = atUtcMidnight(checkin);
  const endsAt = atUtcMidnight(checkout);
  if (startsAt >= endsAt) {
    throw new ArgumentError(`--checkin must be earlier than --checkout (got ${checkin} >= ${checkout})`);
  }
}
75
+
76
// Registers the `ctrip hotel-search` command. The handler validates its
// arguments, opens the Ctrip hotel list page for the city/date range, waits
// for the SSR payload (or a captcha gate), and projects the embedded hotel
// list into the column shape declared below.
cli({
  site: 'ctrip',
  name: 'hotel-search',
  access: 'read',
  description: '搜索携程酒店列表(按城市 + 入住/离店日期)',
  domain: 'hotels.ctrip.com',
  strategy: Strategy.COOKIE,
  browser: true,
  // NOTE(review): presumably disables automatic pre-navigation — the handler
  // calls page.goto itself with the full query string. Confirm against the
  // registry's definition of this flag.
  navigateBefore: false,
  args: [
    { name: 'city', required: true, positional: true, help: 'Numeric Ctrip city ID (use `ctrip search` or `ctrip hotel-suggest` to discover)' },
    { name: 'checkin', required: true, help: 'Check-in date (YYYY-MM-DD)' },
    { name: 'checkout', required: true, help: 'Check-out date (YYYY-MM-DD)' },
    { name: 'limit', type: 'int', default: DEFAULT_LIMIT, help: `Number of hotels (${MIN_LIMIT}-${MAX_LIMIT}); SSR first page returns ~13 entries` },
  ],
  columns: [
    'rank', 'hotelId', 'name', 'enName',
    'star', 'score', 'scoreLabel', 'reviewCount',
    'cityName', 'district', 'address',
    'lat', 'lon',
    'price', 'currency', 'url',
  ],
  func: async (page, kwargs) => {
    // Validate every argument before touching the browser so bad input
    // fails without a navigation.
    const cityId = parseCityId(kwargs.city);
    const checkin = parseIsoDate('checkin', kwargs.checkin);
    const checkout = parseIsoDate('checkout', kwargs.checkout);
    assertCheckinBeforeCheckout(checkin, checkout);
    const limit = parseHotelLimit(kwargs.limit);

    const url = `https://hotels.ctrip.com/hotels/list?city=${cityId}&checkin=${checkin}&checkout=${checkout}`;
    await page.goto(url);
    // Resolves to 'captcha', 'content' or 'timeout' (see WAIT_FOR_SSR_JS).
    const waitResult = await page.evaluate(WAIT_FOR_SSR_JS);
    if (waitResult === 'captcha') {
      throw new AuthRequiredError('hotels.ctrip.com', 'Ctrip is asking for a captcha; complete it in your browser session and retry');
    }
    if (waitResult !== 'content') {
      throw new CommandExecutionError(`Ctrip hotel-search page did not expose SSR hotel list (state=${String(waitResult)})`);
    }
    const raw = await page.evaluate(EXTRACT_HOTELS_JS);
    // EXTRACT_HOTELS_JS yields null when the list is no longer an array.
    if (!Array.isArray(raw)) {
      throw new CommandExecutionError('Ctrip hotel-search returned malformed SSR hotel list');
    }
    if (raw.length === 0) {
      throw new EmptyResultError('ctrip hotel-search', `No hotels for city=${cityId} on ${checkin} → ${checkout}`);
    }
    // `rank` comes from the position in the raw list, so it is assigned
    // before rows lacking hotelId/name are dropped and before the limit is
    // applied; ranks may therefore have gaps.
    const rows = raw
      .map((entry, i) => mapHotelRow(entry, i))
      .filter((row) => row.hotelId && row.name)
      .slice(0, limit);
    // A non-empty raw list that yields no usable rows is reported as an
    // execution error rather than as an empty result.
    if (rows.length === 0) {
      throw new CommandExecutionError('Ctrip hotel-search SSR rows were missing required hotelId/name anchors');
    }
    return rows;
  },
});
131
+
132
+ export const __test__ = { parseHotelLimit, assertCheckinBeforeCheckout, WAIT_FOR_SSR_JS, EXTRACT_HOTELS_JS };
@@ -172,4 +172,302 @@ export function mapSuggestRow(item, index) {
172
172
  };
173
173
  }
174
174
 
175
+ /* --------- Helpers shared by hotel-search / flight (browser-context) ---------- */
176
+
177
+ const ISO_DATE_RE = /^(\d{4})-(\d{2})-(\d{2})$/;
178
+
179
/**
 * Check that a value is a real calendar day written as YYYY-MM-DD and hand
 * back the trimmed string unchanged. Nothing is coerced or shifted between
 * timezones.
 *
 * @param {string} name - Flag name (without dashes) used in error messages.
 * @param {unknown} raw - Value supplied for the flag.
 * @returns {string} The trimmed YYYY-MM-DD string.
 * @throws {ArgumentError} When the value is missing, malformed, has an
 *   out-of-range month/day, or does not exist on the calendar.
 */
export function parseIsoDate(name, raw) {
  const isMissing = raw === undefined || raw === null || raw === '';
  if (isMissing) {
    throw new ArgumentError(`--${name} is required (YYYY-MM-DD)`);
  }

  const value = String(raw).trim();
  const match = ISO_DATE_RE.exec(value);
  if (match === null) {
    throw new ArgumentError(`--${name} must be YYYY-MM-DD, got ${JSON.stringify(raw)}`);
  }

  const [year, month, day] = match.slice(1).map(Number);
  const monthInRange = month >= 1 && month <= 12;
  const dayInRange = day >= 1 && day <= 31;
  if (!(monthInRange && dayInRange)) {
    throw new ArgumentError(`--${name} has invalid month/day: ${value}`);
  }

  // Round-trip through UTC date math: an impossible day such as 2026-02-30
  // rolls over into the next month and no longer matches its own parts.
  const utc = new Date(Date.UTC(year, month - 1, day));
  const roundTrips = utc.getUTCFullYear() === year
    && utc.getUTCMonth() === month - 1
    && utc.getUTCDate() === day;
  if (!roundTrips) {
    throw new ArgumentError(`--${name} is not a real calendar date: ${value}`);
  }
  return value;
}
205
+
206
/**
 * Normalize a 3-letter IATA airport / metro code to uppercase.
 * Ctrip URL accepts both single-airport (PEK / PVG) and metro-group (BJS / SHA) codes.
 *
 * @param {string} name - Flag name (without dashes) used in error messages.
 * @param {unknown} raw - Value supplied for the flag.
 * @returns {string} The trimmed, uppercased code.
 * @throws {ArgumentError} When the value is missing or not exactly three
 *   ASCII letters after trimming.
 */
export function parseIataCode(name, raw) {
  const isMissing = raw === undefined || raw === null || raw === '';
  if (isMissing) {
    throw new ArgumentError(`--${name} is required (3-letter IATA code, e.g. PEK, SHA)`);
  }

  const code = String(raw).trim().toUpperCase();
  if (/^[A-Z]{3}$/.test(code)) {
    return code;
  }
  throw new ArgumentError(`--${name} must be a 3-letter IATA code, got ${JSON.stringify(raw)}`);
}
220
+
221
/**
 * Validate a numeric Ctrip city ID (as returned by `ctrip search` /
 * `ctrip hotel-suggest`) and return it as a number.
 *
 * @param {unknown} raw - Value supplied for `--city`.
 * @returns {number} The positive integer city ID.
 * @throws {ArgumentError} When the value is missing or does not convert to
 *   a positive integer.
 */
export function parseCityId(raw) {
  const isMissing = raw === undefined || raw === null || raw === '';
  if (isMissing) {
    throw new ArgumentError('--city is required (numeric city ID from `ctrip search` or `ctrip hotel-suggest`)');
  }

  const cityId = Number(raw);
  // Number.isInteger already excludes NaN and ±Infinity.
  const isPositiveInteger = Number.isInteger(cityId) && cityId > 0;
  if (!isPositiveInteger) {
    throw new ArgumentError(`--city must be a positive integer city ID, got ${JSON.stringify(raw)}`);
  }
  return cityId;
}
234
+
235
/**
 * Pick the best lat/lon from a Ctrip hotel `positionInfo.mapCoordinate` array.
 *
 * Each entry has a `coordinateType` (1=WGS84, 2=GCJ02, 3=BD09 / Baidu). We prefer
 * WGS84 when present (most portable), then fall through to the remaining
 * systems and finally to entries of any other type.
 *
 * Coordinates arrive as strings in the API. A missing value (`null`,
 * `undefined`, empty or whitespace-only string) is treated as "no coordinate"
 * rather than being coerced — `Number(null)` and `Number('')` are both 0,
 * which would otherwise turn a half-populated entry into a bogus
 * `{ lat: 0, lon: <real> }` pair.
 *
 * @param {unknown} mapCoordinate - The `mapCoordinate` array, if any.
 * @returns {{lat: number|null, lon: number|null}} The first usable pair in
 *   preference order, or both `null` when no entry has a finite, non-(0,0)
 *   latitude and longitude.
 */
export function pickHotelMapCoords(mapCoordinate) {
  if (!Array.isArray(mapCoordinate) || mapCoordinate.length === 0) {
    return { lat: null, lon: null };
  }

  // Only numbers and non-blank strings are candidates; everything else
  // becomes NaN so the isFinite check below rejects the entry.
  const toCoord = (value) => {
    if (typeof value === 'number') return value;
    if (typeof value === 'string' && value.trim() !== '') return Number(value);
    return Number.NaN;
  };

  // Order: WGS84 (1) → GCJ02 (2) → BD09 (3) → whatever exists
  const ranking = (entry) => {
    const t = Number(entry?.coordinateType);
    if (t === 1) return 0;
    if (t === 2) return 1;
    if (t === 3) return 2;
    return 3;
  };

  // Sort a copy so the caller's array is left untouched.
  const sorted = [...mapCoordinate].sort((a, b) => ranking(a) - ranking(b));
  for (const entry of sorted) {
    const lat = toCoord(entry?.latitude);
    const lon = toCoord(entry?.longitude);
    // (0, 0) is treated as a placeholder, not a real hotel location.
    if (Number.isFinite(lat) && Number.isFinite(lon) && (lat !== 0 || lon !== 0)) {
      return { lat, lon };
    }
  }
  return { lat: null, lon: null };
}
264
+
265
/**
 * Project one entry of `__NEXT_DATA__.props.pageProps.initListData.hotelList`
 * into the adapter's stable column shape.
 *
 * No silent fallbacks — every field is `string|number|null`, never `''` masquerading
 * as "no data" (see typed-errors.md §"scalar sentinels are anti-pattern").
 *
 * @param {object} entry - Raw hotel entry; `hotelInfo` and `roomInfo` are
 *   read from it and either may be absent.
 * @param {number} index - Zero-based position in the raw list; `rank` is
 *   `index + 1`.
 * @returns {object} Row with the keys rank, hotelId, name, enName, star,
 *   score, scoreLabel, reviewCount, cityName, district, address, lat, lon,
 *   price, currency, url.
 */
export function mapHotelRow(entry, index) {
  const hotelInfo = entry?.hotelInfo ?? {};
  // Each sub-object is optional; read it through one accessor so a missing
  // section degrades to an empty object.
  const section = (key) => hotelInfo[key] ?? {};
  const trimmedOrNull = (value) => (value ? String(value).trim() : null);
  const positiveOrNull = (value) => (Number.isFinite(value) && value > 0 ? value : null);

  const names = section('nameInfo');
  const comments = section('commentInfo');
  const position = section('positionInfo');
  const roomList = Array.isArray(entry?.roomInfo) ? entry.roomInfo : [];
  // Price and currency are taken from the first room only.
  const pricing = (roomList[0] ?? {}).priceInfo ?? {};

  const rawHotelId = section('summary').hotelId;
  const hotelId = rawHotelId ? String(rawHotelId) : null;
  const coords = pickHotelMapCoords(position.mapCoordinate);

  // commenterNumber arrives as "13,966条点评" — strip non-digits to int, else null.
  const reviewDigits = comments.commenterNumber
    ? String(comments.commenterNumber).replace(/[^\d]/g, '')
    : '';
  const reviewCount = reviewDigits ? Number(reviewDigits) : null;
  const parsedScore = comments.commentScore ? Number(comments.commentScore) : null;

  return {
    rank: index + 1,
    hotelId,
    name: trimmedOrNull(names.name),
    enName: trimmedOrNull(names.enName),
    // star and price are checked as-is (no numeric coercion), so a
    // non-number value yields null.
    star: positiveOrNull(section('hotelStar').star),
    score: positiveOrNull(parsedScore),
    scoreLabel: trimmedOrNull(comments.commentDescription),
    reviewCount,
    cityName: trimmedOrNull(position.cityName),
    district: trimmedOrNull(position.positionDesc),
    address: trimmedOrNull(position.address),
    lat: coords.lat,
    lon: coords.lon,
    price: positiveOrNull(pricing.price),
    currency: trimmedOrNull(pricing.currency),
    url: hotelId ? `https://hotels.ctrip.com/hotels/detail/?hotelid=${hotelId}` : null,
  };
}
316
+
317
/**
 * Build the browser-context IIFE that extracts flight rows from `.flight-list`.
 *
 * Flights are rendered as `.flight-list > span > div` cards. Each card's innerText
 * has a stable ordering (verified 2026-05-12 on bjs→sha route):
 *
 *   [airline, flightNo, aircraft, lowPriceTag?, depTime, depAirport,
 *    arrTime, arrAirport, terminal?, savings?, promo?, currency, price,
 *    priceSuffix, cabin, cta]
 *
 * `lowPriceTag` (e.g. "当日低价") + `terminal` (e.g. "T2") + `savings` + `promo`
 * are optional — we use position-of-first-time-match to anchor and parse around it.
 *
 * What the generated script does, per card:
 *   - collects the card's non-empty text nodes, in document order, as "chunks";
 *   - skips the card when it has fewer than 8 chunks, when chunk 0 / chunk 1
 *     are not an airline name and a flight number, when fewer than two HH:MM
 *     chunks exist, or when either airport chunk is missing;
 *   - reads `terminal` only from the chunk right after the arrival airport,
 *     and only when it is `T` followed by a single digit;
 *   - takes `currency` / `price` from the first currency-symbol chunk that is
 *     immediately followed by a numeric chunk;
 *   - takes `cabin` from the last chunk ending in "舱".
 *
 * NOTE(review): an earlier version of this comment said a host is "baked in"
 * so `normalizeUrl` can resolve booking links. The function takes no
 * parameters and the script below emits no URLs, so that no longer applies.
 *
 * @returns {string} JavaScript source of an IIFE that evaluates to an array
 *   of rows with the keys airline, flightNo, aircraft, departureTime,
 *   departureAirport, arrivalTime, arrivalAirport, terminal, price,
 *   currency, cabin.
 */
export function buildFlightExtractJs() {
  return `
(() => {
const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
const isTime = (s) => /^([01]?\\d|2[0-3]):[0-5]\\d$/.test(s);
const isCurrency = (s) => /^[¥$€£]$/.test(s);
const isPriceDigits = (s) => /^\\d+([.,]\\d+)?$/.test(s);
const isFlightNo = (s) => /^[A-Z0-9]{2}\\d{3,4}[A-Z]?$/.test(s);

const rows = [];
document.querySelectorAll('.flight-list > span > div').forEach((card) => {
// Collect ordered text chunks (text nodes only, skip whitespace-only).
const chunks = [];
const walk = (node) => {
for (const c of node.childNodes) {
if (c.nodeType === 3) {
const t = cleanText(c.textContent);
if (t) chunks.push(t);
} else if (c.nodeType === 1) {
walk(c);
}
}
};
walk(card);
if (chunks.length < 8) return;

// Anchor on first HH:MM — that's depTime; depAirport is immediately after.
const firstTimeIdx = chunks.findIndex(isTime);
if (firstTimeIdx < 1) return;
const airline = chunks[0];
const flightNo = chunks[1] || null;
if (!airline || !isFlightNo(flightNo)) return;
const aircraft = chunks[2] && !isTime(chunks[2]) ? chunks[2] : null;

const depTime = chunks[firstTimeIdx];
const depAirport = chunks[firstTimeIdx + 1] || null;
// Second HH:MM after depTime is arrTime
const arrTimeIdx = chunks.findIndex((c, i) => i > firstTimeIdx && isTime(c));
if (arrTimeIdx < 0) return;
const arrTime = chunks[arrTimeIdx];
const arrAirport = chunks[arrTimeIdx + 1] || null;
if (!depAirport || !arrAirport) return;
// Optional terminal chunk right after arrAirport (matches /^T\\d$/ or single letter)
let terminal = null;
if (arrTimeIdx + 2 < chunks.length && /^T\\d$/.test(chunks[arrTimeIdx + 2])) {
terminal = chunks[arrTimeIdx + 2];
}

// Price: scan for currency symbol then a digit-only chunk
let price = null;
let currency = null;
for (let i = 0; i < chunks.length - 1; i++) {
if (isCurrency(chunks[i]) && isPriceDigits(chunks[i + 1])) {
currency = chunks[i];
price = Number(chunks[i + 1].replace(',', ''));
break;
}
}
// Cabin: scan from end for first non-CTA Chinese chunk ending in "舱"
let cabin = null;
for (let i = chunks.length - 1; i >= 0; i--) {
if (/舱$/.test(chunks[i])) { cabin = chunks[i]; break; }
}

rows.push({
airline,
flightNo,
aircraft,
departureTime: depTime,
departureAirport: depAirport,
arrivalTime: arrTime,
arrivalAirport: arrAirport,
terminal,
price,
currency,
cabin,
});
});
return rows;
})()
`;
}
414
+
415
/**
 * Build a scroll-until-enough IIFE for flights/hotels DOM-card pagination.
 *
 * Mirrors `clis/xiaohongshu/search.js#buildScrollUntilJs` (PR #1487) — counts a
 * caller-supplied row selector, scrolls until count >= target / DOM plateau /
 * maxScrolls. Returns final row count so the caller can decide whether to
 * surface an EmptyResultError. (xiaohongshu's helper hardcodes
 * `section.note-item`; this generic version takes a selector.)
 *
 * Only visible rows are counted (not `display: none`, `visibility: hidden`,
 * zero opacity, or zero-sized). After each scroll the script waits until the
 * document grows (plus a 200 ms settle) or 2500 ms pass; two consecutive
 * rounds with an unchanged count end the loop early.
 *
 * @param {string} rowSelector - CSS selector matching one result row.
 * @param {number} targetCount - Stop once this many visible rows exist (1-100).
 * @param {number} [maxScrolls=8] - Upper bound on scroll rounds (1-30).
 * @returns {string} JavaScript source of an async IIFE that resolves to the
 *   final visible row count.
 * @throws {ArgumentError} When `targetCount` or `maxScrolls` is out of range,
 *   or when `rowSelector` is not a non-empty string.
 */
export function buildScrollUntilJs(rowSelector, targetCount, maxScrolls = 8) {
  if (!Number.isInteger(targetCount) || targetCount < 1 || targetCount > 100) {
    throw new ArgumentError(`targetCount must be an integer between 1 and 100, got ${JSON.stringify(targetCount)}`);
  }
  if (!Number.isInteger(maxScrolls) || maxScrolls < 1 || maxScrolls > 30) {
    throw new ArgumentError(`maxScrolls must be an integer between 1 and 30, got ${JSON.stringify(maxScrolls)}`);
  }
  // Without this check `undefined` would be interpolated as
  // `const sel = undefined;` (silently matching nothing) and an empty string
  // would make querySelectorAll throw inside the page. Checked last so the
  // existing errors keep their precedence.
  if (typeof rowSelector !== 'string' || rowSelector.trim() === '') {
    throw new ArgumentError(`rowSelector must be a non-empty CSS selector string, got ${JSON.stringify(rowSelector)}`);
  }
  // JSON.stringify embeds the selector as a safely quoted string literal.
  return `
(async () => {
const sel = ${JSON.stringify(rowSelector)};
const isVisible = (el) => {
const style = window.getComputedStyle(el);
if (style.display === 'none' || style.visibility === 'hidden' || Number(style.opacity) === 0) return false;
const rect = el.getBoundingClientRect();
return rect.width > 0 && rect.height > 0;
};
const countItems = () => Array.from(document.querySelectorAll(sel)).filter(isVisible).length;
let lastCount = countItems();
let plateauRounds = 0;
for (let i = 0; i < ${maxScrolls}; i++) {
if (countItems() >= ${targetCount}) break;
const lastHeight = document.body.scrollHeight;
window.scrollTo(0, lastHeight);
await new Promise((resolve) => {
let to;
const ob = new MutationObserver(() => {
if (document.body.scrollHeight > lastHeight) {
clearTimeout(to);
ob.disconnect();
setTimeout(resolve, 200);
}
});
ob.observe(document.body, { childList: true, subtree: true });
to = setTimeout(() => { ob.disconnect(); resolve(null); }, 2500);
});
const newCount = countItems();
if (newCount === lastCount) {
plateauRounds++;
if (plateauRounds >= 2) break;
} else {
plateauRounds = 0;
lastCount = newCount;
}
}
return countItems();
})()
`;
}
472
+
175
473
  export const __test__ = { ENDPOINT, MIN_LIMIT, MAX_LIMIT };
@@ -163,6 +163,19 @@ function getTurnsScript() {
163
163
  ) {
164
164
  return 'Assistant';
165
165
  }
166
+ // 2026-05 Doubao DOM refactor: no more receive-message / bg-g-receive-msg-bubble
167
+ // markers on assistant turns. Wrappers are now [class*="inner-item-"] /
168
+ // [class*="top-item-"] and the only reliable assistant signal is the
169
+ // .flow-markdown-body content container WITHOUT any send-bubble marker.
170
+ if (
171
+ (root.matches('[class*="inner-item-"], [class*="top-item-"]')
172
+ || root.closest('[class*="inner-item-"], [class*="top-item-"]'))
173
+ && (root.matches('.flow-markdown-body') || root.querySelector('.flow-markdown-body'))
174
+ && !root.matches('[class*="bg-g-send-msg-bubble"]')
175
+ && !root.querySelector('[class*="bg-g-send-msg-bubble"]')
176
+ ) {
177
+ return 'Assistant';
178
+ }
166
179
  return '';
167
180
  };
168
181
 
@@ -223,6 +236,10 @@ function getTurnsScript() {
223
236
  if (!messageList) return [];
224
237
 
225
238
  const itemSelectors = [
239
+ // 2026-05 Doubao DOM refactor wrappers (prepended; outer ones win via
240
+ // ancestor-keep dedup below).
241
+ '[class*="inner-item-"]',
242
+ '[class*="top-item-"]',
226
243
  '[class*="item-kDun2N"]',
227
244
  '[data-testid="union_message"]',
228
245
  '[data-testid="message-block-container"]',
@@ -1,3 +1,4 @@
1
+ import { JSDOM } from 'jsdom';
1
2
  import { describe, expect, it, vi } from 'vitest';
2
3
  import { CommandExecutionError } from '@jackwener/opencli/errors';
3
4
  import {
@@ -145,6 +146,28 @@ describe('doubao send strategy', () => {
145
146
  });
146
147
  });
147
148
  describe('doubao receive strategy', () => {
149
// Evaluate the turns script against an HTML fixture in jsdom. `innerText`
// and `getBoundingClientRect` are overridden on HTMLElement.prototype with
// fixed stand-ins: innerText mirrors textContent and every element reports
// a 100x24 box.
function runTurnsScript(html) {
  const { window } = new JSDOM(html, { url: 'https://www.doubao.com/chat', runScripts: 'outside-only' });
  const elementProto = window.HTMLElement.prototype;

  Object.defineProperty(elementProto, 'innerText', {
    configurable: true,
    get() {
      return this.textContent || '';
    },
  });

  // A fresh rect object is produced on every call.
  const makeRect = () => ({
    width: 100,
    height: 24,
    top: 0,
    left: 0,
    right: 100,
    bottom: 24,
    x: 0,
    y: 0,
    toJSON: () => ({}),
  });
  elementProto.getBoundingClientRect = () => makeRect();

  return window.eval(__test__.getTurnsScript());
}
170
+
148
171
  it('keeps both the new skin selectors and the older structural fallbacks in the turns script', () => {
149
172
  const turnsScript = __test__.getTurnsScript();
150
173
  expect(turnsScript).toContain('[class*="message-list-S2Fv2S"]');
@@ -157,6 +180,44 @@ describe('doubao receive strategy', () => {
157
180
  expect(turnsScript).toContain('[data-testid="message-block-container"]');
158
181
  });
159
182
 
183
+ it('includes the 2026-05 doubao DOM-refactor inner-item / top-item wrappers and the flow-markdown-body assistant fallback', () => {
184
+ const turnsScript = __test__.getTurnsScript();
185
+ // New wrappers added to itemSelectors so message roots resolve under the
186
+ // refactored DOM where the legacy item-kDun2N / union_message / message-block-container
187
+ // / data-message-id selectors no longer match.
188
+ expect(turnsScript).toContain('[class*="inner-item-"]');
189
+ expect(turnsScript).toContain('[class*="top-item-"]');
190
+ // Assistant fallback: post-refactor doubao no longer emits receive-message /
191
+ // bg-g-receive-msg-bubble markup. Only signal is .flow-markdown-body content
192
+ // container without send-bubble.
193
+ expect(turnsScript).toContain('.flow-markdown-body');
194
+ });
195
+
196
+ it('extracts clean assistant turns from the 2026-05 wrapper DOM without using whole-page chrome', () => {
197
+ const turns = runTurnsScript(`
198
+ <main>
199
+ <aside>历史对话</aside>
200
+ <section class="message-list-S2Fv2S">
201
+ <div class="top-item-user">
202
+ <div class="inner-item-user">
203
+ <div class="bg-g-send-msg-bubble">测试一下,只回复OK</div>
204
+ </div>
205
+ </div>
206
+ <div class="top-item-assistant">
207
+ <div class="inner-item-assistant">
208
+ <div class="flow-markdown-body"><p>OK</p></div>
209
+ </div>
210
+ </div>
211
+ </section>
212
+ </main>
213
+ `);
214
+
215
+ expect(turns).toEqual([
216
+ { Role: 'User', Text: '测试一下,只回复OK' },
217
+ { Role: 'Assistant', Text: 'OK' },
218
+ ]);
219
+ });
220
+
160
221
  it('extends transcript-noise cleanup for the current zh-CN chrome copy', () => {
161
222
  const transcriptScript = __test__.getTranscriptLinesScript();
162
223
  expect(transcriptScript).toContain('请仔细甄别');
@@ -29,13 +29,22 @@ cli({
29
29
  const lang = encodeURIComponent(args.lang);
30
30
  const url = `https://www.google.com/search?q=${keyword}&hl=${lang}&num=${limit}`;
31
31
  await page.goto(url);
32
- await page.wait(2);
33
- const results = await page.evaluate(`
32
+ // Wait until at least one SERP title link is present. On Chrome 148 /
33
+ // Linux Wayland, DOM stability can be reached before #rso anchors are
34
+ // populated, making browser execution look visually correct while the
35
+ // adapter extracts an empty array.
36
+ try {
37
+ await page.wait({ selector: '#rso a h3', timeout: 5 });
38
+ }
39
+ catch {
40
+ await page.wait(2);
41
+ }
42
+ const wrapper = await page.evaluate(`
34
43
  (function() {
35
44
  var results = [];
36
45
  var seenUrls = {};
37
46
  var rso = document.querySelector('#rso');
38
- if (!rso) return results;
47
+ if (!rso) return {items: results};
39
48
 
40
49
  // -- Featured snippet (scoped to #rso to avoid matching unrelated elements) --
41
50
  var featuredEl = rso.querySelector('.xpdopen .hgKElc')
@@ -63,7 +72,7 @@ cli({
63
72
 
64
73
  var href = link.href || '';
65
74
  // Skip non-http, Google internal links, and duplicates
66
- if (!href.match(/^https?:\\/\\//)) continue;
75
+ if (!(href.startsWith('http://') || href.startsWith('https://'))) continue;
67
76
  if (href.indexOf('google.com/search') !== -1) continue;
68
77
  if (seenUrls[href]) continue;
69
78
  seenUrls[href] = true;
@@ -117,10 +126,11 @@ cli({
117
126
  }
118
127
  }
119
128
 
120
- return results;
129
+ return {items: results};
121
130
  })()
122
131
  `);
123
- if (!Array.isArray(results) || results.length === 0) {
132
+ const results = (wrapper && wrapper.items) || [];
133
+ if (results.length === 0) {
124
134
  throw new CliError('NOT_FOUND', 'No search results found', 'Try a different keyword or check for CAPTCHA');
125
135
  }
126
136
  return results;
@@ -1,4 +1,5 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
+ import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
2
3
  import { clampInt, requireNonEmptyQuery } from '../_shared/common.js';
3
4
 
4
5
  cli({
@@ -18,12 +19,17 @@ cli({
18
19
  const limit = clampInt(kwargs.limit, 10, 1, 20);
19
20
  const query = requireNonEmptyQuery(kwargs.query);
20
21
  await page.goto(`https://scholar.google.com/scholar?q=${encodeURIComponent(query)}&hl=zh-CN`);
21
- await page.wait(3);
22
- const data = await page.evaluate(`
22
+ try {
23
+ await page.wait({ selector: '.gs_r.gs_or.gs_scl', timeout: 5 });
24
+ } catch {
25
+ await page.wait(3);
26
+ }
27
+ const wrapper = await page.evaluate(`
23
28
  (() => {
24
29
  const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
25
30
  const results = [];
26
- for (const el of document.querySelectorAll('.gs_r.gs_or.gs_scl')) {
31
+ const resultCards = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
32
+ for (const el of resultCards) {
27
33
  const container = el.querySelector('.gs_ri') || el;
28
34
  const titleEl = container.querySelector('.gs_rt a, h3 a');
29
35
  const title = normalize(titleEl?.textContent);
@@ -50,9 +56,18 @@ cli({
50
56
  });
51
57
  if (results.length >= ${limit}) break;
52
58
  }
53
- return results;
59
+ return { items: results, resultCount: resultCards.length };
54
60
  })()
55
61
  `);
56
- return Array.isArray(data) ? data : [];
62
+ if (!wrapper || typeof wrapper !== 'object' || !Array.isArray(wrapper.items)) {
63
+ throw new CommandExecutionError('Google Scholar search returned an unexpected payload shape');
64
+ }
65
+ if (wrapper.items.length === 0) {
66
+ if (Number(wrapper.resultCount) > 0) {
67
+ throw new CommandExecutionError('Google Scholar result cards were present but no rows could be extracted');
68
+ }
69
+ throw new EmptyResultError('google-scholar/search', 'Try a different query or check whether Google Scholar returned a CAPTCHA.');
70
+ }
71
+ return wrapper.items;
57
72
  },
58
73
  });