browser-web-search 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ /* @meta
2
+ {
3
+ "name": "toutiao/feed",
4
+ "description": "今日头条分类新闻(支持关键词过滤)",
5
+ "domain": "www.toutiao.com",
6
+ "args": {
7
+ "category": {"required": false, "description": "分类: hot/tech/entertainment/sports/finance/military/world/game/car (默认 hot)"},
8
+ "keyword": {"required": false, "description": "关键词过滤(可选)"},
9
+ "count": {"required": false, "description": "返回数量 (默认 20, 最多 50)"}
10
+ },
11
+ "readOnly": true,
12
+ "example": "bws toutiao/feed tech --keyword AI"
13
+ }
14
+ */
15
+
16
+ async function(args) {
17
+ const categoryMap = {
18
+ 'all': '__all__',
19
+ 'hot': 'news_hot',
20
+ 'tech': 'news_tech',
21
+ 'entertainment': 'news_entertainment',
22
+ 'sports': 'news_sports',
23
+ 'finance': 'news_finance',
24
+ 'military': 'news_military',
25
+ 'world': 'news_world',
26
+ 'game': 'news_game',
27
+ 'car': 'news_car',
28
+ 'society': 'news_society',
29
+ 'fashion': 'news_fashion',
30
+ 'travel': 'news_travel',
31
+ 'history': 'news_history',
32
+ 'food': 'news_food'
33
+ };
34
+
35
+ const category = categoryMap[args.category] || categoryMap['hot'];
36
+ const maxCount = Math.min(parseInt(args.count) || 20, 50);
37
+ const keyword = args.keyword ? args.keyword.toLowerCase() : null;
38
+
39
+ const resp = await fetch('https://www.toutiao.com/api/pc/feed/?category=' + category + '&max_behot_time=0', {
40
+ credentials: 'include'
41
+ });
42
+
43
+ if (!resp.ok) {
44
+ return {error: 'HTTP ' + resp.status, hint: 'Open www.toutiao.com in browser first'};
45
+ }
46
+
47
+ let data;
48
+ try {
49
+ data = await resp.json();
50
+ } catch (e) {
51
+ return {error: 'Failed to parse response', hint: 'Toutiao API may have changed'};
52
+ }
53
+
54
+ if (!data || !data.data) {
55
+ return {error: 'No data returned', hint: 'Open www.toutiao.com in browser first'};
56
+ }
57
+
58
+ const results = [];
59
+ for (const item of data.data) {
60
+ const title = item.title || '';
61
+ const abstract = item.abstract || '';
62
+ const keywords = item.keywords || '';
63
+ const source = item.source || item.media_name || '';
64
+
65
+ // Filter by keyword if provided
66
+ if (keyword) {
67
+ const searchText = (title + ' ' + abstract + ' ' + keywords).toLowerCase();
68
+ if (!searchText.includes(keyword)) continue;
69
+ }
70
+
71
+ results.push({
72
+ title,
73
+ snippet: abstract.substring(0, 300),
74
+ source,
75
+ time: item.datetime || '',
76
+ url: item.article_url || item.display_url || item.share_url || '',
77
+ tag: item.tag || '',
78
+ hot_value: item.hot || 0,
79
+ comment_count: item.comment_count || 0
80
+ });
81
+
82
+ if (results.length >= maxCount) break;
83
+ }
84
+
85
+ return {
86
+ category: args.category || 'hot',
87
+ keyword: args.keyword || null,
88
+ count: results.length,
89
+ total_fetched: data.data.length,
90
+ results
91
+ };
92
+ }
@@ -1,146 +1,71 @@
1
1
  /* @meta
2
2
  {
3
3
  "name": "toutiao/search",
4
- "description": "今日头条搜索",
5
- "domain": "so.toutiao.com",
4
+ "description": "今日头条热点搜索(从热点新闻中过滤关键词)",
5
+ "domain": "www.toutiao.com",
6
6
  "args": {
7
7
  "query": {"required": true, "description": "搜索关键词"},
8
- "count": {"required": false, "description": "返回结果数量 (默认 10, 最多 20)"}
8
+ "count": {"required": false, "description": "返回结果数量 (默认 10, 最多 50)"}
9
9
  },
10
10
  "readOnly": true,
11
- "example": "ping-browser site toutiao/search AI"
11
+ "example": "bws toutiao/search OpenClaw"
12
12
  }
13
13
  */
14
14
 
15
15
  async function(args) {
16
16
  if (!args.query) return {error: 'Missing argument: query', hint: 'Provide a search keyword'};
17
- const count = Math.min(parseInt(args.count) || 10, 20);
18
-
19
- const url = 'https://so.toutiao.com/search?keyword=' + encodeURIComponent(args.query) + '&pd=information&dvpf=pc';
20
- const resp = await fetch(url, {credentials: 'include'});
21
- if (!resp.ok) return {error: 'HTTP ' + resp.status, hint: 'Open so.toutiao.com in ping-browser first'};
22
-
23
- const html = await resp.text();
24
- const parser = new DOMParser();
25
- const doc = parser.parseFromString(html, 'text/html');
26
-
27
- const results = [];
28
-
29
- // Helper: extract clean article URL from jump redirect chain
30
- function extractArticleUrl(href) {
31
- if (!href) return '';
32
- try {
33
- // Decode nested jump URLs to find the real toutiao article URL
34
- let decoded = href;
35
- for (let i = 0; i < 5; i++) {
36
- const match = decoded.match(/toutiao\.com(?:%2F|\/)+a?(\d{15,})/);
37
- if (match) return 'https://www.toutiao.com/article/' + match[1] + '/';
38
- const groupMatch = decoded.match(/group(?:%2F|\/)(\d{15,})/);
39
- if (groupMatch) return 'https://www.toutiao.com/article/' + groupMatch[1] + '/';
40
- decoded = decodeURIComponent(decoded);
41
- }
42
- } catch (e) {}
43
- return href;
17
+ const maxCount = Math.min(parseInt(args.count) || 10, 50);
18
+ const query = args.query.toLowerCase();
19
+
20
+ // Fetch hot news feed
21
+ const resp = await fetch('https://www.toutiao.com/api/pc/feed/?category=news_hot&max_behot_time=0', {
22
+ credentials: 'include'
23
+ });
24
+
25
+ if (!resp.ok) {
26
+ return {error: 'HTTP ' + resp.status, hint: 'Open www.toutiao.com in browser first'};
44
27
  }
45
28
 
46
- // Strategy 1: SSR HTML uses cs-card containers
47
- const cards = doc.querySelectorAll('.cs-card');
48
- for (const card of cards) {
49
- const titleLink = card.querySelector('a[href*="search/jump"]');
50
- if (!titleLink) continue;
51
-
52
- const title = (titleLink.textContent || '').trim();
53
- if (!title || title.length < 2) continue;
54
- // Skip non-result links like "去西瓜搜" / "去抖音搜"
55
- if (title.includes('去西瓜搜') || title.includes('去抖音搜')) continue;
56
-
57
- const articleUrl = extractArticleUrl(titleLink.getAttribute('href') || '');
58
-
59
- // Extract snippet & source & time from card text
60
- const fullText = (card.textContent || '').trim();
61
- // Remove the title (may appear twice) to get the rest
62
- let rest = fullText;
63
- const titleIdx = rest.indexOf(title);
64
- if (titleIdx >= 0) rest = rest.substring(titleIdx + title.length);
65
- // Remove second occurrence of title if present
66
- const titleIdx2 = rest.indexOf(title);
67
- if (titleIdx2 >= 0) rest = rest.substring(titleIdx2 + title.length);
68
- rest = rest.trim();
69
-
70
- let snippet = '';
71
- let source = '';
72
- let time = '';
73
-
74
- // Remove trailing comment count like "1评论" or "23评论" first
75
- rest = rest.replace(/\d+评论/g, '').trim();
76
-
77
- // Extract time from the tail first
78
- // Time patterns: "3天前", "12小时前", "5分钟前", "前天17:23", "昨天08:00", "2024-01-01"
79
- // The number-based patterns (N天前 etc.) must NOT be preceded by a digit
80
- const timeMatch = rest.match(/((?<=[^\d])|^)(\d{1,2}(?:小时|分钟|天)前|前天[\d:]*|昨天[\d:]*|\d{4}[-/.]\d{2}[-/.]\d{2}.*)$/);
81
- if (timeMatch) {
82
- time = timeMatch[2] ? timeMatch[2].trim() : timeMatch[0].trim();
83
- rest = rest.substring(0, rest.length - timeMatch[0].length).trim();
84
- }
85
-
86
- // Source is the short text at the end (author/media name, typically 2-20 chars)
87
- // Pattern: "...snippet content...SourceName"
88
- const sourceMatch = rest.match(/^([\s\S]+?)([\u4e00-\u9fa5A-Za-z][\u4e00-\u9fa5A-Za-z0-9_\s]{1,19})$/);
89
- if (sourceMatch && sourceMatch[1].length > 10) {
90
- snippet = sourceMatch[1].trim().substring(0, 300);
91
- source = sourceMatch[2].trim();
92
- } else {
93
- snippet = rest.substring(0, 300);
94
- }
95
-
96
- results.push({title, snippet, source, time, url: articleUrl});
97
- if (results.length >= count) break;
29
+ let data;
30
+ try {
31
+ data = await resp.json();
32
+ } catch (e) {
33
+ return {error: 'Failed to parse response', hint: 'Toutiao API may have changed'};
98
34
  }
99
35
 
100
- // Strategy 2: Fallback to finding jump links with article IDs
101
- if (results.length === 0) {
102
- const links = doc.querySelectorAll('a[href*="search/jump"]');
103
- for (const link of links) {
104
- const text = (link.textContent || '').trim();
105
- if (!text || text.length < 4) continue;
106
- // Skip navigation/promo links
107
- if (text.includes('去西瓜搜') || text.includes('去抖音搜') || text.includes('APP')) continue;
108
-
109
- const href = link.getAttribute('href') || '';
110
- // Only include links that point to actual articles
111
- if (!href.match(/toutiao\.com|group|a\d{10,}/)) continue;
112
-
113
- const articleUrl = extractArticleUrl(href);
114
- if (results.some(r => r.title === text)) continue;
115
-
116
- // Try to get snippet from sibling/parent context
117
- let snippet = '';
118
- const container = link.closest('[class*="card"]') || link.parentElement?.parentElement;
119
- if (container) {
120
- const containerText = (container.textContent || '').trim();
121
- const afterTitle = containerText.indexOf(text);
122
- if (afterTitle >= 0) {
123
- const rest = containerText.substring(afterTitle + text.length).trim();
124
- if (rest.length > 10) snippet = rest.substring(0, 300);
125
- }
126
- }
127
-
128
- results.push({title: text, snippet, source: '', time: '', url: articleUrl});
129
- if (results.length >= count) break;
130
- }
36
+ if (!data || !data.data) {
37
+ return {error: 'No data returned', hint: 'Open www.toutiao.com in browser first'};
131
38
  }
132
39
 
133
- if (results.length === 0) {
134
- return {
135
- error: 'No results found',
136
- hint: 'Toutiao may require login or has anti-scraping protection. Try: 1) Open so.toutiao.com in ping-browser first, 2) Log in to toutiao, 3) Use toutiao/hot instead',
137
- query: args.query
138
- };
40
+ // Filter articles by keyword
41
+ const results = [];
42
+ for (const item of data.data) {
43
+ const title = item.title || '';
44
+ const abstract = item.abstract || '';
45
+ const keywords = item.keywords || '';
46
+ const source = item.source || item.media_name || '';
47
+
48
+ // Check if query matches title, abstract, or keywords
49
+ const searchText = (title + ' ' + abstract + ' ' + keywords).toLowerCase();
50
+ if (!searchText.includes(query)) continue;
51
+
52
+ results.push({
53
+ title,
54
+ snippet: abstract.substring(0, 300),
55
+ source,
56
+ time: item.datetime || '',
57
+ url: item.article_url || item.display_url || item.share_url || '',
58
+ hot_value: item.hot || 0,
59
+ comment_count: item.comment_count || 0
60
+ });
61
+
62
+ if (results.length >= maxCount) break;
139
63
  }
140
64
 
141
65
  return {
142
66
  query: args.query,
143
67
  count: results.length,
68
+ total_fetched: data.data.length,
144
69
  results
145
70
  };
146
71
  }
@@ -0,0 +1,56 @@
1
+ /* @meta
2
+ {
3
+ "name": "weibo/hot",
4
+ "description": "微博热搜榜",
5
+ "domain": "weibo.com",
6
+ "args": {
7
+ "count": {"required": false, "description": "返回数量 (默认 30, 最多 50)"}
8
+ },
9
+ "readOnly": true,
10
+ "example": "bws weibo/hot"
11
+ }
12
+ */
13
+
14
+ async function(args) {
15
+ const maxCount = Math.min(parseInt(args.count) || 30, 50);
16
+
17
+ const resp = await fetch('https://weibo.com/ajax/side/hotSearch', {
18
+ credentials: 'include',
19
+ headers: {
20
+ 'Referer': 'https://weibo.com/'
21
+ }
22
+ });
23
+
24
+ if (!resp.ok) {
25
+ return {error: 'HTTP ' + resp.status, hint: 'Open weibo.com in browser first'};
26
+ }
27
+
28
+ let data;
29
+ try {
30
+ data = await resp.json();
31
+ } catch (e) {
32
+ return {error: 'Failed to parse response', hint: 'Weibo API may have changed'};
33
+ }
34
+
35
+ if (!data || !data.data || !data.data.realtime) {
36
+ return {error: 'No data returned', hint: 'Open weibo.com and login first'};
37
+ }
38
+
39
+ const items = data.data.realtime.slice(0, maxCount).map((item, i) => {
40
+ const title = item.word || item.word_scheme || '';
41
+ return {
42
+ rank: i + 1,
43
+ id: item.mid || item.word_scheme || '',
44
+ title: title,
45
+ tag: item.label_name || '',
46
+ hot: item.num || 0,
47
+ url: 'https://s.weibo.com/weibo?q=' + encodeURIComponent(title),
48
+ mobileUrl: 'https://m.weibo.cn/search?containerid=100103type%3D1%26q%3D' + encodeURIComponent(title)
49
+ };
50
+ });
51
+
52
+ return {
53
+ count: items.length,
54
+ items
55
+ };
56
+ }
@@ -1,58 +0,0 @@
1
- /* @meta
2
- {
3
- "name": "douban/comments",
4
- "description": "Get short reviews/comments for a Douban movie or TV show",
5
- "domain": "movie.douban.com",
6
- "args": {
7
- "id": {"required": true, "description": "Douban subject ID (e.g. 1292052)"},
8
- "sort": {"required": false, "description": "Sort order: new_score (default, hot), time (newest first)"},
9
- "count": {"required": false, "description": "Number of comments (default: 20, max: 50)"}
10
- },
11
- "capabilities": ["network"],
12
- "readOnly": true,
13
- "example": "ping-browser site douban/comments 1292052"
14
- }
15
- */
16
-
17
- async function(args) {
18
- if (!args.id) return {error: 'Missing argument: id'};
19
- const id = String(args.id).trim();
20
- const sort = args.sort || 'new_score';
21
- const count = Math.min(parseInt(args.count) || 20, 50);
22
-
23
- if (sort !== 'new_score' && sort !== 'time') {
24
- return {error: 'Invalid sort. Use "new_score" (hot) or "time" (newest)'};
25
- }
26
-
27
- const url = 'https://movie.douban.com/j/subject/' + id + '/comments?start=0&limit=' + count + '&status=P&sort=' + sort;
28
-
29
- const resp = await fetch(url, {credentials: 'include'});
30
- if (!resp.ok) return {error: 'HTTP ' + resp.status, hint: 'Not logged in?'};
31
- const d = await resp.json();
32
-
33
- if (d.retcode !== 1 || !d.result) return {error: 'Failed to fetch comments', hint: 'Invalid ID or not logged in?'};
34
-
35
- const ratingMap = {'1': '很差', '2': '较差', '3': '还行', '4': '推荐', '5': '力荐'};
36
-
37
- const comments = (d.result.normal || []).map(function(c) {
38
- var userId = c.user?.path?.match(/people\/([^/]+)/)?.[1];
39
- return {
40
- id: c.id,
41
- author: c.user?.name || '',
42
- author_id: userId || '',
43
- rating: c.rating ? parseInt(c.rating) : null,
44
- rating_label: c.rating_word || ratingMap[c.rating] || '',
45
- content: c.content || '',
46
- votes: c.votes || 0,
47
- date: c.time || ''
48
- };
49
- });
50
-
51
- return {
52
- subject_id: id,
53
- sort: sort,
54
- total: d.result.total_num || 0,
55
- count: comments.length,
56
- comments: comments
57
- };
58
- }
@@ -1,64 +0,0 @@
1
- /* @meta
2
- {
3
- "name": "douban/movie-hot",
4
- "description": "Get hot/trending movies or TV shows on Douban by tag",
5
- "domain": "movie.douban.com",
6
- "args": {
7
- "type": {"required": false, "description": "Type: movie (default) or tv"},
8
- "tag": {"required": false, "description": "Tag filter (default: 热门). Movies: 热门/最新/豆瓣高分/冷门佳片/华语/欧美/韩国/日本. TV: 热门/国产剧/综艺/美剧/日剧/韩剧/日本动画/纪录片"},
9
- "count": {"required": false, "description": "Number of results (default: 20, max: 50)"}
10
- },
11
- "capabilities": ["network"],
12
- "readOnly": true,
13
- "example": "ping-browser site douban/movie-hot movie 豆瓣高分"
14
- }
15
- */
16
-
17
- async function(args) {
18
- const type = (args.type || 'movie').toLowerCase();
19
- if (type !== 'movie' && type !== 'tv') return {error: 'Invalid type. Use "movie" or "tv"'};
20
-
21
- const tag = args.tag || '热门';
22
- const count = Math.min(parseInt(args.count) || 20, 50);
23
-
24
- const url = 'https://movie.douban.com/j/search_subjects?type=' + type
25
- + '&tag=' + encodeURIComponent(tag)
26
- + '&page_limit=' + count
27
- + '&page_start=0';
28
-
29
- const resp = await fetch(url, {credentials: 'include'});
30
- if (!resp.ok) return {error: 'HTTP ' + resp.status, hint: 'Not logged in?'};
31
- const d = await resp.json();
32
-
33
- if (!d.subjects) return {error: 'No data returned', hint: 'Invalid tag or not logged in?'};
34
-
35
- const items = d.subjects.map(function(s, i) {
36
- return {
37
- rank: i + 1,
38
- id: s.id,
39
- title: s.title,
40
- rating: s.rate ? parseFloat(s.rate) : null,
41
- cover: s.cover,
42
- url: s.url,
43
- playable: s.playable,
44
- is_new: s.is_new,
45
- episodes_info: s.episodes_info || null
46
- };
47
- });
48
-
49
- // Also fetch available tags for reference
50
- var tagsResp = await fetch('https://movie.douban.com/j/search_tags?type=' + type + '&source=index', {credentials: 'include'});
51
- var availableTags = [];
52
- if (tagsResp.ok) {
53
- var tagsData = await tagsResp.json();
54
- availableTags = tagsData.tags || [];
55
- }
56
-
57
- return {
58
- type: type,
59
- tag: tag,
60
- count: items.length,
61
- available_tags: availableTags,
62
- items: items
63
- };
64
- }
@@ -1,65 +0,0 @@
1
- /* @meta
2
- {
3
- "name": "douban/movie-top",
4
- "description": "Get top rated movies by genre from Douban charts",
5
- "domain": "movie.douban.com",
6
- "args": {
7
- "genre": {"required": false, "description": "Genre (default: 剧情). Options: 剧情/喜剧/动作/爱情/科幻/动画/悬疑/惊悚/恐怖/纪录片/短片/情色/音乐/歌舞/家庭/儿童/传记/历史/战争/犯罪/西部/奇幻/冒险/灾难/武侠/古装/运动/黑色电影"},
8
- "count": {"required": false, "description": "Number of results (default: 20, max: 50)"}
9
- },
10
- "capabilities": ["network"],
11
- "readOnly": true,
12
- "example": "ping-browser site douban/movie-top 科幻 10"
13
- }
14
- */
15
-
16
- async function(args) {
17
- // Genre name to type ID mapping
18
- const genreMap = {
19
- '剧情': 11, '喜剧': 24, '动作': 5, '爱情': 13, '科幻': 17,
20
- '动画': 25, '悬疑': 10, '惊悚': 19, '恐怖': 20, '纪录片': 1,
21
- '短片': 23, '情色': 6, '音乐': 14, '歌舞': 7, '家庭': 28,
22
- '儿童': 8, '传记': 2, '历史': 4, '战争': 22, '犯罪': 3,
23
- '西部': 27, '奇幻': 26, '冒险': 15, '灾难': 12, '武侠': 29,
24
- '古装': 30, '运动': 18, '黑色电影': 31
25
- };
26
-
27
- const genre = args.genre || '剧情';
28
- const typeId = genreMap[genre];
29
- if (!typeId) return {error: 'Unknown genre: ' + genre, hint: 'Available: ' + Object.keys(genreMap).join(', ')};
30
-
31
- const count = Math.min(parseInt(args.count) || 20, 50);
32
-
33
- const url = 'https://movie.douban.com/j/chart/top_list?type=' + typeId
34
- + '&interval_id=100%3A90&action=&start=0&limit=' + count;
35
-
36
- const resp = await fetch(url, {credentials: 'include'});
37
- if (!resp.ok) return {error: 'HTTP ' + resp.status, hint: 'Not logged in?'};
38
- const data = await resp.json();
39
-
40
- if (!Array.isArray(data)) return {error: 'Unexpected response format'};
41
-
42
- const items = data.map(function(s) {
43
- return {
44
- rank: s.rank,
45
- id: s.id,
46
- title: s.title,
47
- rating: parseFloat(s.score) || null,
48
- votes: s.vote_count,
49
- types: s.types,
50
- regions: s.regions,
51
- actors: (s.actors || []).slice(0, 5),
52
- release_date: s.release_date,
53
- cover: s.cover_url,
54
- url: s.url,
55
- playable: s.is_playable
56
- };
57
- });
58
-
59
- return {
60
- genre: genre,
61
- count: items.length,
62
- available_genres: Object.keys(genreMap),
63
- items: items
64
- };
65
- }
@@ -1,117 +0,0 @@
1
- /* @meta
2
- {
3
- "name": "douban/movie",
4
- "description": "Get detailed movie/TV info with rating, cast, and hot reviews from Douban",
5
- "domain": "movie.douban.com",
6
- "args": {
7
- "id": {"required": true, "description": "Douban subject ID (e.g. 1292052 for The Shawshank Redemption)"}
8
- },
9
- "capabilities": ["network"],
10
- "readOnly": true,
11
- "example": "ping-browser site douban/movie 1292052"
12
- }
13
- */
14
-
15
- async function(args) {
16
- if (!args.id) return {error: 'Missing argument: id'};
17
- const id = String(args.id).trim();
18
-
19
- // Fetch structured data from the JSON API
20
- const apiResp = await fetch('https://movie.douban.com/j/subject_abstract?subject_id=' + id, {credentials: 'include'});
21
- if (!apiResp.ok) return {error: 'HTTP ' + apiResp.status, hint: 'Not logged in or invalid ID?'};
22
- const apiData = await apiResp.json();
23
- if (apiData.r !== 0 || !apiData.subject) return {error: 'Subject not found', hint: 'Check the ID'};
24
-
25
- const s = apiData.subject;
26
-
27
- // Also fetch the HTML page for richer data (summary, rating distribution, hot comments)
28
- const pageResp = await fetch('https://movie.douban.com/subject/' + id + '/', {credentials: 'include'});
29
- let summary = '', ratingDist = {}, hotComments = [], recommendations = [], votes = null, info = '';
30
-
31
- if (pageResp.ok) {
32
- const html = await pageResp.text();
33
- const doc = new DOMParser().parseFromString(html, 'text/html');
34
-
35
- // Summary
36
- const summaryEl = doc.querySelector('[property="v:summary"]');
37
- summary = summaryEl ? summaryEl.textContent.trim() : '';
38
-
39
- // Vote count
40
- const votesEl = doc.querySelector('[property="v:votes"]');
41
- votes = votesEl ? parseInt(votesEl.textContent) : null;
42
-
43
- // Info block
44
- const infoEl = doc.querySelector('#info');
45
- info = infoEl ? infoEl.innerText || infoEl.textContent.trim() : '';
46
-
47
- // Rating distribution
48
- doc.querySelectorAll('.ratings-on-weight .item').forEach(function(el) {
49
- var star = el.querySelector('span:first-child');
50
- var pct = el.querySelector('.rating_per');
51
- if (star && pct) ratingDist[star.textContent.trim()] = pct.textContent.trim();
52
- });
53
-
54
- // Hot comments
55
- doc.querySelectorAll('#hot-comments .comment-item').forEach(function(el) {
56
- var author = el.querySelector('.comment-info a');
57
- var rating = el.querySelector('.comment-info .rating');
58
- var content = el.querySelector('.short');
59
- var voteCount = el.querySelector('.vote-count');
60
- var date = el.querySelector('.comment-time');
61
- hotComments.push({
62
- author: author ? author.textContent.trim() : '',
63
- rating: rating ? rating.title : '',
64
- content: content ? content.textContent.trim() : '',
65
- votes: voteCount ? parseInt(voteCount.textContent) || 0 : 0,
66
- date: date ? date.textContent.trim() : ''
67
- });
68
- });
69
-
70
- // Recommendations
71
- doc.querySelectorAll('.recommendations-bd dl').forEach(function(dl) {
72
- var a = dl.querySelector('dd a');
73
- if (a) {
74
- var recId = a.href?.match(/subject\/(\d+)/)?.[1];
75
- recommendations.push({title: a.textContent.trim(), id: recId, url: a.href});
76
- }
77
- });
78
- }
79
-
80
- // Parse info block for structured fields
81
- const parseInfo = function(text) {
82
- const result = {};
83
- const lines = text.split('\n').map(function(l) { return l.trim(); }).filter(Boolean);
84
- lines.forEach(function(line) {
85
- var m = line.match(/^(.+?):\s*(.+)$/);
86
- if (m) result[m[1].trim()] = m[2].trim();
87
- });
88
- return result;
89
- };
90
- const infoFields = parseInfo(info);
91
-
92
- return {
93
- id: s.id,
94
- title: s.title,
95
- subtype: s.subtype,
96
- is_tv: s.is_tv,
97
- rating: parseFloat(s.rate) || null,
98
- votes: votes,
99
- rating_distribution: ratingDist,
100
- directors: s.directors,
101
- actors: s.actors,
102
- types: s.types,
103
- region: s.region,
104
- duration: s.duration,
105
- release_year: s.release_year,
106
- episodes_count: s.episodes_count || null,
107
- imdb: infoFields['IMDb'] || null,
108
- alias: infoFields['又名'] || null,
109
- language: infoFields['语言'] || null,
110
- release_date: infoFields['上映日期'] || infoFields['首播'] || null,
111
- summary: summary,
112
- playable: s.playable,
113
- url: s.url,
114
- hot_comments: hotComments,
115
- recommendations: recommendations
116
- };
117
- }