v2er-insight 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -0
- package/dist/cli/commands/ai.d.ts +13 -0
- package/dist/cli/commands/ai.js +153 -0
- package/dist/cli/commands/analyze.d.ts +13 -0
- package/dist/cli/commands/analyze.js +80 -0
- package/dist/cli/commands/config.d.ts +43 -0
- package/dist/cli/commands/config.js +267 -0
- package/dist/cli/commands/fetch.d.ts +13 -0
- package/dist/cli/commands/fetch.js +150 -0
- package/dist/cli/commands/index.d.ts +10 -0
- package/dist/cli/commands/index.js +22 -0
- package/dist/cli/commands/run.d.ts +23 -0
- package/dist/cli/commands/run.js +52 -0
- package/dist/cli/commands/show.d.ts +13 -0
- package/dist/cli/commands/show.js +154 -0
- package/dist/cli/index.d.ts +6 -0
- package/dist/cli/index.js +107 -0
- package/dist/cli/types.d.ts +58 -0
- package/dist/cli/types.js +6 -0
- package/dist/cli/utils/error.d.ts +6 -0
- package/dist/cli/utils/error.js +18 -0
- package/dist/cli/utils.d.ts +20 -0
- package/dist/cli/utils.js +48 -0
- package/dist/cli/workflow/orchestrator.d.ts +15 -0
- package/dist/cli/workflow/orchestrator.js +144 -0
- package/dist/cli/workflow/recovery.d.ts +10 -0
- package/dist/cli/workflow/recovery.js +134 -0
- package/dist/cli/workflow/state.d.ts +19 -0
- package/dist/cli/workflow/state.js +45 -0
- package/dist/cli/workflow/types.d.ts +60 -0
- package/dist/cli/workflow/types.js +3 -0
- package/dist/config/defaults.d.ts +48 -0
- package/dist/config/defaults.js +42 -0
- package/dist/config/index.d.ts +16 -0
- package/dist/config/index.js +21 -0
- package/dist/config/path.d.ts +11 -0
- package/dist/config/path.js +28 -0
- package/dist/config/proxy.d.ts +16 -0
- package/dist/config/proxy.js +39 -0
- package/dist/config/storage.d.ts +23 -0
- package/dist/config/storage.js +85 -0
- package/dist/config/types/ai.d.ts +31 -0
- package/dist/config/types/ai.js +13 -0
- package/dist/config/types/analyzer.d.ts +15 -0
- package/dist/config/types/analyzer.js +6 -0
- package/dist/config/types/data.d.ts +20 -0
- package/dist/config/types/data.js +6 -0
- package/dist/config/types/fetch.d.ts +9 -0
- package/dist/config/types/fetch.js +6 -0
- package/dist/config/types/index.d.ts +32 -0
- package/dist/config/types/index.js +11 -0
- package/dist/config/types/log.d.ts +11 -0
- package/dist/config/types/log.js +6 -0
- package/dist/core/ai/index.d.ts +11 -0
- package/dist/core/ai/index.js +18 -0
- package/dist/core/ai/parser/index.d.ts +12 -0
- package/dist/core/ai/parser/index.js +44 -0
- package/dist/core/ai/parser/validator.d.ts +18 -0
- package/dist/core/ai/parser/validator.js +179 -0
- package/dist/core/ai/prompt/index.d.ts +20 -0
- package/dist/core/ai/prompt/index.js +75 -0
- package/dist/core/ai/prompt/system-prompt.md +210 -0
- package/dist/core/ai/providers/gemini.d.ts +25 -0
- package/dist/core/ai/providers/gemini.js +74 -0
- package/dist/core/ai/providers/index.d.ts +6 -0
- package/dist/core/ai/providers/index.js +9 -0
- package/dist/core/ai/types/index.d.ts +7 -0
- package/dist/core/ai/types/index.js +6 -0
- package/dist/core/ai/types/options.d.ts +14 -0
- package/dist/core/ai/types/options.js +6 -0
- package/dist/core/ai/types/provider.d.ts +19 -0
- package/dist/core/ai/types/provider.js +6 -0
- package/dist/core/ai/types/result.d.ts +64 -0
- package/dist/core/ai/types/result.js +6 -0
- package/dist/core/ai/utils/api-key.d.ts +15 -0
- package/dist/core/ai/utils/api-key.js +36 -0
- package/dist/core/ai/utils/index.d.ts +6 -0
- package/dist/core/ai/utils/index.js +11 -0
- package/dist/core/ai/utils/retry.d.ts +15 -0
- package/dist/core/ai/utils/retry.js +37 -0
- package/dist/core/analyzer/builder.d.ts +23 -0
- package/dist/core/analyzer/builder.js +113 -0
- package/dist/core/analyzer/content/chunker.d.ts +18 -0
- package/dist/core/analyzer/content/chunker.js +74 -0
- package/dist/core/analyzer/content/index.d.ts +7 -0
- package/dist/core/analyzer/content/index.js +13 -0
- package/dist/core/analyzer/content/transformer.d.ts +19 -0
- package/dist/core/analyzer/content/transformer.js +33 -0
- package/dist/core/analyzer/index.d.ts +17 -0
- package/dist/core/analyzer/index.js +21 -0
- package/dist/core/analyzer/periods/detector.d.ts +17 -0
- package/dist/core/analyzer/periods/detector.js +36 -0
- package/dist/core/analyzer/periods/index.d.ts +6 -0
- package/dist/core/analyzer/periods/index.js +11 -0
- package/dist/core/analyzer/periods/splitter.d.ts +11 -0
- package/dist/core/analyzer/periods/splitter.js +35 -0
- package/dist/core/analyzer/stats/index.d.ts +7 -0
- package/dist/core/analyzer/stats/index.js +13 -0
- package/dist/core/analyzer/stats/reply-stats.d.ts +15 -0
- package/dist/core/analyzer/stats/reply-stats.js +45 -0
- package/dist/core/analyzer/stats/topic-stats.d.ts +16 -0
- package/dist/core/analyzer/stats/topic-stats.js +51 -0
- package/dist/core/analyzer/stats/user-overview.d.ts +9 -0
- package/dist/core/analyzer/stats/user-overview.js +52 -0
- package/dist/core/analyzer/types/index.d.ts +7 -0
- package/dist/core/analyzer/types/index.js +6 -0
- package/dist/core/analyzer/types/input.d.ts +13 -0
- package/dist/core/analyzer/types/input.js +6 -0
- package/dist/core/analyzer/types/internal.d.ts +28 -0
- package/dist/core/analyzer/types/internal.js +6 -0
- package/dist/core/analyzer/types/output.d.ts +68 -0
- package/dist/core/analyzer/types/output.js +6 -0
- package/dist/core/analyzer/utils/date-parser.d.ts +41 -0
- package/dist/core/analyzer/utils/date-parser.js +118 -0
- package/dist/core/analyzer/utils/index.d.ts +6 -0
- package/dist/core/analyzer/utils/index.js +18 -0
- package/dist/core/analyzer/utils/stats.d.ts +12 -0
- package/dist/core/analyzer/utils/stats.js +64 -0
- package/dist/core/v2ex/index.d.ts +10 -0
- package/dist/core/v2ex/index.js +27 -0
- package/dist/core/v2ex/parsers/index.d.ts +8 -0
- package/dist/core/v2ex/parsers/index.js +15 -0
- package/dist/core/v2ex/parsers/replies-page.d.ts +11 -0
- package/dist/core/v2ex/parsers/replies-page.js +114 -0
- package/dist/core/v2ex/parsers/selectors/index.d.ts +10 -0
- package/dist/core/v2ex/parsers/selectors/index.js +18 -0
- package/dist/core/v2ex/parsers/selectors/pagination.d.ts +11 -0
- package/dist/core/v2ex/parsers/selectors/pagination.js +14 -0
- package/dist/core/v2ex/parsers/selectors/replies-page.d.ts +21 -0
- package/dist/core/v2ex/parsers/selectors/replies-page.js +24 -0
- package/dist/core/v2ex/parsers/selectors/topic-detail.d.ts +19 -0
- package/dist/core/v2ex/parsers/selectors/topic-detail.js +22 -0
- package/dist/core/v2ex/parsers/selectors/topics-list-page.d.ts +11 -0
- package/dist/core/v2ex/parsers/selectors/topics-list-page.js +14 -0
- package/dist/core/v2ex/parsers/selectors/user-profile.d.ts +11 -0
- package/dist/core/v2ex/parsers/selectors/user-profile.js +14 -0
- package/dist/core/v2ex/parsers/topic-detail.d.ts +11 -0
- package/dist/core/v2ex/parsers/topic-detail.js +94 -0
- package/dist/core/v2ex/parsers/topics-list-page.d.ts +11 -0
- package/dist/core/v2ex/parsers/topics-list-page.js +90 -0
- package/dist/core/v2ex/parsers/user-profile.d.ts +11 -0
- package/dist/core/v2ex/parsers/user-profile.js +70 -0
- package/dist/core/v2ex/parsers/utils/index.d.ts +6 -0
- package/dist/core/v2ex/parsers/utils/index.js +9 -0
- package/dist/core/v2ex/parsers/utils/pagination.d.ts +19 -0
- package/dist/core/v2ex/parsers/utils/pagination.js +29 -0
- package/dist/core/v2ex/types/entities.d.ts +45 -0
- package/dist/core/v2ex/types/entities.js +7 -0
- package/dist/core/v2ex/types/index.d.ts +6 -0
- package/dist/core/v2ex/types/index.js +6 -0
- package/dist/core/v2ex/types/parse-result.d.ts +64 -0
- package/dist/core/v2ex/types/parse-result.js +7 -0
- package/dist/core/v2ex/urls/constants.d.ts +5 -0
- package/dist/core/v2ex/urls/constants.js +8 -0
- package/dist/core/v2ex/urls/index.d.ts +7 -0
- package/dist/core/v2ex/urls/index.js +16 -0
- package/dist/core/v2ex/urls/topic-urls.d.ts +19 -0
- package/dist/core/v2ex/urls/topic-urls.js +48 -0
- package/dist/core/v2ex/urls/user-urls.d.ts +24 -0
- package/dist/core/v2ex/urls/user-urls.js +36 -0
- package/dist/core/v2ex/use-cases/index.d.ts +8 -0
- package/dist/core/v2ex/use-cases/index.js +14 -0
- package/dist/core/v2ex/use-cases/types.d.ts +31 -0
- package/dist/core/v2ex/use-cases/types.js +7 -0
- package/dist/core/v2ex/use-cases/user/index.d.ts +10 -0
- package/dist/core/v2ex/use-cases/user/index.js +16 -0
- package/dist/core/v2ex/use-cases/user/profile.d.ts +14 -0
- package/dist/core/v2ex/use-cases/user/profile.js +51 -0
- package/dist/core/v2ex/use-cases/user/replies.d.ts +14 -0
- package/dist/core/v2ex/use-cases/user/replies.js +20 -0
- package/dist/core/v2ex/use-cases/user/topic-urls.d.ts +21 -0
- package/dist/core/v2ex/use-cases/user/topic-urls.js +29 -0
- package/dist/core/v2ex/use-cases/user/topics-detail.d.ts +30 -0
- package/dist/core/v2ex/use-cases/user/topics-detail.js +62 -0
- package/dist/core/v2ex/use-cases/utils/index.d.ts +6 -0
- package/dist/core/v2ex/use-cases/utils/index.js +9 -0
- package/dist/core/v2ex/use-cases/utils/page-orchestrator.d.ts +24 -0
- package/dist/core/v2ex/use-cases/utils/page-orchestrator.js +93 -0
- package/dist/infra/fetcher/agent.d.ts +10 -0
- package/dist/infra/fetcher/agent.js +17 -0
- package/dist/infra/fetcher/fetcher.d.ts +10 -0
- package/dist/infra/fetcher/fetcher.js +81 -0
- package/dist/infra/fetcher/index.d.ts +3 -0
- package/dist/infra/fetcher/index.js +19 -0
- package/dist/infra/fetcher/types.d.ts +29 -0
- package/dist/infra/fetcher/types.js +6 -0
- package/dist/infra/logger/colors.d.ts +15 -0
- package/dist/infra/logger/colors.js +18 -0
- package/dist/infra/logger/index.d.ts +16 -0
- package/dist/infra/logger/index.js +19 -0
- package/dist/infra/logger/logger.d.ts +34 -0
- package/dist/infra/logger/logger.js +101 -0
- package/dist/infra/storage/cleaner.d.ts +24 -0
- package/dist/infra/storage/cleaner.js +73 -0
- package/dist/infra/storage/index.d.ts +7 -0
- package/dist/infra/storage/index.js +15 -0
- package/dist/infra/storage/paths.d.ts +26 -0
- package/dist/infra/storage/paths.js +53 -0
- package/dist/infra/storage/reader.d.ts +15 -0
- package/dist/infra/storage/reader.js +34 -0
- package/dist/infra/storage/types.d.ts +21 -0
- package/dist/infra/storage/types.js +18 -0
- package/dist/infra/storage/writer.d.ts +16 -0
- package/dist/infra/storage/writer.js +31 -0
- package/package.json +89 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 分页选择器
|
|
4
|
+
* 用于解析分页信息
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.PAGINATION_SELECTORS = void 0;
|
|
8
|
+
exports.PAGINATION_SELECTORS = {
    /** Current page number */
    currentPage: 'a.page_current',
    /** Links to the other pages */
    pageLinks: 'a.page_normal',
};
|
|
14
|
+
//# sourceMappingURL=pagination.js.map
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 回复列表页选择器
|
|
3
|
+
* 页面:/member/{username}/replies
|
|
4
|
+
*/
|
|
5
|
+
export declare const REPLIES_PAGE_SELECTORS: {
    /** Container holding the total reply count */
    readonly totalRepliesContainer: ".header small.gray";
    /** Container of a single reply item */
    readonly replyItem: ".dock_area";
    /** Reply content */
    readonly replyContent: ".reply_content";
    /** Reply time */
    readonly replyTime: "span.fade";
    /** Topic link */
    readonly topicLink: "a[href^=\"/t/\"]";
    /** Node link */
    readonly nodeLink: "a[href^=\"/go/\"]";
    /** Member (user) link */
    readonly memberLink: "a[href^=\"/member/\"]";
};
|
|
21
|
+
//# sourceMappingURL=replies-page.d.ts.map
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 回复列表页选择器
|
|
4
|
+
* 页面:/member/{username}/replies
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.REPLIES_PAGE_SELECTORS = void 0;
|
|
8
|
+
exports.REPLIES_PAGE_SELECTORS = {
    /** Container holding the total reply count */
    totalRepliesContainer: '.header small.gray',
    /** Container of a single reply item */
    replyItem: '.dock_area',
    /** Reply content */
    replyContent: '.reply_content',
    /** Reply time */
    replyTime: 'span.fade',
    /** Topic link */
    topicLink: 'a[href^="/t/"]',
    /** Node link */
    nodeLink: 'a[href^="/go/"]',
    /** Member (user) link */
    memberLink: 'a[href^="/member/"]',
};
|
|
24
|
+
//# sourceMappingURL=replies-page.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 帖子详情页选择器
|
|
3
|
+
* 页面:/t/{topic_id}
|
|
4
|
+
*/
|
|
5
|
+
export declare const TOPIC_DETAIL_SELECTORS: {
    /** Topic title */
    readonly title: ".header h1";
    /** Node link */
    readonly nodeLink: ".header a[href^=\"/go/\"]";
    /** Creation time (a span carrying a title attribute) */
    readonly createdAt: ".header small.gray span[title]";
    /** Topic content */
    readonly content: ".topic_content";
    /** Gray header text (contains the click count) */
    readonly headerGray: ".header small.gray";
    /** Reply info (contains the reply count and the last reply time) */
    readonly replyInfo: "span.gray";
};
|
|
19
|
+
//# sourceMappingURL=topic-detail.d.ts.map
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 帖子详情页选择器
|
|
4
|
+
* 页面:/t/{topic_id}
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.TOPIC_DETAIL_SELECTORS = void 0;
|
|
8
|
+
exports.TOPIC_DETAIL_SELECTORS = {
    /** Topic title */
    title: '.header h1',
    /** Node link */
    nodeLink: '.header a[href^="/go/"]',
    /** Creation time (a span carrying a title attribute) */
    createdAt: '.header small.gray span[title]',
    /** Topic content */
    content: '.topic_content',
    /** Gray header text (contains the click count) */
    headerGray: '.header small.gray',
    /** Reply info (contains the reply count and the last reply time) */
    replyInfo: 'span.gray',
};
|
|
22
|
+
//# sourceMappingURL=topic-detail.js.map
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 帖子列表页选择器
|
|
3
|
+
* 页面:/member/{username}/topics
|
|
4
|
+
*/
|
|
5
|
+
export declare const TOPICS_LIST_PAGE_SELECTORS: {
    /** Text element that carries the "hidden" notice */
    readonly hiddenIndicator: ".gray";
    /** Topic link */
    readonly topicLink: "a[href^=\"/t/\"]";
};
|
|
11
|
+
//# sourceMappingURL=topics-list-page.d.ts.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 帖子列表页选择器
|
|
4
|
+
* 页面:/member/{username}/topics
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.TOPICS_LIST_PAGE_SELECTORS = void 0;
|
|
8
|
+
exports.TOPICS_LIST_PAGE_SELECTORS = {
    /** Text element that carries the "hidden" notice */
    hiddenIndicator: '.gray',
    /** Topic link */
    topicLink: 'a[href^="/t/"]',
};
|
|
14
|
+
//# sourceMappingURL=topics-list-page.js.map
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 用户主页选择器
|
|
3
|
+
* 页面:/member/{username}
|
|
4
|
+
*/
|
|
5
|
+
export declare const USER_PROFILE_SELECTORS: {
    /** Link showing today's activity ranking */
    readonly dailyRanking: "a[href=\"/top/dau\"]";
    /** Gray text (contains the join time) */
    readonly grayText: ".gray";
};
|
|
11
|
+
//# sourceMappingURL=user-profile.d.ts.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 用户主页选择器
|
|
4
|
+
* 页面:/member/{username}
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.USER_PROFILE_SELECTORS = void 0;
|
|
8
|
+
exports.USER_PROFILE_SELECTORS = {
    /** Link showing today's activity ranking */
    dailyRanking: 'a[href="/top/dau"]',
    /** Gray text (contains the join time) */
    grayText: '.gray',
};
|
|
14
|
+
//# sourceMappingURL=user-profile.js.map
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 单个帖子页面解析器
|
|
3
|
+
*/
|
|
4
|
+
import type { TopicDetailParseResult } from '../types/parse-result';
|
|
5
|
+
/**
 * Parse a single topic page.
 * @param html - Page HTML
 * @returns Parsed topic detail result
 */
export declare function parseTopicDetail(html: string): TopicDetailParseResult;
|
|
11
|
+
//# sourceMappingURL=topic-detail.d.ts.map
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 单个帖子页面解析器
|
|
4
|
+
*/
|
|
5
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.parseTopicDetail = parseTopicDetail;
|
|
40
|
+
const cheerio = __importStar(require("cheerio"));
|
|
41
|
+
const selectors_1 = require("./selectors");
|
|
42
|
+
const { title: TITLE_SELECTOR, nodeLink: NODE_LINK, createdAt: CREATED_AT, content: CONTENT, headerGray: HEADER_GRAY, replyInfo: REPLY_INFO, } = selectors_1.TOPIC_DETAIL_SELECTORS;
|
|
43
|
+
/**
 * Parse a single topic page.
 *
 * @param html - Raw HTML of the topic page
 * @returns Parsed topic details: title, node name, creation time, content,
 *          reply count, last reply time and click count
 */
function parseTopicDetail(html) {
    const $ = cheerio.load(html);
    // Trimmed, concatenated text of everything matching a selector.
    const trimmedText = (selector) => $(selector).text().trim();
    // Prefer the absolute timestamp stored in the `title` attribute and fall
    // back to the visible text when the attribute is missing.
    const createdAtEl = $(CREATED_AT);
    const createdAt = createdAtEl.attr('title') ?? createdAtEl.text().trim();
    // The click count is embedded in the gray header text; 0 when not found.
    const clickMatch = $(HEADER_GRAY).text().match(/(\d+)\s*次点击/);
    const clickCount = clickMatch?.[1] ? parseInt(clickMatch[1], 10) : 0;
    // Reply count and last reply time come from the first matching element
    // whose text mentions replies; later elements are ignored.
    let replyCount = 0;
    let lastReplyTime = null;
    for (const el of $(REPLY_INFO).toArray()) {
        const text = $(el).text();
        if (!text.includes('条回复')) {
            continue;
        }
        // Text before the bullet holds the count, text after it the time.
        const [countPart, timePart] = text.split('•');
        const countMatch = countPart?.match(/(\d+)/);
        if (countMatch?.[1]) {
            replyCount = parseInt(countMatch[1], 10);
        }
        if (timePart) {
            lastReplyTime = timePart.trim();
        }
        break;
    }
    return {
        title: trimmedText(TITLE_SELECTOR),
        nodeName: trimmedText(NODE_LINK),
        createdAt,
        content: trimmedText(CONTENT),
        replyCount,
        lastReplyTime,
        clickCount,
    };
}
|
|
94
|
+
//# sourceMappingURL=topic-detail.js.map
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 发帖列表页解析器
|
|
3
|
+
*/
|
|
4
|
+
import type { TopicsPageParseResult } from '../types/parse-result';
|
|
5
|
+
/**
 * Parse a user's topics list page.
 * @param html - Page HTML
 * @returns Parsed topics list result
 */
export declare function parseTopicsListPage(html: string): TopicsPageParseResult;
|
|
11
|
+
//# sourceMappingURL=topics-list-page.d.ts.map
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 发帖列表页解析器
|
|
4
|
+
*/
|
|
5
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.parseTopicsListPage = parseTopicsListPage;
|
|
40
|
+
const cheerio = __importStar(require("cheerio"));
|
|
41
|
+
const utils_1 = require("./utils");
|
|
42
|
+
const selectors_1 = require("./selectors");
|
|
43
|
+
const { hiddenIndicator: HIDDEN_INDICATOR, topicLink: TOPIC_LINK } = selectors_1.TOPICS_LIST_PAGE_SELECTORS;
|
|
44
|
+
/**
 * Parse a user's topics list page.
 *
 * @param html - Raw HTML of the topics list page
 * @returns Whether the list is hidden, the de-duplicated topic URLs found on
 *          the page, and the pagination info
 */
function parseTopicsListPage(html) {
    const $ = cheerio.load(html);
    // The list counts as hidden when any indicator element carries the notice text.
    const hidden = $(HIDDEN_INDICATOR)
        .toArray()
        .some((el) => $(el).text().includes('主题列表被隐藏'));
    if (hidden) {
        // Hidden list: report an empty single-page result.
        return {
            isHidden: true,
            topicUrls: [],
            currentPage: 1,
            totalPages: 1,
        };
    }
    // Collect topic URLs without their `#...` fragment; the Set removes
    // duplicates while keeping first-seen order.
    const uniqueUrls = new Set();
    for (const el of $(TOPIC_LINK).toArray()) {
        const href = $(el).attr('href');
        if (!href) {
            continue;
        }
        const withoutFragment = href.split('#')[0] ?? '';
        if (withoutFragment) {
            uniqueUrls.add(withoutFragment);
        }
    }
    const pagination = (0, utils_1.parsePagination)($);
    return {
        isHidden: false,
        topicUrls: Array.from(uniqueUrls),
        currentPage: pagination.currentPage,
        totalPages: pagination.totalPages,
    };
}
|
|
90
|
+
//# sourceMappingURL=topics-list-page.js.map
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 用户主页解析器
|
|
3
|
+
*/
|
|
4
|
+
import type { UserProfileParseResult } from '../types/parse-result';
|
|
5
|
+
/**
 * Parse a user's profile page.
 * @param html - Page HTML
 * @returns Parsed user profile result
 */
export declare function parseUserProfile(html: string): UserProfileParseResult;
|
|
11
|
+
//# sourceMappingURL=user-profile.d.ts.map
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 用户主页解析器
|
|
4
|
+
*/
|
|
5
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.parseUserProfile = parseUserProfile;
|
|
40
|
+
const cheerio = __importStar(require("cheerio"));
|
|
41
|
+
const selectors_1 = require("./selectors");
|
|
42
|
+
const { dailyRanking: DAU_SELECTOR, grayText: GRAY_SELECTOR } = selectors_1.USER_PROFILE_SELECTORS;
|
|
43
|
+
/**
 * Parse a user's profile page.
 *
 * @param html - Raw HTML of the profile page
 * @returns The daily activity ranking (null when the link is absent or its
 *          text is not a number) and the join date string ('' when not found)
 */
function parseUserProfile(html) {
    const $ = cheerio.load(html);
    // Daily activity ranking: only set when the link exists and parses as a number.
    let dailyRanking = null;
    const rankingLink = $(DAU_SELECTOR);
    if (rankingLink.length > 0) {
        const parsedRanking = parseInt(rankingLink.text().trim(), 10);
        if (!isNaN(parsedRanking)) {
            dailyRanking = parsedRanking;
        }
    }
    // Join date, expected as: YYYY-MM-DD HH:MM:SS +HH:MM
    const joinDatePattern = /加入于\s+(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\s+\+\d{2}:\d{2})/;
    let joinDate = '';
    // Every gray element is scanned; a later match overrides an earlier one.
    for (const el of $(GRAY_SELECTOR).toArray()) {
        const text = $(el).text();
        if (!text.includes('加入于')) {
            continue;
        }
        const found = text.match(joinDatePattern);
        if (found?.[1]) {
            joinDate = found[1].trim();
        }
    }
    return { dailyRanking, joinDate };
}
|
|
70
|
+
//# sourceMappingURL=user-profile.js.map
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 解析器工具函数导出
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.parsePagination = void 0;
|
|
7
|
+
var pagination_1 = require("./pagination");
|
|
8
|
+
Object.defineProperty(exports, "parsePagination", { enumerable: true, get: function () { return pagination_1.parsePagination; } });
|
|
9
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 分页解析工具
|
|
3
|
+
* 提供共享的分页信息解析函数
|
|
4
|
+
*/
|
|
5
|
+
import type { CheerioAPI } from 'cheerio';
|
|
6
|
+
/**
 * Pagination info
 */
export interface PaginationInfo {
    currentPage: number;
    totalPages: number;
}
|
|
13
|
+
/**
 * Parse pagination info.
 * @param $ - Cheerio instance
 * @returns Pagination info (current page and total pages)
 */
export declare function parsePagination($: CheerioAPI): PaginationInfo;
|
|
19
|
+
//# sourceMappingURL=pagination.d.ts.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* 分页解析工具
|
|
4
|
+
* 提供共享的分页信息解析函数
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.parsePagination = parsePagination;
|
|
8
|
+
const pagination_1 = require("../selectors/pagination");
|
|
9
|
+
const { currentPage: CURRENT_PAGE, pageLinks: PAGE_LINKS } = pagination_1.PAGINATION_SELECTORS;
|
|
10
|
+
/**
 * Parse pagination info from a loaded page.
 *
 * @param $ - Cheerio instance for the page
 * @returns The current page (1 when it cannot be read as a positive number)
 *          and the total pages (largest page number seen, never below the
 *          current page)
 */
function parsePagination($) {
    // Numeric value of an element's text; NaN when it is not a number.
    const pageNumberOf = (node) => parseInt(node.text(), 10);
    const parsedCurrent = pageNumberOf($(CURRENT_PAGE).first());
    // `NaN > 0` is false, so an unparsable value also falls back to page 1.
    const currentPage = parsedCurrent > 0 ? parsedCurrent : 1;
    // Highest page number among the other page links, starting from the
    // current page; NaN never compares greater, so it is skipped.
    const totalPages = $(PAGE_LINKS)
        .toArray()
        .reduce((highest, el) => {
            const candidate = pageNumberOf($(el));
            return candidate > highest ? candidate : highest;
        }, currentPage);
    return { currentPage, totalPages };
}
|
|
29
|
+
//# sourceMappingURL=pagination.js.map
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* V2EX 实体类型
|
|
3
|
+
* 定义基本的数据结构
|
|
4
|
+
*/
|
|
5
|
+
/**
 * A single user reply.
 * Source: one reply item on the /member/{username}/replies page
 */
export interface V2exReply {
    /** Topic title */
    topicTitle: string;
    /** Total number of replies in the topic (extracted from the href's #reply{N}) */
    topicReplyCount: number;
    /** Node name */
    nodeName: string;
    /** Reply time (relative time or a concrete date) */
    replyTime: string;
    /** Reply content (plain text) */
    content: string;
    /** Whether this replies directly to the original post */
    isDirectReply: boolean;
    /** Username being replied to; null when replying directly to the original post */
    replyTo: string | null;
}
|
|
25
|
+
/**
 * Topic detail.
 * Source: the /t/{topic_id} page
 */
export interface V2exTopicDetail {
    /** Topic title */
    title: string;
    /** Node name */
    nodeName: string;
    /** Creation time (absolute time) */
    createdAt: string;
    /** Topic content */
    content: string;
    /** Total number of replies in this topic */
    replyCount: number;
    /** Last reply time; null when there are no replies */
    lastReplyTime: string | null;
    /** Click count */
    clickCount: number;
}
|
|
45
|
+
//# sourceMappingURL=entities.d.ts.map
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* V2EX 解析结果类型
|
|
3
|
+
* 定义解析器返回的数据结构
|
|
4
|
+
*/
|
|
5
|
+
import type { V2exReply } from './entities';
|
|
6
|
+
/**
 * Parse result of the user profile page.
 * Source: /member/{username}
 */
export interface UserProfileParseResult {
    /** Today's activity ranking; null means it is not shown */
    dailyRanking: number | null;
    /** Join time, format: 2010-04-25 21:45:46 +08:00 */
    joinDate: string;
}
|
|
16
|
+
/**
 * Parse result of the replies list page.
 * Source: /member/{username}/replies?p={page}
 */
export interface RepliesPageParseResult {
    /** The user's total reply count (the "回复总数 30237" figure shown at the top of the page) */
    totalReplies: number;
    /** Replies on this page */
    replies: V2exReply[];
    /** Current page number */
    currentPage: number;
    /** Total number of pages */
    totalPages: number;
}
|
|
30
|
+
/**
 * Parse result of the topics list page.
 * Source: /member/{username}/topics?p={page}
 */
export interface TopicsPageParseResult {
    /** Whether the list has been hidden by the user */
    isHidden: boolean;
    /** Topic URLs on this page */
    topicUrls: string[];
    /** Current page number */
    currentPage: number;
    /** Total number of pages */
    totalPages: number;
}
|
|
44
|
+
/**
 * Parse result of a single topic page.
 * Source: /t/{topic_id}
 */
export interface TopicDetailParseResult {
    /** Topic title */
    title: string;
    /** Node name */
    nodeName: string;
    /** Creation time (absolute time) */
    createdAt: string;
    /** Topic content */
    content: string;
    /** Total number of replies in this topic */
    replyCount: number;
    /** Last reply time; null when there are no replies */
    lastReplyTime: string | null;
    /** Click count */
    clickCount: number;
}
|
|
64
|
+
//# sourceMappingURL=parse-result.d.ts.map
|