autocrew 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HAMLETDEER.md +562 -0
- package/LICENSE +21 -0
- package/README.md +190 -0
- package/README_CN.md +190 -0
- package/adapters/openclaw/index.ts +68 -0
- package/bin/autocrew.mjs +23 -0
- package/bin/autocrew.ts +13 -0
- package/openclaw.plugin.json +36 -0
- package/package.json +74 -0
- package/skills/_writing-style/SKILL.md +68 -0
- package/skills/audience-profiler/SKILL.md +241 -0
- package/skills/content-attribution/SKILL.md +128 -0
- package/skills/content-review/SKILL.md +257 -0
- package/skills/cover-generator/SKILL.md +93 -0
- package/skills/humanizer-zh/SKILL.md +75 -0
- package/skills/intel-digest/SKILL.md +57 -0
- package/skills/intel-pull/SKILL.md +74 -0
- package/skills/manage-pipeline/SKILL.md +63 -0
- package/skills/memory-distill/SKILL.md +89 -0
- package/skills/onboarding/SKILL.md +117 -0
- package/skills/pipeline-status/SKILL.md +51 -0
- package/skills/platform-rewrite/SKILL.md +125 -0
- package/skills/pre-publish/SKILL.md +142 -0
- package/skills/publish-content/SKILL.md +500 -0
- package/skills/remix-content/SKILL.md +77 -0
- package/skills/research/SKILL.md +127 -0
- package/skills/setup/SKILL.md +353 -0
- package/skills/spawn-batch-writer/SKILL.md +66 -0
- package/skills/spawn-planner/SKILL.md +72 -0
- package/skills/spawn-writer/SKILL.md +60 -0
- package/skills/teardown/SKILL.md +144 -0
- package/skills/title-craft/SKILL.md +234 -0
- package/skills/topic-ideas/SKILL.md +105 -0
- package/skills/video-timeline/SKILL.md +117 -0
- package/skills/write-script/SKILL.md +232 -0
- package/skills/xhs-cover-review/SKILL.md +48 -0
- package/src/adapters/browser/browser-cdp.ts +260 -0
- package/src/adapters/browser/browser-relay.ts +236 -0
- package/src/adapters/browser/gateway-client.ts +148 -0
- package/src/adapters/browser/types.ts +36 -0
- package/src/adapters/image/gemini.ts +219 -0
- package/src/adapters/research/tikhub.ts +19 -0
- package/src/cli/banner.ts +18 -0
- package/src/cli/bootstrap.ts +33 -0
- package/src/cli/commands/adapt.ts +28 -0
- package/src/cli/commands/advance.ts +28 -0
- package/src/cli/commands/assets.ts +24 -0
- package/src/cli/commands/audit.ts +18 -0
- package/src/cli/commands/contents.ts +18 -0
- package/src/cli/commands/cover.ts +58 -0
- package/src/cli/commands/events.ts +17 -0
- package/src/cli/commands/humanize.ts +27 -0
- package/src/cli/commands/index.ts +80 -0
- package/src/cli/commands/init.ts +28 -0
- package/src/cli/commands/intel.ts +55 -0
- package/src/cli/commands/learn.ts +34 -0
- package/src/cli/commands/memory.ts +18 -0
- package/src/cli/commands/migrate.ts +24 -0
- package/src/cli/commands/open.ts +21 -0
- package/src/cli/commands/pipelines.ts +18 -0
- package/src/cli/commands/pre-publish.ts +27 -0
- package/src/cli/commands/profile.ts +31 -0
- package/src/cli/commands/research.ts +36 -0
- package/src/cli/commands/restore.ts +28 -0
- package/src/cli/commands/review.ts +61 -0
- package/src/cli/commands/start.ts +28 -0
- package/src/cli/commands/status.ts +14 -0
- package/src/cli/commands/templates.ts +15 -0
- package/src/cli/commands/topics.ts +18 -0
- package/src/cli/commands/trash.ts +28 -0
- package/src/cli/commands/upgrade.ts +48 -0
- package/src/cli/commands/versions.ts +24 -0
- package/src/cli/index.ts +40 -0
- package/src/data/sensitive-words-builtin.json +114 -0
- package/src/data/source-presets.yaml +54 -0
- package/src/e2e.test.ts +596 -0
- package/src/modules/auth/cookie-manager.ts +113 -0
- package/src/modules/cards/template-engine.ts +74 -0
- package/src/modules/cards/templates/comparison-table.ts +71 -0
- package/src/modules/cards/templates/data-chart.ts +76 -0
- package/src/modules/cards/templates/flow-chart.ts +49 -0
- package/src/modules/cards/templates/key-points.ts +59 -0
- package/src/modules/cover/prompt-builder.test.ts +157 -0
- package/src/modules/cover/prompt-builder.ts +212 -0
- package/src/modules/cover/ratio-adapter.test.ts +122 -0
- package/src/modules/cover/ratio-adapter.ts +104 -0
- package/src/modules/filter/sensitive-words.test.ts +72 -0
- package/src/modules/filter/sensitive-words.ts +212 -0
- package/src/modules/humanizer/zh.test.ts +75 -0
- package/src/modules/humanizer/zh.ts +175 -0
- package/src/modules/intel/collector.ts +19 -0
- package/src/modules/intel/collectors/competitor.test.ts +71 -0
- package/src/modules/intel/collectors/competitor.ts +65 -0
- package/src/modules/intel/collectors/rss.test.ts +56 -0
- package/src/modules/intel/collectors/rss.ts +70 -0
- package/src/modules/intel/collectors/trends.test.ts +80 -0
- package/src/modules/intel/collectors/trends.ts +107 -0
- package/src/modules/intel/collectors/web-search.test.ts +85 -0
- package/src/modules/intel/collectors/web-search.ts +81 -0
- package/src/modules/intel/integration.test.ts +203 -0
- package/src/modules/intel/intel-engine.test.ts +103 -0
- package/src/modules/intel/intel-engine.ts +96 -0
- package/src/modules/intel/source-config.test.ts +113 -0
- package/src/modules/intel/source-config.ts +131 -0
- package/src/modules/learnings/diff-tracker.test.ts +144 -0
- package/src/modules/learnings/diff-tracker.ts +189 -0
- package/src/modules/learnings/rule-distiller.ts +141 -0
- package/src/modules/memory/distill.ts +208 -0
- package/src/modules/migrate/legacy-migrate.test.ts +169 -0
- package/src/modules/migrate/legacy-migrate.ts +229 -0
- package/src/modules/pro/api-client.ts +192 -0
- package/src/modules/pro/gate.test.ts +110 -0
- package/src/modules/pro/gate.ts +104 -0
- package/src/modules/profile/creator-profile.test.ts +178 -0
- package/src/modules/profile/creator-profile.ts +248 -0
- package/src/modules/publish/douyin-api.ts +34 -0
- package/src/modules/publish/wechat-mp.ts +320 -0
- package/src/modules/publish/xiaohongshu-api.ts +127 -0
- package/src/modules/research/free-engine.ts +360 -0
- package/src/modules/timeline/markup-generator.ts +63 -0
- package/src/modules/timeline/parser.ts +275 -0
- package/src/modules/workflow/templates.ts +124 -0
- package/src/modules/writing/platform-rewrite.ts +190 -0
- package/src/modules/writing/title-hashtag.ts +385 -0
- package/src/runtime/context.test.ts +97 -0
- package/src/runtime/context.ts +129 -0
- package/src/runtime/events.test.ts +83 -0
- package/src/runtime/events.ts +104 -0
- package/src/runtime/hooks.ts +174 -0
- package/src/runtime/tool-runner.test.ts +204 -0
- package/src/runtime/tool-runner.ts +282 -0
- package/src/runtime/workflow-engine.test.ts +455 -0
- package/src/runtime/workflow-engine.ts +391 -0
- package/src/server/index.ts +409 -0
- package/src/server/start.ts +39 -0
- package/src/storage/local-store.test.ts +304 -0
- package/src/storage/local-store.ts +704 -0
- package/src/storage/pipeline-store.test.ts +363 -0
- package/src/storage/pipeline-store.ts +698 -0
- package/src/tools/asset.ts +96 -0
- package/src/tools/content-save.ts +276 -0
- package/src/tools/cover-review.ts +221 -0
- package/src/tools/humanize.ts +54 -0
- package/src/tools/init.ts +133 -0
- package/src/tools/intel.ts +92 -0
- package/src/tools/memory.ts +76 -0
- package/src/tools/pipeline-ops.ts +109 -0
- package/src/tools/pipeline.ts +168 -0
- package/src/tools/pre-publish.ts +232 -0
- package/src/tools/publish.ts +183 -0
- package/src/tools/registry.ts +198 -0
- package/src/tools/research.ts +304 -0
- package/src/tools/review.ts +305 -0
- package/src/tools/rewrite.ts +165 -0
- package/src/tools/status.ts +30 -0
- package/src/tools/timeline.ts +234 -0
- package/src/tools/topic-create.ts +50 -0
- package/src/types/providers.ts +69 -0
- package/src/types/timeline.test.ts +147 -0
- package/src/types/timeline.ts +83 -0
- package/src/utils/retry.test.ts +97 -0
- package/src/utils/retry.ts +85 -0
- package/templates/AGENTS.md +99 -0
- package/templates/SOUL.md +31 -0
- package/templates/TOOLS.md +76 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sensitive Words Filter — scans text for sensitive/restricted words
|
|
3
|
+
*
|
|
4
|
+
* Sources:
|
|
5
|
+
* 1. Built-in word list: src/data/sensitive-words-builtin.json
|
|
6
|
+
* 2. User custom list: ~/.autocrew/sensitive-words/custom.txt (one word per line)
|
|
7
|
+
* 3. Platform-specific restricted words with replacement suggestions
|
|
8
|
+
*/
|
|
9
|
+
import fs from "node:fs/promises";
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
|
|
12
|
+
// --- Types ---
|
|
13
|
+
|
|
14
|
+
/** A single sensitive-word match found in the scanned text. */
export interface ScanHit {
  /** The matched word, exactly as listed in its word source. */
  word: string;
  /** Built-in category name, `platform:<name>` for platform rules, or `custom`. */
  category: string;
  /** Suggested replacement (if available) */
  suggestion?: string;
  /** All positions where this word appears */
  positions: number[];
}
|
|
22
|
+
|
|
23
|
+
/** Aggregate result of a sensitive-word scan. */
export interface ScanResult {
  /** True when no sensitive words were found. */
  ok: boolean;
  /** Total number of distinct sensitive words found */
  hitCount: number;
  /** Detailed hits */
  hits: ScanHit[];
  /** Quick summary for display */
  summary: string;
  /** Auto-fixed text (with suggestions applied) */
  autoFixedText?: string;
}
|
|
34
|
+
|
|
35
|
+
/** Shape of the bundled src/data/sensitive-words-builtin.json file. */
interface BuiltinData {
  /** General word lists keyed by category name. */
  categories: Record<string, { description: string; words: string[] }>;
  /** Per-platform restricted words, optionally with replacement suggestions. */
  platform_specific: Record<
    string,
    {
      description: string;
      restricted: string[];
      /** Maps a restricted word to a safer replacement. */
      suggestions?: Record<string, string>;
    }
  >;
}
|
|
46
|
+
|
|
47
|
+
// --- Loader ---
|
|
48
|
+
|
|
49
|
+
// Module-level cache so the built-in JSON list is read from disk at most once.
let _builtinCache: BuiltinData | null = null;

import { createRequire } from "node:module";
import { fileURLToPath } from "node:url";

// NOTE(review): _require appears unused in this file — confirm before removing.
const _require = createRequire(import.meta.url);
// ESM has no __dirname; reconstruct it from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Load and cache the built-in sensitive-word data shipped with the package.
 * The JSON path is resolved relative to this module's compiled location.
 */
async function loadBuiltin(): Promise<BuiltinData> {
  if (_builtinCache) return _builtinCache;
  const filePath = path.resolve(__dirname, "../../data/sensitive-words-builtin.json");
  const raw = await fs.readFile(filePath, "utf-8");
  _builtinCache = JSON.parse(raw) as BuiltinData;
  return _builtinCache;
}
|
|
64
|
+
|
|
65
|
+
async function loadCustomWords(dataDir?: string): Promise<string[]> {
|
|
66
|
+
const home = process.env.HOME || process.env.USERPROFILE || "~";
|
|
67
|
+
const customPath = path.join(dataDir || path.join(home, ".autocrew"), "sensitive-words", "custom.txt");
|
|
68
|
+
try {
|
|
69
|
+
const raw = await fs.readFile(customPath, "utf-8");
|
|
70
|
+
return raw
|
|
71
|
+
.split("\n")
|
|
72
|
+
.map((l) => l.trim())
|
|
73
|
+
.filter((l) => l && !l.startsWith("#"));
|
|
74
|
+
} catch {
|
|
75
|
+
return [];
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// --- Core scan ---
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Scan text for sensitive words.
|
|
83
|
+
* @param text - The text to scan
|
|
84
|
+
* @param platform - Optional platform for platform-specific checks
|
|
85
|
+
* @param dataDir - Optional custom data directory
|
|
86
|
+
*/
|
|
87
|
+
export async function scanText(
|
|
88
|
+
text: string,
|
|
89
|
+
platform?: string,
|
|
90
|
+
dataDir?: string,
|
|
91
|
+
): Promise<ScanResult> {
|
|
92
|
+
if (!text || !text.trim()) {
|
|
93
|
+
return { ok: true, hitCount: 0, hits: [], summary: "空文本,无需检查" };
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const builtin = await loadBuiltin();
|
|
97
|
+
const customWords = await loadCustomWords(dataDir);
|
|
98
|
+
const hits: ScanHit[] = [];
|
|
99
|
+
|
|
100
|
+
// 1. Scan built-in categories
|
|
101
|
+
for (const [category, data] of Object.entries(builtin.categories)) {
|
|
102
|
+
for (const word of data.words) {
|
|
103
|
+
const positions = findAllPositions(text, word);
|
|
104
|
+
if (positions.length > 0) {
|
|
105
|
+
hits.push({ word, category, positions });
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// 2. Scan platform-specific restricted words
|
|
111
|
+
if (platform && builtin.platform_specific[platform]) {
|
|
112
|
+
const platformData = builtin.platform_specific[platform];
|
|
113
|
+
for (const word of platformData.restricted) {
|
|
114
|
+
const positions = findAllPositions(text, word);
|
|
115
|
+
if (positions.length > 0) {
|
|
116
|
+
const suggestion = platformData.suggestions?.[word];
|
|
117
|
+
hits.push({
|
|
118
|
+
word,
|
|
119
|
+
category: `platform:${platform}`,
|
|
120
|
+
suggestion,
|
|
121
|
+
positions,
|
|
122
|
+
});
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// 3. Scan custom words
|
|
128
|
+
for (const word of customWords) {
|
|
129
|
+
const positions = findAllPositions(text, word);
|
|
130
|
+
if (positions.length > 0) {
|
|
131
|
+
hits.push({ word, category: "custom", positions });
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// Deduplicate by word
|
|
136
|
+
const deduped = deduplicateHits(hits);
|
|
137
|
+
|
|
138
|
+
// Build auto-fixed text
|
|
139
|
+
let autoFixedText: string | undefined;
|
|
140
|
+
const fixableHits = deduped.filter((h) => h.suggestion);
|
|
141
|
+
if (fixableHits.length > 0) {
|
|
142
|
+
autoFixedText = text;
|
|
143
|
+
for (const hit of fixableHits) {
|
|
144
|
+
autoFixedText = autoFixedText.replaceAll(hit.word, hit.suggestion!);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const summary = buildSummary(deduped);
|
|
149
|
+
|
|
150
|
+
return {
|
|
151
|
+
ok: deduped.length === 0,
|
|
152
|
+
hitCount: deduped.length,
|
|
153
|
+
hits: deduped,
|
|
154
|
+
summary,
|
|
155
|
+
autoFixedText,
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// --- Helpers ---
|
|
160
|
+
|
|
161
|
+
function findAllPositions(text: string, word: string): number[] {
|
|
162
|
+
const positions: number[] = [];
|
|
163
|
+
const lower = text.toLowerCase();
|
|
164
|
+
const target = word.toLowerCase();
|
|
165
|
+
let idx = lower.indexOf(target);
|
|
166
|
+
while (idx !== -1) {
|
|
167
|
+
positions.push(idx);
|
|
168
|
+
idx = lower.indexOf(target, idx + 1);
|
|
169
|
+
}
|
|
170
|
+
return positions;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
function deduplicateHits(hits: ScanHit[]): ScanHit[] {
|
|
174
|
+
const map = new Map<string, ScanHit>();
|
|
175
|
+
for (const hit of hits) {
|
|
176
|
+
const existing = map.get(hit.word);
|
|
177
|
+
if (existing) {
|
|
178
|
+
// Merge positions, keep first category
|
|
179
|
+
existing.positions = [...new Set([...existing.positions, ...hit.positions])];
|
|
180
|
+
if (!existing.suggestion && hit.suggestion) {
|
|
181
|
+
existing.suggestion = hit.suggestion;
|
|
182
|
+
}
|
|
183
|
+
} else {
|
|
184
|
+
map.set(hit.word, { ...hit });
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
return Array.from(map.values());
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
function buildSummary(hits: ScanHit[]): string {
|
|
191
|
+
if (hits.length === 0) return "✅ 未检测到敏感词";
|
|
192
|
+
|
|
193
|
+
const byCat = new Map<string, number>();
|
|
194
|
+
for (const h of hits) {
|
|
195
|
+
byCat.set(h.category, (byCat.get(h.category) || 0) + 1);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
const parts: string[] = [];
|
|
199
|
+
for (const [cat, count] of byCat) {
|
|
200
|
+
parts.push(`${cat}: ${count} 个`);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
const fixable = hits.filter((h) => h.suggestion).length;
|
|
204
|
+
const fixNote = fixable > 0 ? `,其中 ${fixable} 个可自动替换` : "";
|
|
205
|
+
|
|
206
|
+
return `⚠️ 检测到 ${hits.length} 个敏感词(${parts.join("、")})${fixNote}`;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
/** Reset the built-in cache (for testing) */
export function _resetCache(): void {
  // Forces the next loadBuiltin() call to re-read the JSON from disk.
  _builtinCache = null;
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { humanizeZh } from "../humanizer/zh.js";
|
|
3
|
+
|
|
4
|
+
// Unit tests for the Chinese "humanizer": each case feeds text containing a
// known AI-writing pattern and asserts the pattern is gone from the output.
describe("humanizeZh", () => {
  it("removes filler phrases", () => {
    const result = humanizeZh({ text: "值得一提的是,这个功能很好用。" });
    expect(result.ok).toBe(true);
    expect(result.humanizedText).not.toContain("值得一提的是");
    expect(result.changes.length).toBeGreaterThan(0);
  });

  it("replaces corporate buzzwords", () => {
    const result = humanizeZh({ text: "这个产品可以赋能用户,助力企业发展。" });
    expect(result.humanizedText).not.toContain("赋能");
    expect(result.humanizedText).not.toContain("助力");
    // Both buzzwords map to the plain verb "帮".
    expect(result.humanizedText).toContain("帮");
  });

  it("replaces 闭环 with 跑通", () => {
    const result = humanizeZh({ text: "我们需要形成完整的闭环。" });
    expect(result.humanizedText).toContain("跑通");
    expect(result.humanizedText).not.toContain("闭环");
  });

  it("removes summary openers", () => {
    const result = humanizeZh({ text: "综上所述,这是一个好方案。" });
    expect(result.humanizedText).not.toContain("综上所述");
  });

  it("removes 总而言之 and 总的来说", () => {
    const r1 = humanizeZh({ text: "总而言之,效果不错。" });
    const r2 = humanizeZh({ text: "总的来说,还可以。" });
    expect(r1.humanizedText).not.toContain("总而言之");
    expect(r2.humanizedText).not.toContain("总的来说");
  });

  it("removes vague adjectives", () => {
    const result = humanizeZh({ text: "这是一个深度分析,全方位覆盖,多维度解读。" });
    expect(result.humanizedText).not.toContain("深度");
    expect(result.humanizedText).not.toContain("全方位");
    expect(result.humanizedText).not.toContain("多维度");
  });

  it("returns original text unchanged when no AI patterns found", () => {
    // Clean input: no filler, buzzwords, or long clauses to rewrite.
    const clean = "今天天气不错,出去走走吧。";
    const result = humanizeZh({ text: clean });
    expect(result.ok).toBe(true);
    expect(result.humanizedText).toBe(clean);
    expect(result.changeCount).toBe(0);
  });

  it("returns ok:true and summary on success", () => {
    const result = humanizeZh({ text: "赋能用户,助力发展。" });
    expect(result.ok).toBe(true);
    expect(result.summary).toContain("humanizer-zh");
  });

  it("handles empty string", () => {
    const result = humanizeZh({ text: "" });
    expect(result.ok).toBe(true);
    expect(result.humanizedText).toBe("");
  });

  it("handles multiple replacements in one pass", () => {
    const result = humanizeZh({
      text: "值得一提的是,我们需要赋能用户,打通渠道,形成闭环。综上所述,这是深度分析。",
    });
    expect(result.humanizedText).not.toContain("值得一提的是");
    expect(result.humanizedText).not.toContain("赋能");
    expect(result.humanizedText).not.toContain("闭环");
    expect(result.humanizedText).not.toContain("综上所述");
    expect(result.humanizedText).not.toContain("深度");
    // At least five distinct replacement rules should fire on this input.
    expect(result.changes.length).toBeGreaterThanOrEqual(5);
  });
});
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/** Input for humanizeZh. */
export interface HumanizeZhOptions {
  /** The Chinese text to rewrite. */
  text: string;
}
|
|
4
|
+
|
|
5
|
+
/** Output of humanizeZh. */
export interface HumanizeZhResult {
  /** Always true — the transform itself cannot fail. */
  ok: boolean;
  /** The text as passed in (empty string when input was falsy). */
  originalText: string;
  /** The rewritten, whitespace-normalized text. */
  humanizedText: string;
  /** Human-readable notes, one per kind of change applied. */
  changes: string[];
  /** Equal to changes.length. */
  changeCount: number;
  /** One-line status message for display. */
  summary: string;
}
|
|
13
|
+
|
|
14
|
+
// Single-pass regex substitutions for common AI-writing tells: filler
// transitions and summary openers are deleted outright; corporate buzzwords
// are swapped for plainer verbs. `note` is the human-readable change
// description surfaced in the result's `changes` list.
const DIRECT_REPLACEMENTS: Array<{ pattern: RegExp; replacement: string; note: string }> = [
  { pattern: /值得一提的是/g, replacement: "", note: "删除空转折词“值得一提的是”" },
  { pattern: /需要注意的是/g, replacement: "", note: "删除空提醒词“需要注意的是”" },
  { pattern: /综上所述|总而言之|总的来说/g, replacement: "", note: "删除套路化总结句式" },
  { pattern: /可以说|毫不夸张地说/g, replacement: "", note: "删除夸张前缀,直接表达判断" },
  { pattern: /赋能/g, replacement: "帮", note: "把“赋能”改成具体动作词" },
  { pattern: /助力/g, replacement: "帮", note: "把“助力”改成具体动作词" },
  { pattern: /打通/g, replacement: "连接", note: "把“打通”改成更具体表达" },
  { pattern: /闭环/g, replacement: "跑通", note: "把“闭环”改成更口语化表达" },
  { pattern: /深度/g, replacement: "", note: "删除空泛形容词“深度”" },
  { pattern: /全方位/g, replacement: "", note: "删除空泛形容词“全方位”" },
  { pattern: /多维度/g, replacement: "", note: "删除空泛形容词“多维度”" },
];
|
|
27
|
+
|
|
28
|
+
function normalizeWhitespace(text: string): string {
|
|
29
|
+
return text
|
|
30
|
+
.replace(/[ \t]+\n/g, "\n")
|
|
31
|
+
.replace(/\n{3,}/g, "\n\n")
|
|
32
|
+
.replace(/[ \t]{2,}/g, " ")
|
|
33
|
+
.trim();
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
function replaceWithTracking(
|
|
37
|
+
text: string,
|
|
38
|
+
pattern: RegExp,
|
|
39
|
+
replacement: string,
|
|
40
|
+
): { text: string; count: number } {
|
|
41
|
+
let count = 0;
|
|
42
|
+
const nextText = text.replace(pattern, () => {
|
|
43
|
+
count += 1;
|
|
44
|
+
return replacement;
|
|
45
|
+
});
|
|
46
|
+
return { text: nextText, count };
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
function breakLongClauses(text: string): { text: string; count: number } {
|
|
50
|
+
const lines = text.split("\n");
|
|
51
|
+
let count = 0;
|
|
52
|
+
const nextLines = lines.map((line) => {
|
|
53
|
+
const trimmed = line.trim();
|
|
54
|
+
if (!trimmed) return line;
|
|
55
|
+
const chineseLength = (trimmed.match(/[\u4e00-\u9fff]/g) || []).length;
|
|
56
|
+
if (chineseLength <= 40) return line;
|
|
57
|
+
|
|
58
|
+
const replaced = line
|
|
59
|
+
.replace(/,(?=[^,。!?]{10,})/, "。")
|
|
60
|
+
.replace(/;(?=[^;。!?]{8,})/, "。");
|
|
61
|
+
if (replaced !== line) {
|
|
62
|
+
count += 1;
|
|
63
|
+
return replaced;
|
|
64
|
+
}
|
|
65
|
+
return line;
|
|
66
|
+
});
|
|
67
|
+
return { text: nextLines.join("\n"), count };
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
function simplifyProgressionPhrases(text: string): { text: string; count: number } {
|
|
71
|
+
let count = 0;
|
|
72
|
+
let next = text.replace(/首先[,,]?/g, () => {
|
|
73
|
+
count += 1;
|
|
74
|
+
return "";
|
|
75
|
+
});
|
|
76
|
+
next = next.replace(/其次[,,]?/g, () => {
|
|
77
|
+
count += 1;
|
|
78
|
+
return "";
|
|
79
|
+
});
|
|
80
|
+
next = next.replace(/最后[,,]?/g, () => {
|
|
81
|
+
count += 1;
|
|
82
|
+
return "最后,";
|
|
83
|
+
});
|
|
84
|
+
return { text: next, count };
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
function reduceWeOpenings(text: string): { text: string; count: number } {
|
|
88
|
+
const lines = text.split("\n");
|
|
89
|
+
let weCount = 0;
|
|
90
|
+
for (const line of lines) {
|
|
91
|
+
if (line.trim().startsWith("我们")) {
|
|
92
|
+
weCount += 1;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
if (weCount <= 2) {
|
|
96
|
+
return { text, count: 0 };
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
let changed = 0;
|
|
100
|
+
const nextLines = lines.map((line) => {
|
|
101
|
+
if (changed >= weCount - 2) return line;
|
|
102
|
+
if (line.trim().startsWith("我们")) {
|
|
103
|
+
changed += 1;
|
|
104
|
+
return line.replace("我们", "你");
|
|
105
|
+
}
|
|
106
|
+
return line;
|
|
107
|
+
});
|
|
108
|
+
return { text: nextLines.join("\n"), count: changed };
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
function addRhythmPhraseIfNeeded(text: string): { text: string; count: number } {
|
|
112
|
+
if (/说白了|你想啊|问题来了/.test(text)) {
|
|
113
|
+
return { text, count: 0 };
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const paragraphs = text.split(/\n{2,}/);
|
|
117
|
+
if (paragraphs.length < 2) {
|
|
118
|
+
return { text, count: 0 };
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
const next = [...paragraphs];
|
|
122
|
+
next.splice(1, 0, "说白了,这件事拼的不是工具数量,而是表达和执行。");
|
|
123
|
+
return { text: next.join("\n\n"), count: 1 };
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
export function humanizeZh(options: HumanizeZhOptions): HumanizeZhResult {
|
|
127
|
+
const originalText = options.text || "";
|
|
128
|
+
let humanizedText = originalText;
|
|
129
|
+
const changes: string[] = [];
|
|
130
|
+
|
|
131
|
+
for (const replacement of DIRECT_REPLACEMENTS) {
|
|
132
|
+
const result = replaceWithTracking(humanizedText, replacement.pattern, replacement.replacement);
|
|
133
|
+
if (result.count > 0) {
|
|
134
|
+
humanizedText = result.text;
|
|
135
|
+
changes.push(`${replacement.note} × ${result.count}`);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
const progression = simplifyProgressionPhrases(humanizedText);
|
|
140
|
+
if (progression.count > 0) {
|
|
141
|
+
humanizedText = progression.text;
|
|
142
|
+
changes.push(`打散“首先/其次/最后”顺序词 × ${progression.count}`);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
const longClauses = breakLongClauses(humanizedText);
|
|
146
|
+
if (longClauses.count > 0) {
|
|
147
|
+
humanizedText = longClauses.text;
|
|
148
|
+
changes.push(`拆开过长句子 × ${longClauses.count}`);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
const weOpenings = reduceWeOpenings(humanizedText);
|
|
152
|
+
if (weOpenings.count > 0) {
|
|
153
|
+
humanizedText = weOpenings.text;
|
|
154
|
+
changes.push(`减少“我们”开头句子 × ${weOpenings.count}`);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
const rhythm = addRhythmPhraseIfNeeded(humanizedText);
|
|
158
|
+
if (rhythm.count > 0) {
|
|
159
|
+
humanizedText = rhythm.text;
|
|
160
|
+
changes.push("补入 1 处口语化节奏句");
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
humanizedText = normalizeWhitespace(humanizedText);
|
|
164
|
+
return {
|
|
165
|
+
ok: true,
|
|
166
|
+
originalText,
|
|
167
|
+
humanizedText,
|
|
168
|
+
changes,
|
|
169
|
+
changeCount: changes.length,
|
|
170
|
+
summary:
|
|
171
|
+
changes.length > 0
|
|
172
|
+
? `humanizer-zh 完成:修改了 ${changes.length} 类问题`
|
|
173
|
+
: "humanizer-zh 完成:确认无明显 AI 痕迹",
|
|
174
|
+
};
|
|
175
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { IntelItem } from "../../storage/pipeline-store.js";
|
|
2
|
+
|
|
3
|
+
/** Output of a single collector run. */
export interface CollectorResult {
  /** Intel items gathered in this run. */
  items: IntelItem[];
  /** Identifier of the collector that produced the items. */
  source: string;
  /** Non-fatal error messages accumulated during collection. */
  errors: string[];
}

/** A pluggable intel source (e.g. RSS, trends, competitor watch). */
export interface Collector {
  /** Stable collector identifier, e.g. "competitor". */
  id: string;
  /** Gather items; implementations report per-source failures via `errors`. */
  collect(opts: CollectorOptions): Promise<CollectorResult>;
}

/** Parameters shared by all collectors. */
export interface CollectorOptions {
  /** Keywords to search for. */
  keywords: string[];
  /** Industry/domain context for the search. */
  industry: string;
  /** Target platforms to collect from. */
  platforms: string[];
  /** Optional override of the data directory. */
  dataDir?: string;
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { transformCompetitorResults } from "./competitor.js";
|
|
3
|
+
import type { ResearchItem } from "../../../adapters/browser/types.js";
|
|
4
|
+
|
|
5
|
+
// Tests for the pure transform that turns raw browser research results into
// IntelItems tagged with the competitor account and platform.
describe("transformCompetitorResults", () => {
  it("produces correct IntelItems with competitor tags", () => {
    const results: ResearchItem[] = [
      {
        title: "Competitor Post 1",
        summary: "Their latest product launch",
        url: "https://xiaohongshu.com/note/123",
        platform: "xiaohongshu",
        source: "browser_cdp",
      },
      {
        title: "Competitor Post 2",
        url: "https://xiaohongshu.com/note/456",
        platform: "xiaohongshu",
        source: "browser_cdp",
      },
    ];

    const items = transformCompetitorResults(results, "竞品A", "美妆", "xiaohongshu");
    expect(items).toHaveLength(2);
    expect(items[0].source).toBe("competitor");
    expect(items[0].domain).toBe("美妆");
    expect(items[0].tags).toContain("competitor:竞品A");
    expect(items[0].tags).toContain("xiaohongshu");
    expect(items[0].title).toBe("Competitor Post 1");
    expect(items[0].summary).toBe("Their latest product launch");
    expect(items[0].sourceUrl).toBe("https://xiaohongshu.com/note/123");
    // Competitor items get a fixed baseline relevance score.
    expect(items[0].relevance).toBe(60);
  });

  it("filters out items without titles", () => {
    const results: ResearchItem[] = [
      {
        title: "",
        platform: "xiaohongshu",
        source: "browser_cdp",
      },
      {
        title: "Valid Title",
        platform: "xiaohongshu",
        source: "browser_cdp",
      },
    ];

    const items = transformCompetitorResults(results, "竞品B", "科技", "xiaohongshu");
    expect(items).toHaveLength(1);
    expect(items[0].title).toBe("Valid Title");
  });

  it("handles empty results array", () => {
    const items = transformCompetitorResults([], "竞品C", "教育", "bilibili");
    expect(items).toEqual([]);
  });

  it("uses empty string for missing summary", () => {
    const results: ResearchItem[] = [
      {
        title: "No Summary",
        platform: "douyin",
        source: "browser_cdp",
      },
    ];

    const items = transformCompetitorResults(results, "竞品D", "美食", "douyin");
    expect(items[0].summary).toBe("");
  });
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import type { IntelItem } from "../../../storage/pipeline-store.js";
|
|
2
|
+
import type { Collector, CollectorOptions, CollectorResult } from "../collector.js";
|
|
3
|
+
import type { ResearchItem, BrowserPlatform } from "../../../adapters/browser/types.js";
|
|
4
|
+
import { browserCdpAdapter } from "../../../adapters/browser/browser-cdp.js";
|
|
5
|
+
import { loadSourceConfig } from "../source-config.js";
|
|
6
|
+
|
|
7
|
+
// ─── Result Transformer ────────────────────────────────────────────────────
|
|
8
|
+
|
|
9
|
+
export function transformCompetitorResults(
|
|
10
|
+
results: ResearchItem[],
|
|
11
|
+
accountName: string,
|
|
12
|
+
domain: string,
|
|
13
|
+
platform: string,
|
|
14
|
+
): IntelItem[] {
|
|
15
|
+
return results
|
|
16
|
+
.filter((r) => r.title)
|
|
17
|
+
.map((r) => ({
|
|
18
|
+
title: r.title,
|
|
19
|
+
domain,
|
|
20
|
+
source: "competitor" as const,
|
|
21
|
+
sourceUrl: r.url,
|
|
22
|
+
collectedAt: new Date().toISOString(),
|
|
23
|
+
relevance: 60,
|
|
24
|
+
tags: [`competitor:${accountName}`, platform],
|
|
25
|
+
expiresAfter: 14,
|
|
26
|
+
summary: r.summary ?? "",
|
|
27
|
+
keyPoints: [],
|
|
28
|
+
topicPotential: "",
|
|
29
|
+
}));
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// ─── Competitor Collector ───────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
export function createCompetitorCollector(): Collector {
|
|
35
|
+
return {
|
|
36
|
+
id: "competitor",
|
|
37
|
+
async collect(opts: CollectorOptions): Promise<CollectorResult> {
|
|
38
|
+
const config = await loadSourceConfig(opts.dataDir);
|
|
39
|
+
const items: IntelItem[] = [];
|
|
40
|
+
const errors: string[] = [];
|
|
41
|
+
|
|
42
|
+
for (const account of config.accounts) {
|
|
43
|
+
try {
|
|
44
|
+
const results = await browserCdpAdapter.research({
|
|
45
|
+
platform: account.platform as BrowserPlatform,
|
|
46
|
+
keyword: account.name,
|
|
47
|
+
limit: 10,
|
|
48
|
+
});
|
|
49
|
+
const transformed = transformCompetitorResults(
|
|
50
|
+
results,
|
|
51
|
+
account.name,
|
|
52
|
+
account.domain,
|
|
53
|
+
account.platform,
|
|
54
|
+
);
|
|
55
|
+
items.push(...transformed);
|
|
56
|
+
} catch (err: unknown) {
|
|
57
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
58
|
+
errors.push(`Competitor ${account.name} (${account.platform}) failed: ${msg}`);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return { items, source: "competitor", errors };
|
|
63
|
+
},
|
|
64
|
+
};
|
|
65
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { parseRssItems } from "./rss.js";
|
|
3
|
+
import type { RssItem } from "./rss.js";
|
|
4
|
+
|
|
5
|
+
// Tests for the RSS feed-item → IntelItem transform.
describe("parseRssItems", () => {
  it("transforms RSS items into IntelItems", () => {
    const items: RssItem[] = [
      {
        title: "AI Breakthrough",
        link: "https://example.com/article",
        contentSnippet: "Major AI advancement announced today",
        isoDate: "2024-01-15T10:00:00Z",
        categories: ["tech", "ai"],
      },
    ];

    const result = parseRssItems(items, "科技", ["rss-source"]);
    expect(result).toHaveLength(1);
    expect(result[0].title).toBe("AI Breakthrough");
    expect(result[0].domain).toBe("科技");
    expect(result[0].source).toBe("rss");
    expect(result[0].sourceUrl).toBe("https://example.com/article");
    expect(result[0].summary).toBe("Major AI advancement announced today");
    // Caller-supplied base tags come first, then the feed's categories.
    expect(result[0].tags).toEqual(["rss-source", "tech", "ai"]);
    expect(result[0].collectedAt).toBe("2024-01-15T10:00:00Z");
  });

  it("handles missing fields gracefully", () => {
    const items: RssItem[] = [
      { title: "No Link Article" },
      { title: undefined }, // should be filtered out
      { title: "Minimal", content: "Full content here that is quite long and should be sliced" },
    ];

    const result = parseRssItems(items, "默认");
    expect(result).toHaveLength(2);
    expect(result[0].title).toBe("No Link Article");
    expect(result[0].sourceUrl).toBeUndefined();
    expect(result[0].summary).toBe("");
    expect(result[0].tags).toEqual([]);
    expect(result[1].title).toBe("Minimal");
    // When no snippet exists, summary is expected to fall back to `content`.
    expect(result[1].summary).toContain("Full content");
  });

  it("uses isoDate when available, falls back to now", () => {
    const withDate: RssItem[] = [{ title: "Dated", isoDate: "2024-06-01T00:00:00Z" }];
    const withoutDate: RssItem[] = [{ title: "Undated" }];

    const dated = parseRssItems(withDate, "test");
    const undated = parseRssItems(withoutDate, "test");

    expect(dated[0].collectedAt).toBe("2024-06-01T00:00:00Z");
    // Should be a valid ISO date (now-ish)
    expect(new Date(undated[0].collectedAt).getTime()).toBeGreaterThan(0);
  });
});
|