skyloom 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +44 -1
- package/dist/core/agent.js.map +1 -1
- package/dist/core/arbitrate.d.ts +32 -0
- package/dist/core/arbitrate.d.ts.map +1 -0
- package/dist/core/arbitrate.js +136 -0
- package/dist/core/arbitrate.js.map +1 -0
- package/dist/core/estimate.d.ts +30 -0
- package/dist/core/estimate.d.ts.map +1 -0
- package/dist/core/estimate.js +94 -0
- package/dist/core/estimate.js.map +1 -0
- package/dist/core/evolve.d.ts +43 -0
- package/dist/core/evolve.d.ts.map +1 -0
- package/dist/core/evolve.js +201 -0
- package/dist/core/evolve.js.map +1 -0
- package/dist/core/filter.d.ts +16 -0
- package/dist/core/filter.d.ts.map +1 -0
- package/dist/core/filter.js +91 -0
- package/dist/core/filter.js.map +1 -0
- package/dist/core/index.d.ts +7 -1
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +13 -2
- package/dist/core/index.js.map +1 -1
- package/dist/core/longdoc.d.ts +41 -0
- package/dist/core/longdoc.d.ts.map +1 -0
- package/dist/core/longdoc.js +128 -0
- package/dist/core/longdoc.js.map +1 -0
- package/dist/core/sandbox.d.ts +24 -0
- package/dist/core/sandbox.d.ts.map +1 -0
- package/dist/core/sandbox.js +158 -0
- package/dist/core/sandbox.js.map +1 -0
- package/dist/tools/builtin.d.ts.map +1 -1
- package/dist/tools/builtin.js +3 -3
- package/dist/tools/builtin.js.map +1 -1
- package/package.json +1 -1
- package/src/core/agent.ts +33 -1
- package/src/core/arbitrate.ts +162 -0
- package/src/core/estimate.ts +104 -0
- package/src/core/evolve.ts +191 -0
- package/src/core/filter.ts +103 -0
- package/src/core/index.ts +8 -2
- package/src/core/longdoc.ts +155 -0
- package/src/core/sandbox.ts +142 -0
- package/src/tools/builtin.ts +4 -6
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 资源估算模块 — Token & time budget estimation for task planning.
|
|
3
|
+
*
|
|
4
|
+
* Helps Snow and other planning agents estimate the cost of
|
|
5
|
+
* proposed sub-tasks before committing to execution.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Task } from "./agent";
|
|
9
|
+
|
|
10
|
+
/* ═══════════════════════════════════════
|
|
11
|
+
Token estimation
|
|
12
|
+
═══════════════════════════════════════ */
|
|
13
|
+
/**
 * Matches a single CJK character. Ranges are written as `\u` escapes so the
 * class survives editors and tooling that strip unassigned code points
 * (a raw-literal class with a dropped endpoint becomes an out-of-order range
 * and throws a SyntaxError at module load):
 *   U+4E00–U+9FFF  CJK Unified Ideographs
 *   U+3040–U+309F  Hiragana
 *   U+AC00–U+D7AF  Hangul Syllables
 *   U+3400–U+4DBF  CJK Unified Ideographs Extension A
 * NOTE(review): the U+3040 and U+D7AF endpoints are reconstructed from the
 * Unicode block boundaries — confirm against the upstream source.
 */
const CJK_REGEX = /[\u4e00-\u9fff\u3040-\u309f\uac00-\ud7af\u3400-\u4dbf]/g;

/**
 * Estimate the token count of a text.
 *
 * Heuristic: every CJK character counts as 2 tokens, every other UTF-16 code
 * unit counts as a quarter of a token (rounded up over the whole text).
 *
 * @param text Text to measure.
 * @returns Estimated number of tokens; 0 for an empty string.
 */
export function estimateTokens(text: string): number {
  if (!text) return 0;
  const cjk = text.match(CJK_REGEX)?.length ?? 0;
  return cjk * 2 + Math.ceil((text.length - cjk) / 4);
}
|
|
20
|
+
|
|
21
|
+
/* ═══════════════════════════════════════
|
|
22
|
+
Per-task-type cost estimates
|
|
23
|
+
═══════════════════════════════════════ */
|
|
24
|
+
/**
 * Keyword heuristics for task cost.
 * Each entry is [pattern, estimated tokens, estimated tool calls]; a task
 * description is tested against every entry.
 */
const TASK_TYPE_PATTERNS: Array<[RegExp, number, number]> = [
  [/read|read_file|grep|search|查|搜索|list/i, 2000, 2],
  [/write|write_file|生成|写|create|implement/i, 4000, 5],
  [/edit|edit_file|改|修改|fix|修复/i, 3000, 3],
  [/delete|delete_file|删|rm/i, 1500, 2],
  [/deploy|部署|publish|发布|release/i, 8000, 8],
  [/review|审查|audit|审计|scan|扫描/i, 5000, 4],
  [/test|测试|run_test|coverage/i, 3000, 3],
  [/research|研究|调研|analyze|分析/i, 6000, 4],
  [/orchestrate|编排|multi-step|多步/i, 12000, 10],
];

/**
 * Estimate the cost of a single task from its free-text description.
 *
 * The result starts from a floor of 2000 tokens / 2 tool calls and is raised
 * to the largest values among all matching patterns (tokens and tools are
 * maximised independently).
 *
 * @param description Task description to classify.
 * @returns Estimated tokens, tool calls and wall time in seconds
 *          (2s per 1k tokens + 0.5s per tool call + a fixed 2s).
 */
export function estimateTaskCost(description: string): { tokens: number; tools: number; timeSeconds: number } {
  const hits = TASK_TYPE_PATTERNS.filter(([matcher]) => matcher.test(description));
  const tokens = Math.max(2000, ...hits.map(([, tokenCost]) => tokenCost));
  const tools = Math.max(2, ...hits.map(([, , toolCost]) => toolCost));
  const timeSeconds = (tokens / 1000) * 2 + tools * 0.5 + 2;
  return { tokens, tools, timeSeconds };
}
|
|
49
|
+
|
|
50
|
+
/* ═══════════════════════════════════════
|
|
51
|
+
Task plan cost summary
|
|
52
|
+
═══════════════════════════════════════ */
|
|
53
|
+
/** Aggregated cost estimate for a list of tasks. */
export interface PlanEstimate {
  /** Sum of the per-task token estimates plus a fixed 500-token overhead. */
  totalTokens: number;
  /** Sum of the per-task tool-call estimates. */
  totalTools: number;
  /** Total estimated time in seconds (includes a fixed 5s overhead), rounded. */
  totalTimeSeconds: number;
  /** One entry per input task, in input order; `time` is in seconds, unrounded. */
  perTask: Array<{ id: string; tokens: number; tools: number; time: number }>;
  /** Human-readable warnings about expensive tasks or an oversized plan. */
  warnings: string[];
}

/**
 * Estimate the total cost of a task plan.
 *
 * Each task is costed with `estimateTaskCost(task.description)`. Warnings are
 * emitted per task (time > 60s, tool calls > 10) and then for the plan as a
 * whole (tokens > 64000, time > 120s, more than 6 tasks).
 *
 * @param tasks Tasks to cost; only `id` and `description` are read.
 * @returns Totals, per-task breakdown and warnings.
 */
export function estimateTaskPlan(tasks: Task[]): PlanEstimate {
  const warnings: string[] = [];
  const perTask: PlanEstimate["perTask"] = [];
  let tokenSum = 500; // system prompt overhead
  let toolSum = 0;
  let timeSum = 5; // init overhead

  tasks.forEach(task => {
    const { tokens, tools, timeSeconds } = estimateTaskCost(task.description);
    perTask.push({ id: task.id, tokens, tools, time: timeSeconds });

    tokenSum += tokens;
    toolSum += tools;
    timeSum += timeSeconds;

    if (timeSeconds > 60) {
      warnings.push(`Task ${task.id} may take >${Math.round(timeSeconds)}s`);
    }
    if (tools > 10) {
      warnings.push(`Task ${task.id} uses many tool calls (${tools})`);
    }
  });

  if (tokenSum > 64000) {
    warnings.push(`Total token estimate (${tokenSum}) exceeds typical context window`);
  }
  if (timeSum > 120) {
    warnings.push(`Estimated total time (${Math.round(timeSum)}s) is significant`);
  }
  if (tasks.length > 6) {
    warnings.push(`Large number of sub-tasks (${tasks.length}) — consider merging simpler ones`);
  }

  return {
    totalTokens: tokenSum,
    totalTools: toolSum,
    totalTimeSeconds: Math.round(timeSum),
    perTask,
    warnings,
  };
}
|
|
85
|
+
|
|
86
|
+
/* ═══════════════════════════════════════
|
|
87
|
+
Format estimate for display
|
|
88
|
+
═══════════════════════════════════════ */
|
|
89
|
+
/**
 * Render a plan estimate as a markdown table, followed by a warnings list
 * when there are any warnings.
 *
 * Task ids are escaped before being placed in a table cell: a literal `|`
 * would otherwise start a new column and a line break would end the row.
 *
 * @param est Estimate to render.
 * @returns Markdown text (lines joined with "\n").
 */
export function formatPlanEstimate(est: PlanEstimate): string {
  // Keep a cell on one line and inside its own column.
  const cell = (value: string): string =>
    String(value).replace(/\r?\n/g, " ").replace(/\|/g, "\\|");

  const lines: string[] = [
    `## Plan Estimate`,
    `| Task | Tokens | Tools | Time |`,
    `|------|--------|-------|------|`,
    ...est.perTask.map(t => `| ${cell(t.id)} | ${t.tokens} | ${t.tools} | ${t.time.toFixed(0)}s |`),
    `| **Total** | **${est.totalTokens}** | **${est.totalTools}** | **${est.totalTimeSeconds}s** |`,
  ];

  if (est.warnings.length > 0) {
    lines.push("", "### Warnings");
    for (const w of est.warnings) lines.push(`- ⚠ ${w}`);
  }

  return lines.join("\n");
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 自我进化模块 — Prompt self-optimization via failure analysis.
|
|
3
|
+
*
|
|
4
|
+
* When an agent repeatedly fails at similar tasks, this module analyzes
|
|
5
|
+
* the failure patterns and suggests targeted improvements to the agent's
|
|
6
|
+
* System Prompt. The agent can then apply these suggestions to improve
|
|
7
|
+
* future performance.
|
|
8
|
+
*
|
|
9
|
+
* Architecture:
|
|
10
|
+
* Failure log → Pattern analysis → Prompt diff → Agent.applyDiff()
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import * as fs from "fs";
|
|
14
|
+
import * as path from "path";
|
|
15
|
+
import { USER_CONFIG_DIR } from "./config";
|
|
16
|
+
import { getLogger } from "./logger";
|
|
17
|
+
|
|
18
|
+
const log = getLogger("evolve");
|
|
19
|
+
|
|
20
|
+
/* ═══════════════════════════════════════
|
|
21
|
+
Prompt diff — a suggested change
|
|
22
|
+
═══════════════════════════════════════ */
|
|
23
|
+
/** A suggested (and possibly applied) change to an agent's system prompt. */
export interface PromptDiff {
  /** Short identifier of this suggestion. */
  id: string;
  /** Timestamp string recorded when the suggestion was created. */
  ts: string;
  /** Name of the agent the suggestion targets. */
  agent: string;
  /** Why this change is needed. */
  reason: string;
  /** Old prompt fragment. */
  before: string;
  /** New prompt fragment. */
  after: string;
  /** Whether the change has been applied to the agent. */
  applied: boolean;
  /** Measured improvement after applying. */
  improvement?: string;
}
|
|
33
|
+
|
|
34
|
+
/* ═══════════════════════════════════════
|
|
35
|
+
Failure analysis
|
|
36
|
+
═══════════════════════════════════════ */
|
|
37
|
+
/** Result of analysing an agent's recent failure patterns. */
export interface FailureAnalysis {
  /** Name of the analysed agent. */
  agent: string;
  /** Human-readable description of the analysed time window. */
  period: string;
  /** Total number of calls in the period. */
  totalCalls: number;
  /** Number of failures in the period. */
  failureCount: number;
  /** Failure patterns with their occurrence counts. */
  topFailures: Array<{ pattern: string; count: number }>;
  /** Prompt changes suggested in response to the failures. */
  suggestedDiffs: PromptDiff[];
}
|
|
45
|
+
|
|
46
|
+
/** Directory (under the user config dir) where per-agent diff logs are stored. */
const evolveDir = path.join(USER_CONFIG_DIR, "evolve");

/**
 * Make sure `evolveDir` exists.
 *
 * `recursive: true` already makes `mkdirSync` succeed when the directory is
 * present, so no `existsSync` pre-check is needed (the pre-check also left a
 * window between check and create).
 */
function ensureDir(): void {
  fs.mkdirSync(evolveDir, { recursive: true });
}
|
|
48
|
+
|
|
49
|
+
/**
 * Analyze recent failures and suggest prompt improvements.
 *
 * Only experiences whose `lastSeen` parses to a time within the last 7 days
 * are considered. The 5 most frequent are reported as `topFailures`, and each
 * is matched against a small set of keyword rules; a rule yields a suggestion
 * only when the system prompt does not already contain that rule's marker
 * text. Suggestions are de-duplicated and capped at 3.
 *
 * @param agent        Agent name, copied into the result and every suggestion.
 * @param experiences  Recorded failure patterns with frequency and last-seen time.
 * @param systemPrompt Current system prompt, used to skip rules already present.
 * @returns The analysis. `totalCalls` and `failureCount` are always 0 here;
 *          this function does not compute them.
 */
export function analyzeFailures(
  agent: string,
  experiences: Array<{ pattern: string; solution: string; frequency: number; lastSeen: string }>,
  systemPrompt: string
): FailureAnalysis {
  const cutoff = Date.now() - 7 * 86400000;

  // An unparseable lastSeen gives NaN, and `NaN > cutoff` is false, so such
  // entries are dropped without any exception handling.
  const recent = experiences.filter(e => new Date(e.lastSeen).getTime() > cutoff);

  const topFailures = [...recent]
    .sort((a, b) => b.frequency - a.frequency)
    .slice(0, 5)
    .map(e => ({ pattern: e.pattern, count: e.frequency }));

  const suggestedDiffs: PromptDiff[] = [];

  // Queue `rule` unless the prompt already contains `marker`.
  const suggest = (reason: string, rule: string, marker: string): void => {
    if (systemPrompt.includes(marker)) return;
    suggestedDiffs.push({
      id: Math.random().toString(36).slice(2, 8),
      ts: new Date().toISOString(),
      agent,
      reason,
      before: "",
      after: rule,
      applied: false,
    });
  };

  // Rule-based suggestions from failure patterns
  for (const f of topFailures) {
    const lower = f.pattern.toLowerCase();

    // Search storm → add search budget rule
    if ((lower.includes("search") || lower.includes("web_search")) && f.count >= 3) {
      suggest(
        `搜索风暴 (${f.count} 次重复搜索)`,
        `- 搜索不超过 5 轮。5 轮后直接基于已有信息综合回答。`,
        "搜索不超过"
      );
    }

    // Empty response → add deliverable checklist
    if ((lower.includes("empty") || lower.includes("placeholder") || lower.includes("完成了")) && f.count >= 2) {
      suggest(
        `空响应/占位 (${f.count} 次)`,
        `- 完成任务后,必须输出实际产物(代码/文件路径/数据),禁止只说"完成了"而无产出。`,
        "必须输出实际产物"
      );
    }

    // Tool not found → add tool discovery to prompt
    if (lower.includes("does not exist") || (lower.includes("tool") && lower.includes("not found"))) {
      suggest(
        `工具不存在 (${f.count} 次)`,
        `- 使用不熟悉的工具前先调 list_skills 查看可用工具列表。`,
        "list_skills"
      );
    }

    // File not found → add path verification rule
    if (lower.includes("file not found") || lower.includes("directory not found")) {
      suggest(
        `文件路径错误 (${f.count} 次)`,
        `- 文件操作前先用 list_directory 或 read_file 确认路径存在。`,
        "确认路径存在"
      );
    }
  }

  // Several failures can trigger the same rule; keep the first occurrence.
  const seen = new Set<string>();
  const uniqueDiffs = suggestedDiffs.filter(d => {
    if (seen.has(d.after)) return false;
    seen.add(d.after);
    return true;
  });

  // Max 3 suggestions per analysis
  return {
    agent,
    period: "last 7 days",
    totalCalls: 0,
    failureCount: 0,
    topFailures,
    suggestedDiffs: uniqueDiffs.slice(0, 3),
  };
}
|
|
146
|
+
|
|
147
|
+
/* ═══════════════════════════════════════
|
|
148
|
+
Apply prompt diff to agent
|
|
149
|
+
═══════════════════════════════════════ */
|
|
150
|
+
/**
 * Find the offset at which a new rule line should be inserted for the section
 * whose heading starts at `headingIdx`.
 *
 * Returns the end of the first bullet line ("\n-") inside that section, or the
 * end of the heading line when the section has no bullet. The result is always
 * a valid offset in [0, prompt.length].
 */
function promptInsertPoint(prompt: string, headingIdx: number): number {
  const endOfLine = (from: number): number => {
    const nl = prompt.indexOf("\n", from);
    return nl < 0 ? prompt.length : nl;
  };

  const headingEnd = endOfLine(headingIdx);
  const nextHeading = prompt.indexOf("\n#", headingEnd);
  const sectionEnd = nextHeading < 0 ? prompt.length : nextHeading;

  const firstBullet = prompt.indexOf("\n-", headingIdx);
  if (firstBullet >= 0 && firstBullet < sectionEnd) {
    // +1 skips the newline that starts the bullet line itself.
    return endOfLine(firstBullet + 1);
  }
  return headingEnd;
}

/**
 * Apply a prompt diff to an agent and record it in the agent's diff log.
 *
 * The rule (`diff.after`) is inserted after the first bullet of the
 * "## 行为守则" / "## Behavior" section, or appended to the prompt when no such
 * section exists. Nothing happens when `diff.after` is empty or its first 20
 * characters already occur in the prompt.
 *
 * On success the diff is marked applied, appended as one JSON line to
 * `<evolveDir>/<diff.agent>_diffs.jsonl`, and `agent.rebuildSystemPrompt()` is
 * called. On any failure the in-memory prompt and the diff flags are restored,
 * a warning is logged and `false` is returned. A log line that was already
 * written before a later step failed is not removed.
 *
 * @param agent Agent object exposing `systemPrompt` and `rebuildSystemPrompt()`.
 * @param diff  Suggestion to apply; mutated (`applied`, `improvement`) on success.
 * @returns `true` when the prompt was changed and recorded, otherwise `false`.
 */
export function applyPromptDiff(agent: any, diff: PromptDiff): boolean {
  const previousPrompt: unknown = agent?.systemPrompt;
  const previousApplied = diff.applied;
  const previousImprovement = diff.improvement;
  let mutated = false;

  try {
    if (typeof previousPrompt !== "string") {
      throw new TypeError("agent.systemPrompt is not a string");
    }
    if (!diff.after || previousPrompt.includes(diff.after.slice(0, 20))) return false;

    // Append the new rule after "## 行为守则" or "## Behavior" section
    const marker = previousPrompt.includes("行为守则") ? "## 行为守则" : "## Behavior";
    const idx = previousPrompt.indexOf(marker);

    let nextPrompt: string;
    if (idx < 0) {
      nextPrompt = previousPrompt + "\n" + diff.after;
    } else {
      const at = promptInsertPoint(previousPrompt, idx);
      // The tail starts with its own newline (or is empty), so none is added
      // after the rule; adding one would leave a blank line in the list.
      nextPrompt = previousPrompt.slice(0, at) + "\n" + diff.after + previousPrompt.slice(at);
    }

    agent.systemPrompt = nextPrompt;
    mutated = true;
    diff.applied = true;
    diff.improvement = "pending evaluation";

    // Persist the diff
    ensureDir();
    const file = path.join(evolveDir, `${diff.agent}_diffs.jsonl`);
    fs.appendFileSync(file, JSON.stringify(diff) + "\n");

    agent.rebuildSystemPrompt();
    return true;
  } catch (e) {
    if (mutated) {
      // Do not report failure while leaving the change half-applied.
      agent.systemPrompt = previousPrompt;
      diff.applied = previousApplied;
      diff.improvement = previousImprovement;
    }
    log.warn("apply_prompt_diff_failed", { agent: diff.agent, error: String(e) });
    return false;
  }
}
|
|
179
|
+
|
|
180
|
+
/**
 * Get all applied diffs for an agent.
 *
 * Reads `<evolveDir>/<agent>_diffs.jsonl` and parses one JSON object per
 * non-empty line. A missing file means "no diffs yet" and yields an empty
 * list; any other read error is logged and also yields an empty list. Lines
 * that are not valid JSON objects are skipped.
 *
 * @param agent Agent name used to build the log file name.
 * @returns Parsed diffs in file order (possibly empty). Entries are not
 *          validated beyond being JSON objects.
 */
export function getAppliedDiffs(agent: string): PromptDiff[] {
  const file = path.join(evolveDir, `${agent}_diffs.jsonl`);

  let raw: string;
  try {
    // Read directly instead of existsSync + read: the file can disappear
    // between the two calls.
    raw = fs.readFileSync(file, "utf-8");
  } catch (e) {
    if ((e as { code?: string } | null)?.code !== "ENOENT") {
      log.warn("read_prompt_diffs_failed", { agent, error: String(e) });
    }
    return [];
  }

  const diffs: PromptDiff[] = [];
  for (const line of raw.split("\n")) {
    if (!line) continue;
    try {
      const parsed: unknown = JSON.parse(line);
      // `null`, numbers, strings and arrays are valid JSON but not diffs.
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        diffs.push(parsed as PromptDiff);
      }
    } catch {
      /* skip a corrupt or partially written line */
    }
  }
  return diffs;
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 输出过滤模块 — sensitive information sanitization.
|
|
3
|
+
*
|
|
4
|
+
* Before agent responses reach the user (or are persisted),
|
|
5
|
+
* scan for and redact sensitive patterns like API keys,
|
|
6
|
+
* tokens, passwords, PII, and internal paths.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/* ═══════════════════════════════════════
|
|
10
|
+
Detection patterns — compiled once at module load
|
|
11
|
+
═══════════════════════════════════════ */
|
|
12
|
+
/**
 * Redaction rules as [pattern, replacement] pairs, applied in order.
 * Every pattern carries the `g` flag. Key/value rules capture the key name in
 * group 1 so the `$1` in their replacement keeps it in the output.
 */
const SENSITIVE_PATTERNS: Array<[RegExp, string]> = [
  // API keys & tokens
  [/sk-[a-zA-Z0-9]{32,}/g, "[REDACTED:API_KEY]"],
  [/(api_key|apikey|secret_key|access_token|auth_token)\s*[:=]\s*["']?[^\s"']{8,}["']?/gi, "$1: [REDACTED]"],
  [/ghp_[a-zA-Z0-9]{36}/g, "[REDACTED:GITHUB_TOKEN]"],
  [/gho_[a-zA-Z0-9]{36}/g, "[REDACTED:GITHUB_TOKEN]"],

  // AWS credentials (the trailing quote is consumed so none is left dangling)
  [/AKIA[0-9A-Z]{16}/g, "[REDACTED:AWS_KEY]"],
  [/(aws_access_key_id|aws_secret_access_key)\s*[:=]\s*["']?[^\s"']+["']?/gi, "$1: [REDACTED]"],

  // Passwords (the Chinese rule also accepts the full-width colon)
  [/(password|passwd|pwd)\s*[:=]\s*["']?[^\s"']{4,}["']?/gi, "$1: [REDACTED]"],
  [/(密码|口令)\s*[:=:]\s*["']?[^\s"']{2,}["']?/g, "$1: [已脱敏]"],

  // Connection strings
  [/(?:mongodb|postgres|mysql|redis):\/\/[^\s]+/g, "[REDACTED:DB_URI]"],
  [/(?:jdbc|odbc):[^\s]+/g, "[REDACTED:DB_URI]"],

  // Private keys
  [/-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END .*?PRIVATE KEY-----/g, "[REDACTED:PRIVATE_KEY]"],

  // IP addresses (private ranges only). The lookarounds stop a match from
  // starting or ending inside a longer number such as 210.1.2.3.
  [/(?<![\d.])192\.168\.\d{1,3}\.\d{1,3}(?!\d)/g, "[REDACTED:LAN_IP]"],
  [/(?<![\d.])10\.\d{1,3}\.\d{1,3}\.\d{1,3}(?!\d)/g, "[REDACTED:LAN_IP]"],
  [/(?<![\d.])172\.(?:1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}(?!\d)/g, "[REDACTED:LAN_IP]"],

  // File paths
  [/(?:\/etc\/(?:passwd|shadow|hosts|sudoers))/g, "[REDACTED:SYSTEM_PATH]"],
];

/**
 * Email addresses whose local part has at least 3 characters.
 * Group 1 is the local part, group 2 the domain. Masked with a callback in
 * `filterOutput`, so it is kept out of `SENSITIVE_PATTERNS`.
 */
const EMAIL_RE = /([a-zA-Z0-9._%+-]{3,})@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/g;

/** Outcome of sanitizing a piece of text. */
export interface FilterResult {
  /** The text with sensitive spans replaced. */
  clean: string;
  /** True when at least one span was replaced. */
  redacted: boolean;
  /** Total number of replaced spans. */
  count: number;
  /** One diagnostic line per rule that matched. */
  details: string[];
}

/**
 * Redact sensitive information from text.
 *
 * Email addresses are masked first (first two characters of the local part
 * are kept, e.g. `bo***@example.com`), then every rule in
 * `SENSITIVE_PATTERNS` is applied in order to the progressively cleaned text.
 *
 * @param text Text to sanitize; empty input yields an empty, unredacted result.
 * @returns The cleaned text together with match statistics.
 */
export function filterOutput(text: string): FilterResult {
  if (!text) return { clean: "", redacted: false, count: 0, details: [] };

  let clean = text;
  let count = 0;
  const details: string[] = [];

  // Email masking (function-based replacement)
  let emailCount = 0;
  clean = clean.replace(EMAIL_RE, (_full: string, user: string, domain: string) => {
    emailCount++;
    return user.slice(0, 2) + "***@" + domain;
  });
  if (emailCount > 0) {
    count += emailCount;
    details.push(`Masked ${emailCount}x email addresses`);
  }

  for (const [pattern, replacement] of SENSITIVE_PATTERNS) {
    const matches = clean.match(pattern);
    if (!matches) continue;
    count += matches.length;
    details.push(`Redacted ${matches.length}x ${pattern.source.slice(0, 30)}`);
    clean = clean.replace(pattern, replacement);
  }

  return { clean, redacted: count > 0, count, details };
}

/**
 * Quick check — would `filterOutput` change this text?
 *
 * Uses `String.prototype.search`, which always scans from the start and leaves
 * `lastIndex` untouched. Calling `.test()` on the shared global regexes would
 * make the answer depend on the previous call.
 *
 * @param text Text to inspect.
 * @returns True when an email address or any sensitive pattern is present.
 */
export function needsFiltering(text: string): boolean {
  if (!text) return false;
  if (text.search(EMAIL_RE) !== -1) return true;
  return SENSITIVE_PATTERNS.some(([pattern]) => text.search(pattern) !== -1);
}
|
package/src/core/index.ts
CHANGED
|
@@ -26,6 +26,12 @@ export * from './skill';
|
|
|
26
26
|
export * from './router';
|
|
27
27
|
export * from './agent';
|
|
28
28
|
export * from './factory';
|
|
29
|
+
export * from './security';
|
|
30
|
+
export * from './learn';
|
|
31
|
+
export * from './longdoc';
|
|
32
|
+
export * from './filter';
|
|
33
|
+
export * from './estimate';
|
|
34
|
+
export * from './arbitrate';
|
|
29
35
|
|
|
30
|
-
// Version
|
|
31
|
-
export const VERSION = '1.
|
|
36
|
+
// Version — read from package.json at load time. When package.json cannot be
// resolved, or carries no usable version string, fall back to the release this
// source belongs to (the previous fallback still named the 1.6.0 release).
export const VERSION: string = (() => {
  try {
    const version: unknown = require('../../package.json').version;
    if (typeof version === 'string' && version.length > 0) return version;
  } catch {
    /* package.json not resolvable from here — use the fallback below */
  }
  return '1.8.0';
})();
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 长文档处理策略 — sliding window + summary chain.
|
|
3
|
+
*
|
|
4
|
+
* When an input exceeds the agent's effective context window,
|
|
5
|
+
* split into overlapping chunks, summarize each, then chain
|
|
6
|
+
* summaries into a final digest.
|
|
7
|
+
*
|
|
8
|
+
* Architecture:
|
|
9
|
+
* Input → Chunk(sliding window) → Per-chunk Summary → Chain → Final Digest
|
|
10
|
+
*
|
|
11
|
+
* All summaries are generated by the calling agent's LLM, so quality
|
|
12
|
+
* depends on the model in use. The chunker is pure text processing
|
|
13
|
+
* and works without any LLM call.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import type { BaseAgent } from "./agent";
|
|
17
|
+
|
|
18
|
+
/* ═══════════════════════════════════════
|
|
19
|
+
Chunker — split text into overlapping windows
|
|
20
|
+
═══════════════════════════════════════ */
|
|
21
|
+
/** Tuning knobs for `chunkText`. All sizes are in characters. */
export interface ChunkOptions {
  /** Target chunk size in characters (default 6000) */
  chunkSize?: number;
  /** Overlap between consecutive chunks in characters (default 800) */
  overlap?: number;
  /** Minimum chunk size before we stop splitting (default 500) */
  minChunk?: number;
}

/**
 * Split text into overlapping chunks.
 *
 * Text no longer than `chunkSize + minChunk` is returned as a single chunk.
 * Otherwise each chunk nominally ends `chunkSize` characters after its start,
 * but the end is moved to the last paragraph break ("\n\n"), else line break,
 * else space found before `end + 400`, provided that break lies more than
 * `minChunk` characters after the chunk start. Each following chunk starts
 * `overlap` characters before the previous end.
 *
 * The start offset always advances: when stepping back by `overlap` would not
 * move past the current start (an early break, or `overlap >= chunkSize`),
 * the next chunk starts at the previous end with no overlap. Without this the
 * loop would re-emit the same chunk forever.
 *
 * @param text Text to split.
 * @param opts Optional sizes; non-finite values fall back to the defaults,
 *             `chunkSize` is clamped to >= 1, the others to >= 0.
 * @returns Trimmed chunks in document order (the single-chunk case returns
 *          `text` untrimmed).
 */
export function chunkText(text: string, opts?: ChunkOptions): string[] {
  const pick = (value: number | undefined, fallback: number, floor: number): number =>
    typeof value === "number" && Number.isFinite(value) ? Math.max(floor, value) : fallback;

  const cs = pick(opts?.chunkSize, 6000, 1);
  const ol = pick(opts?.overlap, 800, 0);
  const min = pick(opts?.minChunk, 500, 0);
  const chunks: string[] = [];

  if (text.length <= cs + min) {
    chunks.push(text);
    return chunks;
  }

  let start = 0;
  while (start < text.length) {
    let end = start + cs;
    if (end >= text.length) {
      end = text.length;
    } else {
      // Prefer a paragraph boundary, then a line boundary, then a space.
      const searchEnd = Math.min(end + 400, text.length);
      const paraBreak = text.lastIndexOf("\n\n", searchEnd);
      if (paraBreak > start + min) {
        end = paraBreak;
      } else {
        const lineBreak = text.lastIndexOf("\n", searchEnd);
        if (lineBreak > start + min) {
          end = lineBreak;
        } else {
          const space = text.lastIndexOf(" ", searchEnd);
          if (space > start + min) end = space;
        }
      }
    }

    chunks.push(text.slice(start, end).trim());
    if (end >= text.length) break;

    // `end > start` always holds here, so falling back to `end` guarantees
    // forward progress when the overlap would step back to or before `start`.
    const next = end - ol;
    start = next > start ? next : end;
  }

  return chunks;
}
|
|
65
|
+
|
|
66
|
+
/* ═══════════════════════════════════════
|
|
67
|
+
Summary chain — ask agent to summarize chunks then chain
|
|
68
|
+
═══════════════════════════════════════ */
|
|
69
|
+
/** Options for `summarizeLongDoc`. */
export interface SummaryOptions {
  /** Max total chars for the final digest (default 3000) */
  maxDigestChars?: number;
  /** Custom summarization prompt for each chunk; `{text}` is replaced by the chunk */
  chunkPrompt?: string;
  /** Custom chain prompt for combining summaries; `{summaries}` is replaced by the joined summaries */
  chainPrompt?: string;
}

const DEFAULT_CHUNK_PROMPT = `Summarize the following text concisely. Keep all key facts, names, numbers, and code snippets. Output the summary directly without preamble. Limit to 300 words.

Text:
{text}`;

const DEFAULT_CHAIN_PROMPT = `Combine the following section summaries into a single coherent digest. Preserve all key facts, remove redundancy. Output directly without preamble.

{summaries}`;

/**
 * Summarize a document that may be too long for one call.
 *
 * - Text that fits in one chunk and is within `maxDigestChars` is returned as is.
 * - Text that fits in one chunk but is longer is summarized with one call.
 * - Otherwise every chunk from `chunkText(text)` is summarized in order (a
 *   failed call falls back to the first 400 characters of that chunk plus
 *   "..."). The section summaries are returned joined when they fit within
 *   `maxDigestChars`; if not, they are combined by one more call.
 *
 * Model output is truncated to `maxDigestChars` on every path.
 *
 * @param agent Agent whose `chatOneshot(prompt, { maxTokens })` produces the summaries.
 * @param text  Document text.
 * @param opts  Optional limits and prompt templates.
 * @returns The digest.
 * @throws Whatever `chatOneshot` throws for the single-chunk call or the final
 *         chaining call (only per-chunk failures are absorbed).
 */
export async function summarizeLongDoc(
  agent: BaseAgent,
  text: string,
  opts?: SummaryOptions
): Promise<string> {
  const maxDigest = opts?.maxDigestChars ?? 3000;
  const chunkTemplate = opts?.chunkPrompt || DEFAULT_CHUNK_PROMPT;
  const chainTemplate = opts?.chainPrompt || DEFAULT_CHAIN_PROMPT;

  // A function replacer inserts the value verbatim. With a string replacement,
  // sequences such as "$&", "$'" or "$1" inside the document would be expanded.
  const fill = (template: string, placeholder: string, value: string): string =>
    template.replace(placeholder, () => value);

  const chunks = chunkText(text);

  // Single chunk — summarize only when the text itself exceeds the digest limit
  if (chunks.length <= 1) {
    if (text.length <= maxDigest) return text;
    const single = await agent.chatOneshot(fill(chunkTemplate, "{text}", text), { maxTokens: maxDigest });
    return single.slice(0, maxDigest);
  }

  // Multi-chunk: summarize each, then chain. Calls stay sequential, as before.
  const summaries: string[] = [];
  for (const chunk of chunks) {
    try {
      summaries.push(await agent.chatOneshot(fill(chunkTemplate, "{text}", chunk), { maxTokens: 600 }));
    } catch {
      // Best effort: keep the head of the chunk rather than losing the section.
      summaries.push(chunk.slice(0, 400) + "...");
    }
  }

  const joined = summaries.map((s, i) => `## Section ${i + 1}\n${s}`).join("\n\n");
  if (joined.length <= maxDigest) return joined;

  const final = await agent.chatOneshot(fill(chainTemplate, "{summaries}", joined), { maxTokens: maxDigest });
  return final.slice(0, maxDigest);
}
|
|
124
|
+
|
|
125
|
+
/* ═══════════════════════════════════════
|
|
126
|
+
Structured data parsing helpers
|
|
127
|
+
═══════════════════════════════════════ */
|
|
128
|
+
/**
 * Detect structured data (JSON, markdown table, CSV) inside free text.
 *
 * - JSON: the widest `{…}` span and the widest `[…]` span are tried in order
 *   of appearance; the first one that parses is reported. A leading span that
 *   is not valid JSON therefore no longer hides a valid later one.
 * - Table: reported when the input contains at least two `|` characters.
 *   Every line containing `|` becomes a row, except header separator rows
 *   (e.g. `|---|---|`, `| :-- | --: |`). Outer pipes are stripped and cells
 *   trimmed; empty cells are kept so columns stay aligned.
 * - CSV: reported when at least two lines contain a comma.
 *
 * @param input Text to inspect.
 * @returns Detection flags, the raw JSON text (not the parsed value) and the
 *          table rows, each `null` when not detected.
 */
export function parseStructuredInput(input: string): {
  hasTable: boolean; hasJSON: boolean; hasCSV: boolean;
  extractedJSON: string | null; extractedTable: string[][] | null;
} {
  const result = {
    hasTable: false,
    hasJSON: false,
    hasCSV: false,
    extractedJSON: null as string | null,
    extractedTable: null as string[][] | null,
  };

  // Detect JSON
  const candidates = [input.match(/\{[\s\S]*\}/), input.match(/\[[\s\S]*\]/)]
    .filter((m): m is RegExpMatchArray => m !== null)
    .sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
  for (const candidate of candidates) {
    try {
      JSON.parse(candidate[0]);
      result.hasJSON = true;
      result.extractedJSON = candidate[0];
      break;
    } catch {
      /* not valid JSON — try the next candidate */
    }
  }

  // Detect markdown table
  if (/\|[\s\S]*?\|/.test(input)) {
    result.hasTable = true;
    // A row made only of dashes, optional alignment colons and pipes.
    const separatorRow = /^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?\s*$/;
    const isSeparator = (line: string): boolean =>
      separatorRow.test(line) || line.startsWith("|---") || line.startsWith("| --");

    result.extractedTable = input
      .split("\n")
      .filter(line => line.includes("|") && !isSeparator(line))
      .map(line =>
        line
          .trim()
          .replace(/^\|/, "")
          .replace(/\|$/, "")
          .split("|")
          .map(cell => cell.trim())
      );
  }

  // Detect CSV
  if (input.includes(",") && input.split("\n").filter(l => l.includes(",")).length >= 2) {
    result.hasCSV = true;
  }

  return result;
}
|