skyloom 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ /**
2
+ * 输出过滤模块 — sensitive information sanitization.
3
+ *
4
+ * Before agent responses reach the user (or are persisted),
5
+ * scan for and redact sensitive patterns like API keys,
6
+ * tokens, passwords, PII, and internal paths.
7
+ */
8
+
9
+ /* ═══════════════════════════════════════
10
+ Detection patterns — compiled once at module load
11
+ ═══════════════════════════════════════ */
12
/**
 * Ordered `[pattern, replacement]` pairs applied by `filterOutput`.
 *
 * Every pattern carries the `g` flag so that all occurrences are replaced.
 * Where a replacement contains `$1`, the pattern's first capturing group is
 * the label (e.g. `password`) that is kept in the output; those groups must
 * stay capturing, otherwise `$1` is emitted literally.
 */
const SENSITIVE_PATTERNS: ReadonlyArray<readonly [RegExp, string]> = [
  // API keys & tokens
  [/sk-[a-zA-Z0-9]{32,}/g, "[REDACTED:API_KEY]"],
  [/(api_key|apikey|secret_key|access_token|auth_token)\s*[:=]\s*["']?[^\s"']{8,}["']?/gi, "$1: [REDACTED]"],
  [/ghp_[a-zA-Z0-9]{36}/g, "[REDACTED:GITHUB_TOKEN]"],
  [/gho_[a-zA-Z0-9]{36}/g, "[REDACTED:GITHUB_TOKEN]"],

  // AWS credentials
  [/AKIA[0-9A-Z]{16}/g, "[REDACTED:AWS_KEY]"],
  [/(aws_access_key_id|aws_secret_access_key)\s*[:=]\s*["']?[^\s"']+/gi, "$1: [REDACTED]"],

  // Passwords (the Chinese variant also accepts the full-width colon)
  [/(password|passwd|pwd)\s*[:=]\s*["']?[^\s"']{4,}["']?/gi, "$1: [REDACTED]"],
  [/(密码|口令)\s*[:=：]\s*["']?[^\s"']{2,}["']?/g, "$1: [已脱敏]"],

  // Connection strings
  [/(?:mongodb|postgres|mysql|redis):\/\/[^\s]+/g, "[REDACTED:DB_URI]"],
  [/(?:jdbc|odbc):[^\s]+/g, "[REDACTED:DB_URI]"],

  // Private keys
  [/-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END .*?PRIVATE KEY-----/g, "[REDACTED:PRIVATE_KEY]"],

  // IP addresses (private ranges only). The lookarounds stop a match from
  // starting or ending inside a longer dotted number such as 110.10.1.5.
  [/(?<![\d.])192\.168\.\d{1,3}\.\d{1,3}(?!\.?\d)/g, "[REDACTED:LAN_IP]"],
  [/(?<![\d.])10\.\d{1,3}\.\d{1,3}\.\d{1,3}(?!\.?\d)/g, "[REDACTED:LAN_IP]"],
  [/(?<![\d.])172\.(?:1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}(?!\.?\d)/g, "[REDACTED:LAN_IP]"],

  // File paths
  [/(?:\/etc\/(?:passwd|shadow|hosts|sudoers))/g, "[REDACTED:SYSTEM_PATH]"],
];

/* Email masking (function-based, handled separately).
   Group 1 is the local part, group 2 the domain. */
const EMAIL_RE = /([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/g;


/* ═══════════════════════════════════════
   Filter function
   ═══════════════════════════════════════ */

/** Outcome of a `filterOutput` run. */
export interface FilterResult {
  /** Input text with every detected secret replaced or masked. */
  clean: string;
  /** True when at least one replacement was made. */
  redacted: boolean;
  /** Total number of replaced occurrences across all patterns. */
  count: number;
  /** One human-readable line per pattern that matched. */
  details: string[];
}

/**
 * Redacts sensitive substrings from `text`.
 *
 * The static patterns run first, in declaration order, and email masking runs
 * last. Running emails last keeps `user:pw@host` inside a connection string
 * from being counted once as an email and again as a DB URI.
 *
 * @param text Text to sanitize; an empty string yields an empty, unredacted result.
 * @returns The sanitized text plus counters describing what was replaced.
 */
export function filterOutput(text: string): FilterResult {
  if (!text) return { clean: "", redacted: false, count: 0, details: [] };

  let clean = text;
  let count = 0;
  const details: string[] = [];

  for (const [pattern, replacement] of SENSITIVE_PATTERNS) {
    // String.prototype.match/replace with a global regex always scan from
    // index 0, so the shared regex objects carry no state between calls.
    const matches = clean.match(pattern);
    if (!matches) continue;
    count += matches.length;
    details.push(`Redacted ${matches.length}x ${pattern.source.slice(0, 30)}`);
    clean = clean.replace(pattern, replacement);
  }

  let emailCount = 0;
  clean = clean.replace(EMAIL_RE, (_full: string, user: string, domain: string) => {
    emailCount++;
    // Show at most two leading characters and never the whole local part.
    const visible = user.slice(0, Math.min(2, user.length - 1));
    return visible + "***@" + domain;
  });
  if (emailCount > 0) {
    count += emailCount;
    details.push(`Masked ${emailCount}x email addresses`);
  }

  return { clean, redacted: count > 0, count, details };
}

/* ═══════════════════════════════════════
   Quick check — is filtering needed?
   ═══════════════════════════════════════ */

/**
 * Reports whether `filterOutput(text)` would change anything.
 *
 * Uses `String.prototype.search`, which ignores the `g` flag and `lastIndex`,
 * so repeated calls with the same input always give the same answer. All
 * patterns are checked, so a `false` result is safe to use as a gate.
 *
 * @param text Text to inspect.
 * @returns True when at least one detection pattern matches.
 */
export function needsFiltering(text: string): boolean {
  if (!text) return false;
  if (text.search(EMAIL_RE) !== -1) return true;
  return SENSITIVE_PATTERNS.some(([pattern]) => text.search(pattern) !== -1);
}
package/src/core/index.ts CHANGED
@@ -26,6 +26,12 @@ export * from './skill';
26
26
  export * from './router';
27
27
  export * from './agent';
28
28
  export * from './factory';
29
+ export * from './security';
30
+ export * from './learn';
31
+ export * from './longdoc';
32
+ export * from './filter';
33
+ export * from './estimate';
34
+ export * from './arbitrate';
29
35
 
30
- // Version
31
- export const VERSION = '1.4.0';
36
// Version — read from package.json at module load. The literal below is only
// a fallback for environments where package.json cannot be loaded or has no
// usable `version` field; keep it in step with the published release.
export const VERSION: string = (() => {
  try {
    const version: unknown = require('../../package.json').version;
    if (typeof version === 'string' && version.length > 0) return version;
  } catch {
    // package.json is not resolvable (e.g. bundled build) — use the fallback.
  }
  return '1.7.0';
})();
@@ -0,0 +1,155 @@
1
+ /**
2
+ * 长文档处理策略 — sliding window + summary chain.
3
+ *
4
+ * When an input exceeds the agent's effective context window,
5
+ * split into overlapping chunks, summarize each, then chain
6
+ * summaries into a final digest.
7
+ *
8
+ * Architecture:
9
+ * Input → Chunk(sliding window) → Per-chunk Summary → Chain → Final Digest
10
+ *
11
+ * All summaries are generated by the calling agent's LLM, so quality
12
+ * depends on the model in use. The chunker is pure text processing
13
+ * and works without any LLM call.
14
+ */
15
+
16
+ import type { BaseAgent } from "./agent";
17
+
18
+ /* ═══════════════════════════════════════
19
+ Chunker — split text into overlapping windows
20
+ ═══════════════════════════════════════ */
21
/** Options controlling how `chunkText` windows its input. */
export interface ChunkOptions {
  /** Target chunk size in characters (default 6000) */
  chunkSize?: number;
  /** Overlap between consecutive chunks in characters (default 800) */
  overlap?: number;
  /** Minimum chunk size before we stop splitting (default 500) */
  minChunk?: number;
}

/**
 * Splits `text` into overlapping, trimmed windows.
 *
 * Text no longer than `chunkSize + minChunk` is returned as a single,
 * untrimmed chunk. Otherwise each window targets `chunkSize` characters and
 * is cut at the last paragraph break, line break, or space found up to 400
 * characters past the target, in that order of preference.
 *
 * Every iteration is guaranteed to emit text that was not emitted before.
 * A break is accepted only if it lies beyond the end of the previous chunk,
 * and the overlap is dropped for one step whenever it would stop the window
 * from advancing. Without these guards a paragraph break followed by a long
 * run with no blank line (or `overlap >= chunkSize`) loops forever.
 *
 * @param text Text to split.
 * @param opts Optional sizes; see `ChunkOptions` for defaults.
 * @returns The chunks in document order.
 */
export function chunkText(text: string, opts?: ChunkOptions): string[] {
  const cs = Math.max(1, Math.floor(opts?.chunkSize ?? 6000));
  const ol = Math.max(0, Math.floor(opts?.overlap ?? 800));
  const min = Math.max(0, Math.floor(opts?.minChunk ?? 500));

  if (text.length <= cs + min) return [text];

  const chunks: string[] = [];
  let start = 0;
  let prevEnd = 0; // end offset of the previously emitted chunk

  while (start < text.length) {
    let end = start + cs;
    if (end >= text.length) {
      end = text.length;
    } else {
      const searchEnd = Math.min(end + 400, text.length);
      // A usable break must leave at least `min` characters in this chunk
      // and must lie past the text that the previous chunk already covered.
      const floor = Math.max(start + min, prevEnd);
      for (const separator of ["\n\n", "\n", " "]) {
        const at = text.lastIndexOf(separator, searchEnd);
        if (at > floor) {
          end = at;
          break;
        }
      }
    }

    chunks.push(text.slice(start, end).trim());
    if (end >= text.length) break;

    // Step back by the overlap, unless that would not move the window forward.
    const next = end - ol;
    start = next > start ? next : end;
    prevEnd = end;
  }

  return chunks;
}
65
+
66
+ /* ═══════════════════════════════════════
67
+ Summary chain — ask agent to summarize chunks then chain
68
+ ═══════════════════════════════════════ */
69
/** Options for `summarizeLongDoc`. */
export interface SummaryOptions {
  /** Max total chars for the final digest (default 3000) */
  maxDigestChars?: number;
  /** Custom summarization prompt for each chunk */
  chunkPrompt?: string;
  /** Custom chain prompt for combining summaries */
  chainPrompt?: string;
}

const DEFAULT_CHUNK_PROMPT = `Summarize the following text concisely. Keep all key facts, names, numbers, and code snippets. Output the summary directly without preamble. Limit to 300 words.

Text:
{text}`;

const DEFAULT_CHAIN_PROMPT = `Combine the following section summaries into a single coherent digest. Preserve all key facts, remove redundancy. Output directly without preamble.

{summaries}`;

/**
 * Substitutes the first occurrence of `placeholder` in `template` with `value`.
 * A replacer function is used so that `$&`, `$1`, `$$` and similar sequences
 * inside `value` are inserted verbatim instead of being interpreted.
 */
function fillPromptTemplate(template: string, placeholder: string, value: string): string {
  return template.replace(placeholder, () => value);
}

/**
 * Produces a digest of `text` no longer than `maxDigestChars` characters.
 *
 * - Text that fits in one chunk and within the limit is returned unchanged.
 * - Text that fits in one chunk but exceeds the limit is summarized once.
 * - Longer text is chunked and each chunk is summarized in order. If a chunk
 *   summary call fails, the first 400 characters of that chunk are used
 *   instead. The section summaries are returned joined if they fit, otherwise
 *   they are combined by one more agent call.
 *
 * NOTE(review): `maxDigestChars` is a character count, yet it is passed to the
 * agent as `maxTokens`; confirm that this is the intended budget.
 *
 * @param agent Agent whose `chatOneshot` generates the summaries.
 * @param text Document to digest.
 * @param opts Optional limit and prompt overrides.
 * @returns The digest text.
 * @throws Whatever `agent.chatOneshot` throws for the single-chunk or final
 *   chain call; per-chunk failures are absorbed as described above.
 */
export async function summarizeLongDoc(
  agent: BaseAgent,
  text: string,
  opts?: SummaryOptions
): Promise<string> {
  const maxDigest = opts?.maxDigestChars ?? 3000;
  const chunkTemplate = opts?.chunkPrompt || DEFAULT_CHUNK_PROMPT;
  const chunks = chunkText(text);

  // Single chunk — summarize only if it exceeds the digest limit
  if (chunks.length <= 1) {
    if (text.length <= maxDigest) return text;
    const prompt = fillPromptTemplate(chunkTemplate, "{text}", text);
    const digest = await agent.chatOneshot(prompt, { maxTokens: maxDigest });
    return digest.slice(0, maxDigest);
  }

  // Multi-chunk: summarize each chunk in document order
  const summaries: string[] = [];
  for (const chunk of chunks) {
    const prompt = fillPromptTemplate(chunkTemplate, "{text}", chunk);
    try {
      summaries.push(await agent.chatOneshot(prompt, { maxTokens: 600 }));
    } catch {
      // Best effort: keep the head of the chunk so the section is not lost.
      summaries.push(chunk.slice(0, 400) + "...");
    }
  }

  // Chain summaries
  const joined = summaries.map((s, i) => `## Section ${i + 1}\n${s}`).join("\n\n");
  if (joined.length <= maxDigest) return joined;

  const chainPrompt = fillPromptTemplate(opts?.chainPrompt || DEFAULT_CHAIN_PROMPT, "{summaries}", joined);
  const final = await agent.chatOneshot(chainPrompt, { maxTokens: maxDigest });
  return final.slice(0, maxDigest);
}
124
+
125
+ /* ═══════════════════════════════════════
126
+ Structured data parsing helpers
127
+ ═══════════════════════════════════════ */
128
/**
 * Returns the index of the bracket that closes the one at `open`, or -1.
 * Brackets inside double-quoted strings are ignored.
 */
function matchingCloseIndex(input: string, open: number): number {
  let depth = 0;
  let inString = false;
  for (let i = open; i < input.length; i++) {
    const ch = input[i];
    if (inString) {
      if (ch === "\\") i++; // skip the escaped character
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{" || ch === "[") depth++;
    else if (ch === "}" || ch === "]") {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/** Finds the first balanced `{...}` / `[...]` span that `JSON.parse` accepts. */
function findFirstJson(input: string): string | null {
  for (let i = 0; i < input.length; i++) {
    const ch = input[i];
    if (ch !== "{" && ch !== "[") continue;
    const close = matchingCloseIndex(input, i);
    if (close === -1) continue;
    const candidate = input.slice(i, close + 1);
    try {
      JSON.parse(candidate);
      return candidate;
    } catch {
      /* not valid JSON — try the next opening bracket */
    }
  }
  return null;
}

/** Splits one table line into trimmed cells, dropping the border pipes only. */
function splitTableRow(line: string): string[] {
  const cells = line.trim().split("|").map((c) => c.trim());
  // A leading or trailing pipe yields an empty edge cell that is not data.
  if (cells.length > 1 && cells[0] === "") cells.shift();
  if (cells.length > 1 && cells[cells.length - 1] === "") cells.pop();
  return cells;
}

/** True for a markdown header separator row such as `|---|:--:|`. */
function isTableSeparatorRow(line: string): boolean {
  if (line.startsWith("|---") || line.startsWith("| --")) return true;
  const cells = splitTableRow(line);
  return cells.length > 0 && cells.every((c) => /^:?-{2,}:?$/.test(c));
}

/**
 * Detects JSON, markdown-table and CSV content embedded in free text.
 *
 * - JSON: the first balanced object or array that parses is returned, so
 *   stray braces elsewhere in the text do not prevent extraction.
 * - Table: reported when the input contains at least two `|` characters.
 *   Rows are the lines containing `|`, minus separator rows; empty cells
 *   are kept as `""` so columns stay aligned.
 * - CSV: reported when at least two lines contain a comma (heuristic only).
 *
 * @param input Raw text to inspect.
 * @returns Detection flags plus the extracted JSON text and table rows, each
 *   `null` when nothing was found.
 */
export function parseStructuredInput(input: string): {
  hasTable: boolean; hasJSON: boolean; hasCSV: boolean;
  extractedJSON: string | null; extractedTable: string[][] | null;
} {
  const result = { hasTable: false, hasJSON: false, hasCSV: false, extractedJSON: null as string | null, extractedTable: null as string[][] | null };

  // Detect JSON
  const json = findFirstJson(input);
  if (json !== null) {
    result.hasJSON = true;
    result.extractedJSON = json;
  }

  // Detect markdown table
  if (/\|[\s\S]*?\|/.test(input)) {
    result.hasTable = true;
    result.extractedTable = input
      .split("\n")
      .filter((l) => l.includes("|") && !isTableSeparatorRow(l))
      .map(splitTableRow);
  }

  // Detect CSV
  if (input.includes(",") && input.split("\n").filter((l) => l.includes(",")).length >= 2) {
    result.hasCSV = true;
  }

  return result;
}