skyloom 1.8.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/skills/api_integrator/SKILL.md +15 -0
- package/config/skills/arch_designer/SKILL.md +13 -0
- package/config/skills/ci_cd_manager/SKILL.md +14 -0
- package/config/skills/code_analysis/SKILL.md +13 -0
- package/config/skills/code_generator/SKILL.md +12 -0
- package/config/skills/code_reviewer/SKILL.md +13 -0
- package/config/skills/content_writer/SKILL.md +14 -0
- package/config/skills/data_transformer/SKILL.md +15 -0
- package/config/skills/document_analysis/SKILL.md +13 -0
- package/config/skills/emotional_companion/SKILL.md +15 -0
- package/config/skills/performance_checker/SKILL.md +14 -0
- package/config/skills/security_auditor/SKILL.md +14 -0
- package/config/skills/self_evolve/SKILL.md +13 -0
- package/config/skills/sys_operator/SKILL.md +15 -0
- package/config/skills/task_planner/SKILL.md +14 -0
- package/config/skills/web_research/SKILL.md +14 -0
- package/config/skills/workflow_designer/SKILL.md +13 -0
- package/dist/cli/main.js +96 -7
- package/dist/cli/main.js.map +1 -1
- package/dist/core/graph.d.ts +49 -0
- package/dist/core/graph.d.ts.map +1 -0
- package/dist/core/graph.js +182 -0
- package/dist/core/graph.js.map +1 -0
- package/dist/core/llm.d.ts.map +1 -1
- package/dist/core/llm.js +19 -4
- package/dist/core/llm.js.map +1 -1
- package/dist/core/vector.d.ts +43 -0
- package/dist/core/vector.d.ts.map +1 -0
- package/dist/core/vector.js +150 -0
- package/dist/core/vector.js.map +1 -0
- package/package.json +1 -1
- package/src/cli/main.ts +61 -6
- package/src/core/graph.ts +156 -0
- package/src/core/llm.ts +17 -3
- package/src/core/vector.ts +152 -0
package/src/cli/main.ts
CHANGED
|
@@ -77,6 +77,12 @@ program.command("web").option("-p,--port <p>", "port", "3000")
|
|
|
77
77
|
// Start the MCP server; the module is loaded lazily so other commands do not pay for it.
program.command("mcp").action(() => {
  import("../core/mcp_server").then(m => m.startMCPServer());
});

// Print the user config directory and the model configured for each agent.
program.command("config").action(() => {
  const c = loadConfig();
  process.stdout.write(chalk.cyan("\nConfig: ") + USER_CONFIG_DIR + "\n");
  for (const [n, a] of Object.entries(c.agents || {})) {
    process.stdout.write(` ${chalk.bold(n)}: ${(a as any).model || "default"}\n`);
  }
});

// Create the user config directory if it does not exist yet.
program.command("init").action(() => {
  if (!fs.existsSync(USER_CONFIG_DIR)) fs.mkdirSync(USER_CONFIG_DIR, { recursive: true });
  process.stdout.write(chalk.green("✓ ") + USER_CONFIG_DIR + "\n");
});

/*
 * `sky apikey set <provider> <key>` stores a key via saveApiKey().
 * `sky apikey list` shows, per provider, whether a key comes from the
 * environment or from ~/.skyloom/config.yaml. Key values are never printed.
 * Anything else prints the usage text.
 */
program.command("apikey").description("Manage API keys (persisted to ~/.skyloom/config.yaml)")
  .argument("[action]", "set|list").argument("[provider]", "e.g. deepseek").argument("[key]", "API key")
  .action((action?: string, provider?: string, key?: string) => {
    if (action === "set" && provider && key) {
      saveApiKey(provider, key);
      process.stdout.write(chalk.green("✓ Saved " + provider + " API key\n"));
      return;
    }

    if (action === "list") {
      // Keys stored in the config file; best-effort, an unreadable file just means "none".
      let saved: Record<string, unknown> = {};
      try {
        const cfgFile = require("path").join(require("os").homedir(), ".skyloom", "config.yaml");
        if (fs.existsSync(cfgFile)) {
          const parsed = require("yaml").parse(fs.readFileSync(cfgFile, "utf-8"));
          if (parsed && typeof parsed.api_keys === "object" && parsed.api_keys !== null) saved = parsed.api_keys;
        }
      } catch { /* fall back to reporting environment variables only */ }

      // Well-known providers first, then any extra provider found in the config file.
      const names = new Set<string>(["openai", "deepseek", "anthropic", "groq", "openrouter", ...Object.keys(saved)]);
      process.stdout.write(chalk.bold("\n API Keys:\n"));
      for (const name of names) {
        // Same env-var naming convention as the interactive /apikey command.
        const fromEnv = !!process.env[name.toUpperCase() + "_API_KEY"];
        const label = fromEnv ? chalk.green("env") : saved[name] ? chalk.cyan("config") : chalk.dim("—");
        process.stdout.write("  " + name.padEnd(14) + label + "\n");
      }
      process.stdout.write("\n");
      return;
    }

    process.stdout.write(chalk.dim("Usage: sky apikey set deepseek YOUR_KEY\n"));
    process.stdout.write(chalk.dim("       sky apikey list\n"));
  });

// Print the CLI version.
program.command("version").action(() => {
  process.stdout.write(`Skyloom v${VERSION}\n`);
});
|
|
81
87
|
|
|
82
88
|
/* ═══════════════════════════════════════
|
|
@@ -143,20 +149,45 @@ function render(text: string): string[] {
|
|
|
143
149
|
/* ═══════════════════════════════════════
|
|
144
150
|
Chat loop
|
|
145
151
|
═══════════════════════════════════════ */
|
|
146
|
-
/*
|
|
152
|
+
/* API key persistence — read from config file too */
|
|
147
153
|
/**
 * Report where the first usable API key was found.
 *
 * Environment variables are probed first, in a fixed order; if none is set,
 * the `api_keys` section of ~/.skyloom/config.yaml is consulted.
 *
 * @returns `"env:<VAR>"` or `"cfg:<provider>"` for the first hit, or `null`
 *          when no key is available (including when the config cannot be read).
 */
function checkApiKeys(): string | null {
  const envNames = ["DEEPSEEK_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GROQ_API_KEY", "OPENROUTER_API_KEY"];
  const envHit = envNames.find(name => Boolean(process.env[name]));
  if (envHit !== undefined) return "env:" + envHit;

  try {
    const nodePath = require("path");
    const nodeFs = require("fs");
    const yamlLib = require("yaml");
    const configFile = nodePath.join(require("os").homedir(), ".skyloom", "config.yaml");
    if (!nodeFs.existsSync(configFile)) return null;

    const parsed = yamlLib.parse(nodeFs.readFileSync(configFile, "utf-8")) || {};
    const stored = parsed.api_keys || {};
    // First provider with a non-empty value wins.
    for (const provider of Object.keys(stored)) {
      if (stored[provider]) return "cfg:" + provider;
    }
  } catch {
    // Best-effort lookup: a missing module or unreadable file counts as "no key".
  }
  return null;
}
|
|
152
169
|
|
|
170
|
+
/**
 * Persist an API key for `provider` under `api_keys` in ~/.skyloom/config.yaml.
 *
 * Other settings already in the file are kept. Because the file holds
 * secrets, the directory and file are created owner-only (0700 / 0600).
 * If the existing file cannot be parsed, or its root is not a mapping, a copy
 * is kept as `config.yaml.bak` before the file is rewritten, so the previous
 * contents are not lost.
 *
 * @param provider Provider name used as the key under `api_keys` (e.g. "deepseek").
 * @param key      The API key to store.
 */
function saveApiKey(provider: string, key: string): void {
  const path = require("path"); const fs = require("fs"); const yaml = require("yaml");
  const cfgPath = path.join(require("os").homedir(), ".skyloom", "config.yaml");
  const dir = path.dirname(cfgPath);
  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true, mode: 0o700 });

  let cfg: any = {};
  if (fs.existsSync(cfgPath)) {
    let parsed: unknown = null;
    let parseFailed = false;
    try { parsed = yaml.parse(fs.readFileSync(cfgPath, "utf-8")); } catch { parseFailed = true; }

    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      cfg = parsed;
    } else if (parseFailed || (parsed !== null && parsed !== undefined)) {
      // The file has content we cannot merge into; keep a copy before overwriting it.
      try { fs.copyFileSync(cfgPath, cfgPath + ".bak"); } catch { /* backup is best-effort */ }
    }
  }

  // `api_keys` must be a plain mapping; anything else cannot hold provider entries.
  if (!cfg.api_keys || typeof cfg.api_keys !== "object" || Array.isArray(cfg.api_keys)) cfg.api_keys = {};
  cfg.api_keys[provider] = key;

  fs.writeFileSync(cfgPath, yaml.stringify(cfg), { encoding: "utf-8", mode: 0o600 });
  // `mode` only applies when the file is created; tighten a pre-existing file too.
  try { fs.chmodSync(cfgPath, 0o600); } catch { /* not supported on every platform */ }
}
|
|
182
|
+
|
|
153
183
|
async function chat(agentName: string, modelOverride?: string): Promise<void> {
|
|
154
184
|
const haveKey = checkApiKeys();
|
|
155
185
|
if (!haveKey) {
|
|
156
186
|
process.stdout.write("\n" + chalk.yellow(" ⚠ No API key configured.\n"));
|
|
157
|
-
process.stdout.write(chalk.dim("
|
|
158
|
-
process.stdout.write(chalk.dim("
|
|
159
|
-
process.stdout.write(chalk.dim("
|
|
187
|
+
process.stdout.write(chalk.dim(" Quick setup:\n"));
|
|
188
|
+
process.stdout.write(chalk.dim(" sky apikey set deepseek sk-your-key-here\n"));
|
|
189
|
+
process.stdout.write(chalk.dim(" Or env var:\n"));
|
|
190
|
+
process.stdout.write(chalk.dim(" $env:DEEPSEEK_API_KEY = \"sk-your-key\"\n\n"));
|
|
160
191
|
process.exit(1);
|
|
161
192
|
}
|
|
162
193
|
|
|
@@ -164,6 +195,22 @@ async function chat(agentName: string, modelOverride?: string): Promise<void> {
|
|
|
164
195
|
let agent = ctx.agentMap.get(agentName);
|
|
165
196
|
if (!agent) { process.stdout.write(chalk.red("Unknown agent: " + agentName) + "\n"); return; }
|
|
166
197
|
await agent.init();
|
|
198
|
+
|
|
199
|
+
// Wire up security approval — prompt user for HIGH/CRITICAL operations
|
|
200
|
+
try {
|
|
201
|
+
const { getSecurity, DangerLevel } = require("../core/security");
|
|
202
|
+
const sec = getSecurity();
|
|
203
|
+
sec.setApprovalCallback(async (tool: string, args: Record<string, any>, level: number) => {
|
|
204
|
+
process.stdout.write(chalk.yellow(`\n ⚠ ${tool} ( danger level ${level} )\n`));
|
|
205
|
+
process.stdout.write(chalk.dim(` args: ${JSON.stringify(args).slice(0, 80)}\n`));
|
|
206
|
+
const answer = await new Promise<string>(resolve => {
|
|
207
|
+
const rl2 = readline.createInterface({ input: process.stdin, output: process.stdout });
|
|
208
|
+
rl2.question(chalk.red(" Approve? [y/N] "), (a: string) => { rl2.close(); resolve(a.trim().toLowerCase()); });
|
|
209
|
+
});
|
|
210
|
+
return answer === "y" || answer === "yes";
|
|
211
|
+
});
|
|
212
|
+
} catch { /* security module optional */ }
|
|
213
|
+
|
|
167
214
|
// eslint-disable-next-line prefer-const
|
|
168
215
|
let currentAgent = agent; // mutable for agent switching
|
|
169
216
|
welcome(agent);
|
|
@@ -192,6 +239,14 @@ async function chat(agentName: string, modelOverride?: string): Promise<void> {
|
|
|
192
239
|
if (cmdL === "/compact") { const r = await currentAgent.compact(); process.stdout.write(chalk.green(" ✓ " + r + "\n\n")); ask(); return; }
|
|
193
240
|
if (cmdL === "/version") { process.stdout.write(" Skyloom v" + VERSION + "\n"); ask(); return; }
|
|
194
241
|
if (cmdL.startsWith("/task ")) { const g = inp.slice(6); process.stdout.write(chalk.cyan("\n ✦ " + g + "\n\n")); await runTask(g); ask(); return; }
|
|
242
|
+
if (cmdL.startsWith("/apikey set ")) { const parts = inp.split(/\s+/); if (parts.length >= 4) { saveApiKey(parts[2], parts[3]); process.stdout.write(chalk.green(" ✓ Saved " + parts[2] + " API key to ~/.skyloom/config.yaml\n\n")); } else { process.stdout.write(chalk.yellow(" Usage: /apikey set <provider> <key>\n\n")); } ask(); return; }
|
|
243
|
+
if (cmdL === "/apikey") {
|
|
244
|
+
const providers = ["openai","deepseek","anthropic","groq","openrouter"];
|
|
245
|
+
process.stdout.write(chalk.bold("\n API Keys:\n"));
|
|
246
|
+
for (const p of providers) { const envVar = p.toUpperCase() + "_API_KEY"; const hasEnv = !!process.env[envVar]; process.stdout.write(chalk.dim(" " + p.padEnd(14) + (hasEnv ? chalk.green("env") : chalk.dim("—")) + "\n")); }
|
|
247
|
+
process.stdout.write(chalk.dim("\n Save: /apikey set <provider> <key>\n\n"));
|
|
248
|
+
ask(); return;
|
|
249
|
+
}
|
|
195
250
|
if (inp.startsWith("/")) { process.stdout.write(helpText()); ask(); return; }
|
|
196
251
|
|
|
197
252
|
// ── Chat ──
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * Simple knowledge graph — entity-relation triples held in memory and persisted to a JSON file.
|
|
3
|
+
*
|
|
4
|
+
* Lightweight triple store: (subject, predicate, object) with metadata.
|
|
5
|
+
* Used for: project info, tool preferences, dependency relationships.
|
|
6
|
+
*
|
|
7
|
+
* Schema:
|
|
8
|
+
 *   Triple { subj, pred, obj, agent, ts, meta? } — stored as one JSON array per graph file
|
|
9
|
+
*
|
|
10
|
+
* Queries:
|
|
11
|
+
* - Find all relations for an entity
|
|
12
|
+
* - Find all entities matching a predicate
|
|
13
|
+
* - Transitive closure (2-hop max for performance)
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import * as fs from "fs";
|
|
17
|
+
import * as path from "path";
|
|
18
|
+
import { USER_CONFIG_DIR } from "./config";
|
|
19
|
+
import { getLogger } from "./logger";
|
|
20
|
+
|
|
21
|
+
const log = getLogger("graph");
|
|
22
|
+
|
|
23
|
+
/* ═══════════════════════════════════════
|
|
24
|
+
Triple store — in-memory + optional persistence
|
|
25
|
+
═══════════════════════════════════════ */
|
|
26
|
+
/** One stored fact: `subj --pred--> obj`, plus who recorded it and when. */
interface Triple {
  subj: string;
  pred: string;
  obj: string;
  /** Name of the agent that recorded the fact. */
  agent: string;
  /** ISO-8601 timestamp of the last time the fact was added or re-asserted. */
  ts: string;
  /** Optional free-form annotations. */
  meta?: Record<string, string>;
}

/**
 * In-memory triple store persisted to `kg_<name>.json` in the user config directory.
 *
 * Every mutation rewrites the file. Persistence is best-effort: write failures
 * are logged and an unreadable file yields an empty graph.
 */
export class KnowledgeGraph {
  private triples: Triple[] = [];
  private indexPath: string;

  /** @param name Graph name; selects the backing file `kg_<name>.json`. */
  constructor(name: string = "default") {
    this.indexPath = path.join(USER_CONFIG_DIR, `kg_${name}.json`);
    this.load();
  }

  /**
   * Add a fact: (subject, predicate, object).
   *
   * Re-adding an existing fact refreshes its timestamp and merges `meta`
   * into the stored metadata instead of creating a duplicate. Either way the
   * change is written to disk.
   */
  add(subj: string, pred: string, obj: string, agent: string = "system", meta?: Record<string, string>): void {
    const exists = this.triples.find(t => t.subj === subj && t.pred === pred && t.obj === obj);
    if (exists) {
      exists.ts = new Date().toISOString();
      if (meta) exists.meta = { ...exists.meta, ...meta };
    } else {
      this.triples.push({ subj, pred, obj, agent, ts: new Date().toISOString(), meta });
      // Keep at most 5000 facts in memory, dropping the oldest first.
      if (this.triples.length > 5000) this.triples.splice(0, this.triples.length - 5000);
    }
    this.save();
  }

  /** Find the most recent `limit` facts in which `entity` is the subject or the object. */
  about(entity: string, limit: number = 20): Triple[] {
    return this.triples.filter(t => t.subj === entity || t.obj === entity).slice(-limit);
  }

  /** Find all facts with the given predicate. */
  byPredicate(pred: string): Triple[] {
    return this.triples.filter(t => t.pred === pred);
  }

  /** Find all facts for a subject-predicate pair. */
  find(subj: string, pred: string): Triple[] {
    return this.triples.filter(t => t.subj === subj && t.pred === pred);
  }

  /**
   * Collect the facts reachable within `maxDepth` hops of `entity`,
   * following relations in both directions.
   *
   * The walk is breadth-first, one hop per level: every entity discovered at
   * hop k is expanded before hop k+1 starts. At most 10 facts are read per
   * entity.
   */
  expand(entity: string, maxDepth: number = 2): Triple[] {
    const seen = new Set<Triple>();
    const visited = new Set<string>([entity]);
    let frontier: string[] = [entity];

    for (let depth = 0; depth < maxDepth && frontier.length > 0; depth++) {
      const next: string[] = [];
      for (const current of frontier) {
        for (const fact of this.about(current, 10)) {
          seen.add(fact);
          // The entity at the other end of the relation is a candidate for the next hop.
          const neighbour = fact.subj === current ? fact.obj : fact.subj;
          if (!visited.has(neighbour)) {
            visited.add(neighbour);
            next.push(neighbour);
          }
        }
      }
      frontier = next;
    }
    return Array.from(seen);
  }

  /** Remove a fact and persist the change. */
  remove(subj: string, pred: string, obj: string): void {
    this.triples = this.triples.filter(t => !(t.subj === subj && t.pred === pred && t.obj === obj));
    this.save();
  }

  /** Case-insensitive substring search over subject, predicate and object; returns the newest `limit` hits. */
  search(keyword: string, limit: number = 15): Triple[] {
    const k = keyword.toLowerCase();
    return this.triples
      .filter(t => t.subj.toLowerCase().includes(k) || t.pred.toLowerCase().includes(k) || t.obj.toLowerCase().includes(k))
      .slice(-limit);
  }

  /**
   * Format facts as readable text, one line per subject.
   * Without `entity`, the 30 most recent facts are shown.
   */
  format(entity?: string): string {
    const facts = entity ? this.about(entity) : this.triples.slice(-30);
    if (facts.length === 0) return "(no facts)";

    const bySubj = new Map<string, string[]>();
    for (const f of facts) {
      if (!bySubj.has(f.subj)) bySubj.set(f.subj, []);
      bySubj.get(f.subj)!.push(`${f.pred} → ${f.obj}`);
    }

    const lines: string[] = [];
    for (const [subj, preds] of bySubj) {
      lines.push(`**${subj}**: ${preds.join(", ")}`);
    }
    return lines.join("\n");
  }

  /** Number of facts currently held in memory. */
  get size(): number { return this.triples.length; }

  /** Structural check for entries read from disk: the three string fields every query relies on. */
  private static isTriple(value: unknown): value is Triple {
    if (typeof value !== "object" || value === null) return false;
    const t = value as Record<string, unknown>;
    return typeof t.subj === "string" && typeof t.pred === "string" && typeof t.obj === "string";
  }

  /** Write the graph to disk; failures are logged, never thrown. */
  private save(): void {
    try {
      const dir = path.dirname(this.indexPath);
      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
      // NOTE(review): memory keeps up to 5000 facts but only the newest 2000 are
      // written, so a reload can drop older facts — confirm this is intended.
      fs.writeFileSync(this.indexPath, JSON.stringify(this.triples.slice(-2000)), "utf-8");
    } catch (e) { log.warn("kg_save_failed", { error: String(e) }); }
  }

  /** Read the graph from disk; a missing, corrupt or wrongly-shaped file yields an empty graph. */
  private load(): void {
    try {
      if (!fs.existsSync(this.indexPath)) return;
      const parsed: unknown = JSON.parse(fs.readFileSync(this.indexPath, "utf-8"));
      // Only keep well-formed entries so later string operations cannot throw.
      this.triples = Array.isArray(parsed) ? parsed.filter(KnowledgeGraph.isTriple) : [];
    } catch { this.triples = []; }
  }
}
|
|
133
|
+
|
|
134
|
+
/* ── Auto-extract facts from conversation ── */
|
|
135
|
+
/*
 * Pattern table for fact extraction: each entry pairs a global regex with the
 * predicate it yields. Capture group 1 is the subject, group 2 the object.
 */
const RELATION_PATTERNS: Array<[RegExp, string]> = [
  [/(\w+) (?:是|为|属于|使用|用|用到了|采用) (.+?)(?:[。,,.\n]|$)/g, "is"],
  [/(\w+) (?:版本|version|v) (?:是|为)? ?(\d[\d.]*)/gi, "version"],
  [/(\w+) (?:depends|依赖|需要|requires) (\w+)/gi, "depends_on"],
  [/(\w+) (?:config|配置) (?:为|是)? (.+?)(?:[。,,.\n]|$)/gi, "config"],
  [/(\w+) (?:file|path|文件|路径) (?:在|为|at) (.+?)(?:[。,,.\n]|$)/gi, "located_at"],
];

/**
 * Scan `text` with every relation pattern and return the facts found.
 *
 * Subjects are lower-cased; objects are only trimmed. A match is discarded
 * when either side is shorter than two characters or when both sides are equal.
 *
 * @param text  Text to scan.
 * @param agent Accepted for callers; not used by the extraction itself.
 * @returns `[subject, predicate, object]` tuples, grouped by pattern in table order.
 */
export function extractFacts(text: string, agent: string): Array<[string, string, string]> {
  const collected: Array<[string, string, string]> = [];

  RELATION_PATTERNS.forEach(([regex, predicate]) => {
    // Global regexes resume from lastIndex; exec() resets it once it returns null.
    for (let hit = regex.exec(text); hit !== null; hit = regex.exec(text)) {
      const subject = hit[1].trim().toLowerCase();
      const object = hit[2].trim();
      const tooShort = subject.length < 2 || object.length < 2;
      if (tooShort || subject === object) continue;
      collected.push([subject, predicate, object]);
    }
  });

  return collected;
}
|
package/src/core/llm.ts
CHANGED
|
@@ -756,9 +756,23 @@ export class LLMClient {
|
|
|
756
756
|
else { const l = model.toLowerCase(); if (l.includes("claude")) provider = "anthropic"; else if (l.includes("deepseek")) provider = "deepseek"; else if (l.includes("groq")) provider = "groq"; else if (l.includes("openrouter")) provider = "openrouter"; else if (l.includes("gemini")) provider = "gemini"; }
|
|
757
757
|
const envMap = getProviderEnvMap();
|
|
758
758
|
const envVar = envMap.get(provider) || (provider.toUpperCase() + "_API_KEY");
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
759
|
+
|
|
760
|
+
// 1. Check environment variable first
|
|
761
|
+
let key = process.env[envVar];
|
|
762
|
+
if (key) return key;
|
|
763
|
+
|
|
764
|
+
// 2. Check config file (~/.skyloom/config.yaml)
|
|
765
|
+
try {
|
|
766
|
+
const fs = require("fs"); const path = require("path"); const yaml = require("yaml");
|
|
767
|
+
const cfgPath = path.join(require("os").homedir(), ".skyloom", "config.yaml");
|
|
768
|
+
if (fs.existsSync(cfgPath)) {
|
|
769
|
+
const cfg = yaml.parse(fs.readFileSync(cfgPath, "utf-8")) || {};
|
|
770
|
+
const keys = cfg.api_keys || {};
|
|
771
|
+
if (keys[provider]) return keys[provider];
|
|
772
|
+
}
|
|
773
|
+
} catch { /* ignore */ }
|
|
774
|
+
|
|
775
|
+
throw new Error("Missing " + envVar + ". Run: sky apikey set " + provider + " YOUR_KEY");
|
|
762
776
|
}
|
|
763
777
|
|
|
764
778
|
private getBaseUrl(model: string): string {
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 向量语义搜索 — TF-IDF + Cosine similarity, zero dependencies.
|
|
3
|
+
*
|
|
4
|
+
* Replaces n-gram Jaccard as the default semantic scorer.
|
|
5
|
+
* - IDF pre-computed from document corpus
|
|
6
|
+
* - Cosine similarity on TF-IDF vectors
|
|
7
|
+
* - CJK-aware tokenization (bigram for CJK, whitespace for ASCII)
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* const idx = new VectorIndex();
|
|
11
|
+
* idx.addDocuments(docs);
|
|
12
|
+
* const results = idx.search("deploy script", 5);
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/* ═══════════════════════════════════════
|
|
16
|
+
Tokenizer — CJK-aware
|
|
17
|
+
═══════════════════════════════════════ */
|
|
18
|
+
/*
 * Characters tokenized as CJK: Han ideographs (U+4E00–U+9FFF), Hiragana
 * (U+3040–U+309F) and Hangul syllables (U+AC00–U+D7AF). The ranges are spelled
 * with \u escapes so the class survives editors and tools that drop or mangle
 * raw code points; a damaged class turns "-" into a literal member.
 */
const CJK = /[\u4e00-\u9fff\u3040-\u309f\uac00-\ud7af]/;

/**
 * Split text into search tokens.
 *
 * - Runs of CJK characters are cut into consecutive two-character tokens,
 *   with a single-character token for an odd trailing character.
 * - Runs of ASCII letters, digits and underscores become one lower-cased token.
 * - Every other character is a separator and produces no token.
 */
function tokenize(text: string): string[] {
  const wordChar = /[A-Za-z0-9_]/;
  const tokens: string[] = [];
  let i = 0;
  while (i < text.length) {
    if (CJK.test(text[i])) {
      if (i + 1 < text.length && CJK.test(text[i + 1])) {
        tokens.push(text.slice(i, i + 2));
        i += 2;
      } else {
        tokens.push(text[i]);
        i++;
      }
    } else if (wordChar.test(text[i])) {
      let j = i;
      while (j < text.length && wordChar.test(text[j])) j++;
      tokens.push(text.slice(i, j).toLowerCase());
      i = j;
    } else {
      i++;
    }
  }
  return tokens;
}
|
|
40
|
+
|
|
41
|
+
/* ═══════════════════════════════════════
|
|
42
|
+
TF-IDF Vector computation
|
|
43
|
+
═══════════════════════════════════════ */
|
|
44
|
+
/** An indexed document together with its current TF-IDF vector. */
interface DocVector {
  id: string;
  /** Term → TF-IDF weight; refreshed by the index before each search. */
  tf: Map<string, number>;
  /** Euclidean length of `tf`. */
  norm: number;
  /** First 500 characters of the original text. */
  content: string;
  meta?: Record<string, any>;
}

/**
 * TF-IDF index scored by cosine similarity.
 *
 * Each document stores its length-normalized term frequencies. The IDF table
 * and the weighted vectors are derived from the whole corpus and rebuilt
 * lazily on the first search after a change, so scores do not depend on the
 * order in which documents were added or removed.
 */
export class VectorIndex {
  private docs: DocVector[] = [];
  private idf: Map<string, number> = new Map();
  private totalDocs = 0;
  /** Length-normalized term frequencies per document (the IDF-independent part). */
  private termFreq: Map<DocVector, Map<string, number>> = new Map();
  /** Number of indexed documents containing each term. */
  private docFreq: Map<string, number> = new Map();
  /** True when idf / tf / norm no longer reflect the current corpus. */
  private stale = false;

  /** Add a document to the index. Adding the same id twice stores two entries. */
  addDocument(id: string, content: string, meta?: Record<string, any>): void {
    const tokens = tokenize(content);
    const counts = new Map<string, number>();
    for (const t of tokens) { counts.set(t, (counts.get(t) || 0) + 1); }

    const relative = new Map<string, number>();
    for (const [term, freq] of counts) {
      relative.set(term, freq / tokens.length);
      this.docFreq.set(term, (this.docFreq.get(term) || 0) + 1);
    }

    // Weights are filled in by refresh(); until then the vector is empty.
    const doc: DocVector = { id, tf: new Map(), norm: 0, content: content.slice(0, 500), meta };
    this.docs.push(doc);
    this.termFreq.set(doc, relative);
    this.totalDocs++;
    this.stale = true;
  }

  /** Add several documents in order. */
  addDocuments(docs: Array<{ id: string; content: string; meta?: Record<string, any> }>): void {
    for (const d of docs) this.addDocument(d.id, d.content, d.meta);
  }

  /** Number of indexed documents containing `term`. */
  private docFrequency(term: string): number {
    return this.docFreq.get(term) || 0;
  }

  /** Rebuild the IDF table and every document vector if the corpus changed. */
  private refresh(): void {
    if (!this.stale) return;

    this.idf.clear();
    for (const term of this.docFreq.keys()) {
      this.idf.set(term, Math.log((this.totalDocs + 1) / (this.docFrequency(term) + 1)));
    }

    for (const doc of this.docs) {
      const relative = this.termFreq.get(doc);
      if (!relative) continue;
      const weights = new Map<string, number>();
      let normSq = 0;
      for (const [term, tfVal] of relative) {
        // Floor the IDF at 0.1 so terms present in every document still contribute.
        const val = tfVal * Math.max(0.1, this.idf.get(term) || 0);
        weights.set(term, val);
        normSq += val * val;
      }
      doc.tf = weights;
      doc.norm = Math.sqrt(normSq);
    }
    this.stale = false;
  }

  /**
   * Search for documents similar to `query`.
   *
   * @param query    Free text; tokenized the same way as documents.
   * @param topK     Maximum number of results.
   * @param minScore Results scoring below this cosine similarity are dropped.
   * @returns `[score, doc]` pairs, best first. Empty when the query has no tokens.
   */
  search(query: string, topK: number = 5, minScore: number = 0.01): Array<[number, DocVector]> {
    this.refresh();

    const queryTokens = tokenize(query);
    const queryTf = new Map<string, number>();
    for (const t of queryTokens) { queryTf.set(t, (queryTf.get(t) || 0) + 1); }

    // Query vector; terms unknown to the corpus get the 0.1 IDF floor.
    const qv = new Map<string, number>();
    let qNormSq = 0;
    for (const [term, freq] of queryTf) {
      const tfVal = freq / queryTokens.length;
      const val = tfVal * Math.max(0.1, this.idf.get(term) || 0);
      qv.set(term, val);
      qNormSq += val * val;
    }
    const qNorm = Math.sqrt(qNormSq);
    if (qNorm === 0) return [];

    // Cosine similarity against all docs; empty documents (norm 0) cannot match.
    const scored: Array<[number, DocVector]> = [];
    for (const doc of this.docs) {
      if (doc.norm === 0) continue;
      let dot = 0;
      for (const [term, qVal] of qv) {
        dot += qVal * (doc.tf.get(term) || 0);
      }
      const score = dot / (qNorm * doc.norm);
      if (score >= minScore) scored.push([score, doc]);
    }

    scored.sort((a, b) => b[0] - a[0]);
    return scored.slice(0, topK);
  }

  /** Remove every document with the given id and update the corpus statistics. */
  removeDocument(id: string): void {
    const kept: DocVector[] = [];
    for (const doc of this.docs) {
      if (doc.id !== id) { kept.push(doc); continue; }
      const relative = this.termFreq.get(doc);
      if (relative) {
        for (const term of relative.keys()) {
          const left = (this.docFreq.get(term) || 0) - 1;
          if (left > 0) this.docFreq.set(term, left); else this.docFreq.delete(term);
        }
      }
      this.termFreq.delete(doc);
      this.stale = true;
    }
    this.docs = kept;
    this.totalDocs = kept.length;
  }

  /** Number of indexed documents. */
  get size(): number { return this.docs.length; }

  /** Drop all documents and statistics. */
  clear(): void {
    this.docs = [];
    this.idf.clear();
    this.termFreq.clear();
    this.docFreq.clear();
    this.totalDocs = 0;
    this.stale = false;
  }
}
|
|
139
|
+
|
|
140
|
+
/* ═══════════════════════════════════════
|
|
141
|
+
Singleton instance for memory recall
|
|
142
|
+
═══════════════════════════════════════ */
|
|
143
|
+
// Process-wide index, created on first use.
let globalIndex: VectorIndex | null = null;

/** Return the shared index, creating it on the first call. */
export function getVectorIndex(): VectorIndex {
  if (globalIndex === null) {
    globalIndex = new VectorIndex();
  }
  return globalIndex;
}

/** Replace the shared index with a fresh, empty one. */
export function resetVectorIndex(): void {
  const fresh = new VectorIndex();
  globalIndex = fresh;
}
|