skyloom 1.13.2 → 1.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/core/agent.ts CHANGED
@@ -29,6 +29,7 @@ import {
29
29
  SIG_LOOP_HARDSTOP,
30
30
  } from './agent_helpers';
31
31
  import { selectRelevantTools } from './tool_router';
32
+ import { getModelInfo } from './catalog';
32
33
 
33
34
  const log = getLogger('agent');
34
35
 
@@ -1069,20 +1070,34 @@ export class BaseAgent {
1069
1070
  return `compressed ${toSummarize.length} messages (${summary.length} char digest)`;
1070
1071
  }
1071
1072
 
1073
+ /** Resolve the model id this agent runs on (mirrors LLMClient.getModel). */
1074
+ protected resolveModelId(): string {
1075
+ const c: any = this.config;
1076
+ return c.agents?.[this.name]?.model || c.default_model || c.llm?.default_model || c.llm?.defaultModel || 'gpt-4o';
1077
+ }
1078
+
1079
+ /** The active model's real context window (tokens), from the catalog. */
1080
+ protected contextWindow(): number {
1081
+ const info = getModelInfo(this.resolveModelId());
1082
+ return info?.context && info.context > 0 ? info.context : 128000;
1083
+ }
1084
+
1072
1085
  contextUsage(): Record<string, any> {
1073
1086
  const usage = this.memory.getContextWindowUsage();
1087
+ const max = this.contextWindow();
1074
1088
  return {
1075
1089
  estimatedTokens: usage.estimatedTokens,
1076
- maxTokens: 128000,
1077
- pct: Math.min(100, Math.round((usage.estimatedTokens / 128000) * 100)),
1090
+ maxTokens: max,
1091
+ pct: Math.min(100, Math.round((usage.estimatedTokens / max) * 100)),
1078
1092
  messageCount: usage.messageCount,
1079
- model: (this.config as any).llm?.defaultModel || 'unknown',
1093
+ model: this.resolveModelId(),
1080
1094
  };
1081
1095
  }
1082
1096
 
1083
1097
  protected shouldAutoCompact(): boolean {
1084
1098
  const usage = this.memory.getContextWindowUsage();
1085
- return (usage.estimatedTokens / 128000) > 0.92;
1099
+ // Compact before hitting the real window — leave ~20% headroom for the reply.
1100
+ return usage.estimatedTokens > this.contextWindow() * 0.8;
1086
1101
  }
1087
1102
 
1088
1103
  protected activeToolNames(): string[] {
@@ -78,6 +78,7 @@ export class Memory {
78
78
  private loaded = false;
79
79
  private pendingPersists: Set<Promise<void>> = new Set();
80
80
  private activeSession: string | null = null;
81
+ private saveTimer: ReturnType<typeof setTimeout> | null = null;
81
82
 
82
83
  // short_term is mutated from both main chat loop and handlers
83
84
  // All mutations go through a short critical section
@@ -232,6 +233,18 @@ export class Memory {
232
233
  }
233
234
  }
234
235
 
236
+ /**
237
+ * Debounced save to disk. sql.js is in-memory, so without this the database
238
+ * file is never written and sessions / long-term memory would not survive a
239
+ * restart. Coalesces bursts of writes; the timer is unref'd so it never keeps
240
+ * the process alive (close() does the final synchronous save).
241
+ */
242
+ private scheduleSave(): void {
243
+ if (!this.db || this.saveTimer) return;
244
+ this.saveTimer = setTimeout(() => { this.saveTimer = null; this.persistDb(); }, 300);
245
+ if (typeof (this.saveTimer as any).unref === 'function') (this.saveTimer as any).unref();
246
+ }
247
+
235
248
  /**
236
249
  * Execute a SELECT query and return array of row objects.
237
250
  */
@@ -279,6 +292,7 @@ export class Memory {
279
292
  // Sql.js rejects `undefined` in bind arrays; normalize to `null`
280
293
  const safe = params ? params.map((v) => v === undefined ? null : v) : undefined;
281
294
  this.db.run(sql, safe);
295
+ this.scheduleSave(); // every write goes through here — persist (debounced)
282
296
  } catch (err) {
283
297
  logger.warn('db_run_failed', { sql: sql.slice(0, 80), error: String(err) });
284
298
  }
@@ -477,7 +491,9 @@ export class Memory {
477
491
  */
478
492
  async close(): Promise<void> {
479
493
  if (this.db) {
494
+ if (this.saveTimer) { clearTimeout(this.saveTimer); this.saveTimer = null; }
480
495
  await this.flushPending();
496
+ this.persistDb(); // final synchronous save to disk
481
497
  this.db.close();
482
498
  }
483
499
  }
@@ -0,0 +1,134 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { FogAgent } from "../src/agents/fog";
3
+ import { MessageBus } from "../src/core/bus";
4
+ import { ToolRegistry } from "../src/core/tool";
5
+ import { SkillRegistry } from "../src/core/skill";
6
+
7
+ /**
8
+ * Characterization tests for the agent chat/tool loop, driven by a scripted
9
+ * mock LLM (no network). These lock in the behavior of the ~275-line hot path
10
+ * (chatStreamImpl / llmLoop / tool execution / anti-loop guard) so it can be
11
+ * refactored safely (Phase 3) — and they guard against regressions like the
12
+ * first-message crash.
13
+ */
14
+
15
/** One scripted model turn: optional reasoning text, reply text and tool calls. */
interface Turn { content?: string; toolCalls?: { name: string; args?: any }[]; reasoning?: string }

/**
 * Scripted stand-in for the LLM client (no network). Each call to
 * streamWithTools() or complete() consumes the next scripted turn; once the
 * script is exhausted the last turn is replayed, and an empty script behaves
 * like an endless series of empty turns.
 */
class MockLLM {
  /** Number of model invocations so far (streaming and blocking combined). */
  calls = 0;

  constructor(private turns: Turn[]) {}

  /** Return the turn for the current call and advance the call counter. */
  private turn(): Turn {
    const idx = Math.min(this.calls, this.turns.length - 1);
    this.calls++;
    return this.turns[idx] ?? {};
  }

  /**
   * Convert a turn's scripted tool calls into function-call objects whose
   * `arguments` field is a JSON string. Ids embed the (already advanced) call
   * counter and the position within the turn.
   */
  private toolCallObjs(t: Turn) {
    return (t.toolCalls ?? []).map((tc, i) => ({
      id: `call_${this.calls}_${i}`,
      type: "function",
      // `??` rather than `||`: only a missing `args` defaults to `{}`; falsy
      // but valid values such as 0, "" or false are serialized as given.
      function: { name: tc.name, arguments: JSON.stringify(tc.args ?? {}) },
    }));
  }

  /** Stream one turn as events: reasoning, content, each tool call, then `done`. */
  async *streamWithTools(): AsyncGenerator<any> {
    const t = this.turn();
    if (t.reasoning) yield { type: "reasoning", text: t.reasoning };
    if (t.content) yield { type: "content", text: t.content };
    for (const tc of this.toolCallObjs(t)) yield { type: "tool_call", toolCall: tc };
    yield { type: "done", usage: { promptTokens: 1, completionTokens: 1 } };
  }

  /** Blocking variant: return one turn as a single completion result. */
  async complete(): Promise<any> {
    const t = this.turn();
    return {
      content: t.content || "",
      toolCalls: this.toolCallObjs(t),
      model: "mock",
      usage: { promptTokens: 1, completionTokens: 1 },
      cost: 0,
      truncated: false,
    };
  }

  getTotalCost() { return 0; }
  getModel() { return "mock"; }
  setLogger() { /* noop */ }
}
42
+
43
/**
 * Build a FogAgent driven by a scripted MockLLM.
 *
 * @param turns Scripted model turns handed to the MockLLM.
 * @param tools Tools to register before the agent is created; each tool's
 *   name doubles as its description.
 * @returns The constructed agent, using a fresh tool registry, message bus
 *   and skill registry.
 */
function makeAgent(turns: Turn[], tools: { name: string; handler: (a: any) => Promise<string> }[] = []) {
  const registry = new ToolRegistry();
  tools.forEach(({ name, handler }) => {
    registry.register({ name, description: name, handler });
  });

  const config = {
    agents: { fog: {} },
    llm: { language: "zh" },
    memory: { shortTermLimit: 100, dbPath: "/tmp/sky-test" },
  };

  return new FogAgent(config as any, new MockLLM(turns) as any, new MessageBus(), registry, new SkillRegistry());
}
50
+
51
/**
 * Drain an async generator into an array, stopping early once `cap` events
 * have been gathered so a runaway stream cannot hang a test.
 *
 * @param gen Event stream to consume. Breaking out of the loop asks the
 *   generator to finish.
 * @param cap Maximum number of events to return (default 500). A cap of zero
 *   or less returns an empty array without starting the generator.
 * @returns The collected events, at most `cap` of them.
 */
async function collect(gen: AsyncGenerator<any>, cap = 500): Promise<any[]> {
  const evs: any[] = [];
  if (cap <= 0) return evs;
  for await (const ev of gen) {
    evs.push(ev);
    // `>=` so the result holds exactly `cap` items at most (not cap + 1).
    if (evs.length >= cap) break;
  }
  return evs;
}
56
+
57
describe("agent · chat loop (mock LLM)", () => {
  // Concatenated text of every "content" event in a collected stream.
  const contentText = (events: any[]): string =>
    events
      .filter((ev) => ev.type === "content")
      .map((ev) => ev.text)
      .join("");

  it("streams a simple reply and records both messages", async () => {
    const agent = makeAgent([{ content: "你好,我是雾。" }]);
    const events = await collect(agent.chatStream("你好"));
    expect(contentText(events)).toContain("你好,我是雾。");

    const history = agent.memory.getMessages();
    // Regression guard: the user's message must be recorded first.
    expect(history[0]).toMatchObject({ role: "user", content: "你好" });
    const hasAssistantReply = history.some(
      (msg) => msg.role === "assistant" && String(msg.content).includes("雾"),
    );
    expect(hasAssistantReply).toBe(true);
  });

  it("blocking chat() returns the reply", async () => {
    const agent = makeAgent([{ content: "答案是 42" }]);
    const answer = await agent.chat("问题?");
    expect(answer).toContain("42");
  });

  it("streams reasoning before content", async () => {
    const agent = makeAgent([{ reasoning: "先想一下…", content: "结论。" }]);
    const events = await collect(agent.chatStream("?"));
    const sawReasoning = events.some((ev) => ev.type === "reasoning");
    expect(sawReasoning).toBe(true);
    expect(contentText(events)).toContain("结论。");
  });

  it("executes a tool call then produces the final answer", async () => {
    let received: any = null;
    const echoTool = {
      name: "echo",
      handler: async (a: any) => { received = a; return `echo:${a.text}`; },
    };
    const script: Turn[] = [
      { toolCalls: [{ name: "echo", args: { text: "hi" } }] },
      { content: "工具回显: hi" },
    ];
    const agent = makeAgent(script, [echoTool]);

    const events = await collect(agent.chatStream("用 echo 工具"));

    // The handler really ran, and it received the parsed arguments.
    expect(received).toEqual({ text: "hi" });
    const sawStatus = events.some((ev) => ev.type === "tool_status" && ev.tool_name === "echo");
    expect(sawStatus).toBe(true);
    const sawDone = events.some((ev) => ev.type === "tool_done" && ev.tool_name === "echo" && ev.success);
    expect(sawDone).toBe(true);
    expect(contentText(events)).toContain("工具回显");

    // The tool's result must also be recorded in memory.
    const toolResultStored = agent.memory
      .getMessages()
      .some((msg) => msg.role === "tool" && String(msg.content).includes("echo:hi"));
    expect(toolResultStored).toBe(true);
  });

  it("terminates (does not loop forever) when the model repeats the same tool call", async () => {
    // The script repeats one tool call well past the round cap; the anti-loop
    // guard has to end the run.
    const spinTurn = (): Turn => ({ toolCalls: [{ name: "spin", args: { n: 1 } }] });
    const script: Turn[] = Array.from({ length: 60 }, spinTurn);
    const llm = new MockLLM(script);

    const registry = new ToolRegistry();
    registry.register({ name: "spin", description: "spin", handler: async () => "still spinning" });

    const config = { agents: { fog: {} }, llm: {}, memory: { shortTermLimit: 200, dbPath: "/tmp/sky-test" } };
    const agent = new FogAgent(config as any, llm as any, new MessageBus(), registry, new SkillRegistry());

    const events = await collect(agent.chatStream("loop please"), 2000);

    // The generator has to return (no hang) and model calls must stay bounded.
    const finished = events.some((ev) => ev.type === "done");
    expect(finished).toBe(true);
    expect(llm.calls).toBeLessThan(50); // bounded by the round cap / guard, not 60+
  }, 15000);
});
112
+
113
describe("agent · context window (catalog-aware compaction)", () => {
  // Point the fog agent at a different model by editing its live config.
  const useModel = (agent: any, model: string): void => {
    agent.config.agents.fog.model = model;
  };

  it("contextUsage uses the active model's real window from the catalog", () => {
    const agent = makeAgent([{ content: "x" }]);

    useModel(agent, "mixtral-8x7b"); // catalog window: 32768
    expect(agent.contextUsage().maxTokens).toBe(32768);
    expect(agent.contextUsage().model).toBe("mixtral-8x7b");

    useModel(agent, "gemini-2.5-pro"); // catalog window: 1048576
    expect(agent.contextUsage().maxTokens).toBe(1048576);
  });

  it("auto-compaction triggers for a small window but not a large one (same history)", () => {
    const agent = makeAgent([{ content: "x" }]);

    // 20 messages of 800 CJK characters; the original note estimates this at
    // about 2 tokens per character, i.e. roughly 32k tokens in total.
    const filler = "字".repeat(800);
    Array.from({ length: 20 }).forEach(() => {
      agent.memory.addMessage("user", filler);
    });

    useModel(agent, "mixtral-8x7b"); // 32768 window -> over budget
    const compactsOnSmallWindow = (agent as any).shouldAutoCompact();
    expect(compactsOnSmallWindow).toBe(true);

    useModel(agent, "gemini-2.5-pro"); // 1M window -> fine
    const compactsOnLargeWindow = (agent as any).shouldAutoCompact();
    expect(compactsOnLargeWindow).toBe(false);
  });
});
@@ -132,6 +132,29 @@ describe("Memory · long-term (SQLite)", () => {
132
132
  }
133
133
  });
134
134
 
135
+ it("persists sessions + messages to disk and reloads across instances (regression)", async () => {
136
+ // Previously persistDb() was never called, so nothing survived a restart and
137
+ // session resume was impossible. close() must save; a fresh instance must reload.
138
+ const cfg = tmpConfig(); // shared dbPath for both instances
139
+ const a = new Memory(cfg, "fog");
140
+ await a.initDb();
141
+ const sid = await a.createSession("s1");
142
+ a.addMessage("user", "the sky is blue");
143
+ a.addMessage("assistant", "noted: sky is blue");
144
+ await a.remember("fact1", "value1", "auto");
145
+ await a.close(); // must flush to disk
146
+
147
+ const b = new Memory(cfg, "fog");
148
+ await b.initDb();
149
+ const sessions = await b.listSessions();
150
+ expect(sessions.some((s) => s.id === sid)).toBe(true);
151
+ expect(await b.loadSession(sid)).toBe(true);
152
+ const msgs = b.getMessages().filter((m) => m.role !== "system");
153
+ expect(msgs.some((m) => String(m.content).includes("sky is blue"))).toBe(true);
154
+ expect((await b.recall("fact1"))[0]?.value).toBe("value1"); // long-term memory survived too
155
+ await b.close();
156
+ });
157
+
135
158
  it("getMemoryStats returns a populated object", async () => {
136
159
  const mem = new Memory(tmpConfig(), "fog");
137
160
  await mem.initDb();