@aion0/forge 0.9.18 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,91 @@
1
+ /**
2
+ * buildMemoryContext — assemble the memory chunk for the agent-loop
3
+ * system prompt.
4
+ *
5
+ * Wraps store.listBlocks (for pinned + recall) and store.search (for
6
+ * query-driven retrieval) and post-filters out internal bookkeeping
7
+ * blocks like the summarizer cursor/health by key prefix. The actual
8
+ * string rendering reuses renderMemoryContext(blocks, hits) — this
9
+ * helper is just the assembly + filtering layer so callers don't have
10
+ * to think about it.
11
+ *
12
+ * Why post-filter instead of extending MemoryStore.search/listBlocks
13
+ * with scope filters: the existing API is flat key/value across both
14
+ * backends (LocalMemoryStore + Temper) and we want zero changes there.
15
+ * Forge owns the key naming convention (see lib/memory/keys.ts), so we
16
+ * own the prefix-exclusion decision client-side.
17
+ */
18
+
19
+ import type { MemoryBlock, MemoryStore, SearchHit } from './memory-store';
20
+ import { renderMemoryContext } from './temper';
21
+ import { INTERNAL_KEY_PREFIXES } from '../memory/keys';
22
+
23
/** Options accepted by buildMemoryContext. Only `store` is required. */
export interface BuildMemoryContextOpts {
  /** Memory backend; buildMemoryContext calls `listBlocks({ pinned: true })`
   *  and, when a query is present, `search(query, topK)` on it. */
  store: MemoryStore;
  /** Used as `store.search(query)` — typically the latest user message.
   *  Trimmed before use; when empty or omitted, no search is performed. */
  currentUserMessage?: string;
  /** Cap on hits returned from store.search. Default 6. */
  topK?: number;
  /** Cap on inlined pinned blocks. Default 50 (renderMemoryContext default). */
  maxBlocks?: number;
  /** Prefixes that mark internal-only blocks (cursor / health / etc).
   *  Defaults to lib/memory/keys.INTERNAL_KEY_PREFIXES. */
  excludeKeyPrefixes?: readonly string[];
}
35
+
36
/** What buildMemoryContext returns: the rendered text plus the data it was rendered from. */
export interface BuildMemoryContextResult {
  /** Output of `renderMemoryContext(blocks, hits)`. */
  text: string;
  /** Pinned blocks that survived prefix filtering and the maxBlocks cap. */
  blocks: MemoryBlock[];
  /** Search hits that survived prefix filtering; empty when no query was supplied. */
  hits: SearchHit[];
}
41
+
42
/**
 * Assemble the memory section of the agent-loop system prompt.
 *
 * Fetches pinned blocks and, when `currentUserMessage` is non-blank, search
 * hits for it; drops entries whose key starts with one of
 * `excludeKeyPrefixes`; then renders the remainder with renderMemoryContext.
 *
 * The two store calls do not depend on each other, so they are issued
 * concurrently. Each is wrapped in `safe`, so a failing backend is logged
 * and contributes an empty list instead of rejecting.
 *
 * @param opts See BuildMemoryContextOpts for defaults.
 * @returns The rendered text together with the filtered blocks and hits.
 */
export async function buildMemoryContext(opts: BuildMemoryContextOpts): Promise<BuildMemoryContextResult> {
  const {
    store,
    currentUserMessage,
    topK = 6,
    maxBlocks = 50,
    excludeKeyPrefixes = INTERNAL_KEY_PREFIXES,
  } = opts;

  const q = (currentUserMessage ?? '').trim();

  const [pinned, rawHits] = await Promise.all([
    safe(() => store.listBlocks({ pinned: true }), [] as MemoryBlock[]),
    // A blank query skips the search entirely.
    q ? safe(() => store.search(q, topK), [] as SearchHit[]) : Promise.resolve([] as SearchHit[]),
  ]);

  // Clamp the cap: a negative value would make slice() trim from the end
  // instead of limiting the count.
  const blocks = filterInternal(pinned, excludeKeyPrefixes).slice(0, Math.max(0, maxBlocks));
  const hits = filterInternalHits(rawHits, excludeKeyPrefixes);

  return { text: renderMemoryContext(blocks, hits), blocks, hits };
}
65
+
66
/**
 * Remove blocks whose key begins with any of `prefixes`.
 * With an empty prefix list the input array itself is returned.
 */
function filterInternal(blocks: MemoryBlock[], prefixes: readonly string[]): MemoryBlock[] {
  if (!prefixes.length) return blocks;

  const isInternal = (key: string): boolean => {
    for (const prefix of prefixes) {
      if (key.startsWith(prefix)) return true;
    }
    return false;
  };

  return blocks.filter((block) => !isInternal(block.key));
}
70
+
71
/**
 * Remove search hits that point at internal bookkeeping blocks.
 *
 * Only ids of the form 'block:<key>' are inspected (per the original note,
 * that is what LocalMemoryStore returns for block-derived hits). Any other
 * id — e.g. the Graphiti UUIDs Temper returns — and hits without an id are
 * kept as-is, since they are not direct block references.
 */
function filterInternalHits(hits: SearchHit[], prefixes: readonly string[]): SearchHit[] {
  if (!prefixes.length) return hits;

  const BLOCK_TAG = 'block:';
  const kept: SearchHit[] = [];
  for (const hit of hits) {
    const id = hit.id;
    if (id?.startsWith(BLOCK_TAG)) {
      const key = id.slice(BLOCK_TAG.length);
      if (prefixes.some((prefix) => key.startsWith(prefix))) continue;
    }
    kept.push(hit);
  }
  return kept;
}
83
+
84
/**
 * Run `fn` and resolve with its result; if it throws or rejects, log a
 * warning tagged '[buildMemoryContext]' and resolve with `fallback` instead.
 */
async function safe<T>(fn: () => Promise<T>, fallback: T): Promise<T> {
  let outcome = fallback;
  try {
    outcome = await fn();
  } catch (cause) {
    // Log the message for Error instances, otherwise the raw thrown value.
    const detail = cause instanceof Error ? cause.message : cause;
    console.warn('[buildMemoryContext]', detail);
  }
  return outcome;
}
@@ -77,11 +77,14 @@ export const openaiAdapter: LlmAdapter = {
77
77
  };
78
78
  }
79
79
 
80
+ // Some providers (litellm/vLLM) reject `tools: []` — they want the
81
+ // field omitted entirely when there are no tools.
82
+ const hasTools = Object.keys(tools).length > 0;
80
83
  const result = streamText({
81
84
  model: client(req.model),
82
85
  system: req.system,
83
86
  messages: historyToModelMessages(req.history),
84
- tools,
87
+ ...(hasTools ? { tools } : {}),
85
88
  maxOutputTokens: req.maxTokens,
86
89
  });
87
90
 
@@ -133,26 +133,43 @@ export class LocalMemoryStore implements MemoryStore {
133
133
  const q = (query || '').trim();
134
134
  if (!q) return [];
135
135
  const cap = Math.min(50, Math.max(1, limit));
136
- const like = `%${q.replace(/[%_]/g, (m) => '\\' + m)}%`;
136
+ // Tokenize on whitespace and OR-match. Natural-language queries
137
+ // like "tell me about the X" can't be AND-matched (stop words
138
+ // wouldn't appear in stored content), so OR keeps recall useful.
139
+ // Drop tokens shorter than 3 chars to avoid runaway noise. If
140
+ // every token is too short, fall back to a single-substring match
141
+ // on the raw query.
142
+ const allTokens = q.split(/\s+/).filter((t) => t.length > 0);
143
+ const tokens = allTokens.filter((t) => t.length >= 3);
144
+ const useTokens = tokens.length > 0 ? tokens : [q];
145
+ const likes = useTokens.map((t) => `%${t.replace(/[%_]/g, (m) => '\\' + m)}%`);
137
146
  const conn = db();
138
147
 
148
+ const blockWhere = useTokens
149
+ .map(() => `(value LIKE ? ESCAPE '\\' OR key LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\')`)
150
+ .join(' OR ');
151
+ const blockParams: unknown[] = [this.ns];
152
+ for (const like of likes) { blockParams.push(like, like, like); }
153
+ blockParams.push(cap);
139
154
  const blockHits = conn.prepare(
140
155
  `SELECT key, value, description, updated_at
141
156
  FROM memory_blocks
142
157
  WHERE ns = ?
143
- AND (value LIKE ? ESCAPE '\\' OR key LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\')
158
+ AND (${blockWhere})
144
159
  ORDER BY pinned DESC, updated_at DESC
145
160
  LIMIT ?`,
146
- ).all(this.ns, like, like, like, cap) as Array<Pick<BlockRow, 'key' | 'value' | 'description' | 'updated_at'>>;
161
+ ).all(...blockParams) as Array<Pick<BlockRow, 'key' | 'value' | 'description' | 'updated_at'>>;
147
162
 
163
+ const episodeWhere = useTokens.map(() => `content LIKE ? ESCAPE '\\'`).join(' OR ');
164
+ const episodeParams: unknown[] = [this.ns, ...likes, cap];
148
165
  const episodeHits = conn.prepare(
149
166
  `SELECT id, content, reference_time, created_at
150
167
  FROM memory_episodes
151
168
  WHERE ns = ?
152
- AND content LIKE ? ESCAPE '\\'
169
+ AND (${episodeWhere})
153
170
  ORDER BY created_at DESC
154
171
  LIMIT ?`,
155
- ).all(this.ns, like, cap) as Array<Pick<EpisodeRow, 'id' | 'content' | 'reference_time' | 'created_at'>>;
172
+ ).all(...episodeParams) as Array<Pick<EpisodeRow, 'id' | 'content' | 'reference_time' | 'created_at'>>;
156
173
 
157
174
  const hits: SearchHit[] = [];
158
175
  for (const b of blockHits) {
@@ -265,6 +265,55 @@ export function listMessages(session_id: string, opts?: { limit?: number; after_
265
265
  return rows.map(rowToMessage);
266
266
  }
267
267
 
268
/**
 * Last N messages in chronological order — used by agent-loop to cap LLM context.
 *
 * `limit` is floored to an integer before it reaches SQL. Zero, negative and
 * non-finite values yield an empty list: SQLite treats a negative LIMIT as
 * "no upper bound" and rejects values that are not losslessly convertible to
 * an integer, neither of which is what a cap should do.
 *
 * @param session_id Session whose messages are read.
 * @param limit Maximum number of messages to return.
 * @returns Up to `limit` newest messages, oldest first.
 */
export function listRecentMessages(session_id: string, limit: number): Message[] {
  const cap = Number.isFinite(limit) ? Math.floor(limit) : 0;
  if (cap <= 0) return [];

  ensureSchema();
  // Select newest-first so LIMIT keeps the tail of the conversation,
  // then flip back to chronological order for the caller.
  const rows = db().prepare(`
    SELECT * FROM chat_messages WHERE session_id = ?
    ORDER BY ts DESC LIMIT ?
  `).all(session_id, cap) as MessageRow[];
  return rows.map(rowToMessage).reverse();
}
277
+
278
/**
 * Return the tail of a session's conversation, bounded by two budgets:
 * a message count and an estimated token total.
 *
 * The count budget is applied in SQL (LIMIT), so no more rows are loaded
 * than could be kept. The token budget is applied afterwards by walking
 * from the newest message towards older ones with the caller-supplied
 * estimator, which keeps token estimation out of the storage layer.
 * The newest message is always included, even if it alone exceeds the
 * token budget, so the loop never ends up with an empty history.
 *
 * @returns Kept messages in chronological order (oldest first).
 */
export function listMessagesCapped(
  session_id: string,
  msgBudget: number,
  tokenBudget: number,
  estimateTokens: (m: Message) => number,
): Message[] {
  ensureSchema();

  const rowLimit = Math.max(1, Math.floor(msgBudget));
  const fetched = db().prepare(`
    SELECT * FROM chat_messages WHERE session_id = ?
    ORDER BY ts DESC LIMIT ?
  `).all(session_id, rowLimit) as MessageRow[];
  const candidates = fetched.map(rowToMessage);

  // Candidates arrive newest-first; prepend each accepted one so the
  // result comes out oldest-first without a separate reverse step.
  const chronological: Message[] = [];
  let spent = 0;
  for (const [index, message] of candidates.entries()) {
    const cost = estimateTokens(message);
    const overBudget = spent + cost > tokenBudget;
    if (index > 0 && overBudget) break;
    chronological.unshift(message);
    spent += cost;
  }
  return chronological;
}
316
+
268
317
  export function deleteMessage(id: string): boolean {
269
318
  ensureSchema();
270
319
  const r = db().prepare(`DELETE FROM chat_messages WHERE id = ?`).run(id);
@@ -144,6 +144,12 @@ async function handleSessionDelete(_req: IncomingMessage, res: ServerResponse, i
144
144
  }
145
145
 
146
146
  async function handleSessionClearMessages(_req: IncomingMessage, res: ServerResponse, id: string): Promise<void> {
147
+ // Intent: "Clear chat" only drops chat_messages rows. memory_store
148
+ // blocks (cursor / health / summary / facts) stay — once a fact has
149
+ // been extracted into long-term memory it should survive clearing
150
+ // the conversation it came from. Users can delete memory explicitly
151
+ // from the memory tab if they really want to forget. See
152
+ // forge-chat-memory-summarizer-design.md §11 decision 3.
147
153
  const session = getSession(id);
148
154
  if (!session) return sendJson(res, 404, { error: 'session not found' });
149
155
  const removed = clearSessionMessages(id);
package/lib/init.ts CHANGED
@@ -94,6 +94,15 @@ export function ensureInitialized() {
94
94
  catch (e) { console.warn('[init] ensureScratchProject failed:', (e as Error).message); }
95
95
  });
96
96
  time('migrateSecrets', migrateSecrets);
97
+ time('migrateAgentsFlatten', () => {
98
+ try {
99
+ const { migrateAgentsFlatten } = require('./agents/migrate');
100
+ const settings = loadSettings();
101
+ if (migrateAgentsFlatten(settings)) {
102
+ saveSettings(settings);
103
+ }
104
+ } catch (e) { console.warn('[init] migrateAgentsFlatten failed:', (e as Error).message); }
105
+ });
97
106
  time('migratePluginSecrets', () => {
98
107
  try {
99
108
  const { migratePluginSecrets } = require('./plugins/registry');
@@ -249,6 +258,7 @@ export function ensureInitialized() {
249
258
  startWorkspaceProcess(); // spawns workspace-standalone
250
259
  startBrowserBridgeProcess(); // spawns browser-bridge-standalone
251
260
  startChatProcess(); // spawns chat-standalone
261
+ startMemoryProcess(); // spawns memory-standalone
252
262
 
253
263
  const settings = loadSettings();
254
264
  if (settings.tunnelAutoStart) {
@@ -402,3 +412,18 @@ function startBrowserBridgeProcess() {
402
412
  });
403
413
  tester.listen(bridgePort);
404
414
  }
415
+
416
/** Handle of the running memory-standalone child, or null when none is tracked. */
let memoryChild: ReturnType<typeof spawn> | null = null;

/**
 * Spawn `lib/memory-standalone.ts` through `npx tsx` unless a child is
 * already tracked. The handle is cleared again when the child exits or
 * fails to start, so a later call can spawn a fresh one.
 */
function startMemoryProcess() {
  if (memoryChild) return;
  // No HTTP port — pure background poller. Just spawn-if-not-running.
  const script = join(process.cwd(), 'lib', 'memory-standalone.ts');
  const child = spawn('npx', ['tsx', script], {
    stdio: ['ignore', 'inherit', 'inherit'],
    env: { ...process.env },
    detached: false,
  });
  memoryChild = child;

  // Only clear the slot if it still refers to this child, so a late event
  // from an old process cannot drop the handle of a newer one.
  const release = () => {
    if (memoryChild === child) memoryChild = null;
  };

  // A spawn failure (e.g. `npx` not on PATH) is delivered as an 'error'
  // event; with no listener attached it would be thrown in the parent.
  child.on('error', (err) => {
    console.warn('[memory] standalone failed:', err.message);
    release();
  });
  child.on('exit', release);

  // pid is undefined when the process could not be spawned.
  if (child.pid !== undefined) {
    console.log('[memory] Started standalone (pid:', child.pid, ')');
  }
}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Compact chat messages into a summarizer-friendly transcript.
3
+ *
4
+ * Raw tool_use / tool_result blocks can each carry kilobytes of JSON
5
+ * (stack traces, encoded args, HTML responses). Feeding those into the
6
+ * summarizer LLM wastes input tokens and crowds out actual dialogue.
7
+ *
8
+ * This module flattens each Message into one or more text lines:
9
+ * - text blocks pass through (truncated to MAX_TEXT_CHARS)
10
+ * - tool_use → `tool[name](key1, key2, …)`
11
+ * - tool_result → `→ ok: <first line>` or `→ err: <first line>`
12
+ *
13
+ * Output is a plain string ready to drop into the summarizer prompt.
14
+ */
15
+
16
+ import type { ContentBlock, Message, ToolResultBlock, ToolUseBlock } from '../chat/types';
17
+
18
+ const MAX_TEXT_CHARS = 1200;
19
+ const MAX_TOOL_RESULT_CHARS = 200;
20
+ const MAX_INPUT_KEYS = 8;
21
+
22
/**
 * Flatten messages into a newline-separated transcript: each content block
 * is rendered via renderBlock, and blocks that render to nothing are omitted.
 */
export function compressMessagesForSummarizer(messages: Message[]): string {
  return messages
    .flatMap((message) => message.blocks.map((block) => renderBlock(message.role, block)))
    .filter((line) => Boolean(line))
    .join('\n');
}
32
+
33
/**
 * Render one content block as a `<role>: …` transcript line.
 * Returns null for text that is empty after trimming and for block types
 * other than text, tool_use and tool_result.
 */
function renderBlock(role: 'user' | 'assistant', block: ContentBlock): string | null {
  switch (block.type) {
    case 'text': {
      const body = truncate(block.text.trim(), MAX_TEXT_CHARS);
      return body ? `${role}: ${body}` : null;
    }
    case 'tool_use':
      return `${role}: ${renderToolUse(block)}`;
    case 'tool_result':
      return `${role}: ${renderToolResult(block)}`;
    default:
      return null;
  }
}
47
+
48
/**
 * Summarise a tool call as `tool[name](key1, key2, …)`, listing at most
 * MAX_INPUT_KEYS argument names and never the argument values.
 */
function renderToolUse(block: ToolUseBlock): string {
  let names: string[] = [];
  if (block.input && typeof block.input === 'object') {
    names = Object.keys(block.input as Record<string, unknown>).slice(0, MAX_INPUT_KEYS);
  }
  // An empty name list joins to '', which yields the bare `()` form.
  return `tool[${block.name}](${names.join(', ')})`;
}
55
+
56
/**
 * Summarise a tool result as `→ ok: <first line>` or `→ err: <first line>`,
 * with the first line capped at MAX_TOOL_RESULT_CHARS.
 */
function renderToolResult(block: ToolResultBlock): string {
  const raw = block.content ?? '';
  // Limit the split to one piece: only the text before the first line break matters.
  const [firstLine = ''] = raw.split(/\r?\n/, 1);
  const head = truncate(firstLine, MAX_TOOL_RESULT_CHARS);
  if (block.is_error) {
    return `→ err: ${head}`;
  }
  return `→ ok: ${head}`;
}
61
+
62
/**
 * Shorten `s` to at most `max` UTF-16 code units and append '…' when
 * anything was cut. Strings that already fit are returned unchanged.
 *
 * The cut never lands inside a surrogate pair: if position `max` would
 * separate a high surrogate from its low surrogate, the cut moves back one
 * unit so the output contains no lone surrogate.
 */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s;

  let end = max;
  if (end > 0) {
    const before = s.charCodeAt(end - 1);
    const after = s.charCodeAt(end);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) end -= 1;
  }
  return s.slice(0, end) + '…';
}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Key-naming conventions for the chat memory summarizer.
3
+ *
4
+ * MemoryStore is a flat key/value API; this module encodes the
5
+ * design's "scope + subject" classification into deterministic key
6
+ * strings so that:
7
+ * - cursor / health blocks land on a single stable key (upsert covers)
8
+ * - repeated ingest of the same fact maps to the same key (upsert
9
+ * replaces, not appends — this is what "memory reinforcement" looks
10
+ * like at the storage layer)
11
+ * - buildMemoryContext can post-filter by prefix to keep internal
12
+ * bookkeeping blocks (cursor / health) out of the LLM prompt
13
+ *
14
+ * See forge-chat-memory-summarizer-design.md §4.2 for the full table.
15
+ */
16
+
17
+ import { createHash } from 'node:crypto';
18
+
19
/**
 * Deterministic 12-character hex digest of `input`: the leading characters
 * of its SHA-256 hash. Identical input always yields the identical string,
 * which lets fact keys built from it collide on re-ingest (upsert = reinforcement).
 */
export function stableHash(input: string): string {
  const HASH_CHARS = 12;
  const hasher = createHash('sha256');
  hasher.update(input);
  const hex = hasher.digest('hex');
  return hex.substring(0, HASH_CHARS);
}
24
+
25
/**
 * Key for a session summary, identified by the session and the cursor
 * end-ts the summary runs up to: `chat:<sessionId>:summary:<toTs>`.
 */
export function summaryKey(sessionId: string, toTs: number): string {
  return 'chat:' + sessionId + ':summary:' + String(toTs);
}
29
+
30
/**
 * Key for a long-term fact: `fact:<scope>:<subject>:<contentHash>`.
 * Scope and subject classify the fact; the content hash makes a re-ingest
 * of the same fact resolve to the same key.
 */
export function factKey(scope: string, subject: string, contentHash: string): string {
  return 'fact:' + scope + ':' + subject + ':' + contentHash;
}
34
+
35
// Single source of truth for the bookkeeping key prefixes: the key builders
// and the exclusion list below are derived from these, so they cannot drift.
const CURSOR_KEY_PREFIX = 'forge.summarizer.cursor:';
const HEALTH_KEY_PREFIX = 'forge.summarizer.health:';

/** Per-session ingest progress cursor. One row per session. */
export function cursorKey(sessionId: string): string {
  return `${CURSOR_KEY_PREFIX}${sessionId}`;
}

/** Per-session summarizer health (last_run, errors, counts). */
export function healthKey(sessionId: string): string {
  return `${HEALTH_KEY_PREFIX}${sessionId}`;
}

/** Prefixes buildMemoryContext should exclude when rendering context —
 *  bookkeeping blocks the LLM shouldn't see. */
export const INTERNAL_KEY_PREFIXES: readonly string[] = [
  CURSOR_KEY_PREFIX,
  HEALTH_KEY_PREFIX,
];
51
+
52
/**
 * Shape of a summarizer cursor record.
 * NOTE(review): presumably the value stored under cursorKey(sessionId) —
 * confirm against the summarizer. Timestamp units are not visible here.
 */
export interface CursorValue {
  /** Presumably the ts of the newest message already ingested — TODO confirm. */
  last_ingested_ts: number;
  /** Presumably when the summarizer last ran for this session — TODO confirm. */
  last_run_ts: number;
  /** Running count of ingests; exact counting rule not shown in this file. */
  ingest_count: number;
}
57
+
58
/**
 * Shape of a stored session summary.
 * NOTE(review): presumably the value stored under summaryKey(sessionId, to_ts) —
 * confirm against the summarizer.
 */
export interface SummaryValue {
  /** The summary text itself. */
  text: string;
  /** Start of the summarised range (presumably a message ts — TODO confirm units). */
  from_ts: number;
  /** End of the summarised range (presumably a message ts — TODO confirm units). */
  to_ts: number;
  /** Number of messages the summary covers. */
  message_count: number;
  /** Model identifier recorded with the summary. */
  model: string;
  /** Provider identifier recorded with the summary. */
  provider: string;
  /** Presumably when the summary was ingested — TODO confirm. */
  ingest_ts: number;
}
67
+
68
/**
 * Shape of a stored long-term fact.
 * NOTE(review): presumably the value stored under
 * factKey(scope, subject, contentHash) — confirm against the summarizer.
 */
export interface FactValue {
  /** The fact text. */
  content: string;
  /** Classification of the subject; allowed values are not defined in this file. */
  subject_kind: string;
  /** What the fact is about. */
  subject: string;
  /** Reference back to where the fact came from; format not shown here. */
  source_ref: string;
  /** Confidence score, or null when none is available. Range not shown here. */
  confidence: number | null;
  /** Who produced the fact. NOTE(review): `'summarizer' | string` is equivalent
   *  to plain `string` for the type checker; the literal only documents the
   *  expected value. */
  extracted_by: 'summarizer' | string;
}
76
+
77
/**
 * Shape of a summarizer health record.
 * NOTE(review): presumably the value stored under healthKey(sessionId) —
 * confirm against the summarizer.
 */
export interface HealthValue {
  /** Presumably when the summarizer last ran — TODO confirm units. */
  last_run_ts: number;
  /** Error text, or null; presumably null means the last run succeeded — TODO confirm. */
  error: string | null;
  /** Running count of ingests; exact counting rule not shown in this file. */
  ingest_count: number;
  /** Most recent token estimate; what it measures is not shown in this file. */
  last_token_estimate: number;
}