npm - @aion0/forge - Versions diffs - 0.9.18 → 0.10.2 - Mend

@aion0/forge 0.9.18 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/RELEASE_NOTES.md +4 -18
package/app/api/agents/[id]/test/route.ts +4 -2
package/app/api/agents/route.ts +26 -6
package/app/api/memory/blocks/route.ts +56 -0
package/app/api/monitor/route.ts +2 -0
package/app/api/schedules/extract/route.ts +8 -6
package/app/chat/page.tsx +189 -2
package/bin/forge-server.mjs +3 -2
package/components/MonitorPanel.tsx +2 -0
package/components/SettingsModal.tsx +87 -68
package/lib/agents/claude-adapter.ts +6 -1
package/lib/agents/generic-adapter.ts +2 -1
package/lib/agents/index.ts +23 -19
package/lib/agents/migrate.ts +159 -0
package/lib/chat/agent-loop.ts +53 -24
package/lib/chat/build-memory-context.ts +91 -0
package/lib/chat/llm/openai.ts +4 -1
package/lib/chat/local-memory.ts +22 -5
package/lib/chat/session-store.ts +49 -0
package/lib/chat-standalone.ts +6 -0
package/lib/init.ts +25 -0
package/lib/memory/compress-messages.ts +65 -0
package/lib/memory/keys.ts +82 -0
package/lib/memory/temper-summary.ts +485 -0
package/lib/memory/token-estimate.ts +28 -0
package/lib/memory-standalone.ts +108 -0
package/lib/settings.ts +84 -22
package/lib/workspace/skill-installer.ts +26 -6
package/package.json +1 -1
package/scripts/test-agents-migrate.ts +149 -0
package/scripts/test-memory-local.ts +139 -0
package/scripts/test-memory-upsert.ts +106 -0

package/lib/chat/build-memory-context.ts ADDED Viewed

@@ -0,0 +1,91 @@
+/**
+ * buildMemoryContext — assemble the memory chunk for the agent-loop
+ * system prompt.
+ *
+ * Wraps store.listBlocks (for pinned + recall) and store.search (for
+ * query-driven retrieval) and post-filters out internal bookkeeping
+ * blocks like the summarizer cursor/health by key prefix. The actual
+ * string rendering reuses renderMemoryContext(blocks, hits) — this
+ * helper is just the assembly + filtering layer so callers don't have
+ * to think about it.
+ *
+ * Why post-filter instead of extending MemoryStore.search/listBlocks
+ * with scope filters: the existing API is flat key/value across both
+ * backends (LocalMemoryStore + Temper) and we want zero changes there.
+ * Forge owns the key naming convention (see lib/memory/keys.ts), so we
+ * own the prefix-exclusion decision client-side.
+ */
+import type { MemoryBlock, MemoryStore, SearchHit } from './memory-store';
+import { renderMemoryContext } from './temper';
+import { INTERNAL_KEY_PREFIXES } from '../memory/keys';
+/** Options accepted by buildMemoryContext. */
+export interface BuildMemoryContextOpts {
+  /** Memory backend to read from; queried through listBlocks and search. */
+  store: MemoryStore;
+  /** Used as `store.search(query)` — typically the latest user message.
+   *  Trimmed before use; when missing or blank, no search is performed. */
+  currentUserMessage?: string;
+  /** Limit passed to store.search. Default 6. Internal hits are filtered
+   *  out afterwards, so fewer hits than this may be returned. */
+  topK?: number;
+  /** Cap on inlined pinned blocks, applied after internal blocks are
+   *  filtered out. Default 50 (renderMemoryContext default). */
+  maxBlocks?: number;
+  /** Prefixes that mark internal-only blocks (cursor / health / etc).
+   *  Defaults to lib/memory/keys.INTERNAL_KEY_PREFIXES. An empty list
+   *  disables filtering. */
+  excludeKeyPrefixes?: readonly string[];
+}
+/** Result of buildMemoryContext: the rendered text plus the data it was rendered from. */
+export interface BuildMemoryContextResult {
+  /** Output of renderMemoryContext(blocks, hits). */
+  text: string;
+  /** Pinned blocks after internal-prefix filtering and the maxBlocks cap. */
+  blocks: MemoryBlock[];
+  /** Search hits after internal-prefix filtering; empty when no query was given. */
+  hits: SearchHit[];
+}
+/**
+ * Gather pinned blocks and, when a non-blank user message is given,
+ * query hits from the store; drop entries whose key matches an excluded
+ * prefix; then render the memory chunk for the system prompt.
+ *
+ * Store failures never propagate: each call goes through `safe`, which
+ * logs a warning and substitutes an empty list.
+ *
+ * @param opts store plus optional query, limits and excluded prefixes
+ * @returns the rendered text together with the blocks and hits used
+ */
+export async function buildMemoryContext(opts: BuildMemoryContextOpts): Promise<BuildMemoryContextResult> {
+  const { store } = opts;
+  const rawMessage = opts.currentUserMessage;
+  const hitLimit = opts.topK === undefined ? 6 : opts.topK;
+  const blockLimit = opts.maxBlocks === undefined ? 50 : opts.maxBlocks;
+  const prefixes = opts.excludeKeyPrefixes === undefined ? INTERNAL_KEY_PREFIXES : opts.excludeKeyPrefixes;
+  // Pinned blocks: filter first, then cap, so internal blocks never
+  // consume part of the block budget.
+  const pinned = await safe(() => store.listBlocks({ pinned: true }), [] as MemoryBlock[]);
+  const blocks = filterInternal(pinned, prefixes).slice(0, blockLimit);
+  // Query-driven retrieval only runs when there is something to search for.
+  const query = (rawMessage || '').trim();
+  const hits: SearchHit[] = query
+    ? filterInternalHits(await safe(() => store.search(query, hitLimit), [] as SearchHit[]), prefixes)
+    : [];
+  const text = renderMemoryContext(blocks, hits);
+  return { text, blocks, hits };
+}
+/**
+ * Remove blocks whose key begins with any of the given prefixes.
+ * With no prefixes the input array itself is returned.
+ */
+function filterInternal(blocks: MemoryBlock[], prefixes: readonly string[]): MemoryBlock[] {
+  if (!prefixes.length) return blocks;
+  const visible: MemoryBlock[] = [];
+  for (const block of blocks) {
+    const isInternal = prefixes.some((prefix) => block.key.startsWith(prefix));
+    if (!isInternal) visible.push(block);
+  }
+  return visible;
+}
+/**
+ * Remove search hits that reference internal bookkeeping blocks.
+ *
+ * SearchHit.id encodes its source: LocalMemoryStore returns
+ * 'block:<key>' for block-derived hits, and only those ids are checked
+ * against the prefixes. Temper returns Graphiti UUIDs, which are not
+ * direct block references; they (and hits without an id) are kept.
+ * With no prefixes the input array itself is returned.
+ */
+function filterInternalHits(hits: SearchHit[], prefixes: readonly string[]): SearchHit[] {
+  if (prefixes.length === 0) return hits;
+  const blockTag = 'block:';
+  const pointsAtInternalBlock = (hit: SearchHit): boolean => {
+    const id = hit.id;
+    if (id == null || !id.startsWith(blockTag)) return false;
+    const blockKey = id.slice(blockTag.length);
+    return prefixes.some((prefix) => blockKey.startsWith(prefix));
+  };
+  return hits.filter((hit) => !pointsAtInternalBlock(hit));
+}
+/**
+ * Best-effort wrapper around a store call: resolves to whatever `fn`
+ * resolves to, or to `fallback` if `fn` throws or rejects. The failure
+ * is logged as a warning rather than propagated.
+ */
+async function safe<T>(fn: () => Promise<T>, fallback: T): Promise<T> {
+  let outcome: T = fallback;
+  try {
+    outcome = await fn();
+  } catch (err) {
+    const detail = err instanceof Error ? err.message : err;
+    console.warn('[buildMemoryContext]', detail);
+  }
+  return outcome;
+}

package/lib/chat/llm/openai.ts CHANGED Viewed

@@ -77,11 +77,14 @@ export const openaiAdapter: LlmAdapter = {
       };
     }
+    // Some providers (litellm/vLLM) reject `tools: []` — they want the
+    // field omitted entirely when there are no tools.
+    const hasTools = Object.keys(tools).length > 0;
     const result = streamText({
       model: client(req.model),
       system: req.system,
       messages: historyToModelMessages(req.history),
-      tools,
+      ...(hasTools ? { tools } : {}),
       maxOutputTokens: req.maxTokens,
     });

package/lib/chat/local-memory.ts CHANGED Viewed

@@ -133,26 +133,43 @@ export class LocalMemoryStore implements MemoryStore {
     const q = (query || '').trim();
     if (!q) return [];
     const cap = Math.min(50, Math.max(1, limit));
-    const like = `%${q.replace(/[%_]/g, (m) => '\\' + m)}%`;
+    // Tokenize on whitespace and OR-match. Natural-language queries
+    // like "tell me about the X" can't be AND-matched (stop words
+    // wouldn't appear in stored content), so OR keeps recall useful.
+    // Drop tokens shorter than 3 chars to avoid runaway noise. If
+    // every token is too short, fall back to a single-substring match
+    // on the raw query.
+    const allTokens = q.split(/\s+/).filter((t) => t.length > 0);
+    const tokens = allTokens.filter((t) => t.length >= 3);
+    const useTokens = tokens.length > 0 ? tokens : [q];
+    const likes = useTokens.map((t) => `%${t.replace(/[%_]/g, (m) => '\\' + m)}%`);
     const conn = db();
+    const blockWhere = useTokens
+      .map(() => `(value LIKE ? ESCAPE '\\' OR key LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\')`)
+      .join(' OR ');
+    const blockParams: unknown[] = [this.ns];
+    for (const like of likes) { blockParams.push(like, like, like); }
+    blockParams.push(cap);
     const blockHits = conn.prepare(
       `SELECT key, value, description, updated_at
          FROM memory_blocks
         WHERE ns = ?
-          AND (value LIKE ? ESCAPE '\\' OR key LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\')
+          AND (${blockWhere})
         ORDER BY pinned DESC, updated_at DESC
         LIMIT ?`,
-    ).all(this.ns, like, like, like, cap) as Array<Pick<BlockRow, 'key' | 'value' | 'description' | 'updated_at'>>;
+    ).all(...blockParams) as Array<Pick<BlockRow, 'key' | 'value' | 'description' | 'updated_at'>>;
+    const episodeWhere = useTokens.map(() => `content LIKE ? ESCAPE '\\'`).join(' OR ');
+    const episodeParams: unknown[] = [this.ns, ...likes, cap];
     const episodeHits = conn.prepare(
       `SELECT id, content, reference_time, created_at
          FROM memory_episodes
         WHERE ns = ?
-          AND content LIKE ? ESCAPE '\\'
+          AND (${episodeWhere})
         ORDER BY created_at DESC
         LIMIT ?`,
-    ).all(this.ns, like, cap) as Array<Pick<EpisodeRow, 'id' | 'content' | 'reference_time' | 'created_at'>>;
+    ).all(...episodeParams) as Array<Pick<EpisodeRow, 'id' | 'content' | 'reference_time' | 'created_at'>>;
     const hits: SearchHit[] = [];
     for (const b of blockHits) {

package/lib/chat/session-store.ts CHANGED Viewed

@@ -265,6 +265,55 @@ export function listMessages(session_id: string, opts?: { limit?: number; after_
   return rows.map(rowToMessage);
 }
+/**
+ * Last N messages in chronological order — used by agent-loop to cap LLM context.
+ *
+ * `limit` is normalized to a non-negative integer before it is bound to
+ * SQL LIMIT. SQLite treats a negative LIMIT as "no upper bound", which
+ * would silently defeat the cap, and rejects values that are not
+ * losslessly convertible to an integer. NaN, zero and negative limits
+ * yield an empty list; fractional limits are floored.
+ *
+ * @param session_id session whose messages are read
+ * @param limit maximum number of messages to return
+ * @returns up to `limit` newest messages, oldest first
+ */
+export function listRecentMessages(session_id: string, limit: number): Message[] {
+  ensureSchema();
+  const cap = Number.isNaN(limit)
+    ? 0
+    : Math.min(Math.max(0, Math.floor(limit)), Number.MAX_SAFE_INTEGER);
+  if (cap === 0) return [];
+  // Fetch newest-first so LIMIT keeps the most recent rows, then flip
+  // back to chronological order for the caller.
+  const rows = db().prepare(`
+    SELECT * FROM chat_messages WHERE session_id = ?
+    ORDER BY ts DESC LIMIT ?
+  `).all(session_id, cap) as MessageRow[];
+  return rows.map(rowToMessage).reverse();
+}
+/**
+ * Return the newest messages of a session that fit within both a
+ * message-count budget and a token-estimate budget, in chronological
+ * order (ready for the LLM history slot).
+ *
+ * The count budget is applied by SQL LIMIT (floored, minimum 1). The
+ * token budget is applied afterwards by walking newest → oldest with
+ * the caller-supplied estimator, which keeps the token estimator out
+ * of the storage layer. The newest message is always kept, even when
+ * it alone exceeds the token budget.
+ *
+ * @param session_id session whose messages are read
+ * @param msgBudget maximum number of messages to load
+ * @param tokenBudget maximum summed estimate of the kept messages
+ * @param estimateTokens cost estimator for a single message
+ */
+export function listMessagesCapped(
+  session_id: string,
+  msgBudget: number,
+  tokenBudget: number,
+  estimateTokens: (m: Message) => number,
+): Message[] {
+  ensureSchema();
+  const rowLimit = Math.max(1, Math.floor(msgBudget));
+  // Newest-first and bounded by the message budget, so no more rows
+  // are loaded than could be kept.
+  const statement = db().prepare(`
+    SELECT * FROM chat_messages WHERE session_id = ?
+    ORDER BY ts DESC LIMIT ?
+  `);
+  const candidates = (statement.all(session_id, rowLimit) as MessageRow[]).map(rowToMessage);
+  // Prepend while walking newest → oldest so the result ends up in
+  // chronological order without a final reverse.
+  const selected: Message[] = [];
+  let spent = 0;
+  for (const message of candidates) {
+    const cost = estimateTokens(message);
+    const overBudget = spent + cost > tokenBudget;
+    // The first (newest) message is exempt so an oversized last message
+    // doesn't strand the loop.
+    if (overBudget && selected.length > 0) break;
+    selected.unshift(message);
+    spent += cost;
+  }
+  return selected;
+}
 export function deleteMessage(id: string): boolean {
   ensureSchema();
   const r = db().prepare(`DELETE FROM chat_messages WHERE id = ?`).run(id);

package/lib/chat-standalone.ts CHANGED Viewed

@@ -144,6 +144,12 @@ async function handleSessionDelete(_req: IncomingMessage, res: ServerResponse, i
 }
 async function handleSessionClearMessages(_req: IncomingMessage, res: ServerResponse, id: string): Promise<void> {
+  // Intent: "Clear chat" only drops chat_messages rows. memory_store
+  // blocks (cursor / health / summary / facts) stay — once a fact has
+  // been extracted into long-term memory it should survive clearing
+  // the conversation it came from. Users can delete memory explicitly
+  // from the memory tab if they really want to forget. See
+  // forge-chat-memory-summarizer-design.md §11 decision 3.
   const session = getSession(id);
   if (!session) return sendJson(res, 404, { error: 'session not found' });
   const removed = clearSessionMessages(id);

package/lib/init.ts CHANGED Viewed

@@ -94,6 +94,15 @@ export function ensureInitialized() {
     catch (e) { console.warn('[init] ensureScratchProject failed:', (e as Error).message); }
   });
   time('migrateSecrets', migrateSecrets);
+  time('migrateAgentsFlatten', () => {
+    try {
+      const { migrateAgentsFlatten } = require('./agents/migrate');
+      const settings = loadSettings();
+      if (migrateAgentsFlatten(settings)) {
+        saveSettings(settings);
+      }
+    } catch (e) { console.warn('[init] migrateAgentsFlatten failed:', (e as Error).message); }
+  });
   time('migratePluginSecrets', () => {
     try {
       const { migratePluginSecrets } = require('./plugins/registry');
@@ -249,6 +258,7 @@ export function ensureInitialized() {
   startWorkspaceProcess(); // spawns workspace-standalone
   startBrowserBridgeProcess(); // spawns browser-bridge-standalone
   startChatProcess(); // spawns chat-standalone
+  startMemoryProcess(); // spawns memory-standalone
   const settings = loadSettings();
   if (settings.tunnelAutoStart) {
@@ -402,3 +412,18 @@ function startBrowserBridgeProcess() {
   });
   tester.listen(bridgePort);
 }
+/** Handle of the spawned memory-standalone child; null while none is tracked. */
+let memoryChild: ReturnType<typeof spawn> | null = null;
+/**
+ * Spawn lib/memory-standalone.ts under `npx tsx` unless a child is
+ * already tracked. The handle is cleared when the child exits or fails
+ * to start, so a later call can spawn again.
+ */
+function startMemoryProcess() {
+  if (memoryChild) return;
+  // No HTTP port — pure background poller. Just spawn-if-not-running.
+  const script = join(process.cwd(), 'lib', 'memory-standalone.ts');
+  const child = spawn('npx', ['tsx', script], {
+    stdio: ['ignore', 'inherit', 'inherit'],
+    env: { ...process.env },
+    detached: false,
+  });
+  memoryChild = child;
+  // Only clear the handle if it still refers to this child.
+  const release = () => { if (memoryChild === child) memoryChild = null; };
+  // A failed spawn (e.g. npx not on PATH) is reported through the
+  // 'error' event. Without a listener that event is thrown as an
+  // uncaught exception in the host process, and 'exit' may never fire,
+  // leaving the handle stuck non-null.
+  child.on('error', (err) => {
+    console.warn('[memory] Failed to start standalone:', err.message);
+    release();
+  });
+  child.on('exit', release);
+  // pid is undefined when the process could not be spawned.
+  if (child.pid !== undefined) {
+    console.log('[memory] Started standalone (pid:', child.pid, ')');
+  }
+}

package/lib/memory/compress-messages.ts ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * Compact chat messages into a summarizer-friendly transcript.
+ *
+ * Raw tool_use / tool_result blocks can each carry kilobytes of JSON
+ * (stack traces, encoded args, HTML responses). Feeding those into the
+ * summarizer LLM wastes input tokens and crowds out actual dialogue.
+ *
+ * This module flattens each Message into one or more text lines:
+ *   - text blocks pass through (truncated to MAX_TEXT_CHARS)
+ *   - tool_use → `tool[name](key1, key2, …)`
+ *   - tool_result → `→ ok: <first line>` or `→ err: <first line>`
+ *
+ * Output is a plain string ready to drop into the summarizer prompt.
+ */
+import type { ContentBlock, Message, ToolResultBlock, ToolUseBlock } from '../chat/types';
+/** Max characters kept from a (trimmed) text block before truncation. */
+const MAX_TEXT_CHARS = 1200;
+/** Max characters kept from the first line of a tool_result. */
+const MAX_TOOL_RESULT_CHARS = 200;
+/** Max number of input key names listed when rendering a tool_use. */
+const MAX_INPUT_KEYS = 8;
+/**
+ * Flatten messages into a newline-separated transcript: one line per
+ * content block, in message and block order. Blocks that render to
+ * nothing (blank text, unknown block types) are skipped.
+ */
+export function compressMessagesForSummarizer(messages: Message[]): string {
+  return messages
+    .flatMap((message) => message.blocks.map((block) => renderBlock(message.role, block)))
+    .filter((line): line is string => Boolean(line))
+    .join('\n');
+}
+/**
+ * Render one content block as a `<role>: …` transcript line.
+ * Returns null for text that is blank after trimming and for block
+ * types other than text / tool_use / tool_result.
+ */
+function renderBlock(role: 'user' | 'assistant', block: ContentBlock): string | null {
+  switch (block.type) {
+    case 'text': {
+      const body = truncate(block.text.trim(), MAX_TEXT_CHARS);
+      return body ? `${role}: ${body}` : null;
+    }
+    case 'tool_use':
+      return `${role}: ${renderToolUse(block)}`;
+    case 'tool_result':
+      return `${role}: ${renderToolResult(block)}`;
+    default:
+      return null;
+  }
+}
+/**
+ * Render a tool call as `tool[name](key1, key2, …)`. Only the names of
+ * the input's keys are listed (at most MAX_INPUT_KEYS), never the
+ * values. A missing or non-object input renders as `()`.
+ */
+function renderToolUse(block: ToolUseBlock): string {
+  const input = block.input;
+  let names: string[] = [];
+  if (input && typeof input === 'object') {
+    names = Object.keys(input as Record<string, unknown>).slice(0, MAX_INPUT_KEYS);
+  }
+  return `tool[${block.name}](${names.join(', ')})`;
+}
+/**
+ * Render a tool result as `→ ok: <first line>` or `→ err: <first line>`.
+ * Only the first line of the content is kept, truncated to
+ * MAX_TOOL_RESULT_CHARS; missing content renders as an empty head.
+ */
+function renderToolResult(block: ToolResultBlock): string {
+  const raw = block.content ?? '';
+  const [first] = raw.split(/\r?\n/, 1);
+  const head = truncate(first ?? '', MAX_TOOL_RESULT_CHARS);
+  if (block.is_error) {
+    return `→ err: ${head}`;
+  }
+  return `→ ok: ${head}`;
+}
+/**
+ * Cap `s` at `max` UTF-16 code units, appending '…' when anything was
+ * cut. Strings that already fit are returned unchanged.
+ *
+ * If the cut would leave a high surrogate as the last kept unit (i.e.
+ * it falls in the middle of a surrogate pair, such as an emoji), that
+ * unit is dropped too, so the result never ends in a lone surrogate.
+ */
+function truncate(s: string, max: number): string {
+  if (s.length <= max) return s;
+  let end = max;
+  if (end > 0) {
+    const lastKept = s.charCodeAt(end - 1);
+    // 0xD800–0xDBFF is the high-surrogate range.
+    if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
+  }
+  return s.slice(0, end) + '…';
+}

package/lib/memory/keys.ts ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * Key-naming conventions for the chat memory summarizer.
+ *
+ * MemoryStore is a flat key/value API; this module encodes the
+ * design's "scope + subject" classification into deterministic key
+ * strings so that:
+ *   - cursor / health blocks land on a single stable key (each upsert overwrites the previous value)
+ *   - repeated ingest of the same fact maps to the same key (upsert
+ *     replaces, not appends — this is what "memory reinforcement" looks
+ *     like at the storage layer)
+ *   - buildMemoryContext can post-filter by prefix to keep internal
+ *     bookkeeping blocks (cursor / health) out of the LLM prompt
+ *
+ * See forge-chat-memory-summarizer-design.md §4.2 for the full table.
+ */
+import { createHash } from 'node:crypto';
+/** Stable 12-char content hash: the first 12 hex characters of the
+ *  SHA-256 digest of `input`. Used in fact keys so re-ingesting the
+ *  same fact maps to the same memory block (upsert = reinforcement). */
+export function stableHash(input: string): string {
+  const hasher = createHash('sha256');
+  hasher.update(input);
+  const hex = hasher.digest('hex');
+  return hex.substring(0, 12);
+}
+/** Key of a session summary block, identified by the session and the
+ *  cursor end-ts the summary runs up to: `chat:<sessionId>:summary:<toTs>`. */
+export function summaryKey(sessionId: string, toTs: number): string {
+  const sessionPart = 'chat:' + sessionId;
+  return sessionPart + ':summary:' + toTs;
+}
+/** Key of a long-term fact block: `fact:<scope>:<subject>:<contentHash>`.
+ *  scope and subject classify the fact; the content hash makes
+ *  re-ingesting the same fact land on the same key. */
+export function factKey(scope: string, subject: string, contentHash: string): string {
+  const classification = scope + ':' + subject;
+  return 'fact:' + classification + ':' + contentHash;
+}
+/** Key prefix of per-session cursor blocks. Shared by cursorKey and
+ *  INTERNAL_KEY_PREFIXES so the builder and the exclusion list cannot
+ *  drift apart. */
+const CURSOR_KEY_PREFIX = 'forge.summarizer.cursor:';
+/** Key prefix of per-session health blocks. Shared by healthKey and
+ *  INTERNAL_KEY_PREFIXES for the same reason. */
+const HEALTH_KEY_PREFIX = 'forge.summarizer.health:';
+/** Per-session ingest progress cursor. One row per session. */
+export function cursorKey(sessionId: string): string {
+  return `${CURSOR_KEY_PREFIX}${sessionId}`;
+}
+/** Per-session summarizer health (last_run, errors, counts). */
+export function healthKey(sessionId: string): string {
+  return `${HEALTH_KEY_PREFIX}${sessionId}`;
+}
+/** Prefixes buildMemoryContext should exclude when rendering context —
+ *  bookkeeping blocks the LLM shouldn't see. Every key produced by
+ *  cursorKey / healthKey starts with one of these. */
+export const INTERNAL_KEY_PREFIXES: readonly string[] = [
+  CURSOR_KEY_PREFIX,
+  HEALTH_KEY_PREFIX,
+];
+/** Ingest-progress record. Presumably the value stored under
+ *  cursorKey(sessionId) — confirm against the summarizer. */
+export interface CursorValue {
+  /** NOTE(review): looks like the ts of the newest message already ingested — confirm. */
+  last_ingested_ts: number;
+  /** NOTE(review): looks like the time of the most recent summarizer run — confirm units. */
+  last_run_ts: number;
+  /** NOTE(review): a running count; what exactly is counted is not visible here. */
+  ingest_count: number;
+}
+/** Session summary record. Presumably the value stored under
+ *  summaryKey(sessionId, toTs) — confirm against the summarizer. */
+export interface SummaryValue {
+  /** The summary text itself. */
+  text: string;
+  /** NOTE(review): presumably the ts range of the summarized messages — confirm. */
+  from_ts: number;
+  to_ts: number;
+  /** NOTE(review): presumably how many messages the summary covers — confirm. */
+  message_count: number;
+  /** NOTE(review): presumably the LLM model / provider that produced the summary — confirm. */
+  model: string;
+  provider: string;
+  /** NOTE(review): presumably when the summary was ingested — confirm units. */
+  ingest_ts: number;
+}
+/** Long-term fact record. Presumably the value stored under
+ *  factKey(scope, subject, contentHash) — confirm against the summarizer. */
+export interface FactValue {
+  /** The fact text. */
+  content: string;
+  /** NOTE(review): how subject_kind relates to factKey's `scope` is not visible here. */
+  subject_kind: string;
+  subject: string;
+  /** NOTE(review): presumably a pointer back to where the fact came from — confirm format. */
+  source_ref: string;
+  /** Nullable; the range and meaning of the score are not visible here. */
+  confidence: number | null;
+  /** The union with `string` means any string type-checks; the
+   *  'summarizer' literal only documents the expected value. */
+  extracted_by: 'summarizer' | string;
+}
+/** Summarizer health record. Presumably the value stored under
+ *  healthKey(sessionId) — confirm against the summarizer. */
+export interface HealthValue {
+  /** NOTE(review): looks like the time of the most recent summarizer run — confirm units. */
+  last_run_ts: number;
+  /** Nullable; presumably the last error message, null when the last run succeeded — confirm. */
+  error: string | null;
+  /** NOTE(review): a running count; what exactly is counted is not visible here. */
+  ingest_count: number;
+  /** NOTE(review): presumably the token estimate from the most recent run — confirm. */
+  last_token_estimate: number;
+}