npm - @ijfw/memory-server - Versions diffs - 1.5.5 → 1.6.0 - Mend

@ijfw/memory-server 1.5.5 → 1.6.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (72)

package/bin/ijfw-dashboard +20 -1
package/package.json +4 -3
package/src/audit-roster.js +89 -12
package/src/brain/tiered-llm.js +57 -7
package/src/cross-orchestrator-cli.js +344 -4
package/src/cross-project-search.js +39 -1
package/src/dashboard-server.js +7 -1
package/src/dream/runner.mjs +560 -8
package/src/handlers/brain-handler.js +101 -1
package/src/importers/discover.js +1 -1
package/src/memory/bench-metrics.js +289 -0
package/src/memory/benchmark.js +1 -1
package/src/memory/search.js +53 -1
package/src/orchestrator/plan-checker.js +1 -1
package/src/profile/audit.js +671 -0
package/src/profile/capture.js +871 -0
package/src/profile/derive-dialectic.js +242 -0
package/src/profile/derive-heuristic.js +733 -0
package/src/profile/derive.js +156 -0
package/src/profile/egress.js +306 -0
package/src/profile/eval/build-real-probes.mjs +197 -0
package/src/profile/eval/corpus-from-reddit.mjs +166 -0
package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
package/src/profile/eval/gate-b-behavior.mjs +420 -0
package/src/profile/eval/gate-b-decision-run.mjs +171 -0
package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
package/src/profile/eval/gate-b-run.mjs +417 -0
package/src/profile/eval/gate-b-run.test.mjs +204 -0
package/src/profile/eval/gate-c-capture.mjs +323 -0
package/src/profile/eval/harness.mjs +551 -0
package/src/profile/eval/instrument-validation.mjs +248 -0
package/src/profile/eval/instrument-validation.test.mjs +125 -0
package/src/profile/eval/multi-subject-harness.mjs +106 -0
package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
package/src/profile/eval/personas.test.mjs +83 -0
package/src/profile/eval/plumbing.test.mjs +69 -0
package/src/profile/eval/prereg.mjs +130 -0
package/src/profile/eval/prereg.test.mjs +78 -0
package/src/profile/eval/real-corpus.test.mjs +103 -0
package/src/profile/eval/real-personas.mjs +109 -0
package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
package/src/profile/eval/run-real-corpus.mjs +358 -0
package/src/profile/eval/slug-quality.mjs +464 -0
package/src/profile/eval/stylometry-features.js +85 -0
package/src/profile/eval/stylometry-reference.js +16 -0
package/src/profile/eval/stylometry.js +224 -0
package/src/profile/eval/stylometry.test.mjs +103 -0
package/src/profile/eval/synthetic-personas.js +91 -0
package/src/profile/eval/verifier-features.mjs +170 -0
package/src/profile/eval/verifier-logreg.mjs +74 -0
package/src/profile/eval/verifier-pair.mjs +122 -0
package/src/profile/eval/verifier-reference.mjs +68 -0
package/src/profile/eval/verifier-scorer.mjs +30 -0
package/src/profile/eval/wrong-target-control.mjs +168 -0
package/src/profile/eval/wrong-target-control.test.mjs +124 -0
package/src/profile/exemplar-capture.js +232 -0
package/src/profile/exemplar-retrieve.js +138 -0
package/src/profile/exemplar-store.js +314 -0
package/src/profile/lock.js +64 -0
package/src/profile/merge.js +624 -0
package/src/profile/path-policy.js +213 -0
package/src/profile/precision-stamp.mjs +151 -0
package/src/profile/render-brief.js +717 -0
package/src/profile/schema.js +244 -0
package/src/profile/sensitivity.js +249 -0
package/src/profile/serve.js +345 -0
package/src/profile/store.js +261 -0
package/src/profile/telemetry.js +289 -0
package/src/recovery/checkpoint.js +7 -1
package/src/server.js +185 -14
package/src/.registry-meta-key.pem +0 -3

package/bin/ijfw-dashboard CHANGED Viewed

@@ -79,6 +79,20 @@ const argv = process.argv.slice(2);
 const sub  = argv[0] || 'status';
 const noOpen = argv.includes('--no-open');
+// Optional `--port N` override (documented in the usage banner). When set we
+// forward it to the daemon via IJFW_DASHBOARD_PORT so the server's port-walk
+// starts there instead of the default 37891. Unset = current behaviour. This
+// also makes the dashboard testable on a scratch port without touching 37891.
+function parsePortFlag(args) {
+  const i = args.indexOf('--port');
+  if (i !== -1 && args[i + 1]) {
+    const n = parseInt(args[i + 1], 10);
+    if (Number.isInteger(n) && n > 0 && n < 65536) return String(n);
+  }
+  return null;
+}
+const portOverride = parsePortFlag(argv);
 if (sub === 'start') {
   ensureDir();
   const existingPid = readPid();
@@ -97,7 +111,12 @@ if (sub === 'start') {
   const child = spawn(process.execPath, [SERVER_JS, '--daemon'], {
     detached: true,
     stdio: ['ignore', 'ignore', 'ignore'],
-    env: { ...process.env, IJFW_PID_FILE: PID_FILE, IJFW_PORT_FILE: PORT_FILE },
+    env: {
+      ...process.env,
+      IJFW_PID_FILE: PID_FILE,
+      IJFW_PORT_FILE: PORT_FILE,
+      ...(portOverride ? { IJFW_DASHBOARD_PORT: portOverride } : {}),
+    },
   });
   child.unref();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ijfw/memory-server",
-  "version": "1.5.5",
+  "version": "1.6.0",
   "description": "Cross-platform persistent memory server for IJFW. 14 MCP tools (memory + admin/update + brain). Works with 15 platforms: 14 via MCP (Claude Code, Codex, Gemini CLI, Cursor, Windsurf, Copilot, Hermes, Wayland, OpenCode, QwenCode, Cline, KimiCode, OpenClaw, Antigravity) plus Aider via the rules-only tier.",
   "author": "Sean Donahoe",
   "contributors": [
@@ -28,10 +28,11 @@
   "scripts": {
     "start": "node src/server.js",
     "dev": "node --watch src/server.js",
-    "test": "node test.js && node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js",
+    "test": "node test.js && node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js test/profile-*.test.mjs src/profile/eval/*.test.mjs",
     "test:smoke": "node test.js",
     "test:full": "node --experimental-sqlite --test --test-force-exit test-*.js test/brain/test-*.js test/memory/test-*.js test/integration/test-*.js",
-    "test:graders": "node test/grade-symbol-graph-spec.js && node test/grade-symbol-graph-consistency.js && node test/grade-cascading-staleness.js && node test/grade-project-types.js"
+    "test:graders": "node test/grade-symbol-graph-spec.js && node test/grade-symbol-graph-consistency.js && node test/grade-cascading-staleness.js && node test/grade-project-types.js",
+    "functional-smoke": "node scripts/run-functional-smoke.mjs"
   },
   "engines": {
     "node": ">=18.0.0"

package/src/audit-roster.js CHANGED Viewed

@@ -86,8 +86,24 @@ export const ROSTER = [
     family: 'google',
     model: '',
     name: 'Gemini CLI',
-    invoke: 'gemini',
-    note: 'Strong on security + architectural patterns. Auto-detects piped stdin for headless mode.',
+    // v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `gemini` BREAKS on the
+    // current CLI (verified gemini-cli 0.43.0) for two reasons:
+    //   1. Trusted-directory gate (added like codex's --skip-git-repo-check):
+    //      a non-interactive invoke outside a "trusted" folder exits immediately
+    //      with "Gemini CLI is not running in a trusted directory" and emits ZERO
+    //      output. --skip-trust bypasses it (env GEMINI_CLI_TRUST_WORKSPACE=true
+    //      is the alternative). Without it the CLI path NEVER succeeds; runs were
+    //      only ever rescued by the API fallback (every historical receipt shows
+    //      gemini source='api', never 'cli').
+    //   2. Recursive IJFW-MCP autostart: when the ijfw gemini extension is
+    //      loaded, `gemini` autostarts the IJFW MCP server (the same self-
+    //      referential hang codex dodges via mcp_servers.ijfw-memory.enabled=false).
+    //      `-e none` loads zero extensions, removing the autostart latency/noise.
+    // Prompt is delivered on stdin: piped non-TTY stdin triggers headless mode,
+    // so no -p value is needed (and `-p` with no value is a yargs error under
+    // whitespace-split argv). Verified returning findings end-to-end 2026-06-08.
+    invoke: 'gemini --skip-trust -e none',
+    note: 'Strong on security + architectural patterns. Reads the prompt from piped stdin in headless mode. --skip-trust bypasses the trusted-directory gate (gemini-cli >=0.43); -e none disables extensions so the IJFW MCP server is not recursively autostarted.',
     detect: (env) => Boolean(env.GEMINI_CLI || env.GOOGLE_CLOUD_PROJECT_GEMINI) || /gemini-cli/i.test(env._ || ''),
     // model is resolved at call-time via model-refresh.js (24h-cached probe).
     get apiFallback() {
@@ -99,8 +115,22 @@ export const ROSTER = [
     family: 'oss',
     model: '',
     name: 'Qwen Code',
-    invoke: 'qwen -p',
-    note: 'Apache-2.0 weights (Qwen3-Coder-480B-A35B), agentic-tuned (~67% SWE-Bench Verified). Fork of gemini-cli; supports qwen-oauth (free Coding Plan tier), plus openai/anthropic/gemini auth-types via `qwen auth`. Diversity value for Trident: third independent training lineage outside openai/google.',
+    // v1.6.0 xaudit-fix — STALE-ARGV repair. `qwen -p` (no value) is wrong on
+    // current Qwen Code (verified 0.15.6): `-p/--prompt` is now DEPRECATED and a
+    // bare `-p` with no argument is a yargs error under whitespace-split argv.
+    // The prompt is delivered on stdin (this fork ignores the positional `query`
+    // and prints "No input provided via stdin" if nothing is piped). Bare
+    // `qwen <stdin>` ALSO fails silently: the implicit startup auto-discovery
+    // runs an approval/confirmation flow that auto-cancels under non-TTY stdin
+    // and emits only "Operation cancelled." with ZERO findings (the gemini-cli-
+    // fork analog of gemini's trusted-directory gate). `--bare` skips that
+    // auto-discovery and `--yolo` auto-approves all actions, so the headless run
+    // reaches the model — or fails FAST + CLEAN on auth ("No auth type is
+    // selected ... before running in non-interactive mode" in ~1s when neither a
+    // qwen-oauth login nor DASHSCOPE_API_KEY is configured). Verified 2026-06-08:
+    // `--bare --yolo` errors cleanly on auth in 1s instead of the silent cancel.
+    invoke: 'qwen --bare --yolo',
+    note: 'Apache-2.0 weights (Qwen3-Coder-480B-A35B), agentic-tuned (~67% SWE-Bench Verified). Fork of gemini-cli; supports qwen-oauth (free Coding Plan tier), plus openai/anthropic/gemini auth-types via `qwen auth`. Reads the prompt from piped stdin; --bare skips the startup auto-discovery that auto-cancels under non-TTY stdin, --yolo auto-approves. Diversity value for Trident: third independent training lineage outside openai/google.',
     detect: (env) => Boolean(env.QWEN_SESSION) || /(?:^|\W)qwen(?:\W|$)/i.test(env._ || ''),
     apiFallback: { provider: 'openai-compat', model: 'qwen3-coder-plus', authEnv: 'DASHSCOPE_API_KEY', endpoint: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions' },
   },
@@ -119,8 +149,18 @@ export const ROSTER = [
     family: 'oss',
     model: '',
     name: 'Kimi (Moonshot)',
-    invoke: 'kimi',
-    note: 'Moonshot AI Kimi K2 series (Chinese open-source lineage, separate from DeepSeek). Long-context strength makes it useful for whole-file or whole-module audits where context window matters. OpenAI-compatible API via platform.moonshot.ai. Detection is left at false because no canonical session env var ships with Kimi today -- prefer double-coverage over false self-exclusion.',
+    // v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `kimi` (verified 1.38.0) is a
+    // Click-style `kimi [OPTIONS] COMMAND` agent that PROMPTS INTERACTIVELY by
+    // default (`--prompt ... Default: prompt interactively`), so a piped prompt
+    // on stdin would stall waiting on a TTY. `--print` is the non-interactive
+    // "print mode" (it implicitly adds `--yolo`) and, with `--input-format text`
+    // (the default), the input is read from piped stdin. `--quiet` is the shipped
+    // alias for `--print --output-format text --final-message-only`, which prints
+    // ONLY the final assistant message to stdout (the "resume session" hint goes
+    // to stderr, keeping stdout clean for fence parsing). Verified 2026-06-08:
+    // `kimi --print --quiet` returned a clean JSON fence with both seeded bugs.
+    invoke: 'kimi --print --quiet',
+    note: 'Moonshot AI Kimi K2 series (Chinese open-source lineage, separate from DeepSeek). Long-context strength makes it useful for whole-file or whole-module audits where context window matters. `kimi --print --quiet` is the headless mode (reads the prompt from piped stdin, prints only the final assistant message). OpenAI-compatible API via platform.moonshot.ai. Detection is left at false because no canonical session env var ships with Kimi today -- prefer double-coverage over false self-exclusion.',
     detect: () => false,
     apiFallback: { provider: 'openai-compat', model: 'kimi-k2.6', authEnv: 'MOONSHOT_API_KEY', endpoint: 'https://api.moonshot.ai/v1/chat/completions' },
   },
@@ -129,8 +169,16 @@ export const ROSTER = [
     family: 'oss',
     model: '',
     name: 'opencode',
-    invoke: 'opencode',
-    note: 'OSS / local-friendly; good when privacy matters.',
+    // v1.6.0 xaudit-fix — STALE-ARGV repair. Bare `opencode` launches the
+    // INTERACTIVE TUI (verified opencode 1.15.11): it renders a full-screen
+    // ANSI app and never consumes the piped prompt, so every cross-audit run
+    // hung until the per-auditor SIGKILL timeout (status='timeout'). opencode
+    // has NO apiFallback, so there was no rescue path — opencode was 100%
+    // broken for every user. The non-interactive subcommand is `opencode run`,
+    // which reads the prompt from stdin and prints the model reply to stdout
+    // (verified returning a clean response 2026-06-08).
+    invoke: 'opencode run',
+    note: 'OSS / local-friendly; good when privacy matters. `opencode run` is the headless subcommand (bare `opencode` opens the interactive TUI and would hang).',
     detect: (env) => Boolean(env.OPENCODE_SESSION || env.OPENCODE_HOME),
     apiFallback: null,
   },
@@ -149,8 +197,20 @@ export const ROSTER = [
     family: 'openai',
     model: '',
     name: 'Copilot CLI',
-    invoke: 'gh copilot suggest',
-    note: 'Convenient if gh CLI is already authenticated.',
+    // v1.6.0 xaudit-fix — STALE-ARGV repair. `gh copilot suggest` was the OLD
+    // `gh` CLI extension, which only echoes a single shell-command suggestion
+    // (not a code audit) and is a different binary entirely. The standalone
+    // GitHub Copilot CLI (`copilot`, verified 1.0.59) is the real headless
+    // auditor: `-p/--prompt` runs a non-interactive scripting turn, and a bare
+    // `-p` (no inline value) consumes the prompt from piped stdin. Non-
+    // interactive mode REQUIRES `--allow-all-tools` (per the CLI's own help:
+    // "required for non-interactive mode"), otherwise it blocks on a tool-
+    // permission confirmation. Verified 2026-06-08: `copilot -p --allow-all-tools`
+    // reaches the service in seconds and fails FAST + CLEAN when the account's
+    // Copilot is org-policy-restricted (an environmental auth/entitlement error,
+    // not a hang). detect() left on GH_COPILOT_TOKEN/COPILOT_CLI_SESSION.
+    invoke: 'copilot -p --allow-all-tools',
+    note: 'Standalone GitHub Copilot CLI (`copilot`, not the old `gh copilot` extension). `-p` reads the prompt from piped stdin; `--allow-all-tools` is required for non-interactive mode. Convenient if Copilot CLI is already authenticated and not org-policy-restricted.',
     detect: (env) => Boolean(env.GH_COPILOT_TOKEN || env.COPILOT_CLI_SESSION),
     apiFallback: null,
   },
@@ -159,8 +219,14 @@ export const ROSTER = [
     family: 'anthropic',
     model: '',
     name: 'Claude Code',
+    // `claude -p` (== --print) is the headless mode: reads the prompt from piped
+    // stdin and prints the reply to stdout. Verified 2026-06-08 returning fast +
+    // clean (4s) — in this repo claude is the CALLER (CLAUDECODE=1), so detect()
+    // below correctly self-excludes it from its own Trident; the invoke is only
+    // exercised when a NON-claude caller picks claude as an auditor. When claude
+    // IS a valid (non-self) auditor, `claude -p` is the correct current form.
     invoke: 'claude -p',
-    note: 'Anthropic; useful when you want a second Claude pass in a fresh session.',
+    note: 'Anthropic; useful when you want a second Claude pass in a fresh session. `claude -p` reads the prompt from piped stdin (headless --print mode). Self-excluded via detect() when claude is the caller.',
     detect: (env) => Boolean(env.CLAUDECODE || env.CLAUDE_CODE_ENTRYPOINT || env.CLAUDE_PLUGIN_ROOT),
     // model is resolved at call-time via model-refresh.js (24h-cached probe).
     get apiFallback() {
@@ -206,7 +272,18 @@ export function isInstalled(id) {
   const bin = entry.invoke.split(/\s+/)[0];
   // POSIX `command -v` is the portable existence check; bash builtin form
   // works reliably across macOS + Linux. spawnSync exit code = 0 → present.
-  const r = spawnSync('bash', ['-lc', `command -v ${JSON.stringify(bin)} >/dev/null 2>&1`], { timeout: 2000 });
+  //
+  // v1.6.0 diag-fix — `command -v` returns success for a regular file on PATH
+  // even when it is NOT executable, so a dangling/non-+x file shadowing a real
+  // auditor name made doctor report `cli_installed: true` for something that
+  // CANNOT actually be invoked (the "present-claimed but invocation-broken"
+  // false report). We now additionally require the resolved target to pass
+  // `[ -x ]` (or be a shell builtin/keyword/function with no filesystem path,
+  // which `command -v` reports without a leading slash — those are genuinely
+  // runnable). A real installed CLI is an executable file and still passes.
+  const probe = `p=$(command -v ${JSON.stringify(bin)} 2>/dev/null) || exit 1; ` +
+    `case "$p" in /*) [ -x "$p" ] ;; *) : ;; esac`;
+  const r = spawnSync('bash', ['-lc', probe], { timeout: 2000 });
   const installed = r.status === 0;
   _installedCache.set(id, { value: installed, ts: Date.now() });
   return installed;

package/src/brain/tiered-llm.js CHANGED Viewed

@@ -26,7 +26,7 @@ export function resolveTierModel(tier, env = process.env) {
   throw new Error(`tiered-llm: unknown tier '${tier}'`);
 }
-function defaultCallers() {
+export function defaultCallers() {
   return {
     async local({ url, model, prompt, maxTokens }) {
       // Ollama-compatible /api/generate -- streamless single-response mode.
@@ -39,8 +39,48 @@ function defaultCallers() {
       const data = await res.json();
       return { text: data.response || '', usage: { input: data.prompt_eval_count, output: data.eval_count }, model, via: 'local' };
     },
-    async anthropic({ model, prompt, maxTokens, apiKey }) {
+    async openaiLocal({ url, model, prompt, maxTokens, temperature }) {
+      // OpenAI-compatible /chat/completions -- used by the bench to grade on a
+      // LOCAL vLLM-served synth model. `url` already includes the API base
+      // (e.g. http://localhost:8000/v1). enable_thinking:false is REQUIRED:
+      // Qwen3.6 is a hybrid-reasoning model that otherwise emits a thinking
+      // trace instead of the answer; vLLM passes this through to the chat
+      // template. NO silent fallback to a cloud model -- callTiered routes here
+      // WITHOUT a try/catch so a local-synth failure surfaces honestly.
+      const body = {
+        model,
+        messages: [{ role: 'user', content: prompt }],
+        max_tokens: maxTokens,
+        chat_template_kwargs: { enable_thinking: false },
+      };
+      if (typeof temperature === 'number') body.temperature = temperature;
+      const res = await fetch(url.replace(/\/$/, '') + '/chat/completions', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(body),
+      });
+      if (!res.ok) throw new Error(`openai-local LLM HTTP ${res.status}`);
+      const data = await res.json();
+      const choice = data.choices && data.choices[0];
+      if (!choice || !choice.message) throw new Error('openai-local LLM: missing choice in response');
+      const usage = data.usage || {};
+      return {
+        text: choice.message.content || '',
+        usage: { input: usage.prompt_tokens, output: usage.completion_tokens },
+        model,
+        via: 'openai-local',
+      };
+    },
+    async anthropic({ model, prompt, maxTokens, apiKey, temperature }) {
       if (!apiKey) throw new Error('tiered-llm: ANTHROPIC_API_KEY (or IJFW_BRAIN_API_KEY) required for Anthropic fallback');
+      const payload = {
+        model,
+        max_tokens: maxTokens,
+        messages: [{ role: 'user', content: prompt }],
+      };
+      // Optional, backward-compatible: omitted -> API default. Used by the
+      // benchmark harness to pin temperature:0 for deterministic answers.
+      if (typeof temperature === 'number') payload.temperature = temperature;
       const res = await fetch('https://api.anthropic.com/v1/messages', {
         method: 'POST',
         headers: {
@@ -48,11 +88,7 @@ function defaultCallers() {
           'x-api-key': apiKey,
           'anthropic-version': '2023-06-01',
         },
-        body: JSON.stringify({
-          model,
-          max_tokens: maxTokens,
-          messages: [{ role: 'user', content: prompt }],
-        }),
+        body: JSON.stringify(payload),
       });
       if (!res.ok) throw new Error(`Anthropic HTTP ${res.status}`);
       const data = await res.json();
@@ -67,6 +103,19 @@ export async function callTiered(tier, prompt, opts = {}) {
   const model = resolveTierModel(tier, env);
   const maxTokens = opts.maxTokens || DEFAULT_MAX_TOKENS[tier] || 512;
   const callers = opts._callers || defaultCallers();
+  // Opt-in OpenAI-compatible local synth (bench): point at a vLLM server.
+  // FAILS LOUD by design -- no try/catch, no Anthropic fallback. If this
+  // errors, the bench must error too rather than silently grade on a cloud
+  // model from a different family (which would corrupt the experiment).
+  if (env.IJFW_BENCH_SYNTH_URL) {
+    return callers.openaiLocal({
+      url: env.IJFW_BENCH_SYNTH_URL,
+      model,
+      prompt,
+      maxTokens,
+      temperature: opts.temperature,
+    });
+  }
   if (env.IJFW_BRAIN_LOCAL_URL) {
     try {
       return await callers.local({ url: env.IJFW_BRAIN_LOCAL_URL, model, prompt, maxTokens });
@@ -79,5 +128,6 @@ export async function callTiered(tier, prompt, opts = {}) {
     prompt,
     maxTokens,
     apiKey: env.IJFW_BRAIN_API_KEY || env.ANTHROPIC_API_KEY,
+    temperature: opts.temperature,
   });
 }