npm - modelstat - Versions diffs - 0.0.35 → 0.0.37 - Mend

modelstat 0.0.35 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/cli.mjs CHANGED Viewed

@@ -4500,6 +4500,11 @@ var init_schemas = __esm({
       provider: external_exports.enum(PROVIDERS),
       provider_account_id: external_exports.string().max(200),
       provider_account_label: external_exports.string().max(200).nullable(),
+      /** Human-facing labels — what the user recognises the account by.
+       *  Populated from the keychain blob / OAuth JWT where available. */
+      account_email: external_exports.string().max(200).nullable().optional(),
+      account_org: external_exports.string().max(200).nullable().optional(),
+      display_name: external_exports.string().max(200).nullable().optional(),
       owner_scope: external_exports.enum(IDENTITY_OWNER_SCOPES).default("unassigned"),
       detection_source: external_exports.string().max(80)
     });
@@ -7302,14 +7307,19 @@ async function probeIdentities(os2) {
         { timeout: 3e3 }
       ).toString();
       const body = JSON.parse(out);
-      const tok = body.claudeAiOauth?.accessToken;
+      const oauth = body.claudeAiOauth;
+      const tok = oauth?.accessToken;
       if (tok) {
-        const refresh = body.claudeAiOauth?.refreshToken;
-        const seed = (refresh ?? tok).slice(0, 48);
+        const email = oauth?.account?.email_address ?? oauth?.account?.email ?? null;
+        const orgName = oauth?.organization?.name ?? null;
+        const stableId = oauth?.account?.uuid ?? oauth?.organization?.uuid ?? (oauth?.refreshToken ?? tok).slice(0, 48);
         ids.push({
           provider: "anthropic",
-          provider_account_id: seed,
-          provider_account_label: body.claudeAiOauth?.subscriptionType ?? "Claude Code account",
+          provider_account_id: stableId,
+          provider_account_label: email ?? orgName ?? oauth?.subscriptionType ?? "Claude account",
+          account_email: email,
+          account_org: orgName ?? oauth?.subscriptionType ?? null,
+          display_name: null,
           owner_scope: "unassigned",
           detection_source: "claude_keychain"
         });
@@ -7328,6 +7338,8 @@ async function probeIdentities(os2) {
       const jwt = obj.tokens?.id_token;
       let email = null;
       let sub = null;
+      let name = null;
+      let org = null;
       let provider = "openai";
       if (jwt) {
         const parts = jwt.split(".");
@@ -7339,17 +7351,23 @@ async function probeIdentities(os2) {
             );
             email = body.email ?? null;
             sub = body.sub ?? null;
+            name = body.name ?? null;
+            const oai = body["https://api.openai.com/auth"];
+            org = oai?.organization_id ?? oai?.chatgpt_plan_type ?? null;
             if (body.auth_provider === "google") provider = "openai";
           } catch {
           }
         }
       }
-      const pid = sub ?? obj.tokens?.account_id ?? email;
+      const pid = obj.tokens?.account_id ?? sub ?? email;
       if (pid) {
         ids.push({
           provider,
           provider_account_id: pid,
           provider_account_label: email,
+          account_email: email,
+          account_org: org,
+          display_name: name,
           owner_scope: "unassigned",
           detection_source: "codex_auth_json"
         });
@@ -7371,6 +7389,7 @@ async function probeIdentities(os2) {
           provider: "google",
           provider_account_id: email,
           provider_account_label: email,
+          account_email: email,
           owner_scope: "unassigned",
           detection_source: "gemini_oauth_creds"
         });
@@ -7395,6 +7414,7 @@ async function probeIdentities(os2) {
                 provider: "cursor",
                 provider_account_id: auth.sub ?? auth.email,
                 provider_account_label: auth.email ?? null,
+                account_email: auth.email ?? null,
                 owner_scope: "unassigned",
                 detection_source: "cursor_global_storage"
               });
@@ -42435,9 +42455,9 @@ var require_range = __commonJS({
       parseRange(range) {
         const memoOpts = (this.options.includePrerelease && FLAG_INCLUDE_PRERELEASE) | (this.options.loose && FLAG_LOOSE);
         const memoKey = memoOpts + ":" + range;
-        const cached = cache.get(memoKey);
-        if (cached) {
-          return cached;
+        const cached2 = cache.get(memoKey);
+        if (cached2) {
+          return cached2;
         }
         const loose = this.options.loose;
         const hr = loose ? re[t.HYPHENRANGELOOSE] : re[t.HYPHENRANGE];
@@ -44254,17 +44274,17 @@ var init_queue = __esm({
 });
 // ../../packages/companion-core/src/pipeline/prompts.ts
-var OLLAMA_CHAT_MODEL, OLLAMA_EMBED_MODEL, SUMMARISER_SYSTEM_PROMPT, SUMMARISER_MAX_TOKENS, ABSTRACT_OUTPUT_MAX_CHARS, SUMMARISER_TEMPERATURE, QWEN_CHARS_PER_TOKEN;
+var OLLAMA_CHAT_MODEL, OLLAMA_EMBED_MODEL, SUMMARISER_SYSTEM_PROMPT, SUMMARISER_MAX_TOKENS, SUMMARISER_TEMPERATURE, QWEN_CHARS_PER_TOKEN, ABSTRACT_OUTPUT_MAX_CHARS;
 var init_prompts = __esm({
   "../../packages/companion-core/src/pipeline/prompts.ts"() {
     "use strict";
-    OLLAMA_CHAT_MODEL = "qwen3.5:0.8b";
+    OLLAMA_CHAT_MODEL = "qwen3:4b";
     OLLAMA_EMBED_MODEL = "bge-small-en-v1.5";
-    SUMMARISER_SYSTEM_PROMPT = "Write the SHORTEST paragraph (1-3 sentences) that captures EXACTLY what was ACHIEVED in this coding segment, packed with the concrete domain keywords the developer used. Lead with an outcome verb \u2014 shipped, fixed, migrated, ramped, wired, diagnosed, refactored, designed, deployed, reverted, instrumented. Name the specific things touched: feature names, frameworks, components, bug classes, decisions. Density beats length: a 50-char sentence that names the actual feature beats 200 chars of filler. Skip narration ('the developer'), skip vague verbs ('worked on', 'explored', 'looked into'), skip preamble. If only metadata is given, name the project + tool + visible action concisely. Never quote excerpts verbatim. No PII, no API keys, no file paths, no code literals. Reply with ONLY the paragraph.";
-    SUMMARISER_MAX_TOKENS = 160;
-    ABSTRACT_OUTPUT_MAX_CHARS = 400;
+    SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. If the user message includes sampled conversation excerpts, base your summary on what the developer was actually working on (the substance \u2014 what was being built, debugged, refactored, or designed). If only metadata is given, paraphrase the metadata. Never quote the excerpts verbatim. No PII, no code literals, no file paths, no API keys. Reply with only the sentence.";
+    SUMMARISER_MAX_TOKENS = 120;
     SUMMARISER_TEMPERATURE = 0.2;
     QWEN_CHARS_PER_TOKEN = 3.3;
+    ABSTRACT_OUTPUT_MAX_CHARS = 240;
   }
 });
@@ -44819,6 +44839,14 @@ function ollamaSummarize(cfg = defaultOllamaConfig()) {
       body: JSON.stringify({
         model: cfg.chatModel,
         stream: false,
+        // Disable reasoning. qwen3 (the default summariser family) is a
+        // thinking model: with `think` on it spends the entire
+        // `num_predict` budget on a <think> block and returns EMPTY
+        // content, so the summariser saw "" and the whole pipeline
+        // crash-looped at preflight. We only want the final terse
+        // abstract, never the chain-of-thought. Ollama ignores this
+        // field for non-thinking models, so it's safe across families.
+        think: false,
         options: {
           temperature: SUMMARISER_TEMPERATURE,
           num_predict: Math.min(maxTokens, SUMMARISER_MAX_TOKENS)
@@ -44848,6 +44876,10 @@ function ollamaCognize(cfg = defaultOllamaConfig()) {
           model: cfg.chatModel,
           stream: false,
           format: "json",
+          // Same reason as the summariser: no thinking budget, just the
+          // JSON cognition tags. Thinking models otherwise emit a long
+          // <think> block and return empty content.
+          think: false,
           options: {
             temperature: COGNITION_TEMPERATURE,
             num_predict: COGNITION_MAX_TOKENS
@@ -44985,12 +45017,21 @@ async function loadOnce(cfg) {
     const modelPath = await ensureLlamaModel(cfg);
     const llama = await llamaMod.getLlama();
     const model = await llama.loadModel({ modelPath });
-    const context = await model.createContext({ contextSize: cfg.contextSize });
-    const session = new llamaMod.LlamaChatSession({
-      contextSequence: context.getSequence(),
+    const summariserContext = await model.createContext({
+      contextSize: cfg.contextSize
+    });
+    const cognizerContext = await model.createContext({
+      contextSize: Math.min(cfg.contextSize, 2048)
+    });
+    const summarizer = new llamaMod.LlamaChatSession({
+      contextSequence: summariserContext.getSequence(),
       systemPrompt: SUMMARISER_SYSTEM_PROMPT
     });
-    loaded = { session };
+    const cognizer = new llamaMod.LlamaChatSession({
+      contextSequence: cognizerContext.getSequence(),
+      systemPrompt: COGNITION_SYSTEM_PROMPT
+    });
+    loaded = { summarizer, cognizer };
     return loaded;
   })();
   try {
@@ -45002,11 +45043,11 @@ async function loadOnce(cfg) {
 }
 function llamaSummarize(cfg = defaultLlamaConfig()) {
   return async ({ prompt, maxTokens }) => {
-    const { session } = await loadOnce(cfg);
+    const { summarizer } = await loadOnce(cfg);
     const run = inflight.then(async () => {
-      session.resetChatHistory();
+      summarizer.resetChatHistory();
       void maxTokens;
-      const raw = await session.prompt(prompt, {
+      const raw = await summarizer.prompt(prompt, {
         temperature: SUMMARISER_TEMPERATURE,
         maxTokens: LLAMA_MAX_TOKENS
       });
@@ -45022,11 +45063,42 @@ function llamaSummarize(cfg = defaultLlamaConfig()) {
     return run;
   };
 }
+function llamaCognize(cfg = defaultLlamaConfig()) {
+  return async ({ abstract }) => {
+    if (!abstract || abstract.trim().length < 12) return null;
+    let loadedSessions;
+    try {
+      loadedSessions = await loadOnce(cfg);
+    } catch {
+      return null;
+    }
+    const { cognizer } = loadedSessions;
+    const run = inflight.then(async () => {
+      cognizer.resetChatHistory();
+      const raw = await cognizer.prompt(buildCognitionUserPrompt(abstract), {
+        temperature: COGNITION_TEMPERATURE,
+        // Qwen3.5 likes to "think" before answering. Give it a small
+        // budget — the JSON answer is ~30 tokens but the thinking can
+        // run 200-400. The strip below removes the <think> block.
+        maxTokens: COGNITION_MAX_TOKENS + 400
+      });
+      const stripped = stripThinking(raw ?? "");
+      return parseCognitionReply(stripped);
+    });
+    inflight = run.catch(() => void 0);
+    try {
+      return await run;
+    } catch {
+      return null;
+    }
+  };
+}
 var DEFAULT_LLAMA_MODEL_URL, LLAMA_MAX_TOKENS, loaded, loadPromise, inflight;
 var init_llama = __esm({
   "../../packages/companion-core/src/node/llama.ts"() {
     "use strict";
     init_prompts();
+    init_cognition();
     DEFAULT_LLAMA_MODEL_URL = "https://huggingface.co/lmstudio-community/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf";
     LLAMA_MAX_TOKENS = 1024;
     loaded = null;
@@ -45035,15 +45107,77 @@ var init_llama = __esm({
   }
 });
+// ../../packages/companion-core/src/node/transformersjs-embed.ts
+async function loadPipeline(model) {
+  if (cached) return cached;
+  if (loadFailedPermanently) return null;
+  if (!loadPromise2) {
+    loadPromise2 = (async () => {
+      try {
+        const moduleId = "@huggingface/transformers";
+        const tjs = await import(
+          /* @vite-ignore */
+          moduleId
+        );
+        const p = await tjs.pipeline("feature-extraction", model, {
+          device: "cpu",
+          dtype: "fp32"
+        });
+        cached = p;
+        return p;
+      } catch (err) {
+        const msg = err.message;
+        if (/unsupported|architecture|not supported|onnx|cannot resolve/i.test(msg)) {
+          loadFailedPermanently = true;
+        }
+        console.warn(
+          `[modelstat] transformers.js embedder unavailable (segments will be re-embedded server-side): ${msg}`
+        );
+        loadPromise2 = null;
+        return null;
+      }
+    })();
+  }
+  return loadPromise2;
+}
+function createTransformersJsEmbedder(model = DEFAULT_MODEL) {
+  return async (text) => {
+    if (!text || text.trim().length === 0) return [];
+    const pipe = await loadPipeline(model);
+    if (!pipe) return [];
+    try {
+      const out = await pipe(text, { pooling: "mean", normalize: true });
+      return Array.from(out.data);
+    } catch (err) {
+      console.warn(
+        `[modelstat] embed error (returning empty vector, server will re-embed): ${err.message}`
+      );
+      return [];
+    }
+  };
+}
+var cached, loadPromise2, loadFailedPermanently, DEFAULT_MODEL;
+var init_transformersjs_embed = __esm({
+  "../../packages/companion-core/src/node/transformersjs-embed.ts"() {
+    "use strict";
+    cached = null;
+    loadPromise2 = null;
+    loadFailedPermanently = false;
+    DEFAULT_MODEL = "Xenova/bge-small-en-v1.5";
+  }
+});
 // ../../packages/companion-core/src/node/index.ts
 var node_exports = {};
 __export(node_exports, {
   DEFAULT_LLAMA_MODEL_URL: () => DEFAULT_LLAMA_MODEL_URL,
   FileQueueStore: () => FileQueueStore,
   SqliteQueueStore: () => FileQueueStore,
+  createTransformersJsEmbedder: () => createTransformersJsEmbedder,
   defaultLlamaConfig: () => defaultLlamaConfig,
   defaultOllamaConfig: () => defaultOllamaConfig,
   ensureLlamaModel: () => ensureLlamaModel,
+  llamaCognize: () => llamaCognize,
   llamaSummarize: () => llamaSummarize,
   ollamaCognize: () => ollamaCognize,
   ollamaEmbed: () => ollamaEmbed,
@@ -45057,6 +45191,103 @@ var init_node2 = __esm({
     init_file_queue_store();
     init_ollama();
     init_llama();
+    init_transformersjs_embed();
+  }
+});
+// ../../packages/companion-core/src/redact/privacy-filter.ts
+/**
+ * Create a model-based redactor built on a transformers.js
+ * token-classification pipeline.
+ *
+ * @param opts Optional settings:
+ *   - `device`: defaults to "webgpu" when `globalThis.window` exists, else "cpu"
+ *   - `dtype`: defaults to "q4"
+ *   - `model`: defaults to "openai/privacy-filter"
+ *   - `onProgress`: forwarded to the pipeline as `progress_callback`
+ * @returns An async `(text) => { text, counts }`. Every detected span is
+ *   replaced by `[REDACTED:<TYPE>]` and counted under `counts.pf_<type>`.
+ *   When the pipeline cannot be loaded or inference throws, the input is
+ *   returned unchanged with zeroed counts.
+ */
+async function createPrivacyFilterRedactor(opts = {}) {
+  const isBrowser = typeof globalThis !== "undefined" && typeof globalThis.window !== "undefined";
+  const device = opts.device ?? (isBrowser ? "webgpu" : "cpu");
+  const dtype = opts.dtype ?? "q4";
+  const modelId = opts.model ?? "openai/privacy-filter";
+  let cached2 = null;
+  let loadPromise3 = null;
+  // Anything can be thrown; do not assume an Error instance.
+  const describeError = (err) => err instanceof Error ? err.message : String(err);
+  // Load the pipeline once per redactor; resolves to null when unavailable.
+  async function loadPipeline2() {
+    if (cached2) return cached2;
+    if (!loadPromise3) {
+      loadPromise3 = (async () => {
+        try {
+          const moduleId = "@huggingface/transformers";
+          const tjs = await import(
+            /* @vite-ignore */
+            moduleId
+          );
+          const p = await tjs.pipeline("token-classification", modelId, {
+            device,
+            dtype,
+            ...opts.onProgress ? { progress_callback: opts.onProgress } : {}
+          });
+          cached2 = p;
+          return p;
+        } catch (err) {
+          // Allow a later call to retry the load.
+          loadPromise3 = null;
+          console.warn(
+            "[privacy-filter] adapter unavailable \u2014 install @huggingface/transformers in the consuming package to enable model-based redaction. Falling back to pass-through.",
+            describeError(err)
+          );
+          return null;
+        }
+      })();
+    }
+    return loadPromise3;
+  }
+  // Turn classifier tokens into ordered, non-overlapping spans.
+  // NOTE(review): tokens without character offsets (`start`/`end`) are
+  // skipped, so if the installed pipeline does not emit offsets nothing is
+  // redacted — confirm against the @huggingface/transformers version in use.
+  function collectSpans(tokens) {
+    const tagged = [];
+    for (const t of tokens) {
+      const ent = t.entity ?? "";
+      if (!ent || ent === "O" || ent === "0") continue;
+      if (t.start == null || t.end == null || t.end <= t.start) continue;
+      // Drop the B-/I-/L-/U-/E- tagging prefix so pieces of one entity match.
+      const type = ent.replace(/^[BILUE]-/, "").toUpperCase();
+      tagged.push({ type, start: t.start, end: t.end });
+    }
+    // Do not rely on the model returning tokens in text order.
+    tagged.sort((a, b) => a.start - b.start);
+    const spans = [];
+    for (const cur of tagged) {
+      const last = spans[spans.length - 1];
+      const overlaps = last !== void 0 && cur.start < last.end;
+      const adjacent = last !== void 0 && last.type === cur.type && cur.start - last.end <= 2;
+      if (overlaps || adjacent) {
+        // Never shrink a span; overlapping pieces keep the first label so
+        // the splices below cannot cut into each other.
+        last.end = Math.max(last.end, cur.end);
+      } else {
+        spans.push(cur);
+      }
+    }
+    return spans;
+  }
+  return async function redactWithPrivacyFilter(text) {
+    const empty = {
+      text,
+      counts: {
+        secrets_found: 0,
+        emails_redacted: 0,
+        paths_redacted_absolute: 0
+      }
+    };
+    if (!text) return empty;
+    const classify = await loadPipeline2();
+    if (!classify) return empty;
+    let tokens;
+    try {
+      tokens = await classify(text);
+    } catch (err) {
+      console.warn(
+        "[privacy-filter] inference failed, returning input unchanged:",
+        describeError(err)
+      );
+      return empty;
+    }
+    const spans = collectSpans(tokens);
+    let out = text;
+    const extra = {};
+    // Splice from the end of the text so earlier offsets stay valid.
+    for (let i = spans.length - 1; i >= 0; i -= 1) {
+      const s = spans[i];
+      const key = `pf_${s.type.toLowerCase()}`;
+      extra[key] = (extra[key] ?? 0) + 1;
+      out = `${out.slice(0, s.start)}[REDACTED:${s.type}]${out.slice(s.end)}`;
+    }
+    return {
+      text: out,
+      counts: {
+        secrets_found: 0,
+        emails_redacted: 0,
+        paths_redacted_absolute: 0,
+        ...extra
+      }
+    };
+  };
+}
+var init_privacy_filter = __esm({
+  "../../packages/companion-core/src/redact/privacy-filter.ts"() {
+    "use strict";
   }
 });
@@ -45080,11 +45311,26 @@ async function probeOllama(baseUrl) {
     return false;
   }
 }
-function bundledAdapters() {
+async function bundledAdapters() {
+  const llamaCfg = defaultLlamaConfig();
   return {
-    embed: async () => [],
-    summarize: llamaSummarize(defaultLlamaConfig()),
-    tokenize: (text) => Math.max(1, Math.ceil(text.length / 4))
+    // Same transformers.js BGE-small embedder as the Ollama path. The
+    // bundled-llama path used to ship vector-less (empty arrays);
+    // hooking embeddings here means even no-Ollama installs get
+    // proper segment-vs-leaf cosine matching at classify time.
+    embed: createTransformersJsEmbedder(),
+    summarize: llamaSummarize(llamaCfg),
+    tokenize: (text) => Math.max(1, Math.ceil(text.length / 4)),
+    cognize: llamaCognize(llamaCfg),
+    // Model-based PII redactor (OpenAI Privacy Filter via
+    // transformers.js / ONNX). Runs locally on CPU after the regex
+    // pass in packages/core/redact.ts. ~1 GB model downloaded on
+    // first run; subsequent runs reuse the cached weights. The
+    // factory is async because it dynamic-imports
+    // @huggingface/transformers — if the optional peer dep isn't
+    // installed it returns a pass-through redactor (regex pass is
+    // still the last line of defence).
+    redact: await createPrivacyFilterRedactor()
   };
 }
 async function getAdapters() {
@@ -45097,18 +45343,21 @@ async function getAdapters() {
       `[modelstat] ollama up at ${ollamaCfg.baseUrl} \u2014 using ${ollamaCfg.chatModel} for summarisation`
     );
     adapters = {
-      embed: ollamaEmbed(ollamaCfg),
+      // BGE-small via transformers.js — same model the server uses
+      // via fastembed, so segment vectors land in the same 384-dim
+      // space as leaf-description vectors and cosine similarity is
+      // directly meaningful. We do NOT use ollamaEmbed here because
+      // Ollama's library doesn't host bge-small (404 on pull) and
+      // shipping MiniLM-via-Ollama vs BGE-small-server would break
+      // cross-source similarity.
+      embed: createTransformersJsEmbedder(),
       summarize: ollamaSummarize(ollamaCfg),
       tokenize: ollamaTokenize(),
-      // Cognition pass — best-effort. Reads each abstract back out
-      // and tags the user's mood + meta-cognitive mode, appending a
-      // "[Mood: …] [Mind: …]" suffix the server proposer reads as
-      // ordinary abstract text. Free, runs locally on the same Ollama
-      // daemon as the summariser. The bundled (node-llama-cpp) path
-      // doesn't get cognition — it'd require a second model context
-      // and the segment ships fine without the suffix; install Ollama
-      // for emotion tagging.
-      cognize: ollamaCognize(ollamaCfg)
+      cognize: ollamaCognize(ollamaCfg),
+      // Privacy filter — same OpenAI Privacy Filter model regardless
+      // of which summariser/embedder runtime we ended up on. Factory
+      // is async (dynamic-imports @huggingface/transformers).
+      redact: await createPrivacyFilterRedactor()
     };
     return adapters;
   }
@@ -45122,7 +45371,7 @@ async function getAdapters() {
   console.log(
     "[modelstat] using bundled local summariser (Qwen3.5-4B, runs on this machine)"
   );
-  adapters = bundledAdapters();
+  adapters = await bundledAdapters();
   return adapters;
 }
 async function buildSegments(events) {
@@ -45147,6 +45396,7 @@ var init_pipeline2 = __esm({
     "use strict";
     init_pipeline();
     init_node2();
+    init_privacy_filter();
     adapters = null;
     probed = false;
   }
@@ -45268,7 +45518,7 @@ var init_scan = __esm({
     init_pipeline2();
     init_config2();
     init_api();
-    AGENT_VERSION = "agent-0.0.35";
+    AGENT_VERSION = "agent-0.0.37";
     BATCH_MAX_EVENTS = 2e3;
   }
 });
@@ -47360,7 +47610,7 @@ var init_daemon = __esm({
     init_config2();
     init_lock();
     init_scan();
-    AGENT_VERSION2 = "agent-0.0.35";
+    AGENT_VERSION2 = "agent-0.0.37";
     HEARTBEAT_INTERVAL_MS = 1e4;
     SCAN_INTERVAL_MS = 5 * 60 * 1e3;
     status = {
@@ -47568,6 +47818,11 @@ ${programArgs}
   <key>EnvironmentVariables</key>
   <dict>
     <key>PATH</key><string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin</string>
+    <!-- Heap headroom for the startup scan of a large transcript backlog.
+         Node's default old-space ceiling (~4 GB) OOM-crashed the daemon on
+         big histories; raise it well below typical RAM. Inherited by the
+         tray-spawned 'modelstat start' child too (NODE_OPTIONS propagates). -->
+    <key>NODE_OPTIONS</key><string>--max-old-space-size=8192</string>
   </dict>
   <key>WorkingDirectory</key><string>${home()}</string>
 </dict>
@@ -47631,6 +47886,10 @@ Wants=network-online.target
 [Service]
 Type=simple
+# Heap headroom for the startup scan of a large transcript backlog \u2014
+# Node's default ~4 GB old-space ceiling OOM-crashed the daemon on big
+# histories.
+Environment=NODE_OPTIONS=--max-old-space-size=8192
 ExecStart=${nodeBinary()} ${cliPath} start
 Restart=always
 RestartSec=10
@@ -47785,7 +48044,7 @@ function tryOpenBrowser(url) {
     return false;
   }
 }
-var AGENT_VERSION3 = "agent-0.0.35";
+var AGENT_VERSION3 = "agent-0.0.37";
 function osFamily() {
   const p = platform4();
   if (p === "darwin") return "macos";