npm - modelstat - Versions diffs - 0.0.23 → 0.0.25 - Mend

modelstat 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/cli.mjs CHANGED Viewed

@@ -1,4 +1,13 @@
 #!/usr/bin/env node
+{ // Startup guard: exit early on Node older than 20.18. The bare block keeps the two const temporaries block-scoped.
+  const [__msMaj, __msMin] = process.versions.node.split('.').map(Number); // major and minor as numbers; the patch component is ignored
+  if (__msMaj < 20 || (__msMaj === 20 && __msMin < 18)) { // true for any major below 20, or for 20.x with minor below 18
+    process.stderr.write(`modelstat requires Node \u2265 20.18 (you have ${process.version}).\n`);
+    process.stderr.write('Install Node 20+: https://nodejs.org\n');
+    process.stderr.write('Debian/Ubuntu: curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - && sudo apt-get install -y nodejs\n');
+    process.exit(1); // non-zero exit once the upgrade hints have been written to stderr
+  }
+} // NOTE(review): ES module import declarations are hoisted, so the import that follows is resolved before this guard executes
 import { createRequire as __modelstatCR } from "node:module";
 const require = __modelstatCR(import.meta.url);
 var __create = Object.create;
@@ -4339,6 +4348,18 @@ var init_schemas = __esm({
       tool_calls: external_exports.record(external_exports.string(), external_exports.number().int().nonnegative()).default({}),
       // Files touched, relative to git root. Never absolute — scrubbed by agent.
       files_touched: external_exports.array(external_exports.string().max(512)).max(256).default([]),
+      // Redacted excerpt of the conversation turn (user prompt or
+      // assistant response). The PARSER is responsible for:
+      //   1. Pulling a representative snippet from the turn (≤320 chars).
+      //   2. Running it through @modelstat/core/redact PLUS, when
+      //      available, the on-device Privacy Filter adapter.
+      //   3. Stripping code blocks and file-path noise.
+      // Optional — events without it fall back to metadata-only abstracts
+      // (the historical behaviour). The companion-core pipeline runs
+      // redact() over it again as defence-in-depth before building the
+      // summarize prompt; it never gets stored long-term server-side, only
+      // used to construct the summarize input.
+      content_excerpt: external_exports.string().max(320).optional(),
       // Reference to originating file for reparsing
       source_file: external_exports.string().max(1024).nullable(),
       source_byte_offset: external_exports.number().int().nonnegative().nullable(),
@@ -4354,7 +4375,7 @@ var init_schemas = __esm({
       secrets_found: external_exports.number().int().nonnegative().default(0),
       emails_redacted: external_exports.number().int().nonnegative().default(0),
       paths_redacted_absolute: external_exports.number().int().nonnegative().default(0)
-    });
+    }).catchall(external_exports.number().int().nonnegative());
     TaxonomyHintRooted = external_exports.object({
       root_key: external_exports.string().max(60),
       name: external_exports.string().max(120),
@@ -4651,6 +4672,28 @@ import { createHash } from "crypto";
 import { createReadStream } from "fs";
 import { stat } from "fs/promises";
 import { createInterface } from "readline";
+function extractExcerpt(content) { // Builds a redacted, single-line excerpt of at most 320 characters from message content; returns undefined when nothing usable remains.
+  if (!content) return void 0; // null, undefined and empty string all yield no excerpt
+  let text = "";
+  if (typeof content === "string") {
+    text = content;
+  } else if (Array.isArray(content)) { // array form: only blocks whose type is text and whose text is a string contribute
+    const parts = [];
+    for (const block of content) {
+      if (block && block.type === "text" && typeof block.text === "string") {
+        parts.push(block.text);
+      }
+    }
+    text = parts.join(" ");
+  }
+  if (!text) return void 0; // also reached when content is neither a string nor an array
+  text = text.replace(/```[\s\S]*?```/g, " ").replace(/`[^`]*`/g, " ");
+  text = text.replace(/\s+/g, " ").trim(); // collapse whitespace runs; the previous line replaced fenced code blocks and inline code spans with a space
+  if (!text) return void 0; // nothing left once code and whitespace are removed
+  const cleaned = redact(text).text; // redact is defined elsewhere in the bundle; only the text field of its result is used here
+  const truncated = cleaned.slice(0, 320); // cap applied after redaction; slice counts UTF-16 code units, same limit as the content_excerpt schema field
+  return truncated.length > 0 ? truncated : void 0;
+}
 async function parseClaudeCodeJsonl(ctx) {
   const events = [];
   let rawLines = 0;
@@ -4701,6 +4744,7 @@ async function parseClaudeCodeJsonl(ctx) {
         continue;
       }
       const slug = guessRepoSlugFromPath(cwd);
+      const excerpt = extractExcerpt(a.message?.content);
       events.push({
         source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
         ts: a.timestamp,
@@ -4729,6 +4773,7 @@ async function parseClaudeCodeJsonl(ctx) {
         duration_ms: null,
         tool_calls: {},
         files_touched: [],
+        ...excerpt ? { content_excerpt: excerpt } : {},
         source_file: ctx.sourceFile,
         source_byte_offset: offsetAtLineStart,
         // Files in ~/.claude/projects/ come from the Claude Code app
@@ -4743,6 +4788,7 @@ async function parseClaudeCodeJsonl(ctx) {
         skipped += 1;
         continue;
       }
+      const excerpt = extractExcerpt(u.message?.content);
       events.push({
         source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
         ts: u.timestamp,
@@ -4759,6 +4805,7 @@ async function parseClaudeCodeJsonl(ctx) {
         duration_ms: null,
         tool_calls: {},
         files_touched: [],
+        ...excerpt ? { content_excerpt: excerpt } : {},
         source_file: ctx.sourceFile,
         source_byte_offset: offsetAtLineStart,
         billing: "subscription"
@@ -44197,9 +44244,9 @@ var OLLAMA_CHAT_MODEL, OLLAMA_EMBED_MODEL, SUMMARISER_SYSTEM_PROMPT, SUMMARISER_
 var init_prompts = __esm({
   "../../packages/companion-core/src/pipeline/prompts.ts"() {
     "use strict";
-    OLLAMA_CHAT_MODEL = "qwen3:0.6b";
+    OLLAMA_CHAT_MODEL = "qwen3.5:0.8b";
     OLLAMA_EMBED_MODEL = "bge-small-en-v1.5";
-    SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. Focus on what the human was trying to accomplish. No quotes, no PII, no code literals, no file paths. Reply with only the sentence.";
+    SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. If the user message includes sampled conversation excerpts, base your summary on what the developer was actually working on (the substance \u2014 what was being built, debugged, refactored, or designed). If only metadata is given, paraphrase the metadata. Never quote the excerpts verbatim. No PII, no code literals, no file paths, no API keys. Reply with only the sentence.";
     SUMMARISER_MAX_TOKENS = 120;
     SUMMARISER_TEMPERATURE = 0.2;
     QWEN_CHARS_PER_TOKEN = 3.3;
@@ -44310,7 +44357,14 @@ async function summariseSlice(sessionId, slice, adapters2) {
     first.files_touched?.length ? `files touched: ${first.files_touched.slice(0, 5).join(", ")}` : null,
     Object.keys(first.tool_calls ?? {}).length ? `tool calls: ${Object.keys(first.tool_calls).slice(0, 5).join(", ")}` : null
   ].filter(Boolean).join("; ");
-  const prompt = `Session context: ${promptFacts || "generic coding session"}.
+  const excerpts = sampleAndRedactExcerpts(slice);
+  const excerptBlock = excerpts.length ? excerpts.map((e, i) => `  [turn ${i + 1}] "${e.replace(/\s+/g, " ").trim()}"`).join("\n") : "";
+  const prompt = excerptBlock ? `Session context: ${promptFacts || "generic coding session"}.
+Sampled excerpts from the conversation (already redacted of PII and secrets):
+${excerptBlock}
+Write ONE sentence (\u2264240 chars) describing what the human was working on. Focus on the substance \u2014 what was being built, debugged, or designed. No quotes, no PII, no code literals, no file paths.` : `Session context: ${promptFacts || "generic coding session"}.
 Write one sentence describing what the human was doing.`;
   let rawAbstract;
   try {
@@ -44318,7 +44372,20 @@ Write one sentence describing what the human was doing.`;
   } catch {
     rawAbstract = promptFacts || `${first.tool} session with ${slice.length} turns`;
   }
-  const redacted = redact(rawAbstract);
+  const regexPass = redact(rawAbstract);
+  let abstractText = regexPass.text;
+  const counts = { ...regexPass.counts };
+  if (adapters2.redact) {
+    try {
+      const modelPass = await adapters2.redact(regexPass.text);
+      abstractText = modelPass.text;
+      for (const [k, v] of Object.entries(modelPass.counts)) {
+        if (k.startsWith("pf_")) counts[k] = v;
+      }
+    } catch {
+    }
+  }
+  const redacted = { text: abstractText, counts };
   const tags = [
     { root_key: "tools", name: first.tool, confidence: 1 },
     { root_key: "providers", name: first.provider, confidence: 1 }
@@ -44359,11 +44426,39 @@ Write one sentence describing what the human was doing.`;
     abstract: redacted.text.slice(0, ABSTRACT_MAX_CHARS),
     tokens,
     tags,
+    // counts is `Record<string, number>` after the optional model
+    // merge; the schema's RedactionReport requires the three regex
+    // counters (always populated from regexPass.counts) plus a
+    // number-valued catchall for pf_*.
     redaction: redacted.counts,
     source_event_ids: sourceEventIds,
     abstract_embedding: segmentEmbedding && segmentEmbedding.length === 384 ? segmentEmbedding : void 0
   };
 }
+function sampleAndRedactExcerpts(slice) { // Samples up to five content_excerpt values spread across slice, redacts each one and caps it at 200 characters; returns an array of strings.
+  const withContent = [];
+  for (let i = 0; i < slice.length; i++) {
+    const c = slice[i]?.content_excerpt;
+    if (c && c.trim().length > 0) withContent.push({ idx: i, text: c }); // keep only non-blank excerpts; idx is recorded but not read later in this function
+  }
+  if (withContent.length === 0) return [];
+  const picks = [0]; // positions within withContent (not within slice); the first excerpt is always included
+  if (withContent.length > 1) picks.push(withContent.length - 1); // and the last one when there is more than one
+  for (const frac of [0.25, 0.5, 0.75]) { // add the quarter, half and three-quarter positions unless already picked
+    const idx = Math.floor(withContent.length * frac);
+    if (!picks.includes(idx)) picks.push(idx);
+    if (picks.length >= 5) break;
+  }
+  picks.sort((a, b) => a - b); // ascending position order, which follows the order of slice
+  const out = [];
+  for (const i of picks) {
+    const raw = withContent[i]?.text;
+    if (!raw) continue;
+    const redacted = redact(raw).text; // redact is defined elsewhere in the bundle; only its text field is used
+    out.push(redacted.slice(0, 200)); // truncate after redaction
+  }
+  return out;
+}
 function turnSurface(e) {
   const parts = [e.kind, e.tool];
   if (e.model) parts.push(e.model);