modelstat 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,13 @@
1
1
  #!/usr/bin/env node
2
// Runtime guard: refuse to start on Node older than 20.18 and print install
// hints to stderr before exiting with status 1. Wrapped in a bare block so the
// two version constants stay out of the module scope.
{
  const [__msMaj, __msMin] = process.versions.node.split('.').map(Number);
  const __msTooOld = __msMaj < 20 || (__msMaj === 20 && __msMin < 18);
  if (__msTooOld) {
    process.stderr.write(`modelstat requires Node \u2265 20.18 (you have ${process.version}).\n`);
    // The hint names the real minimum (20.18): users on 20.0–20.17 also reach
    // this branch, and "Node 20+" would tell them to install what they have.
    process.stderr.write('Install Node 20.18+: https://nodejs.org\n');
    process.stderr.write('Debian/Ubuntu: curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - && sudo apt-get install -y nodejs\n');
    process.exit(1);
  }
}
2
11
  import { createRequire as __modelstatCR } from "node:module";
3
12
  const require = __modelstatCR(import.meta.url);
4
13
  var __create = Object.create;
@@ -4339,6 +4348,18 @@ var init_schemas = __esm({
4339
4348
  tool_calls: external_exports.record(external_exports.string(), external_exports.number().int().nonnegative()).default({}),
4340
4349
  // Files touched, relative to git root. Never absolute — scrubbed by agent.
4341
4350
  files_touched: external_exports.array(external_exports.string().max(512)).max(256).default([]),
4351
+ // Redacted excerpt of the conversation turn (user prompt or
4352
+ // assistant response). The PARSER is responsible for:
4353
+ // 1. Pulling a representative snippet from the turn (≤320 chars).
4354
+ // 2. Running it through @modelstat/core/redact PLUS, when
4355
+ // available, the on-device Privacy Filter adapter.
4356
+ // 3. Stripping code blocks and file-path noise.
4357
+ // Optional — events without it fall back to metadata-only abstracts
4358
+ // (the historical behaviour). The companion-core pipeline runs
4359
+ // redact() over it again as defence-in-depth before building the
4360
+ // summarize prompt; it never gets stored long-term server-side, only
4361
+ // used to construct the summarize input.
4362
+ content_excerpt: external_exports.string().max(320).optional(),
4342
4363
  // Reference to originating file for reparsing
4343
4364
  source_file: external_exports.string().max(1024).nullable(),
4344
4365
  source_byte_offset: external_exports.number().int().nonnegative().nullable(),
@@ -4354,7 +4375,7 @@ var init_schemas = __esm({
4354
4375
  secrets_found: external_exports.number().int().nonnegative().default(0),
4355
4376
  emails_redacted: external_exports.number().int().nonnegative().default(0),
4356
4377
  paths_redacted_absolute: external_exports.number().int().nonnegative().default(0)
4357
- });
4378
+ }).catchall(external_exports.number().int().nonnegative());
4358
4379
  TaxonomyHintRooted = external_exports.object({
4359
4380
  root_key: external_exports.string().max(60),
4360
4381
  name: external_exports.string().max(120),
@@ -4651,6 +4672,28 @@ import { createHash } from "crypto";
4651
4672
  import { createReadStream } from "fs";
4652
4673
  import { stat } from "fs/promises";
4653
4674
  import { createInterface } from "readline";
4675
/**
 * Build a short, redacted plain-text excerpt from one conversation turn.
 *
 * Accepts either a plain string or an array of content blocks; only blocks
 * with `type === "text"` and a string `text` contribute. Fenced and inline
 * code spans are removed, whitespace is collapsed, the result is passed
 * through `redact()` and finally capped at 320 UTF-16 code units.
 *
 * @param {unknown} content - Turn content (string, block array, or nothing).
 * @returns {string | undefined} The excerpt, or `undefined` when no usable
 *   text remains.
 */
function extractExcerpt(content) {
  if (!content) return void 0;
  let text = "";
  if (typeof content === "string") {
    text = content;
  } else if (Array.isArray(content)) {
    text = content
      .filter((block) => block && block.type === "text" && typeof block.text === "string")
      .map((block) => block.text)
      .join(" ");
  }
  if (!text) return void 0;
  // Strip fenced code first. A fence with no closing ``` runs to the end of
  // the text (as in Markdown), so its contents are dropped too rather than
  // leaking into the excerpt. Inline `code` spans are removed afterwards.
  text = text.replace(/```[\s\S]*?(?:```|$)/g, " ").replace(/`[^`]*`/g, " ");
  text = text.replace(/\s+/g, " ").trim();
  if (!text) return void 0;
  const cleaned = redact(text).text;
  let truncated = cleaned.slice(0, 320);
  if (cleaned.length > 320) {
    // slice() counts UTF-16 code units; if the cut landed inside a surrogate
    // pair, drop the dangling high surrogate so the excerpt stays well-formed.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) truncated = truncated.slice(0, -1);
  }
  return truncated.length > 0 ? truncated : void 0;
}
4654
4697
  async function parseClaudeCodeJsonl(ctx) {
4655
4698
  const events = [];
4656
4699
  let rawLines = 0;
@@ -4701,6 +4744,7 @@ async function parseClaudeCodeJsonl(ctx) {
4701
4744
  continue;
4702
4745
  }
4703
4746
  const slug = guessRepoSlugFromPath(cwd);
4747
+ const excerpt = extractExcerpt(a.message?.content);
4704
4748
  events.push({
4705
4749
  source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
4706
4750
  ts: a.timestamp,
@@ -4729,6 +4773,7 @@ async function parseClaudeCodeJsonl(ctx) {
4729
4773
  duration_ms: null,
4730
4774
  tool_calls: {},
4731
4775
  files_touched: [],
4776
+ ...excerpt ? { content_excerpt: excerpt } : {},
4732
4777
  source_file: ctx.sourceFile,
4733
4778
  source_byte_offset: offsetAtLineStart,
4734
4779
  // Files in ~/.claude/projects/ come from the Claude Code app
@@ -4743,6 +4788,7 @@ async function parseClaudeCodeJsonl(ctx) {
4743
4788
  skipped += 1;
4744
4789
  continue;
4745
4790
  }
4791
+ const excerpt = extractExcerpt(u.message?.content);
4746
4792
  events.push({
4747
4793
  source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
4748
4794
  ts: u.timestamp,
@@ -4759,6 +4805,7 @@ async function parseClaudeCodeJsonl(ctx) {
4759
4805
  duration_ms: null,
4760
4806
  tool_calls: {},
4761
4807
  files_touched: [],
4808
+ ...excerpt ? { content_excerpt: excerpt } : {},
4762
4809
  source_file: ctx.sourceFile,
4763
4810
  source_byte_offset: offsetAtLineStart,
4764
4811
  billing: "subscription"
@@ -44197,9 +44244,9 @@ var OLLAMA_CHAT_MODEL, OLLAMA_EMBED_MODEL, SUMMARISER_SYSTEM_PROMPT, SUMMARISER_
44197
44244
  var init_prompts = __esm({
44198
44245
  "../../packages/companion-core/src/pipeline/prompts.ts"() {
44199
44246
  "use strict";
44200
- OLLAMA_CHAT_MODEL = "qwen3:0.6b";
44247
+ OLLAMA_CHAT_MODEL = "qwen3.5:0.8b";
44201
44248
  OLLAMA_EMBED_MODEL = "bge-small-en-v1.5";
44202
- SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. Focus on what the human was trying to accomplish. No quotes, no PII, no code literals, no file paths. Reply with only the sentence.";
44249
+ SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. If the user message includes sampled conversation excerpts, base your summary on what the developer was actually working on (the substance \u2014 what was being built, debugged, refactored, or designed). If only metadata is given, paraphrase the metadata. Never quote the excerpts verbatim. No PII, no code literals, no file paths, no API keys. Reply with only the sentence.";
44203
44250
  SUMMARISER_MAX_TOKENS = 120;
44204
44251
  SUMMARISER_TEMPERATURE = 0.2;
44205
44252
  QWEN_CHARS_PER_TOKEN = 3.3;
@@ -44310,7 +44357,14 @@ async function summariseSlice(sessionId, slice, adapters2) {
44310
44357
  first.files_touched?.length ? `files touched: ${first.files_touched.slice(0, 5).join(", ")}` : null,
44311
44358
  Object.keys(first.tool_calls ?? {}).length ? `tool calls: ${Object.keys(first.tool_calls).slice(0, 5).join(", ")}` : null
44312
44359
  ].filter(Boolean).join("; ");
44313
- const prompt = `Session context: ${promptFacts || "generic coding session"}.
44360
+ const excerpts = sampleAndRedactExcerpts(slice);
44361
+ const excerptBlock = excerpts.length ? excerpts.map((e, i) => ` [turn ${i + 1}] "${e.replace(/\s+/g, " ").trim()}"`).join("\n") : "";
44362
+ const prompt = excerptBlock ? `Session context: ${promptFacts || "generic coding session"}.
44363
+
44364
+ Sampled excerpts from the conversation (already redacted of PII and secrets):
44365
+ ${excerptBlock}
44366
+
44367
+ Write ONE sentence (\u2264240 chars) describing what the human was working on. Focus on the substance \u2014 what was being built, debugged, or designed. No quotes, no PII, no code literals, no file paths.` : `Session context: ${promptFacts || "generic coding session"}.
44314
44368
  Write one sentence describing what the human was doing.`;
44315
44369
  let rawAbstract;
44316
44370
  try {
@@ -44318,7 +44372,20 @@ Write one sentence describing what the human was doing.`;
44318
44372
  } catch {
44319
44373
  rawAbstract = promptFacts || `${first.tool} session with ${slice.length} turns`;
44320
44374
  }
44321
- const redacted = redact(rawAbstract);
44375
+ const regexPass = redact(rawAbstract);
44376
+ let abstractText = regexPass.text;
44377
+ const counts = { ...regexPass.counts };
44378
+ if (adapters2.redact) {
44379
+ try {
44380
+ const modelPass = await adapters2.redact(regexPass.text);
44381
+ abstractText = modelPass.text;
44382
+ for (const [k, v] of Object.entries(modelPass.counts)) {
44383
+ if (k.startsWith("pf_")) counts[k] = v;
44384
+ }
44385
+ } catch {
44386
+ }
44387
+ }
44388
+ const redacted = { text: abstractText, counts };
44322
44389
  const tags = [
44323
44390
  { root_key: "tools", name: first.tool, confidence: 1 },
44324
44391
  { root_key: "providers", name: first.provider, confidence: 1 }
@@ -44359,11 +44426,39 @@ Write one sentence describing what the human was doing.`;
44359
44426
  abstract: redacted.text.slice(0, ABSTRACT_MAX_CHARS),
44360
44427
  tokens,
44361
44428
  tags,
44429
+ // counts is `Record<string, number>` after the optional model
44430
+ // merge; the schema's RedactionReport requires the three regex
44431
+ // counters (always populated from regexPass.counts) plus a
44432
+ // number-valued catchall for pf_*.
44362
44433
  redaction: redacted.counts,
44363
44434
  source_event_ids: sourceEventIds,
44364
44435
  abstract_embedding: segmentEmbedding && segmentEmbedding.length === 384 ? segmentEmbedding : void 0
44365
44436
  };
44366
44437
  }
44438
/**
 * Pick up to five representative excerpts from a slice of events and redact
 * them for use in the summarise prompt.
 *
 * Only events with a non-blank `content_excerpt` are considered. From those,
 * the first, the last, and the entries at 25%, 50% and 75% of the way through
 * are sampled (duplicates collapse), kept in original order, run through
 * `redact()` and capped at 200 UTF-16 code units each.
 *
 * @param {Array<{content_excerpt?: string} | null | undefined>} slice - Events of one session slice.
 * @returns {string[]} Redacted excerpts in conversation order; empty when no
 *   event carries an excerpt.
 */
function sampleAndRedactExcerpts(slice) {
  const texts = [];
  for (let i = 0; i < slice.length; i++) {
    const excerpt = slice[i]?.content_excerpt;
    if (excerpt && excerpt.trim().length > 0) texts.push(excerpt);
  }
  if (texts.length === 0) return [];
  // A Set de-duplicates positions; at most five distinct indexes can result.
  const picks = new Set([0, texts.length - 1]);
  for (const frac of [0.25, 0.5, 0.75]) {
    picks.add(Math.floor(texts.length * frac));
  }
  return [...picks]
    .sort((a, b) => a - b)
    .map((position) => {
      const redacted = redact(texts[position]).text;
      let capped = redacted.slice(0, 200);
      if (redacted.length > 200) {
        // Do not leave half of a surrogate pair at the cut point.
        const lastUnit = capped.charCodeAt(capped.length - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) capped = capped.slice(0, -1);
      }
      return capped;
    });
}
44367
44462
  function turnSurface(e) {
44368
44463
  const parts = [e.kind, e.tool];
44369
44464
  if (e.model) parts.push(e.model);