modelstat 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +100 -5
- package/dist/cli.mjs.map +1 -1
- package/package.json +2 -2
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
{
  // Runtime guard: refuse to start on Node releases older than 20.18 and
  // tell the user how to upgrade. The bare block keeps the two version
  // bindings out of module scope.
  // NOTE(review): static `import` declarations are hoisted, so they are
  // resolved before this guard runs; the guard covers runtime behaviour
  // only, not parse/link failures on very old Node releases.
  const [__msMaj, __msMin] = process.versions.node.split('.').map(Number);
  if (__msMaj < 20 || (__msMaj === 20 && __msMin < 18)) {
    process.stderr.write(`modelstat requires Node \u2265 20.18 (you have ${process.version}).\n`);
    // State the real minimum: "20+" would mislead users on 20.0–20.17, who
    // already satisfy "20+" but are still rejected by the check above.
    process.stderr.write('Install Node 20.18 or newer: https://nodejs.org\n');
    process.stderr.write('Debian/Ubuntu: curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - && sudo apt-get install -y nodejs\n');
    process.exit(1);
  }
}
|
|
2
11
|
import { createRequire as __modelstatCR } from "node:module";
|
|
3
12
|
const require = __modelstatCR(import.meta.url);
|
|
4
13
|
var __create = Object.create;
|
|
@@ -4339,6 +4348,18 @@ var init_schemas = __esm({
|
|
|
4339
4348
|
tool_calls: external_exports.record(external_exports.string(), external_exports.number().int().nonnegative()).default({}),
|
|
4340
4349
|
// Files touched, relative to git root. Never absolute — scrubbed by agent.
|
|
4341
4350
|
files_touched: external_exports.array(external_exports.string().max(512)).max(256).default([]),
|
|
4351
|
+
// Redacted excerpt of the conversation turn (user prompt or
|
|
4352
|
+
// assistant response). The PARSER is responsible for:
|
|
4353
|
+
// 1. Pulling a representative snippet from the turn (≤320 chars).
|
|
4354
|
+
// 2. Running it through @modelstat/core/redact PLUS, when
|
|
4355
|
+
// available, the on-device Privacy Filter adapter.
|
|
4356
|
+
// 3. Stripping code blocks and file-path noise.
|
|
4357
|
+
// Optional — events without it fall back to metadata-only abstracts
|
|
4358
|
+
// (the historical behaviour). The companion-core pipeline runs
|
|
4359
|
+
// redact() over it again as defence-in-depth before building the
|
|
4360
|
+
// summarize prompt; it never gets stored long-term server-side, only
|
|
4361
|
+
// used to construct the summarize input.
|
|
4362
|
+
content_excerpt: external_exports.string().max(320).optional(),
|
|
4342
4363
|
// Reference to originating file for reparsing
|
|
4343
4364
|
source_file: external_exports.string().max(1024).nullable(),
|
|
4344
4365
|
source_byte_offset: external_exports.number().int().nonnegative().nullable(),
|
|
@@ -4354,7 +4375,7 @@ var init_schemas = __esm({
|
|
|
4354
4375
|
secrets_found: external_exports.number().int().nonnegative().default(0),
|
|
4355
4376
|
emails_redacted: external_exports.number().int().nonnegative().default(0),
|
|
4356
4377
|
paths_redacted_absolute: external_exports.number().int().nonnegative().default(0)
|
|
4357
|
-
});
|
|
4378
|
+
}).catchall(external_exports.number().int().nonnegative());
|
|
4358
4379
|
TaxonomyHintRooted = external_exports.object({
|
|
4359
4380
|
root_key: external_exports.string().max(60),
|
|
4360
4381
|
name: external_exports.string().max(120),
|
|
@@ -4651,6 +4672,28 @@ import { createHash } from "crypto";
|
|
|
4651
4672
|
import { createReadStream } from "fs";
|
|
4652
4673
|
import { stat } from "fs/promises";
|
|
4653
4674
|
import { createInterface } from "readline";
|
|
4675
|
+
/**
 * Build a short, code-free, redacted excerpt of one conversation turn.
 *
 * Steps: flatten the content to text, strip fenced and inline code, collapse
 * whitespace, run the result through `redact()` (defined elsewhere in this
 * bundle; only its `.text` property is read here), then cap the length.
 *
 * @param {string | Array<{type?: string, text?: string}> | null | undefined} content
 *   Either a plain string or an array of blocks. Only blocks with
 *   `type === "text"` and a string `text` contribute; anything else is ignored.
 * @returns {string | undefined} An excerpt of at most 320 UTF-16 code units,
 *   or `undefined` when no usable text remains.
 */
function extractExcerpt(content) {
  if (!content) return void 0;
  let text = "";
  if (typeof content === "string") {
    text = content;
  } else if (Array.isArray(content)) {
    text = content
      .filter((block) => block && block.type === "text" && typeof block.text === "string")
      .map((block) => block.text)
      .join(" ");
  }
  if (!text) return void 0;
  text = text
    // Closed fenced code blocks.
    .replace(/```[\s\S]*?```/g, " ")
    // A fence that is opened but never closed: drop everything after it so
    // the code body cannot leak into the excerpt.
    .replace(/```[\s\S]*$/, " ")
    // Inline code spans.
    .replace(/`[^`]*`/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (!text) return void 0;
  const cleaned = redact(text).text;
  let truncated = cleaned.slice(0, 320);
  if (cleaned.length > 320) {
    // slice() counts UTF-16 code units; if the cut landed between the two
    // halves of a surrogate pair, drop the dangling high surrogate so the
    // excerpt stays well-formed Unicode.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) truncated = truncated.slice(0, -1);
  }
  return truncated.length > 0 ? truncated : void 0;
}
|
|
4654
4697
|
async function parseClaudeCodeJsonl(ctx) {
|
|
4655
4698
|
const events = [];
|
|
4656
4699
|
let rawLines = 0;
|
|
@@ -4701,6 +4744,7 @@ async function parseClaudeCodeJsonl(ctx) {
|
|
|
4701
4744
|
continue;
|
|
4702
4745
|
}
|
|
4703
4746
|
const slug = guessRepoSlugFromPath(cwd);
|
|
4747
|
+
const excerpt = extractExcerpt(a.message?.content);
|
|
4704
4748
|
events.push({
|
|
4705
4749
|
source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
|
|
4706
4750
|
ts: a.timestamp,
|
|
@@ -4729,6 +4773,7 @@ async function parseClaudeCodeJsonl(ctx) {
|
|
|
4729
4773
|
duration_ms: null,
|
|
4730
4774
|
tool_calls: {},
|
|
4731
4775
|
files_touched: [],
|
|
4776
|
+
...excerpt ? { content_excerpt: excerpt } : {},
|
|
4732
4777
|
source_file: ctx.sourceFile,
|
|
4733
4778
|
source_byte_offset: offsetAtLineStart,
|
|
4734
4779
|
// Files in ~/.claude/projects/ come from the Claude Code app
|
|
@@ -4743,6 +4788,7 @@ async function parseClaudeCodeJsonl(ctx) {
|
|
|
4743
4788
|
skipped += 1;
|
|
4744
4789
|
continue;
|
|
4745
4790
|
}
|
|
4791
|
+
const excerpt = extractExcerpt(u.message?.content);
|
|
4746
4792
|
events.push({
|
|
4747
4793
|
source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
|
|
4748
4794
|
ts: u.timestamp,
|
|
@@ -4759,6 +4805,7 @@ async function parseClaudeCodeJsonl(ctx) {
|
|
|
4759
4805
|
duration_ms: null,
|
|
4760
4806
|
tool_calls: {},
|
|
4761
4807
|
files_touched: [],
|
|
4808
|
+
...excerpt ? { content_excerpt: excerpt } : {},
|
|
4762
4809
|
source_file: ctx.sourceFile,
|
|
4763
4810
|
source_byte_offset: offsetAtLineStart,
|
|
4764
4811
|
billing: "subscription"
|
|
@@ -44197,9 +44244,9 @@ var OLLAMA_CHAT_MODEL, OLLAMA_EMBED_MODEL, SUMMARISER_SYSTEM_PROMPT, SUMMARISER_
|
|
|
44197
44244
|
var init_prompts = __esm({
|
|
44198
44245
|
"../../packages/companion-core/src/pipeline/prompts.ts"() {
|
|
44199
44246
|
"use strict";
|
|
44200
|
-
OLLAMA_CHAT_MODEL = "qwen3:0.
|
|
44247
|
+
OLLAMA_CHAT_MODEL = "qwen3.5:0.8b";
|
|
44201
44248
|
OLLAMA_EMBED_MODEL = "bge-small-en-v1.5";
|
|
44202
|
-
SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters.
|
|
44249
|
+
SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. If the user message includes sampled conversation excerpts, base your summary on what the developer was actually working on (the substance \u2014 what was being built, debugged, refactored, or designed). If only metadata is given, paraphrase the metadata. Never quote the excerpts verbatim. No PII, no code literals, no file paths, no API keys. Reply with only the sentence.";
|
|
44203
44250
|
SUMMARISER_MAX_TOKENS = 120;
|
|
44204
44251
|
SUMMARISER_TEMPERATURE = 0.2;
|
|
44205
44252
|
QWEN_CHARS_PER_TOKEN = 3.3;
|
|
@@ -44310,7 +44357,14 @@ async function summariseSlice(sessionId, slice, adapters2) {
|
|
|
44310
44357
|
first.files_touched?.length ? `files touched: ${first.files_touched.slice(0, 5).join(", ")}` : null,
|
|
44311
44358
|
Object.keys(first.tool_calls ?? {}).length ? `tool calls: ${Object.keys(first.tool_calls).slice(0, 5).join(", ")}` : null
|
|
44312
44359
|
].filter(Boolean).join("; ");
|
|
44313
|
-
const
|
|
44360
|
+
const excerpts = sampleAndRedactExcerpts(slice);
|
|
44361
|
+
const excerptBlock = excerpts.length ? excerpts.map((e, i) => ` [turn ${i + 1}] "${e.replace(/\s+/g, " ").trim()}"`).join("\n") : "";
|
|
44362
|
+
const prompt = excerptBlock ? `Session context: ${promptFacts || "generic coding session"}.
|
|
44363
|
+
|
|
44364
|
+
Sampled excerpts from the conversation (already redacted of PII and secrets):
|
|
44365
|
+
${excerptBlock}
|
|
44366
|
+
|
|
44367
|
+
Write ONE sentence (\u2264240 chars) describing what the human was working on. Focus on the substance \u2014 what was being built, debugged, or designed. No quotes, no PII, no code literals, no file paths.` : `Session context: ${promptFacts || "generic coding session"}.
|
|
44314
44368
|
Write one sentence describing what the human was doing.`;
|
|
44315
44369
|
let rawAbstract;
|
|
44316
44370
|
try {
|
|
@@ -44318,7 +44372,20 @@ Write one sentence describing what the human was doing.`;
|
|
|
44318
44372
|
} catch {
|
|
44319
44373
|
rawAbstract = promptFacts || `${first.tool} session with ${slice.length} turns`;
|
|
44320
44374
|
}
|
|
44321
|
-
const
|
|
44375
|
+
const regexPass = redact(rawAbstract);
|
|
44376
|
+
let abstractText = regexPass.text;
|
|
44377
|
+
const counts = { ...regexPass.counts };
|
|
44378
|
+
if (adapters2.redact) {
|
|
44379
|
+
try {
|
|
44380
|
+
const modelPass = await adapters2.redact(regexPass.text);
|
|
44381
|
+
abstractText = modelPass.text;
|
|
44382
|
+
for (const [k, v] of Object.entries(modelPass.counts)) {
|
|
44383
|
+
if (k.startsWith("pf_")) counts[k] = v;
|
|
44384
|
+
}
|
|
44385
|
+
} catch {
|
|
44386
|
+
}
|
|
44387
|
+
}
|
|
44388
|
+
const redacted = { text: abstractText, counts };
|
|
44322
44389
|
const tags = [
|
|
44323
44390
|
{ root_key: "tools", name: first.tool, confidence: 1 },
|
|
44324
44391
|
{ root_key: "providers", name: first.provider, confidence: 1 }
|
|
@@ -44359,11 +44426,39 @@ Write one sentence describing what the human was doing.`;
|
|
|
44359
44426
|
abstract: redacted.text.slice(0, ABSTRACT_MAX_CHARS),
|
|
44360
44427
|
tokens,
|
|
44361
44428
|
tags,
|
|
44429
|
+
// counts is `Record<string, number>` after the optional model
|
|
44430
|
+
// merge; the schema's RedactionReport requires the three regex
|
|
44431
|
+
// counters (always populated from regexPass.counts) plus a
|
|
44432
|
+
// number-valued catchall for pf_*.
|
|
44362
44433
|
redaction: redacted.counts,
|
|
44363
44434
|
source_event_ids: sourceEventIds,
|
|
44364
44435
|
abstract_embedding: segmentEmbedding && segmentEmbedding.length === 384 ? segmentEmbedding : void 0
|
|
44365
44436
|
};
|
|
44366
44437
|
}
|
|
44438
|
+
/**
 * Pick up to five representative `content_excerpt` strings from a slice of
 * events and return them redacted and shortened, in conversation order.
 *
 * Sampling is over the events that actually carry a non-blank excerpt: the
 * first, the last, and the entries at 25%, 50% and 75% of that list
 * (duplicates collapse, so short lists yield fewer than five).
 *
 * @param {Array<{content_excerpt?: string} | null | undefined>} slice
 *   Events to sample; entries without a non-blank `content_excerpt` are skipped.
 * @returns {string[]} Redacted excerpts, each at most 200 UTF-16 code units.
 *   Empty when no event carries an excerpt.
 */
function sampleAndRedactExcerpts(slice) {
  const candidates = [];
  for (let i = 0; i < slice.length; i++) {
    const excerpt = slice[i]?.content_excerpt;
    if (excerpt && excerpt.trim().length > 0) candidates.push(excerpt);
  }
  if (candidates.length === 0) return [];

  // Two endpoints plus three quartile positions: never more than five picks.
  const picks = [0];
  if (candidates.length > 1) picks.push(candidates.length - 1);
  for (const frac of [0.25, 0.5, 0.75]) {
    const idx = Math.floor(candidates.length * frac);
    if (!picks.includes(idx)) picks.push(idx);
  }
  picks.sort((a, b) => a - b);

  return picks.map((i) => {
    // Redact again before the text is placed into the summarise prompt.
    const redacted = redact(candidates[i]).text;
    let clipped = redacted.slice(0, 200);
    if (redacted.length > 200) {
      // Do not end on half of a surrogate pair (slice counts UTF-16 units).
      const lastUnit = clipped.charCodeAt(clipped.length - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) clipped = clipped.slice(0, -1);
    }
    return clipped;
  });
}
|
|
44367
44462
|
function turnSurface(e) {
|
|
44368
44463
|
const parts = [e.kind, e.tool];
|
|
44369
44464
|
if (e.model) parts.push(e.model);
|