modelstat 0.0.23 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,87 @@
1
+ Copyright (c) 2026 ModelState Inc
2
+
3
+ Source-Available License
4
+
5
+ 1. Grant of Rights
6
+
7
+ Subject to the terms of this License, you are granted a non-exclusive, worldwide,
8
+ non-transferable, non-sublicensable license to:
9
+
10
+ - View, read, and inspect the source code
11
+ - Build, modify, and run the software
12
+ - Use the software internally, including in production, solely to access or interact
13
+ with ModelState Inc’s hosted services
14
+
15
+ 2. Permitted Use
16
+
17
+ You may use the software as a client, agent, or self-hosted component that connects
18
+ to and depends on ModelState Inc’s cloud or hosted services.
19
+
20
+ 3. Restrictions
21
+
22
+ You may NOT, without explicit prior written permission from ModelState Inc:
23
+
24
+ - Use the software to provide a hosted, managed, or SaaS service to third parties
25
+ - Use the software in any product or service that competes with ModelState Inc
26
+ - Use the software to build or operate an alternative to ModelState Inc’s services
27
+ - Redistribute, sublicense, sell, license, or commercially exploit the software
28
+ - Offer the software (modified or unmodified) as part of a commercial offering
29
+ - Make the software available to third parties as a service
30
+ - Use the software for the benefit of third parties (including multi-tenant or shared environments)
31
+ - Circumvent or attempt to circumvent the limitations of this License
32
+ - Remove or alter any licensing, copyright, or attribution notices
33
+
34
+ 4. Definition of Competing Service
35
+
36
+ “Competing Service” means any product or service that provides substantially similar
37
+ functionality to ModelState Inc’s offerings, including but not limited to:
38
+
39
+ - AI or LLM usage tracking, monitoring, or observability systems
40
+ - Model analytics platforms or dashboards
41
+ - Inference tracking, logging, or telemetry pipelines
42
+ - Evaluation, benchmarking, or quality analysis systems for AI/ML models
43
+ - Cost tracking, performance tracking, or optimization systems for model inference
44
+ - Any system that collects, processes, analyzes, or visualizes usage or behavior of AI or machine learning models, including large language models (LLMs), when offered as a product or service
45
+
46
+ 5. Internal Use
47
+
48
+ Use of the software is permitted only for your internal business or personal use,
49
+ and not for the benefit of third parties.
50
+
51
+ 6. Network Use Restriction
52
+
53
+ You may not use the software to expose APIs, endpoints, dashboards, or services to external
54
+ users except as part of accessing ModelState Inc’s services.
55
+
56
+ 7. Data Extraction Restriction
57
+
58
+ You may not use the software to replicate, extract, reverse engineer, or reconstruct
59
+ ModelState Inc’s service behavior, APIs, data models, or system design for the purpose
60
+ of building, improving, or operating a competing system or service.
61
+
62
+ 8. Ownership
63
+
64
+ All rights, title, and interest in the software remain exclusively with ModelState Inc.
65
+ The software is licensed, not sold.
66
+
67
+ 9. Contributions
68
+
69
+ Unless explicitly agreed otherwise in writing, any contributions submitted to the software
70
+ grant ModelState Inc a perpetual, irrevocable, worldwide, royalty-free license to use,
71
+ modify, and distribute those contributions.
72
+
73
+ 10. Termination
74
+
75
+ This License terminates automatically if you violate any of its terms.
76
+ Upon termination, you must immediately cease all use and delete all copies of the software.
77
+
78
+ 11. Disclaimer of Warranty
79
+
80
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
81
+ EXPRESS OR IMPLIED.
82
+
83
+ 12. Limitation of Liability
84
+
85
+ IN NO EVENT SHALL MODELSTATE INC BE LIABLE FOR ANY CLAIM, DAMAGES,
86
+ OR OTHER LIABILITY ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE
87
+ OR ITS USE OR OTHER DEALINGS IN THE SOFTWARE.
package/dist/cli.mjs CHANGED
@@ -4339,6 +4339,18 @@ var init_schemas = __esm({
4339
4339
  tool_calls: external_exports.record(external_exports.string(), external_exports.number().int().nonnegative()).default({}),
4340
4340
  // Files touched, relative to git root. Never absolute — scrubbed by agent.
4341
4341
  files_touched: external_exports.array(external_exports.string().max(512)).max(256).default([]),
4342
+ // Redacted excerpt of the conversation turn (user prompt or
4343
+ // assistant response). The PARSER is responsible for:
4344
+ // 1. Pulling a representative snippet from the turn (≤320 chars).
4345
+ // 2. Running it through @modelstat/core/redact PLUS, when
4346
+ // available, the on-device Privacy Filter adapter.
4347
+ // 3. Stripping code blocks and file-path noise.
4348
+ // Optional — events without it fall back to metadata-only abstracts
4349
+ // (the historical behaviour). The companion-core pipeline runs
4350
+ // redact() over it again as defence-in-depth before building the
4351
+ // summarize prompt; it never gets stored long-term server-side, only
4352
+ // used to construct the summarize input.
4353
+ content_excerpt: external_exports.string().max(320).optional(),
4342
4354
  // Reference to originating file for reparsing
4343
4355
  source_file: external_exports.string().max(1024).nullable(),
4344
4356
  source_byte_offset: external_exports.number().int().nonnegative().nullable(),
@@ -4354,7 +4366,7 @@ var init_schemas = __esm({
4354
4366
  secrets_found: external_exports.number().int().nonnegative().default(0),
4355
4367
  emails_redacted: external_exports.number().int().nonnegative().default(0),
4356
4368
  paths_redacted_absolute: external_exports.number().int().nonnegative().default(0)
4357
- });
4369
+ }).catchall(external_exports.number().int().nonnegative());
4358
4370
  TaxonomyHintRooted = external_exports.object({
4359
4371
  root_key: external_exports.string().max(60),
4360
4372
  name: external_exports.string().max(120),
@@ -4651,6 +4663,28 @@ import { createHash } from "crypto";
4651
4663
  import { createReadStream } from "fs";
4652
4664
  import { stat } from "fs/promises";
4653
4665
  import { createInterface } from "readline";
4666
/**
 * Build a short, redacted plain-text excerpt from one conversation turn.
 *
 * Accepts either a raw string or an array of content blocks; for arrays only
 * blocks shaped like `{ type: "text", text: string }` contribute, joined by a
 * single space. Fenced and inline code are removed, whitespace is collapsed,
 * the result is passed through `redact()` and then capped at 320 UTF-16 units.
 *
 * @param {string | Array<{ type?: string, text?: unknown } | null | undefined> | null | undefined} content
 *   The turn's message content.
 * @returns {string | undefined} The excerpt, or `undefined` when no usable
 *   text remains (empty input, unsupported shape, or code-only content).
 */
function extractExcerpt(content) {
  const MAX_EXCERPT_CHARS = 320;
  if (!content) return void 0;

  let text = "";
  if (typeof content === "string") {
    text = content;
  } else if (Array.isArray(content)) {
    text = content
      .filter((block) => block && block.type === "text" && typeof block.text === "string")
      .map((block) => block.text)
      .join(" ");
  }
  if (!text) return void 0;

  // Strip fenced code first, then inline code spans. A fence that is opened
  // but never closed (e.g. a turn cut off mid-block) is stripped through the
  // end of the text: losing trailing prose is preferable to letting raw code
  // leak into the excerpt.
  text = text.replace(/```[\s\S]*?(?:```|$)/g, " ").replace(/`[^`]*`/g, " ");
  text = text.replace(/\s+/g, " ").trim();
  if (!text) return void 0;

  const cleaned = redact(text).text;
  let truncated = cleaned.slice(0, MAX_EXCERPT_CHARS);
  if (cleaned.length > MAX_EXCERPT_CHARS) {
    // The cut may have landed between the two halves of a surrogate pair;
    // drop the dangling high surrogate so the excerpt stays well-formed.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      truncated = truncated.slice(0, -1);
    }
  }
  return truncated.length > 0 ? truncated : void 0;
}
4654
4688
  async function parseClaudeCodeJsonl(ctx) {
4655
4689
  const events = [];
4656
4690
  let rawLines = 0;
@@ -4701,6 +4735,7 @@ async function parseClaudeCodeJsonl(ctx) {
4701
4735
  continue;
4702
4736
  }
4703
4737
  const slug = guessRepoSlugFromPath(cwd);
4738
+ const excerpt = extractExcerpt(a.message?.content);
4704
4739
  events.push({
4705
4740
  source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
4706
4741
  ts: a.timestamp,
@@ -4729,6 +4764,7 @@ async function parseClaudeCodeJsonl(ctx) {
4729
4764
  duration_ms: null,
4730
4765
  tool_calls: {},
4731
4766
  files_touched: [],
4767
+ ...excerpt ? { content_excerpt: excerpt } : {},
4732
4768
  source_file: ctx.sourceFile,
4733
4769
  source_byte_offset: offsetAtLineStart,
4734
4770
  // Files in ~/.claude/projects/ come from the Claude Code app
@@ -4743,6 +4779,7 @@ async function parseClaudeCodeJsonl(ctx) {
4743
4779
  skipped += 1;
4744
4780
  continue;
4745
4781
  }
4782
+ const excerpt = extractExcerpt(u.message?.content);
4746
4783
  events.push({
4747
4784
  source_event_id: sourceEventId(ctx.deviceId, ctx.sourceFile, offsetAtLineStart),
4748
4785
  ts: u.timestamp,
@@ -4759,6 +4796,7 @@ async function parseClaudeCodeJsonl(ctx) {
4759
4796
  duration_ms: null,
4760
4797
  tool_calls: {},
4761
4798
  files_touched: [],
4799
+ ...excerpt ? { content_excerpt: excerpt } : {},
4762
4800
  source_file: ctx.sourceFile,
4763
4801
  source_byte_offset: offsetAtLineStart,
4764
4802
  billing: "subscription"
@@ -44197,9 +44235,9 @@ var OLLAMA_CHAT_MODEL, OLLAMA_EMBED_MODEL, SUMMARISER_SYSTEM_PROMPT, SUMMARISER_
44197
44235
  var init_prompts = __esm({
44198
44236
  "../../packages/companion-core/src/pipeline/prompts.ts"() {
44199
44237
  "use strict";
44200
- OLLAMA_CHAT_MODEL = "qwen3:0.6b";
44238
+ OLLAMA_CHAT_MODEL = "qwen3.5:0.8b";
44201
44239
  OLLAMA_EMBED_MODEL = "bge-small-en-v1.5";
44202
- SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. Focus on what the human was trying to accomplish. No quotes, no PII, no code literals, no file paths. Reply with only the sentence.";
44240
+ SUMMARISER_SYSTEM_PROMPT = "You summarise an AI coding session in ONE sentence, \u2264 240 characters. If the user message includes sampled conversation excerpts, base your summary on what the developer was actually working on (the substance \u2014 what was being built, debugged, refactored, or designed). If only metadata is given, paraphrase the metadata. Never quote the excerpts verbatim. No PII, no code literals, no file paths, no API keys. Reply with only the sentence.";
44203
44241
  SUMMARISER_MAX_TOKENS = 120;
44204
44242
  SUMMARISER_TEMPERATURE = 0.2;
44205
44243
  QWEN_CHARS_PER_TOKEN = 3.3;
@@ -44310,7 +44348,14 @@ async function summariseSlice(sessionId, slice, adapters2) {
44310
44348
  first.files_touched?.length ? `files touched: ${first.files_touched.slice(0, 5).join(", ")}` : null,
44311
44349
  Object.keys(first.tool_calls ?? {}).length ? `tool calls: ${Object.keys(first.tool_calls).slice(0, 5).join(", ")}` : null
44312
44350
  ].filter(Boolean).join("; ");
44313
- const prompt = `Session context: ${promptFacts || "generic coding session"}.
44351
+ const excerpts = sampleAndRedactExcerpts(slice);
44352
+ const excerptBlock = excerpts.length ? excerpts.map((e, i) => ` [turn ${i + 1}] "${e.replace(/\s+/g, " ").trim()}"`).join("\n") : "";
44353
+ const prompt = excerptBlock ? `Session context: ${promptFacts || "generic coding session"}.
44354
+
44355
+ Sampled excerpts from the conversation (already redacted of PII and secrets):
44356
+ ${excerptBlock}
44357
+
44358
+ Write ONE sentence (\u2264240 chars) describing what the human was working on. Focus on the substance \u2014 what was being built, debugged, or designed. No quotes, no PII, no code literals, no file paths.` : `Session context: ${promptFacts || "generic coding session"}.
44314
44359
  Write one sentence describing what the human was doing.`;
44315
44360
  let rawAbstract;
44316
44361
  try {
@@ -44318,7 +44363,20 @@ Write one sentence describing what the human was doing.`;
44318
44363
  } catch {
44319
44364
  rawAbstract = promptFacts || `${first.tool} session with ${slice.length} turns`;
44320
44365
  }
44321
- const redacted = redact(rawAbstract);
44366
+ const regexPass = redact(rawAbstract);
44367
+ let abstractText = regexPass.text;
44368
+ const counts = { ...regexPass.counts };
44369
+ if (adapters2.redact) {
44370
+ try {
44371
+ const modelPass = await adapters2.redact(regexPass.text);
44372
+ abstractText = modelPass.text;
44373
+ for (const [k, v] of Object.entries(modelPass.counts)) {
44374
+ if (k.startsWith("pf_")) counts[k] = v;
44375
+ }
44376
+ } catch {
44377
+ }
44378
+ }
44379
+ const redacted = { text: abstractText, counts };
44322
44380
  const tags = [
44323
44381
  { root_key: "tools", name: first.tool, confidence: 1 },
44324
44382
  { root_key: "providers", name: first.provider, confidence: 1 }
@@ -44359,11 +44417,39 @@ Write one sentence describing what the human was doing.`;
44359
44417
  abstract: redacted.text.slice(0, ABSTRACT_MAX_CHARS),
44360
44418
  tokens,
44361
44419
  tags,
44420
+ // counts is `Record<string, number>` after the optional model
44421
+ // merge; the schema's RedactionReport requires the three regex
44422
+ // counters (always populated from regexPass.counts) plus a
44423
+ // number-valued catchall for pf_*.
44362
44424
  redaction: redacted.counts,
44363
44425
  source_event_ids: sourceEventIds,
44364
44426
  abstract_embedding: segmentEmbedding && segmentEmbedding.length === 384 ? segmentEmbedding : void 0
44365
44427
  };
44366
44428
  }
44429
/**
 * Pick up to five representative `content_excerpt` strings from a slice of
 * events, redact each one again, and cap each at 200 UTF-16 units.
 *
 * Sampling is positional over the events that carry a non-blank excerpt:
 * the first, the last, and the entries at the 25%, 50% and 75% marks
 * (duplicates collapse), returned in their original order.
 *
 * @param {Array<{ content_excerpt?: unknown } | null | undefined>} slice
 *   Events to sample from; entries without a non-blank string excerpt are ignored.
 * @returns {string[]} Redacted, truncated excerpts (possibly empty).
 */
function sampleAndRedactExcerpts(slice) {
  const MAX_SAMPLE_CHARS = 200;

  const candidates = [];
  for (const event of slice) {
    const excerpt = event?.content_excerpt;
    // Guard the type so a malformed event cannot throw on `.trim()`.
    if (typeof excerpt === "string" && excerpt.trim().length > 0) {
      candidates.push(excerpt);
    }
  }
  if (candidates.length === 0) return [];

  // First, last and the three quartile positions: at most five distinct
  // indices, so no explicit cap is needed once duplicates are collapsed.
  const total = candidates.length;
  const positions = new Set([0, total - 1]);
  for (const frac of [0.25, 0.5, 0.75]) {
    positions.add(Math.floor(total * frac));
  }
  const ordered = [...positions].sort((a, b) => a - b);

  const out = [];
  for (const pos of ordered) {
    const redacted = redact(candidates[pos]).text;
    let clipped = redacted.slice(0, MAX_SAMPLE_CHARS);
    if (redacted.length > MAX_SAMPLE_CHARS) {
      // Do not leave half of a surrogate pair at the cut point.
      const lastUnit = clipped.charCodeAt(clipped.length - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        clipped = clipped.slice(0, -1);
      }
    }
    // Redaction can consume the whole excerpt; an empty sample would only
    // add a blank line to the prompt, so skip it.
    if (clipped.trim().length === 0) continue;
    out.push(clipped);
  }
  return out;
}
44367
44453
  function turnSurface(e) {
44368
44454
  const parts = [e.kind, e.tool];
44369
44455
  if (e.model) parts.push(e.model);