modelstat 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -5200,7 +5200,7 @@ var init_schemas = __esm({
5200
5200
  scripts: external_exports.array(external_exports.object({ token: external_exports.string().max(200), summary: external_exports.string().max(200) })).max(8).default([]),
5201
5201
  /** Extractor confidence in [0, 1]. */
5202
5202
  confidence: external_exports.number().min(0).max(1).default(0),
5203
- /** Provenance of the extraction, e.g. `shell.v2`. */
5203
+ /** Provenance of the extraction, e.g. `shell.v3`. */
5204
5204
  extractor: external_exports.string().max(40)
5205
5205
  }).strict();
5206
5206
  ToolCallWire = external_exports.object({
@@ -5390,6 +5390,214 @@ var init_src = __esm({
5390
5390
  }
5391
5391
  });
5392
5392
 
5393
+ // ../../packages/parsers/src/tool-action/executable.ts
5394
/**
 * Split a shell command line into its top-level statements.
 *
 * The text is scanned one character at a time and cut at every unquoted,
 * unescaped `;`, newline, `&`, `&&`, `|` or `||`. Quoted text is copied
 * verbatim, a backslash keeps the following character attached to it, and a
 * `#` that starts a word discards the rest of that line.
 *
 * `&` and `|` that are part of a redirection operator (`>&`, `<&`, `&>`,
 * `>|`) stay inside the statement; they do not start a new one.
 *
 * @param {string} command Raw shell command text.
 * @returns {string[]} Statement texts in source order. Pieces are not trimmed
 *   and may be empty strings; callers are expected to filter them.
 */
function splitStatements(command) {
  const out = [];
  let cur = "";
  let single = false;
  let double = false;
  // Last character appended through the plain (unquoted, unescaped) path on
  // the previous iteration; "" when the previous character was anything else.
  let prevPlain = "";
  for (let i = 0; i < command.length; i++) {
    const c = command[i];
    const before = prevPlain;
    prevPlain = "";
    if (single) {
      // Inside '...': nothing is special except the closing quote.
      cur += c;
      if (c === "'") single = false;
      continue;
    }
    if (double) {
      // Inside "...": a backslash protects the next character (e.g. \").
      if (c === "\\" && i + 1 < command.length) {
        cur += c + command[i + 1];
        i++;
        continue;
      }
      cur += c;
      if (c === '"') double = false;
      continue;
    }
    if (c === "'") {
      single = true;
      cur += c;
      continue;
    }
    if (c === '"') {
      double = true;
      cur += c;
      continue;
    }
    if (c === "\\" && i + 1 < command.length) {
      // Escaped character outside quotes: keep both, never treat as operator.
      cur += c + command[i + 1];
      i++;
      continue;
    }
    if (c === "#" && (cur === "" || /\s$/.test(cur))) {
      // Comment: flush what we have and skip up to (not including) the newline.
      out.push(cur);
      cur = "";
      while (i + 1 < command.length && command[i + 1] !== "\n") i++;
      continue;
    }
    if (c === "\n" || c === ";") {
      out.push(cur);
      cur = "";
      continue;
    }
    if (c === "&" || c === "|") {
      // `2>&1`, `<&3`, `&>file` and `>|file` are redirections, not separators.
      const isRedirection = c === "&"
        ? before === ">" || before === "<" || command[i + 1] === ">"
        : before === ">";
      if (isRedirection) {
        cur += c;
        continue;
      }
      out.push(cur);
      cur = "";
      // Consume the second character of `&&` / `||`.
      if (command[i + 1] === c) i++;
      continue;
    }
    cur += c;
    prevPlain = c;
  }
  out.push(cur);
  return out;
}
5453
/**
 * Return the part of `token` that follows its last `/`.
 *
 * @param {string} token A path-like word, e.g. `/usr/bin/git`.
 * @returns {string} The final path segment; the whole token when it has no
 *   `/`, and an empty string when the token ends with `/`.
 */
function basename(token) {
  const lastSlash = token.lastIndexOf("/");
  // lastSlash is -1 when there is no slash, so slice(0) yields the full token.
  return token.slice(lastSlash + 1);
}
5457
/**
 * Remove a leading sub-shell / command-substitution opener from a token.
 *
 * At most one of `$(` or `(` is dropped, followed by at most one backtick.
 *
 * @param {string} token A whitespace-delimited word from a shell statement.
 * @returns {string} The token without its opener prefix.
 */
function stripOpener(token) {
  let start = 0;
  if (token.startsWith("$(")) {
    start = 2;
  } else if (token.startsWith("(")) {
    start = 1;
  }
  // A backtick is stripped whether or not a parenthesis opener came first.
  if (token.startsWith("`", start)) {
    start += 1;
  }
  return token.slice(start);
}
5464
/**
 * For an assignment token whose value opens a command substitution, return
 * the text that follows the opener (the first word of the substituted
 * command).
 *
 * Recognised forms are `NAME=$(cmd`, ``NAME=`cmd`` and their double-quoted
 * variants such as `NAME="$(cmd`. Arithmetic expansion (`$((`) is not a
 * command and yields `null`.
 *
 * @param {string} token A word containing `=` (e.g. `REV="$(git`).
 * @returns {string|null} The text after the opener, or `null` when the value
 *   is not a command substitution or nothing follows the opener.
 */
function substitutionProgram(token) {
  let rhs = token.slice(token.indexOf("=") + 1);
  // Command substitution is still performed inside double quotes, so
  // `VAR="$(cmd ...)"` must be treated the same as `VAR=$(cmd ...)`.
  if (rhs.startsWith('"')) rhs = rhs.slice(1);
  if (rhs.startsWith("$((")) return null;
  if (rhs.startsWith("$(") || rhs.startsWith("`")) {
    const inner = stripOpener(rhs);
    return inner || null;
  }
  return null;
}
5473
/**
 * Decide whether a normalized candidate word plausibly names a program.
 *
 * A candidate is rejected when it does not match `PROGRAM`, consists only of
 * digits, contains two or more dots, or ends (case-insensitively) with one of
 * the `DATA_EXTENSIONS` suffixes.
 *
 * @param {string} cand Candidate word.
 * @returns {boolean} `true` when none of the rejection rules apply.
 */
function looksLikeProgram(cand) {
  const wellFormed = PROGRAM.test(cand);
  if (!wellFormed) {
    return false;
  }
  const onlyDigits = /^\d+$/.test(cand);
  if (onlyDigits) {
    return false;
  }
  // n dots split the string into n + 1 pieces.
  const dotCount = cand.split(".").length - 1;
  if (dotCount >= 2) {
    return false;
  }
  const folded = cand.toLowerCase();
  for (const ext of DATA_EXTENSIONS) {
    if (folded.endsWith(ext)) {
      return false;
    }
  }
  return true;
}
5480
/**
 * Find the command word of a single shell statement.
 *
 * Words are examined left to right. Grouping brackets, function-definition
 * heads, plain variable assignments and wrapper words are skipped; the first
 * remaining word decides the result and ends the scan.
 *
 * Wrapper words are matched after normalization (opener and trailing
 * punctuation stripped, directory part removed, lower-cased), so
 * `/usr/bin/env node` and `(sudo apt` resolve to `node` and `apt` rather than
 * to the wrapper itself.
 *
 * @param {string} stmt One statement, without statement separators.
 * @returns {{program?: string, builtin?: string}} `{ program }` when the
 *   command word looks like a program, `{ builtin }` when it is listed in
 *   `NOISE_BUILTINS`, and `{}` when no command word could be determined.
 */
function scanStatement(stmt) {
  for (const tok of stmt.split(/\s+/).filter(Boolean)) {
    let t = tok;
    if (BRACKETS.has(t) || FUNCTION_DEF.test(t)) continue;
    if (ASSIGNMENT.test(t)) {
      // `VAR=$(cmd` runs `cmd`; any other assignment is just a prefix.
      const sub = substitutionProgram(t);
      if (sub) t = sub;
      else continue;
    }
    const cand = basename(stripOpener(t).replace(/[)"'`;,]+$/, "")).toLowerCase();
    // An empty word or an option flag means the command word is not recoverable.
    if (!cand || cand.startsWith("-")) break;
    // Checked on the normalized word so path-qualified or parenthesised
    // wrappers are skipped exactly like bare ones.
    if (WRAPPERS.has(cand)) continue;
    if (NOISE_BUILTINS.has(cand)) return { builtin: cand };
    if (looksLikeProgram(cand)) return { program: cand };
    break;
  }
  return {};
}
5497
/**
 * Reduce a shell command line to a single executable label.
 *
 * Statements are scanned in order. The first statement that yields a program
 * wins; a program name longer than `MAX_EXECUTABLE_CHARS` is replaced by
 * `OTHER_BUCKET`. If no statement yields a program, the first builtin seen is
 * returned, and failing that `OTHER_BUCKET`.
 *
 * @param {string} command Raw shell command text.
 * @returns {string} Program name, builtin name, or `OTHER_BUCKET`.
 */
function extractExecutable(command) {
  let firstBuiltin = null;
  for (const piece of splitStatements(command)) {
    const statement = piece.trim();
    const skippable = statement === "" || statement.startsWith("#");
    if (skippable) {
      continue;
    }
    const found = scanStatement(statement);
    if (found.program) {
      const fits = found.program.length <= MAX_EXECUTABLE_CHARS;
      return fits ? found.program : OTHER_BUCKET;
    }
    // Only the earliest builtin is remembered as the fallback label.
    if (!firstBuiltin && found.builtin) {
      firstBuiltin = found.builtin;
    }
  }
  return firstBuiltin ?? OTHER_BUCKET;
}
5510
// Module-level bindings used by the executable-extraction functions.
// They stay undefined until `init_executable` has been invoked.
var OTHER_BUCKET, MAX_EXECUTABLE_CHARS, WRAPPERS, BRACKETS, NOISE_BUILTINS, DATA_EXTENSIONS, ASSIGNMENT, FUNCTION_DEF, PROGRAM;
var init_executable = __esm({
  "../../packages/parsers/src/tool-action/executable.ts"() {
    "use strict";

    // Label returned when no executable can be determined or the name is too long.
    OTHER_BUCKET = "(other)";
    // Program names longer than this are replaced by OTHER_BUCKET.
    MAX_EXECUTABLE_CHARS = 80;

    // Words skipped while looking for the command word of a statement.
    WRAPPERS = /* @__PURE__ */ new Set([
      "sudo", "doas", "env", "command", "exec", "builtin", "nohup",
      "setsid", "time", "nice", "ionice", "chrt", "stdbuf", "xargs",
      // control-flow openers that immediately precede a command
      "then", "do", "else"
    ]);

    // Grouping tokens that are skipped when they appear as their own word.
    BRACKETS = /* @__PURE__ */ new Set(["{", "}", "(", ")"]);

    // Command words reported as `builtin` rather than `program`.
    NOISE_BUILTINS = /* @__PURE__ */ new Set([
      "cd", "pushd", "popd", "echo", "printf", "export", "unset", "set",
      "readonly", "typeset", "declare", "local", "alias", "source", ".",
      "eval", ":", "true", "false", "read", "wait", "trap", "umask",
      "shift", "return", "getopts", "hash", "let", "test", "[", "[[",
      // loop / conditional keywords
      "for", "while", "until", "if", "elif", "fi", "case", "esac",
      "select", "function", "done"
    ]);

    // Lower-case suffixes that disqualify a word from being a program name.
    DATA_EXTENSIONS = [
      ".output", ".txt", ".log", ".json", ".jsonl", ".md",
      ".csv", ".tmp", ".out", ".git", ".lock"
    ];

    // `NAME=` prefix of a variable assignment.
    ASSIGNMENT = /^[A-Za-z_][A-Za-z0-9_]*=/;
    // `name()` or `name(` — head of a function definition.
    FUNCTION_DEF = /^[A-Za-z_][A-Za-z0-9_]*\(\)?$/;
    // Allowed shape of a program name.
    PROGRAM = /^[A-Za-z0-9][A-Za-z0-9._+-]*$/;
  }
});
5600
+
5393
5601
  // ../../packages/parsers/src/tool-action/scripts.ts
5394
5602
  function detectScriptRefs(command) {
5395
5603
  const refs = [];
@@ -5462,9 +5670,9 @@ function extractToolAction(call) {
5462
5670
  let param_shape = null;
5463
5671
  let command_redacted = null;
5464
5672
  if (command != null) {
5465
- const [head = "", ...rest] = command.trim().split(/\s+/);
5466
- executable = basename(head) || null;
5467
- param_shape = clampChars(paramShape(rest.join(" ")), MAX_FIELD_CHARS) || null;
5673
+ executable = extractExecutable(command);
5674
+ const args = command.trim().split(/\s+/).slice(1).join(" ");
5675
+ param_shape = clampChars(paramShape(args), MAX_FIELD_CHARS) || null;
5468
5676
  command_redacted = clampChars(redact(command, call.cwd ?? void 0).text, MAX_FIELD_CHARS) || null;
5469
5677
  }
5470
5678
  return {
@@ -5479,7 +5687,9 @@ function extractToolAction(call) {
5479
5687
  command_redacted,
5480
5688
  scripts: [],
5481
5689
  confidence: 0,
5482
- extractor: `${surface}.v1`
5690
+ // Per-surface provenance. shell bumped to v3 (normalized executable, see
5691
+ // `extractExecutable`); builtin/mcp extraction is unchanged → still v1.
5692
+ extractor: `${surface}.${surface === "shell" ? "v3" : "v1"}`
5483
5693
  };
5484
5694
  }
5485
5695
  function extractLocalToolContext(call) {
@@ -5500,14 +5710,13 @@ function shellCommandOf(input) {
5500
5710
  }
5501
5711
  return null;
5502
5712
  }
5503
- function basename(token) {
5504
- return token.split("/").pop() ?? token;
5505
- }
5506
5713
  var MAX_FIELD_CHARS;
5507
5714
  var init_tool_action = __esm({
5508
5715
  "../../packages/parsers/src/tool-action/index.ts"() {
5509
5716
  "use strict";
5510
5717
  init_src();
5718
+ init_executable();
5719
+ init_executable();
5511
5720
  init_scripts();
5512
5721
  MAX_FIELD_CHARS = 16384;
5513
5722
  }
@@ -47573,7 +47782,7 @@ var init_scan = __esm({
47573
47782
  init_api();
47574
47783
  init_config2();
47575
47784
  init_pipeline2();
47576
- DAEMON_VERSION = true ? "daemon-0.3.0" : "daemon-dev";
47785
+ DAEMON_VERSION = true ? "daemon-0.3.1" : "daemon-dev";
47577
47786
  BATCH_MAX_EVENTS = INGEST_BATCH_MAX_EVENTS;
47578
47787
  BATCH_MAX_TOOL_CALLS = 2e4;
47579
47788
  BATCH_BUFFER_HARD_CAP = BATCH_MAX_EVENTS * 2;
@@ -50083,7 +50292,7 @@ var init_daemon = __esm({
50083
50292
  init_machine_key();
50084
50293
  init_scan();
50085
50294
  init_single_flight();
50086
- DAEMON_VERSION2 = true ? "daemon-0.3.0" : "daemon-dev";
50295
+ DAEMON_VERSION2 = true ? "daemon-0.3.1" : "daemon-dev";
50087
50296
  HEARTBEAT_INTERVAL_MS = 1e4;
50088
50297
  SCAN_INTERVAL_MS = 5 * 60 * 1e3;
50089
50298
  DISCOVERY_INTERVAL_MS = 6e4;
@@ -50685,7 +50894,7 @@ function tryOpenBrowser(url) {
50685
50894
  return false;
50686
50895
  }
50687
50896
  }
50688
- var DAEMON_VERSION3 = true ? "daemon-0.3.0" : "daemon-dev";
50897
+ var DAEMON_VERSION3 = true ? "daemon-0.3.1" : "daemon-dev";
50689
50898
  function osFamily() {
50690
50899
  const p = platform5();
50691
50900
  if (p === "darwin") return "macos";