modelstat 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +162 -10
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -46292,6 +46292,83 @@ var init_session_metadata2 = __esm({
|
|
|
46292
46292
|
}
|
|
46293
46293
|
});
|
|
46294
46294
|
|
|
46295
|
+
// ../../packages/daemon-core/src/pipeline/redaction.ts
|
|
46296
|
+
/**
 * Chain several async redactors into a single redactor.
 *
 * Each stage receives the text produced by the previous stage and is expected
 * to resolve to `{ text, counts }`. Per-category counts are summed across all
 * stages.
 *
 * Best-effort by design: a stage that throws, or that resolves to a value
 * without a string `text`, is skipped and the previous text is carried
 * forward. One broken layer can therefore neither abort the chain nor replace
 * the text with `undefined`.
 *
 * @param {...function(string): Promise<{text: string, counts?: Object<string, number>}>} redactors
 *   Redactors to apply, in order.
 * @returns {function(string): Promise<{text: string, counts: Object<string, number>}>}
 *   A redactor that runs every stage sequentially.
 */
function composeRedactors(...redactors) {
  return async (text) => {
    let out = text;
    const counts = {};
    for (const redactor of redactors) {
      let res;
      try {
        res = await redactor(out);
      } catch {
        // Deliberate best-effort: a failing layer must not block the others.
        continue;
      }
      // Ignore malformed results instead of letting `undefined` become the text.
      if (typeof res?.text !== "string") continue;
      out = res.text;
      for (const [key, value] of Object.entries(res.counts ?? {})) {
        // Only finite numbers can be summed; anything else would corrupt the total.
        if (!Number.isFinite(value)) continue;
        counts[key] = (counts[key] ?? 0) + value;
      }
    }
    return { text: out, counts };
  };
}
|
|
46311
|
+
/**
 * Cheap pre-filter deciding whether a piece of text should be sent through
 * the deeper redaction pass.
 *
 * Returns true when the text contains a credential-looking hint (`=`, `--`,
 * `://`, `@`, the word "bearer", or one of the secret-ish keywords, matched
 * case-insensitively), or an unbroken run of 20 or more characters drawn from
 * letters, digits, `/`, `+`, `_` and `-`.
 *
 * @param {string} text Text to inspect; any falsy value yields false.
 * @returns {boolean} Whether the text deserves the deeper pass.
 */
function shouldDeepRedact(text) {
  if (!text) return false;
  const secretHint = /[=]|--|:\/\/|@|\bbearer\b|token|secret|password|passwd|credential|api[_-]?key|private[_-]?key/i;
  const longOpaqueRun = /[A-Za-z0-9/+_-]{20,}/;
  return secretHint.test(text) || longOpaqueRun.test(text);
}
|
|
46318
|
+
/**
 * Turn the redaction model's raw reply into a de-duplicated list of candidate
 * secret substrings, preserving first-seen order.
 *
 * Each line is trimmed and stripped of leading/trailing quote or backtick
 * characters. A line is dropped when it is empty, is the literal "NONE"
 * (any case), is shorter than MIN_CANDIDATE_CHARS, is listed in SAFE_WORDS
 * (compared lower-cased), starts with "[REDACTED", or has already been kept.
 *
 * @param {string} raw Reply text, one candidate per line.
 * @returns {string[]} Accepted candidates.
 */
function parseRedactReply(raw) {
  const candidates = [];
  const alreadyKept = /* @__PURE__ */ new Set();
  const lines = raw.split("\n");
  for (const rawLine of lines) {
    const candidate = rawLine.trim().replace(/^["'`]+|["'`]+$/g, "");
    const rejected =
      !candidate ||
      candidate.toUpperCase() === "NONE" ||
      candidate.length < MIN_CANDIDATE_CHARS ||
      SAFE_WORDS.has(candidate.toLowerCase()) ||
      candidate.startsWith("[REDACTED") ||
      alreadyKept.has(candidate);
    if (rejected) continue;
    alreadyKept.add(candidate);
    candidates.push(candidate);
  }
  return candidates;
}
|
|
46333
|
+
/**
 * Escape every regular-expression metacharacter in a string so the result can
 * be embedded in a RegExp source and match the input literally.
 *
 * @param {string} s Literal text.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRe(s) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metacharacters, (ch) => `\\${ch}`);
}
|
|
46336
|
+
/**
 * Replace every occurrence of each candidate substring in `text` with
 * LLM_REDACTION_MARKER.
 *
 * Candidates are processed longest-first, so a candidate that is a prefix or
 * substring of a longer one cannot leave the tail of the longer secret behind.
 * Empty and non-string candidates are ignored: an empty candidate would
 * otherwise match between every character and flood the text with markers.
 * Matching is literal (split/join), so no RegExp is built from model output.
 *
 * @param {string} text Text to redact.
 * @param {Iterable<string>} candidates Exact substrings to remove; not mutated.
 * @returns {{text: string, count: number}} The redacted text and the number of
 *   distinct candidates that were found and replaced.
 */
function applyLlmRedactions(text, candidates) {
  let out = text;
  let count = 0;
  const longestFirst = [...candidates]
    .filter((cand) => typeof cand === "string" && cand !== "")
    .sort((a, b) => b.length - a.length);
  for (const cand of longestFirst) {
    // An earlier, longer candidate may already have consumed this one.
    if (!out.includes(cand)) continue;
    const replaced = out.split(cand).join(LLM_REDACTION_MARKER);
    if (replaced !== out) count += 1;
    out = replaced;
  }
  return { text: out, count };
}
|
|
46347
|
+
// Constants of the redaction module. They are declared here and assigned
// inside the __esm callback below; other initializers in this bundle call
// init_redaction() before relying on them.
var LLM_REDACTION_MARKER, MIN_CANDIDATE_CHARS, SAFE_WORDS, REDACT_SYSTEM_PROMPT, REDACT_MAX_TOKENS, REDACT_TEMPERATURE;
var init_redaction = __esm({
  "../../packages/daemon-core/src/pipeline/redaction.ts"() {
    "use strict";

    // Placeholder written over every substring the model flags.
    LLM_REDACTION_MARKER = "[REDACTED:llm]";

    // Candidates shorter than this are discarded by parseRedactReply.
    MIN_CANDIDATE_CHARS = 8;

    // Generation settings used for the redaction prompt.
    REDACT_MAX_TOKENS = 512;
    REDACT_TEMPERATURE = 0;

    // Lower-cased words that are never treated as secrets even when the model
    // returns them: they are literal words (e.g. a flag name), not values.
    SAFE_WORDS = /* @__PURE__ */ new Set([
      "production", "staging", "localhost", "endpoint", "database",
      "password", "secret", "token", "credential",
      "[redacted"
    ]);

    // Instructions for the reviewer model: list remaining secrets verbatim,
    // one per line, or answer exactly NONE.
    REDACT_SYSTEM_PROMPT = "You are a security redaction reviewer. You are given a single shell command that has already been partly redacted. Find any remaining SECRETS or sensitive credentials still present in plaintext: API keys, access tokens, bearer tokens, passwords, private keys, connection strings with credentials, or other high-entropy secret values. Do NOT flag: program names, flags, file paths, hostnames, service/environment names (prod, dev), or existing [REDACTED:...] markers. Output ONLY the exact secret substrings, one per line, copied verbatim character-for-character as they appear in the command. If there are no remaining secrets, output exactly NONE. Output nothing else \u2014 no prose, no explanation, no numbering.";
  }
});
|
|
46371
|
+
|
|
46295
46372
|
// ../../packages/daemon-core/src/pipeline/index.ts
|
|
46296
46373
|
async function buildSegmentsForSession(events, adapters2, onProgress) {
|
|
46297
46374
|
if (events.length === 0) return [];
|
|
@@ -46606,6 +46683,7 @@ var init_pipeline = __esm({
|
|
|
46606
46683
|
init_script_summary();
|
|
46607
46684
|
init_session_metadata2();
|
|
46608
46685
|
init_title();
|
|
46686
|
+
init_redaction();
|
|
46609
46687
|
SEGMENT_TIME_GAP_MS = 15 * 6e4;
|
|
46610
46688
|
SEGMENT_TOPIC_THRESHOLD = 0.35;
|
|
46611
46689
|
SEGMENT_MAX_TURNS = 100;
|
|
@@ -46899,7 +46977,14 @@ async function loadOnce(cfg) {
|
|
|
46899
46977
|
contextSequence: linkExtractContext.getSequence(),
|
|
46900
46978
|
systemPrompt: LINK_EXTRACT_SYSTEM_PROMPT
|
|
46901
46979
|
});
|
|
46902
|
-
|
|
46980
|
+
const redactorContext = await model.createContext({
|
|
46981
|
+
contextSize: Math.min(cfg.contextSize, 2048)
|
|
46982
|
+
});
|
|
46983
|
+
const redactor = new llamaMod.LlamaChatSession({
|
|
46984
|
+
contextSequence: redactorContext.getSequence(),
|
|
46985
|
+
systemPrompt: REDACT_SYSTEM_PROMPT
|
|
46986
|
+
});
|
|
46987
|
+
loaded = { summarizer, cognizer, entitler, scriptSummarizer: scriptSummarizer2, linkExtractor, redactor };
|
|
46903
46988
|
return loaded;
|
|
46904
46989
|
})();
|
|
46905
46990
|
try {
|
|
@@ -47046,6 +47131,39 @@ function llamaExtractLinks(cfg = defaultLlamaConfig()) {
|
|
|
47046
47131
|
}
|
|
47047
47132
|
};
|
|
47048
47133
|
}
|
|
47134
|
+
/**
 * Build a redactor backed by the local llama chat session.
 *
 * The returned function is best-effort: it hands the input back untouched
 * (with empty counts) when the text does not pass shouldDeepRedact, when the
 * sessions cannot be loaded, when the prompt fails, or when the model reports
 * no usable candidates. Prompts are queued on the module-level `inflight`
 * promise, so each one starts only after the previously queued work settles.
 *
 * @param {object} [cfg] Llama configuration; defaults to defaultLlamaConfig().
 * @returns {function(string): Promise<{text: string, counts: Object<string, number>}>}
 *   Redactor reporting replaced candidates under the `llm_secrets` key.
 */
function llamaRedact(cfg = defaultLlamaConfig()) {
  return async (text) => {
    const unchanged = { text, counts: {} };
    if (!shouldDeepRedact(text)) return unchanged;

    let sessions;
    try {
      sessions = await loadOnce(cfg);
    } catch {
      return unchanged;
    }
    const redactor = sessions.redactor;

    const askModel = async () => {
      // Start from a clean history so earlier commands cannot leak into this one.
      redactor.resetChatHistory();
      const raw = await redactor.prompt(text, {
        temperature: REDACT_TEMPERATURE,
        // Thinking budget on top of the short list of substrings.
        maxTokens: REDACT_MAX_TOKENS + 400
      });
      return stripThinking(raw ?? "");
    };

    const run = inflight.then(askModel);
    // Keep the queue alive even if this prompt rejects.
    inflight = run.catch(() => void 0);

    let reply;
    try {
      reply = await run;
    } catch {
      return unchanged;
    }

    const candidates = parseRedactReply(reply);
    if (candidates.length === 0) return unchanged;

    const applied = applyLlmRedactions(text, candidates);
    const counts = applied.count > 0 ? { llm_secrets: applied.count } : {};
    return { text: applied.text, counts };
  };
}
|
|
47049
47167
|
var DEFAULT_LLAMA_MODEL_URL, LLAMA_MAX_TOKENS, loaded, loadPromise, inflight, llamaInstance;
|
|
47050
47168
|
var init_llama = __esm({
|
|
47051
47169
|
"../../packages/daemon-core/src/node/llama.ts"() {
|
|
@@ -47054,6 +47172,7 @@ var init_llama = __esm({
|
|
|
47054
47172
|
init_prompts();
|
|
47055
47173
|
init_script_summary();
|
|
47056
47174
|
init_session_metadata2();
|
|
47175
|
+
init_redaction();
|
|
47057
47176
|
init_title();
|
|
47058
47177
|
DEFAULT_LLAMA_MODEL_URL = "https://huggingface.co/lmstudio-community/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf";
|
|
47059
47178
|
LLAMA_MAX_TOKENS = 1024;
|
|
@@ -47274,6 +47393,7 @@ __export(node_exports, {
|
|
|
47274
47393
|
llamaCognize: () => llamaCognize,
|
|
47275
47394
|
llamaEntitle: () => llamaEntitle,
|
|
47276
47395
|
llamaExtractLinks: () => llamaExtractLinks,
|
|
47396
|
+
llamaRedact: () => llamaRedact,
|
|
47277
47397
|
llamaScriptSummarize: () => llamaScriptSummarize,
|
|
47278
47398
|
llamaSummarize: () => llamaSummarize,
|
|
47279
47399
|
ollamaCognize: () => ollamaCognize,
|
|
@@ -47404,6 +47524,23 @@ var init_privacy_filter = __esm({
|
|
|
47404
47524
|
});
|
|
47405
47525
|
|
|
47406
47526
|
// src/enrich-scripts.ts
|
|
47527
|
+
/**
 * Run the model-backed redactor over every draft's `action.command_redacted`
 * and write the result back in place.
 *
 * Identical commands are redacted once per call and served from a local cache
 * afterwards. The pass is best-effort: if the redactor throws, or resolves to
 * something without a string `text`, the draft keeps its current command
 * rather than having it overwritten with `undefined`.
 *
 * @param {Array<{action?: {command_redacted?: string}}>} drafts Drafts to
 *   update; mutated in place.
 * @param {function(string): Promise<{text: string}>} redactModel Redactor to
 *   apply to each command.
 * @returns {Promise<void>}
 */
async function enrichToolCallRedaction(drafts, redactModel) {
  const redactedByCommand = /* @__PURE__ */ new Map();
  for (const draft of drafts) {
    const action = draft.action;
    const cmd = action?.command_redacted;
    if (!action || !cmd) continue;
    try {
      let deep = redactedByCommand.get(cmd);
      if (deep === void 0) {
        deep = (await redactModel(cmd)).text;
        // Never replace a command with a non-string result.
        if (typeof deep !== "string") continue;
        redactedByCommand.set(cmd, deep);
      }
      action.command_redacted = deep;
    } catch {
      // Deliberate best-effort: keep the existing command on failure.
    }
  }
}
|
|
47407
47544
|
function defaultRoots(cwd) {
|
|
47408
47545
|
if (!cwd) return [];
|
|
47409
47546
|
const seg = cwd.replace(/\/+$/, "").split("/");
|
|
@@ -47453,7 +47590,14 @@ async function enrichOneAction(action, ctx, deps) {
|
|
|
47453
47590
|
if (!content.trim()) continue;
|
|
47454
47591
|
const summaryRaw = await deps.summarize({ ref, content });
|
|
47455
47592
|
if (!summaryRaw) continue;
|
|
47456
|
-
|
|
47593
|
+
let summaryText = redact(summaryRaw).text;
|
|
47594
|
+
if (deps.modelRedact) {
|
|
47595
|
+
try {
|
|
47596
|
+
summaryText = (await deps.modelRedact(summaryText)).text;
|
|
47597
|
+
} catch {
|
|
47598
|
+
}
|
|
47599
|
+
}
|
|
47600
|
+
const summary = summaryText.trim().slice(0, MAX_SUMMARY_CHARS);
|
|
47457
47601
|
if (!summary) continue;
|
|
47458
47602
|
seen.add(token);
|
|
47459
47603
|
out.push({ token, summary });
|
|
@@ -47514,7 +47658,12 @@ async function bundledAdapters() {
|
|
|
47514
47658
|
// @huggingface/transformers — if the optional peer dep isn't
|
|
47515
47659
|
// installed it returns a pass-through redactor (regex pass is
|
|
47516
47660
|
// still the last line of defence).
|
|
47517
|
-
|
|
47661
|
+
// Defense-in-depth redaction, layers 2+3, stacked behind one adapter and
|
|
47662
|
+
// applied to BOTH the abstract (in daemon-core) and `command_redacted` (in
|
|
47663
|
+
// enrichToolCallRedaction, via enrichScripts below): the OpenAI Privacy Filter (NER/PII) then the
|
|
47664
|
+
// local-LLM backstop for secrets the fixed patterns miss. Layer 1 (the
|
|
47665
|
+
// deterministic regex floor in @modelstat/core/redact) already ran first.
|
|
47666
|
+
redact: composeRedactors(await createPrivacyFilterRedactor(), llamaRedact(llamaCfg))
|
|
47518
47667
|
};
|
|
47519
47668
|
}
|
|
47520
47669
|
async function getAdapters() {
|
|
@@ -47546,8 +47695,10 @@ async function buildSessionMetadata2(segments, events) {
|
|
|
47546
47695
|
});
|
|
47547
47696
|
}
|
|
47548
47697
|
async function enrichScripts(drafts, contexts = []) {
|
|
47549
|
-
if (
|
|
47550
|
-
await getAdapters();
|
|
47698
|
+
if (drafts.length === 0) return;
|
|
47699
|
+
const built = await getAdapters();
|
|
47700
|
+
if (built.redact) await enrichToolCallRedaction(drafts, built.redact);
|
|
47701
|
+
if (contexts.length === 0) return;
|
|
47551
47702
|
if (!scriptSummarizer) scriptSummarizer = llamaScriptSummarize(defaultLlamaConfig());
|
|
47552
47703
|
await enrichToolCallScripts(drafts, contexts, {
|
|
47553
47704
|
summarize: scriptSummarizer,
|
|
@@ -47555,7 +47706,8 @@ async function enrichScripts(drafts, contexts = []) {
|
|
|
47555
47706
|
readFile: async (path5) => {
|
|
47556
47707
|
const buf = await fsReadFile(path5);
|
|
47557
47708
|
return buf.subarray(0, MAX_SCRIPT_READ_BYTES).toString("utf8");
|
|
47558
|
-
}
|
|
47709
|
+
},
|
|
47710
|
+
modelRedact: built.redact
|
|
47559
47711
|
});
|
|
47560
47712
|
}
|
|
47561
47713
|
async function preflightSummariser() {
|
|
@@ -47786,7 +47938,7 @@ var init_scan = __esm({
|
|
|
47786
47938
|
init_api();
|
|
47787
47939
|
init_config2();
|
|
47788
47940
|
init_pipeline2();
|
|
47789
|
-
DAEMON_VERSION = true ? "daemon-0.
|
|
47941
|
+
DAEMON_VERSION = true ? "daemon-0.4.0" : "daemon-dev";
|
|
47790
47942
|
BATCH_MAX_EVENTS = INGEST_BATCH_MAX_EVENTS;
|
|
47791
47943
|
BATCH_MAX_TOOL_CALLS = 2e4;
|
|
47792
47944
|
BATCH_BUFFER_HARD_CAP = BATCH_MAX_EVENTS * 2;
|
|
@@ -47999,7 +48151,7 @@ var PROCESSING_VERSION;
|
|
|
47999
48151
|
var init_processing_version = __esm({
|
|
48000
48152
|
"src/processing-version.ts"() {
|
|
48001
48153
|
"use strict";
|
|
48002
|
-
PROCESSING_VERSION =
|
|
48154
|
+
PROCESSING_VERSION = 6;
|
|
48003
48155
|
}
|
|
48004
48156
|
});
|
|
48005
48157
|
|
|
@@ -50296,7 +50448,7 @@ var init_daemon = __esm({
|
|
|
50296
50448
|
init_machine_key();
|
|
50297
50449
|
init_scan();
|
|
50298
50450
|
init_single_flight();
|
|
50299
|
-
DAEMON_VERSION2 = true ? "daemon-0.
|
|
50451
|
+
DAEMON_VERSION2 = true ? "daemon-0.4.0" : "daemon-dev";
|
|
50300
50452
|
HEARTBEAT_INTERVAL_MS = 1e4;
|
|
50301
50453
|
SCAN_INTERVAL_MS = 5 * 60 * 1e3;
|
|
50302
50454
|
DISCOVERY_INTERVAL_MS = 6e4;
|
|
@@ -50898,7 +51050,7 @@ function tryOpenBrowser(url) {
|
|
|
50898
51050
|
return false;
|
|
50899
51051
|
}
|
|
50900
51052
|
}
|
|
50901
|
-
var DAEMON_VERSION3 = true ? "daemon-0.
|
|
51053
|
+
var DAEMON_VERSION3 = true ? "daemon-0.4.0" : "daemon-dev";
|
|
50902
51054
|
function osFamily() {
|
|
50903
51055
|
const p = platform5();
|
|
50904
51056
|
if (p === "darwin") return "macos";
|