modelstat 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +167 -11
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -4620,9 +4620,13 @@ var init_redact_floor = __esm({
|
|
|
4620
4620
|
replacement: "<REDACTED:modelstat_device_secret>"
|
|
4621
4621
|
},
|
|
4622
4622
|
// Generic env-style KEY=VALUE where KEY names a secret. Keeps the var name.
|
|
4623
|
+
// The keyword may be the WHOLE name (`SECRET=`, `TOKEN=`) or part of it
|
|
4624
|
+
// (`AWS_SECRET_ACCESS_KEY=`), so the prefix is `[A-Z0-9_]*` (zero-or-more) —
|
|
4625
|
+
// a mandatory leading `[A-Z]` here used to eat the first letter and miss every
|
|
4626
|
+
// bare-keyword name, leaking `SECRET="…"` / `TOKEN="…"` straight to the wire.
|
|
4623
4627
|
{
|
|
4624
4628
|
name: "env_secret",
|
|
4625
|
-
pattern: /\b([A-
|
|
4629
|
+
pattern: /\b([A-Z0-9_]*(?:TOKEN|KEY|SECRET|PASSWORD|PASSWD|PASSPHRASE|CREDENTIAL|API)[A-Z0-9_]*)\s*[:=]\s*['"]?([^\s'"]{12,})['"]?/g,
|
|
4626
4630
|
replacement: "$1=<REDACTED:env_secret>"
|
|
4627
4631
|
},
|
|
4628
4632
|
{
|
|
@@ -46288,6 +46292,83 @@ var init_session_metadata2 = __esm({
|
|
|
46288
46292
|
}
|
|
46289
46293
|
});
|
|
46290
46294
|
|
|
46295
|
+
// ../../packages/daemon-core/src/pipeline/redaction.ts
|
|
46296
|
+
/**
 * Chain several async redactors into one redactor.
 *
 * Each redactor is awaited in order with the text produced by the previous
 * layer and is expected to resolve to `{ text, counts }`. Counts are summed
 * per key across all layers.
 *
 * Layers are best-effort: a layer that throws, or that resolves without a
 * string `text`, is skipped and the text it was given flows on to the next
 * layer unchanged.
 *
 * @param {...Function} redactors - async `(text) => { text, counts }` layers.
 * @returns {Function} async `(text) => { text, counts }` running every layer.
 */
function composeRedactors(...redactors) {
  return async (text) => {
    let out = text;
    const counts = {};
    for (const r of redactors) {
      try {
        const res = await r(out);
        // A malformed result must never replace the running text with
        // `undefined`; treat it like a failed layer and keep what we have.
        if (typeof res?.text !== "string") continue;
        out = res.text;
        // `counts` is optional: a layer may legitimately report nothing.
        for (const [k, v] of Object.entries(res.counts ?? {})) {
          counts[k] = (counts[k] ?? 0) + v;
        }
      } catch {
        // Deliberately swallowed: one failing layer must not prevent the
        // remaining layers from running.
      }
    }
    return { text: out, counts };
  };
}
|
|
46311
|
+
/**
 * Decide whether a piece of text should be sent through the deeper redaction
 * pass.
 *
 * Returns `true` when the text contains a syntactic or keyword hint (`=`,
 * `--`, `://`, `@`, "bearer", "token", "secret", "password", ...) or a run of
 * 20 or more characters from `[A-Za-z0-9/+_-]`. Falsy input yields `false`.
 *
 * @param {string} text - text to inspect.
 * @returns {boolean} whether the text matches any of the heuristics.
 */
function shouldDeepRedact(text) {
  const secretHints = /[=]|--|:\/\/|@|\bbearer\b|token|secret|password|passwd|credential|api[_-]?key|private[_-]?key/i;
  const longOpaqueRun = /[A-Za-z0-9/+_-]{20,}/;
  return Boolean(text) && (secretHints.test(text) || longOpaqueRun.test(text));
}
|
|
46318
|
+
/**
 * Turn the raw model reply into a de-duplicated list of candidate secret
 * substrings, in first-seen order.
 *
 * The reply is read one line at a time. Each line is trimmed and stripped of
 * leading/trailing quote characters, then dropped when it is:
 *   - empty, or the literal `NONE` (any case);
 *   - shorter than `MIN_CANDIDATE_CHARS`;
 *   - one of `SAFE_WORDS` (compared lower-cased);
 *   - an existing redaction marker, in either the square-bracket
 *     `[REDACTED…` form or the angle-bracket `<REDACTED…` form that the
 *     pattern-based replacements in this bundle emit;
 *   - a repeat of an earlier line.
 *
 * @param {string} raw - model output, one candidate per line.
 * @returns {string[]} accepted candidates.
 */
function parseRedactReply(raw) {
  const out = [];
  const seen = new Set();
  // A nullish reply is treated as "no candidates" rather than throwing.
  for (const line of String(raw ?? "").split("\n")) {
    const s = line.trim().replace(/^["'`]+|["'`]+$/g, "");
    if (!s || s.toUpperCase() === "NONE") continue;
    if (s.length < MIN_CANDIDATE_CHARS) continue;
    if (SAFE_WORDS.has(s.toLowerCase())) continue;
    // Both marker spellings are already-redacted text, never a secret.
    if (s.startsWith("[REDACTED") || s.startsWith("<REDACTED")) continue;
    if (seen.has(s)) continue;
    seen.add(s);
    out.push(s);
  }
  return out;
}
|
|
46333
|
+
/**
 * Escape a string so it can be embedded in a `RegExp` source and match
 * itself literally: every regex metacharacter gets a leading backslash.
 *
 * @param {string} s - literal text.
 * @returns {string} the text with regex metacharacters backslash-escaped.
 */
function escapeRe(s) {
  const metachars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metachars, (ch) => "\\" + ch);
}
|
|
46336
|
+
/**
 * Replace every occurrence of each candidate substring in `text` with
 * `LLM_REDACTION_MARKER`.
 *
 * Candidates are applied longest-first so a shorter candidate cannot split a
 * longer one that contains it. Matching is purely literal (split/join), so
 * no regular expression is ever built from model output.
 *
 * Candidates are skipped when they are:
 *   - not a string, or empty (an empty needle "matches" between every
 *     character and would shred the text);
 *   - a fragment of the marker itself (it would rewrite markers inserted
 *     earlier in this loop into nested markers);
 *   - absent from the current text.
 *
 * @param {string} text - text to redact.
 * @param {Iterable<string>} candidates - literal substrings to remove.
 * @returns {{text: string, count: number}} the redacted text and the number
 *   of distinct candidates that were replaced (not the number of occurrences).
 */
function applyLlmRedactions(text, candidates) {
  let out = text;
  let count = 0;
  const longestFirst = [...candidates].sort((a, b) => b.length - a.length);
  for (const cand of longestFirst) {
    if (typeof cand !== "string" || cand === "") continue;
    if (LLM_REDACTION_MARKER.includes(cand)) continue;
    if (!out.includes(cand)) continue;
    out = out.split(cand).join(LLM_REDACTION_MARKER);
    count += 1;
  }
  return { text: out, count };
}
|
|
46347
|
+
// Module-level settings for the LLM redaction pass. They are declared here
// and given their values by the initializer body registered below.
var LLM_REDACTION_MARKER, MIN_CANDIDATE_CHARS, SAFE_WORDS, REDACT_SYSTEM_PROMPT, REDACT_MAX_TOKENS, REDACT_TEMPERATURE;
var init_redaction = __esm({
  "../../packages/daemon-core/src/pipeline/redaction.ts"() {
    "use strict";

    // Generation settings for the redaction prompt.
    REDACT_TEMPERATURE = 0;
    REDACT_MAX_TOKENS = 512;

    // Text substituted for every substring the model flags.
    LLM_REDACTION_MARKER = "[REDACTED:llm]";

    // Model-reported candidates shorter than this are ignored.
    MIN_CANDIDATE_CHARS = 8;

    // Lower-cased words that are never accepted as a secret candidate.
    const safeWordList = [
      "production",
      "staging",
      "localhost",
      "endpoint",
      "database",
      // The next entries are the literal words themselves (e.g. a flag
      // name), not a value.
      "password",
      "secret",
      "token",
      "credential",
      "[redacted"
    ];
    SAFE_WORDS = new Set(safeWordList);

    // System prompt: asks for verbatim secret substrings, one per line, or
    // the single word NONE.
    REDACT_SYSTEM_PROMPT = "You are a security redaction reviewer. You are given a single shell command that has already been partly redacted. Find any remaining SECRETS or sensitive credentials still present in plaintext: API keys, access tokens, bearer tokens, passwords, private keys, connection strings with credentials, or other high-entropy secret values. Do NOT flag: program names, flags, file paths, hostnames, service/environment names (prod, dev), or existing [REDACTED:...] markers. Output ONLY the exact secret substrings, one per line, copied verbatim character-for-character as they appear in the command. If there are no remaining secrets, output exactly NONE. Output nothing else \u2014 no prose, no explanation, no numbering.";
  }
});
|
|
46371
|
+
|
|
46291
46372
|
// ../../packages/daemon-core/src/pipeline/index.ts
|
|
46292
46373
|
async function buildSegmentsForSession(events, adapters2, onProgress) {
|
|
46293
46374
|
if (events.length === 0) return [];
|
|
@@ -46602,6 +46683,7 @@ var init_pipeline = __esm({
|
|
|
46602
46683
|
init_script_summary();
|
|
46603
46684
|
init_session_metadata2();
|
|
46604
46685
|
init_title();
|
|
46686
|
+
init_redaction();
|
|
46605
46687
|
SEGMENT_TIME_GAP_MS = 15 * 6e4;
|
|
46606
46688
|
SEGMENT_TOPIC_THRESHOLD = 0.35;
|
|
46607
46689
|
SEGMENT_MAX_TURNS = 100;
|
|
@@ -46895,7 +46977,14 @@ async function loadOnce(cfg) {
|
|
|
46895
46977
|
contextSequence: linkExtractContext.getSequence(),
|
|
46896
46978
|
systemPrompt: LINK_EXTRACT_SYSTEM_PROMPT
|
|
46897
46979
|
});
|
|
46898
|
-
|
|
46980
|
+
const redactorContext = await model.createContext({
|
|
46981
|
+
contextSize: Math.min(cfg.contextSize, 2048)
|
|
46982
|
+
});
|
|
46983
|
+
const redactor = new llamaMod.LlamaChatSession({
|
|
46984
|
+
contextSequence: redactorContext.getSequence(),
|
|
46985
|
+
systemPrompt: REDACT_SYSTEM_PROMPT
|
|
46986
|
+
});
|
|
46987
|
+
loaded = { summarizer, cognizer, entitler, scriptSummarizer: scriptSummarizer2, linkExtractor, redactor };
|
|
46899
46988
|
return loaded;
|
|
46900
46989
|
})();
|
|
46901
46990
|
try {
|
|
@@ -47042,6 +47131,39 @@ function llamaExtractLinks(cfg = defaultLlamaConfig()) {
|
|
|
47042
47131
|
}
|
|
47043
47132
|
};
|
|
47044
47133
|
}
|
|
47134
|
+
/**
 * Build an async redactor that asks the local llama chat session to point
 * out secrets still present in a piece of text.
 *
 * The returned function resolves to `{ text, counts }`. It returns the input
 * unchanged (with empty counts) when `shouldDeepRedact` rejects the text,
 * when `loadOnce` fails, when the prompt fails, or when the reply yields no
 * candidates. Otherwise the candidates are replaced via `applyLlmRedactions`
 * and the number replaced is reported under `llm_secrets`.
 *
 * Prompts are chained onto the shared `inflight` promise, so each one starts
 * only after the previously queued work has settled.
 *
 * @param {object} [cfg] - llama configuration handed to `loadOnce`.
 * @returns {Function} async `(text) => { text, counts }`.
 */
function llamaRedact(cfg = defaultLlamaConfig()) {
  // One prompt round-trip on a freshly reset chat session.
  const askModel = async (session, command) => {
    session.resetChatHistory();
    const answer = await session.prompt(command, {
      temperature: REDACT_TEMPERATURE,
      // Extra room for thinking output on top of the short substring list.
      maxTokens: REDACT_MAX_TOKENS + 400
    });
    return stripThinking(answer ?? "");
  };

  return async (text) => {
    const passthrough = { text, counts: {} };
    if (!shouldDeepRedact(text)) return passthrough;

    let sessions;
    try {
      sessions = await loadOnce(cfg);
    } catch {
      // Model could not be loaded: leave the text as it is.
      return passthrough;
    }
    const session = sessions.redactor;

    // Queue behind whatever is already in flight, then publish a
    // never-rejecting tail so later callers can chain safely.
    const pending = inflight.then(() => askModel(session, text));
    inflight = pending.catch(() => void 0);

    let reply;
    try {
      reply = await pending;
    } catch {
      // Prompt failed: leave the text as it is.
      return passthrough;
    }

    const secrets = parseRedactReply(reply);
    if (secrets.length === 0) return passthrough;

    const applied = applyLlmRedactions(text, secrets);
    const counts = applied.count > 0 ? { llm_secrets: applied.count } : {};
    return { text: applied.text, counts };
  };
}
|
|
47045
47167
|
var DEFAULT_LLAMA_MODEL_URL, LLAMA_MAX_TOKENS, loaded, loadPromise, inflight, llamaInstance;
|
|
47046
47168
|
var init_llama = __esm({
|
|
47047
47169
|
"../../packages/daemon-core/src/node/llama.ts"() {
|
|
@@ -47050,6 +47172,7 @@ var init_llama = __esm({
|
|
|
47050
47172
|
init_prompts();
|
|
47051
47173
|
init_script_summary();
|
|
47052
47174
|
init_session_metadata2();
|
|
47175
|
+
init_redaction();
|
|
47053
47176
|
init_title();
|
|
47054
47177
|
DEFAULT_LLAMA_MODEL_URL = "https://huggingface.co/lmstudio-community/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf";
|
|
47055
47178
|
LLAMA_MAX_TOKENS = 1024;
|
|
@@ -47270,6 +47393,7 @@ __export(node_exports, {
|
|
|
47270
47393
|
llamaCognize: () => llamaCognize,
|
|
47271
47394
|
llamaEntitle: () => llamaEntitle,
|
|
47272
47395
|
llamaExtractLinks: () => llamaExtractLinks,
|
|
47396
|
+
llamaRedact: () => llamaRedact,
|
|
47273
47397
|
llamaScriptSummarize: () => llamaScriptSummarize,
|
|
47274
47398
|
llamaSummarize: () => llamaSummarize,
|
|
47275
47399
|
ollamaCognize: () => ollamaCognize,
|
|
@@ -47400,6 +47524,23 @@ var init_privacy_filter = __esm({
|
|
|
47400
47524
|
});
|
|
47401
47525
|
|
|
47402
47526
|
// src/enrich-scripts.ts
|
|
47527
|
+
/**
 * Run each draft's `action.command_redacted` through `redactModel` and store
 * the result back on the action, in place.
 *
 * Identical commands are sent to the model once; later drafts reuse the
 * cached result. Drafts without an action or without a command are skipped.
 *
 * The pass is best-effort: if the redactor throws, or resolves without a
 * string `text`, the existing `command_redacted` value is left untouched so a
 * misbehaving redactor can never erase the command.
 *
 * @param {Iterable<object>} drafts - drafts whose `action` may carry a
 *   `command_redacted` string; mutated in place.
 * @param {Function} redactModel - async `(text) => { text, ... }` redactor.
 * @returns {Promise<void>}
 */
async function enrichToolCallRedaction(drafts, redactModel) {
  const cache2 = new Map();
  for (const draft of drafts) {
    const action = draft.action;
    const cmd = action?.command_redacted;
    if (!action || !cmd) continue;
    try {
      let deep = cache2.get(cmd);
      if (deep === undefined) {
        const res = await redactModel(cmd);
        // Never overwrite the command with a non-string result.
        if (typeof res?.text !== "string") continue;
        deep = res.text;
        cache2.set(cmd, deep);
      }
      action.command_redacted = deep;
    } catch {
      // Deliberately swallowed: keep the existing value for this draft and
      // carry on with the remaining ones.
    }
  }
}
|
|
47403
47544
|
function defaultRoots(cwd) {
|
|
47404
47545
|
if (!cwd) return [];
|
|
47405
47546
|
const seg = cwd.replace(/\/+$/, "").split("/");
|
|
@@ -47449,7 +47590,14 @@ async function enrichOneAction(action, ctx, deps) {
|
|
|
47449
47590
|
if (!content.trim()) continue;
|
|
47450
47591
|
const summaryRaw = await deps.summarize({ ref, content });
|
|
47451
47592
|
if (!summaryRaw) continue;
|
|
47452
|
-
|
|
47593
|
+
let summaryText = redact(summaryRaw).text;
|
|
47594
|
+
if (deps.modelRedact) {
|
|
47595
|
+
try {
|
|
47596
|
+
summaryText = (await deps.modelRedact(summaryText)).text;
|
|
47597
|
+
} catch {
|
|
47598
|
+
}
|
|
47599
|
+
}
|
|
47600
|
+
const summary = summaryText.trim().slice(0, MAX_SUMMARY_CHARS);
|
|
47453
47601
|
if (!summary) continue;
|
|
47454
47602
|
seen.add(token);
|
|
47455
47603
|
out.push({ token, summary });
|
|
@@ -47510,7 +47658,12 @@ async function bundledAdapters() {
|
|
|
47510
47658
|
// @huggingface/transformers — if the optional peer dep isn't
|
|
47511
47659
|
// installed it returns a pass-through redactor (regex pass is
|
|
47512
47660
|
// still the last line of defence).
|
|
47513
|
-
|
|
47661
|
+
// Defense-in-depth redaction, layers 2+3, stacked behind one adapter and
|
|
47662
|
+
// applied to BOTH the abstract (in daemon-core) and `command_redacted` (in
|
|
47663
|
+
// enrichToolCallRedaction): the OpenAI Privacy Filter (NER/PII) then the
|
|
47664
|
+
// local-LLM backstop for secrets the fixed patterns miss. Layer 1 (the
|
|
47665
|
+
// deterministic regex floor in @modelstat/core/redact) already ran first.
|
|
47666
|
+
redact: composeRedactors(await createPrivacyFilterRedactor(), llamaRedact(llamaCfg))
|
|
47514
47667
|
};
|
|
47515
47668
|
}
|
|
47516
47669
|
async function getAdapters() {
|
|
@@ -47542,8 +47695,10 @@ async function buildSessionMetadata2(segments, events) {
|
|
|
47542
47695
|
});
|
|
47543
47696
|
}
|
|
47544
47697
|
async function enrichScripts(drafts, contexts = []) {
|
|
47545
|
-
if (
|
|
47546
|
-
await getAdapters();
|
|
47698
|
+
if (drafts.length === 0) return;
|
|
47699
|
+
const built = await getAdapters();
|
|
47700
|
+
if (built.redact) await enrichToolCallRedaction(drafts, built.redact);
|
|
47701
|
+
if (contexts.length === 0) return;
|
|
47547
47702
|
if (!scriptSummarizer) scriptSummarizer = llamaScriptSummarize(defaultLlamaConfig());
|
|
47548
47703
|
await enrichToolCallScripts(drafts, contexts, {
|
|
47549
47704
|
summarize: scriptSummarizer,
|
|
@@ -47551,7 +47706,8 @@ async function enrichScripts(drafts, contexts = []) {
|
|
|
47551
47706
|
readFile: async (path5) => {
|
|
47552
47707
|
const buf = await fsReadFile(path5);
|
|
47553
47708
|
return buf.subarray(0, MAX_SCRIPT_READ_BYTES).toString("utf8");
|
|
47554
|
-
}
|
|
47709
|
+
},
|
|
47710
|
+
modelRedact: built.redact
|
|
47555
47711
|
});
|
|
47556
47712
|
}
|
|
47557
47713
|
async function preflightSummariser() {
|
|
@@ -47782,7 +47938,7 @@ var init_scan = __esm({
|
|
|
47782
47938
|
init_api();
|
|
47783
47939
|
init_config2();
|
|
47784
47940
|
init_pipeline2();
|
|
47785
|
-
DAEMON_VERSION = true ? "daemon-0.
|
|
47941
|
+
DAEMON_VERSION = true ? "daemon-0.4.0" : "daemon-dev";
|
|
47786
47942
|
BATCH_MAX_EVENTS = INGEST_BATCH_MAX_EVENTS;
|
|
47787
47943
|
BATCH_MAX_TOOL_CALLS = 2e4;
|
|
47788
47944
|
BATCH_BUFFER_HARD_CAP = BATCH_MAX_EVENTS * 2;
|
|
@@ -47995,7 +48151,7 @@ var PROCESSING_VERSION;
|
|
|
47995
48151
|
var init_processing_version = __esm({
|
|
47996
48152
|
"src/processing-version.ts"() {
|
|
47997
48153
|
"use strict";
|
|
47998
|
-
PROCESSING_VERSION =
|
|
48154
|
+
PROCESSING_VERSION = 6;
|
|
47999
48155
|
}
|
|
48000
48156
|
});
|
|
48001
48157
|
|
|
@@ -50292,7 +50448,7 @@ var init_daemon = __esm({
|
|
|
50292
50448
|
init_machine_key();
|
|
50293
50449
|
init_scan();
|
|
50294
50450
|
init_single_flight();
|
|
50295
|
-
DAEMON_VERSION2 = true ? "daemon-0.
|
|
50451
|
+
DAEMON_VERSION2 = true ? "daemon-0.4.0" : "daemon-dev";
|
|
50296
50452
|
HEARTBEAT_INTERVAL_MS = 1e4;
|
|
50297
50453
|
SCAN_INTERVAL_MS = 5 * 60 * 1e3;
|
|
50298
50454
|
DISCOVERY_INTERVAL_MS = 6e4;
|
|
@@ -50894,7 +51050,7 @@ function tryOpenBrowser(url) {
|
|
|
50894
51050
|
return false;
|
|
50895
51051
|
}
|
|
50896
51052
|
}
|
|
50897
|
-
var DAEMON_VERSION3 = true ? "daemon-0.
|
|
51053
|
+
var DAEMON_VERSION3 = true ? "daemon-0.4.0" : "daemon-dev";
|
|
50898
51054
|
function osFamily() {
|
|
50899
51055
|
const p = platform5();
|
|
50900
51056
|
if (p === "darwin") return "macos";
|