skein-cli 0.1.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +180 -0
- package/bin/skein.mjs +7 -0
- package/dist/adapters/aider/conv-reader.d.ts +7 -0
- package/dist/adapters/aider/conv-reader.js +203 -0
- package/dist/adapters/aider/conv-reader.js.map +1 -0
- package/dist/adapters/aider/index.d.ts +2 -0
- package/dist/adapters/aider/index.js +123 -0
- package/dist/adapters/aider/index.js.map +1 -0
- package/dist/adapters/base.d.ts +71 -0
- package/dist/adapters/base.js +38 -0
- package/dist/adapters/base.js.map +1 -0
- package/dist/adapters/claude-code/conv-reader.d.ts +2 -0
- package/dist/adapters/claude-code/conv-reader.js +155 -0
- package/dist/adapters/claude-code/conv-reader.js.map +1 -0
- package/dist/adapters/claude-code/index.d.ts +3 -0
- package/dist/adapters/claude-code/index.js +517 -0
- package/dist/adapters/claude-code/index.js.map +1 -0
- package/dist/adapters/claude-desktop/index.d.ts +2 -0
- package/dist/adapters/claude-desktop/index.js +95 -0
- package/dist/adapters/claude-desktop/index.js.map +1 -0
- package/dist/adapters/codex/index.d.ts +2 -0
- package/dist/adapters/codex/index.js +472 -0
- package/dist/adapters/codex/index.js.map +1 -0
- package/dist/adapters/cursor/index.d.ts +2 -0
- package/dist/adapters/cursor/index.js +255 -0
- package/dist/adapters/cursor/index.js.map +1 -0
- package/dist/adapters/opencode/conv-reader.d.ts +3 -0
- package/dist/adapters/opencode/conv-reader.js +190 -0
- package/dist/adapters/opencode/conv-reader.js.map +1 -0
- package/dist/adapters/opencode/index.d.ts +2 -0
- package/dist/adapters/opencode/index.js +349 -0
- package/dist/adapters/opencode/index.js.map +1 -0
- package/dist/adapters/registry.d.ts +4 -0
- package/dist/adapters/registry.js +26 -0
- package/dist/adapters/registry.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.js +54 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/conv.d.ts +2 -0
- package/dist/commands/conv.js +261 -0
- package/dist/commands/conv.js.map +1 -0
- package/dist/commands/doctor.d.ts +2 -0
- package/dist/commands/doctor.js +85 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.js +38 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/memory.d.ts +2 -0
- package/dist/commands/memory.js +197 -0
- package/dist/commands/memory.js.map +1 -0
- package/dist/commands/migrate.d.ts +2 -0
- package/dist/commands/migrate.js +102 -0
- package/dist/commands/migrate.js.map +1 -0
- package/dist/commands/profile.d.ts +2 -0
- package/dist/commands/profile.js +183 -0
- package/dist/commands/profile.js.map +1 -0
- package/dist/commands/redact.d.ts +2 -0
- package/dist/commands/redact.js +73 -0
- package/dist/commands/redact.js.map +1 -0
- package/dist/commands/trace.d.ts +2 -0
- package/dist/commands/trace.js +181 -0
- package/dist/commands/trace.js.map +1 -0
- package/dist/commands/view.d.ts +13 -0
- package/dist/commands/view.js +184 -0
- package/dist/commands/view.js.map +1 -0
- package/dist/commands/watch.d.ts +6 -0
- package/dist/commands/watch.js +61 -0
- package/dist/commands/watch.js.map +1 -0
- package/dist/commands/wrap.d.ts +2 -0
- package/dist/commands/wrap.js +131 -0
- package/dist/commands/wrap.js.map +1 -0
- package/dist/conv/chatgpt-import.d.ts +33 -0
- package/dist/conv/chatgpt-import.js +145 -0
- package/dist/conv/chatgpt-import.js.map +1 -0
- package/dist/conv/cursor.d.ts +12 -0
- package/dist/conv/cursor.js +55 -0
- package/dist/conv/cursor.js.map +1 -0
- package/dist/conv/export.d.ts +40 -0
- package/dist/conv/export.js +215 -0
- package/dist/conv/export.js.map +1 -0
- package/dist/conv/replay/aider.d.ts +24 -0
- package/dist/conv/replay/aider.js +56 -0
- package/dist/conv/replay/aider.js.map +1 -0
- package/dist/conv/replay/claude-code.d.ts +38 -0
- package/dist/conv/replay/claude-code.js +80 -0
- package/dist/conv/replay/claude-code.js.map +1 -0
- package/dist/conv/replay/context.d.ts +25 -0
- package/dist/conv/replay/context.js +63 -0
- package/dist/conv/replay/context.js.map +1 -0
- package/dist/conv/replay/index.d.ts +22 -0
- package/dist/conv/replay/index.js +84 -0
- package/dist/conv/replay/index.js.map +1 -0
- package/dist/conv/search.d.ts +67 -0
- package/dist/conv/search.js +379 -0
- package/dist/conv/search.js.map +1 -0
- package/dist/conv/sink.d.ts +26 -0
- package/dist/conv/sink.js +76 -0
- package/dist/conv/sink.js.map +1 -0
- package/dist/conv/sources/aider.d.ts +13 -0
- package/dist/conv/sources/aider.js +95 -0
- package/dist/conv/sources/aider.js.map +1 -0
- package/dist/conv/sources/claude-code.d.ts +13 -0
- package/dist/conv/sources/claude-code.js +189 -0
- package/dist/conv/sources/claude-code.js.map +1 -0
- package/dist/conv/sources/codex.d.ts +15 -0
- package/dist/conv/sources/codex.js +175 -0
- package/dist/conv/sources/codex.js.map +1 -0
- package/dist/conv/sources/opencode.d.ts +12 -0
- package/dist/conv/sources/opencode.js +92 -0
- package/dist/conv/sources/opencode.js.map +1 -0
- package/dist/conv/store.d.ts +10 -0
- package/dist/conv/store.js +75 -0
- package/dist/conv/store.js.map +1 -0
- package/dist/conv/title.d.ts +9 -0
- package/dist/conv/title.js +74 -0
- package/dist/conv/title.js.map +1 -0
- package/dist/conv/watcher.d.ts +28 -0
- package/dist/conv/watcher.js +75 -0
- package/dist/conv/watcher.js.map +1 -0
- package/dist/conv/zip.d.ts +11 -0
- package/dist/conv/zip.js +82 -0
- package/dist/conv/zip.js.map +1 -0
- package/dist/hf/client.d.ts +41 -0
- package/dist/hf/client.js +87 -0
- package/dist/hf/client.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/ir/index.d.ts +24 -0
- package/dist/ir/index.js +23 -0
- package/dist/ir/index.js.map +1 -0
- package/dist/ir/profile.d.ts +55 -0
- package/dist/ir/profile.js +177 -0
- package/dist/ir/profile.js.map +1 -0
- package/dist/memory/index.d.ts +41 -0
- package/dist/memory/index.js +109 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/search.d.ts +24 -0
- package/dist/memory/search.js +114 -0
- package/dist/memory/search.js.map +1 -0
- package/dist/memory/sinks/claude-mem.d.ts +13 -0
- package/dist/memory/sinks/claude-mem.js +100 -0
- package/dist/memory/sinks/claude-mem.js.map +1 -0
- package/dist/memory/sinks/codex.d.ts +12 -0
- package/dist/memory/sinks/codex.js +82 -0
- package/dist/memory/sinks/codex.js.map +1 -0
- package/dist/memory/sinks/context.d.ts +21 -0
- package/dist/memory/sinks/context.js +42 -0
- package/dist/memory/sinks/context.js.map +1 -0
- package/dist/memory/sources/chatgpt.d.ts +22 -0
- package/dist/memory/sources/chatgpt.js +98 -0
- package/dist/memory/sources/chatgpt.js.map +1 -0
- package/dist/memory/sources/claude-mem.d.ts +8 -0
- package/dist/memory/sources/claude-mem.js +104 -0
- package/dist/memory/sources/claude-mem.js.map +1 -0
- package/dist/memory/sources/codex.d.ts +8 -0
- package/dist/memory/sources/codex.js +77 -0
- package/dist/memory/sources/codex.js.map +1 -0
- package/dist/memory/store.d.ts +19 -0
- package/dist/memory/store.js +82 -0
- package/dist/memory/store.js.map +1 -0
- package/dist/proxy/http.d.ts +21 -0
- package/dist/proxy/http.js +205 -0
- package/dist/proxy/http.js.map +1 -0
- package/dist/proxy/recorder.d.ts +35 -0
- package/dist/proxy/recorder.js +221 -0
- package/dist/proxy/recorder.js.map +1 -0
- package/dist/proxy/streaming.d.ts +33 -0
- package/dist/proxy/streaming.js +185 -0
- package/dist/proxy/streaming.js.map +1 -0
- package/dist/redactor/entropy.d.ts +29 -0
- package/dist/redactor/entropy.js +98 -0
- package/dist/redactor/entropy.js.map +1 -0
- package/dist/redactor/index.d.ts +52 -0
- package/dist/redactor/index.js +152 -0
- package/dist/redactor/index.js.map +1 -0
- package/dist/redactor/ner.d.ts +53 -0
- package/dist/redactor/ner.js +97 -0
- package/dist/redactor/ner.js.map +1 -0
- package/dist/redactor/pii-patterns.d.ts +22 -0
- package/dist/redactor/pii-patterns.js +187 -0
- package/dist/redactor/pii-patterns.js.map +1 -0
- package/dist/redactor/secret-patterns.d.ts +27 -0
- package/dist/redactor/secret-patterns.js +475 -0
- package/dist/redactor/secret-patterns.js.map +1 -0
- package/dist/schema/conv.d.ts +698 -0
- package/dist/schema/conv.js +85 -0
- package/dist/schema/conv.js.map +1 -0
- package/dist/schema/index.d.ts +2 -0
- package/dist/schema/index.js +3 -0
- package/dist/schema/index.js.map +1 -0
- package/dist/schema/manifest.d.ts +1531 -0
- package/dist/schema/manifest.js +179 -0
- package/dist/schema/manifest.js.map +1 -0
- package/dist/schema/memory.d.ts +107 -0
- package/dist/schema/memory.js +45 -0
- package/dist/schema/memory.js.map +1 -0
- package/dist/schema/trace.d.ts +164 -0
- package/dist/schema/trace.js +89 -0
- package/dist/schema/trace.js.map +1 -0
- package/dist/trace/consent.d.ts +30 -0
- package/dist/trace/consent.js +60 -0
- package/dist/trace/consent.js.map +1 -0
- package/dist/trace/extract.d.ts +22 -0
- package/dist/trace/extract.js +168 -0
- package/dist/trace/extract.js.map +1 -0
- package/dist/trace/ml-pii.d.ts +33 -0
- package/dist/trace/ml-pii.js +35 -0
- package/dist/trace/ml-pii.js.map +1 -0
- package/dist/trace/push.d.ts +59 -0
- package/dist/trace/push.js +141 -0
- package/dist/trace/push.js.map +1 -0
- package/dist/trace/serialize.d.ts +23 -0
- package/dist/trace/serialize.js +67 -0
- package/dist/trace/serialize.js.map +1 -0
- package/dist/ui/banner.d.ts +2 -0
- package/dist/ui/banner.js +17 -0
- package/dist/ui/banner.js.map +1 -0
- package/dist/ui/box.d.ts +4 -0
- package/dist/ui/box.js +38 -0
- package/dist/ui/box.js.map +1 -0
- package/dist/ui/index.d.ts +5 -0
- package/dist/ui/index.js +5 -0
- package/dist/ui/index.js.map +1 -0
- package/dist/ui/spinner.d.ts +10 -0
- package/dist/ui/spinner.js +44 -0
- package/dist/ui/spinner.js.map +1 -0
- package/dist/ui/table.d.ts +9 -0
- package/dist/ui/table.js +55 -0
- package/dist/ui/table.js.map +1 -0
- package/dist/util/frontmatter.d.ts +6 -0
- package/dist/util/frontmatter.js +20 -0
- package/dist/util/frontmatter.js.map +1 -0
- package/dist/util/fs.d.ts +5 -0
- package/dist/util/fs.js +41 -0
- package/dist/util/fs.js.map +1 -0
- package/dist/util/ids.d.ts +3 -0
- package/dist/util/ids.js +16 -0
- package/dist/util/ids.js.map +1 -0
- package/dist/util/log.d.ts +13 -0
- package/dist/util/log.js +33 -0
- package/dist/util/log.js.map +1 -0
- package/dist/util/paths.d.ts +18 -0
- package/dist/util/paths.js +36 -0
- package/dist/util/paths.js.map +1 -0
- package/dist/version.d.ts +5 -0
- package/dist/version.js +32 -0
- package/dist/version.js.map +1 -0
- package/package.json +86 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { promises as fs } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { ensureDir } from '../util/fs.js';
|
|
4
|
+
import { getToolPaths } from '../util/paths.js';
|
|
5
|
+
/** Schema tag stored in the `schema` field of every consent record written by `recordConsent`. */
export const TRACE_CONSENT_SCHEMA = 'skein-trace-consent/1';
|
|
6
|
+
/**
 * Bump when the publishing terms change; older consent is then treated as stale,
 * because `hasConsent` requires the stored `termsVersion` to equal this value.
 */
|
|
7
|
+
export const TRACE_TERMS_VERSION = '2026-06-26';
|
|
8
|
+
/** Directory (`<skeinDir>/trace`) that holds the consent record and the key map. */
function traceDir() {
    const { skeinDir } = getToolPaths();
    return join(skeinDir, 'trace');
}
|
|
11
|
+
/** Path of the consent record, `consent.json`, inside the trace directory. */
function consentPath() {
    const dir = traceDir();
    return join(dir, 'consent.json');
}
|
|
14
|
+
/** Path of the append-only key map, `keymap.jsonl`, inside the trace directory. */
function keymapPath() {
    const dir = traceDir();
    return join(dir, 'keymap.jsonl');
}
|
|
17
|
+
/**
 * Load the stored consent record.
 *
 * @returns {Promise<any>} The parsed contents of `consent.json`, or `undefined`
 *   when the file cannot be read or does not contain valid JSON.
 */
export async function readConsent() {
    try {
        const raw = await fs.readFile(consentPath(), 'utf8');
        const record = JSON.parse(raw);
        return record;
    }
    catch {
        // Best-effort: a missing, unreadable, or corrupt file all read as "no record".
        return undefined;
    }
}
|
|
25
|
+
/**
 * Persist a consent record for the current publishing terms.
 *
 * Creates the trace directory if needed, then overwrites `consent.json` with a
 * pretty-printed (2-space) JSON document followed by a newline.
 *
 * @param {{ acceptedAt: any, handle?: any }} input - `acceptedAt` is stored as
 *   given; `handle` is stored only when truthy.
 * @returns {Promise<object>} The record that was written.
 */
export async function recordConsent(input) {
    await ensureDir(traceDir());
    const consent = {
        schema: TRACE_CONSENT_SCHEMA,
        acceptedAt: input.acceptedAt,
        termsVersion: TRACE_TERMS_VERSION,
        publish: true,
    };
    if (input.handle) {
        // Added last so the serialized key order matches the record layout.
        consent.handle = input.handle;
    }
    const serialized = JSON.stringify(consent, null, 2);
    await fs.writeFile(consentPath(), `${serialized}\n`, 'utf8');
    return consent;
}
|
|
37
|
+
/**
 * Report whether valid, current publishing consent is on record.
 *
 * Consent counts only when the stored record is an object that
 *  - has `publish === true` (the flag `recordConsent` writes), and
 *  - carries the current `TRACE_TERMS_VERSION`.
 * Anything else (no file, corrupt file, stale terms, `publish` missing or not
 * `true`) fails closed.
 *
 * @returns {Promise<boolean>} `true` only for a current, publish-enabled record.
 */
export async function hasConsent() {
    const consent = await readConsent();
    if (consent === null || typeof consent !== 'object') {
        return false;
    }
    // The publish flag is part of the record; a record that does not grant
    // publishing must not pass the gate just because its terms version matches.
    return consent.publish === true && consent.termsVersion === TRACE_TERMS_VERSION;
}
|
|
41
|
+
/**
 * Append entries to the key map as JSON Lines (one JSON document per line).
 *
 * Does nothing for an empty list; otherwise creates the trace directory if
 * needed and appends all entries, newline-terminated, in a single write.
 *
 * @param {Array<object>} entries - Entries to serialize and append.
 * @returns {Promise<void>}
 */
export async function appendKeyMap(entries) {
    if (entries.length === 0) {
        return;
    }
    await ensureDir(traceDir());
    const lines = [];
    for (const entry of entries) {
        lines.push(JSON.stringify(entry));
    }
    await fs.appendFile(keymapPath(), `${lines.join('\n')}\n`, 'utf8');
}
|
|
48
|
+
/**
 * Read every entry of the key map (`keymap.jsonl`).
 *
 * The file is parsed line by line. Blank lines are ignored, and a line that is
 * not valid JSON (for example a torn final line from an interrupted append) is
 * skipped on its own instead of discarding every other entry. Both `\n` and
 * `\r\n` line endings are accepted.
 *
 * @returns {Promise<Array<any>>} Parsed entries in file order; `[]` when the
 *   file is missing or unreadable.
 */
export async function readKeyMap() {
    let raw;
    try {
        raw = await fs.readFile(keymapPath(), 'utf8');
    }
    catch {
        // Best-effort: no readable key map means no entries.
        return [];
    }
    const entries = [];
    for (const line of raw.split(/\r?\n/)) {
        if (line.trim() === '') {
            continue;
        }
        try {
            entries.push(JSON.parse(line));
        }
        catch {
            // Skip only the corrupt line; the remaining entries are still valid.
        }
    }
    return entries;
}
|
|
60
|
+
//# sourceMappingURL=consent.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consent.js","sourceRoot":"","sources":["../../src/trace/consent.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAA;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAA;AAE/C,MAAM,CAAC,MAAM,oBAAoB,GAAG,uBAAgC,CAAA;AACpE,qFAAqF;AACrF,MAAM,CAAC,MAAM,mBAAmB,GAAG,YAAY,CAAA;AAuB/C,SAAS,QAAQ;IACf,OAAO,IAAI,CAAC,YAAY,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;AAC/C,CAAC;AAED,SAAS,WAAW;IAClB,OAAO,IAAI,CAAC,QAAQ,EAAE,EAAE,cAAc,CAAC,CAAA;AACzC,CAAC;AAED,SAAS,UAAU;IACjB,OAAO,IAAI,CAAC,QAAQ,EAAE,EAAE,cAAc,CAAC,CAAA;AACzC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,EAAE,MAAM,CAAC,CAAiB,CAAA;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAA;IAClB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,KAGnC;IACC,MAAM,SAAS,CAAC,QAAQ,EAAE,CAAC,CAAA;IAC3B,MAAM,OAAO,GAAiB;QAC5B,MAAM,EAAE,oBAAoB;QAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;QAC5B,YAAY,EAAE,mBAAmB;QACjC,OAAO,EAAE,IAAI;QACb,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAClD,CAAA;IACD,MAAM,EAAE,CAAC,SAAS,CAAC,WAAW,EAAE,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IAClF,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,MAAM,OAAO,GAAG,MAAM,WAAW,EAAE,CAAA;IACnC,OAAO,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,YAAY,KAAK,mBAAmB,CAAC,CAAA;AACzE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,OAA+B;IAChE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAM;IAChC,MAAM,SAAS,CAAC,QAAQ,EAAE,CAAC,CAAA;IAC3B,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC3D,MAAM,EAAE,CAAC,UAAU,CAAC,UAAU,EAAE,EAAE,GAAG,IAAI,IAAI,EAAE,MAAM,CAAC,CAAA;AACxD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,UAAU,EAAE,EAAE,MAAM,CAAC,CAAA;QACnD,OAAO,GAAG;aACP,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,OAAO,CAAC;aACf,GAA
G,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAgB,CAAC,CAAA;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { ConvEvent } from '../schema/conv.js';
|
|
2
|
+
import { type TraceRow } from '../schema/trace.js';
|
|
3
|
+
/** Options accepted by `extractSession` and `extractTraceRows`. */
export interface ExtractOptions {
|
|
4
|
+
/**
 * Injectable id generator for deterministic tests. Defaults to randomUUID.
 * Used for the session key and for each emitted row's `rowId`.
 */
|
|
5
|
+
newId?: () => string;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Result of extracting one session. When the events contain no `session.start`
 * event, `rows` is empty and every string field is the empty string.
 */
export interface ExtractResult {
|
|
8
|
+
/** One row per assistant message, in message order. */
rows: TraceRow[];
|
|
9
|
+
/** Id generated for this extraction (via `newId`); repeated on every row. */
sessionKey: string;
|
|
10
|
+
/** `id` of the `session.start` event. */
sourceSessionId: string;
|
|
11
|
+
/** `tool` of the `session.start` event. */
harness: string;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
 * Derive per-turn trace rows from one captured session's events.
|
|
15
|
+
 *
|
|
16
|
+
 * Each row carries redacted conversation content (prompt, output, reasoning,
|
|
17
|
+
 * tool I/O) alongside harness labels, coarse buckets, surrogate keys, and
|
|
18
|
+
 * tool-call structure. Every content string passes through `scrub()` first.
 *
 * @param events Events of one session; the first `session.start` event and all
 *   `message` events are used.
 * @param opts Optional settings, see `ExtractOptions`.
 * @returns The rows plus session identifiers; an empty result (no rows, empty
 *   strings) when there is no `session.start` event.
|
|
19
|
+
 */
|
|
20
|
+
export declare function extractSession(events: readonly ConvEvent[], opts?: ExtractOptions): ExtractResult;
|
|
21
|
+
/** Convenience: `extractSession(events, opts).rows` — just the rows (used by the redaction-safety test). */
|
|
22
|
+
export declare function extractTraceRows(events: readonly ConvEvent[], opts?: ExtractOptions): TraceRow[];
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
2
|
+
import { redactText } from '../redactor/index.js';
|
|
3
|
+
import { EFFORT_LEVELS, } from '../schema/trace.js';
|
|
4
|
+
import { TRACE_SCHEMA } from '../version.js';
|
|
5
|
+
const EFFORT_SET = new Set(EFFORT_LEVELS);
|
|
6
|
+
/**
 * Map a raw effort label onto the known effort levels.
 *
 * @param {string | undefined} raw - Effort value as captured.
 * @returns {string} `raw` when it is one of the known levels, otherwise `'unknown'`.
 */
function normalizeEffort(raw) {
    const recognized = Boolean(raw) && EFFORT_SET.has(raw);
    return recognized ? raw : 'unknown';
}
|
|
11
|
+
/**
 * Defensive cleanup for tool/function names, which are structural rather than
 * content: every character outside `[a-zA-Z0-9._-]` becomes `_` (so a name
 * cannot smuggle a path or an argument), and the result is capped at 64
 * characters.
 *
 * @param {string} name - Raw tool name.
 * @returns {string} The cleaned, length-capped name.
 */
function sanitizeName(name) {
    const cleaned = name.replace(/[^a-zA-Z0-9._-]/g, '_');
    return cleaned.slice(0, 64);
}
|
|
18
|
+
/**
 * The single, load-bearing privacy control: every content string goes through
 * the redactor (`redactText`) before it is written to a row, so raw content
 * never reaches a TraceRow.
 *
 * @param {string} text - Raw content.
 * @returns {string} The `text` field of the redactor's result.
 */
function scrub(text) {
    const { text: redacted } = redactText(text);
    return redacted;
}
|
|
26
|
+
/**
 * Redacted tool result for one tool_use id, taken from the following message.
 *
 * Scans `next.content` for the first `tool_result` block whose `tool_use_id`
 * matches. String content is scrubbed as-is; any other content is
 * JSON-serialized first. When the content cannot be serialized to a string
 * (`JSON.stringify` returns `undefined`, e.g. the block has no `content`), no
 * `text` is produced, so a non-string never reaches `scrub()`.
 *
 * @param {{ content: Iterable<any> } | undefined} next - Message after the assistant turn.
 * @param {string} toolUseId - Id of the `tool_use` block to match.
 * @returns {{ text?: string, isError?: true }} `{}` when there is no following
 *   message or no matching result.
 */
function resultFor(next, toolUseId) {
    if (!next) {
        return {};
    }
    for (const block of next.content) {
        if (block.type !== 'tool_result' || block.tool_use_id !== toolUseId) {
            continue;
        }
        const raw = typeof block.content === 'string'
            ? block.content
            : JSON.stringify(block.content);
        return {
            // JSON.stringify(undefined) is undefined, not a string: omit the text.
            ...(typeof raw === 'string' ? { text: scrub(raw) } : {}),
            ...(block.is_error ? { isError: true } : {}),
        };
    }
    return {};
}
|
|
38
|
+
/**
 * Gather the redacted user prompt that triggered an assistant turn: the text
 * blocks of the user messages preceding index `i`, walking backwards until the
 * previous assistant message (or the start of the list). Messages with any
 * other role are skipped. Tool-result-only user messages contribute nothing
 * here — their payloads belong to `toolIO`.
 *
 * Text is joined with newlines in chronological order, both across messages
 * and across the blocks inside each message.
 *
 * @param {Array<{ role: string, content: Iterable<any> }>} msgs - Session messages.
 * @param {number} i - Index of the assistant message.
 * @returns {string} The scrubbed prompt text (scrubbed empty string when none).
 */
function precedingPrompt(msgs, i) {
    const parts = [];
    for (let j = i - 1; j >= 0; j--) {
        const prev = msgs[j];
        if (!prev || prev.role === 'assistant') {
            break;
        }
        if (prev.role !== 'user') {
            continue;
        }
        const texts = [];
        for (const block of prev.content) {
            if (block.type === 'text') {
                texts.push(block.text);
            }
        }
        // Messages are visited newest-first, so prepend each message as a unit;
        // prepending block by block would reverse the blocks within a message.
        parts.unshift(...texts);
    }
    return scrub(parts.join('\n'));
}
|
|
59
|
+
/**
 * Turn the events of one captured session into per-turn trace rows.
 *
 * One row is produced per assistant message. A row combines session-level
 * labels taken from the first `session.start` event (harness, provider, model,
 * effort) with the turn's redacted content: the preceding user prompt, the
 * assistant's text output, optional reasoning, and each tool call with its
 * input and (when found in the next message) its result. Every content string
 * passes through `scrub()` first.
 *
 * @param {ReadonlyArray<object>} events - Events of a single session.
 * @param {{ newId?: () => string }} [opts] - `newId` supplies the session key
 *   and row ids; defaults to `randomUUID`.
 * @returns {{ rows: object[], sessionKey: string, sourceSessionId: string, harness: string }}
 *   An empty result (no rows, empty strings) when there is no `session.start`.
 */
export function extractSession(events, opts = {}) {
    const newId = opts.newId ?? (() => randomUUID());
    const start = events.find((event) => event.type === 'session.start');
    if (!start) {
        return { rows: [], sessionKey: '', sourceSessionId: '', harness: '' };
    }
    const sessionKey = newId();
    const harness = start.tool;
    const provider = start.provider ?? 'unknown';
    const model = start.model ?? 'unknown';
    const modelKind = start.modelKind ?? 'unknown';
    const effort = normalizeEffort(start.effort);
    const messages = events.filter((event) => event.type === 'message');
    const rows = [];
    messages.forEach((message, index) => {
        if (!message || message.role !== 'assistant') {
            return;
        }
        // Tool results for this turn are looked up in the very next message.
        const followUp = messages[index + 1];
        const perToolCount = new Map();
        const toolIO = [];
        const answerParts = [];
        const thoughtParts = [];
        for (const block of message.content) {
            switch (block.type) {
                case 'text':
                    answerParts.push(block.text);
                    break;
                case 'thinking':
                    thoughtParts.push(block.thinking);
                    break;
                case 'tool_use': {
                    const toolName = sanitizeName(block.name);
                    perToolCount.set(toolName, (perToolCount.get(toolName) ?? 0) + 1);
                    const { text: result, isError } = resultFor(followUp, block.id);
                    const io = {
                        name: toolName,
                        input: scrub(JSON.stringify(block.input ?? null)),
                    };
                    if (result !== undefined) {
                        io.result = result;
                    }
                    if (isError) {
                        io.isError = true;
                    }
                    toolIO.push(io);
                    break;
                }
                default:
                    break;
            }
        }
        // Exactly one toolIO entry is pushed per tool_use block.
        const toolUseCount = toolIO.length;
        const toolCalls = Array.from(perToolCount, ([name, count]) => ({ name, count }));
        const reasoning = thoughtParts.length > 0 ? scrub(thoughtParts.join('\n')) : undefined;
        let sawResult = false;
        let sawError = false;
        if (followUp) {
            for (const block of followUp.content) {
                if (block.type !== 'tool_result') {
                    continue;
                }
                sawResult = true;
                if (block.is_error) {
                    sawError = true;
                }
            }
        }
        // With tool results: error if any failed. Without: a turn that made no
        // tool calls counts as ok, otherwise the outcome is unknown.
        let outcome;
        if (sawResult) {
            outcome = sawError ? 'error' : 'ok';
        }
        else {
            outcome = toolUseCount === 0 ? 'ok' : 'unknown';
        }
        const errorClass = sawError ? 'tool_error' : undefined;
        rows.push({
            schema: TRACE_SCHEMA,
            rowId: newId(),
            sessionKey,
            // Rows are appended one per assistant turn, so the length is the turn index.
            turnIndex: rows.length,
            provider,
            model,
            modelKind,
            harness,
            ...(start.toolVersion ? { harnessVersion: start.toolVersion } : {}),
            effort,
            tokensIn: message.tokens?.in ?? null,
            tokensOut: message.tokens?.out ?? null,
            latencyMs: message.latencyMs ?? null,
            costUsd: message.costUsd ?? null,
            toolCalls,
            toolCallDepth: toolUseCount,
            prompt: precedingPrompt(messages, index),
            output: scrub(answerParts.join('\n')),
            ...(reasoning !== undefined ? { reasoning } : {}),
            toolIO,
            outcome,
            ...(errorClass ? { errorClass } : {}),
            ts: message.ts,
        });
    });
    return { rows, sessionKey, sourceSessionId: start.id, harness };
}
|
|
164
|
+
/**
 * Convenience wrapper around `extractSession` that returns only its `rows`
 * (used by the redaction-safety test).
 *
 * @param {ReadonlyArray<object>} events - Events of a single session.
 * @param {{ newId?: () => string }} [opts] - Forwarded to `extractSession`.
 * @returns {object[]} The extracted rows.
 */
export function extractTraceRows(events, opts = {}) {
    const { rows } = extractSession(events, opts);
    return rows;
}
|
|
168
|
+
//# sourceMappingURL=extract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/trace/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAA;AAEjD,OAAO,EACL,aAAa,GAId,MAAM,oBAAoB,CAAA;AAC3B,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAA;AAiB5C,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,aAAa,CAAC,CAAA;AAEjD,SAAS,eAAe,CAAC,GAAuB;IAC9C,IAAI,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,GAAyB,CAAA;IAChE,OAAO,SAAS,CAAA;AAClB,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,IAAY;IAChC,OAAO,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;AAC3D,CAAC;AAED;;;;GAIG;AACH,SAAS,KAAK,CAAC,IAAY;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,CAAC,IAAI,CAAA;AAC9B,CAAC;AAED,uFAAuF;AACvF,SAAS,SAAS,CAChB,IAA6B,EAC7B,SAAiB;IAEjB,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAA;IACpB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;QACjC,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACpE,MAAM,GAAG,GAAG,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;YAC7F,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAA;QAC3E,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAA;AACX,CAAC;AAED;;;;;GAKG;AACH,SAAS,eAAe,CAAC,IAA4B,EAAE,CAAS;IAC9D,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACpB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW;YAAE,MAAK;QAC7C,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM;YAAE,SAAQ;QAClC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjC,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM;gBAAE,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QACtD,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;AAChC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAC5B,MAA4B,EAC5B,OAAuB,EAAE;IAEzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC,UAAU,EAAE,CAAC,CAAA;IAChD,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CA
AC,CAAC,EAAyB,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,eAAe,CAAC,CAAA;IACnF,IAAI,CAAC,KAAK;QAAE,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,eAAe,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAA;IAEjF,MAAM,UAAU,GAAG,KAAK,EAAE,CAAA;IAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAA;IAC1B,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAQ,IAAI,SAAS,CAAA;IAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,IAAI,SAAS,CAAA;IACtC,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,SAAS,CAAA;IAC9C,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;IAE5C,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAoB,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAA;IACzE,MAAM,IAAI,GAAe,EAAE,CAAA;IAC3B,IAAI,SAAS,GAAG,CAAC,CAAA;IAEjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACjB,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW;YAAE,SAAQ;QAE1C,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACxB,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAA;QAC5C,MAAM,MAAM,GAAa,EAAE,CAAA;QAC3B,MAAM,WAAW,GAAa,EAAE,CAAA;QAChC,MAAM,cAAc,GAAa,EAAE,CAAA;QACnC,IAAI,YAAY,GAAG,CAAC,CAAA;QAEpB,KAAK,MAAM,KAAK,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;YAC9B,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;YAC9B,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;gBACrC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAA;YACrC,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;gBACrC,YAAY,EAAE,CAAA;gBACd,MAAM,GAAG,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBACpC,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;gBACnD,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,SAAS,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC,CAAA;gBAC3D,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,GAAG;oBACT,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC;oBACjD,GAAG,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC3C,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBACtC,CAAC,CAAA;YACJ,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAA
wB,CAAC,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;YACvF,IAAI;YACJ,KAAK;SACN,CAAC,CAAC,CAAA;QACH,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;QAEtF,IAAI,OAAO,GAAwB,SAAS,CAAA;QAC5C,IAAI,UAA8B,CAAA;QAClC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,SAAS,GAAG,KAAK,CAAA;YACrB,IAAI,QAAQ,GAAG,KAAK,CAAA;YACpB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;gBACjC,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;oBACjC,SAAS,GAAG,IAAI,CAAA;oBAChB,IAAI,KAAK,CAAC,QAAQ;wBAAE,QAAQ,GAAG,IAAI,CAAA;gBACrC,CAAC;YACH,CAAC;YACD,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAA;gBACnC,IAAI,QAAQ;oBAAE,UAAU,GAAG,YAAY,CAAA;YACzC,CAAC;QACH,CAAC;QACD,IAAI,YAAY,KAAK,CAAC,IAAI,OAAO,KAAK,SAAS;YAAE,OAAO,GAAG,IAAI,CAAA;QAE/D,IAAI,CAAC,IAAI,CAAC;YACR,MAAM,EAAE,YAAY;YACpB,KAAK,EAAE,KAAK,EAAE;YACd,UAAU;YACV,SAAS;YACT,QAAQ;YACR,KAAK;YACL,SAAS;YACT,OAAO;YACP,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACnE,MAAM;YACN,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,IAAI,IAAI;YAC9B,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,GAAG,IAAI,IAAI;YAChC,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,IAAI;YAC9B,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,IAAI;YAC1B,SAAS;YACT,aAAa,EAAE,YAAY;YAC3B,MAAM,EAAE,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;YAChC,MAAM,EAAE,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrC,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjD,MAAM;YACN,OAAO;YACP,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACrC,EAAE,EAAE,CAAC,CAAC,EAAE;SACT,CAAC,CAAA;QACF,SAAS,EAAE,CAAA;IACb,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,eAAe,EAAE,KAAK,CAAC,EAAE,EAAE,OAAO,EAAE,CAAA;AACjE,CAAC;AAED,sEAAsE;AACtE,MAAM,UAAU,gBAAgB,CAC9B,MAA4B,EAC5B,OAAuB,EAAE;IAEzB,OAAO,cAAc,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,IAAI,CAAA;AAC1C,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { type NerHit } from '../redactor/ner.js';
|
|
2
|
+
import type { TraceRow } from '../schema/trace.js';
|
|
3
|
+
/**
|
|
4
|
+
* Optional ML PII tier for the published trace path.
|
|
5
|
+
*
|
|
6
|
+
* The extractor always runs the synchronous secret/entropy/PII-regex tiers, so
|
|
7
|
+
* every content string in a row is already scrubbed of detected secrets and
|
|
8
|
+
* structured PII. This pass adds the ML NER tier on top — catching names,
|
|
9
|
+
* addresses, and other free-form PII the regex tiers miss — and only when the
|
|
10
|
+
* contributor explicitly opts in (`--ml-pii` / `SKEIN_TRACE_ML_PII`).
|
|
11
|
+
*
|
|
12
|
+
* It is a no-op-safe post-pass: if the ML model is unavailable it returns the
|
|
13
|
+
* rows unchanged and reports `available: false`, so callers on the publish path
|
|
14
|
+
* can FAIL CLOSED rather than silently ship under-redacted content.
|
|
15
|
+
*/
|
|
16
|
+
/** Options for `applyMlPiiToRows`; every field is optional. */
export interface MlPiiOptions {
|
|
17
|
+
/**
 * Minimum model score for a hit to count. Defaults to the NER default (0.5).
 * Only forwarded to the default analyzer; it has no effect when `analyze` is supplied.
 */
|
|
18
|
+
minScore?: number;
|
|
19
|
+
/** Injectable analyzer (tests). Defaults to the real NER analyzer. */
|
|
20
|
+
analyze?: (text: string) => Promise<NerHit[]>;
|
|
21
|
+
/**
 * Injectable availability probe (tests). Defaults to loading the NER pipeline
 * and treating an `undefined` result as unavailable.
 */
|
|
22
|
+
ensureAvailable?: () => Promise<boolean>;
|
|
23
|
+
}
|
|
24
|
+
/** Outcome of `applyMlPiiToRows`. */
export interface MlPiiResult {
|
|
25
|
+
/**
 * Always a new array: a shallow copy of the input rows when `available` is
 * false, otherwise new row objects with their content strings re-redacted.
 */
rows: TraceRow[];
|
|
26
|
+
/** False when the ML model could not be loaded — no ML redaction was applied. */
|
|
27
|
+
available: boolean;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
 * Apply the ML PII tier to every content string of every row. Returns the rows
|
|
31
|
+
 * unchanged with `available: false` when the model can't be loaded.
 *
 * Content strings are `prompt`, `output`, `reasoning` (when present), and each
 * `toolIO` entry's `input` and `result` (when present). Empty strings are
 * returned as-is without calling the analyzer. The input rows are not mutated.
|
|
32
|
+
 */
|
|
33
|
+
export declare function applyMlPiiToRows(rows: readonly TraceRow[], opts?: MlPiiOptions): Promise<MlPiiResult>;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { loadNerPipeline, nerPiiAnalyze, redactNer } from '../redactor/ner.js';
|
|
2
|
+
/**
 * Run the ML PII tier over the content strings of each row: `prompt`,
 * `output`, `reasoning` (when present), and every `toolIO` entry's `input` and
 * `result` (when present).
 *
 * When the model is not available the rows are returned untouched (as a
 * shallow copy of the array) with `available: false`. Input rows are never
 * mutated; redacted rows are new objects.
 *
 * @param {ReadonlyArray<object>} rows - Rows to post-process.
 * @param {{ minScore?: number, analyze?: Function, ensureAvailable?: Function }} [opts]
 *   Injectable analyzer / availability probe; `minScore` is passed to the
 *   default analyzer only.
 * @returns {Promise<{ rows: object[], available: boolean }>}
 */
export async function applyMlPiiToRows(rows, opts = {}) {
    const analyze = opts.analyze ?? ((text) => nerPiiAnalyze(text, opts.minScore));
    let available;
    if (opts.ensureAvailable) {
        available = await opts.ensureAvailable();
    }
    else {
        const pipeline = await loadNerPipeline();
        available = pipeline !== undefined;
    }
    if (!available) {
        return { rows: Array.from(rows), available: false };
    }
    // Falsy text (empty string) is passed through without invoking the analyzer.
    const redact = async (text) => {
        if (!text) {
            return text;
        }
        const hits = await analyze(text);
        return redactNer(text, hits).text;
    };
    const redactToolIO = async (io) => {
        const cleaned = { ...io, input: await redact(io.input) };
        if (io.result !== undefined) {
            cleaned.result = await redact(io.result);
        }
        return cleaned;
    };
    const redactRow = async (row) => {
        // Tool I/O entries of one row are processed concurrently.
        const toolIO = await Promise.all(row.toolIO.map((io) => redactToolIO(io)));
        const cleaned = {
            ...row,
            prompt: await redact(row.prompt),
            output: await redact(row.output),
        };
        if (row.reasoning !== undefined) {
            cleaned.reasoning = await redact(row.reasoning);
        }
        cleaned.toolIO = toolIO;
        return cleaned;
    };
    const out = [];
    // Rows are processed one after another, in order.
    for (const row of rows) {
        out.push(await redactRow(row));
    }
    return { rows: out, available: true };
}
|
|
35
|
+
//# sourceMappingURL=ml-pii.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ml-pii.js","sourceRoot":"","sources":["../../src/trace/ml-pii.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAe,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAA;AAgC3F;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAyB,EACzB,OAAqB,EAAE;IAEvB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAA;IACtF,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe;QACpC,CAAC,CAAC,MAAM,IAAI,CAAC,eAAe,EAAE;QAC9B,CAAC,CAAC,CAAC,MAAM,eAAe,EAAE,CAAC,KAAK,SAAS,CAAA;IAE3C,IAAI,CAAC,SAAS;QAAE,OAAO,EAAE,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAA;IAE5D,MAAM,KAAK,GAAG,KAAK,EAAE,IAAY,EAAmB,EAAE;QACpD,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAA;QACtB,OAAO,SAAS,CAAC,IAAI,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;IAClD,CAAC,CAAA;IAED,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,EAAE,EAAC,EAAE,CAAC,CAAC;YAC1B,GAAG,EAAE;YACL,KAAK,EAAE,MAAM,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC;YAC5B,GAAG,CAAC,EAAE,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,KAAK,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACvE,CAAC,CAAC,CACJ,CAAA;QACD,GAAG,CAAC,IAAI,CAAC;YACP,GAAG,GAAG;YACN,MAAM,EAAE,MAAM,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC;YAC/B,MAAM,EAAE,MAAM,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC;YAC/B,GAAG,CAAC,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjF,MAAM;SACP,CAAC,CAAA;IACJ,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;AACvC,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { type TraceRow } from '../schema/trace.js';
|
|
2
|
+
import { type KeyMapEntry } from './consent.js';
|
|
3
|
+
export { serializeJsonl as serializeRows } from './serialize.js';
|
|
4
|
+
/** The single community traces dataset (redacted conversation content). */
|
|
5
|
+
export declare const TRACE_REPO = "skein-community/coding-agent-traces";
|
|
6
|
+
export declare const TRACE_DATASET_PATH = "data/traces.jsonl";
|
|
7
|
+
/** Status of the optional ML PII tier for a build/push. */
|
|
8
|
+
export interface MlPiiStatus {
|
|
9
|
+
/** The contributor opted in (`--ml-pii` / `SKEIN_TRACE_ML_PII`). */
|
|
10
|
+
requested: boolean;
|
|
11
|
+
/** The ML model loaded and the tier actually ran. False when unavailable. */
|
|
12
|
+
available: boolean;
|
|
13
|
+
}
|
|
14
|
+
export interface TraceBuildResult {
|
|
15
|
+
rows: TraceRow[];
|
|
16
|
+
mappings: KeyMapEntry[];
|
|
17
|
+
sessions: number;
|
|
18
|
+
mlPii: MlPiiStatus;
|
|
19
|
+
}
|
|
20
|
+
export interface TracePushOptions {
|
|
21
|
+
builtAt: string;
|
|
22
|
+
dryRun?: boolean;
|
|
23
|
+
/** Required for a real (non-dry-run) push. */
|
|
24
|
+
confirm?: boolean;
|
|
25
|
+
skipAlreadyPushed?: boolean;
|
|
26
|
+
/** Opt in to the ML PII tier. A real push FAILS CLOSED if it's unavailable. */
|
|
27
|
+
mlPii?: boolean;
|
|
28
|
+
token?: string;
|
|
29
|
+
endpoint?: string;
|
|
30
|
+
private?: boolean;
|
|
31
|
+
}
|
|
32
|
+
export interface TracePushPlan {
|
|
33
|
+
repo: string;
|
|
34
|
+
datasetPath: string;
|
|
35
|
+
format: 'jsonl';
|
|
36
|
+
rows: number;
|
|
37
|
+
sessions: number;
|
|
38
|
+
bytes: number;
|
|
39
|
+
columns: readonly string[];
|
|
40
|
+
skippedSessions: number;
|
|
41
|
+
mlPii: MlPiiStatus;
|
|
42
|
+
committed: boolean;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Build redacted-content trace rows from every captured session. The sync
|
|
46
|
+
* secret/entropy/PII-regex tiers always run at extraction time; the ML PII tier
|
|
47
|
+
* runs here as an opt-in post-pass when `mlPii` is set.
|
|
48
|
+
*/
|
|
49
|
+
export declare function buildTraceRows(opts: {
|
|
50
|
+
builtAt: string;
|
|
51
|
+
mlPii?: boolean;
|
|
52
|
+
}): Promise<TraceBuildResult>;
|
|
53
|
+
/**
|
|
54
|
+
* Plan (and optionally perform) a push of redacted-content traces to the single
|
|
55
|
+
* trace dataset. Content is already scrubbed at extraction time. A real push
|
|
56
|
+
* requires recorded consent AND explicit confirm; dry-run never touches the
|
|
57
|
+
* network.
|
|
58
|
+
*/
|
|
59
|
+
export declare function pushTraces(opts: TracePushOptions): Promise<TracePushPlan>;
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { createReadStream } from 'node:fs';
|
|
2
|
+
import { createInterface } from 'node:readline';
|
|
3
|
+
import { listStoredSessions } from '../conv/store.js';
|
|
4
|
+
import { isPushed, markPushed } from '../conv/search.js';
|
|
5
|
+
import { HfClient } from '../hf/client.js';
|
|
6
|
+
import { TRACE_COLUMNS } from '../schema/trace.js';
|
|
7
|
+
import { appendKeyMap, hasConsent } from './consent.js';
|
|
8
|
+
import { extractSession } from './extract.js';
|
|
9
|
+
import { applyMlPiiToRows } from './ml-pii.js';
|
|
10
|
+
import { serializeJsonl } from './serialize.js';
|
|
11
|
+
// Re-exported for callers (and tests) that import the JSONL serializer from here.
|
|
12
|
+
export { serializeJsonl as serializeRows } from './serialize.js';
|
|
13
|
+
/** The single community traces dataset (redacted conversation content). */
|
|
14
|
+
export const TRACE_REPO = 'skein-community/coding-agent-traces';
|
|
15
|
+
export const TRACE_DATASET_PATH = 'data/traces.jsonl';
|
|
16
|
+
/**
 * Collect redacted trace rows from every stored session file.
 *
 * Each file returned by `listStoredSessions()` is parsed with `readEvents` and
 * handed to `extractSession`; sessions that yield no rows are skipped. For every
 * kept session a key-map entry (session key, source session id, harness, row
 * count, `builtAt`) is recorded. When `opts.mlPii` is truthy the combined rows
 * are additionally passed through `applyMlPiiToRows`.
 *
 * @param opts `builtAt` is copied onto each key-map entry; `mlPii` opts in to
 *   the ML PII post-pass.
 * @returns `{ rows, mappings, sessions, mlPii }`, where `sessions` is the number
 *   of mappings and `mlPii` reports whether the tier was requested and whether
 *   it was available.
 */
export async function buildTraceRows(opts) {
    const sessionFiles = await listStoredSessions();
    const collected = [];
    const mappings = [];

    for (const sessionFile of sessionFiles) {
        const events = await readEvents(sessionFile);
        const extracted = extractSession(events);
        const rowCount = extracted.rows.length;
        if (rowCount > 0) {
            collected.push(...extracted.rows);
            mappings.push({
                sessionKey: extracted.sessionKey,
                sourceSessionId: extracted.sourceSessionId,
                harness: extracted.harness,
                rowCount,
                builtAt: opts.builtAt,
            });
        }
    }

    if (!opts.mlPii) {
        // ML tier not requested: report it as neither requested nor available.
        return {
            rows: collected,
            mappings,
            sessions: mappings.length,
            mlPii: { requested: false, available: false },
        };
    }

    const { rows, available } = await applyMlPiiToRows(collected);
    return {
        rows,
        mappings,
        sessions: mappings.length,
        mlPii: { requested: true, available },
    };
}
|
|
52
|
+
/**
 * Build the trace rows, compute a push plan and, unless `opts.dryRun` is set,
 * upload the serialized JSONL to the trace dataset.
 *
 * A dry run returns the plan (`committed: false`) as soon as it is computed,
 * before the consent/confirm/token checks and before `HfClient` is constructed.
 * A real push throws, in this order, when:
 *   1. the ML PII tier was requested but reported unavailable (fail closed),
 *   2. `hasConsent()` resolves falsy,
 *   3. `opts.confirm` is falsy,
 *   4. no token is found in `opts.token`, `HF_TOKEN` or `HUGGINGFACE_TOKEN`.
 *
 * With `opts.skipAlreadyPushed`, sessions for which `isPushed` resolves truthy
 * are left out of the upload and counted in `skippedSessions`.
 *
 * @param opts Push options (`builtAt`, `dryRun`, `confirm`, `skipAlreadyPushed`,
 *   `mlPii`, `token`, `endpoint`, `private`).
 * @returns The plan; `committed` is `true` only after the commit, the key-map
 *   append and the pushed markers have all completed.
 * @throws {Error} On any of the failed preconditions listed above.
 */
export async function pushTraces(opts) {
    const build = await buildTraceRows({ builtAt: opts.builtAt, mlPii: Boolean(opts.mlPii) });

    // Optionally drop sessions that were already pushed to this repo.
    let included = build.mappings;
    let skippedSessions = 0;
    if (opts.skipAlreadyPushed) {
        const notYetPushed = [];
        for (const mapping of build.mappings) {
            const alreadyPushed = await isPushed(mapping.sourceSessionId, TRACE_REPO);
            if (alreadyPushed) {
                skippedSessions++;
            }
            else {
                notYetPushed.push(mapping);
            }
        }
        included = notYetPushed;
    }

    // Keep only the rows that belong to an included session.
    const keptKeys = new Set();
    for (const { sessionKey } of included) {
        keptKeys.add(sessionKey);
    }
    const rows = build.rows.filter((row) => keptKeys.has(row.sessionKey));
    const content = serializeJsonl(rows);

    const plan = {
        repo: TRACE_REPO,
        datasetPath: TRACE_DATASET_PATH,
        format: 'jsonl',
        rows: rows.length,
        sessions: included.length,
        bytes: content.byteLength,
        columns: TRACE_COLUMNS,
        skippedSessions,
        mlPii: build.mlPii,
        committed: false,
    };
    if (opts.dryRun) {
        return plan;
    }

    // Fail closed: never publish when the requested ML tier did not run.
    if (opts.mlPii && !build.mlPii.available) {
        const reason = [
            'ML PII redaction was requested (--ml-pii) but the model is unavailable; ',
            'refusing to publish potentially under-redacted content. Install the optional ',
            '`@huggingface/transformers` dependency (and allow the model to download) or retry ',
            'without --ml-pii.',
        ].join('');
        throw new Error(reason);
    }
    const consented = await hasConsent();
    if (!consented) {
        throw new Error('No recorded consent. Run `skein trace consent --accept` before pushing.');
    }
    if (!opts.confirm) {
        throw new Error('Refusing to push without explicit confirmation (pass --yes).');
    }
    const token = opts.token ?? process.env['HF_TOKEN'] ?? process.env['HUGGINGFACE_TOKEN'] ?? '';
    if (!token) {
        throw new Error('HF_TOKEN env var is required for a real push.');
    }

    const client = new HfClient(token, opts.endpoint);
    const [organization, name] = TRACE_REPO.split('/');
    if (!organization || !name) {
        throw new Error(`Invalid trace repo id: ${TRACE_REPO}`);
    }
    await client.ensureDataset({
        organization,
        name,
        type: 'dataset',
        private: opts.private ?? false,
    });
    // NOTE(review): when `rows` is empty this still commits an empty file to
    // TRACE_DATASET_PATH — confirm that is intended.
    await client.commitFiles({
        repoId: TRACE_REPO,
        summary: `skein: ${rows.length} trace rows from ${included.length} sessions`,
        description: 'Redacted-content traces pushed via `skein trace push`.',
        files: [{ path: TRACE_DATASET_PATH, content }],
    });

    // Record the local bookkeeping only after the commit went through.
    await appendKeyMap(included);
    for (const mapping of included) {
        await markPushed(mapping.sourceSessionId, TRACE_REPO, content.byteLength);
    }
    return { ...plan, committed: true };
}
|
|
125
|
+
/**
 * Read a JSONL file and return the parsed value of every line.
 *
 * Lines that are empty or whitespace-only are ignored, and lines that fail
 * `JSON.parse` are dropped silently (best-effort read).
 *
 * @param file Path handed to `createReadStream`.
 * @returns The parsed values, in file order.
 */
async function readEvents(file) {
    const lines = createInterface({
        input: createReadStream(file, { encoding: 'utf8' }),
        // Always treat "\r\n" as one line break, however the chunks arrive.
        crlfDelay: Number.POSITIVE_INFINITY,
    });
    const events = [];
    for await (const line of lines) {
        if (line.trim() === '') {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // skip malformed lines
            continue;
        }
        events.push(parsed);
    }
    return events;
}
|
|
141
|
+
//# sourceMappingURL=push.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"push.js","sourceRoot":"","sources":["../../src/trace/push.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AACrD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AACxD,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAA;AAE1C,OAAO,EAAE,aAAa,EAAiB,MAAM,oBAAoB,CAAA;AACjE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAA;AACzE,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAA;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAE/C,kFAAkF;AAClF,OAAO,EAAE,cAAc,IAAI,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEhE,2EAA2E;AAC3E,MAAM,CAAC,MAAM,UAAU,GAAG,qCAAqC,CAAA;AAC/D,MAAM,CAAC,MAAM,kBAAkB,GAAG,mBAAmB,CAAA;AA2CrD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAGpC;IACC,MAAM,KAAK,GAAG,MAAM,kBAAkB,EAAE,CAAA;IACxC,IAAI,IAAI,GAAe,EAAE,CAAA;IACzB,MAAM,QAAQ,GAAkB,EAAE,CAAA;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,CAAA;QACrC,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAA;QACrC,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QACtC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,CAAA;QACzB,QAAQ,CAAC,IAAI,CAAC;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,eAAe,EAAE,MAAM,CAAC,eAAe;YACvC,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;YAC5B,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAA;IACJ,CAAC;IAED,IAAI,WAAW,GAAG,KAAK,CAAA;IACvB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,GAAG,OAAO,CAAC,IAAI,CAAA;QACnB,WAAW,GAAG,OAAO,CAAC,SAAS,CAAA;IACjC,CAAC;IAED,OAAO;QACL,IAAI;QACJ,QAAQ;QACR,QAAQ,EAAE,QAAQ,CAAC,MAAM;QACzB,KAAK,EAAE,EAAE,SAAS,EAAE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,WAAW,EAAE;KAClE,CAAA;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAsB;IACrD,MAAM,KAAK,GAAG,MAAM,cAAc,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;IAEzF,IAAI,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAA;IAC7B,IAAI,eAAe,GAAG,CAAC,CAAA;IACvB,IAAI,IAAI,CAAC,iBAAiB,EAA
E,CAAC;QAC3B,MAAM,IAAI,GAAkB,EAAE,CAAA;QAC9B,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YAC/B,IAAI,MAAM,QAAQ,CAAC,CAAC,CAAC,eAAe,EAAE,UAAU,CAAC;gBAAE,eAAe,EAAE,CAAA;;gBAC/D,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACnB,CAAC;QACD,QAAQ,GAAG,IAAI,CAAA;IACjB,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAA;IAC7D,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAA;IACnE,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;IAEpC,MAAM,IAAI,GAAkB;QAC1B,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,kBAAkB;QAC/B,MAAM,EAAE,OAAO;QACf,IAAI,EAAE,IAAI,CAAC,MAAM;QACjB,QAAQ,EAAE,QAAQ,CAAC,MAAM;QACzB,KAAK,EAAE,OAAO,CAAC,UAAU;QACzB,OAAO,EAAE,aAAa;QACtB,eAAe;QACf,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,SAAS,EAAE,KAAK;KACjB,CAAA;IAED,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IAE5B,IAAI,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;QACzC,MAAM,IAAI,KAAK,CACb,0EAA0E;YACxE,+EAA+E;YAC/E,oFAAoF;YACpF,mBAAmB,CACtB,CAAA;IACH,CAAC;IACD,IAAI,CAAC,CAAC,MAAM,UAAU,EAAE,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,yEAAyE,CAAC,CAAA;IAC5F,CAAC;IACD,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,8DAA8D,CAAC,CAAA;IACjF,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,IAAI,EAAE,CAAA;IAC7F,IAAI,CAAC,KAAK;QAAE,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAA;IAE5E,MAAM,MAAM,GAAG,IAAI,QAAQ,CAAC,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAA;IACjD,MAAM,CAAC,YAAY,EAAE,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IAClD,IAAI,CAAC,YAAY,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,0BAA0B,UAAU,EAAE,CAAC,CAAA;IACnF,MAAM,MAAM,CAAC,aAAa,CAAC;QACzB,YAAY;QACZ,IAAI;QACJ,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,KAAK;KAC/B,CAAC,CAAA;IACF,MAAM,MAAM,CAAC,WAAW,CAAC;QACvB,MAAM,EAAE,UAAU;QAClB,OAAO,EAAE,UAAU,IAAI,CAAC,MAAM,oBAAoB,QAAQ,CAAC,MAAM,WAAW;QAC5E,WAAW,EAAE,wDAAwD;QACrE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,kBAAkB,EAAE,OAAO,EAAE,CAAC;KAC/C,CAAC,CAAA;IACF,MAAM,YAAY,CAAC,QAAQ,CA
AC,CAAA;IAC5B,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,MAAM,UAAU,CAAC,CAAC,CAAC,eAAe,EAAE,UAAU,EAAE,OAAO,CAAC,UAAU,CAAC,CAAA;IAE7F,OAAO,EAAE,GAAG,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;AACrC,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,IAAY;IACpC,MAAM,GAAG,GAAgB,EAAE,CAAA;IAC3B,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;IAC3D,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,iBAAiB,EAAE,CAAC,CAAA;IAClF,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;QAC5B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,SAAQ;QAC1B,IAAI,CAAC;YACH,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAc,CAAC,CAAA;QACzC,CAAC;QAAC,MAAM,CAAC;YACP,uBAAuB;QACzB,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAA;AACZ,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
import { type TraceRow } from '../schema/trace.js';
|
|
3
|
+
/** Output formats for `skein trace export`. */
|
|
4
|
+
export type TraceFormat = 'jsonl' | 'json' | 'csv';
|
|
5
|
+
export declare const TRACE_FORMATS: readonly TraceFormat[];
|
|
6
|
+
/** One JSON object per line. The Hub auto-generates the Parquet view from this. */
|
|
7
|
+
export declare function serializeJsonl(rows: readonly TraceRow[]): Buffer;
|
|
8
|
+
/** A single, pretty-printed JSON array. */
|
|
9
|
+
export declare function serializeJson(rows: readonly TraceRow[]): Buffer;
|
|
10
|
+
/**
|
|
11
|
+
* Flat CSV with one header row (the allowlisted columns, in order) and one line
|
|
12
|
+
* per trace row. Nested columns (`toolCalls`, `toolIO`) are encoded as JSON
|
|
13
|
+
* strings inside their cell; absent optional fields become empty cells. Cells
|
|
14
|
+
* are RFC-4180 quoted when they contain a comma, quote, CR, or LF.
|
|
15
|
+
*/
|
|
16
|
+
export declare function serializeCsv(rows: readonly TraceRow[]): Buffer;
|
|
17
|
+
/** Serialize rows in the requested format. */
|
|
18
|
+
export declare function serializeTraces(rows: readonly TraceRow[], format: TraceFormat): Buffer;
|
|
19
|
+
/**
|
|
20
|
+
* Resolve the output format: an explicit `--format` wins (validated); otherwise
|
|
21
|
+
* infer from the `--out` file extension; otherwise default to `jsonl`.
|
|
22
|
+
*/
|
|
23
|
+
export declare function resolveTraceFormat(explicit: string | undefined, outPath?: string): TraceFormat;
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
import { TRACE_COLUMNS } from '../schema/trace.js';
|
|
3
|
+
export const TRACE_FORMATS = ['jsonl', 'json', 'csv'];
|
|
4
|
+
/** One JSON object per line. The Hub auto-generates the Parquet view from this. */
|
|
5
|
+
export function serializeJsonl(rows) {
|
|
6
|
+
if (rows.length === 0)
|
|
7
|
+
return Buffer.from('', 'utf8');
|
|
8
|
+
return Buffer.from(`${rows.map(r => JSON.stringify(r)).join('\n')}\n`, 'utf8');
|
|
9
|
+
}
|
|
10
|
+
/**
 * Encode rows as one pretty-printed (2-space indented) JSON array followed by
 * a trailing newline.
 *
 * @param rows Rows to encode.
 * @returns UTF-8 encoded JSON.
 */
export function serializeJson(rows) {
    const pretty = JSON.stringify(rows, null, 2);
    return Buffer.from(`${pretty}\n`, 'utf8');
}
|
|
14
|
+
/**
 * Encode rows as flat CSV.
 *
 * The first line is the header (`TRACE_COLUMNS`, in order); each following line
 * is one row with its cells in the same column order, every cell formatted by
 * `csvCell`. Lines are joined with "\n" and the output ends with a trailing
 * "\n", so an empty input still produces the header line.
 *
 * @param rows Rows to encode.
 * @returns UTF-8 encoded CSV.
 */
export function serializeCsv(rows) {
    const toRecord = (row) => TRACE_COLUMNS.map((column) => csvCell(row[column])).join(',');
    const records = [TRACE_COLUMNS.join(','), ...rows.map((row) => toRecord(row))];
    return Buffer.from(`${records.join('\n')}\n`, 'utf8');
}
|
|
25
|
+
/**
 * Serialize rows in the requested format.
 *
 * `'json'` and `'csv'` dispatch to their serializers; every other value
 * (including `'jsonl'`) falls back to JSON Lines.
 *
 * @param rows Rows to encode.
 * @param format Requested output format.
 * @returns The encoded buffer.
 */
export function serializeTraces(rows, format) {
    if (format === 'json') {
        return serializeJson(rows);
    }
    if (format === 'csv') {
        return serializeCsv(rows);
    }
    return serializeJsonl(rows);
}
|
|
36
|
+
/**
 * Resolve the output format for a trace export.
 *
 * Precedence:
 *   1. `explicit` (matched case-insensitively) — must be one of
 *      `TRACE_FORMATS`, otherwise an error is thrown.
 *   2. The extension of `outPath` (case-insensitive): `.json` → `json`,
 *      `.csv` → `csv`, `.jsonl` / `.ndjson` → `jsonl`.
 *   3. `jsonl`.
 *
 * @param explicit Value of `--format`, if given.
 * @param outPath Value of `--out`, if given.
 * @returns The resolved format.
 * @throws {Error} When `explicit` is not a known format.
 */
export function resolveTraceFormat(explicit, outPath) {
    if (explicit) {
        const lower = explicit.toLowerCase();
        if (!TRACE_FORMATS.includes(lower)) {
            throw new Error(`Unknown --format "${explicit}". Use one of: ${TRACE_FORMATS.join(', ')}.`);
        }
        return lower;
    }
    if (outPath) {
        const dot = outPath.lastIndexOf('.');
        // No dot means no extension. Without this guard `lastIndexOf` returns -1
        // and the slice would start at 0, yielding the whole path, so a bare file
        // named `csv` or `json` would be mistaken for that format.
        const ext = dot === -1 ? '' : outPath.slice(dot + 1).toLowerCase();
        if (ext === 'json') {
            return 'json';
        }
        if (ext === 'csv') {
            return 'csv';
        }
        if (ext === 'jsonl' || ext === 'ndjson') {
            return 'jsonl';
        }
    }
    return 'jsonl';
}
|
|
59
|
+
/**
 * Format one value as a CSV cell.
 *
 * `undefined` and `null` become an empty cell. Objects (arrays included) are
 * encoded with `JSON.stringify`; everything else goes through `String()`. The
 * text is wrapped in double quotes, with embedded quotes doubled, when it
 * contains a comma, double quote, CR or LF.
 *
 * @param value Cell value.
 * @returns The cell text.
 */
function csvCell(value) {
    if (value === undefined || value === null) {
        return '';
    }
    let raw;
    if (typeof value === 'object') {
        raw = JSON.stringify(value);
    }
    else {
        raw = String(value);
    }
    const needsQuoting = /[",\r\n]/.test(raw);
    if (!needsQuoting) {
        return raw;
    }
    const escaped = raw.split('"').join('""');
    return `"${escaped}"`;
}
|
|
67
|
+
//# sourceMappingURL=serialize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"serialize.js","sourceRoot":"","sources":["../../src/trace/serialize.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAA;AACpC,OAAO,EAAE,aAAa,EAAiB,MAAM,oBAAoB,CAAA;AAIjE,MAAM,CAAC,MAAM,aAAa,GAA2B,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAA;AAE7E,mFAAmF;AACnF,MAAM,UAAU,cAAc,CAAC,IAAyB;IACtD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,MAAM,CAAC,CAAA;IACrD,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;AAChF,CAAC;AAED,2CAA2C;AAC3C,MAAM,UAAU,aAAa,CAAC,IAAyB;IACrD,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;AAClE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAyB;IACpD,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACtC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAC3B,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,OAAO,CAAE,GAA+B,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CACnF,CAAA;IACD,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;AAClE,CAAC;AAED,8CAA8C;AAC9C,MAAM,UAAU,eAAe,CAAC,IAAyB,EAAE,MAAmB;IAC5E,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,MAAM;YACT,OAAO,aAAa,CAAC,IAAI,CAAC,CAAA;QAC5B,KAAK,KAAK;YACR,OAAO,YAAY,CAAC,IAAI,CAAC,CAAA;QAC3B;YACE,OAAO,cAAc,CAAC,IAAI,CAAC,CAAA;IAC/B,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,QAA4B,EAAE,OAAgB;IAC/E,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAA;QACpC,IAAI,CAAE,aAAmC,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1D,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,kBAAkB,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAC7F,CAAC;QACD,OAAO,KAAoB,CAAA;IAC7B,CAAC;IACD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;QACrE,IAAI,GAAG,KAAK,MAAM;YAAE,OAAO,MAAM,CAAA;QACjC,IAAI,GAAG,KAAK,KAAK;YAAE,OAAO,KAAK,CAAA;QAC/B,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,QAAQ;YAAE,OAAO,OAAO,CAA
A;IACzD,CAAC;IACD,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,SAAS,OAAO,CAAC,KAAc;IAC7B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,EAAE,CAAA;IACpD,MAAM,GAAG,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;IAC7E,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAA;IAC/D,OAAO,GAAG,CAAA;AACZ,CAAC"}
|