ccsniff 1.0.28 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/package.json +1 -1
- package/src/cli.js +17 -2
- package/src/index.cjs +32 -1
- package/src/index.js +31 -0
- package/src/unsloth.js +109 -0
package/README.md
CHANGED
|
@@ -40,8 +40,33 @@ npx ccsniff --since 24h --grep "rs-exec" --limit 50
|
|
|
40
40
|
npx ccsniff --since 7d --role user --json
|
|
41
41
|
npx ccsniff -f # tail new events live
|
|
42
42
|
npx ccsniff --rollup out.ndjson --since 7d
|
|
43
|
+
npx ccsniff --unsloth train.jsonl --since 7d --no-subagents
|
|
44
|
+
npx ccsniff --unsloth train.jsonl --unsloth-format sharegpt --since 7d
|
|
43
45
|
```
|
|
44
46
|
|
|
47
|
+
### Unsloth training export
|
|
48
|
+
|
|
49
|
+
`--unsloth <out>` writes one JSONL line per Claude Code session, ready for
|
|
50
|
+
Unsloth / TRL conversational fine-tuning. All filter flags (`--since`,
|
|
51
|
+
`--project`, `--session`, `--no-subagents`, ...) apply.
|
|
52
|
+
|
|
53
|
+
Two formats are supported via `--unsloth-format`:
|
|
54
|
+
|
|
55
|
+
- `messages` (default) — OpenAI / ChatML shape with native tool calling:
|
|
56
|
+
```json
|
|
57
|
+
{"session_id":"...","messages":[
|
|
58
|
+
{"role":"user","content":"find foobar"},
|
|
59
|
+
{"role":"assistant","content":null,"tool_calls":[{"id":"tu1","type":"function","function":{"name":"Grep","arguments":"{\"pattern\":\"foobar\"}"}}]},
|
|
60
|
+
{"role":"tool","tool_call_id":"tu1","content":"hit at line 3"},
|
|
61
|
+
{"role":"assistant","content":"done"}
|
|
62
|
+
]}
|
|
63
|
+
```
|
|
64
|
+
- `sharegpt` — `{conversations:[{from:human|gpt|tool, value}]}`, compatible
|
|
65
|
+
with `standardize_sharegpt`. Tool calls are inlined into the `gpt` turn as
|
|
66
|
+
`<tool_call>name(json-args)</tool_call>`.
|
|
67
|
+
|
|
68
|
+
Sessions that do not contain at least one user turn and at least one assistant turn are skipped (no training value).
|
|
69
|
+
|
|
45
70
|
## API
|
|
46
71
|
|
|
47
72
|
### `watch(projectsDir?)` → `JsonlWatcher`
|
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { JsonlReplayer, rollup } from './index.js';
|
|
2
|
+
import { JsonlReplayer, rollup, vault } from './index.js';
|
|
3
|
+
import { toUnslothMessages, toShareGPT } from './unsloth.js';
|
|
4
|
+
import fs from 'fs';
|
|
3
5
|
import path from 'path';
|
|
4
6
|
|
|
5
7
|
if (process.argv[2] === 'gui') {
|
|
@@ -22,8 +24,10 @@ if (process.argv[2] === 'gui') {
|
|
|
22
24
|
process.stdin.resume();
|
|
23
25
|
} else {
|
|
24
26
|
|
|
27
|
+
{ const r = vault(); if (r.copied > 0) process.stderr.write(`# vault: ${r.copied} copied → ~/.claude/history-backup\n`); }
|
|
28
|
+
|
|
25
29
|
const FLAGS = {
|
|
26
|
-
string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort'],
|
|
30
|
+
string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort', 'unsloth', 'unsloth-format'],
|
|
27
31
|
multi: ['grep', 'igrep', 'role', 'type', 'tool', 'session', 'sid', 'project', 'cwd'],
|
|
28
32
|
number: ['limit', 'head', 'tail-n', 'ctx', 'truncate'],
|
|
29
33
|
bool: ['json', 'ndjson', 'tail', 'f', 'full', 'reverse', 'invert', 'no-subagents', 'only-subagents', 'no-meta', 'only-meta', 'list-sessions', 'list-projects', 'list-tools', 'stats', 'count', 'help', 'h'],
|
|
@@ -97,6 +101,8 @@ OUTPUT
|
|
|
97
101
|
-f, --tail live tail after replay
|
|
98
102
|
--rollup <out> dump filtered events to file
|
|
99
103
|
--format ndjson|sqlite rollup format (default ndjson; sqlite needs better-sqlite3)
|
|
104
|
+
--unsloth <out> write Unsloth training JSONL (one conversation per session per line)
|
|
105
|
+
--unsloth-format <fmt> messages (OpenAI/ChatML, default) | sharegpt
|
|
100
106
|
|
|
101
107
|
EXAMPLES
|
|
102
108
|
ccsniff --since 24h --grep "rs-exec" --limit 50
|
|
@@ -358,6 +364,15 @@ if (opts.count) {
|
|
|
358
364
|
process.exit(0);
|
|
359
365
|
}
|
|
360
366
|
|
|
367
|
+
// Unsloth export: convert the matched events in `rows` into one training record per
// session, write them as JSONL to the path given by --unsloth, then exit.
if (opts.unsloth) {
  // Supported --unsloth-format values and the converter that implements each.
  const unslothConverters = { messages: toUnslothMessages, sharegpt: toShareGPT };
  const fmt = opts['unsloth-format'] || 'messages';
  // Reject unknown formats instead of silently falling back to `messages`
  // while reporting the unknown name in the summary line.
  if (!Object.prototype.hasOwnProperty.call(unslothConverters, fmt)) {
    process.stderr.write(`# unsloth: unknown --unsloth-format "${fmt}" (expected messages | sharegpt)\n`);
    process.exit(1);
  }
  const recs = unslothConverters[fmt](rows);
  // One JSON document per line; the trailing newline is only added when there is output.
  const body = recs.map(r => JSON.stringify(r)).join('\n') + (recs.length ? '\n' : '');
  fs.writeFileSync(opts.unsloth, body);
  process.stderr.write(`# unsloth(${fmt}): ${recs.length} conversations → ${opts.unsloth}\n`);
  process.exit(0);
}
|
|
375
|
+
|
|
361
376
|
for (const ev of rows) process.stdout.write(formatRow(ev, opts));
|
|
362
377
|
process.stderr.write(`# ${stats.events} events / ${stats.files} files / ${rows.length} matched\n`);
|
|
363
378
|
|
package/src/index.cjs
CHANGED
|
@@ -228,6 +228,37 @@ function replay(projectsDir, opts) {
|
|
|
228
228
|
return new JsonlReplayer(projectsDir);
|
|
229
229
|
}
|
|
230
230
|
|
|
231
|
+
/**
 * Mirror every `.jsonl` file found under `projectsDir` into `destDir`, keeping the
 * relative directory layout and stamping each copy with the source's atime/mtime.
 *
 * The copy is best-effort: unreadable directories, files that cannot be stat'ed and
 * failed copies are silently ignored so a backup problem never breaks the caller.
 *
 * @param {object} [options]
 * @param {string} [options.projectsDir] - Root directory to scan (defaults to DEFAULT_DIR).
 * @param {string} [options.destDir] - Backup root (defaults to ~/.claude/history-backup).
 * @returns {{copied: number, skipped: number}} Files copied this run and files that
 *   were already up to date. Both are 0 when `projectsDir` does not exist.
 */
function vault({ projectsDir = DEFAULT_DIR, destDir = path.join(os.homedir(), '.claude', 'history-backup') } = {}) {
  if (!fs.existsSync(projectsDir)) return { copied: 0, skipped: 0 };

  // Directories nested more than this many levels below projectsDir are not scanned.
  const MAX_DEPTH = 5;
  // utimesSync() below receives Date objects, which only carry millisecond precision,
  // while Stats.mtimeMs can have a sub-millisecond fraction. Without a tolerance the
  // backup would look older than its source and be re-copied on every run.
  const MTIME_TOLERANCE_MS = 1;

  let copied = 0;
  let skipped = 0;

  const walk = (src, depth) => {
    if (depth > MAX_DEPTH) return;
    let entries;
    try { entries = fs.readdirSync(src, { withFileTypes: true }); } catch { return; }
    for (const d of entries) {
      const srcPath = path.join(src, d.name);
      if (d.isDirectory()) { walk(srcPath, depth + 1); continue; }
      if (!d.name.endsWith('.jsonl')) continue;
      const dstPath = path.join(destDir, path.relative(projectsDir, srcPath));
      let srcStat;
      try { srcStat = fs.statSync(srcPath); } catch { continue; }
      try {
        const dstStat = fs.statSync(dstPath);
        const sameSize = dstStat.size === srcStat.size;
        const notOlder = dstStat.mtimeMs + MTIME_TOLERANCE_MS >= srcStat.mtimeMs;
        if (sameSize && notOlder) { skipped++; continue; }
      } catch {
        // No backup yet (or it cannot be stat'ed): fall through and copy.
      }
      try {
        fs.mkdirSync(path.dirname(dstPath), { recursive: true });
        fs.copyFileSync(srcPath, dstPath);
        // Carry the source timestamps over so the freshness check above works next run.
        fs.utimesSync(dstPath, srcStat.atime, srcStat.mtime);
        copied++;
      } catch {
        // Deliberately best-effort: a failed copy is neither counted nor fatal.
      }
    }
  };

  walk(projectsDir, 0);
  return { copied, skipped };
}
|
|
261
|
+
|
|
231
262
|
async function rollup({ projectsDir, since = 0, out, format = 'ndjson' } = {}) {
|
|
232
263
|
if (!out) throw new Error('rollup: out path required');
|
|
233
264
|
const r = new JsonlReplayer(projectsDir);
|
|
@@ -299,4 +330,4 @@ async function rollupSqlite(r, { since, out }) {
|
|
|
299
330
|
return { ...stats, rows, format: 'sqlite', out };
|
|
300
331
|
}
|
|
301
332
|
|
|
302
|
-
module.exports = { JsonlWatcher, JsonlReplayer, watch, replay, rollup };
|
|
333
|
+
module.exports = { JsonlWatcher, JsonlReplayer, watch, replay, rollup, vault };
|
package/src/index.js
CHANGED
|
@@ -234,6 +234,37 @@ export function replay(projectsDir, opts) {
|
|
|
234
234
|
return new JsonlReplayer(projectsDir);
|
|
235
235
|
}
|
|
236
236
|
|
|
237
|
+
/**
 * Mirror every `.jsonl` file found under `projectsDir` into `destDir`, keeping the
 * relative directory layout and stamping each copy with the source's atime/mtime.
 *
 * The copy is best-effort: unreadable directories, files that cannot be stat'ed and
 * failed copies are silently ignored so a backup problem never breaks the caller.
 *
 * @param {object} [options]
 * @param {string} [options.projectsDir] - Root directory to scan (defaults to DEFAULT_DIR).
 * @param {string} [options.destDir] - Backup root (defaults to ~/.claude/history-backup).
 * @returns {{copied: number, skipped: number}} Files copied this run and files that
 *   were already up to date. Both are 0 when `projectsDir` does not exist.
 */
export function vault({ projectsDir = DEFAULT_DIR, destDir = path.join(os.homedir(), '.claude', 'history-backup') } = {}) {
  if (!fs.existsSync(projectsDir)) return { copied: 0, skipped: 0 };

  // Directories nested more than this many levels below projectsDir are not scanned.
  const MAX_DEPTH = 5;
  // utimesSync() below receives Date objects, which only carry millisecond precision,
  // while Stats.mtimeMs can have a sub-millisecond fraction. Without a tolerance the
  // backup would look older than its source and be re-copied on every run.
  const MTIME_TOLERANCE_MS = 1;

  let copied = 0;
  let skipped = 0;

  const walk = (src, depth) => {
    if (depth > MAX_DEPTH) return;
    let entries;
    try { entries = fs.readdirSync(src, { withFileTypes: true }); } catch { return; }
    for (const d of entries) {
      const srcPath = path.join(src, d.name);
      if (d.isDirectory()) { walk(srcPath, depth + 1); continue; }
      if (!d.name.endsWith('.jsonl')) continue;
      const dstPath = path.join(destDir, path.relative(projectsDir, srcPath));
      let srcStat;
      try { srcStat = fs.statSync(srcPath); } catch { continue; }
      try {
        const dstStat = fs.statSync(dstPath);
        const sameSize = dstStat.size === srcStat.size;
        const notOlder = dstStat.mtimeMs + MTIME_TOLERANCE_MS >= srcStat.mtimeMs;
        if (sameSize && notOlder) { skipped++; continue; }
      } catch {
        // No backup yet (or it cannot be stat'ed): fall through and copy.
      }
      try {
        fs.mkdirSync(path.dirname(dstPath), { recursive: true });
        fs.copyFileSync(srcPath, dstPath);
        // Carry the source timestamps over so the freshness check above works next run.
        fs.utimesSync(dstPath, srcStat.atime, srcStat.mtime);
        copied++;
      } catch {
        // Deliberately best-effort: a failed copy is neither counted nor fatal.
      }
    }
  };

  walk(projectsDir, 0);
  return { copied, skipped };
}
|
|
267
|
+
|
|
237
268
|
export async function rollup({ projectsDir, since = 0, out, format = 'ndjson' } = {}) {
|
|
238
269
|
if (!out) throw new Error('rollup: out path required');
|
|
239
270
|
const r = new JsonlReplayer(projectsDir);
|
package/src/unsloth.js
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
 * Extract the plain text carried by a content block.
 *
 * Looks, in order, at a string `text`, a string `content`, and an array `content`
 * whose items' `text` fields are concatenated without a separator.
 *
 * @param {object} b - Content block.
 * @returns {string} The block's text, or '' when none of the shapes match.
 */
function textOf(b) {
  const { text, content } = b;
  if (typeof text === 'string') return text;
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  let joined = '';
  for (const part of content) {
    // Items without a usable `text` contribute nothing.
    joined += `${part?.text || ''}`;
  }
  return joined;
}
|
|
7
|
+
|
|
8
|
+
/**
 * Bucket events by session id and order each bucket chronologically.
 *
 * Events without `conversation.id` are collected under the key 'unknown'.
 * Buckets keep the order in which their session was first seen.
 *
 * @param {Iterable<object>} events - Events carrying `conversation.id` and `timestamp`.
 * @returns {Map<string, object[]>} Session id → events sorted by ascending timestamp.
 */
function groupBySession(events) {
  // Normalise a timestamp to epoch milliseconds. Numbers, Dates and numeric strings
  // go through Number(); other strings (e.g. ISO-8601) go through Date.parse().
  // Subtracting raw date strings would give NaN and leave the bucket unsorted.
  // Missing or unparseable values sort as 0.
  const toMs = (ts) => {
    let ms = Number(ts);
    if (Number.isNaN(ms) && typeof ts === 'string') ms = Date.parse(ts);
    return Number.isNaN(ms) ? 0 : ms;
  };

  const bySession = new Map();
  for (const ev of events) {
    const sid = ev.conversation?.id || 'unknown';
    const bucket = bySession.get(sid);
    if (bucket) bucket.push(ev);
    else bySession.set(sid, [ev]);
  }
  for (const bucket of bySession.values()) {
    bucket.sort((a, b) => toMs(a.timestamp) - toMs(b.timestamp));
  }
  return bySession;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Fold one session's ordered events into an OpenAI/ChatML-style message list.
 *
 * - Consecutive user text blocks merge into one `user` message (newline-joined).
 * - Consecutive assistant blocks merge into one `assistant` message; `tool_use`
 *   blocks become `tool_calls` entries and `thinking` blocks are dropped.
 * - User `tool_result` blocks become standalone `tool` messages.
 * - Turns with neither text nor tool calls are discarded.
 *
 * @param {Iterable<object>} evs - Events of a single session, already in order.
 * @returns {object[]} Messages with `role` of 'user', 'assistant' or 'tool'.
 */
function buildMessagesForSession(evs) {
  const transcript = [];
  let pending = null;

  // Emit the turn being accumulated if it carries text or tool calls; always reset it.
  const commit = () => {
    const turn = pending;
    pending = null;
    if (!turn) return;
    const hasCalls = turn.tool_calls && turn.tool_calls.length;
    if (turn.content || hasCalls) transcript.push(turn);
  };

  for (const ev of evs) {
    const block = ev.block || {};
    const kind = block.type;

    switch (ev.role) {
      case 'user': {
        if (kind === 'tool_result') {
          commit();
          const output = textOf(block);
          transcript.push({ role: 'tool', tool_call_id: block.tool_use_id || '', content: output });
          break;
        }
        // Only non-meta text blocks contribute to a user turn.
        if (kind !== 'text' || block.isMeta) break;
        const said = textOf(block);
        if (!said.trim()) break;
        if (pending && pending.role === 'user') {
          pending.content += '\n' + said;
        } else {
          commit();
          pending = { role: 'user', content: said };
        }
        break;
      }

      case 'assistant': {
        // Any assistant block (even an ignored one) opens an assistant turn.
        if (!pending || pending.role !== 'assistant') {
          commit();
          pending = { role: 'assistant', content: '' };
        }
        if (kind === 'text') {
          const said = textOf(block);
          if (said) pending.content = pending.content ? pending.content + '\n' + said : said;
        } else if (kind === 'tool_use') {
          if (!pending.tool_calls) pending.tool_calls = [];
          pending.tool_calls.push({
            id: block.id || '',
            type: 'function',
            function: { name: block.name || '', arguments: JSON.stringify(block.input || {}) },
          });
        }
        break;
      }

      default:
        // Events from other roles do not affect the transcript.
        break;
    }
  }
  commit();

  // Assistant turns consisting solely of tool calls carry `content: null`.
  transcript.forEach((msg) => {
    if (msg.role === 'assistant' && msg.tool_calls && !msg.content) msg.content = null;
  });
  return transcript;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Decide whether a conversation is worth exporting.
 *
 * @param {Iterable<{role: string}>} messages - Messages of one session.
 * @returns {boolean} True when at least one `user` and at least one `assistant`
 *   message are present (in any order).
 */
function hasTrainingValue(messages) {
  const seenRoles = new Set();
  for (const { role } of messages) seenRoles.add(role);
  return seenRoles.has('user') && seenRoles.has('assistant');
}
|
|
74
|
+
|
|
75
|
+
/**
 * Convert events into `messages`-format training records, one per session.
 *
 * @param {Iterable<object>} events - Events from any number of sessions.
 * @returns {Array<{session_id: string, messages: object[]}>} One record per session
 *   that contains both a user and an assistant message; other sessions are omitted.
 */
export function toUnslothMessages(events) {
  const records = [];
  groupBySession(events).forEach((sessionEvents, sessionId) => {
    const messages = buildMessagesForSession(sessionEvents);
    if (hasTrainingValue(messages)) {
      records.push({ session_id: sessionId, messages });
    }
  });
  return records;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Convert events into ShareGPT-format training records, one per session.
 *
 * Roles map user → `human`, assistant → `gpt`, tool → `tool`. Assistant tool calls
 * are appended to the `gpt` value, one per line, as
 * `<tool_call>name(json-args)</tool_call>`.
 *
 * @param {Iterable<object>} events - Events from any number of sessions.
 * @returns {Array<{session_id: string, conversations: Array<{from: string, value: string}>}>}
 *   One record per session that contains both a user and an assistant message.
 */
export function toShareGPT(events) {
  // Translate a single chat message into a ShareGPT turn (null for unknown roles).
  const toTurn = (msg) => {
    switch (msg.role) {
      case 'user':
        return { from: 'human', value: msg.content };
      case 'tool':
        return { from: 'tool', value: msg.content };
      case 'assistant': {
        const lines = [];
        if (msg.content) lines.push(msg.content);
        if (msg.tool_calls && msg.tool_calls.length) {
          for (const call of msg.tool_calls) {
            lines.push(`<tool_call>${call.function.name}(${call.function.arguments})</tool_call>`);
          }
        }
        return { from: 'gpt', value: lines.join('\n') };
      }
      default:
        return null;
    }
  };

  // Grouping, message building and the training-value filter are shared with
  // the `messages` exporter.
  return toUnslothMessages(events).map(({ session_id, messages }) => ({
    session_id,
    conversations: messages.map(toTurn).filter(Boolean),
  }));
}
|