ccsniff 1.0.29 → 1.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/package.json +1 -1
- package/src/cli.js +14 -1
- package/src/unsloth.js +109 -0
package/README.md
CHANGED
|
@@ -40,8 +40,33 @@ npx ccsniff --since 24h --grep "rs-exec" --limit 50
|
|
|
40
40
|
npx ccsniff --since 7d --role user --json
|
|
41
41
|
npx ccsniff -f # tail new events live
|
|
42
42
|
npx ccsniff --rollup out.ndjson --since 7d
|
|
43
|
+
npx ccsniff --unsloth train.jsonl --since 7d --no-subagents
|
|
44
|
+
npx ccsniff --unsloth train.jsonl --unsloth-format sharegpt --since 7d
|
|
43
45
|
```
|
|
44
46
|
|
|
47
|
+
### Unsloth training export
|
|
48
|
+
|
|
49
|
+
`--unsloth <out>` writes one JSONL line per Claude Code session, ready for
|
|
50
|
+
Unsloth / TRL conversational fine-tuning. All filter flags (`--since`,
|
|
51
|
+
`--project`, `--session`, `--no-subagents`, ...) apply.
|
|
52
|
+
|
|
53
|
+
Two formats are supported via `--unsloth-format`:
|
|
54
|
+
|
|
55
|
+
- `messages` (default) — OpenAI / ChatML shape with native tool calling:
|
|
56
|
+
```json
|
|
57
|
+
{"session_id":"...","messages":[
|
|
58
|
+
{"role":"user","content":"find foobar"},
|
|
59
|
+
{"role":"assistant","content":null,"tool_calls":[{"id":"tu1","type":"function","function":{"name":"Grep","arguments":"{\"pattern\":\"foobar\"}"}}]},
|
|
60
|
+
{"role":"tool","tool_call_id":"tu1","content":"hit at line 3"},
|
|
61
|
+
{"role":"assistant","content":"done"}
|
|
62
|
+
]}
|
|
63
|
+
```
|
|
64
|
+
- `sharegpt` — `{conversations:[{from:human|gpt|tool, value}]}`, compatible
|
|
65
|
+
with `standardize_sharegpt`. Tool calls are inlined into the `gpt` turn as
|
|
66
|
+
`<tool_call>name(json-args)</tool_call>`.
|
|
67
|
+
|
|
68
|
+
Sessions that do not contain at least one user message and one assistant message are skipped (no training value).
|
|
69
|
+
|
|
45
70
|
## API
|
|
46
71
|
|
|
47
72
|
### `watch(projectsDir?)` → `JsonlWatcher`
|
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { JsonlReplayer, rollup, vault } from './index.js';
|
|
3
|
+
import { toUnslothMessages, toShareGPT } from './unsloth.js';
|
|
4
|
+
import fs from 'fs';
|
|
3
5
|
import path from 'path';
|
|
4
6
|
|
|
5
7
|
if (process.argv[2] === 'gui') {
|
|
@@ -25,7 +27,7 @@ if (process.argv[2] === 'gui') {
|
|
|
25
27
|
{ const r = vault(); if (r.copied > 0) process.stderr.write(`# vault: ${r.copied} copied → ~/.claude/history-backup\n`); }
|
|
26
28
|
|
|
27
29
|
const FLAGS = {
|
|
28
|
-
string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort'],
|
|
30
|
+
string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort', 'unsloth', 'unsloth-format'],
|
|
29
31
|
multi: ['grep', 'igrep', 'role', 'type', 'tool', 'session', 'sid', 'project', 'cwd'],
|
|
30
32
|
number: ['limit', 'head', 'tail-n', 'ctx', 'truncate'],
|
|
31
33
|
bool: ['json', 'ndjson', 'tail', 'f', 'full', 'reverse', 'invert', 'no-subagents', 'only-subagents', 'no-meta', 'only-meta', 'list-sessions', 'list-projects', 'list-tools', 'stats', 'count', 'help', 'h'],
|
|
@@ -99,6 +101,8 @@ OUTPUT
|
|
|
99
101
|
-f, --tail live tail after replay
|
|
100
102
|
--rollup <out> dump filtered events to file
|
|
101
103
|
--format ndjson|sqlite rollup format (default ndjson; sqlite needs better-sqlite3)
|
|
104
|
+
--unsloth <out> write Unsloth training JSONL (one conversation per session per line)
|
|
105
|
+
--unsloth-format <fmt> messages (OpenAI/ChatML, default) | sharegpt
|
|
102
106
|
|
|
103
107
|
EXAMPLES
|
|
104
108
|
ccsniff --since 24h --grep "rs-exec" --limit 50
|
|
@@ -360,6 +364,15 @@ if (opts.count) {
|
|
|
360
364
|
process.exit(0);
|
|
361
365
|
}
|
|
362
366
|
|
|
367
|
+
// --unsloth <out>: write the matched rows as Unsloth training JSONL (one
// conversation per line) to the given path, report the count on stderr, exit.
if (opts.unsloth) {
  const fmt = opts['unsloth-format'] || 'messages';
  // Reject unknown formats instead of silently falling back to `messages`;
  // a typo would otherwise produce a dataset in the wrong shape.
  if (fmt !== 'messages' && fmt !== 'sharegpt') {
    process.stderr.write(`# unsloth: unknown --unsloth-format "${fmt}" (expected: messages | sharegpt)\n`);
    process.exit(1);
  }
  const recs = fmt === 'sharegpt' ? toShareGPT(rows) : toUnslothMessages(rows);
  // JSONL: one JSON document per line, newline-terminated; empty file when there are no records.
  const body = recs.map((r) => JSON.stringify(r)).join('\n') + (recs.length ? '\n' : '');
  fs.writeFileSync(opts.unsloth, body);
  process.stderr.write(`# unsloth(${fmt}): ${recs.length} conversations → ${opts.unsloth}\n`);
  process.exit(0);
}
|
|
375
|
+
|
|
363
376
|
for (const ev of rows) process.stdout.write(formatRow(ev, opts));
|
|
364
377
|
process.stderr.write(`# ${stats.events} events / ${stats.files} files / ${rows.length} matched\n`);
|
|
365
378
|
|
package/src/unsloth.js
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
 * Extract the plain text carried by a content block.
 *
 * Checked in order: a string `text` field, a string `content` field, then an
 * array `content` whose entries are concatenated with no separator. Array
 * entries may be bare strings or objects with a `text` field; anything else
 * contributes nothing.
 *
 * @param {object|null|undefined} b - Content block.
 * @returns {string} The extracted text, or '' when the block carries none.
 */
function textOf(b) {
  // Tolerate a missing block rather than throwing on property access.
  if (b == null) return '';
  if (typeof b.text === 'string') return b.text;
  if (typeof b.content === 'string') return b.content;
  if (Array.isArray(b.content)) {
    return b.content
      .map((part) => (typeof part === 'string' ? part : part?.text || ''))
      .join('');
  }
  return '';
}
|
|
7
|
+
|
|
8
|
+
/**
 * Normalise an event timestamp to a number usable for ordering.
 *
 * Numbers are used as-is (NaN becomes 0), Date objects use their epoch
 * milliseconds, numeric strings are converted with Number(), and any other
 * string is parsed with Date.parse (e.g. ISO-8601). Anything unusable maps to 0.
 */
function timestampMs(ts) {
  if (typeof ts === 'number') return Number.isNaN(ts) ? 0 : ts;
  if (ts instanceof Date) {
    const ms = ts.getTime();
    return Number.isNaN(ms) ? 0 : ms;
  }
  if (typeof ts === 'string') {
    if (ts.trim() === '') return 0;
    const numeric = Number(ts);
    if (!Number.isNaN(numeric)) return numeric;
    const parsed = Date.parse(ts);
    return Number.isNaN(parsed) ? 0 : parsed;
  }
  return 0;
}

/**
 * Bucket events by session and order each bucket chronologically.
 *
 * The session key is `ev.conversation.id`; events without one are collected
 * under the key 'unknown'. Buckets appear in first-seen order.
 *
 * @param {Iterable<object>} events - Events to group.
 * @returns {Map<string, object[]>} Session id → events sorted by timestamp.
 */
function groupBySession(events) {
  const m = new Map();
  for (const ev of events) {
    const sid = ev.conversation?.id || 'unknown';
    if (!m.has(sid)) m.set(sid, []);
    m.get(sid).push(ev);
  }
  // Compare normalised timestamps: subtracting raw ISO strings yields NaN,
  // which sort() treats as "equal" and therefore never reorders anything.
  // The sort is stable, so ties and missing timestamps keep input order.
  for (const arr of m.values()) {
    arr.sort((a, b) => {
      const x = timestampMs(a.timestamp);
      const y = timestampMs(b.timestamp);
      if (x < y) return -1;
      if (x > y) return 1;
      return 0;
    });
  }
  return m;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Fold one session's ordered events into an OpenAI/ChatML-style message list.
 *
 * - User `text` blocks (non-meta, non-blank) become `user` messages; adjacent
 *   ones are joined with a newline.
 * - User `tool_result` blocks become `tool` messages keyed by `tool_use_id`.
 * - Assistant `text` and `tool_use` blocks are merged into one `assistant`
 *   message until a user/tool message intervenes; `tool_use` blocks become
 *   `tool_calls` entries with JSON-stringified arguments.
 * - Assistant blocks that add nothing (thinking, empty text, other types) are
 *   skipped without disturbing the pending message, so a thinking-only turn
 *   does not split user text that would otherwise be coalesced.
 * - An assistant message with tool calls but no text gets `content: null`.
 *
 * @param {object[]} evs - Events of a single session, already in order.
 * @returns {object[]} Messages with role `user`, `assistant` or `tool`.
 */
function buildMessagesForSession(evs) {
  const messages = [];
  let cur = null;

  // Emit the pending message only if it carries text or tool calls.
  const flush = () => {
    if (cur && (cur.content || (cur.tool_calls && cur.tool_calls.length))) messages.push(cur);
    cur = null;
  };

  // Return the pending assistant message, starting a new one if needed.
  const openAssistant = () => {
    if (!cur || cur.role !== 'assistant') {
      flush();
      cur = { role: 'assistant', content: '' };
    }
    return cur;
  };

  for (const ev of evs) {
    const b = ev.block || {};
    const t = b.type;

    if (ev.role === 'user') {
      if (t === 'tool_result') {
        // A tool result closes whatever message was pending.
        flush();
        messages.push({ role: 'tool', tool_call_id: b.tool_use_id || '', content: textOf(b) });
        continue;
      }
      if (t === 'text' && !b.isMeta) {
        const txt = textOf(b);
        if (!txt.trim()) continue;
        if (cur && cur.role === 'user') {
          cur.content += '\n' + txt;
        } else {
          flush();
          cur = { role: 'user', content: txt };
        }
      }
      continue;
    }

    if (ev.role === 'assistant') {
      if (t === 'text') {
        const txt = textOf(b);
        // Empty text contributes nothing; leave the pending message alone.
        if (!txt) continue;
        const msg = openAssistant();
        msg.content = msg.content ? msg.content + '\n' + txt : txt;
      } else if (t === 'tool_use') {
        const msg = openAssistant();
        if (!msg.tool_calls) msg.tool_calls = [];
        msg.tool_calls.push({
          id: b.id || '',
          type: 'function',
          function: { name: b.name || '', arguments: JSON.stringify(b.input || {}) },
        });
      }
      // Thinking and any other block types are intentionally dropped.
      continue;
    }
  }
  flush();

  // Tool-call-only assistant messages carry null content rather than ''.
  for (const m of messages) {
    if (m.role === 'assistant' && m.tool_calls && !m.content) m.content = null;
  }
  return messages;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Decide whether a message list is worth exporting for training.
 *
 * @param {Iterable<object>} messages - Messages carrying a `role` field.
 * @returns {boolean} True when at least one `user` and at least one
 *   `assistant` message are present, in any order.
 */
function hasTrainingValue(messages) {
  const seenRoles = new Set();
  for (const { role } of messages) seenRoles.add(role);
  return seenRoles.has('user') && seenRoles.has('assistant');
}
|
|
74
|
+
|
|
75
|
+
/**
 * Convert events into Unsloth `messages`-format records, one per session.
 *
 * Events are grouped per session, each group is folded into a message list,
 * and sessions whose messages fail `hasTrainingValue` are left out.
 *
 * @param {Iterable<object>} events - Events to export.
 * @returns {{session_id: string, messages: object[]}[]} One record per kept session.
 */
export function toUnslothMessages(events) {
  const records = [];
  groupBySession(events).forEach((sessionEvents, sessionId) => {
    const messages = buildMessagesForSession(sessionEvents);
    if (hasTrainingValue(messages)) {
      records.push({ session_id: sessionId, messages });
    }
  });
  return records;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Convert events into ShareGPT-format records, one per session.
 *
 * Each session's messages are mapped to `{from, value}` turns: `user` → `human`,
 * `assistant` → `gpt`, `tool` → `tool`. Assistant tool calls are appended to the
 * `gpt` turn as `<tool_call>name(json-args)</tool_call>` lines. Messages with
 * any other role are omitted, as are sessions that fail `hasTrainingValue`.
 *
 * @param {Iterable<object>} events - Events to export.
 * @returns {{session_id: string, conversations: {from: string, value: *}[]}[]}
 *   One record per kept session.
 */
export function toShareGPT(events) {
  // Render a single tool call in the inline ShareGPT notation.
  const renderCall = ({ function: fn }) => `<tool_call>${fn.name}(${fn.arguments})</tool_call>`;

  // Map one message to a ShareGPT turn, or null for roles that are not exported.
  const toTurn = (msg) => {
    switch (msg.role) {
      case 'user':
        return { from: 'human', value: msg.content };
      case 'tool':
        return { from: 'tool', value: msg.content };
      case 'assistant': {
        const text = msg.content || '';
        const calls = msg.tool_calls;
        if (!calls || !calls.length) return { from: 'gpt', value: text };
        const rendered = calls.map(renderCall).join('\n');
        return { from: 'gpt', value: text ? text + '\n' + rendered : rendered };
      }
      default:
        return null;
    }
  };

  const records = [];
  for (const [sessionId, sessionEvents] of groupBySession(events)) {
    const messages = buildMessagesForSession(sessionEvents);
    if (!hasTrainingValue(messages)) continue;
    const conversations = messages.map(toTurn).filter((turn) => turn !== null);
    records.push({ session_id: sessionId, conversations });
  }
  return records;
}
|