ccsniff 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -40,8 +40,33 @@ npx ccsniff --since 24h --grep "rs-exec" --limit 50
40
40
  npx ccsniff --since 7d --role user --json
41
41
  npx ccsniff -f # tail new events live
42
42
  npx ccsniff --rollup out.ndjson --since 7d
43
+ npx ccsniff --unsloth train.jsonl --since 7d --no-subagents
44
+ npx ccsniff --unsloth train.jsonl --unsloth-format sharegpt --since 7d
43
45
  ```
44
46
 
47
+ ### Unsloth training export
48
+
49
+ `--unsloth <out>` writes one JSONL line per Claude Code session, ready for
50
+ Unsloth / TRL conversational fine-tuning. All filter flags (`--since`,
51
+ `--project`, `--session`, `--no-subagents`, ...) apply.
52
+
53
+ Two formats are supported via `--unsloth-format`:
54
+
55
+ - `messages` (default) — OpenAI / ChatML shape with native tool calling:
56
+ ```json
57
+ {"session_id":"...","messages":[
58
+ {"role":"user","content":"find foobar"},
59
+ {"role":"assistant","content":null,"tool_calls":[{"id":"tu1","type":"function","function":{"name":"Grep","arguments":"{\"pattern\":\"foobar\"}"}}]},
60
+ {"role":"tool","tool_call_id":"tu1","content":"hit at line 3"},
61
+ {"role":"assistant","content":"done"}
62
+ ]}
63
+ ```
64
+ - `sharegpt` — `{conversations:[{from:human|gpt|tool, value}]}`, compatible
65
+ with `standardize_sharegpt`. Tool calls are inlined into the `gpt` turn as
66
+ `<tool_call>name(json-args)</tool_call>`.
67
+
68
+ Sessions with no user/assistant turn pair are skipped (no training value).
69
+
45
70
  ## API
46
71
 
47
72
  ### `watch(projectsDir?)` → `JsonlWatcher`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ccsniff",
3
- "version": "1.0.29",
3
+ "version": "1.0.31",
4
4
  "description": "Watch Claude Code JSONL output files and emit structured events as a Node.js EventEmitter",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
package/src/cli.js CHANGED
@@ -1,5 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { JsonlReplayer, rollup, vault } from './index.js';
3
+ import { toUnslothMessages, toShareGPT } from './unsloth.js';
4
+ import fs from 'fs';
3
5
  import path from 'path';
4
6
 
5
7
  if (process.argv[2] === 'gui') {
@@ -25,7 +27,7 @@ if (process.argv[2] === 'gui') {
25
27
  { const r = vault(); if (r.copied > 0) process.stderr.write(`# vault: ${r.copied} copied → ~/.claude/history-backup\n`); }
26
28
 
27
29
  const FLAGS = {
28
- string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort'],
30
+ string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort', 'unsloth', 'unsloth-format'],
29
31
  multi: ['grep', 'igrep', 'role', 'type', 'tool', 'session', 'sid', 'project', 'cwd'],
30
32
  number: ['limit', 'head', 'tail-n', 'ctx', 'truncate'],
31
33
  bool: ['json', 'ndjson', 'tail', 'f', 'full', 'reverse', 'invert', 'no-subagents', 'only-subagents', 'no-meta', 'only-meta', 'list-sessions', 'list-projects', 'list-tools', 'stats', 'count', 'help', 'h'],
@@ -99,6 +101,8 @@ OUTPUT
99
101
  -f, --tail live tail after replay
100
102
  --rollup <out> dump filtered events to file
101
103
  --format ndjson|sqlite rollup format (default ndjson; sqlite needs better-sqlite3)
104
+ --unsloth <out> write Unsloth training JSONL (one conversation per session per line)
105
+ --unsloth-format <fmt> messages (OpenAI/ChatML, default) | sharegpt
102
106
 
103
107
  EXAMPLES
104
108
  ccsniff --since 24h --grep "rs-exec" --limit 50
@@ -360,6 +364,15 @@ if (opts.count) {
360
364
  process.exit(0);
361
365
  }
362
366
 
367
// Unsloth export: convert the already-filtered `rows` into one training
// conversation per session and write them as JSONL to the path given by
// `--unsloth`, then exit without printing the rows themselves.
if (opts.unsloth) {
  const unslothFormats = ['messages', 'sharegpt'];
  const fmt = opts['unsloth-format'] || 'messages';
  // Reject unknown formats up front. Previously any unrecognised value was
  // exported as `messages` while the summary line echoed the bogus name,
  // so a typo produced a file in a different shape than the user asked for.
  if (!unslothFormats.includes(fmt)) {
    process.stderr.write(`# unsloth: unknown --unsloth-format "${fmt}" (expected ${unslothFormats.join(' | ')})\n`);
    process.exit(1);
  }
  const recs = fmt === 'sharegpt' ? toShareGPT(rows) : toUnslothMessages(rows);
  // One JSON document per line; trailing newline only when there is output,
  // so an empty export yields an empty file.
  const body = recs.map(r => JSON.stringify(r)).join('\n') + (recs.length ? '\n' : '');
  fs.writeFileSync(opts.unsloth, body);
  process.stderr.write(`# unsloth(${fmt}): ${recs.length} conversations → ${opts.unsloth}\n`);
  process.exit(0);
}
375
+
363
376
  for (const ev of rows) process.stdout.write(formatRow(ev, opts));
364
377
  process.stderr.write(`# ${stats.events} events / ${stats.files} files / ${rows.length} matched\n`);
365
378
 
package/src/unsloth.js ADDED
@@ -0,0 +1,109 @@
1
/**
 * Extract the plain text carried by a content block.
 *
 * Lookup order: a string `text` field, then a string `content` field, then an
 * array `content` whose textual parts are concatenated with no separator.
 *
 * @param {object|null|undefined} b - Content block; may be missing.
 * @returns {string} The extracted text, or '' when the block carries none.
 */
function textOf(b) {
  // Tolerate a missing block instead of throwing on property access.
  if (b == null) return '';
  if (typeof b.text === 'string') return b.text;
  if (typeof b.content === 'string') return b.content;
  if (Array.isArray(b.content)) {
    return b.content
      .map((part) => {
        // Bare strings inside the array are text too.
        if (typeof part === 'string') return part;
        // Only string `text` counts; this keeps objects/numbers from being
        // coerced into the output (e.g. "[object Object]") by join().
        return typeof part?.text === 'string' ? part.text : '';
      })
      .join('');
  }
  return '';
}
7
+
8
/**
 * Bucket events by session id and order each bucket chronologically.
 *
 * The session id is read from `ev.conversation.id`; events without one are
 * collected under the key 'unknown'. Buckets appear in the Map in order of
 * first occurrence.
 *
 * @param {Iterable<object>} events - Events carrying an optional `timestamp`.
 * @returns {Map<string, object[]>} Session id → events sorted by timestamp.
 */
function groupBySession(events) {
  // Normalise a timestamp to a number so the comparator never yields NaN.
  // Numbers, numeric strings and Date objects keep their numeric value;
  // other strings are parsed as dates (ISO-8601 and the like). Anything
  // unparseable or missing counts as 0, matching the old `|| 0` fallback.
  // NOTE(review): the timestamp type produced upstream is not visible here —
  // this handles both epoch numbers and date strings.
  const toMillis = (ts) => {
    if (ts == null) return 0;
    const numeric = Number(ts);
    if (Number.isFinite(numeric)) return numeric;
    const parsed = typeof ts === 'string' ? Date.parse(ts) : NaN;
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const m = new Map();
  for (const ev of events) {
    const sid = ev.conversation?.id || 'unknown';
    if (!m.has(sid)) m.set(sid, []);
    m.get(sid).push(ev);
  }
  // Array.prototype.sort is stable, so events with equal (or missing)
  // timestamps keep their original relative order.
  for (const arr of m.values()) {
    arr.sort((a, b) => toMillis(a.timestamp) - toMillis(b.timestamp));
  }
  return m;
}
18
+
19
/**
 * Fold one session's ordered events into a chat-style message list.
 *
 * - User `text` blocks (non-meta, non-blank) become `user` messages;
 *   back-to-back user texts are merged with a newline.
 * - User `tool_result` blocks become standalone `tool` messages.
 * - Assistant `text` and `tool_use` blocks accumulate into a single
 *   `assistant` message until a different role interrupts; other assistant
 *   block types (e.g. `thinking`) contribute nothing.
 * - Events with any other role are ignored.
 *
 * @param {Iterable<object>} evs - Events of a single session, already ordered.
 * @returns {object[]} Messages with `role` of 'user', 'assistant' or 'tool'.
 */
function buildMessagesForSession(evs) {
  const out = [];
  let pending = null; // turn currently being accumulated

  // Emit the pending turn if it carries text or tool calls, then reset.
  const emitPending = () => {
    if (pending) {
      const hasCalls = pending.tool_calls && pending.tool_calls.length;
      if (pending.content || hasCalls) out.push(pending);
    }
    pending = null;
  };

  for (const ev of evs) {
    const block = ev.block || {};
    const kind = block.type;

    if (ev.role === 'user') {
      if (kind === 'tool_result') {
        emitPending();
        out.push({ role: 'tool', tool_call_id: block.tool_use_id || '', content: textOf(block) });
      } else if (kind === 'text' && !block.isMeta) {
        const text = textOf(block);
        if (text.trim()) {
          if (pending && pending.role === 'user') {
            pending.content += '\n' + text;
          } else {
            emitPending();
            pending = { role: 'user', content: text };
          }
        }
      }
      continue;
    }

    if (ev.role !== 'assistant') continue;

    // Any assistant block opens an assistant turn, even one that adds
    // nothing; an empty turn is discarded when it is emitted.
    if (!pending || pending.role !== 'assistant') {
      emitPending();
      pending = { role: 'assistant', content: '' };
    }

    switch (kind) {
      case 'text': {
        const text = textOf(block);
        if (text) pending.content = pending.content ? pending.content + '\n' + text : text;
        break;
      }
      case 'tool_use':
        if (!pending.tool_calls) pending.tool_calls = [];
        pending.tool_calls.push({
          id: block.id || '',
          type: 'function',
          function: { name: block.name || '', arguments: JSON.stringify(block.input || {}) },
        });
        break;
      default:
        // thinking and unrecognised block types are skipped
        break;
    }
  }
  emitPending();

  // Assistant turns that consist only of tool calls carry `content: null`.
  for (const msg of out) {
    if (msg.role === 'assistant' && msg.tool_calls && !msg.content) msg.content = null;
  }
  return out;
}
65
+
66
/**
 * Decide whether a conversation is worth exporting.
 *
 * @param {Iterable<{role: string}>} messages - Messages of one session.
 * @returns {boolean} True when at least one `user` and at least one
 *   `assistant` message are present.
 */
function hasTrainingValue(messages) {
  const seenRoles = new Set();
  for (const msg of messages) seenRoles.add(msg.role);
  return seenRoles.has('user') && seenRoles.has('assistant');
}
74
+
75
/**
 * Convert events into `messages`-format training records, one per session.
 *
 * Sessions whose message list lacks either a user or an assistant message
 * are omitted.
 *
 * @param {Iterable<object>} events - Events to export (any mix of sessions).
 * @returns {{session_id: string, messages: object[]}[]} One record per
 *   retained session, in the order sessions were grouped.
 */
export function toUnslothMessages(events) {
  const records = [];
  groupBySession(events).forEach((sessionEvents, sessionId) => {
    const messages = buildMessagesForSession(sessionEvents);
    if (hasTrainingValue(messages)) {
      records.push({ session_id: sessionId, messages });
    }
  });
  return records;
}
85
+
86
/**
 * Convert events into ShareGPT-format training records, one per session.
 *
 * Role mapping: user → `human`, assistant → `gpt`, tool → `tool`. Assistant
 * tool calls are rendered inline as `<tool_call>name(args)</tool_call>`, one
 * per line, after any assistant text. Sessions lacking either a user or an
 * assistant message are omitted.
 *
 * @param {Iterable<object>} events - Events to export (any mix of sessions).
 * @returns {{session_id: string, conversations: {from: string, value: string}[]}[]}
 *   One record per retained session.
 */
export function toShareGPT(events) {
  const renderCall = (call) =>
    `<tool_call>${call.function.name}(${call.function.arguments})</tool_call>`;

  // Map one chat message to a ShareGPT turn; null for roles with no mapping.
  const toTurn = (msg) => {
    switch (msg.role) {
      case 'user':
        return { from: 'human', value: msg.content };
      case 'tool':
        return { from: 'tool', value: msg.content };
      case 'assistant': {
        const parts = [];
        if (msg.content) parts.push(msg.content);
        if (msg.tool_calls && msg.tool_calls.length) {
          parts.push(msg.tool_calls.map(renderCall).join('\n'));
        }
        return { from: 'gpt', value: parts.join('\n') };
      }
      default:
        return null;
    }
  };

  const records = [];
  for (const [sessionId, sessionEvents] of groupBySession(events)) {
    const messages = buildMessagesForSession(sessionEvents);
    if (!hasTrainingValue(messages)) continue;
    const conversations = messages.map(toTurn).filter((turn) => turn !== null);
    records.push({ session_id: sessionId, conversations });
  }
  return records;
}