ccsniff 1.0.28 → 1.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -40,8 +40,33 @@ npx ccsniff --since 24h --grep "rs-exec" --limit 50
40
40
  npx ccsniff --since 7d --role user --json
41
41
  npx ccsniff -f # tail new events live
42
42
  npx ccsniff --rollup out.ndjson --since 7d
43
+ npx ccsniff --unsloth train.jsonl --since 7d --no-subagents
44
+ npx ccsniff --unsloth train.jsonl --unsloth-format sharegpt --since 7d
43
45
  ```
44
46
 
47
+ ### Unsloth training export
48
+
49
+ `--unsloth <out>` writes one JSONL line per Claude Code session, ready for
50
+ Unsloth / TRL conversational fine-tuning. All filter flags (`--since`,
51
+ `--project`, `--session`, `--no-subagents`, ...) apply.
52
+
53
+ Two formats are supported via `--unsloth-format`:
54
+
55
+ - `messages` (default) — OpenAI / ChatML shape with native tool calling:
56
+ ```json
57
+ {"session_id":"...","messages":[
58
+ {"role":"user","content":"find foobar"},
59
+ {"role":"assistant","content":null,"tool_calls":[{"id":"tu1","type":"function","function":{"name":"Grep","arguments":"{\"pattern\":\"foobar\"}"}}]},
60
+ {"role":"tool","tool_call_id":"tu1","content":"hit at line 3"},
61
+ {"role":"assistant","content":"done"}
62
+ ]}
63
+ ```
64
+ - `sharegpt` — `{conversations:[{from:human|gpt|tool, value}]}`, compatible
65
+ with `standardize_sharegpt`. Tool calls are inlined into the `gpt` turn as
66
+ `<tool_call>name(json-args)</tool_call>`.
67
+
68
+ Sessions with no user/assistant turn pair are skipped (no training value).
69
+
45
70
  ## API
46
71
 
47
72
  ### `watch(projectsDir?)` → `JsonlWatcher`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ccsniff",
3
- "version": "1.0.28",
3
+ "version": "1.0.30",
4
4
  "description": "Watch Claude Code JSONL output files and emit structured events as a Node.js EventEmitter",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
package/src/cli.js CHANGED
@@ -1,5 +1,7 @@
1
1
  #!/usr/bin/env node
2
- import { JsonlReplayer, rollup } from './index.js';
2
+ import { JsonlReplayer, rollup, vault } from './index.js';
3
+ import { toUnslothMessages, toShareGPT } from './unsloth.js';
4
+ import fs from 'fs';
3
5
  import path from 'path';
4
6
 
5
7
  if (process.argv[2] === 'gui') {
@@ -22,8 +24,10 @@ if (process.argv[2] === 'gui') {
22
24
  process.stdin.resume();
23
25
  } else {
24
26
 
27
+ { const r = vault(); if (r.copied > 0) process.stderr.write(`# vault: ${r.copied} copied → ~/.claude/history-backup\n`); }
28
+
25
29
  const FLAGS = {
26
- string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort'],
30
+ string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort', 'unsloth', 'unsloth-format'],
27
31
  multi: ['grep', 'igrep', 'role', 'type', 'tool', 'session', 'sid', 'project', 'cwd'],
28
32
  number: ['limit', 'head', 'tail-n', 'ctx', 'truncate'],
29
33
  bool: ['json', 'ndjson', 'tail', 'f', 'full', 'reverse', 'invert', 'no-subagents', 'only-subagents', 'no-meta', 'only-meta', 'list-sessions', 'list-projects', 'list-tools', 'stats', 'count', 'help', 'h'],
@@ -97,6 +101,8 @@ OUTPUT
97
101
  -f, --tail live tail after replay
98
102
  --rollup <out> dump filtered events to file
99
103
  --format ndjson|sqlite rollup format (default ndjson; sqlite needs better-sqlite3)
104
+ --unsloth <out> write Unsloth training JSONL (one conversation per session per line)
105
+ --unsloth-format <fmt> messages (OpenAI/ChatML, default) | sharegpt
100
106
 
101
107
  EXAMPLES
102
108
  ccsniff --since 24h --grep "rs-exec" --limit 50
@@ -358,6 +364,15 @@ if (opts.count) {
358
364
  process.exit(0);
359
365
  }
360
366
 
367
// --unsloth <out>: convert the filtered rows into one training record per
// session, write them as JSONL, report on stderr, and exit.
if (opts.unsloth) {
  const fmt = opts['unsloth-format'] || 'messages';
  const converters = new Map([
    ['messages', toUnslothMessages],
    ['sharegpt', toShareGPT],
  ]);
  const convert = converters.get(fmt);
  // Reject unknown formats instead of silently writing the default shape
  // while the status line reports the misspelled format name.
  if (!convert) {
    process.stderr.write(`ccsniff: unknown --unsloth-format "${fmt}" (expected: messages | sharegpt)\n`);
    process.exit(1);
  }
  const recs = convert(rows);
  // One JSON document per line; trailing newline only when there is output.
  const body = recs.map(r => JSON.stringify(r)).join('\n') + (recs.length ? '\n' : '');
  fs.writeFileSync(opts.unsloth, body);
  process.stderr.write(`# unsloth(${fmt}): ${recs.length} conversations → ${opts.unsloth}\n`);
  process.exit(0);
}
375
+
361
376
  for (const ev of rows) process.stdout.write(formatRow(ev, opts));
362
377
  process.stderr.write(`# ${stats.events} events / ${stats.files} files / ${rows.length} matched\n`);
363
378
 
package/src/index.cjs CHANGED
@@ -228,6 +228,37 @@ function replay(projectsDir, opts) {
228
228
  return new JsonlReplayer(projectsDir);
229
229
  }
230
230
 
231
/**
 * Copy every `.jsonl` file found under `projectsDir` into the same relative
 * location under `destDir`, skipping files whose backup is already current.
 *
 * The walk is best-effort: unreadable directories and files that fail to
 * stat or copy are ignored so one bad entry never aborts the backup.
 *
 * @param {object} [options]
 * @param {string} [options.projectsDir] - root directory to scan (defaults to DEFAULT_DIR).
 * @param {string} [options.destDir] - backup root (defaults to ~/.claude/history-backup).
 * @returns {{copied: number, skipped: number}} files copied and files already up to date.
 */
function vault({ projectsDir = DEFAULT_DIR, destDir = path.join(os.homedir(), '.claude', 'history-backup') } = {}) {
  // The backup's mtime is stamped from a Date (millisecond precision, passed
  // through a float-seconds conversion), while the source may carry
  // sub-millisecond precision. Without a tolerance the backup always looks
  // older than the source and every file is re-copied on every run.
  const MTIME_TOLERANCE_MS = 2;

  if (!fs.existsSync(projectsDir)) return { copied: 0, skipped: 0 };
  let copied = 0;
  let skipped = 0;

  const walk = (src, depth) => {
    // Stop descending once the nesting depth exceeds 5.
    if (depth > 5) return;
    let entries;
    try { entries = fs.readdirSync(src, { withFileTypes: true }); } catch { return; }
    for (const d of entries) {
      const srcPath = path.join(src, d.name);
      if (d.isDirectory()) { walk(srcPath, depth + 1); continue; }
      if (!d.name.endsWith('.jsonl')) continue;
      const dstPath = path.join(destDir, path.relative(projectsDir, srcPath));
      let srcStat;
      try { srcStat = fs.statSync(srcPath); } catch { continue; }
      try {
        const dstStat = fs.statSync(dstPath);
        const upToDate = dstStat.size === srcStat.size
          && srcStat.mtimeMs - dstStat.mtimeMs <= MTIME_TOLERANCE_MS;
        if (upToDate) { skipped++; continue; }
      } catch { /* no backup yet (or unreadable): fall through and copy */ }
      try {
        fs.mkdirSync(path.dirname(dstPath), { recursive: true });
        fs.copyFileSync(srcPath, dstPath);
        // Carry the source timestamps over so the next run can detect "unchanged".
        fs.utimesSync(dstPath, srcStat.atime, srcStat.mtime);
        copied++;
      } catch { /* best-effort: a failed copy is neither copied nor skipped */ }
    }
  };

  walk(projectsDir, 0);
  return { copied, skipped };
}
261
+
231
262
  async function rollup({ projectsDir, since = 0, out, format = 'ndjson' } = {}) {
232
263
  if (!out) throw new Error('rollup: out path required');
233
264
  const r = new JsonlReplayer(projectsDir);
@@ -299,4 +330,4 @@ async function rollupSqlite(r, { since, out }) {
299
330
  return { ...stats, rows, format: 'sqlite', out };
300
331
  }
301
332
 
302
- module.exports = { JsonlWatcher, JsonlReplayer, watch, replay, rollup };
333
+ module.exports = { JsonlWatcher, JsonlReplayer, watch, replay, rollup, vault };
package/src/index.js CHANGED
@@ -234,6 +234,37 @@ export function replay(projectsDir, opts) {
234
234
  return new JsonlReplayer(projectsDir);
235
235
  }
236
236
 
237
/**
 * Copy every `.jsonl` file found under `projectsDir` into the same relative
 * location under `destDir`, skipping files whose backup is already current.
 *
 * The walk is best-effort: unreadable directories and files that fail to
 * stat or copy are ignored so one bad entry never aborts the backup.
 *
 * @param {object} [options]
 * @param {string} [options.projectsDir] - root directory to scan (defaults to DEFAULT_DIR).
 * @param {string} [options.destDir] - backup root (defaults to ~/.claude/history-backup).
 * @returns {{copied: number, skipped: number}} files copied and files already up to date.
 */
export function vault({ projectsDir = DEFAULT_DIR, destDir = path.join(os.homedir(), '.claude', 'history-backup') } = {}) {
  // The backup's mtime is stamped from a Date (millisecond precision, passed
  // through a float-seconds conversion), while the source may carry
  // sub-millisecond precision. Without a tolerance the backup always looks
  // older than the source and every file is re-copied on every run.
  const MTIME_TOLERANCE_MS = 2;

  if (!fs.existsSync(projectsDir)) return { copied: 0, skipped: 0 };
  let copied = 0;
  let skipped = 0;

  const walk = (src, depth) => {
    // Stop descending once the nesting depth exceeds 5.
    if (depth > 5) return;
    let entries;
    try { entries = fs.readdirSync(src, { withFileTypes: true }); } catch { return; }
    for (const d of entries) {
      const srcPath = path.join(src, d.name);
      if (d.isDirectory()) { walk(srcPath, depth + 1); continue; }
      if (!d.name.endsWith('.jsonl')) continue;
      const dstPath = path.join(destDir, path.relative(projectsDir, srcPath));
      let srcStat;
      try { srcStat = fs.statSync(srcPath); } catch { continue; }
      try {
        const dstStat = fs.statSync(dstPath);
        const upToDate = dstStat.size === srcStat.size
          && srcStat.mtimeMs - dstStat.mtimeMs <= MTIME_TOLERANCE_MS;
        if (upToDate) { skipped++; continue; }
      } catch { /* no backup yet (or unreadable): fall through and copy */ }
      try {
        fs.mkdirSync(path.dirname(dstPath), { recursive: true });
        fs.copyFileSync(srcPath, dstPath);
        // Carry the source timestamps over so the next run can detect "unchanged".
        fs.utimesSync(dstPath, srcStat.atime, srcStat.mtime);
        copied++;
      } catch { /* best-effort: a failed copy is neither copied nor skipped */ }
    }
  };

  walk(projectsDir, 0);
  return { copied, skipped };
}
267
+
237
268
  export async function rollup({ projectsDir, since = 0, out, format = 'ndjson' } = {}) {
238
269
  if (!out) throw new Error('rollup: out path required');
239
270
  const r = new JsonlReplayer(projectsDir);
package/src/unsloth.js ADDED
@@ -0,0 +1,109 @@
1
/**
 * Extract the plain text carried by a content block.
 *
 * Precedence: a string `text` field, then a string `content` field, then an
 * array `content` whose entries' `text` fields are concatenated (entries
 * without a truthy `text` contribute nothing). Anything else yields ''.
 *
 * @param {object} b - content block to read.
 * @returns {string} the extracted text, or '' when none is present.
 */
function textOf(b) {
  const { text, content } = b;
  if (typeof text === 'string') return text;
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';
  const pieces = content.map((part) => part?.text || '');
  return pieces.join('');
}
7
+
8
/**
 * Bucket events by `ev.conversation.id` (falling back to 'unknown') and sort
 * each bucket chronologically by `ev.timestamp`.
 *
 * Timestamps may be epoch-millisecond numbers, Date objects, numeric strings
 * or date strings; anything unparseable or missing sorts as 0. The sort is
 * stable, so events with equal timestamps keep their input order.
 *
 * @param {Iterable<object>} events - events to group.
 * @returns {Map<string, object[]>} session id → time-ordered events, in
 *   first-seen session order.
 */
function groupBySession(events) {
  // Subtracting raw ISO strings yields NaN, which the sort treats as "equal"
  // and therefore never reorders anything; normalise to a number first.
  const toMillis = (ts) => {
    if (!ts) return 0;
    const asNumber = Number(ts);
    if (!Number.isNaN(asNumber)) return asNumber;
    const parsed = typeof ts === 'string' ? Date.parse(ts) : NaN;
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const bySession = new Map();
  for (const ev of events) {
    const sid = ev.conversation?.id || 'unknown';
    const bucket = bySession.get(sid);
    if (bucket) bucket.push(ev);
    else bySession.set(sid, [ev]);
  }
  for (const bucket of bySession.values()) {
    bucket.sort((a, b) => toMillis(a.timestamp) - toMillis(b.timestamp));
  }
  return bySession;
}
18
+
19
/**
 * Fold one session's ordered events into an OpenAI/ChatML-style message list.
 *
 * - Consecutive user text events merge into one `user` message (joined by '\n');
 *   blank texts and blocks flagged `isMeta` are ignored.
 * - User `tool_result` blocks become standalone `tool` messages.
 * - Consecutive assistant `text` / `tool_use` blocks merge into one `assistant`
 *   message; `tool_use` blocks become `tool_calls` entries with JSON-encoded
 *   arguments.
 * - Assistant events that contribute nothing (`thinking`, empty text, unknown
 *   block types) are skipped without opening or closing a turn.
 * - An assistant message that only carries tool calls gets `content: null`.
 *
 * @param {object[]} evs - events of a single session, already in order.
 * @returns {object[]} messages with roles `user`, `assistant` or `tool`.
 */
function buildMessagesForSession(evs) {
  const messages = [];
  let cur = null; // turn currently being accumulated (user or assistant)

  // Emit the pending turn if it has any content or tool calls, then reset.
  const flush = () => {
    if (cur && (cur.content || (cur.tool_calls && cur.tool_calls.length))) messages.push(cur);
    cur = null;
  };

  for (const ev of evs) {
    const b = ev.block || {};
    const t = b.type;

    if (ev.role === 'user') {
      if (t === 'tool_result') {
        flush();
        messages.push({ role: 'tool', tool_call_id: b.tool_use_id || '', content: textOf(b) });
      } else if (t === 'text' && !b.isMeta) {
        const txt = textOf(b);
        if (!txt.trim()) continue;
        if (cur && cur.role === 'user') {
          cur.content += '\n' + txt;
        } else {
          flush();
          cur = { role: 'user', content: txt };
        }
      }
      continue;
    }

    if (ev.role !== 'assistant') continue;

    // Work out what this event adds before touching `cur`: a contentless
    // event (e.g. `thinking`) must not flush a pending user turn, otherwise
    // user texts on either side of it would be split into two messages.
    let text = '';
    let call = null;
    if (t === 'text') {
      text = textOf(b);
    } else if (t === 'tool_use') {
      call = {
        id: b.id || '',
        type: 'function',
        function: { name: b.name || '', arguments: JSON.stringify(b.input || {}) },
      };
    }
    if (!text && !call) continue;

    if (!cur || cur.role !== 'assistant') {
      flush();
      cur = { role: 'assistant', content: '' };
    }
    if (text) cur.content = cur.content ? cur.content + '\n' + text : text;
    if (call) {
      if (!cur.tool_calls) cur.tool_calls = [];
      cur.tool_calls.push(call);
    }
  }
  flush();

  // Tool-call-only assistant turns use `content: null` rather than ''.
  for (const m of messages) {
    if (m.role === 'assistant' && m.tool_calls && !m.content) m.content = null;
  }
  return messages;
}
65
+
66
/**
 * Decide whether a conversation is worth exporting: it must contain at least
 * one `user` message and at least one `assistant` message.
 *
 * @param {Iterable<{role: string}>} messages - messages of one session.
 * @returns {boolean} true when both roles are present.
 */
function hasTrainingValue(messages) {
  const rolesSeen = new Set();
  for (const { role } of messages) rolesSeen.add(role);
  return rolesSeen.has('user') && rolesSeen.has('assistant');
}
74
+
75
/**
 * Convert events into `messages`-format training records, one per session.
 *
 * Events are grouped by session, folded into a message list, and sessions
 * that lack either a user or an assistant message are dropped.
 *
 * @param {Iterable<object>} events - filtered events to export.
 * @returns {{session_id: string, messages: object[]}[]} one record per kept session.
 */
export function toUnslothMessages(events) {
  const records = [];
  groupBySession(events).forEach((sessionEvents, sessionId) => {
    const messages = buildMessagesForSession(sessionEvents);
    if (hasTrainingValue(messages)) {
      records.push({ session_id: sessionId, messages });
    }
  });
  return records;
}
85
+
86
/**
 * Convert events into ShareGPT-format training records, one per session.
 *
 * Each session's messages map to `{from, value}` turns: `user` → `human`,
 * `assistant` → `gpt`, `tool` → `tool`. Assistant tool calls are appended to
 * the `gpt` turn, one per line, as `<tool_call>name(json-args)</tool_call>`.
 * Sessions lacking either a user or an assistant message are dropped.
 *
 * @param {Iterable<object>} events - filtered events to export.
 * @returns {{session_id: string, conversations: {from: string, value: string}[]}[]}
 */
export function toShareGPT(events) {
  // Render one tool call in the inline ShareGPT notation.
  const renderCall = (call) => `<tool_call>${call.function.name}(${call.function.arguments})</tool_call>`;

  // Map a single message to its ShareGPT turn, or null for unknown roles.
  const toTurn = (msg) => {
    switch (msg.role) {
      case 'user':
        return { from: 'human', value: msg.content };
      case 'tool':
        return { from: 'tool', value: msg.content };
      case 'assistant': {
        const text = msg.content || '';
        const hasCalls = Boolean(msg.tool_calls && msg.tool_calls.length);
        if (!hasCalls) return { from: 'gpt', value: text };
        const calls = msg.tool_calls.map((call) => renderCall(call)).join('\n');
        return { from: 'gpt', value: text ? text + '\n' + calls : calls };
      }
      default:
        return null;
    }
  };

  const records = [];
  for (const [sessionId, sessionEvents] of groupBySession(events)) {
    const messages = buildMessagesForSession(sessionEvents);
    if (!hasTrainingValue(messages)) continue;
    const conversations = messages.map(toTurn).filter((turn) => turn !== null);
    records.push({ session_id: sessionId, conversations });
  }
  return records;
}