npm - ccsniff - Versions diffs - 1.0.29 → 1.0.31 - Mend

ccsniff 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -40,8 +40,33 @@ npx ccsniff --since 24h --grep "rs-exec" --limit 50
 npx ccsniff --since 7d --role user --json
 npx ccsniff -f                     # tail new events live
 npx ccsniff --rollup out.ndjson --since 7d
+npx ccsniff --unsloth train.jsonl --since 7d --no-subagents
+npx ccsniff --unsloth train.jsonl --unsloth-format sharegpt --since 7d
 ```
+### Unsloth training export
+`--unsloth <out>` writes one JSONL line per Claude Code session, ready for
+Unsloth / TRL conversational fine-tuning. All filter flags (`--since`,
+`--project`, `--session`, `--no-subagents`, ...) apply.
+Two formats are supported via `--unsloth-format`:
+- `messages` (default) — OpenAI / ChatML shape with native tool calling:
+  ```json
+  {"session_id":"...","messages":[
+    {"role":"user","content":"find foobar"},
+    {"role":"assistant","content":null,"tool_calls":[{"id":"tu1","type":"function","function":{"name":"Grep","arguments":"{\"pattern\":\"foobar\"}"}}]},
+    {"role":"tool","tool_call_id":"tu1","content":"hit at line 3"},
+    {"role":"assistant","content":"done"}
+  ]}
+  ```
+- `sharegpt` — `{conversations:[{from:human|gpt|tool, value}]}`, compatible
+  with `standardize_sharegpt`. Tool calls are inlined into the `gpt` turn as
+  `<tool_call>name(json-args)</tool_call>`.
+Sessions with no user/assistant turn pair are skipped (no training value).
 ## API
 ### `watch(projectsDir?)` → `JsonlWatcher`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ccsniff",
-  "version": "1.0.29",
+  "version": "1.0.31",
   "description": "Watch Claude Code JSONL output files and emit structured events as a Node.js EventEmitter",
   "type": "module",
   "main": "./src/index.js",

package/src/cli.js CHANGED Viewed

@@ -1,5 +1,7 @@
 #!/usr/bin/env node
 import { JsonlReplayer, rollup, vault } from './index.js';
+import { toUnslothMessages, toShareGPT } from './unsloth.js';
+import fs from 'fs';
 import path from 'path';
 if (process.argv[2] === 'gui') {
@@ -25,7 +27,7 @@ if (process.argv[2] === 'gui') {
 { const r = vault(); if (r.copied > 0) process.stderr.write(`# vault: ${r.copied} copied → ~/.claude/history-backup\n`); }
 const FLAGS = {
-  string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort'],
+  string: ['since', 'until', 'before', 'after', 'grep', 'igrep', 'cwd', 'project', 'role', 'type', 'tool', 'session', 'sid', 'parent', 'rollup', 'format', 'sort', 'unsloth', 'unsloth-format'],
   multi: ['grep', 'igrep', 'role', 'type', 'tool', 'session', 'sid', 'project', 'cwd'],
   number: ['limit', 'head', 'tail-n', 'ctx', 'truncate'],
   bool: ['json', 'ndjson', 'tail', 'f', 'full', 'reverse', 'invert', 'no-subagents', 'only-subagents', 'no-meta', 'only-meta', 'list-sessions', 'list-projects', 'list-tools', 'stats', 'count', 'help', 'h'],
@@ -99,6 +101,8 @@ OUTPUT
   -f, --tail             live tail after replay
   --rollup <out>         dump filtered events to file
   --format ndjson|sqlite rollup format (default ndjson; sqlite needs better-sqlite3)
+  --unsloth <out>        write Unsloth training JSONL (one conversation per session per line)
+  --unsloth-format <fmt> messages (OpenAI/ChatML, default) | sharegpt
 EXAMPLES
   ccsniff --since 24h --grep "rs-exec" --limit 50
@@ -360,6 +364,15 @@ if (opts.count) {
   process.exit(0);
 }
+if (opts.unsloth) {
+  // Export mode: turn the already-filtered `rows` into one training record
+  // per session, write them as JSONL to the path given by --unsloth, report a
+  // summary on stderr, and exit before the normal row output runs.
+  const fmt = opts['unsloth-format'] || 'messages';
+  if (fmt !== 'messages' && fmt !== 'sharegpt') {
+    // Fail loudly on a misspelled format instead of silently writing the
+    // default `messages` shape while reporting the misspelled name.
+    process.stderr.write(`# unsloth: unknown --unsloth-format "${fmt}" (expected messages | sharegpt)\n`);
+    process.exit(1);
+  }
+  const recs = fmt === 'sharegpt' ? toShareGPT(rows) : toUnslothMessages(rows);
+  // One JSON document per line; the trailing newline is added only when at
+  // least one record exists, so an empty export yields an empty file.
+  const body = recs.map(r => JSON.stringify(r)).join('\n') + (recs.length ? '\n' : '');
+  fs.writeFileSync(opts.unsloth, body);
+  process.stderr.write(`# unsloth(${fmt}): ${recs.length} conversations → ${opts.unsloth}\n`);
+  process.exit(0);
+}
 for (const ev of rows) process.stdout.write(formatRow(ev, opts));
 process.stderr.write(`# ${stats.events} events / ${stats.files} files / ${rows.length} matched\n`);

package/src/unsloth.js ADDED Viewed

@@ -0,0 +1,109 @@
+/**
+ * Extract the plain text carried by a content block.
+ *
+ * Lookup order: a string `text` field, then a string `content` field, then an
+ * array `content` whose entries' `text` values are concatenated with no
+ * separator (entries without a truthy `text` contribute nothing).
+ *
+ * @param {object} b - Content block to read.
+ * @returns {string} The extracted text, or '' when none of the shapes match.
+ */
+function textOf(b) {
+  if (typeof b.text === 'string') {
+    return b.text;
+  }
+  const inner = b.content;
+  if (typeof inner === 'string') {
+    return inner;
+  }
+  if (!Array.isArray(inner)) {
+    return '';
+  }
+  let joined = '';
+  for (const part of inner) {
+    joined += part?.text || '';
+  }
+  return joined;
+}
+/**
+ * Reduce an event timestamp to a number that can be used for ordering.
+ *
+ * Numbers (and numeric strings / Date objects) keep their numeric value, so
+ * ordering of numeric timestamps is unchanged. Anything else is parsed as a
+ * date string; missing or unparseable values sort as 0.
+ *
+ * @param {*} ts - Raw `timestamp` value from an event.
+ * @returns {number} A finite sort key.
+ */
+function timestampMillis(ts) {
+  if (!ts) return 0;
+  const asNumber = Number(ts);
+  if (Number.isFinite(asNumber)) return asNumber;
+  const parsed = Date.parse(String(ts));
+  return Number.isNaN(parsed) ? 0 : parsed;
+}
+/**
+ * Bucket events by `conversation.id` and order each bucket by timestamp.
+ *
+ * Events without a conversation id are collected under the key 'unknown'.
+ * Buckets appear in the Map in first-seen order. Subtracting raw timestamps
+ * yields NaN when they are date strings, which leaves such buckets unsorted,
+ * so timestamps are normalised to numbers before comparison.
+ *
+ * @param {Iterable<object>} events - Events to group.
+ * @returns {Map<string, object[]>} Session id → events sorted oldest first.
+ */
+function groupBySession(events) {
+  const sessions = new Map();
+  for (const ev of events) {
+    const sid = ev.conversation?.id || 'unknown';
+    const bucket = sessions.get(sid);
+    if (bucket) bucket.push(ev);
+    else sessions.set(sid, [ev]);
+  }
+  for (const bucket of sessions.values()) {
+    bucket.sort((a, b) => timestampMillis(a.timestamp) - timestampMillis(b.timestamp));
+  }
+  return sessions;
+}
+/**
+ * Fold one session's events into a list of chat messages.
+ *
+ * - User `text` blocks (non-meta, non-blank) become `user` messages;
+ *   consecutive ones are merged with a newline.
+ * - User `tool_result` blocks become standalone `tool` messages.
+ * - Assistant `text` and `tool_use` blocks accumulate into one `assistant`
+ *   message until a different kind of turn starts; `thinking` blocks are
+ *   dropped.
+ * - Messages that end up with neither text nor tool calls are discarded.
+ * - An assistant message that has tool calls but no text gets `content: null`.
+ *
+ * @param {object[]} evs - Events of a single session, already ordered.
+ * @returns {object[]} Messages with roles `user`, `assistant` or `tool`.
+ */
+function buildMessagesForSession(evs) {
+  const messages = [];
+  let pending = null;
+
+  // Emit the in-progress message if it carries text or tool calls, then reset.
+  const commit = () => {
+    if (pending !== null) {
+      const hasCalls = Boolean(pending.tool_calls && pending.tool_calls.length);
+      if (pending.content || hasCalls) messages.push(pending);
+    }
+    pending = null;
+  };
+  // Close whatever is in progress and start a fresh turn for `role`.
+  const openTurn = (role, content) => {
+    commit();
+    pending = { role, content };
+  };
+
+  for (const ev of evs) {
+    const block = ev.block || {};
+    const kind = block.type;
+
+    switch (ev.role) {
+      case 'user': {
+        if (kind === 'tool_result') {
+          // Tool output always ends the current turn and stands alone.
+          commit();
+          const output = textOf(block);
+          messages.push({ role: 'tool', tool_call_id: block.tool_use_id || '', content: output });
+          break;
+        }
+        if (kind !== 'text' || block.isMeta) break;
+        const said = textOf(block);
+        if (!said.trim()) break;
+        if (pending && pending.role === 'user') {
+          pending.content += '\n' + said;
+        } else {
+          openTurn('user', said);
+        }
+        break;
+      }
+      case 'assistant': {
+        // Any assistant block (even a skipped one) closes a preceding
+        // non-assistant turn.
+        if (!pending || pending.role !== 'assistant') openTurn('assistant', '');
+        if (kind === 'text') {
+          const said = textOf(block);
+          if (said) {
+            pending.content = pending.content ? pending.content + '\n' + said : said;
+          }
+        } else if (kind === 'tool_use') {
+          if (!pending.tool_calls) pending.tool_calls = [];
+          pending.tool_calls.push({
+            id: block.id || '',
+            type: 'function',
+            function: { name: block.name || '', arguments: JSON.stringify(block.input || {}) },
+          });
+        }
+        // `thinking` and any other block types contribute nothing.
+        break;
+      }
+      default:
+        // Events with other roles are ignored.
+        break;
+    }
+  }
+  commit();
+
+  // Tool-call-only assistant messages carry an explicit null content.
+  messages.forEach((msg) => {
+    if (msg.role === 'assistant' && msg.tool_calls && !msg.content) msg.content = null;
+  });
+  return messages;
+}
+/**
+ * Decide whether a conversation is worth exporting.
+ *
+ * @param {Iterable<object>} messages - Messages with a `role` field.
+ * @returns {boolean} True when at least one `user` and one `assistant`
+ *   message are present.
+ */
+function hasTrainingValue(messages) {
+  const rolesSeen = new Set();
+  for (const msg of messages) {
+    rolesSeen.add(msg.role);
+  }
+  return rolesSeen.has('user') && rolesSeen.has('assistant');
+}
+/**
+ * Convert events into `messages`-format training records, one per session.
+ *
+ * Sessions whose messages lack either a user or an assistant turn are
+ * omitted.
+ *
+ * @param {Iterable<object>} events - Events to export.
+ * @returns {{session_id: string, messages: object[]}[]} One record per
+ *   retained session.
+ */
+export function toUnslothMessages(events) {
+  const records = [];
+  groupBySession(events).forEach((sessionEvents, sid) => {
+    const messages = buildMessagesForSession(sessionEvents);
+    if (hasTrainingValue(messages)) {
+      records.push({ session_id: sid, messages });
+    }
+  });
+  return records;
+}
+/**
+ * Convert events into ShareGPT-format training records, one per session.
+ *
+ * Role mapping: `user` → `human`, `assistant` → `gpt`, `tool` → `tool`.
+ * Assistant tool calls are appended to the turn text, one per line, as
+ * `<tool_call>name(json-args)</tool_call>`. Sessions lacking either a user or
+ * an assistant turn are omitted.
+ *
+ * @param {Iterable<object>} events - Events to export.
+ * @returns {{session_id: string, conversations: {from: string, value: *}[]}[]}
+ *   One record per retained session.
+ */
+export function toShareGPT(events) {
+  // Render one chat message as a ShareGPT turn; null for unrecognised roles.
+  const toTurn = (msg) => {
+    switch (msg.role) {
+      case 'user':
+        return { from: 'human', value: msg.content };
+      case 'tool':
+        return { from: 'tool', value: msg.content };
+      case 'assistant': {
+        const parts = [];
+        if (msg.content) parts.push(msg.content);
+        if (msg.tool_calls && msg.tool_calls.length) {
+          const rendered = msg.tool_calls.map(
+            (call) => `<tool_call>${call.function.name}(${call.function.arguments})</tool_call>`,
+          );
+          parts.push(rendered.join('\n'));
+        }
+        return { from: 'gpt', value: parts.join('\n') };
+      }
+      default:
+        return null;
+    }
+  };
+
+  const records = [];
+  for (const [sid, sessionEvents] of groupBySession(events)) {
+    const messages = buildMessagesForSession(sessionEvents);
+    if (!hasTrainingValue(messages)) continue;
+    const conversations = messages.map(toTurn).filter((turn) => turn !== null);
+    records.push({ session_id: sid, conversations });
+  }
+  return records;
+}