claude-attribution 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ /**
2
+ * Tests for the line attribution algorithm in src/attribution/differ.ts.
3
+ *
4
+ * The algorithm is the core of claude-attribution: it determines whether each
5
+ * committed line was written by AI (Claude), a human, or is "mixed" (Claude
6
+ * wrote it but the human modified it before committing).
7
+ *
8
+ * Run with: bun test
9
+ */
10
+ import { test, expect, describe } from "bun:test";
11
+ import {
12
+ attributeLines,
13
+ aggregateTotals,
14
+ hashLine,
15
+ } from "../attribution/differ.ts";
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // hashLine
19
+ // ---------------------------------------------------------------------------
20
describe("hashLine", () => {
  test("returns 16-character hex string", () => {
    const digest = hashLine("hello world");
    // Length and alphabet are asserted separately so a failure names the broken property.
    expect(digest).toHaveLength(16);
    expect(digest).toMatch(/^[0-9a-f]+$/);
  });

  test("trims whitespace before hashing", () => {
    // Space-padded and tab-padded variants must both hash like the bare word.
    for (const padded of [" hello ", "\thello\t"]) {
      expect(hashLine(padded)).toBe(hashLine("hello"));
    }
  });

  test("different content produces different hashes", () => {
    const fooHash = hashLine("foo");
    const barHash = hashLine("bar");
    expect(fooHash).not.toBe(barHash);
  });

  test("empty string produces consistent hash", () => {
    // A whitespace-only line is expected to hash the same as the empty string.
    const emptyHash = hashLine("");
    const blankHash = hashLine(" ");
    expect(emptyHash).toBe(blankHash);
  });
});
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // attributeLines — basic classification
43
+ // ---------------------------------------------------------------------------
44
describe("attributeLines — basic classification", () => {
  test("line in after but not before → AI", () => {
    // Nothing existed before; the after-snapshot and the commit hold the same single line.
    const claudeLine = "const x = 1;";
    const result = attributeLines([], [claudeLine], [claudeLine]);
    expect(result.attribution[0]).toBe("AI");
    expect(result.stats.ai).toBe(1);
    expect(result.stats.human).toBe(0);
  });

  test("line in both before and after → HUMAN (pre-existing)", () => {
    const preExisting = "const x = 1;";
    const beforeSnapshot = [preExisting];
    const afterSnapshot = [preExisting, "const y = 2;"];
    // Only the pre-existing line is committed.
    const committedLines = [preExisting];
    const result = attributeLines(beforeSnapshot, afterSnapshot, committedLines);
    expect(result.attribution[0]).toBe("HUMAN");
    expect(result.stats.human).toBe(1);
    expect(result.stats.ai).toBe(0);
  });

  test("line not in after, same position as AI line → MIXED (human replaced Claude's line)", () => {
    // Scenario: the after-snapshot has "const x = 1;" at position 0, and the
    // commit has a different line at that same position. The committed line
    // matches nothing in the after-snapshot, but the snapshot line at the same
    // index is an AI line, so the expected classification is MIXED.
    const afterSnapshot = ["const x = 1;"];
    const committedLines = ["const y = 2;"]; // human replaced Claude's line
    const result = attributeLines([], afterSnapshot, committedLines);
    expect(result.attribution[0]).toBe("MIXED");
  });

  test("line not in after, committed file longer than after-snapshot → HUMAN", () => {
    // The commit has one more line than the after-snapshot. Index 1 has no
    // counterpart in the snapshot, so MIXED is not expected there.
    const afterSnapshot = ["const x = 1;"]; // a single line
    const committedLines = [...afterSnapshot, "human added this"];
    const { attribution: labels } = attributeLines([], afterSnapshot, committedLines);
    expect(labels[0]).toBe("AI"); // index 0 matches the after-snapshot exactly
    expect(labels[1]).toBe("HUMAN"); // index 1 lies beyond the after-snapshot
  });

  test("empty lines always → HUMAN regardless of snapshot", () => {
    const afterSnapshot = ["", "const x = 1;", ""];
    const committedLines = ["", "const x = 1;", ""];
    const { attribution: labels } = attributeLines([], afterSnapshot, committedLines);
    // Blank lines at both ends are HUMAN; only the real content in the middle is AI.
    const expectedLabels = ["HUMAN", "AI", "HUMAN"];
    expectedLabels.forEach((label, index) => {
      expect(labels[index]).toBe(label);
    });
  });
});
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // attributeLines — all-AI and all-HUMAN scenarios
102
+ // ---------------------------------------------------------------------------
103
describe("attributeLines — all-AI and all-HUMAN", () => {
  test("all-AI: before empty, after matches committed", () => {
    const afterSnapshot = ["line 1", "line 2", "line 3"];
    const committedLines = ["line 1", "line 2", "line 3"];
    const result = attributeLines([], afterSnapshot, committedLines);
    expect(result.stats.ai).toBe(3);
    expect(result.stats.human).toBe(0);
    expect(result.stats.mixed).toBe(0);
    expect(result.stats.pctAi).toBe(100);
  });

  test("all-HUMAN: empty before and after (no Claude involvement)", () => {
    // Both snapshots are empty, so no committed line can be matched to an AI line.
    const committedLines = ["human line 1", "human line 2"];
    const result = attributeLines([], [], committedLines);
    expect(result.stats.ai).toBe(0);
    expect(result.stats.human).toBe(2);
    expect(result.stats.pctAi).toBe(0);
  });

  test("all-HUMAN: committed lines all existed before Claude touched the file", () => {
    const originalLines = ["existing line 1", "existing line 2"];
    const afterSnapshot = [...originalLines, "new line"];
    // The commit drops the line Claude added and keeps only the original content.
    const committedLines = [...originalLines];
    const result = attributeLines(originalLines, afterSnapshot, committedLines);
    expect(result.stats.ai).toBe(0);
    expect(result.stats.human).toBe(2);
  });
});
134
+
135
+ // ---------------------------------------------------------------------------
136
+ // attributeLines — MIXED detection (best-effort positional)
137
+ // ---------------------------------------------------------------------------
138
describe("attributeLines — MIXED detection", () => {
  test("Claude wrote line at position i, human changed it → MIXED", () => {
    // After-snapshot: "const x = 1;" at index 0.
    // Commit: the same index holds the edited "const x = 42;".
    const afterSnapshot = ["const x = 1;"];
    const committedLines = ["const x = 42;"]; // human modified
    const result = attributeLines([], afterSnapshot, committedLines);
    // The committed line matches nothing in the after-snapshot, but the
    // snapshot line at index 0 is an AI line that differs from it → MIXED.
    expect(result.attribution[0]).toBe("MIXED");
  });

  test("no MIXED when committed line matches AI line exactly", () => {
    const sharedLine = "const x = 1;";
    const result = attributeLines([], [sharedLine], [sharedLine]);
    // An exact match stays AI rather than MIXED.
    expect(result.attribution[0]).toBe("AI");
  });
});
159
+
160
+ // ---------------------------------------------------------------------------
161
+ // attributeLines — identical-line limitation
162
+ // ---------------------------------------------------------------------------
163
describe("attributeLines — identical-line limitation (known behavior)", () => {
  test("identical content in after but not before → conservatively attributed as AI", () => {
    // Documents a known limitation: a line a human typed that is identical to
    // one in Claude's after-snapshot cannot be told apart from Claude's line,
    // so it is expected to be reported as AI.
    const closingBrace = "}";
    const afterSnapshot = [closingBrace]; // Claude's snapshot contains "}"
    const committedLines = [closingBrace]; // the commit contains the same "}"
    const result = attributeLines([], afterSnapshot, committedLines);
    expect(result.attribution[0]).toBe("AI");
  });

  test("if the same line exists in before AND after, it stays HUMAN", () => {
    // A line already present in the before-snapshot is expected to stay HUMAN.
    const closingBrace = "}";
    const beforeSnapshot = [closingBrace];
    const afterSnapshot = [closingBrace, "const x = 1;"];
    const committedLines = [closingBrace];
    const result = attributeLines(beforeSnapshot, afterSnapshot, committedLines);
    expect(result.attribution[0]).toBe("HUMAN");
  });
});
188
+
189
+ // ---------------------------------------------------------------------------
190
+ // attributeLines — stats shape
191
+ // ---------------------------------------------------------------------------
192
describe("attributeLines — stats shape", () => {
  test("pctAi rounds correctly", () => {
    // One AI line out of three committed lines is expected to report 33%.
    const afterSnapshot = ["ai line"];
    const committedLines = ["ai line", "human line 1", "human line 2"];
    const result = attributeLines([], afterSnapshot, committedLines);
    expect(result.stats.ai).toBe(1);
    expect(result.stats.human).toBe(2);
    expect(result.stats.pctAi).toBe(33);
  });

  test("pctAi is 0 for empty file", () => {
    const result = attributeLines([], [], []);
    expect(result.stats.pctAi).toBe(0);
    expect(result.stats.total).toBe(0);
  });

  test("stats.path is empty string (caller sets it)", () => {
    const result = attributeLines([], ["line"], ["line"]);
    expect(result.stats.path).toBe("");
  });
});
215
+
216
+ // ---------------------------------------------------------------------------
217
+ // aggregateTotals
218
+ // ---------------------------------------------------------------------------
219
describe("aggregateTotals", () => {
  test("sums ai, human, mixed, total across files", () => {
    const fileA = { path: "a.ts", ai: 10, human: 5, mixed: 1, total: 16, pctAi: 62 };
    const fileB = { path: "b.ts", ai: 20, human: 10, mixed: 2, total: 32, pctAi: 62 };
    const combined = aggregateTotals([fileA, fileB]);
    expect(combined.ai).toBe(30);
    expect(combined.human).toBe(15);
    expect(combined.mixed).toBe(3);
    expect(combined.total).toBe(48);
    // 30 / 48 * 100 = 62.5, and Math.round(62.5) is 63.
    expect(combined.pctAi).toBe(63);
  });

  test("returns zero pctAi for empty file list", () => {
    const combined = aggregateTotals([]);
    expect(combined.pctAi).toBe(0);
    expect(combined.total).toBe(0);
  });

  test("single file — totals equal that file's stats", () => {
    const onlyFile = { path: "a.ts", ai: 5, human: 3, mixed: 0, total: 8, pctAi: 62 };
    const combined = aggregateTotals([onlyFile]);
    expect(combined.ai).toBe(5);
    expect(combined.human).toBe(3);
    // 5 / 8 * 100 = 62.5, and Math.round(62.5) is 63.
    expect(combined.pctAi).toBe(63);
  });
});
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Checkpoint storage for claude-attribution.
3
+ *
4
+ * A "checkpoint" is a snapshot of a file's line content saved to disk so that
5
+ * the post-commit hook can compare Claude's writes against the committed state.
6
+ *
7
+ * Two checkpoint types per file per session:
8
+ * - "before": file content before Claude's first edit (saved by pre-tool-use.ts)
9
+ * - "after": file content after Claude's last edit (saved by post-tool-use.ts)
10
+ *
11
+ * Checkpoints are stored in /tmp/claude-attribution/<session_id>/<file_hash>.<type>.json
12
+ * and survive session close so commits can be attributed after Claude is closed.
13
+ * The OS clears /tmp on reboot; stale checkpoints are harmless.
14
+ */
15
+ import { createHash } from "crypto";
16
+ import { chmod, mkdir, readFile, writeFile } from "fs/promises";
17
+ import { existsSync } from "fs";
18
+ import { dirname, join } from "path";
19
+
20
// Root directory for all checkpoints; each session gets its own subdirectory beneath it.
const CHECKPOINT_BASE = "/tmp/claude-attribution";
21
+
22
/**
 * Allowed shape of a session_id used as a path component: 1–128 characters
 * drawn from ASCII letters, digits, underscore and hyphen.
 * Rejecting everything else (notably "/" and ".") prevents path traversal
 * attacks (e.g., session_id = "../../etc/passwd").
 * Claude Code session IDs are UUID-like strings with hyphens.
 */
export const SESSION_ID_RE = /^[a-zA-Z0-9_-]{1,128}$/;

/**
 * Throw unless `sessionId` is a string that matches SESSION_ID_RE.
 *
 * The explicit string check matters when the value comes from unvalidated
 * runtime data: `RegExp.prototype.test` coerces its argument to a string, so
 * `undefined` or `null` would otherwise be tested as "undefined" / "null"
 * and accepted.
 *
 * @param sessionId - Candidate session identifier.
 * @throws Error if the value is not a string or does not match SESSION_ID_RE.
 */
export function validateSessionId(sessionId: string): void {
  // Widen to unknown so the runtime type check is not optimized away by the static type.
  const candidate: unknown = sessionId;
  if (typeof candidate !== "string") {
    const actual = candidate === null ? "null" : typeof candidate;
    throw new Error(`Invalid session_id: expected a string, got ${actual}`);
  }
  if (!SESSION_ID_RE.test(candidate)) {
    // Only the first 50 characters are echoed so a huge value cannot flood the log.
    throw new Error(
      `Invalid session_id: ${JSON.stringify(candidate.slice(0, 50))}`,
    );
  }
}
36
+
37
/** Shape of a single checkpoint as it is serialized to JSON on disk. */
export interface CheckpointData {
  /** Path of the snapshotted file, exactly as it was passed to saveCheckpoint. */
  filePath: string;
  /** File content split on "\n"; a file that did not exist is stored as one empty line. */
  lines: string[];
  /** Time the snapshot was taken, as produced by Date#toISOString. */
  timestamp: string;
}
42
+
43
/**
 * Derive the checkpoint filename stem for a file path: the first 16 hex
 * characters of the SHA-256 digest of the path string.
 */
function filePathKey(filePath: string): string {
  const hasher = createHash("sha256");
  hasher.update(filePath);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 16);
}
47
+
48
/**
 * Build the on-disk location of a checkpoint:
 * <CHECKPOINT_BASE>/<sessionId>/<filePathKey(filePath)>.<type>.json
 */
function checkpointPath(
  sessionId: string,
  filePath: string,
  type: "before" | "after",
): string {
  const fileName = `${filePathKey(filePath)}.${type}.json`;
  return join(CHECKPOINT_BASE, sessionId, fileName);
}
59
+
60
/**
 * Read the current content of `filePath` and save it as the "before" or
 * "after" checkpoint of `sessionId`.
 *
 * A file that does not exist is recorded as empty content. The session
 * directory is created with mode 0o700 and the checkpoint file with mode
 * 0o600, so other users on the same machine cannot read another developer's
 * file snapshots.
 *
 * @param sessionId - Session the checkpoint belongs to; must pass validateSessionId.
 * @param filePath - Path of the file to snapshot.
 * @param type - Which checkpoint to write.
 * @throws Error if `sessionId` is invalid; file-system errors propagate unchanged.
 */
export async function saveCheckpoint(
  sessionId: string,
  filePath: string,
  type: "before" | "after",
): Promise<void> {
  validateSessionId(sessionId);
  const content = existsSync(filePath) ? await readFile(filePath, "utf8") : "";
  const data: CheckpointData = {
    filePath,
    lines: content.split("\n"),
    timestamp: new Date().toISOString(),
  };
  const dest = checkpointPath(sessionId, filePath, type);
  // The checkpoint file sits directly inside the session directory, so its
  // parent is the only directory that needs restrictive permissions.
  const sessionDir = dirname(dest);
  // Ensure the base directory exists first (no mode — it's not sensitive).
  await mkdir(CHECKPOINT_BASE, { recursive: true });
  // Create the session directory with mode 0o700 atomically so no race window
  // exists between creation and permission-setting. When the directory already
  // exists (e.g., a prior session or an upgrade from an older installation that
  // used a permissive umask), Node/Bun does NOT apply the mode option, so we
  // explicitly chmod afterwards to enforce the security invariant on existing dirs.
  await mkdir(sessionDir, { recursive: true, mode: 0o700 });
  await chmod(sessionDir, 0o700);
  // Compact JSON (no indentation) — these files are machine-read only.
  // The 0o600 mode applies when the file is created and keeps the snapshot
  // private even if the directory permissions are loosened later.
  await writeFile(dest, JSON.stringify(data), { mode: 0o600 });
}
94
+
95
/** Runtime check that a parsed JSON value has the CheckpointData shape. */
function isCheckpointData(value: unknown): value is CheckpointData {
  if (typeof value !== "object" || value === null) return false;
  const record = value as Record<string, unknown>;
  return (
    typeof record.filePath === "string" &&
    typeof record.timestamp === "string" &&
    Array.isArray(record.lines) &&
    record.lines.every((line) => typeof line === "string")
  );
}

/**
 * Load a checkpoint.
 *
 * @param sessionId - Session the checkpoint belongs to; must pass validateSessionId.
 * @param filePath - Path of the file whose snapshot is wanted.
 * @param type - Which checkpoint to read.
 * @returns The stored checkpoint, or null if it doesn't exist.
 * @throws Error if `sessionId` is invalid or the file content does not have
 *   the CheckpointData shape; SyntaxError if the file is not valid JSON.
 */
export async function loadCheckpoint(
  sessionId: string,
  filePath: string,
  type: "before" | "after",
): Promise<CheckpointData | null> {
  validateSessionId(sessionId);
  const src = checkpointPath(sessionId, filePath, type);
  if (!existsSync(src)) return null;

  let raw: string;
  try {
    raw = await readFile(src, "utf8");
  } catch (err: unknown) {
    // The file can disappear between the existence check and the read;
    // that is the same situation as "no checkpoint".
    const code =
      typeof err === "object" && err !== null
        ? (err as { code?: unknown }).code
        : undefined;
    if (code === "ENOENT") return null;
    throw err;
  }

  const parsed: unknown = JSON.parse(raw);
  if (!isCheckpointData(parsed)) {
    // Fail here with a clear message instead of letting a malformed file
    // surface later as an unrelated TypeError in the caller.
    throw new Error(`Malformed checkpoint file: ${src}`);
  }
  return parsed;
}
107
+
108
/**
 * Delete the whole checkpoint directory of one session.
 *
 * NOTE: This is intentionally NOT called from stop.ts (the SessionEnd hook).
 * A developer may close Claude Code before committing, so checkpoints have to
 * outlive the session; deleting them at session end would make the next
 * commit show 0% AI attribution. Relying on the OS clearing /tmp on reboot is
 * considered sufficient.
 *
 * The export is kept for potential use by an explicit cleanup command.
 *
 * @param sessionId - Session whose checkpoints are removed; must pass validateSessionId.
 */
export async function clearCheckpoints(sessionId: string): Promise<void> {
  validateSessionId(sessionId);
  const target = join(CHECKPOINT_BASE, sessionId);
  if (existsSync(target)) {
    // Loaded on demand, as in the original implementation.
    const fsPromises = await import("fs/promises");
    await fsPromises.rm(target, { recursive: true, force: true });
  }
}
125
+
126
/**
 * Write the active session ID into the repo's attribution-state directory
 * (<repoRoot>/.claude/attribution-state/current-session).
 *
 * The ID is validated before it is persisted: the stored value is later used
 * as a path component when checkpoints are loaded, so an invalid ID must not
 * reach disk.
 *
 * @param repoRoot - Root directory of the repository.
 * @param sessionId - Session identifier to record; must pass validateSessionId.
 * @throws Error if `sessionId` is invalid; file-system errors propagate unchanged.
 */
export async function writeCurrentSession(
  repoRoot: string,
  sessionId: string,
): Promise<void> {
  validateSessionId(sessionId);
  const stateDir = join(repoRoot, ".claude", "attribution-state");
  await mkdir(stateDir, { recursive: true });
  await writeFile(join(stateDir, "current-session"), sessionId);
}
135
+
136
/**
 * Read the active session ID from
 * <repoRoot>/.claude/attribution-state/current-session.
 *
 * @param repoRoot - Root directory of the repository.
 * @returns The stored ID with surrounding whitespace removed, or null when
 *   the state file is missing or contains only whitespace.
 */
export async function readCurrentSession(
  repoRoot: string,
): Promise<string | null> {
  const statePath = join(repoRoot, ".claude", "attribution-state", "current-session");
  if (!existsSync(statePath)) {
    return null;
  }
  const stored = await readFile(statePath, "utf8");
  const sessionId = stored.trim();
  return sessionId === "" ? null : sessionId;
}
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Post-commit attribution runner.
3
+ *
4
+ * Called by the .git/hooks/post-commit hook after every `git commit`.
5
+ * Usage: <attribution-root>/src/run.sh <attribution-root>/src/attribution/commit.ts
6
+ *
7
+ * Pipeline:
8
+ * 1. Read the active session ID from .claude/attribution-state/current-session
9
+ * 2. Get the HEAD commit SHA and list of changed files
10
+ * 3. For each changed file (in parallel):
11
+ * a. Read the committed file content via `git show HEAD:<path>`
12
+ * b. Load the before/after checkpoints from /tmp/claude-attribution/<session>/
13
+ * c. Run attributeLines(before, after, committed) → AI / HUMAN / MIXED per line
14
+ * d. If no checkpoint exists → all HUMAN (file not touched in this session)
15
+ * 4. Write the AttributionResult as a git note under refs/notes/claude-attribution
16
+ * 5. Append a summary to .claude/logs/attribution.jsonl
17
+ * 6. Print a one-line summary to stdout
18
+ *
19
+ * This hook runs asynchronously after the commit is recorded — it cannot block the commit.
20
+ * All errors are caught and logged; the process always exits 0.
21
+ */
22
+ import { resolve, join } from "path";
23
+ import { mkdir, appendFile } from "fs/promises";
24
+ import { loadCheckpoint, readCurrentSession } from "./checkpoint.ts";
25
+ import {
26
+ attributeLines,
27
+ aggregateTotals,
28
+ type AttributionResult,
29
+ type FileAttribution,
30
+ } from "./differ.ts";
31
+ import {
32
+ writeNote,
33
+ headSha,
34
+ filesInCommit,
35
+ committedContent,
36
+ currentBranch,
37
+ } from "./git-notes.ts";
38
+ import {
39
+ otelEndpoint,
40
+ otelHeaders,
41
+ readOtelContext,
42
+ buildSessionSpan,
43
+ exportOtlpSpans,
44
+ clearOtelContext,
45
+ } from "./otel.ts";
46
+
47
/** Attribution record for a file with no Claude checkpoint: every committed line counts as HUMAN. */
function allHumanAttribution(path: string, lineCount: number): FileAttribution {
  return {
    path,
    ai: 0,
    human: lineCount,
    mixed: 0,
    total: lineCount,
    pctAi: 0,
  };
}

/**
 * Attribute the lines of one file changed in the HEAD commit.
 *
 * @param repoRoot - Root directory of the repository.
 * @param sessionId - Active session ID, or null when there is none.
 * @param relPath - Path of the changed file relative to `repoRoot`.
 * @returns The per-file attribution, or null when the file is skipped
 *   (deleted in the commit, or binary).
 */
async function attributeFile(
  repoRoot: string,
  sessionId: string | null,
  relPath: string,
): Promise<FileAttribution | null> {
  const committed = await committedContent(repoRoot, relPath);

  // Deleted file — skip attribution
  if (committed === null) return null;

  // Binary file — null bytes indicate binary content; line-splitting produces garbage
  if (committed.includes("\0")) return null;

  const committedLines = committed.split("\n");

  if (!sessionId) {
    // No active Claude session — everything is HUMAN
    return allHumanAttribution(relPath, committedLines.length);
  }

  // Checkpoints are keyed by the absolute path; the two loads are independent.
  const absPath = join(repoRoot, relPath);
  const [before, after] = await Promise.all([
    loadCheckpoint(sessionId, absPath, "before"),
    loadCheckpoint(sessionId, absPath, "after"),
  ]);

  if (!after) {
    // No Claude checkpoint for this file — all HUMAN
    return allHumanAttribution(relPath, committedLines.length);
  }

  // A missing "before" checkpoint is treated as an empty file.
  const { stats } = attributeLines(
    before?.lines ?? [],
    after.lines,
    committedLines,
  );
  // attributeLines does not know the file path; fill it in here.
  return { ...stats, path: relPath };
}

/**
 * Post-commit pipeline: attribute every file changed in HEAD, store the
 * result as a git note, append it to .claude/logs/attribution.jsonl, print a
 * one-line summary, and — when an OTel endpoint is configured — export the
 * session span.
 */
async function main(): Promise<void> {
  const repoRoot = resolve(process.cwd());
  const sessionId = await readCurrentSession(repoRoot);

  const [sha, branch, changedFiles] = await Promise.all([
    headSha(repoRoot),
    currentBranch(repoRoot),
    filesInCommit(repoRoot),
  ]);

  // Process files in parallel — each file attribution is independent
  const perFile = await Promise.all(
    changedFiles.map((relPath) => attributeFile(repoRoot, sessionId, relPath)),
  );
  const fileResults = perFile.filter((r): r is FileAttribution => r !== null);

  const result: AttributionResult = {
    commit: sha,
    session: sessionId,
    branch,
    timestamp: new Date().toISOString(),
    files: fileResults,
    totals: aggregateTotals(fileResults),
  };

  // Write git note
  await writeNote(result, repoRoot);

  // Append to local log
  const logDir = join(repoRoot, ".claude", "logs");
  await mkdir(logDir, { recursive: true });
  await appendFile(
    join(logDir, "attribution.jsonl"),
    JSON.stringify(result) + "\n",
  );

  const { totals } = result;
  if (totals.total > 0) {
    console.log(
      `[claude-attribution] ${sha.slice(0, 7)} — ${totals.ai} AI / ${totals.human} human / ${totals.mixed} mixed lines (${totals.pctAi}% AI)`,
    );
  }

  // OTel: close the root session span and export it
  const endpoint = otelEndpoint();
  if (endpoint) {
    try {
      const ctx = await readOtelContext(repoRoot);
      if (ctx) {
        const span = buildSessionSpan(
          ctx,
          sha,
          branch,
          totals,
          new Date().toISOString(),
        );
        await exportOtlpSpans([span], endpoint, otelHeaders());
        await clearOtelContext(repoRoot);
      }
    } catch {
      // Silent — telemetry export is best-effort and must never block the commit
    }
  }
}
158
+
159
// Entry point. Any failure is reported on stderr and the process still exits
// with status 0 (soft fail), so the hook never blocks the commit.
main().then(undefined, (failure: unknown) => {
  console.error("[claude-attribution] post-commit error:", failure);
  process.exit(0);
});