npm - claude-attribution - Versions diffs - 1.0.0 - Mend

claude-attribution 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +431 -0
package/bin/claude-attribution +9 -0
package/package.json +26 -0
package/src/__tests__/differ.test.ts +250 -0
package/src/attribution/checkpoint.ts +148 -0
package/src/attribution/commit.ts +163 -0
package/src/attribution/differ.ts +154 -0
package/src/attribution/git-notes.ts +185 -0
package/src/attribution/otel.ts +233 -0
package/src/cli.ts +109 -0
package/src/commands/pr.ts +164 -0
package/src/export/pr-summary.ts +204 -0
package/src/hooks/post-tool-use.ts +105 -0
package/src/hooks/pre-tool-use.ts +95 -0
package/src/hooks/stop.ts +33 -0
package/src/hooks/subagent.ts +72 -0
package/src/lib/hooks.ts +60 -0
package/src/metrics/calculate.ts +21 -0
package/src/metrics/collect.ts +369 -0
package/src/metrics/mark-start.ts +40 -0
package/src/metrics/transcript.ts +245 -0
package/src/run.sh +25 -0
package/src/setup/install.ts +321 -0
package/src/setup/templates/hooks.json +57 -0
package/src/setup/templates/metrics-command.md +27 -0
package/src/setup/templates/post-commit.sh +4 -0
package/src/setup/templates/pr-command.md +33 -0
package/src/setup/templates/pre-push.sh +4 -0
package/src/setup/templates/start-command.md +25 -0
package/src/setup/uninstall.ts +175 -0

package/src/__tests__/differ.test.ts ADDED Viewed

@@ -0,0 +1,250 @@
+/**
+ * Tests for the line attribution algorithm in src/attribution/differ.ts.
+ *
+ * The algorithm is the core of claude-attribution: it determines whether each
+ * committed line was written by AI (Claude), a human, or is "mixed" (Claude
+ * wrote it but the human modified it before committing).
+ *
+ * Run with: bun test
+ */
+import { test, expect, describe } from "bun:test";
+import {
+	attributeLines,
+	aggregateTotals,
+	hashLine,
+} from "../attribution/differ.ts";
+// ---------------------------------------------------------------------------
+// hashLine
+// ---------------------------------------------------------------------------
+// hashLine contract checked here: fixed-width lowercase hex output, and
+// leading/trailing whitespace has no influence on the digest.
+describe("hashLine", () => {
+	test("returns 16-character hex string", () => {
+		const digest = hashLine("hello world");
+		expect(digest).toHaveLength(16);
+		expect(digest).toMatch(/^[0-9a-f]+$/);
+	});
+	test("trims whitespace before hashing", () => {
+		// Space padding and tab padding must both collapse to the bare word.
+		for (const padded of ["  hello  ", "\thello\t"]) {
+			expect(hashLine(padded)).toBe(hashLine("hello"));
+		}
+	});
+	test("different content produces different hashes", () => {
+		const fooHash = hashLine("foo");
+		const barHash = hashLine("bar");
+		expect(fooHash).not.toBe(barHash);
+	});
+	test("empty string produces consistent hash", () => {
+		// A whitespace-only line trims down to the empty string.
+		const blankOnly = hashLine("  ");
+		expect(hashLine("")).toBe(blankOnly);
+	});
+});
+// ---------------------------------------------------------------------------
+// attributeLines — basic classification
+// ---------------------------------------------------------------------------
+// Each case passes (before, after, committed) line arrays to attributeLines and
+// checks the per-line label it returns for the committed lines.
+describe("attributeLines — basic classification", () => {
+	test("line in after but not before → AI", () => {
+		// Nothing existed beforehand; the single after-snapshot line is committed verbatim.
+		const result = attributeLines([], ["const x = 1;"], ["const x = 1;"]);
+		expect(result.attribution[0]).toBe("AI");
+		expect(result.stats.ai).toBe(1);
+		expect(result.stats.human).toBe(0);
+	});
+	test("line in both before and after → HUMAN (pre-existing)", () => {
+		// The after-snapshot gained a second line, but only the pre-existing
+		// line made it into the commit.
+		const result = attributeLines(
+			["const x = 1;"],
+			["const x = 1;", "const y = 2;"],
+			["const x = 1;"],
+		);
+		expect(result.attribution[0]).toBe("HUMAN");
+		expect(result.stats.human).toBe(1);
+		expect(result.stats.ai).toBe(0);
+	});
+	test("line not in after, same position as AI line → MIXED (human replaced Claude's line)", () => {
+		// Position 0 of the after-snapshot holds "const x = 1;" (absent from the
+		// before-snapshot). The commit has different text at that same position,
+		// so the expected label is MIXED: Claude wrote something here and the
+		// human changed it before committing.
+		const claudeVersion = ["const x = 1;"];
+		const humanVersion = ["const y = 2;"];
+		const { attribution } = attributeLines([], claudeVersion, humanVersion);
+		expect(attribution[0]).toBe("MIXED");
+	});
+	test("line not in after, committed file longer than after-snapshot → HUMAN", () => {
+		// The after-snapshot has one line; the commit has two. Index 1 lies
+		// beyond the snapshot, so the positional MIXED check has nothing to
+		// compare against there.
+		const labels = attributeLines(
+			[],
+			["const x = 1;"],
+			["const x = 1;", "human added this"],
+		).attribution;
+		expect(labels[0]).toBe("AI"); // exact match with the after-snapshot
+		expect(labels[1]).toBe("HUMAN"); // out of snapshot range — never MIXED
+	});
+	test("empty lines always → HUMAN regardless of snapshot", () => {
+		// Blank lines surround the one line of real content in both the
+		// after-snapshot and the commit.
+		const snapshot = ["", "const x = 1;", ""];
+		const { attribution } = attributeLines([], snapshot, [...snapshot]);
+		expect(attribution[0]).toBe("HUMAN"); // blank
+		expect(attribution[1]).toBe("AI"); // real content from the after-snapshot
+		expect(attribution[2]).toBe("HUMAN"); // blank
+	});
+});
+// ---------------------------------------------------------------------------
+// attributeLines — all-AI and all-HUMAN scenarios
+// ---------------------------------------------------------------------------
+// Whole-file scenarios: every committed line lands in the same bucket.
+describe("attributeLines — all-AI and all-HUMAN", () => {
+	test("all-AI: before empty, after matches committed", () => {
+		const claudeLines = ["line 1", "line 2", "line 3"];
+		// The commit is a copy of exactly what the after-snapshot contains.
+		const { stats } = attributeLines([], claudeLines, [...claudeLines]);
+		expect(stats.ai).toBe(3);
+		expect(stats.human).toBe(0);
+		expect(stats.mixed).toBe(0);
+		expect(stats.pctAi).toBe(100);
+	});
+	test("all-HUMAN: empty before and after (no Claude involvement)", () => {
+		// Both snapshots are empty, so nothing can be credited to AI.
+		const { stats } = attributeLines([], [], ["human line 1", "human line 2"]);
+		expect(stats.ai).toBe(0);
+		expect(stats.human).toBe(2);
+		expect(stats.pctAi).toBe(0);
+	});
+	test("all-HUMAN: committed lines all existed before Claude touched the file", () => {
+		const original = ["existing line 1", "existing line 2"];
+		// The after-snapshot has one extra line that the human dropped again
+		// before committing.
+		const withAddition = [...original, "new line"];
+		const { stats } = attributeLines(original, withAddition, [...original]);
+		expect(stats.ai).toBe(0);
+		expect(stats.human).toBe(2);
+	});
+});
+// ---------------------------------------------------------------------------
+// attributeLines — MIXED detection (best-effort positional)
+// ---------------------------------------------------------------------------
+// MIXED is a positional, best-effort label: it compares the committed line
+// against the after-snapshot line at the same index.
+describe("attributeLines — MIXED detection", () => {
+	test("Claude wrote line at position i, human changed it → MIXED", () => {
+		// After-snapshot index 0: "const x = 1;". Committed index 0: "const x = 42;".
+		// The committed text matches nothing in the after-snapshot, yet the
+		// snapshot line at that index is one Claude added, so MIXED is expected.
+		const result = attributeLines([], ["const x = 1;"], ["const x = 42;"]);
+		expect(result.attribution[0]).toBe("MIXED");
+	});
+	test("no MIXED when committed line matches AI line exactly", () => {
+		const untouched = "const x = 1;";
+		const result = attributeLines([], [untouched], [untouched]);
+		// An exact match stays plain AI.
+		expect(result.attribution[0]).toBe("AI");
+	});
+});
+// ---------------------------------------------------------------------------
+// attributeLines — identical-line limitation
+// ---------------------------------------------------------------------------
+// Documents a known limitation: attribution is content-based, so a line the
+// human typed that is identical to one in the after-snapshot cannot be told apart.
+describe("attributeLines — identical-line limitation (known behavior)", () => {
+	test("identical content in after but not before → conservatively attributed as AI", () => {
+		// "}" appears in the after-snapshot and in the commit. Even if the human
+		// typed the committed "}", its hash equals the snapshot line's hash, so
+		// the algorithm credits AI. This bias toward AI is accepted behavior.
+		const closingBrace = "}";
+		const { attribution } = attributeLines([], [closingBrace], [closingBrace]);
+		expect(attribution[0]).toBe("AI");
+	});
+	test("if the same line exists in before AND after, it stays HUMAN", () => {
+		// A line already present in the before-snapshot is unambiguously HUMAN.
+		const { attribution } = attributeLines(
+			["}"],
+			["}", "const x = 1;"],
+			["}"],
+		);
+		expect(attribution[0]).toBe("HUMAN");
+	});
+});
+// ---------------------------------------------------------------------------
+// attributeLines — stats shape
+// ---------------------------------------------------------------------------
+// Shape and rounding of the `stats` object returned next to `attribution`.
+describe("attributeLines — stats shape", () => {
+	test("pctAi rounds correctly", () => {
+		// One AI line among three committed lines: 33.3…% is reported as 33.
+		const committed = ["ai line", "human line 1", "human line 2"];
+		const { stats } = attributeLines([], ["ai line"], committed);
+		expect(stats.ai).toBe(1);
+		expect(stats.human).toBe(2);
+		expect(stats.pctAi).toBe(33);
+	});
+	test("pctAi is 0 for empty file", () => {
+		// With zero committed lines the percentage must still be 0.
+		const result = attributeLines([], [], []);
+		expect(result.stats.pctAi).toBe(0);
+		expect(result.stats.total).toBe(0);
+	});
+	test("stats.path is empty string (caller sets it)", () => {
+		// attributeLines is given no path; callers fill it in afterwards.
+		const result = attributeLines([], ["line"], ["line"]);
+		expect(result.stats.path).toBe("");
+	});
+});
+// ---------------------------------------------------------------------------
+// aggregateTotals
+// ---------------------------------------------------------------------------
+// aggregateTotals folds per-file stats into one totals record.
+describe("aggregateTotals", () => {
+	// Builds one per-file stats record; `total` is always ai + human + mixed in
+	// these fixtures. The per-file pctAi is passed through untouched.
+	const fileStats = (
+		path: string,
+		ai: number,
+		human: number,
+		mixed: number,
+		pctAi: number,
+	) => ({ path, ai, human, mixed, total: ai + human + mixed, pctAi });
+	test("sums ai, human, mixed, total across files", () => {
+		const totals = aggregateTotals([
+			fileStats("a.ts", 10, 5, 1, 62),
+			fileStats("b.ts", 20, 10, 2, 62),
+		]);
+		expect(totals.ai).toBe(30);
+		expect(totals.human).toBe(15);
+		expect(totals.mixed).toBe(3);
+		expect(totals.total).toBe(48);
+		// 30 / 48 = 62.5 %, and Math.round(62.5) is 63 (JS rounds half-up).
+		// The per-file pctAi inputs are 62, so an implementation that merely
+		// averaged them would fail here.
+		expect(totals.pctAi).toBe(63);
+	});
+	test("returns zero pctAi for empty file list", () => {
+		const totals = aggregateTotals([]);
+		expect(totals.pctAi).toBe(0);
+		expect(totals.total).toBe(0);
+	});
+	test("single file — totals equal that file's stats", () => {
+		const totals = aggregateTotals([fileStats("a.ts", 5, 3, 0, 62)]);
+		expect(totals.ai).toBe(5);
+		expect(totals.human).toBe(3);
+		// 5 / 8 = 62.5 %, and Math.round(62.5) is 63 (JS rounds half-up).
+		expect(totals.pctAi).toBe(63);
+	});
+});

package/src/attribution/checkpoint.ts ADDED Viewed

@@ -0,0 +1,148 @@
+/**
+ * Checkpoint storage for claude-attribution.
+ *
+ * A "checkpoint" is a snapshot of a file's line content saved to disk so that
+ * the post-commit hook can compare Claude's writes against the committed state.
+ *
+ * Two checkpoint types per file per session:
+ *   - "before": file content before Claude's first edit (saved by pre-tool-use.ts)
+ *   - "after":  file content after Claude's last edit (saved by post-tool-use.ts)
+ *
+ * Checkpoints are stored in /tmp/claude-attribution/<session_id>/<file_hash>.<type>.json
+ * and survive session close so commits can be attributed after Claude is closed.
+ * The OS clears /tmp on reboot; stale checkpoints are harmless.
+ */
+import { createHash } from "crypto";
+import { chmod, mkdir, readFile, writeFile } from "fs/promises";
+import { existsSync } from "fs";
+import { dirname, join } from "path";
+/** Root directory under which one sub-directory per session holds that session's checkpoint files. */
+const CHECKPOINT_BASE = "/tmp/claude-attribution";
+/**
+ * Pattern a session_id must match before it may be used as a path component:
+ * 1–128 characters, each an ASCII letter, digit, underscore or hyphen.
+ * Because "/" and "." are excluded, path traversal attempts such as
+ * session_id = "../../etc/passwd" are rejected.
+ * (Per the original author's note, Claude Code session IDs are UUID-like
+ * strings with hyphens, which this pattern accepts.)
+ */
+export const SESSION_ID_RE = /^[a-zA-Z0-9_-]{1,128}$/;
+/**
+ * Guard that throws unless `sessionId` matches SESSION_ID_RE.
+ *
+ * @throws Error whose message contains the JSON-encoded first 50 characters
+ *   of the rejected value.
+ */
+export function validateSessionId(sessionId: string): void {
+	if (SESSION_ID_RE.test(sessionId)) return;
+	// Truncate before quoting so a huge bogus value cannot flood the message.
+	const preview = JSON.stringify(sessionId.slice(0, 50));
+	throw new Error(`Invalid session_id: ${preview}`);
+}
+/** On-disk shape of one checkpoint file (stored as JSON). */
+export interface CheckpointData {
+	/** The file path exactly as it was passed to saveCheckpoint. */
+	filePath: string;
+	/** File content split on "\n"; a single empty string when the file did not exist. */
+	lines: string[];
+	/** ISO-8601 time at which the checkpoint was taken. */
+	timestamp: string;
+}
+/**
+ * Map a file path to the stem of its checkpoint filename: the first 16 hex
+ * characters of the SHA-256 digest of `filePath`. Equal paths always produce
+ * equal keys.
+ */
+function filePathKey(filePath: string): string {
+	const hasher = createHash("sha256");
+	hasher.update(filePath);
+	const fullHex = hasher.digest("hex");
+	return fullHex.slice(0, 16);
+}
+/**
+ * Build the location of a checkpoint file:
+ * <CHECKPOINT_BASE>/<sessionId>/<filePathKey(filePath)>.<type>.json
+ *
+ * No validation happens here; callers validate `sessionId` first.
+ */
+function checkpointPath(
+	sessionId: string,
+	filePath: string,
+	type: "before" | "after",
+): string {
+	const fileName = [filePathKey(filePath), type, "json"].join(".");
+	return join(CHECKPOINT_BASE, sessionId, fileName);
+}
+/**
+ * Snapshot the current content of `filePath` as a "before" or "after"
+ * checkpoint for the given session.
+ *
+ * The file is read as UTF-8 and split on "\n"; a file that does not exist is
+ * recorded as empty content. The snapshot is written as compact JSON to
+ * <CHECKPOINT_BASE>/<sessionId>/<file key>.<type>.json, overwriting any
+ * earlier checkpoint of the same type for that file.
+ *
+ * Privacy: the session directory is forced to mode 0700 and a newly created
+ * checkpoint file gets mode 0600, so other users on the same machine cannot
+ * read another developer's file snapshots.
+ *
+ * @param sessionId - Session identifier; must satisfy SESSION_ID_RE.
+ * @param filePath - File to snapshot; stored verbatim inside the checkpoint.
+ * @param type - Which checkpoint slot ("before" or "after") to write.
+ * @throws Error if `sessionId` is invalid; rethrows any filesystem error other
+ *   than the source file being absent.
+ */
+export async function saveCheckpoint(
+	sessionId: string,
+	filePath: string,
+	type: "before" | "after",
+): Promise<void> {
+	validateSessionId(sessionId);
+	// Read directly rather than existsSync-then-read: a file removed between
+	// the two calls would otherwise make the read throw. An absent file (ENOENT,
+	// or ENOTDIR when a parent component is not a directory) snapshots as "".
+	let content = "";
+	try {
+		content = await readFile(filePath, "utf8");
+	} catch (err: unknown) {
+		const code =
+			typeof err === "object" && err !== null && "code" in err
+				? (err as { code?: unknown }).code
+				: undefined;
+		if (code !== "ENOENT" && code !== "ENOTDIR") throw err;
+	}
+	const data: CheckpointData = {
+		filePath,
+		lines: content.split("\n"),
+		timestamp: new Date().toISOString(),
+	};
+	const dest = checkpointPath(sessionId, filePath, type);
+	// Ensure the base directory exists first (no mode — it's not sensitive).
+	// NOTE(review): the base directory is created with the default mode by
+	// whichever user runs first; on a shared host a second user may be unable
+	// to create a session directory inside it — confirm whether that matters.
+	await mkdir(CHECKPOINT_BASE, { recursive: true });
+	// Create the session directory with mode 0o700 atomically so no race window
+	// exists between creation and permission-setting. When the directory already
+	// exists, mkdir does NOT apply the mode option, so chmod afterwards to
+	// enforce the invariant on pre-existing directories too.
+	// `dest` lives directly inside this directory (the validated session id
+	// contains no path separators), so no further mkdir is needed.
+	const sessionDir = join(CHECKPOINT_BASE, sessionId);
+	await mkdir(sessionDir, { recursive: true, mode: 0o700 });
+	await chmod(sessionDir, 0o700);
+	// Compact JSON (no indentation) — these files are machine-read only.
+	// mode 0o600 keeps a newly created checkpoint private to its owner.
+	await writeFile(dest, JSON.stringify(data), { mode: 0o600 });
+}
+/** Runtime check that a parsed JSON value has the CheckpointData shape. */
+function isCheckpointData(value: unknown): value is CheckpointData {
+	if (typeof value !== "object" || value === null) return false;
+	const candidate = value as Record<string, unknown>;
+	return (
+		typeof candidate.filePath === "string" &&
+		typeof candidate.timestamp === "string" &&
+		Array.isArray(candidate.lines) &&
+		candidate.lines.every((line) => typeof line === "string")
+	);
+}
+/**
+ * Load a previously saved checkpoint.
+ *
+ * @param sessionId - Session identifier; must satisfy SESSION_ID_RE.
+ * @param filePath - The path that was passed to saveCheckpoint.
+ * @param type - Which checkpoint slot ("before" or "after") to read.
+ * @returns The checkpoint, or null if no checkpoint file exists.
+ * @throws Error if `sessionId` is invalid or the stored JSON does not have the
+ *   CheckpointData shape; SyntaxError if the file is not valid JSON; any read
+ *   error other than the file being absent.
+ */
+export async function loadCheckpoint(
+	sessionId: string,
+	filePath: string,
+	type: "before" | "after",
+): Promise<CheckpointData | null> {
+	validateSessionId(sessionId);
+	const src = checkpointPath(sessionId, filePath, type);
+	let raw: string;
+	try {
+		// Read directly rather than existsSync-then-read so that a checkpoint
+		// removed between the two calls yields null instead of an exception.
+		raw = await readFile(src, "utf8");
+	} catch (err: unknown) {
+		const code =
+			typeof err === "object" && err !== null && "code" in err
+				? (err as { code?: unknown }).code
+				: undefined;
+		if (code === "ENOENT" || code === "ENOTDIR") return null;
+		throw err;
+	}
+	// Validate instead of blindly casting: a truncated or foreign file should
+	// fail here with a clear message, not later inside the caller.
+	const parsed: unknown = JSON.parse(raw);
+	if (!isCheckpointData(parsed)) {
+		throw new Error(`Malformed checkpoint file: ${src}`);
+	}
+	return parsed;
+}
+/**
+ * Delete the whole checkpoint directory of one session, if it exists.
+ *
+ * NOTE: deliberately NOT invoked from stop.ts (the SessionEnd hook).
+ * A developer may close Claude Code before committing, and the post-commit
+ * attribution still needs the checkpoints at that point; deleting them on
+ * session end would make the next commit show 0% AI attribution. Leaving
+ * cleanup to the OS (/tmp is cleared on reboot) is considered sufficient.
+ *
+ * Kept as an export for a possible explicit cleanup command.
+ *
+ * @throws Error if `sessionId` fails validation.
+ */
+export async function clearCheckpoints(sessionId: string): Promise<void> {
+	validateSessionId(sessionId);
+	const target = join(CHECKPOINT_BASE, sessionId);
+	if (existsSync(target)) {
+		// rm is loaded on demand; only this rarely used path needs it.
+		const fsPromises = await import("fs/promises");
+		await fsPromises.rm(target, { recursive: true, force: true });
+	}
+}
+/**
+ * Record `sessionId` as the repo's active session by writing it to
+ * <repoRoot>/.claude/attribution-state/current-session, creating the
+ * directory first when necessary. The value is written as given.
+ */
+export async function writeCurrentSession(
+	repoRoot: string,
+	sessionId: string,
+): Promise<void> {
+	const statePath = join(
+		repoRoot,
+		".claude",
+		"attribution-state",
+		"current-session",
+	);
+	await mkdir(dirname(statePath), { recursive: true });
+	await writeFile(statePath, sessionId);
+}
+/**
+ * Return the session ID stored in
+ * <repoRoot>/.claude/attribution-state/current-session with surrounding
+ * whitespace removed, or null when the file is missing or holds only whitespace.
+ */
+export async function readCurrentSession(
+	repoRoot: string,
+): Promise<string | null> {
+	const stateDir = join(repoRoot, ".claude", "attribution-state");
+	const statePath = join(stateDir, "current-session");
+	if (!existsSync(statePath)) return null;
+	const stored = (await readFile(statePath, "utf8")).trim();
+	return stored === "" ? null : stored;
+}

package/src/attribution/commit.ts ADDED Viewed

@@ -0,0 +1,163 @@
+/**
+ * Post-commit attribution runner.
+ *
+ * Called by the .git/hooks/post-commit hook after every `git commit`.
+ * Usage: <attribution-root>/src/run.sh <attribution-root>/src/attribution/commit.ts
+ *
+ * Pipeline:
+ *   1. Read the active session ID from .claude/attribution-state/current-session
+ *   2. Get the HEAD commit SHA and list of changed files
+ *   3. For each changed file (in parallel):
+ *      a. Read the committed file content via `git show HEAD:<path>`
+ *      b. Load the before/after checkpoints from /tmp/claude-attribution/<session>/
+ *      c. Run attributeLines(before, after, committed) → AI / HUMAN / MIXED per line
+ *      d. If no checkpoint exists → all HUMAN (file not touched in this session)
+ *   4. Write the AttributionResult as a git note under refs/notes/claude-attribution
+ *   5. Append a summary to .claude/logs/attribution.jsonl
+ *   6. Print a one-line summary to stdout
+ *
+ * This hook runs asynchronously after the commit is recorded — it cannot block the commit.
+ * Errors never fail the hook: a failure in main() is logged to stderr and the
+ * process still exits 0, while OTel export failures are swallowed silently.
+ */
+import { resolve, join } from "path";
+import { mkdir, appendFile } from "fs/promises";
+import { loadCheckpoint, readCurrentSession } from "./checkpoint.ts";
+import {
+	attributeLines,
+	aggregateTotals,
+	type AttributionResult,
+	type FileAttribution,
+} from "./differ.ts";
+import {
+	writeNote,
+	headSha,
+	filesInCommit,
+	committedContent,
+	currentBranch,
+} from "./git-notes.ts";
+import {
+	otelEndpoint,
+	otelHeaders,
+	readOtelContext,
+	buildSessionSpan,
+	exportOtlpSpans,
+	clearOtelContext,
+} from "./otel.ts";
+/**
+ * Attribute the HEAD commit of the repository in the current working directory.
+ *
+ * Steps: read the active session id; fetch HEAD sha, branch and changed files;
+ * compute per-file attribution in parallel; write the result as a git note;
+ * append it to .claude/logs/attribution.jsonl; print a one-line summary when
+ * any lines were counted; and, if an OTel endpoint is configured, export the
+ * session span and clear the stored OTel context.
+ */
+async function main() {
+	const repoRoot = resolve(process.cwd());
+	const sessionId = await readCurrentSession(repoRoot);
+	const [sha, branch, changedFiles] = await Promise.all([
+		headSha(repoRoot),
+		currentBranch(repoRoot),
+		filesInCommit(repoRoot),
+	]);
+
+	// Stats record for a file in which every committed line counts as HUMAN.
+	const allHuman = (path: string, lineCount: number): FileAttribution => ({
+		path,
+		ai: 0,
+		human: lineCount,
+		mixed: 0,
+		total: lineCount,
+		pctAi: 0,
+	});
+
+	// Attribution for a single changed file; null means "leave it out".
+	const attributeFile = async (
+		relPath: string,
+	): Promise<FileAttribution | null> => {
+		const committed = await committedContent(repoRoot, relPath);
+		// null → the file was deleted in this commit.
+		// A NUL byte → binary content, where splitting into lines is meaningless.
+		if (committed === null || committed.includes("\0")) return null;
+		const committedLines = committed.split("\n");
+		// Without an active Claude session everything is HUMAN.
+		if (!sessionId) return allHuman(relPath, committedLines.length);
+		const absPath = join(repoRoot, relPath);
+		const before = await loadCheckpoint(sessionId, absPath, "before");
+		const after = await loadCheckpoint(sessionId, absPath, "after");
+		// No "after" checkpoint → Claude never wrote this file in the session.
+		if (!after) return allHuman(relPath, committedLines.length);
+		const { stats } = attributeLines(
+			before?.lines ?? [],
+			after.lines,
+			committedLines,
+		);
+		return { ...stats, path: relPath };
+	};
+
+	// Files are independent of each other, so attribute them concurrently and
+	// then drop the skipped (null) entries.
+	const fileResults: FileAttribution[] = [];
+	for (const entry of await Promise.all(changedFiles.map(attributeFile))) {
+		if (entry !== null) fileResults.push(entry);
+	}
+
+	const result: AttributionResult = {
+		commit: sha,
+		session: sessionId,
+		branch,
+		timestamp: new Date().toISOString(),
+		files: fileResults,
+		totals: aggregateTotals(fileResults),
+	};
+
+	// Persist: first the git note, then one JSON line in the local log.
+	await writeNote(result, repoRoot);
+	const logDir = join(repoRoot, ".claude", "logs");
+	await mkdir(logDir, { recursive: true });
+	const logLine = JSON.stringify(result) + "\n";
+	await appendFile(join(logDir, "attribution.jsonl"), logLine);
+
+	const { totals } = result;
+	if (totals.total > 0) {
+		console.log(
+			`[claude-attribution] ${sha.slice(0, 7)} — ${totals.ai} AI / ${totals.human} human / ${totals.mixed} mixed lines (${totals.pctAi}% AI)`,
+		);
+	}
+
+	// OTel: close the root session span and export it — only when configured.
+	const endpoint = otelEndpoint();
+	if (!endpoint) return;
+	try {
+		const ctx = await readOtelContext(repoRoot);
+		if (!ctx) return;
+		const endedAt = new Date().toISOString();
+		const span = buildSessionSpan(ctx, sha, branch, totals, endedAt);
+		await exportOtlpSpans([span], endpoint, otelHeaders());
+		await clearOtelContext(repoRoot);
+	} catch {
+		// Silent — never block the commit
+	}
+}
+/**
+ * Last-resort handler for a rejected main(): report the failure on stderr and
+ * exit with status 0 so the hook never signals an error (soft fail).
+ */
+function reportAndExit(failure: unknown): never {
+	console.error("[claude-attribution] post-commit error:", failure);
+	process.exit(0);
+}
+main().catch(reportAndExit);