ai-code-stats 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. ai_code_stats/__init__.py +15 -0
  2. ai_code_stats/agents/__init__.py +1 -0
  3. ai_code_stats/agents/base.py +40 -0
  4. ai_code_stats/agents/claude_code.py +95 -0
  5. ai_code_stats/agents/codex.py +174 -0
  6. ai_code_stats/agents/registry.py +25 -0
  7. ai_code_stats/attribution.py +141 -0
  8. ai_code_stats/classify.py +203 -0
  9. ai_code_stats/cli.py +216 -0
  10. ai_code_stats/config.py +171 -0
  11. ai_code_stats/diffutil.py +96 -0
  12. ai_code_stats/githook/__init__.py +1 -0
  13. ai_code_stats/githook/post_commit.py +214 -0
  14. ai_code_stats/gitutil.py +51 -0
  15. ai_code_stats/hooks/__init__.py +1 -0
  16. ai_code_stats/hooks/session_event.py +14 -0
  17. ai_code_stats/hooks/tool_event.py +141 -0
  18. ai_code_stats/identity.py +89 -0
  19. ai_code_stats/install/__init__.py +5 -0
  20. ai_code_stats/install/agent_install.py +182 -0
  21. ai_code_stats/install/git_install.py +114 -0
  22. ai_code_stats/models.py +237 -0
  23. ai_code_stats/paths.py +85 -0
  24. ai_code_stats/py.typed +0 -0
  25. ai_code_stats/reporters/__init__.py +1 -0
  26. ai_code_stats/reporters/base.py +60 -0
  27. ai_code_stats/reporters/command.py +45 -0
  28. ai_code_stats/reporters/http_webhook.py +79 -0
  29. ai_code_stats/reporters/json_file.py +24 -0
  30. ai_code_stats/reporters/registry.py +104 -0
  31. ai_code_stats/storage.py +119 -0
  32. ai_code_stats/tokens.py +68 -0
  33. ai_code_stats/util.py +39 -0
  34. ai_code_stats-0.1.0.dist-info/METADATA +179 -0
  35. ai_code_stats-0.1.0.dist-info/RECORD +38 -0
  36. ai_code_stats-0.1.0.dist-info/WHEEL +5 -0
  37. ai_code_stats-0.1.0.dist-info/entry_points.txt +2 -0
  38. ai_code_stats-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,45 @@
1
+ """自定义命令 Reporter:启动用户指定的进程,信封 JSON 经 stdin 传入。
2
+
3
+ 最灵活的逃生口——可对接任何能读 stdin 的脚本(推送 Kafka、写数据库、转发等)。
4
+ 信封同时通过环境变量 ``AI_CODE_STATS_EVENT`` 提供。
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import subprocess
12
+ from typing import Any, Dict, List
13
+
14
+ from .base import Reporter, ReportResult
15
+
16
+
17
class CommandReporter(Reporter):
    """Reporter that pipes the envelope to a user-configured external command.

    The envelope is serialized to JSON once and handed to the child process
    both on stdin and through the ``AI_CODE_STATS_EVENT`` environment variable.

    Options read from ``self.options``:
        argv: non-empty list forming the command line; items are ``str()``-ed.
        timeout: seconds to wait for the child process (default 30).
    """

    type_name = "command"

    def send(self, envelope: Dict[str, Any]) -> ReportResult:
        """Run the configured command and report whether it exited with 0.

        Args:
            envelope: JSON-serializable report envelope.

        Returns:
            A ``ReportResult`` with ``ok=True`` only when the command exits
            with status 0; otherwise ``ok=False`` with a short reason in
            ``detail`` (missing argv, command not found, launch failure,
            timeout, or non-zero exit code plus the first 200 characters of
            stderr).
        """
        argv = self.options.get("argv")
        if not argv or not isinstance(argv, list):
            return ReportResult(ok=False, detail="command 缺少 argv(字符串数组)")

        payload = json.dumps(envelope, ensure_ascii=False)
        # Copy the environment so the parent process is never mutated.
        env = dict(os.environ)
        env["AI_CODE_STATS_EVENT"] = payload
        timeout = float(self.options.get("timeout", 30))
        argv_str: List[str] = [str(a) for a in argv]
        try:
            proc = subprocess.run(
                argv_str,
                input=payload,
                text=True,
                capture_output=True,
                env=env,
                timeout=timeout,
            )
        except FileNotFoundError:
            return ReportResult(ok=False, detail=f"找不到命令: {argv_str[0]}")
        except subprocess.TimeoutExpired:
            return ReportResult(ok=False, detail="命令超时")
        except OSError as exc:
            # Any other failure to start the child (e.g. permission denied or
            # the OS rejecting the spawn) is reported as a failed result
            # instead of escaping as a raw exception.
            return ReportResult(ok=False, detail=f"无法启动命令: {exc}")
        if proc.returncode != 0:
            return ReportResult(ok=False, detail=f"退出码 {proc.returncode}: {proc.stderr[:200]}")
        return ReportResult(ok=True, detail="ok")
@@ -0,0 +1,79 @@
1
+ """HTTP Webhook Reporter:POST JSON 到可配置 URL。
2
+
3
+ - 有 ``requests`` 时用之;否则回退到标准库 ``urllib``,零额外依赖也能跑。
4
+ - ``mapping`` 可把信封映射成任意后端期望的扁平结构,适配多种协议。
5
+ - ``headers`` 支持 ``${ENV:TOKEN}`` 注入(在配置加载阶段已插值)。
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import time
12
+ import urllib.error
13
+ import urllib.request
14
+ from typing import Any, Dict
15
+
16
+ from .base import Reporter, ReportResult, apply_mapping
17
+
18
+
19
class HttpWebhookReporter(Reporter):
    """Reporter that sends the envelope as a JSON body to a configurable URL.

    Options read from ``self.options``:
        url: target URL (required).
        method: HTTP method, default ``POST``.
        headers: extra request headers; ``Content-Type`` defaults to
            ``application/json`` when not supplied.
        timeout: per-request timeout in seconds, default 10.
        verify: whether TLS certificates are verified, default True.
        retries: number of extra attempts after the first, default 2.
        backoff: base delay in seconds between attempts, doubled after each
            failed attempt, default 0.5.
        mapping: optional mapping passed to ``apply_mapping`` to reshape the
            envelope before it is sent.
    """

    type_name = "http_webhook"

    def send(self, envelope: Dict[str, Any]) -> ReportResult:
        """Send ``envelope`` to the configured URL, retrying on failure.

        Args:
            envelope: JSON-serializable report envelope.

        Returns:
            A ``ReportResult`` that is ``ok`` as soon as one attempt gets a
            2xx response; otherwise the detail of the last failed attempt.
        """
        url = self.options.get("url")
        if not url:
            return ReportResult(ok=False, detail="http_webhook 缺少 url")

        method = (self.options.get("method") or "POST").upper()
        headers = dict(self.options.get("headers") or {})
        headers.setdefault("Content-Type", "application/json")
        timeout = float(self.options.get("timeout", 10))
        verify = bool(self.options.get("verify", True))
        # Clamp to zero: a negative ``retries`` would otherwise skip the loop
        # entirely (no request sent, empty failure detail) and a negative
        # ``backoff`` would make ``time.sleep`` raise ValueError.
        retries = max(int(self.options.get("retries", 2)), 0)
        backoff = max(float(self.options.get("backoff", 0.5)), 0.0)

        mapping = self.options.get("mapping")
        payload = apply_mapping(envelope, mapping) if mapping else envelope
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")

        last_detail = ""
        for attempt in range(retries + 1):
            ok, last_detail = self._post(url, method, headers, body, timeout, verify)
            if ok:
                return ReportResult(ok=True, detail=f"{url} ({last_detail})")
            if attempt < retries:
                # Exponential backoff: backoff, 2*backoff, 4*backoff, ...
                time.sleep(backoff * (2 ** attempt))
        return ReportResult(ok=False, detail=f"{url}: {last_detail}")

    def _post(self, url, method, headers, body, timeout, verify):
        """Perform one request, preferring ``requests`` when it is installed.

        Falls back to the ``urllib`` implementation when ``requests`` cannot
        be imported.

        Returns:
            ``(ok, detail)`` where ``ok`` is True for a 2xx status and
            ``detail`` is a short status or error description.
        """
        try:
            import requests  # type: ignore

            resp = requests.request(
                method, url, data=body, headers=headers, timeout=timeout, verify=verify
            )
            if 200 <= resp.status_code < 300:
                return True, f"HTTP {resp.status_code}"
            return False, f"HTTP {resp.status_code}: {resp.text[:200]}"
        except ImportError:
            return self._post_urllib(url, method, headers, body, timeout, verify)
        except Exception as exc:  # noqa: BLE001
            # Network errors of any kind are reported, never raised, so the
            # retry loop in ``send`` can decide what to do next.
            return False, str(exc)

    def _post_urllib(self, url, method, headers, body, timeout, verify):
        """Perform one request with the standard library only.

        Returns:
            ``(ok, detail)`` with the same meaning as ``_post``.
        """
        req = urllib.request.Request(url, data=body, headers=headers, method=method)
        ctx = None
        if not verify:
            import ssl

            # Certificate and hostname checks are disabled on explicit request.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        try:
            with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
                code = resp.getcode()
                return (200 <= code < 300), f"HTTP {code}"
        except urllib.error.HTTPError as exc:
            return False, f"HTTP {exc.code}: {exc.reason}"
        except Exception as exc:  # noqa: BLE001
            return False, str(exc)
@@ -0,0 +1,24 @@
1
+ """本地 JSON 文件 Reporter:把信封追加为 JSONL。"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Dict
8
+
9
+ from .base import Reporter, ReportResult, render_path
10
+
11
+
12
class JsonFileReporter(Reporter):
    """Reporter that appends each envelope as one line of a local JSONL file.

    The ``path`` option is a template rendered with ``render_path`` against
    the reporter context; it defaults to ``{repo_data}/reports.jsonl``.
    """

    type_name = "json_file"

    def send(self, envelope: Dict[str, Any]) -> ReportResult:
        """Append ``envelope`` to the target file, creating parent directories.

        Returns:
            ``ok=True`` with the file path as detail on success; ``ok=False``
            with the error text when the directory or file cannot be written.
        """
        template = self.options.get("path", "{repo_data}/reports.jsonl")
        rendered = render_path(template, self.context)
        path = Path(rendered).expanduser()
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            with path.open("a", encoding="utf-8") as sink:
                record = json.dumps(envelope, ensure_ascii=False)
                sink.write(record + "\n")
        except OSError as exc:
            return ReportResult(ok=False, detail=f"写文件失败 {path}: {exc}")
        else:
            return ReportResult(ok=True, detail=str(path))
@@ -0,0 +1,104 @@
1
+ """Reporter 注册表、派发与失败重试队列。"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Tuple, Type
8
+
9
+ from .. import paths
10
+ from ..config import interpolate_env
11
+ from .base import Reporter, ReporterContext, ReportResult
12
+ from .command import CommandReporter
13
+ from .http_webhook import HttpWebhookReporter
14
+ from .json_file import JsonFileReporter
15
+
16
+ REPORTER_TYPES: Dict[str, Type[Reporter]] = {
17
+ JsonFileReporter.type_name: JsonFileReporter,
18
+ HttpWebhookReporter.type_name: HttpWebhookReporter,
19
+ CommandReporter.type_name: CommandReporter,
20
+ }
21
+
22
+ RETRY_FILE = "retry_queue.jsonl"
23
+
24
+
25
def build_reporter(cfg: Dict[str, Any], context: ReporterContext) -> Optional[Reporter]:
    """Instantiate the reporter described by ``cfg``.

    ``cfg["type"]`` selects the class from ``REPORTER_TYPES``; every other key
    is passed through ``interpolate_env`` and handed to the reporter as its
    options.

    Returns:
        The reporter instance, or ``None`` when the type is not registered.
    """
    reporter_cls = REPORTER_TYPES.get(cfg.get("type"))
    if reporter_cls is None:
        return None
    raw_options = {key: value for key, value in cfg.items() if key != "type"}
    resolved_options = interpolate_env(raw_options)
    return reporter_cls(options=resolved_options, context=context)
33
+
34
+
35
def dispatch(
    envelope: Dict[str, Any],
    reporter_configs: List[Dict[str, Any]],
    context: ReporterContext,
    enqueue_on_failure: bool = True,
) -> List[Tuple[str, ReportResult]]:
    """Send ``envelope`` to every configured reporter.

    A reporter that raises is recorded as a failed result rather than aborting
    the remaining reporters. Failed sends are appended to the retry queue when
    ``enqueue_on_failure`` is true; configs with an unknown type are reported
    as failures but never queued.

    Returns:
        One ``(type, result)`` tuple per reporter config, in config order.
    """
    outcomes: List[Tuple[str, ReportResult]] = []
    for cfg in reporter_configs or []:
        reporter = build_reporter(cfg, context)
        if reporter is not None:
            try:
                result = reporter.send(envelope)
            except Exception as exc:  # noqa: BLE001
                result = ReportResult(False, f"异常: {exc}")
            outcomes.append((reporter.type_name, result))
            if not result.ok and enqueue_on_failure:
                _enqueue_retry(cfg, envelope)
        else:
            unknown = ReportResult(False, "未知 reporter 类型")
            outcomes.append((str(cfg.get("type")), unknown))
    return outcomes
56
+
57
+
58
def _retry_path() -> Path:
    """Return the retry-queue file path inside the ensured user data directory."""
    data_dir = paths.ensure_dir(paths.user_data_dir())
    return data_dir / RETRY_FILE
60
+
61
+
62
def _enqueue_retry(cfg: Dict[str, Any], envelope: Dict[str, Any]) -> None:
    """Append a failed delivery (reporter config + envelope) to the retry queue.

    Best-effort: an ``OSError`` while locating or writing the queue file is
    deliberately ignored.
    """
    record = {"reporter": cfg, "envelope": envelope}
    try:
        with open(_retry_path(), "a", encoding="utf-8") as queue:
            queue.write(json.dumps(record, ensure_ascii=False) + "\n")
    except OSError:
        # Queueing must never turn a failed report into a crash.
        return
68
+
69
+
70
def flush_retries(context: ReporterContext) -> Tuple[int, int]:
    """Re-send every entry in the retry queue.

    Entries that are delivered successfully are dropped; entries that fail
    again are kept for the next flush. Lines that are blank, are not valid
    JSON, are not JSON objects with an object-valued ``reporter``, or name an
    unregistered reporter type can never be delivered and are discarded.

    Args:
        context: reporter context used to rebuild each queued reporter.

    Returns:
        ``(succeeded, remaining)`` — the number of entries delivered by this
        call and the number still queued afterwards.
    """
    path = _retry_path()
    if not path.exists():
        return (0, 0)

    items: List[Dict[str, Any]] = []
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A syntactically valid line may still be a list/number/string, or
            # carry a non-object reporter config; calling ``.get`` on those
            # would raise AttributeError and abort the whole flush.
            if isinstance(item, dict) and isinstance(item.get("reporter"), dict):
                items.append(item)

    succeeded = 0
    remaining: List[Dict[str, Any]] = []
    for item in items:
        reporter = build_reporter(item["reporter"], context)
        if reporter is None:
            continue
        try:
            res = reporter.send(item.get("envelope", {}))
        except Exception:  # noqa: BLE001
            res = ReportResult(False, "异常")
        if res.ok:
            succeeded += 1
        else:
            remaining.append(item)

    # Write the survivors to a sibling temp file and swap it in, so a crash
    # mid-write cannot leave a truncated queue behind.
    tmp_path = path.with_name(path.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as fh:
        for item in remaining:
            fh.write(json.dumps(item, ensure_ascii=False) + "\n")
    tmp_path.replace(path)
    return (succeeded, len(remaining))
@@ -0,0 +1,119 @@
1
+ """每仓库的事件存储与消费游标。
2
+
3
+ 目录布局(``<repo>/.git/ai-code-stats/``):
4
+ pending.jsonl —— 尚未被任何 commit 消费的 AI 编辑事件
5
+ consumed.jsonl —— 已被某次 commit 归因消费的事件(审计用)
6
+ token_snapshots.json —— 各 session 上次读到的 token 累计值(用于算增量)
7
+ reports.jsonl —— 默认 json_file Reporter 的产出(由 Reporter 写)
8
+
9
+ 采用「append + 重写」的简单方案:commit 时把命中的事件移入 consumed,重写剩余 pending。
10
+ 单进程串行调用(hook 由 Agent / git 顺序触发),无需复杂锁。
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import tempfile
18
+ from pathlib import Path
19
+ from typing import Any, Dict, Iterable, List
20
+
21
+ from . import paths
22
+
23
+ PENDING_FILE = "pending.jsonl"
24
+ CONSUMED_FILE = "consumed.jsonl"
25
+ TOKEN_SNAPSHOT_FILE = "token_snapshots.json"
26
+ STATE_FILE = "state.json"
27
+
28
+
29
class Storage:
    """Storage handle bound to the data directory of one repository."""

    def __init__(self, repo_root: Path):
        self.repo_root = Path(repo_root)
        self.dir = paths.repo_data_dir(self.repo_root)

    # ---- pending events ------------------------------------------------
    def append_event(self, event: Dict[str, Any]) -> None:
        """Append one event as a JSON line to the pending file."""
        paths.ensure_dir(self.dir)
        serialized = json.dumps(event, ensure_ascii=False)
        pending_path = self.dir / PENDING_FILE
        with open(pending_path, "a", encoding="utf-8") as fh:
            fh.write(serialized + "\n")

    def read_pending(self) -> List[Dict[str, Any]]:
        """Return every event currently stored in the pending file."""
        pending_path = self.dir / PENDING_FILE
        return _read_jsonl(pending_path)

    def consume(self, keep: Iterable[Dict[str, Any]], consumed: Iterable[Dict[str, Any]]) -> None:
        """Append ``consumed`` to the consumed file and rewrite pending with ``keep``."""
        paths.ensure_dir(self.dir)
        archived = list(consumed)
        if archived:
            with open(self.dir / CONSUMED_FILE, "a", encoding="utf-8") as fh:
                for event in archived:
                    fh.write(json.dumps(event, ensure_ascii=False) + "\n")
        pending_path = self.dir / PENDING_FILE
        _atomic_write_jsonl(pending_path, list(keep))

    # ---- token snapshots -----------------------------------------------
    def load_token_snapshots(self) -> Dict[str, Dict[str, Any]]:
        """Load the token snapshot file; ``{}`` when missing, invalid or not an object."""
        return self._load_json_dict(TOKEN_SNAPSHOT_FILE)

    def save_token_snapshots(self, snapshots: Dict[str, Dict[str, Any]]) -> None:
        """Write the token snapshots as indented JSON via the atomic writer."""
        paths.ensure_dir(self.dir)
        target = self.dir / TOKEN_SNAPSHOT_FILE
        rendered = json.dumps(snapshots, ensure_ascii=False, indent=2)
        _atomic_write(target, rendered)

    # ---- state (dedup cursors etc.) ------------------------------------
    def load_state(self) -> Dict[str, Any]:
        """Load the state file; ``{}`` when missing, invalid or not an object."""
        return self._load_json_dict(STATE_FILE)

    def save_state(self, state: Dict[str, Any]) -> None:
        """Write the state as indented JSON via the atomic writer."""
        paths.ensure_dir(self.dir)
        rendered = json.dumps(state, ensure_ascii=False, indent=2)
        _atomic_write(self.dir / STATE_FILE, rendered)

    def _load_json_dict(self, filename: str) -> Dict[str, Any]:
        """Read ``filename`` from the data directory as a JSON object.

        A missing file, undecodable JSON, or a top-level value that is not a
        dict all yield an empty dict.
        """
        try:
            with open(self.dir / filename, "r", encoding="utf-8") as fh:
                loaded = json.load(fh)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}
        if isinstance(loaded, dict):
            return loaded
        return {}
84
+
85
+
86
+ def _read_jsonl(path: Path) -> List[Dict[str, Any]]:
87
+ out: List[Dict[str, Any]] = []
88
+ try:
89
+ with open(path, "r", encoding="utf-8") as fh:
90
+ for line in fh:
91
+ line = line.strip()
92
+ if not line:
93
+ continue
94
+ try:
95
+ out.append(json.loads(line))
96
+ except json.JSONDecodeError:
97
+ # 容错:跳过损坏行而非整体失败。
98
+ continue
99
+ except FileNotFoundError:
100
+ return []
101
+ return out
102
+
103
+
104
def _atomic_write(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` through a temp file and ``os.replace``.

    The text first goes to a temporary file created in the destination
    directory and is then moved over ``path``, so a hook that crashes midway
    does not leave a half-written file at ``path``. A temp file that is still
    present afterwards is removed.
    """
    target_dir = path.parent
    paths.ensure_dir(target_dir)
    handle, scratch = tempfile.mkstemp(suffix=".tmp", dir=str(target_dir))
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            out.write(content)
        os.replace(scratch, path)
    finally:
        # After a successful replace the temp name no longer exists; it is
        # only still there when writing or replacing failed.
        leftover = os.path.exists(scratch)
        if leftover:
            os.unlink(scratch)
115
+
116
+
117
def _atomic_write_jsonl(path: Path, rows: List[Dict[str, Any]]) -> None:
    """Serialize ``rows`` as one JSON document per line and write them atomically."""
    encoded = [json.dumps(row, ensure_ascii=False) for row in rows]
    payload = "".join(f"{entry}\n" for entry in encoded)
    _atomic_write(path, payload)
@@ -0,0 +1,68 @@
1
+ """Token 用量聚合(纯函数,不读文件)。
2
+
3
+ 口径说明:归属到一次 commit 的 token = 自上次 commit 以来、本仓库相关 session
4
+ 的累计 token 增量。具体的「累计读取」由各 Agent 适配器负责(读 transcript /
5
+ rollout 日志),这里只做:累计→增量、增量求和、组装 ``TokenUsage``。
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Dict, Iterable, List
11
+
12
+ from .models import TokenUsage
13
+
14
USAGE_FIELDS = ("input", "output", "cache_read")


def normalize_usage(d: Dict[str, Any]) -> Dict[str, int]:
    """Reduce a usage dict from any source to the fixed integer fields.

    Args:
        d: mapping that may contain the ``USAGE_FIELDS`` keys; anything that
            is not a dict is treated as empty.

    Returns:
        A new dict holding exactly the ``USAGE_FIELDS`` keys. Each value is
        the input converted with ``int()`` and floored at 0; missing, falsy
        or unconvertible values (including NaN and infinity) become 0.
    """
    out = {f: 0 for f in USAGE_FIELDS}
    if not isinstance(d, dict):
        return out
    for f in USAGE_FIELDS:
        try:
            out[f] = max(int(d.get(f, 0) or 0), 0)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) — e.g. JSON ``Infinity``.
            out[f] = 0
    return out
28
+
29
+
30
def delta_usage(cumulative: Dict[str, Any], committed: Dict[str, Any]) -> Dict[str, int]:
    """Return the per-field increase of ``cumulative`` over ``committed``.

    Both inputs are normalized first; each field of the result is floored at 0.
    """
    current = normalize_usage(cumulative)
    baseline = normalize_usage(committed)
    increments: Dict[str, int] = {}
    for field in USAGE_FIELDS:
        difference = current[field] - baseline[field]
        increments[field] = difference if difference > 0 else 0
    return increments
35
+
36
+
37
def usage_total(usage: Dict[str, int]) -> int:
    """Return input + output tokens of the normalized ``usage`` (cache reads excluded)."""
    normalized = normalize_usage(usage)
    input_tokens = normalized["input"]
    output_tokens = normalized["output"]
    return input_tokens + output_tokens
40
+
41
+
42
def build_token_usage(per_session: Iterable[Dict[str, Any]]) -> TokenUsage:
    """Assemble the per-session increments into a commit-level ``TokenUsage``.

    Each element looks like
    ``{"session_id": ..., "agent": ..., "usage": {input, output, cache_read}}``.
    ``total`` is input + output; ``cache_read`` is listed separately and not
    added to the total.
    """
    by_session: List[Dict[str, Any]] = []
    for item in per_session:
        session_usage = normalize_usage(item.get("usage", {}))
        entry = {
            "session_id": item.get("session_id", ""),
            "agent": item.get("agent", ""),
            "usage": session_usage,
            "total": usage_total(session_usage),
        }
        by_session.append(entry)

    # Sum each field across the normalized per-session usages.
    totals = {
        field: sum(entry["usage"][field] for entry in by_session)
        for field in USAGE_FIELDS
    }
    return TokenUsage(
        input=totals["input"],
        output=totals["output"],
        cache_read=totals["cache_read"],
        total=totals["input"] + totals["output"],
        by_session=by_session,
    )
ai_code_stats/util.py ADDED
@@ -0,0 +1,39 @@
1
+ """通用小工具:时间戳、producer 元信息、信封封装。"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import platform
6
+ import socket
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Dict
9
+
10
+ from . import PLUGIN_NAME, __version__
11
+ from .models import ReportEnvelope
12
+
13
+
14
def utcnow_iso() -> str:
    """Return the current UTC time as an ISO-8601 string (second precision, ``Z`` suffix)."""
    now = datetime.now(timezone.utc)
    return now.strftime("%Y-%m-%dT%H:%M:%SZ")
17
+
18
+
19
def producer_info() -> Dict[str, Any]:
    """Build the envelope's ``producer`` section: plugin, version, host and OS."""
    try:
        hostname = socket.gethostname()
    except OSError:  # pragma: no cover
        hostname = ""
    os_name = platform.system().lower()  # darwin/windows/linux
    info: Dict[str, Any] = {}
    info["plugin"] = PLUGIN_NAME
    info["version"] = __version__
    info["host"] = hostname
    info["os"] = os_name
    return info
31
+
32
+
33
def make_envelope(kind: str, data: Dict[str, Any]) -> ReportEnvelope:
    """Wrap ``data`` in a ``ReportEnvelope`` stamped with the current UTC time and producer info."""
    timestamp = utcnow_iso()
    producer = producer_info()
    return ReportEnvelope(kind=kind, data=data, produced_at=timestamp, producer=producer)
@@ -0,0 +1,179 @@
1
+ Metadata-Version: 2.4
2
+ Name: ai-code-stats
3
+ Version: 0.1.0
4
+ Summary: 统计 CodingAgent (Claude Code / Codex) 的 AI 代码采纳率、AI 代码行数与 token 消耗,按 git 仓库 × 提交人维度上报
5
+ Author: ai-code-stats
6
+ License: MIT
7
+ Keywords: claude-code,codex,git,metrics,ai-coding
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Provides-Extra: http
11
+ Requires-Dist: requests>=2.25; extra == "http"
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=7.0; extra == "dev"
14
+ Requires-Dist: jsonschema>=4.0; extra == "dev"
15
+
16
+ # ai-code-stats
17
+
18
+ 统计 **CodingAgent(Claude Code / Codex)生成代码的采纳率、AI 代码行数与 token 消耗**,
19
+ 按 **git 仓库 × 提交人** 维度,在每次提交时上报。上报后端可插拔(HTTP / 本地文件 / 自定义命令),
20
+ 数据用带版本的 JSON Schema 定义,跨 macOS / Windows / Linux。
21
+
22
+ > 📖 **完整使用说明(安装/配置/上报示例/排查)见 [docs/USAGE.md](docs/USAGE.md)。**
23
+
24
+ ## 它能回答什么
25
+
26
+ - 这次提交里 **AI 写了多少行**、**人最终采纳了多少**(采纳率)。
27
+ - 每次提交的 **总代码行数 / AI 代码行数 / AI 占比**,分「全量」和「有效代码」两种口径。
28
+ - 这次提交关联的 AI **token 消耗**(input / output / cache)。
29
+
30
+ ## 工作原理
31
+
32
+ ```
33
+ AI 编辑 (PostToolUse 钩子) git 提交 (post-commit / post-merge 钩子)
34
+ ┌─────────────────────────┐ ┌──────────────────────────────────────┐
35
+ │ 解析 Edit/Write/apply_patch│ │ 取 commit 变更(含重命名检测) │
36
+ │ 新增行 → 归一化 + 哈希 │ ───▶ │ 与窗口内 AI 指纹做「多重集消费式匹配」 │
37
+ │ 标记是否「有效代码」 │ pending│ 算 采纳率 / AI 占比 / token │
38
+ │ 落 .git/ai-code-stats/ │ │ 组 JSON 信封 → 派发各 Reporter │
39
+ └─────────────────────────┘ └──────────────────────────────────────┘
40
+ ```
41
+
42
+ - **采纳率** = 落入本次 commit 的 AI 行数 / 窗口内 AI 生成的行数。
43
+ - **AI 占比** = 匹配到 AI 指纹的 commit 新增行 / commit 总新增行。
44
+ - 匹配基于**归一化内容哈希**,所以即使 AI 写的代码被移动到别的文件也能命中。
45
+
46
+ ## 安装
47
+
48
+ 需要 Python ≥ 3.9 与 git。
49
+
50
+ ```bash
51
+ pip install ai-code-stats # 或:pip install -e .(开发)
52
+
53
+ # 在目标仓库根目录执行,安装 git 钩子 + Claude + Codex 钩子
54
+ ai-code-stats install
55
+
56
+ # 只装某一项 / 预览不写入
57
+ ai-code-stats install --git
58
+ ai-code-stats install --claude --scope user # 写 ~/.claude/settings.json
59
+ ai-code-stats install --codex --dry-run
60
+
61
+ # 卸载(幂等,保留你自己的钩子内容)
62
+ ai-code-stats uninstall
63
+ ```
64
+
65
+ > Codex 钩子写入 `$CODEX_HOME/config.toml`(默认 `~/.codex/config.toml`)。由于 Codex 钩子
66
+ > schema 仍在演进,安装后建议 `ai-code-stats install --codex --dry-run` 核对,并确认你的
67
+ > Codex 版本支持内联 `[[hooks.PostToolUse]]`。
68
+
69
+ ## 配置
70
+
71
+ 解析顺序(后者覆盖前者):内置默认 → 用户级 `config.json` → 仓库 `.ai-code-stats.json` →
72
+ `AI_CODE_STATS_CONFIG` 指向的文件。字符串支持 `${ENV:VAR}` 注入密钥。
73
+
74
+ ```jsonc
75
+ {
76
+ "enabled": true,
77
+ "privacy": {
78
+ "store_plaintext": true, // 本地是否保留 AI 行明文(仅落在 .git/ 内)
79
+ "redact_in_reports": true // 上报只含统计数字,不含源码
80
+ },
81
+ "files": {
82
+ "include": [], // 为空=按已知代码扩展名统计;非空=只统计匹配项
83
+ "exclude": ["**/node_modules/**", "**/*.min.js", "package-lock.json"]
84
+ },
85
+ "attribution": {
86
+ "count_modes": ["raw", "effective"],
87
+ "primary": "effective", // 主指标用「有效代码」口径
88
+ "merge_strategy": "skip", // merge 提交:skip 或 first_parent
89
+ "detect_renames": true
90
+ },
91
+ "reporters": [
92
+ { "type": "json_file", "path": "{repo_data}/reports.jsonl" },
93
+ { "type": "http_webhook",
94
+ "url": "https://metrics.example.com/ingest",
95
+ "headers": { "Authorization": "Bearer ${ENV:AI_CODE_STATS_TOKEN}" },
96
+ "mapping": { // 把信封映射成任意后端 schema(点路径取值)
97
+ "repo": "data.repo_id",
98
+ "rate": "data.ai.effective.adoption_rate",
99
+ "tokens": "data.tokens.total"
100
+ }
101
+ },
102
+ { "type": "command", "argv": ["my-forwarder"] } // 信封 JSON 经 stdin 传入
103
+ ]
104
+ }
105
+ ```
106
+
107
+ ### 统计口径
108
+
109
+ - **raw(全量)**:所有新增/删除行。
110
+ - **effective(有效代码)**:剔除空行与纯注释行(按语言注释语法识别)。
111
+
112
+ ### 文件过滤
113
+
114
+ 默认只统计已知代码语言扩展名的文件,并排除 lock 文件、生成产物、vendored 目录、二进制。
115
+ 可用 `files.include` / `files.exclude`(glob,支持 `**`)定制。
116
+
117
+ ## 数据契约
118
+
119
+ `schemas/` 下三份带版本的 JSON Schema:
120
+
121
+ | Schema | 用途 |
122
+ |--------|------|
123
+ | `ai_edit_event.schema.json` | 单次 AI 编辑事件(本地暂存) |
124
+ | `commit_stat.schema.json` | 一次提交的完整统计 |
125
+ | `report_envelope.schema.json` | 上报统一信封 |
126
+
127
+ 信封示例:
128
+
129
+ ```json
130
+ {
131
+ "schema_version": "1.0",
132
+ "kind": "commit_stat",
133
+ "produced_at": "2026-06-15T08:00:00Z",
134
+ "producer": { "plugin": "ai-code-stats", "version": "0.1.0", "host": "dev-laptop", "os": "darwin" },
135
+ "data": {
136
+ "repo_id": "github.com/org/repo",
137
+ "commit": { "sha": "…", "branch": "main", "is_merge": false },
138
+ "committer": { "name": "Dev", "email": "dev@x.com" },
139
+ "totals": { "files_changed": 2, "raw": { "lines_added": 5 }, "effective": { "lines_added": 3 } },
140
+ "ai": {
141
+ "raw": { "ai_lines_added": 4, "adoption_rate": 1.0, "ai_share_of_commit": 0.8 },
142
+ "effective": { "ai_lines_added": 3, "adoption_rate": 1.0, "ai_share_of_commit": 1.0 }
143
+ },
144
+ "tokens": { "input": 120, "output": 30, "total": 150 }
145
+ }
146
+ }
147
+ ```
148
+
149
+ ## 常用命令
150
+
151
+ ```bash
152
+ ai-code-stats status # 查看待归因事件与 token 快照
153
+ ai-code-stats report # 打印当前 HEAD 的统计信封(不发送、不消费)
154
+ ai-code-stats flush # 重试发送失败的上报队列
155
+ ```
156
+
157
+ ## 隐私
158
+
159
+ - AI 行**明文只落在仓库内 `.git/ai-code-stats/`**,不会被提交(在 `.git/` 下)。
160
+ - 上报默认 `redact_in_reports=true`,**只发统计数字**,不含源码。
161
+ - 需要更强隐私可设 `privacy.store_plaintext=false`,本地只存哈希。
162
+
163
+ ## 已知限制
164
+
165
+ - `merge` 提交默认跳过归因(diff 含合并噪声),可配 `first_parent`。
166
+ - `rebase` / `cherry-pick` / `commit --amend` 下采纳率为近似值。
167
+ - token 归属按「自上次提交以来该 session 的累计增量」估算;多个仓库并行使用时,归属结果为近似值。
168
+
169
+ ## 开发
170
+
171
+ ```bash
172
+ PYTHONPATH=src python3 -m pytest # 运行测试
173
+ PYTHONPATH=src python3 -m ai_code_stats.cli --help
174
+ ```
175
+
176
+ 架构分层:`agents/`(Agent 适配)· `classify`(过滤/分类)· `attribution`(归因)·
177
+ `tokens`(token 聚合)· `reporters/`(可插拔上报)· `githook/`(提交统计)· `install/`(安装器)。
178
+ 新增上报后端:实现 `reporters/base.Reporter` 并在 `reporters/registry.REPORTER_TYPES` 注册。
179
+ 新增 Agent:实现 `agents/base.AgentAdapter` 并在 `agents/registry` 注册。