PyPI - ai-code-stats - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ai-code-stats 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

ai_code_stats/__init__.py +15 -0
ai_code_stats/agents/__init__.py +1 -0
ai_code_stats/agents/base.py +40 -0
ai_code_stats/agents/claude_code.py +95 -0
ai_code_stats/agents/codex.py +174 -0
ai_code_stats/agents/registry.py +25 -0
ai_code_stats/attribution.py +141 -0
ai_code_stats/classify.py +203 -0
ai_code_stats/cli.py +216 -0
ai_code_stats/config.py +171 -0
ai_code_stats/diffutil.py +96 -0
ai_code_stats/githook/__init__.py +1 -0
ai_code_stats/githook/post_commit.py +214 -0
ai_code_stats/gitutil.py +51 -0
ai_code_stats/hooks/__init__.py +1 -0
ai_code_stats/hooks/session_event.py +14 -0
ai_code_stats/hooks/tool_event.py +141 -0
ai_code_stats/identity.py +89 -0
ai_code_stats/install/__init__.py +5 -0
ai_code_stats/install/agent_install.py +182 -0
ai_code_stats/install/git_install.py +114 -0
ai_code_stats/models.py +237 -0
ai_code_stats/paths.py +85 -0
ai_code_stats/py.typed +0 -0
ai_code_stats/reporters/__init__.py +1 -0
ai_code_stats/reporters/base.py +60 -0
ai_code_stats/reporters/command.py +45 -0
ai_code_stats/reporters/http_webhook.py +79 -0
ai_code_stats/reporters/json_file.py +24 -0
ai_code_stats/reporters/registry.py +104 -0
ai_code_stats/storage.py +119 -0
ai_code_stats/tokens.py +68 -0
ai_code_stats/util.py +39 -0
ai_code_stats-0.1.0.dist-info/METADATA +179 -0
ai_code_stats-0.1.0.dist-info/RECORD +38 -0
ai_code_stats-0.1.0.dist-info/WHEEL +5 -0
ai_code_stats-0.1.0.dist-info/entry_points.txt +2 -0
ai_code_stats-0.1.0.dist-info/top_level.txt +1 -0

ai_code_stats/reporters/command.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""自定义命令 Reporter：启动用户指定的进程，信封 JSON 经 stdin 传入。
+最灵活的逃生口——可对接任何能读 stdin 的脚本（推送 Kafka、写数据库、转发等）。
+信封同时通过环境变量 ``AI_CODE_STATS_EVENT`` 提供。
+"""
+from __future__ import annotations
+import json
+import os
+import subprocess
+from typing import Any, Dict, List
+from .base import Reporter, ReportResult
+class CommandReporter(Reporter):
+    type_name = "command"
+    def send(self, envelope: Dict[str, Any]) -> ReportResult:
+        argv = self.options.get("argv")
+        if not argv or not isinstance(argv, list):
+            return ReportResult(ok=False, detail="command 缺少 argv（字符串数组）")
+        payload = json.dumps(envelope, ensure_ascii=False)
+        env = dict(os.environ)
+        env["AI_CODE_STATS_EVENT"] = payload
+        timeout = float(self.options.get("timeout", 30))
+        argv_str: List[str] = [str(a) for a in argv]
+        try:
+            proc = subprocess.run(
+                argv_str,
+                input=payload,
+                text=True,
+                capture_output=True,
+                env=env,
+                timeout=timeout,
+            )
+        except FileNotFoundError:
+            return ReportResult(ok=False, detail=f"找不到命令: {argv_str[0]}")
+        except subprocess.TimeoutExpired:
+            return ReportResult(ok=False, detail="命令超时")
+        if proc.returncode != 0:
+            return ReportResult(ok=False, detail=f"退出码 {proc.returncode}: {proc.stderr[:200]}")
+        return ReportResult(ok=True, detail="ok")

ai_code_stats/reporters/http_webhook.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""HTTP Webhook Reporter：POST JSON 到可配置 URL。
+- 有 ``requests`` 时用之；否则回退到标准库 ``urllib``，零额外依赖也能跑。
+- ``mapping`` 可把信封映射成任意后端期望的扁平结构，适配多种协议。
+- ``headers`` 支持 ``${ENV:TOKEN}`` 注入（在配置加载阶段已插值）。
+"""
+from __future__ import annotations
+import json
+import time
+import urllib.error
+import urllib.request
+from typing import Any, Dict
+from .base import Reporter, ReportResult, apply_mapping
+class HttpWebhookReporter(Reporter):
+    type_name = "http_webhook"
+    def send(self, envelope: Dict[str, Any]) -> ReportResult:
+        url = self.options.get("url")
+        if not url:
+            return ReportResult(ok=False, detail="http_webhook 缺少 url")
+        method = (self.options.get("method") or "POST").upper()
+        headers = dict(self.options.get("headers") or {})
+        headers.setdefault("Content-Type", "application/json")
+        timeout = float(self.options.get("timeout", 10))
+        verify = bool(self.options.get("verify", True))
+        retries = int(self.options.get("retries", 2))
+        backoff = float(self.options.get("backoff", 0.5))
+        mapping = self.options.get("mapping")
+        payload = apply_mapping(envelope, mapping) if mapping else envelope
+        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
+        last_detail = ""
+        for attempt in range(retries + 1):
+            ok, last_detail = self._post(url, method, headers, body, timeout, verify)
+            if ok:
+                return ReportResult(ok=True, detail=f"{url} ({last_detail})")
+            if attempt < retries:
+                time.sleep(backoff * (2 ** attempt))
+        return ReportResult(ok=False, detail=f"{url}: {last_detail}")
+    def _post(self, url, method, headers, body, timeout, verify):
+        try:
+            import requests  # type: ignore
+            resp = requests.request(
+                method, url, data=body, headers=headers, timeout=timeout, verify=verify
+            )
+            if 200 <= resp.status_code < 300:
+                return True, f"HTTP {resp.status_code}"
+            return False, f"HTTP {resp.status_code}: {resp.text[:200]}"
+        except ImportError:
+            return self._post_urllib(url, method, headers, body, timeout, verify)
+        except Exception as exc:  # noqa: BLE001
+            return False, str(exc)
+    def _post_urllib(self, url, method, headers, body, timeout, verify):
+        req = urllib.request.Request(url, data=body, headers=headers, method=method)
+        ctx = None
+        if not verify:
+            import ssl
+            ctx = ssl.create_default_context()
+            ctx.check_hostname = False
+            ctx.verify_mode = ssl.CERT_NONE
+        try:
+            with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
+                code = resp.getcode()
+                return (200 <= code < 300), f"HTTP {code}"
+        except urllib.error.HTTPError as exc:
+            return False, f"HTTP {exc.code}: {exc.reason}"
+        except Exception as exc:  # noqa: BLE001
+            return False, str(exc)

ai_code_stats/reporters/json_file.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""本地 JSON 文件 Reporter：把信封追加为 JSONL。"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any, Dict
+from .base import Reporter, ReportResult, render_path
+class JsonFileReporter(Reporter):
+    type_name = "json_file"
+    def send(self, envelope: Dict[str, Any]) -> ReportResult:
+        template = self.options.get("path", "{repo_data}/reports.jsonl")
+        path = Path(render_path(template, self.context)).expanduser()
+        try:
+            path.parent.mkdir(parents=True, exist_ok=True)
+            with open(path, "a", encoding="utf-8") as fh:
+                fh.write(json.dumps(envelope, ensure_ascii=False) + "\n")
+        except OSError as exc:
+            return ReportResult(ok=False, detail=f"写文件失败 {path}: {exc}")
+        return ReportResult(ok=True, detail=str(path))

ai_code_stats/reporters/registry.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""Reporter 注册表、派发与失败重试队列。"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Type
+from .. import paths
+from ..config import interpolate_env
+from .base import Reporter, ReporterContext, ReportResult
+from .command import CommandReporter
+from .http_webhook import HttpWebhookReporter
+from .json_file import JsonFileReporter
+REPORTER_TYPES: Dict[str, Type[Reporter]] = {
+    JsonFileReporter.type_name: JsonFileReporter,
+    HttpWebhookReporter.type_name: HttpWebhookReporter,
+    CommandReporter.type_name: CommandReporter,
+}
+RETRY_FILE = "retry_queue.jsonl"
+def build_reporter(cfg: Dict[str, Any], context: ReporterContext) -> Optional[Reporter]:
+    rtype = cfg.get("type")
+    cls = REPORTER_TYPES.get(rtype)
+    if cls is None:
+        return None
+    options = {k: v for k, v in cfg.items() if k != "type"}
+    options = interpolate_env(options)
+    return cls(options=options, context=context)
+def dispatch(
+    envelope: Dict[str, Any],
+    reporter_configs: List[Dict[str, Any]],
+    context: ReporterContext,
+    enqueue_on_failure: bool = True,
+) -> List[Tuple[str, ReportResult]]:
+    """把信封派发给所有 Reporter；失败的可入重试队列。返回 (type, result) 列表。"""
+    results: List[Tuple[str, ReportResult]] = []
+    for cfg in reporter_configs or []:
+        reporter = build_reporter(cfg, context)
+        if reporter is None:
+            results.append((str(cfg.get("type")), ReportResult(False, "未知 reporter 类型")))
+            continue
+        try:
+            res = reporter.send(envelope)
+        except Exception as exc:  # noqa: BLE001
+            res = ReportResult(False, f"异常: {exc}")
+        results.append((reporter.type_name, res))
+        if not res.ok and enqueue_on_failure:
+            _enqueue_retry(cfg, envelope)
+    return results
+def _retry_path() -> Path:
+    return paths.ensure_dir(paths.user_data_dir()) / RETRY_FILE
+def _enqueue_retry(cfg: Dict[str, Any], envelope: Dict[str, Any]) -> None:
+    try:
+        with open(_retry_path(), "a", encoding="utf-8") as fh:
+            fh.write(json.dumps({"reporter": cfg, "envelope": envelope}, ensure_ascii=False) + "\n")
+    except OSError:
+        pass
+def flush_retries(context: ReporterContext) -> Tuple[int, int]:
+    """重试队列里的失败项；成功的丢弃，失败的保留。返回 (成功数, 剩余数)。"""
+    path = _retry_path()
+    if not path.exists():
+        return (0, 0)
+    items: List[Dict[str, Any]] = []
+    with open(path, "r", encoding="utf-8") as fh:
+        for line in fh:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                items.append(json.loads(line))
+            except json.JSONDecodeError:
+                continue
+    succeeded = 0
+    remaining: List[Dict[str, Any]] = []
+    for item in items:
+        reporter = build_reporter(item.get("reporter", {}), context)
+        if reporter is None:
+            continue
+        try:
+            res = reporter.send(item.get("envelope", {}))
+        except Exception:  # noqa: BLE001
+            res = ReportResult(False, "异常")
+        if res.ok:
+            succeeded += 1
+        else:
+            remaining.append(item)
+    with open(path, "w", encoding="utf-8") as fh:
+        for item in remaining:
+            fh.write(json.dumps(item, ensure_ascii=False) + "\n")
+    return (succeeded, len(remaining))

ai_code_stats/storage.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""每仓库的事件存储与消费游标。
+目录布局（``<repo>/.git/ai-code-stats/``）：
+    pending.jsonl     —— 尚未被任何 commit 消费的 AI 编辑事件
+    consumed.jsonl    —— 已被某次 commit 归因消费的事件（审计用）
+    token_snapshots.json —— 各 session 上次读到的 token 累计值（用于算增量）
+    reports.jsonl     —— 默认 json_file Reporter 的产出（由 Reporter 写）
+采用「append + 重写」的简单方案：commit 时把命中的事件移入 consumed，重写剩余 pending。
+单进程串行调用（hook 由 Agent / git 顺序触发），无需复杂锁。
+"""
+from __future__ import annotations
+import json
+import os
+import tempfile
+from pathlib import Path
+from typing import Any, Dict, Iterable, List
+from . import paths
+# File names used inside the per-repository data directory.
+PENDING_FILE = "pending.jsonl"  # AI edit events not yet consumed by any commit
+CONSUMED_FILE = "consumed.jsonl"  # events already attributed to a commit (kept for auditing)
+TOKEN_SNAPSHOT_FILE = "token_snapshots.json"  # per-session cumulative token values last read
+STATE_FILE = "state.json"  # miscellaneous state such as de-duplication cursors
+class Storage:
+    """绑定到某个仓库 data 目录的存储句柄。"""
+    def __init__(self, repo_root: Path):
+        self.repo_root = Path(repo_root)
+        self.dir = paths.repo_data_dir(self.repo_root)
+    # ---- pending 事件 -------------------------------------------------
+    def append_event(self, event: Dict[str, Any]) -> None:
+        paths.ensure_dir(self.dir)
+        line = json.dumps(event, ensure_ascii=False)
+        with open(self.dir / PENDING_FILE, "a", encoding="utf-8") as fh:
+            fh.write(line + "\n")
+    def read_pending(self) -> List[Dict[str, Any]]:
+        return _read_jsonl(self.dir / PENDING_FILE)
+    def consume(self, keep: Iterable[Dict[str, Any]], consumed: Iterable[Dict[str, Any]]) -> None:
+        """把 ``consumed`` 追加到 consumed.jsonl，并用 ``keep`` 重写 pending.jsonl。"""
+        paths.ensure_dir(self.dir)
+        consumed = list(consumed)
+        if consumed:
+            with open(self.dir / CONSUMED_FILE, "a", encoding="utf-8") as fh:
+                for ev in consumed:
+                    fh.write(json.dumps(ev, ensure_ascii=False) + "\n")
+        _atomic_write_jsonl(self.dir / PENDING_FILE, list(keep))
+    # ---- token 快照 ---------------------------------------------------
+    def load_token_snapshots(self) -> Dict[str, Dict[str, Any]]:
+        path = self.dir / TOKEN_SNAPSHOT_FILE
+        try:
+            with open(path, "r", encoding="utf-8") as fh:
+                data = json.load(fh)
+            return data if isinstance(data, dict) else {}
+        except (FileNotFoundError, json.JSONDecodeError):
+            return {}
+    def save_token_snapshots(self, snapshots: Dict[str, Dict[str, Any]]) -> None:
+        paths.ensure_dir(self.dir)
+        path = self.dir / TOKEN_SNAPSHOT_FILE
+        _atomic_write(path, json.dumps(snapshots, ensure_ascii=False, indent=2))
+    # ---- 状态（去重游标等）-------------------------------------------
+    def load_state(self) -> Dict[str, Any]:
+        path = self.dir / STATE_FILE
+        try:
+            with open(path, "r", encoding="utf-8") as fh:
+                data = json.load(fh)
+            return data if isinstance(data, dict) else {}
+        except (FileNotFoundError, json.JSONDecodeError):
+            return {}
+    def save_state(self, state: Dict[str, Any]) -> None:
+        paths.ensure_dir(self.dir)
+        _atomic_write(self.dir / STATE_FILE, json.dumps(state, ensure_ascii=False, indent=2))
+def _read_jsonl(path: Path) -> List[Dict[str, Any]]:
+    out: List[Dict[str, Any]] = []
+    try:
+        with open(path, "r", encoding="utf-8") as fh:
+            for line in fh:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    out.append(json.loads(line))
+                except json.JSONDecodeError:
+                    # 容错：跳过损坏行而非整体失败。
+                    continue
+    except FileNotFoundError:
+        return []
+    return out
+def _atomic_write(path: Path, content: str) -> None:
+    """原子写：先写临时文件再 os.replace，避免 hook 中途崩溃导致半截文件。"""
+    paths.ensure_dir(path.parent)
+    fd, tmp = tempfile.mkstemp(dir=str(path.parent), suffix=".tmp")
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            fh.write(content)
+        os.replace(tmp, path)
+    finally:
+        if os.path.exists(tmp):
+            os.unlink(tmp)
+def _atomic_write_jsonl(path: Path, rows: List[Dict[str, Any]]) -> None:
+    content = "".join(json.dumps(r, ensure_ascii=False) + "\n" for r in rows)
+    _atomic_write(path, content)

ai_code_stats/tokens.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Token 用量聚合（纯函数，不读文件）。
+口径说明：归属到一次 commit 的 token = 自上次 commit 以来、本仓库相关 session
+的累计 token 增量。具体的「累计读取」由各 Agent 适配器负责（读 transcript /
+rollout 日志），这里只做：累计→增量、增量求和、组装 ``TokenUsage``。
+"""
+from __future__ import annotations
+from typing import Any, Dict, Iterable, List
+from .models import TokenUsage
+# The fixed set of token-usage fields every usage dict is normalized to.
+USAGE_FIELDS = ("input", "output", "cache_read")
+def normalize_usage(d: Dict[str, Any]) -> Dict[str, int]:
+    """把任意来源的 usage dict 收敛到固定字段的整数。"""
+    out = {f: 0 for f in USAGE_FIELDS}
+    if not isinstance(d, dict):
+        return out
+    for f in USAGE_FIELDS:
+        try:
+            out[f] = max(int(d.get(f, 0) or 0), 0)
+        except (TypeError, ValueError):
+            out[f] = 0
+    return out
+def delta_usage(cumulative: Dict[str, Any], committed: Dict[str, Any]) -> Dict[str, int]:
+    """累计值减去上次已归属值，得到本次增量（逐字段下限 0）。"""
+    cur = normalize_usage(cumulative)
+    base = normalize_usage(committed)
+    return {f: max(cur[f] - base[f], 0) for f in USAGE_FIELDS}
+def usage_total(usage: Dict[str, int]) -> int:
+    u = normalize_usage(usage)
+    return u["input"] + u["output"]
+def build_token_usage(per_session: Iterable[Dict[str, Any]]) -> TokenUsage:
+    """把多个 session 的增量组装成 commit 级 ``TokenUsage``。
+    每个元素形如 ``{"session_id":..., "agent":..., "usage": {input,output,cache_read}}``。
+    ``total`` 按 input+output 计（cache_read 不重复计入花费，但单列出来）。
+    """
+    agg = {f: 0 for f in USAGE_FIELDS}
+    by_session: List[Dict[str, Any]] = []
+    for item in per_session:
+        usage = normalize_usage(item.get("usage", {}))
+        for f in USAGE_FIELDS:
+            agg[f] += usage[f]
+        by_session.append(
+            {
+                "session_id": item.get("session_id", ""),
+                "agent": item.get("agent", ""),
+                "usage": usage,
+                "total": usage_total(usage),
+            }
+        )
+    return TokenUsage(
+        input=agg["input"],
+        output=agg["output"],
+        cache_read=agg["cache_read"],
+        total=agg["input"] + agg["output"],
+        by_session=by_session,
+    )

ai_code_stats/util.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""通用小工具：时间戳、producer 元信息、信封封装。"""
+from __future__ import annotations
+import platform
+import socket
+from datetime import datetime, timezone
+from typing import Any, Dict
+from . import PLUGIN_NAME, __version__
+from .models import ReportEnvelope
+def utcnow_iso() -> str:
+    """当前 UTC 时间的 ISO-8601 字符串（秒级，带 Z）。"""
+    return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+def producer_info() -> Dict[str, Any]:
+    """上报信封里的 producer 段：标识产出方与运行环境。"""
+    try:
+        host = socket.gethostname()
+    except OSError:  # pragma: no cover
+        host = ""
+    return {
+        "plugin": PLUGIN_NAME,
+        "version": __version__,
+        "host": host,
+        "os": platform.system().lower(),  # darwin/windows/linux
+    }
+def make_envelope(kind: str, data: Dict[str, Any]) -> ReportEnvelope:
+    return ReportEnvelope(
+        kind=kind,
+        data=data,
+        produced_at=utcnow_iso(),
+        producer=producer_info(),
+    )

ai_code_stats-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,179 @@
+Metadata-Version: 2.4
+Name: ai-code-stats
+Version: 0.1.0
+Summary: 统计 CodingAgent (Claude Code / Codex) 的 AI 代码采纳率、AI 代码行数与 token 消耗，按 git 仓库 × 提交人维度上报
+Author: ai-code-stats
+License: MIT
+Keywords: claude-code,codex,git,metrics,ai-coding
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Provides-Extra: http
+Requires-Dist: requests>=2.25; extra == "http"
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: jsonschema>=4.0; extra == "dev"
+# ai-code-stats
+统计 **CodingAgent（Claude Code / Codex）生成代码的采纳率、AI 代码行数与 token 消耗**，
+按 **git 仓库 × 提交人** 维度，在每次提交时上报。上报后端可插拔（HTTP / 本地文件 / 自定义命令），
+数据用带版本的 JSON Schema 定义，跨 macOS / Windows / Linux。
+> 📖 **完整使用说明（安装/配置/上报示例/排查）见 [docs/USAGE.md](docs/USAGE.md)。**
+## 它能回答什么
+- 这次提交里 **AI 写了多少行**、**人最终采纳了多少**（采纳率）。
+- 每次提交的 **总代码行数 / AI 代码行数 / AI 占比**，分「全量」和「有效代码」两种口径。
+- 这次提交关联的 AI **token 消耗**（input / output / cache）。
+## 工作原理
+```
+ AI 编辑 (PostToolUse 钩子)            git 提交 (post-commit / post-merge 钩子)
+ ┌─────────────────────────┐         ┌──────────────────────────────────────┐
+ │ 解析 Edit/Write/apply_patch│        │ 取 commit 变更（含重命名检测）          │
+ │ 新增行 → 归一化 + 哈希      │  ───▶  │ 与窗口内 AI 指纹做「多重集消费式匹配」    │
+ │ 标记是否「有效代码」        │ pending│ 算 采纳率 / AI 占比 / token             │
+ │ 落 .git/ai-code-stats/      │        │ 组 JSON 信封 → 派发各 Reporter          │
+ └─────────────────────────┘         └──────────────────────────────────────┘
+```
+- **采纳率** = 落入本次 commit 的 AI 行数 / 窗口内 AI 生成的行数。
+- **AI 占比** = 匹配到 AI 指纹的 commit 新增行 / commit 总新增行。
+- 匹配基于**归一化内容哈希**，所以即使 AI 写的代码被移动到别的文件也能命中。
+## 安装
+需要 Python ≥ 3.9 与 git。
+```bash
+pip install ai-code-stats          # 或：pip install -e .（开发）
+# 在目标仓库根目录执行，安装 git 钩子 + Claude + Codex 钩子
+ai-code-stats install
+# 只装某一项 / 预览不写入
+ai-code-stats install --git
+ai-code-stats install --claude --scope user      # 写 ~/.claude/settings.json
+ai-code-stats install --codex --dry-run
+# 卸载（幂等，保留你自己的钩子内容）
+ai-code-stats uninstall
+```
+> Codex 钩子写入 `$CODEX_HOME/config.toml`（默认 `~/.codex/config.toml`）。由于 Codex 钩子
+> schema 仍在演进，安装后建议 `ai-code-stats install --codex --dry-run` 核对，并确认你的
+> Codex 版本支持内联 `[[hooks.PostToolUse]]`。
+## 配置
+解析顺序（后者覆盖前者）：内置默认 → 用户级 `config.json` → 仓库 `.ai-code-stats.json` →
+`AI_CODE_STATS_CONFIG` 指向的文件。字符串支持 `${ENV:VAR}` 注入密钥。
+```jsonc
+{
+  "enabled": true,
+  "privacy": {
+    "store_plaintext": true,     // 本地是否保留 AI 行明文（仅落在 .git/ 内）
+    "redact_in_reports": true    // 上报只含统计数字，不含源码
+  },
+  "files": {
+    "include": [],               // 为空=按已知代码扩展名统计；非空=只统计匹配项
+    "exclude": ["**/node_modules/**", "**/*.min.js", "package-lock.json"]
+  },
+  "attribution": {
+    "count_modes": ["raw", "effective"],
+    "primary": "effective",      // 主指标用「有效代码」口径
+    "merge_strategy": "skip",    // merge 提交：skip 或 first_parent
+    "detect_renames": true
+  },
+  "reporters": [
+    { "type": "json_file", "path": "{repo_data}/reports.jsonl" },
+    { "type": "http_webhook",
+      "url": "https://metrics.example.com/ingest",
+      "headers": { "Authorization": "Bearer ${ENV:AI_CODE_STATS_TOKEN}" },
+      "mapping": {                // 把信封映射成任意后端 schema（点路径取值）
+        "repo": "data.repo_id",
+        "rate": "data.ai.effective.adoption_rate",
+        "tokens": "data.tokens.total"
+      }
+    },
+    { "type": "command", "argv": ["my-forwarder"] }  // 信封 JSON 经 stdin 传入
+  ]
+}
+```
+### 统计口径
+- **raw（全量）**：所有新增/删除行。
+- **effective（有效代码）**：剔除空行与纯注释行（按语言注释语法识别）。
+### 文件过滤
+默认只统计已知代码语言扩展名的文件，并排除 lock 文件、生成产物、vendored 目录、二进制。
+可用 `files.include` / `files.exclude`（glob，支持 `**`）定制。
+## 数据契约
+`schemas/` 下三份带版本的 JSON Schema：
+| Schema | 用途 |
+|--------|------|
+| `ai_edit_event.schema.json` | 单次 AI 编辑事件（本地暂存） |
+| `commit_stat.schema.json`   | 一次提交的完整统计 |
+| `report_envelope.schema.json` | 上报统一信封 |
+信封示例：
+```json
+{
+  "schema_version": "1.0",
+  "kind": "commit_stat",
+  "produced_at": "2026-06-15T08:00:00Z",
+  "producer": { "plugin": "ai-code-stats", "version": "0.1.0", "host": "dev-laptop", "os": "darwin" },
+  "data": {
+    "repo_id": "github.com/org/repo",
+    "commit": { "sha": "…", "branch": "main", "is_merge": false },
+    "committer": { "name": "Dev", "email": "dev@x.com" },
+    "totals": { "files_changed": 2, "raw": { "lines_added": 5 }, "effective": { "lines_added": 3 } },
+    "ai": {
+      "raw":       { "ai_lines_added": 4, "adoption_rate": 1.0, "ai_share_of_commit": 0.8 },
+      "effective": { "ai_lines_added": 3, "adoption_rate": 1.0, "ai_share_of_commit": 1.0 }
+    },
+    "tokens": { "input": 120, "output": 30, "total": 150 }
+  }
+}
+```
+## 常用命令
+```bash
+ai-code-stats status              # 查看待归因事件与 token 快照
+ai-code-stats report              # 打印当前 HEAD 的统计信封（不发送、不消费）
+ai-code-stats flush               # 重试发送失败的上报队列
+```
+## 隐私
+- AI 行**明文只落在仓库内 `.git/ai-code-stats/`**，不会被提交（在 `.git/` 下）。
+- 上报默认 `redact_in_reports=true`，**只发统计数字**，不含源码。
+- 需要更强隐私可设 `privacy.store_plaintext=false`，本地只存哈希。
+## 已知限制
+- `merge` 提交默认跳过归因（diff 含合并噪声），可配 `first_parent`。
+- `rebase` / `cherry-pick` / `commit --amend` 下采纳率为近似值。
+- token 归属按「自上次提交以来该 session 的累计增量」估算，跨多仓库并行会有近似。
+## 开发
+```bash
+PYTHONPATH=src python3 -m pytest        # 运行测试
+PYTHONPATH=src python3 -m ai_code_stats.cli --help
+```
+架构分层：`agents/`（Agent 适配）· `classify`（过滤/分类）· `attribution`（归因）·
+`tokens`（token 聚合）· `reporters/`（可插拔上报）· `githook/`（提交统计）· `install/`（安装器）。
+新增上报后端：实现 `reporters/base.Reporter` 并在 `reporters/registry.REPORTER_TYPES` 注册。
+新增 Agent：实现 `agents/base.AgentAdapter` 并在 `agents/registry` 注册。