ai-code-stats 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_code_stats/__init__.py +15 -0
- ai_code_stats/agents/__init__.py +1 -0
- ai_code_stats/agents/base.py +40 -0
- ai_code_stats/agents/claude_code.py +95 -0
- ai_code_stats/agents/codex.py +174 -0
- ai_code_stats/agents/registry.py +25 -0
- ai_code_stats/attribution.py +141 -0
- ai_code_stats/classify.py +203 -0
- ai_code_stats/cli.py +216 -0
- ai_code_stats/config.py +171 -0
- ai_code_stats/diffutil.py +96 -0
- ai_code_stats/githook/__init__.py +1 -0
- ai_code_stats/githook/post_commit.py +214 -0
- ai_code_stats/gitutil.py +51 -0
- ai_code_stats/hooks/__init__.py +1 -0
- ai_code_stats/hooks/session_event.py +14 -0
- ai_code_stats/hooks/tool_event.py +141 -0
- ai_code_stats/identity.py +89 -0
- ai_code_stats/install/__init__.py +5 -0
- ai_code_stats/install/agent_install.py +182 -0
- ai_code_stats/install/git_install.py +114 -0
- ai_code_stats/models.py +237 -0
- ai_code_stats/paths.py +85 -0
- ai_code_stats/py.typed +0 -0
- ai_code_stats/reporters/__init__.py +1 -0
- ai_code_stats/reporters/base.py +60 -0
- ai_code_stats/reporters/command.py +45 -0
- ai_code_stats/reporters/http_webhook.py +79 -0
- ai_code_stats/reporters/json_file.py +24 -0
- ai_code_stats/reporters/registry.py +104 -0
- ai_code_stats/storage.py +119 -0
- ai_code_stats/tokens.py +68 -0
- ai_code_stats/util.py +39 -0
- ai_code_stats-0.1.0.dist-info/METADATA +179 -0
- ai_code_stats-0.1.0.dist-info/RECORD +38 -0
- ai_code_stats-0.1.0.dist-info/WHEEL +5 -0
- ai_code_stats-0.1.0.dist-info/entry_points.txt +2 -0
- ai_code_stats-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""文件过滤与行分类。
|
|
2
|
+
|
|
3
|
+
两个职责,捕获阶段与提交阶段共用:
|
|
4
|
+
|
|
5
|
+
1. **文件过滤**:决定某个文件是否计入统计(按文件名/类型限制)。
|
|
6
|
+
- 命中 ``exclude`` glob → 排除。
|
|
7
|
+
- ``include`` 非空:必须命中某条 include glob 才计入。
|
|
8
|
+
- ``include`` 为空:仅计入「已知代码语言扩展名」的文件(默认行为,避免把
|
|
9
|
+
文档/数据/二进制算进代码行)。
|
|
10
|
+
|
|
11
|
+
2. **行分类**:把新增/删除行分成「全量(raw)」与「有效(effective)」两类。
|
|
12
|
+
- raw:所有行。
|
|
13
|
+
- effective:剔除①空行/纯空白行;②纯注释行(按语言注释语法)。
|
|
14
|
+
块注释在一段连续行内做状态跟踪;未知语言退化为只剔空行。
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import re
|
|
20
|
+
from pathlib import PurePosixPath
|
|
21
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
22
|
+
|
|
23
|
+
# File extension (lower-case, leading dot included) -> language key used to
# look up comment rules in LANG_RULES.
EXT_TO_LANG: Dict[str, str] = {
    # Python
    ".py": "python",
    ".pyi": "python",
    # JavaScript / TypeScript
    ".js": "clike",
    ".jsx": "clike",
    ".mjs": "clike",
    ".cjs": "clike",
    ".ts": "clike",
    ".tsx": "clike",
    # JVM languages
    ".java": "clike",
    ".kt": "clike",
    ".kts": "clike",
    ".scala": "clike",
    # C / C++
    ".c": "clike",
    ".h": "clike",
    ".cc": "clike",
    ".cpp": "clike",
    ".hpp": "clike",
    ".cxx": "clike",
    ".hxx": "clike",
    # Other languages with // and /* */ comments
    ".cs": "clike",
    ".go": "clike",
    ".rs": "clike",
    ".swift": "clike",
    ".php": "clike",
    ".m": "clike",
    ".mm": "clike",
    ".dart": "clike",
    # Ruby
    ".rb": "ruby",
    # Shells and other formats whose only comment marker is "#"
    ".sh": "hash",
    ".bash": "hash",
    ".zsh": "hash",
    ".fish": "hash",
    ".yaml": "hash",
    ".yml": "hash",
    ".toml": "hash",
    ".ini": "hash",
    ".cfg": "hash",
    ".conf": "hash",
    ".dockerfile": "hash",
    ".pl": "hash",
    ".pm": "hash",
    ".r": "hash",
    ".tf": "hash",
    # SQL
    ".sql": "sql",
    # Markup
    ".html": "xml",
    ".htm": "xml",
    ".xml": "xml",
    ".vue": "xml",
    ".svg": "xml",
    # Stylesheets
    ".css": "block_only",
    ".scss": "scss",
    ".less": "scss",
    # Lua
    ".lua": "lua",
    # Lisp family
    ".clj": "lisp",
    ".cljs": "lisp",
    ".el": "lisp",
    ".lisp": "lisp",
    # Haskell
    ".hs": "haskell",
    # Markdown
    ".md": "markdown",
    ".markdown": "markdown",
}
|
|
47
|
+
|
|
48
|
+
# Language key -> comment rules.
#   "line":  prefixes that start a line comment
#   "block": (start, end) delimiter pairs of block comments
LANG_RULES: Dict[str, Dict[str, Any]] = {
    "python": {
        "line": ["#"],
        "block": [('"""', '"""'), ("'''", "'''")],
    },
    "clike": {
        "line": ["//"],
        "block": [("/*", "*/")],
    },
    "ruby": {
        "line": ["#"],
        "block": [("=begin", "=end")],
    },
    "hash": {
        "line": ["#"],
        "block": [],
    },
    "sql": {
        "line": ["--", "#"],
        "block": [("/*", "*/")],
    },
    "xml": {
        "line": [],
        "block": [("<!--", "-->")],
    },
    "block_only": {
        "line": [],
        "block": [("/*", "*/")],
    },
    "scss": {
        "line": ["//"],
        "block": [("/*", "*/")],
    },
    "lua": {
        "line": ["--"],
        "block": [("--[[", "]]")],
    },
    "lisp": {
        "line": [";"],
        "block": [],
    },
    "haskell": {
        "line": ["--"],
        "block": [("{-", "-}")],
    },
    # Markdown has no comments in the code sense; only blank lines are dropped.
    "markdown": {
        "line": [],
        "block": [],
    },
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _glob_to_regex(pattern: str) -> re.Pattern:
|
|
67
|
+
"""把 glob(支持 ``**`` / ``*`` / ``?``)编译成匹配 posix 路径的正则。"""
|
|
68
|
+
i, n = 0, len(pattern)
|
|
69
|
+
out = ["(?s:"]
|
|
70
|
+
while i < n:
|
|
71
|
+
c = pattern[i]
|
|
72
|
+
if c == "*":
|
|
73
|
+
if i + 1 < n and pattern[i + 1] == "*":
|
|
74
|
+
if i + 2 < n and pattern[i + 2] == "/":
|
|
75
|
+
# ``**/`` 表示零或多层目录前缀
|
|
76
|
+
i += 3
|
|
77
|
+
out.append("(?:.*/)?")
|
|
78
|
+
else:
|
|
79
|
+
# 结尾或非 / 跟随的 ``**`` 匹配任意后缀(含子目录)
|
|
80
|
+
i += 2
|
|
81
|
+
out.append(".*")
|
|
82
|
+
else:
|
|
83
|
+
out.append("[^/]*")
|
|
84
|
+
i += 1
|
|
85
|
+
elif c == "?":
|
|
86
|
+
out.append("[^/]")
|
|
87
|
+
i += 1
|
|
88
|
+
else:
|
|
89
|
+
out.append(re.escape(c))
|
|
90
|
+
i += 1
|
|
91
|
+
out.append(")\\Z")
|
|
92
|
+
return re.compile("".join(out))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Compiled glob patterns, keyed by the glob text.
_GLOB_CACHE: Dict[str, re.Pattern] = {}


def _matches_any(path: str, patterns: List[str]) -> bool:
    """Return True when ``path`` matches at least one glob in ``patterns``.

    Each glob is tried against the whole path and against the bare file
    name, so a plain entry such as "yarn.lock" matches at any depth.
    """
    pure = PurePosixPath(path)
    candidates = (pure.as_posix(), pure.name)
    for glob in patterns:
        compiled = _GLOB_CACHE.get(glob)
        if compiled is None:
            compiled = _glob_to_regex(glob)
            _GLOB_CACHE[glob] = compiled
        if any(compiled.match(candidate) for candidate in candidates):
            return True
    return False
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class Classifier:
    """File filter and line classifier bound to one configuration.

    ``files_config`` may carry ``include`` / ``exclude`` glob lists and a
    ``language_extensions`` mapping (extension -> language key).
    ``comment_rules_override`` maps a language key to replacement comment
    rules shaped like ``{"line": [...], "block": [[start, end], ...]}``.
    """

    def __init__(self, files_config: Optional[Dict[str, Any]] = None,
                 comment_rules_override: Optional[Dict[str, Any]] = None):
        files_config = files_config or {}
        # ``or`` also turns an explicit null value into an empty container.
        self.include: List[str] = list(files_config.get("include", []) or [])
        self.exclude: List[str] = list(files_config.get("exclude", []) or [])
        self.ext_override: Dict[str, str] = dict(files_config.get("language_extensions", {}) or {})
        self.comment_override: Dict[str, Any] = dict(comment_rules_override or {})

    # ---- file filtering ----------------------------------------------
    def language_for(self, path: str) -> Optional[str]:
        """Return the language key for ``path``, or None when it is unknown."""
        name = PurePosixPath(path).name.lower()
        # Whole-file-name special cases come first (Dockerfile / Makefile).
        if name in ("dockerfile",):
            return "hash"
        if name in ("makefile",):
            return "hash"
        ext = PurePosixPath(path).suffix.lower()
        if ext in self.ext_override:
            return self.ext_override[ext]
        return EXT_TO_LANG.get(ext)

    def is_included(self, path: str) -> bool:
        """Decide whether ``path`` takes part in the statistics.

        An ``exclude`` match always wins. With a non-empty ``include`` list
        the path must match one of its globs; otherwise only files with a
        known language are counted.
        """
        if _matches_any(path, self.exclude):
            return False
        if self.include:
            return _matches_any(path, self.include)
        return self.language_for(path) is not None

    # ---- line classification -----------------------------------------
    def _rules(self, language: Optional[str]) -> Dict[str, Any]:
        """Return the comment rules for ``language`` (overrides first)."""
        if language and language in self.comment_override:
            return self.comment_override[language]
        return LANG_RULES.get(language or "", {"line": [], "block": []})

    def classify_lines(self, lines: List[str], path: str) -> List[bool]:
        """Return one ``is_effective`` flag per entry of ``lines``.

        ``lines`` should be a contiguous run (for example one hunk), because
        block-comment state is carried from one line to the next. A line is
        not effective when it is blank, a pure line comment, or lies inside
        a block comment with no text after the closing delimiter.
        """
        rules = self._rules(self.language_for(path))
        line_prefixes: Tuple[str, ...] = tuple(rules.get("line") or [])
        blocks: List[Tuple[str, str]] = [tuple(b) for b in rules.get("block") or []]

        result: List[bool] = []
        in_block_end: Optional[str] = None  # closing delimiter of the open block
        for raw in lines:
            stripped = raw.strip()

            if in_block_end is not None:
                # Inside a block comment: look for its closing delimiter.
                idx = stripped.find(in_block_end)
                if idx < 0:
                    result.append(False)
                else:
                    after = stripped[idx + len(in_block_end):].strip()
                    in_block_end = None
                    # Effective only if something follows the delimiter.
                    result.append(bool(after))
                continue

            if not stripped:
                result.append(False)
                continue

            # Block openers are tested BEFORE line-comment prefixes. Some
            # openers begin with the line-comment marker (Lua: "--[[" vs
            # "--"); testing the prefix first would swallow the opener as a
            # one-line comment and the following comment lines would be
            # counted as code.
            opened = next(
                ((start, end) for start, end in blocks if stripped.startswith(start)),
                None,
            )
            if opened is not None:
                start, end = opened
                rest = stripped[len(start):]
                end_idx = rest.find(end)
                if end_idx < 0:
                    in_block_end = end
                    result.append(False)
                else:
                    # Block opened and closed on the same line.
                    after = rest[end_idx + len(end):].strip()
                    result.append(bool(after))
                continue

            # Line comment: the whole line starts with a comment prefix.
            if any(stripped.startswith(prefix) for prefix in line_prefixes):
                result.append(False)
                continue

            result.append(True)
        return result

    def is_effective_line(self, line: str, path: str) -> bool:
        """Classify a single line on its own, with no block-comment state."""
        return self.classify_lines([line], path)[0]
|
ai_code_stats/cli.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""ai-code-stats 命令行入口。
|
|
2
|
+
|
|
3
|
+
子命令:
|
|
4
|
+
install / uninstall 安装、卸载 git 与 Agent 钩子
|
|
5
|
+
hook Agent 钩子被调用时的入口(读 stdin JSON)
|
|
6
|
+
githook git 钩子入口(post-commit)
|
|
7
|
+
report 打印当前 HEAD 的统计信封(不发送、不消费)
|
|
8
|
+
status 查看待归因事件与 token 快照
|
|
9
|
+
flush 重试发送失败队列
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Optional
|
|
19
|
+
|
|
20
|
+
from . import __version__
|
|
21
|
+
from . import config as config_mod
|
|
22
|
+
from . import paths as paths_mod
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ---- PYTHONPATH auto-detection (needed when running from a source tree) ----
def _default_pythonpath() -> Optional[str]:
    """Return the directory that contains the package, or None if installed.

    The package counts as installed when that directory's path contains
    "site-packages" or "dist-packages"; no PYTHONPATH is needed then.
    """
    import ai_code_stats

    parent_dir = str(Path(ai_code_stats.__file__).resolve().parents[1])
    installed = any(marker in parent_dir for marker in ("site-packages", "dist-packages"))
    return None if installed else parent_dir
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _repo_root_or_exit(explicit: Optional[str]) -> Path:
    """Resolve the git repository root, or exit with status 2.

    The search starts at ``explicit`` when given, otherwise at the current
    working directory.
    """
    from .gitutil import repo_root

    found = repo_root(Path(explicit) if explicit else Path.cwd())
    if found is not None:
        return found
    print("错误:当前不在 git 仓库内", file=sys.stderr)
    sys.exit(2)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ---- install / uninstall ----------------------------------------------
|
|
47
|
+
def cmd_install(args: argparse.Namespace) -> int:
    """Install the selected hooks and print one status line per target."""
    from .install import agent_install, git_install

    interpreter = sys.executable
    pythonpath = args.pythonpath or _default_pythonpath()
    targets = _resolve_targets(args)

    # A repository is required for the git hook and for project-scoped
    # Claude settings only.
    root = None
    if "git" in targets or ("claude" in targets and args.scope == "project"):
        root = _repo_root_or_exit(args.repo)

    report: List[str] = []
    if "git" in targets:
        outcome = git_install.install(root, interpreter, pythonpath, args.dry_run)
        report.append("[git] " + outcome)
    if "claude" in targets:
        outcome = agent_install.install_claude(
            root,
            interpreter,
            scope=args.scope,
            pythonpath=pythonpath,
            dry_run=args.dry_run,
        )
        report.append("[claude] " + outcome)
    if "codex" in targets:
        outcome = agent_install.install_codex(interpreter, pythonpath, args.dry_run)
        report.append("[codex] " + outcome)

    print("\n".join(report))
    return 0
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def cmd_uninstall(args: argparse.Namespace) -> int:
    """Remove the selected hooks and print one status line per target."""
    from .install import agent_install, git_install

    targets = _resolve_targets(args)

    # Same rule as for install: only the git hook and project-scoped Claude
    # settings need a repository.
    root = None
    if "git" in targets or ("claude" in targets and args.scope == "project"):
        root = _repo_root_or_exit(args.repo)

    report: List[str] = []
    if "git" in targets:
        report.append("[git] " + git_install.uninstall(root))
    if "claude" in targets:
        outcome = agent_install.uninstall_claude(root, scope=args.scope)
        report.append("[claude] " + outcome)
    if "codex" in targets:
        report.append("[codex] " + agent_install.uninstall_codex())

    print("\n".join(report))
    return 0
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _resolve_targets(args: argparse.Namespace) -> List[str]:
|
|
88
|
+
chosen = [t for t in ("git", "claude", "codex") if getattr(args, t)]
|
|
89
|
+
return chosen if chosen else ["git", "claude", "codex"]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ---- hook / githook ----------------------------------------------------
|
|
93
|
+
def cmd_hook(args: argparse.Namespace) -> int:
    """Entry point for agent hooks: read a JSON payload from stdin and dispatch.

    Malformed JSON and non-object payloads are dropped silently (return 0).
    Session-style events go to ``session_event``; everything else goes to
    ``tool_event``.
    """
    from .hooks import session_event, tool_event

    try:
        text = sys.stdin.read()
        payload = json.loads(text) if text.strip() else {}
    except json.JSONDecodeError:
        return 0  # hooks fail silently
    if not isinstance(payload, dict):
        return 0

    session_events = ("stop", "session_end", "sessionend", "session_start", "sessionstart")
    if (args.event or "").lower() in session_events:
        handler = session_event
    else:
        handler = tool_event
    return handler.process(args.agent, payload)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def cmd_githook(args: argparse.Namespace) -> int:
    """Entry point for the git hook: hand over to the post-commit handler."""
    from .githook import post_commit

    dry_run = args.dry_run
    return post_commit.process(dry_run=dry_run)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ---- report / status / flush ------------------------------------------
|
|
117
|
+
def cmd_report(args: argparse.Namespace) -> int:
    """Run the post-commit handler in dry-run mode for the chosen repository."""
    from .githook import post_commit

    # Called for its exit-on-failure effect only; the result is not used.
    _repo_root_or_exit(args.repo)
    if args.repo:
        working_dir = Path(args.repo)
    else:
        working_dir = None
    return post_commit.process(cwd=working_dir, dry_run=True)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def cmd_status(args: argparse.Namespace) -> int:
    """Print pending AI events and known session token snapshots."""
    from .storage import Storage

    root = _repo_root_or_exit(args.repo)
    cfg = config_mod.load_config(root)
    storage = Storage(root)
    pending = storage.read_pending()
    snaps = storage.load_token_snapshots()

    # Tally events per agent and the total number of added lines.
    events_per_agent: dict = {}
    total_lines = 0
    for event in pending:
        agent_name = event.get("agent", "?")
        events_per_agent[agent_name] = events_per_agent.get(agent_name, 0) + 1
        total_lines += len(event.get("added_lines", []))

    print(f"仓库数据目录: {storage.dir}")
    print(f"启用状态: {cfg.enabled}")
    print(f"待归因 AI 事件: {len(pending)} 条,累计 {total_lines} 行")
    for agent_name in sorted(events_per_agent):
        print(f" - {agent_name}: {events_per_agent[agent_name]} 条")
    print(f"已知 session token 快照: {len(snaps)} 个")
    for sid, entry in snaps.items():
        cum = entry.get("cumulative", {})
        print(f" - {sid[:12]} [{entry.get('agent','')}] 累计 in={cum.get('input',0)} out={cum.get('output',0)}")
    return 0
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def cmd_flush(args: argparse.Namespace) -> int:
    """Retry the queue of failed reports and print the outcome."""
    from .reporters import registry
    from .reporters.base import ReporterContext

    data_dir = str(paths_mod.user_data_dir())
    context = ReporterContext(repo_data_dir=data_dir)
    succeeded, remaining = registry.flush_retries(context)
    print(f"重试成功 {succeeded} 条,剩余 {remaining} 条")
    return 0
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---- 解析器 ------------------------------------------------------------
|
|
162
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with one sub-parser per sub-command.

    Every sub-parser stores its handler in ``func``.
    """
    parser = argparse.ArgumentParser(
        prog="ai-code-stats",
        description="AI 代码采纳率与 token 统计插件",
    )
    parser.add_argument("--version", action="version", version=f"ai-code-stats {__version__}")
    commands = parser.add_subparsers(dest="command", required=True)

    def new_command(name, help_text, handler):
        # Create a sub-command and bind its handler.
        command = commands.add_parser(name, help=help_text)
        command.set_defaults(func=handler)
        return command

    def add_targets(command):
        # Options shared by install and uninstall.
        target_flags = (
            ("--git", "只针对 git 钩子"),
            ("--claude", "只针对 Claude Code"),
            ("--codex", "只针对 Codex"),
        )
        for flag, help_text in target_flags:
            command.add_argument(flag, action="store_true", help=help_text)
        command.add_argument("--scope", choices=["project", "user"], default="project",
                             help="Claude settings 作用域(默认 project)")
        command.add_argument("--repo", help="仓库路径(默认当前目录)")
        command.add_argument("--pythonpath", help="注入钩子命令的 PYTHONPATH(开发用)")

    install = new_command("install", "安装钩子", cmd_install)
    add_targets(install)
    install.add_argument("--dry-run", action="store_true", help="只预览不写入")

    uninstall = new_command("uninstall", "卸载钩子", cmd_uninstall)
    add_targets(uninstall)

    hook = new_command("hook", "Agent 钩子入口(读 stdin)", cmd_hook)
    hook.add_argument("--agent", required=True, help="claude_code | codex")
    hook.add_argument("--event", help="事件名(如 stop)")

    githook = new_command("githook", "git 钩子入口", cmd_githook)
    githook.add_argument("kind", nargs="?", default="post-commit", help="钩子类型(post-commit)")
    githook.add_argument("--dry-run", action="store_true", help="打印信封不发送")

    report = new_command("report", "打印当前 HEAD 的统计信封(不发送)", cmd_report)
    report.add_argument("--repo", help="仓库路径")

    status = new_command("status", "查看待归因事件与 token 快照", cmd_status)
    status.add_argument("--repo", help="仓库路径")

    new_command("flush", "重试失败的上报队列", cmd_flush)

    return parser
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """Parse ``argv`` and run the chosen sub-command; return its exit status."""
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)


if __name__ == "__main__":
    sys.exit(main())
|
ai_code_stats/config.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""配置加载与合并。
|
|
2
|
+
|
|
3
|
+
解析顺序(后者覆盖前者,逐键深合并):
|
|
4
|
+
1. 内置默认值(``DEFAULT_CONFIG``)
|
|
5
|
+
2. 用户级配置文件(``paths.user_config_file()``)
|
|
6
|
+
3. 仓库级配置文件(``<repo>/.ai-code-stats.json``)
|
|
7
|
+
4. ``AI_CODE_STATS_CONFIG`` 环境变量指向的文件
|
|
8
|
+
|
|
9
|
+
字符串值支持 ``${ENV:VAR}`` 占位插值,用于在 header 等处注入密钥,避免明文落盘。
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import copy
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import re
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Dict, Optional
|
|
20
|
+
|
|
21
|
+
from . import paths
|
|
22
|
+
|
|
23
|
+
REPO_CONFIG_FILENAME = ".ai-code-stats.json"

DEFAULT_CONFIG: Dict[str, Any] = {
    "enabled": True,
    "privacy": {
        # Keep AI-written lines in plain text locally (when off, only
        # hashes are stored).
        "store_plaintext": True,
        # Redact report payloads (default true: reports carry statistics
        # only, never source text).
        "redact_in_reports": True,
    },
    "files": {
        # Empty means "all code files"; otherwise only files matching an
        # include glob are counted.
        "include": [],
        # Exclusions take precedence over include.
        "exclude": [
            "**/node_modules/**",
            "**/vendor/**",
            "**/dist/**",
            "**/build/**",
            "**/.venv/**",
            "**/*.min.js",
            "**/*.min.css",
            "package-lock.json",
            "**/package-lock.json",
            "yarn.lock",
            "**/yarn.lock",
            "pnpm-lock.yaml",
            "**/pnpm-lock.yaml",
            "poetry.lock",
            "**/poetry.lock",
            "Cargo.lock",
            "**/Cargo.lock",
            "go.sum",
            "**/go.sum",
        ],
        # Extra extension -> language mappings layered over the built-in
        # table in classify.
        "language_extensions": {},
    },
    "attribution": {
        # Line normalisation: "strip" hashes the line after trimming
        # leading and trailing whitespace.
        "normalize": "strip",
        # Lines shorter than this (after normalisation) are left out of AI
        # fingerprint matching, to reduce false hits.
        "min_line_length": 1,
        # Counting modes to output.
        "count_modes": ["raw", "effective"],
        "primary": "effective",
        # Comment-rule overrides for line classification:
        # { "lang": {"line": ["//"], "block": [["/*","*/"]]} }
        "comment_rules": {},
        # Merge commits: "skip" records totals only, without an adoption
        # rate; "first_parent" measures the first-parent diff.
        "merge_strategy": "skip",
        "detect_renames": True,
        "detect_copies": False,
    },
    # Enabled agent adapters.
    "agents": ["claude_code", "codex"],
    # Reporter back-ends, dispatched in order.
    "reporters": [
        {"type": "json_file", "path": "{repo_data}/reports.jsonl"},
    ],
}
|
|
83
|
+
|
|
84
|
+
_ENV_PATTERN = re.compile(r"\$\{ENV:([A-Za-z_][A-Za-z0-9_]*)\}")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
|
|
88
|
+
"""递归合并 override 进 base(返回新 dict,不改原对象)。
|
|
89
|
+
|
|
90
|
+
dict 逐键合并;其余类型(含 list)整体覆盖。
|
|
91
|
+
"""
|
|
92
|
+
result = copy.deepcopy(base)
|
|
93
|
+
for key, value in override.items():
|
|
94
|
+
if (
|
|
95
|
+
key in result
|
|
96
|
+
and isinstance(result[key], dict)
|
|
97
|
+
and isinstance(value, dict)
|
|
98
|
+
):
|
|
99
|
+
result[key] = _deep_merge(result[key], value)
|
|
100
|
+
else:
|
|
101
|
+
result[key] = copy.deepcopy(value)
|
|
102
|
+
return result
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _load_json_file(path: Path) -> Dict[str, Any]:
|
|
106
|
+
try:
|
|
107
|
+
with open(path, "r", encoding="utf-8") as fh:
|
|
108
|
+
data = json.load(fh)
|
|
109
|
+
except FileNotFoundError:
|
|
110
|
+
return {}
|
|
111
|
+
except (OSError, json.JSONDecodeError) as exc:
|
|
112
|
+
raise ConfigError(f"无法解析配置文件 {path}: {exc}") from exc
|
|
113
|
+
if not isinstance(data, dict):
|
|
114
|
+
raise ConfigError(f"配置文件 {path} 顶层必须是 JSON 对象")
|
|
115
|
+
return data
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def interpolate_env(value: Any) -> Any:
    """Recursively expand ``${ENV:VAR}`` placeholders inside strings.

    Lists and dicts are rebuilt with their items expanded; any other
    non-string value is returned unchanged. A variable that is not set
    expands to the empty string.
    """
    if isinstance(value, dict):
        return {key: interpolate_env(item) for key, item in value.items()}
    if isinstance(value, list):
        return [interpolate_env(item) for item in value]
    if isinstance(value, str):
        def lookup(match):
            return os.environ.get(match.group(1), "")

        return _ENV_PATTERN.sub(lookup, value)
    return value
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ConfigError(Exception):
    """Raised when a configuration file cannot be parsed or validated."""
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class Config:
    """View over merged configuration data with dotted-path lookup."""

    def __init__(self, data: Dict[str, Any]):
        self.data = data

    def get(self, dotted: str, default: Any = None) -> Any:
        """Return the value at ``dotted`` (e.g. ``"files.include"``).

        ``default`` is returned as soon as a path segment is missing or the
        current node is not a dict.
        """
        cursor: Any = self.data
        for segment in dotted.split("."):
            if not isinstance(cursor, dict) or segment not in cursor:
                return default
            cursor = cursor[segment]
        return cursor

    @property
    def enabled(self) -> bool:
        """Truthiness of the top-level ``enabled`` key (True when absent)."""
        flag = self.data.get("enabled", True)
        return bool(flag)

    def __repr__(self) -> str:  # pragma: no cover - debugging aid
        dumped = json.dumps(self.data, ensure_ascii=False)
        return f"Config({dumped})"
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def load_config(repo_root: Optional[Path] = None) -> Config:
    """Load every configuration layer and merge them over the defaults.

    Layers, later ones overriding earlier ones: the user-level file, the
    repository-level file (only when ``repo_root`` is given), and the file
    named by the ``AI_CODE_STATS_CONFIG`` environment variable.
    """
    sources = [paths.user_config_file()]
    if repo_root is not None:
        sources.append(Path(repo_root) / REPO_CONFIG_FILENAME)
    env_file = os.environ.get("AI_CODE_STATS_CONFIG")
    if env_file:
        sources.append(Path(env_file).expanduser())

    merged = copy.deepcopy(DEFAULT_CONFIG)
    for source in sources:
        merged = _deep_merge(merged, _load_json_file(source))
    return Config(merged)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""从字符串编辑或补丁文本中提取「新增行 / 删除行」。
|
|
2
|
+
|
|
3
|
+
支持三类输入:
|
|
4
|
+
- 字符串前后对比(Claude 的 Edit/MultiEdit:old_string → new_string)。
|
|
5
|
+
- git 统一 diff(``--- / +++ / @@`` 三联)。
|
|
6
|
+
- Codex / OpenAI ``apply_patch`` 信封(``*** Begin Patch`` ... ``*** End Patch``)。
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import difflib
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Dict, List, Tuple
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class PatchedFile:
    """Added and removed lines collected for one file of a patch."""

    path: str
    added: List[str] = field(default_factory=list)
    removed: List[str] = field(default_factory=list)


def added_lines_between(old: str, new: str) -> Tuple[List[str], List[str]]:
    """Diff two texts line by line and return ``(added, removed)``.

    Unchanged lines are left out of both lists.
    """
    old_lines = old.splitlines()
    new_lines = new.splitlines()
    sm = difflib.SequenceMatcher(a=old_lines, b=new_lines, autojunk=False)
    added: List[str] = []
    removed: List[str] = []
    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        if tag in ("insert", "replace"):
            added.extend(new_lines[j1:j2])
        if tag in ("delete", "replace"):
            removed.extend(old_lines[i1:i2])
    return added, removed


_DIFF_FILE_RE = re.compile(r"^\+\+\+ (?:b/)?(.+?)\s*$")
# "@@ -start[,count] +start[,count] @@"; an omitted count means 1.
_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")


def _starts_file_header(lines: List[str], idx: int) -> bool:
    """Tell whether ``lines[idx:idx + 3]`` reads ``--- ``, ``+++ ``, ``@@``."""
    return (
        idx + 2 < len(lines)
        and lines[idx].startswith("--- ")
        and lines[idx + 1].startswith("+++ ")
        and lines[idx + 2].startswith("@@")
    )


def parse_unified_diff(diff: str) -> List[PatchedFile]:
    """Collect added and removed lines per file from a unified diff.

    The line counts declared in each ``@@`` header are tracked, so hunk
    content whose text begins with ``++`` or ``--`` (shown in the diff as
    ``+++...`` / ``---...``) is recorded as content rather than mistaken
    for a file header. Hunks without a parsable header fall back to the
    prefix-based rules. Files whose new path is ``/dev/null`` are omitted
    from the result.
    """
    lines = diff.splitlines()
    files: List[PatchedFile] = []
    current: PatchedFile | None = None
    old_left = 0  # old-side lines still owed by the current hunk
    new_left = 0  # new-side lines still owed by the current hunk

    for idx, line in enumerate(lines):
        in_hunk = current is not None and (old_left > 0 or new_left > 0)
        if in_hunk and _starts_file_header(lines, idx):
            # The declared counts overshoot: a new file section begins here.
            in_hunk = False
            old_left = new_left = 0

        if in_hunk:
            marker = line[:1]
            if marker == "+":
                current.added.append(line[1:])
                new_left -= 1
                continue
            if marker == "-":
                current.removed.append(line[1:])
                old_left -= 1
                continue
            if marker == "\\":
                # "\ No newline at end of file" consumes no hunk line.
                continue
            if marker in (" ", ""):
                # Context line; an empty one had its single space stripped.
                old_left -= 1
                new_left -= 1
                continue
            # Not valid hunk content: leave the hunk and treat it as a header.
            old_left = new_left = 0

        m = _DIFF_FILE_RE.match(line)
        if m:
            path = m.group(1)
            if path == "/dev/null":
                path = ""
            current = PatchedFile(path=path)
            files.append(current)
            continue
        if current is None:
            continue
        hunk = _HUNK_HEADER_RE.match(line)
        if hunk:
            old_left = 1 if hunk.group(1) is None else int(hunk.group(1))
            new_left = 1 if hunk.group(2) is None else int(hunk.group(2))
            continue
        if line.startswith(("+++", "---", "@@")):
            continue
        # Fallback for content outside a counted hunk.
        if line.startswith("+"):
            current.added.append(line[1:])
        elif line.startswith("-"):
            current.removed.append(line[1:])
    return [f for f in files if f.path]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
_APPLY_HEADER_RE = re.compile(r"^\*\*\* (Add|Update|Delete) File: (.+?)\s*$")


def parse_apply_patch(patch: str) -> List[PatchedFile]:
    """Parse an OpenAI/Codex ``apply_patch`` envelope.

    Each ``*** Add|Update|Delete File:`` header starts a new file entry;
    the ``+`` and ``-`` lines that follow are recorded for that file.
    Envelope markers, ``@@`` lines, context lines and other directives
    (such as ``*** Move to:``) are ignored.
    """
    collected: List[PatchedFile] = []
    target: PatchedFile | None = None
    for row in patch.splitlines():
        if row.startswith(("*** Begin Patch", "*** End Patch")):
            continue
        header = _APPLY_HEADER_RE.match(row)
        if header is not None:
            target = PatchedFile(path=header.group(2))
            collected.append(target)
            continue
        if target is None or row.startswith("@@"):
            continue
        sign = row[:1]
        if sign == "+":
            target.added.append(row[1:])
        elif sign == "-":
            target.removed.append(row[1:])
    return [entry for entry in collected if entry.path]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def parse_patch(text: str) -> List[PatchedFile]:
    """Detect the patch flavour and parse it.

    The text is handled as an ``apply_patch`` envelope when it contains
    ``*** Begin Patch`` or when any line is an ``*** Add|Update|Delete
    File:`` header; otherwise it is parsed as a unified diff. ``None`` is
    treated like an empty string.
    """
    # Normalise before the first membership test, which would otherwise
    # raise TypeError on None.
    text = text or ""
    # The header regex is anchored with "^" and compiled without MULTILINE,
    # so it is applied to each line rather than to the text as a whole.
    has_header = any(_APPLY_HEADER_RE.match(line) for line in text.splitlines())
    if "*** Begin Patch" in text or has_header:
        return parse_apply_patch(text)
    return parse_unified_diff(text)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""git 钩子入口(post-commit):在每次提交时计算并上报统计。"""
|