jfox-cli 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/PKG-INFO +1 -1
  2. jfox_cli-0.2.1/docs/superpowers/specs/2026-04-13-pr-auto-code-review-design.md +286 -0
  3. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/__init__.py +1 -1
  4. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/cli.py +123 -0
  5. jfox_cli-0.2.1/jfox/git_extractor.py +178 -0
  6. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/performance.py +4 -0
  7. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/pyproject.toml +1 -1
  8. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_bm25_batch.py +55 -0
  9. jfox_cli-0.2.1/tests/unit/test_git_extractor.py +284 -0
  10. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_index_kb_param.py +41 -0
  11. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/uv.lock +1 -1
  12. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/.githooks/pre-push +0 -0
  13. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/.github/workflows/integration-test.yml +0 -0
  14. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/.github/workflows/publish.yml +0 -0
  15. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/.gitignore +0 -0
  16. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/.python-version +0 -0
  17. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/AGENTS.md +0 -0
  18. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/CHANGELOG.md +0 -0
  19. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/CLAUDE.md +0 -0
  20. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/DEVELOPMENT_PLAN.md +0 -0
  21. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/README.md +0 -0
  22. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/SESSION.md +0 -0
  23. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/SESSION_SUMMARY.md +0 -0
  24. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-11-bulk-import-bm25-fix.md +0 -0
  25. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-11-edit-command.md +0 -0
  26. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-11-unify-format-option.md +0 -0
  27. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-ci-coverage-optimization.md +0 -0
  28. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-edit-content-file.md +0 -0
  29. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-fix-index-rebuild-clear.md +0 -0
  30. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-fix-index-verify-id-mismatch.md +0 -0
  31. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-fix-jfox-health-skill-kb-param.md +0 -0
  32. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-index-kb-param.md +0 -0
  33. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-lazy-import-perf.md +0 -0
  34. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/plans/2026-04-12-skill-redesign.md +0 -0
  35. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/specs/2026-04-03-bugfixes-design.md +0 -0
  36. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/docs/superpowers/specs/2026-04-12-skill-redesign-design.md +0 -0
  37. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jessica-jones-static-cable.md +0 -0
  38. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/__main__.py +0 -0
  39. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/bm25_index.py +0 -0
  40. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/config.py +0 -0
  41. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/embedding_backend.py +0 -0
  42. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/formatters.py +0 -0
  43. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/global_config.py +0 -0
  44. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/graph.py +0 -0
  45. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/indexer.py +0 -0
  46. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/kb_manager.py +0 -0
  47. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/models.py +0 -0
  48. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/note.py +0 -0
  49. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/search_engine.py +0 -0
  50. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/template.py +0 -0
  51. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/template_cli.py +0 -0
  52. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/jfox/vector_store.py +0 -0
  53. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/pytest.ini +0 -0
  54. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/run_full_test.ps1 +0 -0
  55. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skill/evals/evals.json +0 -0
  56. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skill/knowledge-base-notes/SKILL.md +0 -0
  57. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skill/knowledge-base-workspace/SKILL.md +0 -0
  58. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skills-recommend/README.md +0 -0
  59. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skills-recommend/claude-code/jfox-common/SKILL.md +0 -0
  60. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skills-recommend/claude-code/jfox-ingest/SKILL.md +0 -0
  61. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skills-recommend/claude-code/jfox-organize/SKILL.md +0 -0
  62. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/skills-recommend/claude-code/jfox-search/SKILL.md +0 -0
  63. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/COVERAGE_PLAN.md +0 -0
  64. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/MIGRATION.md +0 -0
  65. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/TESTS.md +0 -0
  66. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/conftest.py +0 -0
  67. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/integration/__init__.py +0 -0
  68. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/integration/test_backlinks.py +0 -0
  69. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/performance/__init__.py +0 -0
  70. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/performance/test_performance.py +0 -0
  71. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_advanced_features.py +0 -0
  72. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_cli_format.py +0 -0
  73. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_config_unit.py +0 -0
  74. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_core_workflow.py +0 -0
  75. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_hybrid_search.py +0 -0
  76. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_integration.py +0 -0
  77. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_kb_current.py +0 -0
  78. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/test_suggest_links.py +0 -0
  79. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/__init__.py +0 -0
  80. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_edit.py +0 -0
  81. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_format_unify.py +0 -0
  82. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_formatters.py +0 -0
  83. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_global_config.py +0 -0
  84. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_indexer_clear_before_rebuild.py +0 -0
  85. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_indexer_verify.py +0 -0
  86. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_kb_manager.py +0 -0
  87. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_lazy_import.py +0 -0
  88. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_template.py +0 -0
  89. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_template_cli.py +0 -0
  90. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/unit/test_vector_store_clear.py +0 -0
  91. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/utils/__init__.py +0 -0
  92. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/utils/assertions.py +0 -0
  93. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/utils/jfox_cli.py +0 -0
  94. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/utils/note_generator.py +0 -0
  95. {jfox_cli-0.2.0 → jfox_cli-0.2.1}/tests/utils/temp_kb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jfox-cli
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: JFox - Zettelkasten 知识管理 CLI 工具
5
5
  Project-URL: Homepage, https://github.com/zhuxixi/jfox
6
6
  Project-URL: Repository, https://github.com/zhuxixi/jfox
@@ -0,0 +1,286 @@
1
+ # PR 自动 Code Review 系统设计
2
+
3
+ ## 概述
4
+
5
+ 本系统实现当 GitHub 上任何仓库创建或更新 PR 时,自动触发 Claude Code 的 code review,并将结果提交到 PR 评论中。
6
+
7
+ **核心目标:**
8
+ - 全自动:PR 创建/更新后自动触发,无需人工干预
9
+ - 多仓库:支持监控账号下所有可访问的仓库
10
+ - 轻量级:单 Python 脚本,资源占用低
11
+ - 易部署:Windows 环境一键启动
12
+
13
+ ---
14
+
15
+ ## 架构
16
+
17
+ ```
18
+ ┌─────────────────┐ ┌─────────────┐ ┌──────────────────────────┐
19
+ │ GitHub Webhook │────▶│ smee.io │────▶│ Flask Webhook Receiver │
20
+ │ (PR opened/ │ │ (proxy) │ │ (Windows 本地运行) │
21
+ │ synchronize) │ └─────────────┘ └──────────┬───────────────┘
22
+ └─────────────────┘ │
23
+
24
+ ┌──────────────────────┐
25
+ │ 调用 Claude Code │
26
+ │ claude code │
27
+ │ --print │
28
+ │ /code-review:code-review <PR_URL>
29
+ └──────────┬───────────┘
30
+
31
+
32
+ ┌──────────────────────┐
33
+ │ 提交 Review 评论 │
34
+ │ (gh pr review ...) │
35
+ └──────────────────────┘
36
+ ```
37
+
38
+ ---
39
+
40
+ ## 组件说明
41
+
42
+ ### 1. smee.io(Webhook 代理)
43
+
44
+ GitHub Webhook 需要公网地址,smee.io 是由 Probot 项目维护、GitHub 官方文档推荐的免费 webhook 代理服务,可将 webhook 事件转发到本地。
45
+
46
+ - 访问 https://smee.io 获取一个唯一 URL
47
+ - 本地客户端连接到 smee.io,接收事件
48
+ - 无需暴露本地端口到公网
49
+
50
+ ### 2. Flask Webhook 接收器
51
+
52
+ 核心服务,负责:
53
+
54
+ 1. **接收事件**:监听来自 smee.io 的 POST 请求
55
+ 2. **过滤事件**:只处理 `pull_request.opened` 和 `pull_request.synchronize`
56
+ 3. **提取信息**:从 payload 获取 PR URL、仓库、分支等信息
57
+ 4. **触发 Review**:调用 Claude Code 执行 code review
58
+ 5. **提交结果**:使用 `gh` CLI 将 review 结果提交到 PR
59
+
60
+ **技术栈:**
61
+ - Python 3.10+
62
+ - Flask(轻量级 web 框架)
63
+ - requests(HTTP 客户端)
64
+
65
+ ### 3. Claude Code 调用
66
+
67
+ 通过 subprocess 调用本地安装的 Claude Code:
68
+
69
+ ```bash
70
+ claude code --print /code-review:code-review <PR_URL>
71
+ ```
72
+
73
+ Claude Code 会分析 PR 并返回 review 结果(JSON 或文本格式)。
74
+
75
+ ### 4. GitHub CLI (gh)
76
+
77
+ 用于提交 review 评论,复用已有的认证:
78
+
79
+ ```bash
80
+ gh pr review <PR_URL> --comment -b "<review_result>"
81
+ ```
82
+
83
+ ---
84
+
85
+ ## 数据流
86
+
87
+ ### PR 创建场景
88
+
89
+ ```
90
+ 1. 用户在 GitHub 创建 PR
91
+ 2. GitHub 发送 webhook 到配置的 smee.io URL
92
+ 3. smee.io 转发到本地 Flask 服务
93
+ 4. Flask 验证事件签名(可选)
94
+ 5. Flask 提取 PR URL: https://github.com/owner/repo/pull/123
95
+ 6. Flask 调用: claude code --print /code-review:code-review <URL>
96
+ 7. Claude Code 返回 review 结果
97
+ 8. Flask 调用: gh pr review <URL> --comment -b "<result>"
98
+ 9. 评论出现在 PR 中
99
+ ```
100
+
101
+ ### PR 更新场景(push 新代码)
102
+
103
+ ```
104
+ 1. 用户 push 新 commit 到 PR 分支
105
+ 2. GitHub 发送 `pull_request.synchronize` 事件
106
+ 3. 同上流程,重新执行 code review
107
+ 4. 新评论追加到 PR
108
+ ```
109
+
110
+ ---
111
+
112
+ ## 配置
113
+
114
+ 配置文件:`config.json`
115
+
116
+ ```json
117
+ {
118
+ "smee_url": "https://smee.io/YOUR_UNIQUE_CHANNEL",
119
+ "port": 3000,
120
+ "log_level": "INFO",
121
+ "review_on_open": true,
122
+ "review_on_sync": true,
123
+ "skip_drafts": true,
124
+ "max_pr_age_hours": 24
125
+ }
126
+ ```
127
+
128
+ **配置项说明:**
129
+
130
+ | 配置项 | 说明 | 默认值 |
131
+ |--------|------|--------|
132
+ | `smee_url` | smee.io 提供的唯一 URL | 必填 |
133
+ | `port` | 本地 Flask 服务端口 | 3000 |
134
+ | `log_level` | 日志级别 | INFO |
135
+ | `review_on_open` | PR 创建时自动 review | true |
136
+ | `review_on_sync` | PR 更新时自动 review | true |
137
+ | `skip_drafts` | 跳过 Draft PR | true |
138
+ | `max_pr_age_hours` | 只 review 创建时间在该小时数以内的 PR(防止误触发历史 PR) | 24 |
139
+
140
+ ---
141
+
142
+ ## 安装与运行
143
+
144
+ ### 前置依赖
145
+
146
+ 1. Python 3.10+
147
+ 2. Claude Code 已安装并登录
148
+ 3. GitHub CLI (gh) 已安装并登录
149
+
150
+ ### 安装
151
+
152
+ ```bash
153
+ # 克隆或下载项目
154
+ git clone <repo>
155
+ cd pr-auto-reviewer
156
+
157
+ # 创建虚拟环境
158
+ python -m venv venv
159
+ venv\Scripts\activate
160
+
161
+ # 安装依赖
162
+ pip install -r requirements.txt
163
+ ```
164
+
165
+ ### 配置
166
+
167
+ 1. 访问 https://smee.io 获取一个新的 channel URL
168
+ 2. 复制 `config.example.json` 为 `config.json`
169
+ 3. 将 smee_url 填入配置
170
+
171
+ ### 启动
172
+
173
+ ```bash
174
+ # 方式1:直接运行(前台)
175
+ python webhook_server.py
176
+
177
+ # 方式2:后台运行(Windows)
178
+ start /B python webhook_server.py
179
+ ```
180
+
181
+ ### 配置 GitHub Webhook
182
+
183
+ 对于要监控的仓库,在 Settings > Webhooks 中添加:
184
+
185
+ - **Payload URL**: 你的 smee.io URL (如 `https://smee.io/abc123`)
186
+ - **Content type**: `application/json`
187
+ - **Events**: 选择 "Pull requests"
188
+ - **Active**: 勾选
189
+
190
+ **注意**:每个仓库都需要单独配置 webhook。如果想要监控所有仓库,可以考虑使用 GitHub App(需要额外配置)。
191
+
192
+ ---
193
+
194
+ ## 错误处理
195
+
196
+ | 错误场景 | 处理方式 |
197
+ |----------|----------|
198
+ | smee.io 连接断开 | 自动重连,指数退避 |
199
+ | Claude Code 调用失败 | 记录日志,重试 3 次 |
200
+ | gh CLI 认证过期 | 记录错误,通知用户 |
201
+ | PR 不存在或无权访问 | 跳过,记录警告 |
202
+ | 网络超时 | 重试 3 次,每次间隔 5 秒 |
203
+ | Review 结果为空 | 跳过提交评论 |
204
+
205
+ ---
206
+
207
+ ## 日志
208
+
209
+ 日志输出到控制台和文件 `logs/webhook.log`:
210
+
211
+ ```
212
+ 2026-04-13 10:30:15 [INFO] 收到 webhook: pull_request.opened, repo=owner/repo, pr=#123
213
+ 2026-04-13 10:30:15 [INFO] 开始 review: https://github.com/owner/repo/pull/123
214
+ 2026-04-13 10:31:02 [INFO] Review 完成,提交评论
215
+ 2026-04-13 10:31:03 [INFO] 评论提交成功
216
+ ```
217
+
218
+ ---
219
+
220
+ ## 安全考虑
221
+
222
+ 1. **Webhook 签名验证**(可选):
223
+ - 配置 GitHub webhook secret
224
+ - 本地验证 HMAC 签名,防止伪造请求
225
+
226
+ 2. **Token 安全**:
227
+ - 复用 gh CLI 的认证,不存储 PAT
228
+ - gh CLI 使用系统密钥管理器存储 token
229
+
230
+ 3. **访问控制**:
231
+ - Flask 服务只绑定 localhost(`127.0.0.1`)
232
+ - 不暴露到公网
233
+
234
+ 4. **日志脱敏**:
235
+ - 不记录敏感信息
236
+ - PR URL 等基础信息正常记录
237
+
238
+ ---
239
+
240
+ ## 扩展性
241
+
242
+ ### 未来可扩展的功能
243
+
244
+ 1. **更细粒度的控制**:
245
+ - 按仓库配置不同的 review 规则
246
+ - 支持 `.github/code-review-config.yml`
247
+
248
+ 2. **更多触发条件**:
249
+ - 只在特定标签的 PR 上触发
250
+ - 只在特定分支的 PR 上触发
251
+
252
+ 3. **结果通知**:
253
+ - 发送到 Slack/飞书
254
+ - 发送邮件通知
255
+
256
+ 4. **Review 缓存**:
257
+ - 对相同 commit 的 PR 返回缓存结果
258
+ - 减少 API 调用和计算成本
259
+
260
+ ---
261
+
262
+ ## 文件结构
263
+
264
+ ```
265
+ pr-auto-reviewer/
266
+ ├── webhook_server.py # 主服务
267
+ ├── config.py # 配置加载
268
+ ├── github_client.py # GitHub API 封装
269
+ ├── reviewer.py # Claude Code 调用封装
270
+ ├── requirements.txt # 依赖
271
+ ├── config.example.json # 配置示例
272
+ ├── config.json # 实际配置(gitignore)
273
+ ├── logs/ # 日志目录
274
+ │ └── webhook.log
275
+ └── README.md # 使用说明
276
+ ```
277
+
278
+ ---
279
+
280
+ ## 成功标准
281
+
282
+ - [ ] PR 创建后 1 分钟内自动触发 review
283
+ - [ ] Review 结果成功提交到 PR 评论
284
+ - [ ] PR 更新后自动重新 review
285
+ - [ ] 服务稳定运行 7 天无崩溃
286
+ - [ ] 支持同时监控多个仓库
@@ -1,5 +1,5 @@
1
1
  """JFox - Zettelkasten 知识管理工具"""
2
2
 
3
- __version__ = "0.2.0"
3
+ __version__ = "0.2.1"
4
4
  __author__ = "User"
5
5
  __email__ = "user@example.com"
@@ -1702,15 +1702,29 @@ def _index_impl(action: str, output_format: str):
1702
1702
  console.print("[yellow]Rebuilding index...[/yellow]")
1703
1703
  count = indexer.index_all()
1704
1704
 
1705
+ # 同时重建 BM25 索引
1706
+ from . import note as note_module
1707
+ from .bm25_index import get_bm25_index
1708
+
1709
+ bm25_index = get_bm25_index()
1710
+ notes = note_module.list_notes(limit=10000)
1711
+ bm25_success = bm25_index.rebuild_from_notes(notes)
1712
+
1705
1713
  result = {
1706
1714
  "success": True,
1707
1715
  "indexed": count,
1716
+ "bm25_rebuilt": bm25_success,
1717
+ "bm25_indexed": len(notes),
1708
1718
  }
1709
1719
 
1710
1720
  if output_format == "json":
1711
1721
  print(output_json(result))
1712
1722
  else:
1713
1723
  console.print(f"[green]✓[/green] Indexed {count} notes")
1724
+ if bm25_success:
1725
+ console.print(f"[green]✓[/green] BM25 index rebuilt: {len(notes)} notes")
1726
+ else:
1727
+ console.print("[yellow]⚠[/yellow] ChromaDB rebuilt, but BM25 rebuild failed")
1714
1728
 
1715
1729
  elif action == "verify":
1716
1730
  verification = indexer.verify_index()
@@ -2155,6 +2169,115 @@ def kb(
2155
2169
  # =============================================================================
2156
2170
 
2157
2171
 
2172
def _ingest_log_impl(
    repo_path: str,
    limit: int,
    note_type: str,
    batch_size: int,
    output_format: str,
    json_output: bool,
):
    """Extract commit history from a Git repository and import it as notes.

    Args:
        repo_path: Path of the local Git repository to read.
        limit: Maximum number of commits passed to ``extract_commits``.
        note_type: Note type forwarded to ``bulk_import_notes``.
        batch_size: Batch size forwarded to ``bulk_import_notes``.
        output_format: ``"json"`` prints a JSON document; any other value
            prints human-readable console output.
        json_output: Accepted for signature parity with the CLI command; not
            read here (the caller folds it into ``output_format``).
    """
    # Imported lazily inside the function, as the surrounding module does.
    from .git_extractor import commits_to_notes, extract_commits
    from .performance import bulk_import_notes

    as_json = output_format == "json"

    commits = extract_commits(repo_path, limit=limit)

    # Nothing to import: report an empty (but successful) result and stop.
    if not commits:
        empty_result = {
            "success": True,
            "imported": 0,
            "total": 0,
            "message": "没有找到 commit 记录",
        }
        if as_json:
            print(output_json(empty_result))
        else:
            console.print("[yellow]![/yellow] 没有找到 commit 记录")
        return

    # Convert commits into the dict shape bulk_import_notes() consumes.
    notes_data = commits_to_notes(commits, repo_path=repo_path)

    if not as_json:
        console.print(f"[yellow]提取了 {len(notes_data)} 条 commit,正在导入...[/yellow]")

    # Progress output is suppressed in JSON mode so stdout stays parseable.
    import_result = bulk_import_notes(
        notes_data=notes_data,
        note_type=note_type,
        batch_size=batch_size,
        show_progress=not as_json,
    )

    # Keys coming from import_result are merged last and therefore win.
    summary = {
        "success": True,
        "repo_path": str(Path(repo_path).resolve()),
        "commits_extracted": len(commits),
        **import_result,
    }

    if as_json:
        print(output_json(summary))
        return

    console.print(f"[green]✓[/green] 导入: {import_result['imported']}")
    console.print(f"[red]✗[/red] 失败: {import_result['failed']}")
    console.print(f"总计: {import_result['total']}")
2226
+
2227
+
2228
@app.command()
def ingest_log(
    repo_path: str = typer.Argument(..., help="本地 Git 仓库路径"),
    limit: int = typer.Option(50, "--limit", "-n", help="提取 commit 数量"),
    note_type: str = typer.Option("fleeting", "--type", "-t", help="笔记类型"),
    batch_size: int = typer.Option(32, "--batch-size", "-b", help="批处理大小"),
    kb: Optional[str] = typer.Option(None, "--kb", "-k", help="目标知识库名称"),
    output_format: str = typer.Option("table", "--format", "-f", help="输出格式: json, table"),
    json_output: bool = typer.Option(
        False, "--json", help="JSON 输出(快捷方式,等同于 --format json)"
    ),
):
    """
    从 Git 仓库提取 commit 历史并导入为笔记

    使用 block 分隔符格式提取 git log,自动处理 UTF-8 编码和路径规范化。

    示例:
        jfox ingest-log ./my-project --limit 50
        jfox ingest-log ./my-project --kb work --type permanent
    """
    # NOTE: the docstring above is kept verbatim because it doubles as the
    # command's help text.
    try:
        # --json is a shortcut for --format json.
        if json_output:
            output_format = "json"

        impl_args = (repo_path, limit, note_type, batch_size, output_format, json_output)

        if not kb:
            _ingest_log_impl(*impl_args)
        else:
            # Run the import with the requested knowledge base active only
            # for the duration of the call.
            from .config import use_kb

            with use_kb(kb):
                _ingest_log_impl(*impl_args)

    except Exception as e:
        failure = {"success": False, "error": str(e)}
        if output_format == "json":
            print(output_json(failure))
        elif isinstance(e, ValueError):
            # ValueError messages are shown as-is.
            console.print(f"[red]✗[/red] {e}")
        else:
            # Any other failure is prefixed with "Error: ".
            console.print(f"[red]✗[/red] Error: {e}")
        raise typer.Exit(1)
2279
+
2280
+
2158
2281
  @app.command()
2159
2282
  def bulk_import(
2160
2283
  file_path: str = typer.Argument(..., help="JSON 文件路径,包含笔记数据"),
@@ -0,0 +1,178 @@
1
+ """Git 仓库数据提取模块
2
+
3
+ 从本地 Git 仓库提取 commit 历史,转换为结构化数据。
4
+ """
5
+
6
+ import logging
7
+ import subprocess
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # git log block 分隔符
14
+ _COMMIT_DELIMITER = "---COMMIT_START---"
15
+ # git log format 模板
16
+ _GIT_LOG_FORMAT = (
17
+ f"{_COMMIT_DELIMITER}%n" f"Hash: %H%n" f"Subject: %s%n" f"Author: %an%n" f"Date: %ad%n" f"%n%b"
18
+ )
19
+
20
+
21
+ def parse_git_log_output(raw: str) -> List[Dict[str, str]]:
22
+ """
23
+ 解析 git log block 分隔符格式的输出
24
+
25
+ Args:
26
+ raw: git log 原始输出
27
+
28
+ Returns:
29
+ commit 列表,每项包含 hash, subject, author, date, body
30
+ """
31
+ if not raw.strip():
32
+ return []
33
+
34
+ commits = []
35
+ blocks = raw.split(_COMMIT_DELIMITER)
36
+
37
+ for block in blocks:
38
+ block = block.strip()
39
+ if not block:
40
+ continue
41
+
42
+ commit: Dict[str, str] = {
43
+ "hash": "",
44
+ "subject": "",
45
+ "author": "",
46
+ "date": "",
47
+ "body": "",
48
+ }
49
+
50
+ lines = block.split("\n")
51
+ i = 0
52
+ body_lines = []
53
+
54
+ # Parse header fields
55
+ for i, line in enumerate(lines):
56
+ line = line.rstrip()
57
+ if line.startswith("Hash:"):
58
+ commit["hash"] = line[5:].strip()
59
+ elif line.startswith("Subject:"):
60
+ commit["subject"] = line[8:].strip()
61
+ elif line.startswith("Author:"):
62
+ commit["author"] = line[7:].strip()
63
+ elif line.startswith("Date:"):
64
+ commit["date"] = line[5:].strip()
65
+ elif line == "":
66
+ # Empty line marks end of headers, start collecting body
67
+ body_lines = lines[i + 1 :]
68
+ break
69
+
70
+ # Join body lines with newlines
71
+ commit["body"] = "\n".join(body_lines).strip()
72
+
73
+ if commit["hash"]:
74
+ commits.append(commit)
75
+
76
+ return commits
77
+
78
+
79
def extract_commits(repo_path: str, limit: int = 50) -> List[Dict[str, str]]:
    """Extract commit history from a local Git repository.

    Runs ``git -C <repo> log`` with ``_GIT_LOG_FORMAT`` and ``--date=short``
    and parses the output with ``parse_git_log_output``.

    Args:
        repo_path: Path of the repository; it is resolved to an absolute path
            before being handed to git.
        limit: Maximum number of commits to extract; must not be negative.

    Returns:
        One dict per commit with the keys ``hash``, ``subject``, ``author``,
        ``date`` and ``body``.

    Raises:
        ValueError: If ``limit`` is negative, the ``git`` executable cannot be
            found, or ``git log`` exits with a non-zero status (for example
            because the path is not a Git repository).
    """
    # Reject negative limits up front: formatted as "-<limit>" a value such as
    # -5 would become the unknown option "--5" and yield an opaque git error.
    if limit < 0:
        raise ValueError(f"limit 必须为非负整数: {limit}")

    repo = Path(repo_path).resolve()

    cmd = [
        "git",
        "-C",
        str(repo),
        "log",
        f"--format={_GIT_LOG_FORMAT}",
        "--date=short",
        f"-{limit}",
    ]

    try:
        # Decode explicitly as UTF-8 and replace undecodable bytes so commit
        # messages in odd encodings cannot abort the extraction.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        # Raised when the git executable itself is missing.
        raise ValueError("git 命令未找到,请确认 git 已安装") from exc

    if result.returncode != 0:
        stderr = result.stderr.strip()
        raise ValueError(f"git log 执行失败: {stderr}")

    return parse_git_log_output(result.stdout)
121
+
122
+
123
def commits_to_notes(
    commits: List[Dict[str, str]],
    repo_name: Optional[str] = None,
    repo_path: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Convert parsed commits into bulk-import compatible note dicts.

    Args:
        commits: Commit dicts as returned by ``parse_git_log_output``.
        repo_name: Repository name used in the ``source:<name>`` tag. When
            falsy it is derived from ``repo_path``.
        repo_path: Repository path; its resolved final component becomes the
            repository name when ``repo_name`` is not given. If both are
            missing the name ``"unknown"`` is used.

    Returns:
        One dict per commit with ``title`` (the commit subject), ``content``
        (short hash, author, date and the cleaned body) and ``tags``.
    """
    if not commits:
        return []

    if not repo_name:
        repo_name = Path(repo_path).resolve().name if repo_path else "unknown"

    source_tags = (f"source:{repo_name}", "source:git-log")

    def _to_note(entry: Dict[str, str]) -> Dict[str, Any]:
        # Abbreviate the hash to its first seven characters.
        abbreviated = entry["hash"][:7]

        # Keep only non-blank body lines that are not Co-authored-by trailers
        # (this drops blank lines inside the body as well, not just at the end).
        kept = []
        for raw_line in entry.get("body", "").split("\n"):
            stripped = raw_line.strip()
            if not stripped:
                continue
            if stripped.lower().startswith("co-authored-by:"):
                continue
            kept.append(raw_line)
        cleaned = "\n".join(kept).strip()

        text = "\n".join(
            (
                f"Commit: {abbreviated}",
                f"Author: {entry['author']}",
                f"Date: {entry['date']}",
            )
        )
        if cleaned:
            # A blank line separates the metadata header from the body.
            text = text + "\n\n" + cleaned

        return {
            "title": entry["subject"],
            "content": text,
            "tags": list(source_tags),
        }

    return [_to_note(entry) for entry in commits]
@@ -251,6 +251,10 @@ def bulk_import_notes(
251
251
  documents = [f"{n.title}\n{n.content}" for n in notes]
252
252
  embeddings = backend.encode(documents).tolist()
253
253
 
254
+ # 确保 VectorStore 已初始化
255
+ if vector_store.collection is None:
256
+ vector_store.init()
257
+
254
258
  # 批量添加到 ChromaDB
255
259
  ids = [n.id for n in notes]
256
260
  metadatas = [
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "jfox-cli"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "JFox - Zettelkasten 知识管理 CLI 工具"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}