jfox-cli 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/.github/workflows/integration-test.yml +98 -4
  2. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/CLAUDE.md +4 -0
  3. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/PKG-INFO +1 -1
  4. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/__init__.py +1 -1
  5. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/bm25_index.py +110 -106
  6. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/cli.py +532 -418
  7. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/config.py +32 -29
  8. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/embedding_backend.py +10 -11
  9. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/formatters.py +55 -66
  10. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/global_config.py +56 -64
  11. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/graph.py +54 -57
  12. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/indexer.py +114 -95
  13. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/kb_manager.py +62 -67
  14. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/models.py +40 -37
  15. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/note.py +281 -110
  16. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/performance.py +93 -90
  17. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/search_engine.py +65 -52
  18. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/template.py +64 -53
  19. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/template_cli.py +47 -42
  20. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/vector_store.py +76 -51
  21. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/pyproject.toml +4 -2
  22. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skills-recommend/claude-code/jfox-insert/SKILL.md +25 -0
  23. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skills-recommend/claude-code/jfox-organize/SKILL.md +7 -2
  24. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/conftest.py +96 -89
  25. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/integration/test_backlinks.py +35 -51
  26. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/performance/test_performance.py +76 -75
  27. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_advanced_features.py +58 -29
  28. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_cli_format.py +40 -31
  29. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_config_unit.py +74 -70
  30. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_core_workflow.py +140 -129
  31. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_hybrid_search.py +40 -37
  32. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_integration.py +11 -11
  33. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_kb_current.py +18 -20
  34. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_bm25_batch.py +11 -7
  35. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_edit.py +9 -12
  36. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_format_unify.py +30 -12
  37. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_formatters.py +35 -24
  38. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_global_config.py +157 -174
  39. jfox_cli-0.1.5/tests/unit/test_index_kb_param.py +92 -0
  40. jfox_cli-0.1.5/tests/unit/test_indexer_clear_before_rebuild.py +69 -0
  41. jfox_cli-0.1.5/tests/unit/test_indexer_verify.py +53 -0
  42. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_kb_manager.py +114 -127
  43. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_template.py +32 -43
  44. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/test_template_cli.py +250 -233
  45. jfox_cli-0.1.5/tests/unit/test_vector_store_clear.py +76 -0
  46. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/utils/assertions.py +45 -32
  47. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/utils/jfox_cli.py +84 -105
  48. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/utils/note_generator.py +44 -51
  49. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/utils/temp_kb.py +5 -5
  50. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/uv.lock +1 -1
  51. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/.githooks/pre-push +0 -0
  52. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/.github/workflows/publish.yml +0 -0
  53. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/.gitignore +0 -0
  54. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/.python-version +0 -0
  55. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/AGENTS.md +0 -0
  56. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/DEVELOPMENT_PLAN.md +0 -0
  57. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/README.md +0 -0
  58. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/SESSION.md +0 -0
  59. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/SESSION_SUMMARY.md +0 -0
  60. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/docs/superpowers/specs/2026-04-03-bugfixes-design.md +0 -0
  61. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jessica-jones-static-cable.md +0 -0
  62. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/jfox/__main__.py +0 -0
  63. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/pytest.ini +0 -0
  64. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/run_full_test.ps1 +0 -0
  65. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skill/evals/evals.json +0 -0
  66. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skill/knowledge-base-notes/SKILL.md +0 -0
  67. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skill/knowledge-base-workspace/SKILL.md +0 -0
  68. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skills-recommend/README.md +0 -0
  69. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skills-recommend/claude-code/jfox-health/SKILL.md +0 -0
  70. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skills-recommend/claude-code/jfox-init/SKILL.md +0 -0
  71. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/skills-recommend/claude-code/jfox-search/SKILL.md +0 -0
  72. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/COVERAGE_PLAN.md +0 -0
  73. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/MIGRATION.md +0 -0
  74. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/TESTS.md +0 -0
  75. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/integration/__init__.py +0 -0
  76. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/performance/__init__.py +0 -0
  77. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/test_suggest_links.py +0 -0
  78. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/unit/__init__.py +0 -0
  79. {jfox_cli-0.1.4 → jfox_cli-0.1.5}/tests/utils/__init__.py +0 -0
@@ -14,6 +14,7 @@ on:
14
14
  - 'jfox/**'
15
15
  - 'tests/**'
16
16
  - 'pyproject.toml'
17
+ - '.github/workflows/integration-test.yml'
17
18
  # 允许手动触发
18
19
  workflow_dispatch:
19
20
  inputs:
@@ -33,6 +34,32 @@ env:
33
34
  PYTHONUTF8: 1
34
35
 
35
36
  jobs:
37
+ # ============ Lint 检查 ============
38
+ lint:
39
+ runs-on: ubuntu-latest
40
+ steps:
41
+ - name: Checkout code
42
+ uses: actions/checkout@v4
43
+
44
+ - name: Set up Python
45
+ uses: actions/setup-python@v5
46
+ with:
47
+ python-version: '3.11'
48
+
49
+ - uses: astral-sh/setup-uv@v4
50
+ with:
51
+ version: "latest"
52
+ enable-cache: true
53
+
54
+ - name: Install dependencies
55
+ run: uv sync --extra dev
56
+
57
+ - name: Run ruff check
58
+ run: uv run ruff check jfox/ tests/
59
+
60
+ - name: Run black check
61
+ run: uv run black --check jfox/ tests/
62
+
36
63
  # ============ 快速测试(PR 和 push 触发)============
37
64
  test-fast:
38
65
  runs-on: ${{ matrix.os }}
@@ -102,12 +129,20 @@ jobs:
102
129
  version: "latest"
103
130
  enable-cache: true
104
131
 
105
- - name: Cache model
132
+ - name: Cache model (Unix)
133
+ if: runner.os != 'Windows'
106
134
  uses: actions/cache@v4
107
135
  with:
108
136
  path: ~/.cache/torch/sentence_transformers
109
137
  key: ${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2
110
138
 
139
+ - name: Cache model (Windows)
140
+ if: runner.os == 'Windows'
141
+ uses: actions/cache@v4
142
+ with:
143
+ path: ~\AppData\Local\torch\sentence_transformers
144
+ key: ${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2
145
+
111
146
  - name: Install dependencies
112
147
  run: uv sync --extra dev
113
148
 
@@ -152,12 +187,20 @@ jobs:
152
187
  version: "latest"
153
188
  enable-cache: true
154
189
 
155
- - name: Cache model
190
+ - name: Cache model (Unix)
191
+ if: runner.os != 'Windows'
156
192
  uses: actions/cache@v4
157
193
  with:
158
194
  path: ~/.cache/torch/sentence_transformers
159
195
  key: ${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2
160
196
 
197
+ - name: Cache model (Windows)
198
+ if: runner.os == 'Windows'
199
+ uses: actions/cache@v4
200
+ with:
201
+ path: ~\AppData\Local\torch\sentence_transformers
202
+ key: ${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2
203
+
161
204
  - name: Install dependencies
162
205
  run: uv sync --extra dev
163
206
 
@@ -178,7 +221,25 @@ jobs:
178
221
  path: |
179
222
  .pytest_cache/
180
223
 
181
- # ============ 覆盖率报告汇总 ============
224
+ # ============ 质量门禁(所有必须测试通过才算成功)============
225
+ quality-gate:
226
+ runs-on: ubuntu-latest
227
+ needs: [lint, test-fast]
228
+ if: always()
229
+ steps:
230
+ - name: Check all jobs passed
231
+ run: |
232
+ echo "lint: ${{ needs.lint.result }}"
233
+ echo "test-fast: ${{ needs.test-fast.result }}"
234
+ if [[ "${{ needs.lint.result }}" == "success" && "${{ needs.test-fast.result }}" == "success" ]]; then
235
+ echo "Quality gate passed!"
236
+ exit 0
237
+ else
238
+ echo "Quality gate FAILED!"
239
+ exit 1
240
+ fi
241
+
242
+ # ============ 覆盖率报告(仅 fast 测试通过后)============
182
243
  coverage:
183
244
  runs-on: ubuntu-latest
184
245
  needs: [test-fast]
@@ -203,9 +264,42 @@ jobs:
203
264
 
204
265
  - name: Run coverage
205
266
  run: |
206
- uv run pytest tests/ -m "not embedding and not slow" --cov=jfox --cov-report=xml --cov-report=html -v --timeout=300
267
+ uv run pytest tests/ -m "not embedding and not slow" --cov=jfox --cov-report=xml --cov-report=html --cov-report=term -v --timeout=300
207
268
  timeout-minutes: 25
208
269
 
270
+ - name: Post coverage comment on PR
271
+ if: github.event_name == 'pull_request'
272
+ env:
273
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
274
+ run: |
275
+ python -c "
276
+ import xml.etree.ElementTree as ET
277
+ import subprocess
278
+
279
+ tree = ET.parse('coverage.xml')
280
+ root = tree.getroot()
281
+ rate = float(root.attrib['line-rate'])
282
+ lines_covered = int(root.attrib['lines-covered'])
283
+ lines_valid = int(root.attrib['lines-valid'])
284
+
285
+ rows = []
286
+ for cls in root.iter('class'):
287
+ name = cls.attrib['filename']
288
+ r = float(cls.attrib['line-rate'])
289
+ rows.append((name, r))
290
+ rows.sort(key=lambda x: x[1])
291
+
292
+ comment = '## Test Coverage\n\n'
293
+ comment += '**Overall: {:.1f}%** ({}/{} lines)\n\n'.format(rate * 100, lines_covered, lines_valid)
294
+ comment += '| Module | Coverage | Status |\n|--------|----------|--------|\n'
295
+ for name, r in rows:
296
+ icon = ':green_circle:' if r >= 0.8 else ':yellow_circle:' if r >= 0.5 else ':red_circle:'
297
+ comment += '| {} | {:.1f}% | {} |\n'.format(name, r * 100, icon)
298
+
299
+ pr = '${{ github.event.pull_request.number }}'
300
+ subprocess.run(['gh', 'pr', 'comment', pr, '--body', comment])
301
+ "
302
+
209
303
  - name: Upload coverage report
210
304
  uses: actions/upload-artifact@v4
211
305
  with:
@@ -126,6 +126,10 @@ Four jobs in `.github/workflows/integration-test.yml`:
126
126
  - Set `PYTHONUTF8=1` and `chcp 65001` for encoding
127
127
  - HuggingFace mirror for China: `export HF_ENDPOINT=https://hf-mirror.com`
128
128
 
129
+ ## Branch Rules
130
+
131
+ - **main 是保护分支**,不能直接 commit 或 push。所有改动必须通过新分支 + PR 合入。
132
+
129
133
  ## Gotchas
130
134
 
131
135
  - `pytest.ini` `addopts` includes `-v`, so `pytest tests/` already runs verbose — adding `-v` manually is redundant
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jfox-cli
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: JFox - Zettelkasten 知识管理 CLI 工具
5
5
  Project-URL: Homepage, https://github.com/zhuxixi/jfox
6
6
  Project-URL: Repository, https://github.com/zhuxixi/jfox
@@ -1,5 +1,5 @@
1
1
  """JFox - Zettelkasten 知识管理工具"""
2
2
 
3
- __version__ = "0.1.4"
3
+ __version__ = "0.1.5"
4
4
  __author__ = "User"
5
5
  __email__ = "user@example.com"
@@ -9,7 +9,7 @@ import logging
9
9
  import pickle
10
10
  import re
11
11
  from pathlib import Path
12
- from typing import Dict, List, Optional, Set, Tuple
12
+ from typing import Dict, List, Optional, Tuple
13
13
 
14
14
  from rank_bm25 import BM25Okapi
15
15
 
@@ -21,73 +21,73 @@ logger = logging.getLogger(__name__)
21
21
  class BM25Index:
22
22
  """
23
23
  BM25 索引管理器
24
-
24
+
25
25
  负责构建、保存、加载和查询 BM25 索引。
26
26
  支持增量更新和全量重建。
27
27
  """
28
-
28
+
29
29
  INDEX_VERSION = 1
30
30
  INDEX_FILENAME = "bm25_index.pkl"
31
31
  METADATA_FILENAME = "bm25_metadata.json"
32
-
32
+
33
33
  def __init__(self, index_dir: Optional[Path] = None):
34
34
  """
35
35
  初始化 BM25 索引
36
-
36
+
37
37
  Args:
38
38
  index_dir: 索引文件存放目录,默认为 config.zk_dir
39
39
  """
40
40
  self.index_dir = index_dir or config.zk_dir
41
41
  self.index_path = self.index_dir / self.INDEX_FILENAME
42
42
  self.metadata_path = self.index_dir / self.METADATA_FILENAME
43
-
43
+
44
44
  # 索引数据
45
45
  self.bm25: Optional[BM25Okapi] = None
46
46
  self.documents: List[str] = [] # 分词后的文档列表
47
- self.doc_ids: List[str] = [] # 文档 ID 列表
47
+ self.doc_ids: List[str] = [] # 文档 ID 列表
48
48
  self.doc_mapping: Dict[str, int] = {} # note_id -> index
49
-
49
+
50
50
  # 加载已有索引
51
51
  self._load()
52
-
52
+
53
53
  def _tokenize(self, text: str) -> List[str]:
54
54
  """
55
55
  分词函数 - 适配中英文
56
-
56
+
57
57
  Args:
58
58
  text: 输入文本
59
-
59
+
60
60
  Returns:
61
61
  分词结果列表
62
62
  """
63
63
  if not text:
64
64
  return []
65
-
65
+
66
66
  # 转换为小写
67
67
  text = text.lower()
68
-
68
+
69
69
  # 提取中文字符串(2-10字)和英文单词
70
70
  # 中文按字符分割,英文按单词分割
71
71
  tokens = []
72
-
72
+
73
73
  # 匹配中文字符
74
- chinese_chars = re.findall(r'[\u4e00-\u9fff]', text)
74
+ chinese_chars = re.findall(r"[\u4e00-\u9fff]", text)
75
75
  tokens.extend(chinese_chars)
76
-
76
+
77
77
  # 匹配英文单词(包括下划线连接的变量名)
78
- english_words = re.findall(r'[a-z][a-z0-9_]{0,20}', text)
78
+ english_words = re.findall(r"[a-z][a-z0-9_]{0,20}", text)
79
79
  tokens.extend(english_words)
80
-
80
+
81
81
  # 匹配数字
82
- numbers = re.findall(r'\d+', text)
82
+ numbers = re.findall(r"\d+", text)
83
83
  tokens.extend(numbers)
84
-
84
+
85
85
  return tokens
86
-
86
+
87
87
  def _load(self) -> bool:
88
88
  """
89
89
  从磁盘加载索引
90
-
90
+
91
91
  Returns:
92
92
  是否成功加载
93
93
  """
@@ -95,91 +95,93 @@ class BM25Index:
95
95
  if not self.index_path.exists() or not self.metadata_path.exists():
96
96
  logger.info("BM25 index not found, will create new index")
97
97
  return False
98
-
98
+
99
99
  # 加载元数据
100
- with open(self.metadata_path, 'r', encoding='utf-8') as f:
100
+ with open(self.metadata_path, "r", encoding="utf-8") as f:
101
101
  metadata = json.load(f)
102
-
102
+
103
103
  # 检查版本
104
- if metadata.get('version') != self.INDEX_VERSION:
105
- logger.warning(f"BM25 index version mismatch: {metadata.get('version')} != {self.INDEX_VERSION}")
104
+ if metadata.get("version") != self.INDEX_VERSION:
105
+ logger.warning(
106
+ f"BM25 index version mismatch: {metadata.get('version')} != {self.INDEX_VERSION}"
107
+ )
106
108
  return False
107
-
109
+
108
110
  # 加载索引
109
- with open(self.index_path, 'rb') as f:
111
+ with open(self.index_path, "rb") as f:
110
112
  index_data = pickle.load(f)
111
-
112
- self.bm25 = index_data['bm25']
113
- self.documents = index_data['documents']
114
- self.doc_ids = index_data['doc_ids']
115
- self.doc_mapping = index_data['doc_mapping']
116
-
113
+
114
+ self.bm25 = index_data["bm25"]
115
+ self.documents = index_data["documents"]
116
+ self.doc_ids = index_data["doc_ids"]
117
+ self.doc_mapping = index_data["doc_mapping"]
118
+
117
119
  logger.info(f"Loaded BM25 index: {len(self.doc_ids)} documents")
118
120
  return True
119
-
121
+
120
122
  except Exception as e:
121
123
  logger.error(f"Failed to load BM25 index: {e}")
122
124
  self._reset()
123
125
  return False
124
-
126
+
125
127
  def _save(self) -> bool:
126
128
  """
127
129
  保存索引到磁盘
128
-
130
+
129
131
  Returns:
130
132
  是否成功保存
131
133
  """
132
134
  try:
133
135
  # 确保目录存在
134
136
  self.index_dir.mkdir(parents=True, exist_ok=True)
135
-
137
+
136
138
  # 保存元数据
137
139
  metadata = {
138
- 'version': self.INDEX_VERSION,
139
- 'doc_count': len(self.doc_ids),
140
+ "version": self.INDEX_VERSION,
141
+ "doc_count": len(self.doc_ids),
140
142
  }
141
- with open(self.metadata_path, 'w', encoding='utf-8') as f:
143
+ with open(self.metadata_path, "w", encoding="utf-8") as f:
142
144
  json.dump(metadata, f, ensure_ascii=False, indent=2)
143
-
145
+
144
146
  # 保存索引数据
145
147
  index_data = {
146
- 'bm25': self.bm25,
147
- 'documents': self.documents,
148
- 'doc_ids': self.doc_ids,
149
- 'doc_mapping': self.doc_mapping,
148
+ "bm25": self.bm25,
149
+ "documents": self.documents,
150
+ "doc_ids": self.doc_ids,
151
+ "doc_mapping": self.doc_mapping,
150
152
  }
151
- with open(self.index_path, 'wb') as f:
153
+ with open(self.index_path, "wb") as f:
152
154
  pickle.dump(index_data, f)
153
-
155
+
154
156
  logger.info(f"Saved BM25 index: {len(self.doc_ids)} documents")
155
157
  return True
156
-
158
+
157
159
  except Exception as e:
158
160
  logger.error(f"Failed to save BM25 index: {e}")
159
161
  return False
160
-
162
+
161
163
  def _reset(self):
162
164
  """重置索引状态"""
163
165
  self.bm25 = None
164
166
  self.documents = []
165
167
  self.doc_ids = []
166
168
  self.doc_mapping = {}
167
-
169
+
168
170
  def _rebuild_index(self):
169
171
  """重新构建 BM25 索引"""
170
172
  if self.documents:
171
173
  self.bm25 = BM25Okapi(self.documents)
172
174
  else:
173
175
  self.bm25 = None
174
-
176
+
175
177
  def add_document(self, note_id: str, content: str) -> bool:
176
178
  """
177
179
  添加文档到索引(增量更新)
178
-
180
+
179
181
  Args:
180
182
  note_id: 笔记 ID
181
183
  content: 笔记内容
182
-
184
+
183
185
  Returns:
184
186
  是否成功添加
185
187
  """
@@ -187,68 +189,68 @@ class BM25Index:
187
189
  # 如果已存在,先移除
188
190
  if note_id in self.doc_mapping:
189
191
  self.remove_document(note_id)
190
-
192
+
191
193
  # 分词
192
194
  tokens = self._tokenize(content)
193
195
  if not tokens:
194
196
  return True
195
-
197
+
196
198
  # 添加到索引
197
199
  idx = len(self.documents)
198
200
  self.documents.append(tokens)
199
201
  self.doc_ids.append(note_id)
200
202
  self.doc_mapping[note_id] = idx
201
-
203
+
202
204
  # 重建索引
203
205
  self._rebuild_index()
204
-
206
+
205
207
  # 保存
206
208
  self._save()
207
-
209
+
208
210
  return True
209
-
211
+
210
212
  except Exception as e:
211
213
  logger.error(f"Failed to add document {note_id}: {e}")
212
214
  return False
213
-
215
+
214
216
  def remove_document(self, note_id: str) -> bool:
215
217
  """
216
218
  从索引中移除文档
217
-
219
+
218
220
  Args:
219
221
  note_id: 笔记 ID
220
-
222
+
221
223
  Returns:
222
224
  是否成功移除
223
225
  """
224
226
  try:
225
227
  if note_id not in self.doc_mapping:
226
228
  return True
227
-
229
+
228
230
  idx = self.doc_mapping[note_id]
229
-
231
+
230
232
  # 移除数据
231
233
  self.documents.pop(idx)
232
234
  self.doc_ids.pop(idx)
233
235
  del self.doc_mapping[note_id]
234
-
236
+
235
237
  # 更新其他文档的索引
236
238
  self.doc_mapping = {}
237
239
  for i, doc_id in enumerate(self.doc_ids):
238
240
  self.doc_mapping[doc_id] = i
239
-
241
+
240
242
  # 重建索引
241
243
  self._rebuild_index()
242
-
244
+
243
245
  # 保存
244
246
  self._save()
245
-
247
+
246
248
  return True
247
-
249
+
248
250
  except Exception as e:
249
251
  logger.error(f"Failed to remove document {note_id}: {e}")
250
252
  return False
251
-
253
+
252
254
  def add_documents_batch(self, documents: List[Tuple[str, str]]) -> bool:
253
255
  """
254
256
  批量添加文档到索引(高效版本)
@@ -309,7 +311,7 @@ class BM25Index:
309
311
  logger.info(f"Batch added {len(documents)} documents to BM25 index")
310
312
  return True
311
313
 
312
- except Exception as e:
314
+ except Exception:
313
315
  # 恢复到批次前的状态
314
316
  self.documents = saved_docs
315
317
  self.doc_ids = saved_ids
@@ -324,114 +326,116 @@ class BM25Index:
324
326
  def search(self, query: str, top_k: int = 5) -> List[Dict]:
325
327
  """
326
328
  搜索文档
327
-
329
+
328
330
  Args:
329
331
  query: 搜索查询
330
332
  top_k: 返回结果数量
331
-
333
+
332
334
  Returns:
333
335
  搜索结果列表,每项包含 note_id 和 score
334
336
  """
335
337
  if not self.bm25 or not self.documents:
336
338
  return []
337
-
339
+
338
340
  try:
339
341
  # 分词
340
342
  query_tokens = self._tokenize(query)
341
343
  if not query_tokens:
342
344
  return []
343
-
345
+
344
346
  # BM25 搜索
345
347
  scores = self.bm25.get_scores(query_tokens)
346
-
348
+
347
349
  # 获取 top_k 结果
348
350
  top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
349
-
351
+
350
352
  results = []
351
353
  for idx in top_indices:
352
354
  # BM25 分数可能为负,只要大于最小值就返回
353
355
  if scores[idx] > -10: # 使用合理的阈值
354
- results.append({
355
- 'note_id': self.doc_ids[idx],
356
- 'score': float(scores[idx]),
357
- })
358
-
356
+ results.append(
357
+ {
358
+ "note_id": self.doc_ids[idx],
359
+ "score": float(scores[idx]),
360
+ }
361
+ )
362
+
359
363
  return results
360
-
364
+
361
365
  except Exception as e:
362
366
  logger.error(f"BM25 search failed: {e}")
363
367
  return []
364
-
368
+
365
369
  def rebuild_from_notes(self, notes: List) -> bool:
366
370
  """
367
371
  从笔记列表全量重建索引
368
-
372
+
369
373
  Args:
370
374
  notes: Note 对象列表
371
-
375
+
372
376
  Returns:
373
377
  是否成功重建
374
378
  """
375
379
  try:
376
380
  self._reset()
377
-
381
+
378
382
  for note in notes:
379
383
  # 组合标题和内容
380
384
  content = f"{note.title} {note.content}"
381
385
  tokens = self._tokenize(content)
382
-
386
+
383
387
  if tokens:
384
388
  idx = len(self.documents)
385
389
  self.documents.append(tokens)
386
390
  self.doc_ids.append(note.id)
387
391
  self.doc_mapping[note.id] = idx
388
-
392
+
389
393
  # 构建索引
390
394
  self._rebuild_index()
391
-
395
+
392
396
  # 保存
393
397
  self._save()
394
-
398
+
395
399
  logger.info(f"Rebuilt BM25 index from {len(notes)} notes")
396
400
  return True
397
-
401
+
398
402
  except Exception as e:
399
403
  logger.error(f"Failed to rebuild BM25 index: {e}")
400
404
  return False
401
-
405
+
402
406
  def get_stats(self) -> Dict:
403
407
  """
404
408
  获取索引统计信息
405
-
409
+
406
410
  Returns:
407
411
  统计信息字典
408
412
  """
409
413
  return {
410
- 'indexed': len(self.doc_ids),
411
- 'version': self.INDEX_VERSION,
412
- 'index_path': str(self.index_path),
413
- 'index_exists': self.index_path.exists(),
414
+ "indexed": len(self.doc_ids),
415
+ "version": self.INDEX_VERSION,
416
+ "index_path": str(self.index_path),
417
+ "index_exists": self.index_path.exists(),
414
418
  }
415
-
419
+
416
420
  def clear(self) -> bool:
417
421
  """
418
422
  清空索引
419
-
423
+
420
424
  Returns:
421
425
  是否成功清空
422
426
  """
423
427
  try:
424
428
  self._reset()
425
-
429
+
426
430
  # 删除文件
427
431
  if self.index_path.exists():
428
432
  self.index_path.unlink()
429
433
  if self.metadata_path.exists():
430
434
  self.metadata_path.unlink()
431
-
435
+
432
436
  logger.info("Cleared BM25 index")
433
437
  return True
434
-
438
+
435
439
  except Exception as e:
436
440
  logger.error(f"Failed to clear BM25 index: {e}")
437
441
  return False
@@ -444,7 +448,7 @@ _bm25_index: Optional[BM25Index] = None
444
448
  def get_bm25_index() -> BM25Index:
445
449
  """
446
450
  获取 BM25 索引实例(单例模式)
447
-
451
+
448
452
  Returns:
449
453
  BM25Index 实例
450
454
  """