@hupan56/wlkj 2.2.6 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
repowiki.py - 读取 Qoder IDE 生成的 Repo Wiki, 转成搜索可用的索引。
|
|
4
|
+
|
|
5
|
+
Repo Wiki 是 Qoder IDE 用 LLM 对整个仓库生成的语义级知识图谱:
|
|
6
|
+
- wiki_catalogs: 模块/功能文档 (含 dependent_files 源文件映射)
|
|
7
|
+
- wiki_items: 文档标题/描述
|
|
8
|
+
- knowledge_relations: 模块间父子/依赖关系
|
|
9
|
+
- content/*.md: 带行号引用的人类可读文档
|
|
10
|
+
|
|
11
|
+
位置: data/code/{project}/.qoder/repowiki/{lang}/meta/repowiki-metadata.json
|
|
12
|
+
data/code/{project}/.qoder/repowiki/{lang}/content/{分类}/{页面}.md
|
|
13
|
+
|
|
14
|
+
本模块把这些转成:
|
|
15
|
+
- wiki-index.json: {关键词: [{project, title, catalog_id, md_path, dependent_files}]}
|
|
16
|
+
- 供 search_index --wiki 和 context_pack 使用
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import re
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Dict, List, Optional
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def find_repowiki_dirs(code_dir: str) -> List[tuple]:
    """Locate the Repo Wiki root of every project below ``code_dir``.

    A project qualifies when
    ``{code_dir}/{project}/.qoder/repowiki/{lang}/meta/repowiki-metadata.json``
    exists. Only one language variant is reported per project.

    Args:
        code_dir: Directory whose immediate children are project folders.

    Returns:
        A list of ``(project, wiki_root)`` tuples ordered by project name,
        where ``wiki_root`` is the ``.../repowiki/{lang}`` directory. The
        list is empty when ``code_dir`` is not a directory.
    """
    results = []
    if not os.path.isdir(code_dir):
        return results
    # os.listdir() returns entries in arbitrary order; sorting makes both the
    # project order and the language variant that wins reproducible.
    for proj in sorted(os.listdir(code_dir)):
        wiki_base = os.path.join(code_dir, proj, '.qoder', 'repowiki')
        if not os.path.isdir(wiki_base):
            continue
        for lang in sorted(os.listdir(wiki_base)):
            lang_dir = os.path.join(wiki_base, lang)
            meta = os.path.join(lang_dir, 'meta', 'repowiki-metadata.json')
            if os.path.isfile(meta):
                results.append((proj, lang_dir))
                break  # keep a single language variant per project
    return results
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_wiki_metadata(wiki_root: str) -> Optional[dict]:
    """Load ``meta/repowiki-metadata.json`` from a Repo Wiki root.

    Args:
        wiki_root: The ``.../repowiki/{lang}`` directory.

    Returns:
        The parsed JSON object, or ``None`` when the file is missing,
        unreadable, not valid UTF-8 JSON, or its top level is not an object.
    """
    meta_path = os.path.join(wiki_root, 'meta', 'repowiki-metadata.json')
    try:
        with open(meta_path, encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    # Callers use dict methods on the result, so reject any other JSON root.
    return data if isinstance(data, dict) else None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _extract_keywords(text: str) -> List[str]:
|
|
56
|
+
"""从标题/描述提取搜索关键词 (中文分词 + 英文小写)。"""
|
|
57
|
+
if not text:
|
|
58
|
+
return []
|
|
59
|
+
words = set()
|
|
60
|
+
# 英文: 按非字母数字分割, 保留长度>=3的
|
|
61
|
+
for tok in re.split(r'[^a-zA-Z0-9]+', text):
|
|
62
|
+
tok = tok.strip().lower()
|
|
63
|
+
if len(tok) >= 3:
|
|
64
|
+
words.add(tok)
|
|
65
|
+
# 中文: 整段作为关键词 (让 CN_MAP 在搜索时做分词扩展)
|
|
66
|
+
# 提取连续中文片段
|
|
67
|
+
for m in re.finditer(r'[\u4e00-\u9fff]{2,}', text):
|
|
68
|
+
words.add(m.group())
|
|
69
|
+
return list(words)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def build_wiki_index(code_dir: str, output_path: str) -> dict:
    """Scan every project's Repo Wiki and write the keyword index as JSON.

    For each catalog in ``wiki_catalogs`` that has a matching markdown page,
    keywords are extracted from its title, name, description and dependent
    file paths, and the catalog entry is registered under each keyword.

    Args:
        code_dir: Directory whose children are project folders.
        output_path: Destination of the JSON file (``wiki-index.json``).

    Returns:
        ``{}`` when no Repo Wiki exists under ``code_dir`` (nothing is written
        in that case). Otherwise ``{'index': {...}, 'stats': {...}}`` where
        ``index`` maps ``keyword -> [entry, ...]``, each entry has the keys
        ``project``, ``title``, ``catalog_id``, ``md_path``,
        ``dependent_files`` and ``description``, and ``stats`` counts
        ``projects``, ``pages`` and ``keywords``.
    """
    wiki_dirs = find_repowiki_dirs(code_dir)
    if not wiki_dirs:
        # Callers distinguish "no wiki" from "built" by this empty dict.
        return {}

    index = {}  # keyword -> [entry, ...]
    stats = {'projects': 0, 'pages': 0, 'keywords': 0}

    for project, wiki_root in wiki_dirs:
        meta = load_wiki_metadata(wiki_root)
        if not meta:
            continue
        stats['projects'] += 1

        # ``or`` guards against keys that are present but null in the JSON.
        catalogs = meta.get('wiki_catalogs') or []
        items = meta.get('wiki_items') or []
        id_to_title = {it.get('catalog_id'): it.get('title') or '' for it in items}

        for cat in catalogs:
            cat_id = cat.get('id', '')
            name = cat.get('name') or ''
            desc = cat.get('description') or ''
            dep_files = cat.get('dependent_files') or ''
            # NOTE(review): the code treats dependent_files as one
            # comma-separated string; a list is tolerated defensively --
            # confirm against the metadata actually produced by the IDE.
            if isinstance(dep_files, (list, tuple)):
                dep_files = ','.join(str(f) for f in dep_files)
            elif not isinstance(dep_files, str):
                dep_files = ''
            title = id_to_title.get(cat_id) or name

            # Catalogs without a markdown page are not indexed.
            md_path = _find_md_for_catalog(wiki_root, cat_id, name)
            if not md_path:
                continue

            stats['pages'] += 1

            kw_text = ' '.join([title, name, desc])
            if dep_files:
                # Path components of dependent files become keywords too.
                kw_text += ' ' + dep_files.replace(',', ' ').replace('/', ' ')
            keywords = _extract_keywords(kw_text)

            entry = {
                'project': project,
                'title': title,
                'catalog_id': cat_id,
                'md_path': md_path,
                'dependent_files': [f.strip() for f in dep_files.split(',') if f.strip()],
                'description': desc,
            }

            for kw in keywords:
                bucket = index.setdefault(kw, [])
                if entry not in bucket:
                    bucket.append(entry)

    stats['keywords'] = len(index)
    result = {'index': index, 'stats': stats}

    out_dir = os.path.dirname(output_path)
    if out_dir:
        # A bare file name has no directory part, and os.makedirs('') raises.
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    return result
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _find_md_for_catalog(wiki_root: str, catalog_id: str, name: str) -> Optional[str]:
|
|
142
|
+
"""在 content/ 目录下找对应的 markdown 文件。
|
|
143
|
+
|
|
144
|
+
文件名通常等于 catalog name (.md), 在某个子分类目录下。
|
|
145
|
+
"""
|
|
146
|
+
content_dir = os.path.join(wiki_root, 'content')
|
|
147
|
+
if not os.path.isdir(content_dir):
|
|
148
|
+
return None
|
|
149
|
+
target = name + '.md'
|
|
150
|
+
for root, dirs, files in os.walk(content_dir):
|
|
151
|
+
if target in files:
|
|
152
|
+
return os.path.join(root, target)
|
|
153
|
+
# 模糊匹配: name 的第一段
|
|
154
|
+
short = name.split('-')[0].split('(')[0].strip()
|
|
155
|
+
if short and short != name:
|
|
156
|
+
target2 = short + '.md'
|
|
157
|
+
for root, dirs, files in os.walk(content_dir):
|
|
158
|
+
for fn in files:
|
|
159
|
+
if fn == target2 or short in fn:
|
|
160
|
+
return os.path.join(root, fn)
|
|
161
|
+
return None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def load_wiki_index(index_dir: str) -> dict:
    """Load the keyword index from ``{index_dir}/wiki-index.json``.

    Args:
        index_dir: Directory containing ``wiki-index.json``.

    Returns:
        The mapping stored under the file's ``index`` key. ``{}`` is returned
        when the file is missing, unreadable, not valid UTF-8 JSON, or does
        not have the expected ``{'index': {...}}`` shape.
    """
    path = os.path.join(index_dir, 'wiki-index.json')
    try:
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(data, dict):
        return {}
    index = data.get('index', {})
    # Callers iterate with .items(), so never hand back a non-dict value.
    return index if isinstance(index, dict) else {}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def search_wiki(query: str, index_dir: str, code_dir: str,
                cn_map: Optional[dict] = None) -> List[dict]:
    """Search the Repo Wiki keyword index.

    The index is loaded from ``index_dir``; if it is missing or empty it is
    built from ``code_dir`` first. The query is expanded into a set of search
    words: the whole lower-cased query, the translations of every ``cn_map``
    key found in the query, the remaining ASCII fragments, and every run of
    two or more CJK characters. A keyword matches when it contains a search
    word or is contained in one.

    Each matching entry is scored by the best of its matching words:
    10 when the word and the title overlap, 5 when the word occurs in the
    description, 1 otherwise (the hit came from a dependent-file keyword).

    Args:
        query: User query; may mix Chinese and English.
        index_dir: Directory holding ``wiki-index.json``.
        code_dir: Directory of project folders, used when rebuilding.
        cn_map: Optional mapping from a Chinese term to a space-separated
            string of English terms.

    Returns:
        Up to 15 entries, highest score first, de-duplicated by
        ``(project, title)``. Empty when there is no index or no usable
        search word.
    """
    wiki_idx = load_wiki_index(index_dir)
    if not wiki_idx:
        out = os.path.join(index_dir, 'wiki-index.json')
        result = build_wiki_index(code_dir, out)
        wiki_idx = result.get('index', {}) if isinstance(result, dict) else result

    if not wiki_idx:
        return []

    query = query or ''
    words = {query.strip().lower()}
    if cn_map:
        remaining = query
        # Longest terms first so a long term is not split by a shorter one.
        for cn in sorted(cn_map, key=len, reverse=True):
            # An empty key is "contained" in every string and replacing it
            # would insert a space between all characters.
            if cn and cn in remaining:
                words.update(en.lower() for en in cn_map[cn].split())
                remaining = remaining.replace(cn, ' ')
        for frag in remaining.split():
            if frag.isascii():
                words.add(frag.lower())
    for m in re.finditer(r'[\u4e00-\u9fff]{2,}', query):
        words.add(m.group())

    # An empty word is a substring of every keyword and would match everything.
    words.discard('')
    if not words:
        return []

    def _score(word, title, desc):
        # ``title and`` stops an empty title from counting as a title hit.
        if word in title or (title and title in word):
            return 10
        if word in desc:
            return 5
        return 1

    # (project, title) -> [best score, first entry seen]; insertion order is
    # kept so that ties stay in index order after the stable sort below.
    best = {}
    for kw, entries in wiki_idx.items():
        kw_lower = kw.lower()
        if not kw_lower:
            continue
        hits = [w for w in words if w in kw_lower or kw_lower in w]
        if not hits:
            continue
        for entry in entries:
            key = (entry.get('project'), entry.get('title'))
            title = (entry.get('title') or '').lower()
            desc = (entry.get('description') or '').lower()
            score = max(_score(w, title, desc) for w in hits)
            slot = best.get(key)
            if slot is None:
                best[key] = [score, entry]
            elif score > slot[0]:
                slot[0] = score

    ranked = sorted(best.values(), key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in ranked[:15]]
|
|
@@ -247,19 +247,43 @@ def _compute_and_print(query, platform, page_type):
|
|
|
247
247
|
print('- 模板: .qoder/templates/prototype-web.html / prototype-app.html')
|
|
248
248
|
print('- PRD 模板: .qoder/templates/prd-full-template.md / prd-quick-template.md')
|
|
249
249
|
|
|
250
|
-
# ---- 7. Repo Wiki ----
|
|
251
|
-
wikis = []
|
|
252
|
-
if os.path.isdir(os.path.join(BASE, '.qoder', 'repowiki')):
|
|
253
|
-
wikis.append('.qoder/repowiki/')
|
|
250
|
+
# ---- 7. Repo Wiki (语义级模块文档, 优先参考) ----
|
|
254
251
|
code_dir = os.path.join(BASE, 'data', 'code')
|
|
252
|
+
wiki_results = []
|
|
253
|
+
try:
|
|
254
|
+
from common.repowiki import search_wiki, build_wiki_index
|
|
255
|
+
from common.terms import get_cn_map_with_auto
|
|
256
|
+
wiki_idx_path = os.path.join(INDEX_DIR, 'wiki-index.json')
|
|
257
|
+
if not os.path.isfile(wiki_idx_path):
|
|
258
|
+
build_wiki_index(code_dir, wiki_idx_path)
|
|
259
|
+
cn_map = get_cn_map_with_auto()
|
|
260
|
+
wiki_results = search_wiki(query, INDEX_DIR, code_dir, cn_map)
|
|
261
|
+
except Exception:
|
|
262
|
+
pass
|
|
263
|
+
|
|
264
|
+
# 也列出 wiki 目录 (供手动浏览)
|
|
265
|
+
wiki_dirs = []
|
|
266
|
+
if os.path.isdir(os.path.join(BASE, '.qoder', 'repowiki')):
|
|
267
|
+
wiki_dirs.append('.qoder/repowiki/')
|
|
255
268
|
if os.path.isdir(code_dir):
|
|
256
269
|
for p in sorted(os.listdir(code_dir)):
|
|
257
270
|
if os.path.isdir(os.path.join(code_dir, p, '.qoder', 'repowiki')):
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
271
|
+
wiki_dirs.append('data/code/{}/.qoder/repowiki/'.format(p))
|
|
272
|
+
|
|
273
|
+
if wiki_results or wiki_dirs:
|
|
274
|
+
print('\n## 7. Repo Wiki (语义级模块文档, 比源码更精准)')
|
|
275
|
+
if wiki_results:
|
|
276
|
+
print(' 相关模块文档 ({} 条, 带行号引用, 优先读这些):'.format(len(wiki_results)))
|
|
277
|
+
for i, wr in enumerate(wiki_results[:5], 1):
|
|
278
|
+
print(' {}. [{}] {}'.format(i, wr.get('project','?'), wr.get('title','')))
|
|
279
|
+
if wr.get('dependent_files'):
|
|
280
|
+
print(' 核心文件: ' + ', '.join(wr['dependent_files'][:2]))
|
|
281
|
+
if wr.get('md_path'):
|
|
282
|
+
print(' 文档: ' + os.path.relpath(wr['md_path'], BASE))
|
|
283
|
+
elif wiki_dirs:
|
|
284
|
+
print(' Wiki 可用但无精确匹配, 手动浏览:')
|
|
285
|
+
for w in wiki_dirs:
|
|
286
|
+
print(' - ' + w)
|
|
263
287
|
|
|
264
288
|
|
|
265
289
|
if __name__ == '__main__':
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
"""
|
|
4
4
|
Weekly Knowledge Graph Update
|
|
@@ -1053,6 +1053,25 @@ def main():
|
|
|
1053
1053
|
if not (sync_only or prd_only):
|
|
1054
1054
|
verify_result = verify_indexes(config)
|
|
1055
1055
|
|
|
1056
|
+
# Step 5.6: Build Repo Wiki index (Qoder IDE 生成的语义级模块文档)
|
|
1057
|
+
if not (sync_only or prd_only):
|
|
1058
|
+
try:
|
|
1059
|
+
from common.repowiki import build_wiki_index, find_repowiki_dirs
|
|
1060
|
+
code_dir = os.path.join(BASE, 'data', 'code')
|
|
1061
|
+
wiki_dirs = find_repowiki_dirs(code_dir)
|
|
1062
|
+
if wiki_dirs:
|
|
1063
|
+
print('\n=== Building Wiki Index (Repo Wiki 语义图谱) ===')
|
|
1064
|
+
out = os.path.join(INDEX_DIR, 'wiki-index.json')
|
|
1065
|
+
r = build_wiki_index(code_dir, out)
|
|
1066
|
+
if isinstance(r, dict) and r.get('stats'):
|
|
1067
|
+
s = r['stats']
|
|
1068
|
+
print(' Wiki: {} 项目 / {} 模块文档 / {} 关键词'.format(
|
|
1069
|
+
s.get('projects',0), s.get('pages',0), s.get('keywords',0)))
|
|
1070
|
+
else:
|
|
1071
|
+
print('\n=== Wiki Index: 无 Repo Wiki (可选, Qoder IDE 生成后自动纳入) ===')
|
|
1072
|
+
except Exception as e:
|
|
1073
|
+
print(' [WARN] Wiki index build failed (不阻塞): {}'.format(str(e)[:80]))
|
|
1074
|
+
|
|
1056
1075
|
# Step 6: Write meta (读 module-map 拿真实 project 数; counts 用已加载的 ki)
|
|
1057
1076
|
module_map = load_json(os.path.join(INDEX_DIR, 'module-map.json'))
|
|
1058
1077
|
if shared_ki is None:
|
|
@@ -464,6 +464,52 @@ def show_vben():
|
|
|
464
464
|
print()
|
|
465
465
|
|
|
466
466
|
|
|
467
|
+
def search_wiki_cli(query):
    """Search the Repo Wiki index for ``query`` and print the hits.

    Builds ``data/index/wiki-index.json`` on first use. Prints a notice and
    returns early when the wiki modules cannot be imported, when no Repo Wiki
    is available to build from, or when nothing matches.
    """
    try:
        from common.repowiki import search_wiki, build_wiki_index
        from common.terms import get_cn_map_with_auto
    except ImportError:
        print('Wiki 搜索不可用 (common.repowiki 模块缺失)')
        return

    cn_map = get_cn_map_with_auto()
    data_root = os.path.join(BASE, 'data')
    code_dir = os.path.join(data_root, 'code')
    index_dir = os.path.join(data_root, 'index')

    # Build the index lazily when it has never been generated.
    index_file = os.path.join(index_dir, 'wiki-index.json')
    if not os.path.isfile(index_file):
        print('[构建] 首次使用, 正在从 Repo Wiki 构建语义索引...')
        built = build_wiki_index(code_dir, index_file)
        stats = built.get('stats') if isinstance(built, dict) else None
        if not stats:
            print(' 未找到 Repo Wiki (data/code/*/.qoder/repowiki/)')
            print(' 在 Qoder IDE 里对项目生成 Repo Wiki 后可用')
            return
        print(' 完成: {} 项目 / {} 模块文档 / {} 关键词\n'.format(
            stats.get('projects', 0), stats.get('pages', 0), stats.get('keywords', 0)))

    hits = search_wiki(query, index_dir, code_dir, cn_map)
    if not hits:
        print('No wiki pages match: {}'.format(query))
        return

    print('Wiki pages matching: {} ({} found)\n'.format(query, len(hits)))
    for rank, page in enumerate(hits, 1):
        print(' {}. [{}] {}'.format(rank, page.get('project', '?'), page.get('title', '')))
        dep_list = page.get('dependent_files')
        if dep_list:
            # Show at most two files, then the total when there are more.
            print(' 依赖: ' + ', '.join(dep_list[:2]))
            if len(dep_list) > 2:
                print(' (共 {} 个文件)'.format(len(dep_list)))
        doc_path = page.get('md_path')
        if doc_path:
            print(' 文档: ' + os.path.relpath(doc_path, BASE))
    print('\n 提示: 读 wiki 文档 (带行号引用) 比读源码更省 token')
|
|
511
|
+
|
|
512
|
+
|
|
467
513
|
def print_usage():
|
|
468
514
|
print('Usage:')
|
|
469
515
|
print(' search_index.py <keyword> [--platform web|app] - Search code')
|
|
@@ -472,6 +518,7 @@ def print_usage():
|
|
|
472
518
|
print(' search_index.py --field <name> - Field usage')
|
|
473
519
|
print(' search_index.py --api <keyword> - API endpoints')
|
|
474
520
|
print(' search_index.py --components - UI components')
|
|
521
|
+
print(' search_index.py --wiki <keyword> - Repo Wiki 语义模块文档 (精准)')
|
|
475
522
|
print(' search_index.py --modules - Project overview')
|
|
476
523
|
print(' search_index.py --list - Top 50 keywords')
|
|
477
524
|
print(' search_index.py --vben - Vben design tokens')
|
|
@@ -500,6 +547,8 @@ if __name__ == '__main__':
|
|
|
500
547
|
search_field(args[1])
|
|
501
548
|
elif args[0] == '--api' and len(args) >= 2:
|
|
502
549
|
search_api(args[1])
|
|
550
|
+
elif args[0] == '--wiki' and len(args) >= 2:
|
|
551
|
+
search_wiki_cli(args[1])
|
|
503
552
|
elif args[0] == '--components':
|
|
504
553
|
show_components()
|
|
505
554
|
elif args[0] == '--modules':
|