@hupan56/wlkj 2.2.5 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/templates/qoder/scripts/common/repowiki.py +243 -0
- package/templates/qoder/scripts/common/terms.py +42 -2
- package/templates/qoder/scripts/context_pack.py +33 -9
- package/templates/qoder/scripts/git_sync.py +20 -1
- package/templates/qoder/scripts/search_index.py +172 -5
package/package.json
CHANGED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
repowiki.py - 读取 Qoder IDE 生成的 Repo Wiki, 转成搜索可用的索引。
|
|
4
|
+
|
|
5
|
+
Repo Wiki 是 Qoder IDE 用 LLM 对整个仓库生成的语义级知识图谱:
|
|
6
|
+
- wiki_catalogs: 模块/功能文档 (含 dependent_files 源文件映射)
|
|
7
|
+
- wiki_items: 文档标题/描述
|
|
8
|
+
- knowledge_relations: 模块间父子/依赖关系
|
|
9
|
+
- content/*.md: 带行号引用的人类可读文档
|
|
10
|
+
|
|
11
|
+
位置: data/code/{project}/.qoder/repowiki/{lang}/meta/repowiki-metadata.json
|
|
12
|
+
data/code/{project}/.qoder/repowiki/{lang}/content/{分类}/{页面}.md
|
|
13
|
+
|
|
14
|
+
本模块把这些转成:
|
|
15
|
+
- wiki-index.json: {关键词: [{project, title, catalog_id, md_path, dependent_files}]}
|
|
16
|
+
- 供 search_index --wiki 和 context_pack 使用
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import re
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Dict, List, Optional
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def find_repowiki_dirs(code_dir: str) -> List[tuple]:
    """Locate the Repo Wiki root of every project directly under ``code_dir``.

    A project qualifies when
    ``{code_dir}/{project}/.qoder/repowiki/{lang}/meta/repowiki-metadata.json``
    exists for at least one ``{lang}`` sub-directory.

    Args:
        code_dir: Directory whose immediate children are project checkouts.

    Returns:
        A list of ``(project, wiki_root)`` tuples, where ``wiki_root`` is the
        language directory that holds ``meta/``. At most one language is
        returned per project. The list is empty when ``code_dir`` is not a
        directory.
    """
    results = []
    if not os.path.isdir(code_dir):
        return results
    # os.listdir() returns entries in arbitrary order; sorting makes both the
    # project order and the chosen language version reproducible across
    # machines and runs.
    for proj in sorted(os.listdir(code_dir)):
        wiki_base = os.path.join(code_dir, proj, '.qoder', 'repowiki')
        if not os.path.isdir(wiki_base):
            continue
        for lang in sorted(os.listdir(wiki_base)):
            lang_dir = os.path.join(wiki_base, lang)
            meta = os.path.join(lang_dir, 'meta', 'repowiki-metadata.json')
            if os.path.isfile(meta):
                results.append((proj, lang_dir))
                break  # only one language version per project
    return results
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_wiki_metadata(wiki_root: str) -> Optional[dict]:
    """Load ``meta/repowiki-metadata.json`` from a wiki root directory.

    Args:
        wiki_root: Language directory of a Repo Wiki (the parent of ``meta/``).

    Returns:
        The parsed JSON object, or ``None`` when the file is missing,
        unreadable, not valid UTF-8 JSON, or its top level is not an object.
    """
    meta_path = os.path.join(wiki_root, 'meta', 'repowiki-metadata.json')
    try:
        with open(meta_path, encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return None
    # Callers use .get() on the result, so anything but a dict is unusable.
    return data if isinstance(data, dict) else None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _extract_keywords(text: str) -> List[str]:
|
|
56
|
+
"""从标题/描述提取搜索关键词 (中文分词 + 英文小写)。"""
|
|
57
|
+
if not text:
|
|
58
|
+
return []
|
|
59
|
+
words = set()
|
|
60
|
+
# 英文: 按非字母数字分割, 保留长度>=3的
|
|
61
|
+
for tok in re.split(r'[^a-zA-Z0-9]+', text):
|
|
62
|
+
tok = tok.strip().lower()
|
|
63
|
+
if len(tok) >= 3:
|
|
64
|
+
words.add(tok)
|
|
65
|
+
# 中文: 整段作为关键词 (让 CN_MAP 在搜索时做分词扩展)
|
|
66
|
+
# 提取连续中文片段
|
|
67
|
+
for m in re.finditer(r'[\u4e00-\u9fff]{2,}', text):
|
|
68
|
+
words.add(m.group())
|
|
69
|
+
return list(words)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def build_wiki_index(code_dir: str, output_path: str) -> dict:
    """Scan every project's Repo Wiki and write the keyword index as JSON.

    For each catalog in ``wiki_catalogs`` that has a matching markdown page,
    keywords are extracted from its title, name, description and dependent
    file paths, and the catalog entry is registered under each keyword.

    Args:
        code_dir: Directory whose children are project checkouts.
        output_path: Destination of the JSON file (``wiki-index.json``).

    Returns:
        ``{'index': {keyword: [entry, ...]}, 'stats': {...}}`` where each
        entry has the keys ``project``, ``title``, ``catalog_id``,
        ``md_path``, ``dependent_files`` and ``description``. Returns ``{}``
        (and writes nothing) when no Repo Wiki is found.
    """
    wiki_dirs = find_repowiki_dirs(code_dir)
    if not wiki_dirs:
        return {}

    index = {}  # keyword -> [entry, ...]
    stats = {'projects': 0, 'pages': 0, 'keywords': 0}

    for project, wiki_root in wiki_dirs:
        meta = load_wiki_metadata(wiki_root)
        if not meta or not isinstance(meta, dict):
            continue
        stats['projects'] += 1

        catalogs = meta.get('wiki_catalogs') or []
        items = meta.get('wiki_items') or []
        # catalog_id -> title
        id_to_title = {
            it.get('catalog_id'): it.get('title') or ''
            for it in items if isinstance(it, dict)
        }

        for cat in catalogs:
            if not isinstance(cat, dict):
                continue
            cat_id = cat.get('id', '')
            # JSON null would otherwise break ' '.join below.
            name = str(cat.get('name') or '')
            desc = str(cat.get('description') or '')
            title = str(id_to_title.get(cat_id) or name)

            # The original code treats dependent_files as a comma-separated
            # string; a JSON array is tolerated too (schema not visible
            # here -- TODO confirm against real metadata).
            dep_raw = cat.get('dependent_files') or ''
            if isinstance(dep_raw, (list, tuple)):
                dep_parts = [str(f) for f in dep_raw]
            else:
                dep_parts = str(dep_raw).split(',')
            dep_list = [f.strip() for f in dep_parts if f.strip()]

            # Skip catalogs that have no markdown page on disk.
            md_path = _find_md_for_catalog(wiki_root, cat_id, name)
            if not md_path:
                continue

            stats['pages'] += 1

            # Keywords come from title + name + description + file paths,
            # so that searching a file/class name also finds the page.
            kw_text = ' '.join([title, name, desc] + dep_list)
            keywords = _extract_keywords(kw_text)

            entry = {
                'project': project,
                'title': title,
                'catalog_id': cat_id,
                'md_path': md_path,
                'dependent_files': dep_list,
                'description': desc,
            }

            for kw in keywords:
                bucket = index.setdefault(kw, [])
                if entry not in bucket:
                    bucket.append(entry)

    stats['keywords'] = len(index)

    # os.makedirs('') raises, so only create a directory when there is one.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    payload = {'index': index, 'stats': stats}
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)

    return payload
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _find_md_for_catalog(wiki_root: str, catalog_id: str, name: str) -> Optional[str]:
|
|
142
|
+
"""在 content/ 目录下找对应的 markdown 文件。
|
|
143
|
+
|
|
144
|
+
文件名通常等于 catalog name (.md), 在某个子分类目录下。
|
|
145
|
+
"""
|
|
146
|
+
content_dir = os.path.join(wiki_root, 'content')
|
|
147
|
+
if not os.path.isdir(content_dir):
|
|
148
|
+
return None
|
|
149
|
+
target = name + '.md'
|
|
150
|
+
for root, dirs, files in os.walk(content_dir):
|
|
151
|
+
if target in files:
|
|
152
|
+
return os.path.join(root, target)
|
|
153
|
+
# 模糊匹配: name 的第一段
|
|
154
|
+
short = name.split('-')[0].split('(')[0].strip()
|
|
155
|
+
if short and short != name:
|
|
156
|
+
target2 = short + '.md'
|
|
157
|
+
for root, dirs, files in os.walk(content_dir):
|
|
158
|
+
for fn in files:
|
|
159
|
+
if fn == target2 or short in fn:
|
|
160
|
+
return os.path.join(root, fn)
|
|
161
|
+
return None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def load_wiki_index(index_dir: str) -> dict:
    """Load the keyword index from ``{index_dir}/wiki-index.json``.

    Args:
        index_dir: Directory that contains ``wiki-index.json``.

    Returns:
        The ``index`` mapping stored in the file. An empty dict is returned
        when the file is missing, unreadable, not valid UTF-8 JSON, or does
        not contain a dict-valued ``index`` key.
    """
    path = os.path.join(index_dir, 'wiki-index.json')
    try:
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(data, dict):
        return {}
    index = data.get('index', {})
    # Callers iterate with .items(); never hand back a non-dict.
    return index if isinstance(index, dict) else {}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def search_wiki(query: str, index_dir: str, code_dir: str,
                cn_map: Optional[dict] = None, limit: int = 15) -> List[dict]:
    """Search the Repo Wiki keyword index.

    The query is expanded into a set of search words (the lower-cased query
    itself, English expansions of Chinese terms found via ``cn_map``, ASCII
    leftovers, and runs of two or more CJK characters). A keyword matches
    when a search word is a substring of it or vice versa. Each matching
    entry is scored 10 for a title hit, 5 for a description hit and 1
    otherwise (e.g. a hit through a dependent file name); an entry keeps the
    best score it achieves across all matching keywords and words.

    If ``wiki-index.json`` is missing or empty, the index is built first.

    Args:
        query: User query (Chinese and/or English).
        index_dir: Directory containing ``wiki-index.json``.
        code_dir: Directory of project checkouts, used when the index has to
            be built.
        cn_map: Optional mapping from a Chinese term to a space-separated
            string of English keywords.
        limit: Maximum number of entries to return.

    Returns:
        Up to ``limit`` index entries, highest score first, de-duplicated by
        ``(project, title)``. Empty when nothing matches or the query
        yields no search words.
    """
    wiki_idx = load_wiki_index(index_dir)
    if not wiki_idx:
        # Build on demand.
        out = os.path.join(index_dir, 'wiki-index.json')
        result = build_wiki_index(code_dir, out)
        wiki_idx = result.get('index', {}) if isinstance(result, dict) else result

    if not wiki_idx:
        return []

    # ---- expand the query into search words ----
    query = query or ''
    words = set()
    q_lower = query.strip().lower()
    if q_lower:
        words.add(q_lower)
    if cn_map:
        remaining = query
        # Longest terms first, so a compound term wins over its parts.
        for cn in sorted(cn_map, key=len, reverse=True):
            # An empty key would match everywhere and shred the query.
            if cn and cn in remaining:
                words.update(en.lower() for en in cn_map[cn].split())
                remaining = remaining.replace(cn, ' ')
        # Leftover ASCII fragments.
        words.update(frag.lower() for frag in remaining.split() if frag.isascii())
    # Whole Chinese runs are searched as-is too.
    words.update(re.findall(r'[\u4e00-\u9fff]{2,}', query))

    if not words:
        # An empty word is a substring of everything; return nothing rather
        # than every page.
        return []

    # ---- match and score ----
    # (project, title) -> (score, entry). Dict insertion order records the
    # first time an entry was seen, which is the tie-break after sorting.
    best = {}
    for kw, entries in wiki_idx.items():
        kw_lower = kw.lower()
        if not kw_lower or not isinstance(entries, list):
            continue
        hits = [w for w in words if w in kw_lower or kw_lower in w]
        if not hits:
            continue
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            title = (entry.get('title') or '').lower()
            desc = (entry.get('description') or '').lower()
            score = 1  # matched only through some other keyword source
            for word in hits:
                # Guard `title in word`: an empty title is a substring of
                # every word and must not count as a title hit.
                if word in title or (title and title in word):
                    score = 10
                    break
                if word in desc:
                    score = 5
            key = (entry.get('project'), entry.get('title'))
            if key not in best or score > best[key][0]:
                best[key] = (score, entry)

    # sorted() is stable, so equal scores keep first-seen order.
    ranked = sorted(best.values(), key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in ranked[:limit]]
|
|
@@ -64,6 +64,37 @@ CN_MAP = {
|
|
|
64
64
|
'分类': 'category classify', '类型': 'type',
|
|
65
65
|
'地区': 'region area', '区域': 'zone region',
|
|
66
66
|
'时间': 'time date', '日期': 'date',
|
|
67
|
+
# 异常/品质/合规 (高频但之前缺失, 导致"异常记录"搜不到)
|
|
68
|
+
'异常': 'abnormal anomaly exception', '故障': 'fault error failure',
|
|
69
|
+
'报警': 'alarm alert warning', '违规': 'violation breach',
|
|
70
|
+
'合规': 'compliance', '风险': 'risk hazard',
|
|
71
|
+
'处理': 'handle process deal', '反馈': 'feedback',
|
|
72
|
+
# 筛选/查询 (PRD 高频, "新增筛选条件"类需求)
|
|
73
|
+
'筛选': 'filter search', '过滤': 'filter',
|
|
74
|
+
'搜索': 'search query', '查询': 'query search find',
|
|
75
|
+
'条件': 'condition filter criteria',
|
|
76
|
+
'排序': 'sort order', '分组': 'group',
|
|
77
|
+
# 页面/交互
|
|
78
|
+
'列表': 'list table', '详情': 'detail info',
|
|
79
|
+
'弹窗': 'modal dialog popup', '抽屉': 'drawer',
|
|
80
|
+
'表单': 'form', '表格': 'table grid',
|
|
81
|
+
'看板': 'dashboard board panel', '大屏': 'screen monitor dashboard',
|
|
82
|
+
'图表': 'chart graph', '柱状图': 'bar chart',
|
|
83
|
+
'饼图': 'pie chart', '折线图': 'line chart',
|
|
84
|
+
# 项目类型/业务对象 (本次需求触发)
|
|
85
|
+
'项目类型': 'projectType project type',
|
|
86
|
+
'业务对象': 'business object entity',
|
|
87
|
+
# 导入导出增强
|
|
88
|
+
'上传': 'upload import', '下载': 'download export',
|
|
89
|
+
'打印': 'print', '预览': 'preview',
|
|
90
|
+
# 人员/身份增强
|
|
91
|
+
'人员': 'person personnel staff user',
|
|
92
|
+
'负责人': 'owner manager charge',
|
|
93
|
+
'驾驶员': 'driver',
|
|
94
|
+
# 地图/位置
|
|
95
|
+
'地图': 'map gis',
|
|
96
|
+
'位置': 'location position gps',
|
|
97
|
+
'轨迹': 'track trajectory gps',
|
|
67
98
|
}
|
|
68
99
|
|
|
69
100
|
# Chinese business term -> code path/identifier patterns.
|
|
@@ -120,6 +151,15 @@ BUSINESS_PATH_MAP = {
|
|
|
120
151
|
'消息': ['message', 'Message', 'notify', 'Notify'],
|
|
121
152
|
'配置': ['config', 'Config', 'setting', 'Setting'],
|
|
122
153
|
'字典': ['dict', 'Dict', 'dictionary', 'Dictionary'],
|
|
154
|
+
# 异常/品质/合规 (新增, 解决"异常记录"搜不到)
|
|
155
|
+
'异常': ['abnormal', 'Abnormal', 'anomaly', 'Anomaly', 'exception', 'Exception'],
|
|
156
|
+
'故障': ['fault', 'Fault', 'error', 'Error', 'failure', 'Failure'],
|
|
157
|
+
'合规': ['compliance', 'Compliance'],
|
|
158
|
+
'看板': ['dashboard', 'Dashboard', 'board', 'Board', 'panel', 'Panel'],
|
|
159
|
+
'大屏': ['screen', 'Screen', 'monitor', 'Monitor', 'dashboard'],
|
|
160
|
+
'地图': ['map', 'Map', 'gis', 'GIS'],
|
|
161
|
+
'轨迹': ['track', 'Track', 'trajectory', 'gps', 'GPS'],
|
|
162
|
+
'筛选': ['filter', 'Filter', 'search', 'Search'],
|
|
123
163
|
}
|
|
124
164
|
|
|
125
165
|
# Chinese term -> single primary English keyword (derived from CN_MAP).
|
|
@@ -178,8 +218,8 @@ PRD_STOP_WORDS = {
|
|
|
178
218
|
'以及', '如果', '那么', '但是', '因为', '所以', '用户', '系统', '页面', '数据',
|
|
179
219
|
'信息', '操作', '管理', '列表', '详情', '新增', '修改', '删除', '查询', '搜索',
|
|
180
220
|
'筛选', '点击', '选择', '输入', '确认', '取消', '保存', '提交', '返回', '跳转',
|
|
181
|
-
'当前', '前端', '后端', '影响', '范围', '下拉', '条件', '字段', '全部',
|
|
182
|
-
'正常', '
|
|
221
|
+
'当前', '前端', '后端', '影响', '范围', '下拉', '条件', '字段', '全部',
|
|
222
|
+
'正常', '面板', '统计', '表页', '搜索表', '与其他', '表单', '背景',
|
|
183
223
|
'一个', '这个', '那个', '通过', '根据', '按照', '同时', '并且',
|
|
184
224
|
'要求', '需求', '描述', '说明', '备注', '注意', '重要', '优先', '级别',
|
|
185
225
|
'方案', '设计', '开发', '测试', '上线', '版本', '迭代', '更新', '发布',
|
|
@@ -247,19 +247,43 @@ def _compute_and_print(query, platform, page_type):
|
|
|
247
247
|
print('- 模板: .qoder/templates/prototype-web.html / prototype-app.html')
|
|
248
248
|
print('- PRD 模板: .qoder/templates/prd-full-template.md / prd-quick-template.md')
|
|
249
249
|
|
|
250
|
-
# ---- 7. Repo Wiki ----
|
|
251
|
-
wikis = []
|
|
252
|
-
if os.path.isdir(os.path.join(BASE, '.qoder', 'repowiki')):
|
|
253
|
-
wikis.append('.qoder/repowiki/')
|
|
250
|
+
# ---- 7. Repo Wiki (语义级模块文档, 优先参考) ----
|
|
254
251
|
code_dir = os.path.join(BASE, 'data', 'code')
|
|
252
|
+
wiki_results = []
|
|
253
|
+
try:
|
|
254
|
+
from common.repowiki import search_wiki, build_wiki_index
|
|
255
|
+
from common.terms import get_cn_map_with_auto
|
|
256
|
+
wiki_idx_path = os.path.join(INDEX_DIR, 'wiki-index.json')
|
|
257
|
+
if not os.path.isfile(wiki_idx_path):
|
|
258
|
+
build_wiki_index(code_dir, wiki_idx_path)
|
|
259
|
+
cn_map = get_cn_map_with_auto()
|
|
260
|
+
wiki_results = search_wiki(query, INDEX_DIR, code_dir, cn_map)
|
|
261
|
+
except Exception:
|
|
262
|
+
pass
|
|
263
|
+
|
|
264
|
+
# 也列出 wiki 目录 (供手动浏览)
|
|
265
|
+
wiki_dirs = []
|
|
266
|
+
if os.path.isdir(os.path.join(BASE, '.qoder', 'repowiki')):
|
|
267
|
+
wiki_dirs.append('.qoder/repowiki/')
|
|
255
268
|
if os.path.isdir(code_dir):
|
|
256
269
|
for p in sorted(os.listdir(code_dir)):
|
|
257
270
|
if os.path.isdir(os.path.join(code_dir, p, '.qoder', 'repowiki')):
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
271
|
+
wiki_dirs.append('data/code/{}/.qoder/repowiki/'.format(p))
|
|
272
|
+
|
|
273
|
+
if wiki_results or wiki_dirs:
|
|
274
|
+
print('\n## 7. Repo Wiki (语义级模块文档, 比源码更精准)')
|
|
275
|
+
if wiki_results:
|
|
276
|
+
print(' 相关模块文档 ({} 条, 带行号引用, 优先读这些):'.format(len(wiki_results)))
|
|
277
|
+
for i, wr in enumerate(wiki_results[:5], 1):
|
|
278
|
+
print(' {}. [{}] {}'.format(i, wr.get('project','?'), wr.get('title','')))
|
|
279
|
+
if wr.get('dependent_files'):
|
|
280
|
+
print(' 核心文件: ' + ', '.join(wr['dependent_files'][:2]))
|
|
281
|
+
if wr.get('md_path'):
|
|
282
|
+
print(' 文档: ' + os.path.relpath(wr['md_path'], BASE))
|
|
283
|
+
elif wiki_dirs:
|
|
284
|
+
print(' Wiki 可用但无精确匹配, 手动浏览:')
|
|
285
|
+
for w in wiki_dirs:
|
|
286
|
+
print(' - ' + w)
|
|
263
287
|
|
|
264
288
|
|
|
265
289
|
if __name__ == '__main__':
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
"""
|
|
4
4
|
Weekly Knowledge Graph Update
|
|
@@ -1053,6 +1053,25 @@ def main():
|
|
|
1053
1053
|
if not (sync_only or prd_only):
|
|
1054
1054
|
verify_result = verify_indexes(config)
|
|
1055
1055
|
|
|
1056
|
+
# Step 5.6: Build Repo Wiki index (Qoder IDE 生成的语义级模块文档)
|
|
1057
|
+
if not (sync_only or prd_only):
|
|
1058
|
+
try:
|
|
1059
|
+
from common.repowiki import build_wiki_index, find_repowiki_dirs
|
|
1060
|
+
code_dir = os.path.join(BASE, 'data', 'code')
|
|
1061
|
+
wiki_dirs = find_repowiki_dirs(code_dir)
|
|
1062
|
+
if wiki_dirs:
|
|
1063
|
+
print('\n=== Building Wiki Index (Repo Wiki 语义图谱) ===')
|
|
1064
|
+
out = os.path.join(INDEX_DIR, 'wiki-index.json')
|
|
1065
|
+
r = build_wiki_index(code_dir, out)
|
|
1066
|
+
if isinstance(r, dict) and r.get('stats'):
|
|
1067
|
+
s = r['stats']
|
|
1068
|
+
print(' Wiki: {} 项目 / {} 模块文档 / {} 关键词'.format(
|
|
1069
|
+
s.get('projects',0), s.get('pages',0), s.get('keywords',0)))
|
|
1070
|
+
else:
|
|
1071
|
+
print('\n=== Wiki Index: 无 Repo Wiki (可选, Qoder IDE 生成后自动纳入) ===')
|
|
1072
|
+
except Exception as e:
|
|
1073
|
+
print(' [WARN] Wiki index build failed (不阻塞): {}'.format(str(e)[:80]))
|
|
1074
|
+
|
|
1056
1075
|
# Step 6: Write meta (读 module-map 拿真实 project 数; counts 用已加载的 ki)
|
|
1057
1076
|
module_map = load_json(os.path.join(INDEX_DIR, 'module-map.json'))
|
|
1058
1077
|
if shared_ki is None:
|
|
@@ -78,7 +78,82 @@ def _search_cache_key(query, platform):
|
|
|
78
78
|
return hashlib.md5(raw.encode('utf-8')).hexdigest()[:16]
|
|
79
79
|
|
|
80
80
|
|
|
81
|
+
def _check_index_freshness():
    """Print a hint when source files look newer than the search index.

    Compares ``last_sync`` from ``data/index/.index-meta.json`` with the
    newest modification time of ``.java``/``.vue``/``.js``/``.ts`` files
    sampled from the upper levels of one project's ``src`` directory under
    ``data/code``. When the code is more than one hour newer, a refresh hint
    is printed and a marker file
    (``.qoder/.runtime/.index-stale-warned``) is touched so the hint is not
    repeated until the index metadata file is written again.

    Any failure is swallowed: this check must never block a search.
    """
    try:
        import datetime as _dt

        meta_path = os.path.join(BASE, 'data', 'index', '.index-meta.json')
        if not os.path.isfile(meta_path):
            return
        with open(meta_path, encoding='utf-8') as fh:
            meta = json.load(fh)
        synced_text = meta.get('last_sync', '')
        if not synced_text:
            return
        synced_at = _dt.datetime.strptime(synced_text, '%Y-%m-%d %H:%M')

        code_root = os.path.join(BASE, 'data', 'code')
        if not os.path.isdir(code_root):
            return

        latest = 0
        for project in os.listdir(code_root):
            # Only look below src/ so VCS metadata does not skew the sample.
            # (If src/ is a directory, the project entry necessarily is too.)
            src_root = os.path.join(code_root, project, 'src')
            if not os.path.isdir(src_root):
                continue
            for folder, subdirs, filenames in os.walk(src_root):
                # Depth cap: stop descending (and skip files) at level 3.
                if folder[len(src_root):].count(os.sep) >= 3:
                    subdirs[:] = []
                    continue
                for filename in filenames:
                    if not filename.endswith(('.java', '.vue', '.js', '.ts')):
                        continue
                    try:
                        stamp = os.path.getmtime(os.path.join(folder, filename))
                    except OSError:
                        continue
                    if stamp > latest:
                        latest = stamp
            if latest > 0:
                break  # one project with source files is enough of a sample

        if latest <= 0:
            return

        lag = _dt.datetime.fromtimestamp(latest) - synced_at
        # Stale means: code is more than one hour newer than the index.
        if lag.total_seconds() <= 3600:
            return

        marker = os.path.join(BASE, '.qoder', '.runtime', '.index-stale-warned')
        marker_mtime = os.path.getmtime(marker) if os.path.isfile(marker) else 0
        if marker_mtime > os.path.getmtime(meta_path):
            return  # already warned since the index metadata last changed

        print('[提示] 代码有更新但知识图谱未刷新 (上次索引: %s)' % synced_text)
        print(' 刷新: python .qoder/scripts/git_sync.py --index-only')
        try:
            os.makedirs(os.path.dirname(marker), exist_ok=True)
            open(marker, 'w').close()
        except OSError:
            pass
    except Exception:
        pass  # a failed freshness check must not block the search
|
|
151
|
+
|
|
152
|
+
|
|
81
153
|
def search_keywords(query, platform=None):
|
|
154
|
+
# 新鲜度检查 (代码改了但索引没更新 → 提示, 不阻塞)
|
|
155
|
+
_check_index_freshness()
|
|
156
|
+
|
|
82
157
|
# 结果缓存: 同 query+platform+索引未变 → 直接返回缓存输出
|
|
83
158
|
cache_key = _search_cache_key(query, platform)
|
|
84
159
|
cache_path = os.path.join(BASE, '.qoder', '.runtime', 'search-cache-%s.txt' % cache_key)
|
|
@@ -114,6 +189,53 @@ def search_keywords(query, platform=None):
|
|
|
114
189
|
pass
|
|
115
190
|
|
|
116
191
|
|
|
192
|
+
def _expand_chinese_query(query):
    """Translate a Chinese query into a list of English search words.

    Strategy:

    1. Exact match: the whole query is a key of the term map and/or of
       ``BUSINESS_PATH_MAP``; use those expansions.
    2. Segmentation (only when step 1 found nothing and the query has at
       least three characters): scan the term-map keys from longest to
       shortest, take every key that occurs in what is left of the query,
       and expand each one (plus its ``BUSINESS_PATH_MAP`` patterns, if any).
    3. When segmentation matched something, leftover ASCII fragments are
       appended lower-cased.

    The term map is ``get_cn_map_with_auto()`` from ``common.terms`` when
    importable, otherwise the module-level ``CN_MAP``.

    Returns:
        The expanded words, possibly with duplicates; empty when nothing
        matched.
    """
    try:
        from common.terms import get_cn_map_with_auto
        term_map = get_cn_map_with_auto()
    except ImportError:
        term_map = CN_MAP

    def _path_terms(term):
        # Path patterns reduced to bare lower-case identifiers.
        return [p.strip('/-').lower() for p in BUSINESS_PATH_MAP[term]]

    expanded = []

    # 1. Exact match on the whole query.
    if query in term_map:
        expanded += term_map[query].split()
    if query in BUSINESS_PATH_MAP:
        expanded += _path_terms(query)

    # 2. Segmentation is only attempted when the exact match found nothing
    #    and the query is long enough to hold more than one term.
    if expanded or len(query) < 3:
        return expanded

    leftover = query
    segments = []
    for key in sorted(term_map.keys(), key=len, reverse=True):
        if key in leftover:
            segments.append(key)
            leftover = leftover.replace(key, ' ')

    if not segments:
        return expanded

    for seg in segments:
        expanded += term_map[seg].split()
        if seg in BUSINESS_PATH_MAP:
            expanded += _path_terms(seg)

    # 3. Leftover ASCII fragments (English words / digits in the query).
    expanded += [piece.lower() for piece in leftover.split() if piece.isascii()]
    return expanded
|
|
237
|
+
|
|
238
|
+
|
|
117
239
|
def _search_keywords_impl(query, platform=None):
|
|
118
240
|
"""实际的搜索逻辑 (原 search_keywords 内容)。"""
|
|
119
241
|
ki = load_index('keyword-index.json', hint='Run: python git_sync.py')
|
|
@@ -121,11 +243,7 @@ def _search_keywords_impl(query, platform=None):
|
|
|
121
243
|
return
|
|
122
244
|
|
|
123
245
|
# Expand Chinese query via the shared term maps (same maps used at build time)
|
|
124
|
-
words =
|
|
125
|
-
if query in CN_MAP:
|
|
126
|
-
words.extend(CN_MAP[query].split())
|
|
127
|
-
if query in BUSINESS_PATH_MAP:
|
|
128
|
-
words.extend(p.strip('/-').lower() for p in BUSINESS_PATH_MAP[query])
|
|
246
|
+
words = _expand_chinese_query(query)
|
|
129
247
|
if words:
|
|
130
248
|
words = sorted(set(w.lower() for w in words))
|
|
131
249
|
print('Chinese: {} -> English: {}'.format(query, ' '.join(words)))
|
|
@@ -346,6 +464,52 @@ def show_vben():
|
|
|
346
464
|
print()
|
|
347
465
|
|
|
348
466
|
|
|
467
|
+
def search_wiki_cli(query):
    """CLI handler for ``--wiki``: search the Repo Wiki and print the hits.

    Builds ``data/index/wiki-index.json`` first when it does not exist yet.
    For each hit it prints the project, the title, up to two dependent files
    (plus the total count when there are more than two) and the markdown
    path relative to ``BASE``.
    """
    try:
        from common.repowiki import search_wiki, build_wiki_index
        from common.terms import get_cn_map_with_auto
    except ImportError:
        print('Wiki 搜索不可用 (common.repowiki 模块缺失)')
        return

    cn_map = get_cn_map_with_auto()
    code_dir = os.path.join(BASE, 'data', 'code')
    index_dir = os.path.join(BASE, 'data', 'index')
    wiki_idx_path = os.path.join(index_dir, 'wiki-index.json')

    # First use: build the index before searching.
    if not os.path.isfile(wiki_idx_path):
        print('[构建] 首次使用, 正在从 Repo Wiki 构建语义索引...')
        built = build_wiki_index(code_dir, wiki_idx_path)
        stats = built.get('stats') if isinstance(built, dict) else None
        if not stats:
            print(' 未找到 Repo Wiki (data/code/*/.qoder/repowiki/)')
            print(' 在 Qoder IDE 里对项目生成 Repo Wiki 后可用')
            return
        print(' 完成: {} 项目 / {} 模块文档 / {} 关键词\n'.format(
            stats.get('projects', 0), stats.get('pages', 0), stats.get('keywords', 0)))

    hits = search_wiki(query, index_dir, code_dir, cn_map)
    if not hits:
        print('No wiki pages match: {}'.format(query))
        return

    print('Wiki pages matching: {} ({} found)\n'.format(query, len(hits)))
    for rank, page in enumerate(hits, 1):
        print(' {}. [{}] {}'.format(rank, page.get('project', '?'), page.get('title', '')))
        dep_files = page.get('dependent_files')
        if dep_files:
            # Show at most two files, then the total when there are more.
            print(' 依赖: ' + ', '.join(dep_files[:3][:2]))
            if len(dep_files) > 2:
                print(' (共 {} 个文件)'.format(len(dep_files)))
        doc_path = page.get('md_path')
        if doc_path:
            print(' 文档: ' + os.path.relpath(doc_path, BASE))
    print('\n 提示: 读 wiki 文档 (带行号引用) 比读源码更省 token')
|
|
511
|
+
|
|
512
|
+
|
|
349
513
|
def print_usage():
|
|
350
514
|
print('Usage:')
|
|
351
515
|
print(' search_index.py <keyword> [--platform web|app] - Search code')
|
|
@@ -354,6 +518,7 @@ def print_usage():
|
|
|
354
518
|
print(' search_index.py --field <name> - Field usage')
|
|
355
519
|
print(' search_index.py --api <keyword> - API endpoints')
|
|
356
520
|
print(' search_index.py --components - UI components')
|
|
521
|
+
print(' search_index.py --wiki <keyword> - Repo Wiki 语义模块文档 (精准)')
|
|
357
522
|
print(' search_index.py --modules - Project overview')
|
|
358
523
|
print(' search_index.py --list - Top 50 keywords')
|
|
359
524
|
print(' search_index.py --vben - Vben design tokens')
|
|
@@ -382,6 +547,8 @@ if __name__ == '__main__':
|
|
|
382
547
|
search_field(args[1])
|
|
383
548
|
elif args[0] == '--api' and len(args) >= 2:
|
|
384
549
|
search_api(args[1])
|
|
550
|
+
elif args[0] == '--wiki' and len(args) >= 2:
|
|
551
|
+
search_wiki_cli(args[1])
|
|
385
552
|
elif args[0] == '--components':
|
|
386
553
|
show_components()
|
|
387
554
|
elif args[0] == '--modules':
|