@chongyan/autospec 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. package/LICENSE +21 -0
  2. package/README.en.md +472 -0
  3. package/README.md +476 -0
  4. package/bin/autospec.js +3 -0
  5. package/knowledge/README.md +144 -0
  6. package/knowledge/checklists/code.md +182 -0
  7. package/knowledge/checklists/design.md +196 -0
  8. package/knowledge/checklists/release.md +70 -0
  9. package/knowledge/checklists/requirement.md +169 -0
  10. package/knowledge/checklists/test.md +46 -0
  11. package/knowledge/config/README.en.md +44 -0
  12. package/knowledge/config/README.md +44 -0
  13. package/knowledge/config/role-composition.yaml +98 -0
  14. package/knowledge/config/role-extensions.yaml +140 -0
  15. package/knowledge/config/skill-compositions.yaml +142 -0
  16. package/knowledge/config/team-stage.yaml +95 -0
  17. package/knowledge/config/team-tasks.yaml +139 -0
  18. package/knowledge/config/team-triggers.yaml +198 -0
  19. package/knowledge/config/validation-patterns.yaml +137 -0
  20. package/knowledge/domain/README.md +115 -0
  21. package/knowledge/domain/flows/README.md +194 -0
  22. package/knowledge/domain/glossary.md +143 -0
  23. package/knowledge/domain/rules.md +138 -0
  24. package/knowledge/environment/README.en.md +36 -0
  25. package/knowledge/environment/README.md +87 -0
  26. package/knowledge/environment/component-knowledge.md +316 -0
  27. package/knowledge/environment/detection-patterns.yaml +502 -0
  28. package/knowledge/environment/middleware-knowledge.md +237 -0
  29. package/knowledge/environment/template-registry.md +321 -0
  30. package/knowledge/guides/domain-driven-design.md +345 -0
  31. package/knowledge/guides/knowledge-management.md +369 -0
  32. package/knowledge/guides/requirement-engineering.md +329 -0
  33. package/knowledge/guides/stages/ai-effect-evaluator.md +93 -0
  34. package/knowledge/guides/stages/code-implementer.md +205 -0
  35. package/knowledge/guides/stages/code-reviewer.md +111 -0
  36. package/knowledge/guides/stages/consistency-checker.md +177 -0
  37. package/knowledge/guides/stages/design-planner.md +401 -0
  38. package/knowledge/guides/stages/design-reviewer.md +83 -0
  39. package/knowledge/guides/stages/integration-test-runner.md +105 -0
  40. package/knowledge/guides/stages/release-checker.md +205 -0
  41. package/knowledge/guides/stages/requirement-analyzer.md +195 -0
  42. package/knowledge/guides/stages/requirement-reviewer.md +83 -0
  43. package/knowledge/guides/stages/security-reviewer.md +89 -0
  44. package/knowledge/guides/stages/test-context-analyzer.md +250 -0
  45. package/knowledge/guides/stages/test-generator.md +241 -0
  46. package/knowledge/guides/stages/test-planner.md +183 -0
  47. package/knowledge/guides/stages/test-reviewer.md +76 -0
  48. package/knowledge/guides/stages/unit-test-runner.md +83 -0
  49. package/knowledge/guides/support/ai-agent-analyzer.md +362 -0
  50. package/knowledge/guides/support/ai-anomaly-analyzer.md +213 -0
  51. package/knowledge/guides/support/ai-artifact-evaluator.md +192 -0
  52. package/knowledge/guides/support/ai-capability-analyzer.md +193 -0
  53. package/knowledge/guides/support/ai-component-analyzer.md +169 -0
  54. package/knowledge/guides/support/ai-data-validator.md +276 -0
  55. package/knowledge/guides/support/ai-evaluation-planner.md +374 -0
  56. package/knowledge/guides/support/ai-path-evaluator.md +274 -0
  57. package/knowledge/guides/support/ai-pipeline-evaluator.md +219 -0
  58. package/knowledge/guides/support/ai-rag-analyzer.md +339 -0
  59. package/knowledge/guides/support/ai-task-assessor.md +418 -0
  60. package/knowledge/guides/support/ai-test-diagnostics.md +133 -0
  61. package/knowledge/guides/support/complexity-assessor.md +268 -0
  62. package/knowledge/guides/support/component-discovery.md +183 -0
  63. package/knowledge/guides/support/environment-scanner.md +207 -0
  64. package/knowledge/guides/support/environment-validator.md +207 -0
  65. package/knowledge/guides/support/knowledge-generator.md +234 -0
  66. package/knowledge/guides/support/methodology-extractor.md +55 -0
  67. package/knowledge/guides/support/pipeline-protocol.md +438 -0
  68. package/knowledge/guides/support/practice-logger.md +359 -0
  69. package/knowledge/guides/support/scope-inference.md +174 -0
  70. package/knowledge/guides/support/skill-distiller.md +91 -0
  71. package/knowledge/guides/support/skill-updater.md +45 -0
  72. package/knowledge/guides/support/skill-validator.md +72 -0
  73. package/knowledge/guides/support/team-orchestrator.md +323 -0
  74. package/knowledge/guides/support/tech-stack-analyzer.md +139 -0
  75. package/knowledge/guides/support/test-runner.md +254 -0
  76. package/knowledge/guides/system-design.md +352 -0
  77. package/knowledge/organization/ai-native-team.md +318 -0
  78. package/knowledge/organization/team-metrics.md +228 -0
  79. package/knowledge/principles/constitution.md +134 -0
  80. package/knowledge/principles/core-principles.md +368 -0
  81. package/knowledge/principles/design-philosophy.md +877 -0
  82. package/knowledge/principles/evolution.md +553 -0
  83. package/knowledge/process/01-requirement.md +113 -0
  84. package/knowledge/process/02-design.md +123 -0
  85. package/knowledge/process/03-implementation.md +90 -0
  86. package/knowledge/process/04-review.md +80 -0
  87. package/knowledge/process/05-testing.md +90 -0
  88. package/knowledge/process/06-delivery.md +88 -0
  89. package/knowledge/process/README.en.md +38 -0
  90. package/knowledge/process/README.md +48 -0
  91. package/knowledge/process/ai-sdlc.md +475 -0
  92. package/knowledge/process/overview.md +319 -0
  93. package/knowledge/standards/code-review.md +876 -0
  94. package/knowledge/standards/coding-style.md +940 -0
  95. package/knowledge/standards/data-consistency.md +1085 -0
  96. package/knowledge/standards/document-versioning.md +210 -0
  97. package/knowledge/standards/risk-detection.md +186 -0
  98. package/knowledge/templates/ai-evaluation.md +150 -0
  99. package/knowledge/templates/api-design.md +117 -0
  100. package/knowledge/templates/database-design.md +132 -0
  101. package/knowledge/templates/domain-driven-design.md +321 -0
  102. package/knowledge/templates/product-proposal.md +201 -0
  103. package/knowledge/templates/system-design.md +227 -0
  104. package/knowledge/templates/task-breakdown.md +107 -0
  105. package/knowledge/templates/test-case.md +170 -0
  106. package/package.json +53 -0
  107. package/plugins/.claude-plugin/plugin.json +134 -0
  108. package/plugins/agents/roles/ai-engineer.md +129 -0
  109. package/plugins/agents/roles/backend-engineer.md +165 -0
  110. package/plugins/agents/roles/ceo.md +94 -0
  111. package/plugins/agents/roles/data-engineer.md +135 -0
  112. package/plugins/agents/roles/devops-engineer.md +181 -0
  113. package/plugins/agents/roles/frontend-engineer.md +129 -0
  114. package/plugins/agents/roles/product-owner.md +98 -0
  115. package/plugins/agents/roles/quality-engineer.md +129 -0
  116. package/plugins/agents/roles/security-engineer.md +180 -0
  117. package/plugins/agents/roles/tech-lead.md +97 -0
  118. package/plugins/agents/support/blind-comparator.md +88 -0
  119. package/plugins/agents/support/consistency-checker.md +103 -0
  120. package/plugins/agents/support/failure-diagnostician.md +141 -0
  121. package/plugins/agents/support/independent-reviewer.md +80 -0
  122. package/plugins/agents/support/safety-auditor.md +121 -0
  123. package/plugins/agents/support/skill-benchmarker.md +86 -0
  124. package/plugins/agents/support/skill-forger.md +105 -0
  125. package/plugins/agents/support/stage-gate-evaluator.md +121 -0
  126. package/plugins/agents/support/test-coverage-reviewer.md +73 -0
  127. package/plugins/benchmarks/templates/README.md +44 -0
  128. package/plugins/benchmarks/templates/commands/explore-template.yaml +48 -0
  129. package/plugins/benchmarks/templates/pipeline/agile-template.yaml +84 -0
  130. package/plugins/benchmarks/templates/pipeline/waterfall-template.yaml +106 -0
  131. package/plugins/benchmarks/templates/skills/requirement-analyzer-template.yaml +48 -0
  132. package/plugins/commands/README.en.md +96 -0
  133. package/plugins/commands/README.md +96 -0
  134. package/plugins/commands/apply.md +191 -0
  135. package/plugins/commands/archive.md +76 -0
  136. package/plugins/commands/env-export.md +79 -0
  137. package/plugins/commands/env-sync.md +640 -0
  138. package/plugins/commands/env-template.md +223 -0
  139. package/plugins/commands/env-update.md +264 -0
  140. package/plugins/commands/env-validate.md +176 -0
  141. package/plugins/commands/env.md +79 -0
  142. package/plugins/commands/explore.md +76 -0
  143. package/plugins/commands/field-evolve.md +536 -0
  144. package/plugins/commands/memory.md +249 -0
  145. package/plugins/commands/project-evolve.md +821 -0
  146. package/plugins/commands/propose.md +93 -0
  147. package/plugins/commands/review.md +140 -0
  148. package/plugins/commands/run.md +224 -0
  149. package/plugins/commands/status.md +62 -0
  150. package/plugins/commands/validate.md +108 -0
  151. package/plugins/hooks/README.en.md +56 -0
  152. package/plugins/hooks/README.md +56 -0
  153. package/plugins/hooks/ai-project-guard.js +329 -0
  154. package/plugins/hooks/artifact-evaluation-hook.js +237 -0
  155. package/plugins/hooks/constitution-guard.js +211 -0
  156. package/plugins/hooks/environment-autocommit.js +264 -0
  157. package/plugins/hooks/environment-manager.js +778 -0
  158. package/plugins/hooks/execution-tracker.js +354 -0
  159. package/plugins/hooks/frozen-zone-guard.js +140 -0
  160. package/plugins/hooks/layer1-validator.js +423 -0
  161. package/plugins/hooks/lib/artifact-evaluator.js +414 -0
  162. package/plugins/hooks/lib/benchmarks/change-detector.js +390 -0
  163. package/plugins/hooks/lib/benchmarks/evaluator.js +605 -0
  164. package/plugins/hooks/lib/benchmarks/integration-example.js +169 -0
  165. package/plugins/hooks/lib/data-and-ai-detector.js +275 -0
  166. package/plugins/hooks/lib/detection-pattern-loader.js +865 -0
  167. package/plugins/hooks/lib/directory-discovery.js +395 -0
  168. package/plugins/hooks/lib/environment-config-loader.js +341 -0
  169. package/plugins/hooks/lib/environment-detector.js +553 -0
  170. package/plugins/hooks/lib/environment-evolver.js +564 -0
  171. package/plugins/hooks/lib/environment-registry.js +813 -0
  172. package/plugins/hooks/lib/execution-path.js +427 -0
  173. package/plugins/hooks/lib/hook-error-recorder.js +245 -0
  174. package/plugins/hooks/lib/hook-logger.js +538 -0
  175. package/plugins/hooks/lib/hook-runner.js +97 -0
  176. package/plugins/hooks/lib/hook-runner.sh +44 -0
  177. package/plugins/hooks/lib/hook-state-manager.js +480 -0
  178. package/plugins/hooks/lib/memory-extractor.js +377 -0
  179. package/plugins/hooks/lib/memory-manager.js +673 -0
  180. package/plugins/hooks/lib/metrics-analyzer.js +489 -0
  181. package/plugins/hooks/lib/project-evolution/auto-fixer.js +511 -0
  182. package/plugins/hooks/lib/project-evolution/memory-manager.js +346 -0
  183. package/plugins/hooks/lib/project-evolution/pattern-detector.js +476 -0
  184. package/plugins/hooks/lib/project-evolution/semantic-indexer.js +480 -0
  185. package/plugins/hooks/lib/project-structure-detector.js +326 -0
  186. package/plugins/hooks/lib/rollback-tracker.js +346 -0
  187. package/plugins/hooks/lib/source-code-scanner.js +596 -0
  188. package/plugins/hooks/lib/technology-stack-detector.js +374 -0
  189. package/plugins/hooks/lib/test-failure-analyzer.js +375 -0
  190. package/plugins/hooks/lib/test-failure-fixer.js +268 -0
  191. package/plugins/hooks/lib/trace-context.js +277 -0
  192. package/plugins/hooks/lib/validation-patterns.js +415 -0
  193. package/plugins/hooks/memory-sync.js +171 -0
  194. package/plugins/hooks/pipeline-observer.js +413 -0
  195. package/plugins/hooks/scope-sentinel.js +204 -0
  196. package/plugins/hooks/trace-initialization.js +169 -0
  197. package/plugins/memory/templates/code-quality.yaml +149 -0
  198. package/plugins/memory/templates/multi-system.yaml +155 -0
  199. package/plugins/memory/templates/team-habits.yaml +119 -0
  200. package/plugins/memory/templates/testing.yaml +121 -0
  201. package/plugins/skills/README.en.md +47 -0
  202. package/plugins/skills/README.md +104 -0
  203. package/plugins/skills/benchmark-executor/README.md +93 -0
  204. package/plugins/skills/benchmark-executor/SKILL.md +647 -0
  205. package/plugins/skills/benchmark-generator/SKILL.md +349 -0
  206. package/plugins/skills/delivery-stage/SKILL.md +203 -0
  207. package/plugins/skills/design-stage/SKILL.md +216 -0
  208. package/plugins/skills/evolution-process/SKILL.md +291 -0
  209. package/plugins/skills/exploration-phase/SKILL.md +133 -0
  210. package/plugins/skills/implementation-stage/SKILL.md +179 -0
  211. package/plugins/skills/layer1-validation/SKILL.md +79 -0
  212. package/plugins/skills/pending-dashboard/SKILL.md +109 -0
  213. package/plugins/skills/project-evolution/SKILL.md +847 -0
  214. package/plugins/skills/requirement-stage/SKILL.md +183 -0
  215. package/plugins/skills/skill-forge/SKILL.md +223 -0
  216. package/plugins/skills/skill-forge/references/description-guide.md +92 -0
  217. package/plugins/skills/skill-forge/references/quality-rubric.md +104 -0
  218. package/plugins/skills/skill-forge/references/skill-template.md +106 -0
  219. package/plugins/skills/startup-guard/SKILL.md +38 -0
  220. package/plugins/skills/testing-stage/SKILL.md +195 -0
  221. package/scripts/cli/global-init.js +288 -0
  222. package/scripts/cli/global.js +324 -0
  223. package/scripts/cli/index.js +55 -0
  224. package/scripts/cli/init.js +382 -0
  225. package/scripts/cli/list.js +69 -0
  226. package/scripts/cli/org.js +340 -0
  227. package/scripts/cli/update.js +44 -0
  228. package/scripts/config/commands.config.js +145 -0
  229. package/scripts/config/hooks.config.js +197 -0
  230. package/scripts/evolution/evolution-router.js +273 -0
  231. package/scripts/evolution/evolution-signal-collector.js +307 -0
  232. package/scripts/evolution/knowledge-loader.js +346 -0
  233. package/scripts/evolution/marketplace.js +317 -0
  234. package/scripts/evolution/version-manager.js +371 -0
  235. package/scripts/install/agents.js +106 -0
  236. package/scripts/install/commands.js +133 -0
  237. package/scripts/install/constants.js +424 -0
  238. package/scripts/install/hook-logger.js +536 -0
  239. package/scripts/install/hooks.js +110 -0
  240. package/scripts/install/index.js +39 -0
  241. package/scripts/install/skills.js +95 -0
  242. package/scripts/postinstall.js +25 -0
  243. package/scripts/state.js +376 -0
@@ -0,0 +1,219 @@
1
+ ---
2
+ name: ai-pipeline-evaluator
3
+ description: 基于 Trace 日志生成流程评测报告
4
+ type: review
5
+ ---
6
+
7
+ ## 定位
8
+
9
+ 基于过程指标 + 结果指标 + 路径指标,自动生成流程评测报告,识别改进机会,触发进化机制。
10
+
11
+ ## 输入
12
+
13
+ - 必须输入:
14
+ - traceId: 流程追踪 ID
15
+ - 可选输入:
16
+ - logs/: 相关日志文件
17
+ - metrics: 质量指标数据
18
+
19
+ ## 评测维度
20
+
21
+ ### 1. 过程指标 (权重 30%)
22
+
23
+ | 指标 | 权重 | 说明 |
24
+ |-----|------|------|
25
+ | Layer1 首次通过率 | 25% | 代码质量 |
26
+ | Layer2 首次通过率 | 20% | 设计质量 |
27
+ | 人工介入率 | 15% | 自主能力(反向) |
28
+ | 回退率 | 15% | 流程顺畅度(反向) |
29
+ | 错误率 | 10% | 稳定性(反向) |
30
+ | 阶段成功率 | 15% | 完成度 |
31
+
32
+ ### 2. 结果指标 (权重 40%)
33
+
34
+ | 指标 | 权重 | 说明 |
35
+ |-----|------|------|
36
+ | 产出物平均评分 | 40% | 产出质量 |
37
+ | 一致性评分 | 30% | 产物一致性 |
38
+ | 结构完整性 | 30% | 规范遵循 |
39
+
40
+ ### 3. 路径指标 (权重 30%)
41
+
42
+ | 指标 | 权重 | 说明 |
43
+ |-----|------|------|
44
+ | 流程合规性 | 30% | 规范遵循 |
45
+ | 执行效率 | 25% | 资源利用 |
46
+ | 决策质量 | 25% | 自主决策能力 |
47
+ | 异常处理 | 20% | 容错能力 |
48
+
49
+ ## 输出格式
50
+
51
+ ```markdown
52
+ # AutoSpec 流程评测报告
53
+
54
+ ## 执行摘要
55
+
56
+ | 指标 | 值 |
57
+ |-----|-----|
58
+ | Trace ID | {{traceId}} |
59
+ | 执行时间 | {{duration}} |
60
+ | 综合评分 | {{overallScore}}/100 ({{grade}}) |
61
+
62
+ ## 过程指标
63
+
64
+ ### Layer1/Layer2 验证
65
+ - Layer1 首次通过率: {{layer1FirstPassRate}}%
66
+ - Layer2 首次通过率: {{layer2FirstPassRate}}%
67
+
68
+ ### 流程效率
69
+ - 阶段成功率: {{stageSuccessRate}}%
70
+ - 回退次数: {{rollbackCount}}
71
+ - 人工介入次数: {{humanInterventionCount}}
72
+
73
+ ### 错误分析
74
+ - 总错误数: {{errorCount}}
75
+ - 错误类型分布: {{errorTypeDistribution}}
76
+
77
+ ## 结果指标
78
+
79
+ ### 产出物质量
80
+ | 产出物 | 评分 | 主要问题 |
81
+ |-------|------|---------|
82
+ {{#artifacts}}
83
+ | {{name}} | {{score}} | {{mainIssue}} |
84
+ {{/artifacts}}
85
+
86
+ ### 一致性检测
87
+ - 一致性问题数: {{consistencyIssues}}
88
+ - 主要不一致类型: {{mainInconsistency}}
89
+
90
+ ## 路径指标
91
+
92
+ ### 流程合规性
93
+ - 阶段路径: {{stagePath}}
94
+ - 违规项: {{violations}}
95
+
96
+ ### 执行效率
97
+ - 重复操作: {{repeatedOps}} 次
98
+ - 工具使用效率: {{toolEfficiency}}%
99
+
100
+ ### 决策质量
101
+ - 人工介入率: {{humanRate}}%
102
+ - 自动决策成功率: {{autoSuccessRate}}%
103
+
104
+ ## 改进建议
105
+
106
+ ### 高优先级
107
+ {{#highPrioritySuggestions}}
108
+ - {{.}}
109
+ {{/highPrioritySuggestions}}
110
+
111
+ ### 中优先级
112
+ {{#mediumPrioritySuggestions}}
113
+ - {{.}}
114
+ {{/mediumPrioritySuggestions}}
115
+
116
+ ## 进化触发建议
117
+
118
+ {{#evolutionTriggers}}
119
+ - **{{type}}**: {{reason}}
120
+ - 建议行动: {{action}}
121
+ {{/evolutionTriggers}}
122
+ ```
123
+
124
+ ## 执行步骤
125
+
126
+ ### Step 1: 收集数据
127
+
128
+ ```
129
+ 1. 读取 metrics.json
130
+ 2. 读取 trace 日志
131
+ 3. 读取产出物评测结果
132
+ ```
133
+
134
+ ### Step 2: 计算各维度分数
135
+
136
+ ```
137
+ 1. 计算过程指标分数
138
+ 2. 计算结果指标分数
139
+ 3. 计算路径指标分数
140
+ ```
141
+
142
+ ### Step 3: 计算综合评分
143
+
144
+ ```
145
+ overallScore = process * 0.3 + result * 0.4 + path * 0.3
146
+ grade = getGrade(overallScore)
147
+ ```
148
+
149
+ ### Step 4: 分析改进点
150
+
151
+ ```
152
+ 1. 识别低分维度
153
+ 2. 分析问题根因
154
+ 3. 生成改进建议
155
+ ```
156
+
157
+ ### Step 5: 检测进化触发
158
+
159
+ ```
160
+ 1. 检测重复错误模式
161
+ 2. 检测低效阶段
162
+ 3. 检测质量下降信号
163
+ ```
164
+
165
+ ### Step 6: 生成报告
166
+
167
+ ```
168
+ 1. 生成 Markdown 格式报告
169
+ 2. 保存到 .autospec/logs/evaluation/
170
+ 3. 更新 metrics.json
171
+ ```
172
+
173
+ ## 进化触发规则
174
+
175
+ | 条件 | 触发动作 |
176
+ |-----|---------|
177
+ | 同类错误 >= 3 次 | 创建 practice-log |
178
+ | 质量分数 < 70 | 触发 methodology 审查 |
179
+ | 新模式识别 | 触发 skill 提案 |
180
+ | 阶段效率持续低 | 触发 skill 优化 |
181
+
182
+ ## 与 metrics.json 的集成
183
+
184
+ 评测结果写入 metrics.json:
185
+
186
+ ```json
187
+ {
188
+ "qualityScore": {
189
+ "overall": 85,
190
+ "grade": "B",
191
+ "breakdown": {
192
+ "process": { "score": 80 },
193
+ "result": { "score": 88 },
194
+ "path": { "score": 86 }
195
+ }
196
+ },
197
+ "evaluationReport": {
198
+ "path": ".autospec/logs/evaluation/eval-2026-03-24.md",
199
+ "generatedAt": "2026-03-24T10:00:00Z"
200
+ }
201
+ }
202
+ ```
203
+
204
+ ## 适用场景
205
+
206
+ - 流程结束时自动触发(Stop Hook)
207
+ - 手动执行 `/autospec:evaluate`
208
+ - 定期质量审计
209
+
210
+ ## 反模式清单 (DP7)
211
+
212
+ 1. **忽略上下文**:不考虑任务复杂度
213
+ - 检测:结合任务评估调整评分标准
214
+
215
+ 2. **建议不可执行**:建议过于笼统
216
+ - 检测:每个建议必须指向具体改进点
217
+
218
+ 3. **过度惩罚**:对所有问题都扣分
219
+ - 检测:区分关键问题和次要问题
@@ -0,0 +1,339 @@
1
+ ---
2
+ name: ai-rag-analyzer
3
+ description: 当检测到RAG应用组件时,分析RAG架构的检索策略、向量存储、文档处理流程。用于理解检索增强生成系统的结构。
4
+ type: ai
5
+ ---
6
+
7
+ ## 定位
8
+
9
+ AI专用技能。分析RAG应用的架构、检索策略、向量存储配置和文档处理流程。
10
+
11
+ ## 输入
12
+
13
+ - 必须输入:RAG代码路径或项目目录
14
+ - 可选输入:向量存储类型(已检测到的组件)
15
+
16
+ ## 输出
17
+
18
+ ```json
19
+ {
20
+ "architecture": {
21
+ "type": "standard-rag",
22
+ "description": "标准RAG架构:文档加载 → 分块 → 向量化 → 存储 → 检索 → 生成"
23
+ },
24
+ "components": {
25
+ "documentLoader": {
26
+ "type": "PDFLoader",
27
+ "sources": ["local", "s3"],
28
+ "formats": ["pdf", "txt", "md"]
29
+ },
30
+ "textSplitter": {
31
+ "type": "RecursiveCharacterTextSplitter",
32
+ "chunkSize": 500,
33
+ "chunkOverlap": 50
34
+ },
35
+ "embeddings": {
36
+ "type": "OpenAIEmbeddings",
37
+ "model": "text-embedding-3-small",
38
+ "dimension": 1536
39
+ },
40
+ "vectorStore": {
41
+ "type": "ChromaDB",
42
+ "collection": "documents",
43
+ "persistDirectory": "./chroma_db"
44
+ },
45
+ "retriever": {
46
+ "type": "similarity",
47
+ "k": 4,
48
+ "scoreThreshold": 0.7
49
+ },
50
+ "llm": {
51
+ "type": "ChatOpenAI",
52
+ "model": "gpt-4",
53
+ "temperature": 0.1
54
+ }
55
+ },
56
+ "retrievalStrategy": {
57
+ "type": "similarity",
58
+ "enhancements": [],
59
+ "description": "基础相似度检索"
60
+ },
61
+ "generationConfig": {
62
+ "promptTemplate": "基于以下上下文回答问题:\n{context}\n\n问题:{question}",
63
+ "maxTokens": 1000
64
+ },
65
+ "evaluationNeeds": {
66
+ "dimensions": ["检索准确率", "回答相关性", "上下文利用率", "幻觉率"],
67
+ "suggestions": [
68
+ "建议测试检索召回率(Recall@K)",
69
+ "建议测试生成回答的准确性",
70
+ "建议测试不同查询类型的表现"
71
+ ]
72
+ }
73
+ }
74
+ ```
75
+
76
+ ## 执行步骤
77
+
78
+ ### Step 1: 识别RAG框架(确定性)
79
+
80
+ 基于依赖和代码特征识别RAG框架:
81
+
82
+ ```
83
+ 框架识别规则:
84
+ - langchain: 依赖langchain,代码中有 VectorStore, Retriever, Document
85
+ - llamaindex: 依赖llamaindex,代码中有 Index, QueryEngine, Node
86
+ - haystack: 依赖haystack,代码中有 Pipeline, DocumentStore
87
+ - custom: 自定义RAG实现
88
+ ```
89
+
90
+ ### Step 2: Grep获取组件定义(高效)
91
+
92
+ 搜索RAG组件模式:
93
+
94
+ ```bash
95
+ # LangChain风格
96
+ grep -r -n -A 5 "VectorStore\|Retriever\|Document\|Embeddings" --include="*.py" .
97
+
98
+ # LlamaIndex风格
99
+ grep -r -n -A 5 "VectorStoreIndex\|QueryEngine\|NodeParser" --include="*.py" .
100
+
101
+ # 文档加载
102
+ grep -r -n -A 5 "Loader\|Document\|load\|split" --include="*.py" .
103
+ ```
104
+
105
+ ### Step 3: 分析组件配置(模型)
106
+
107
+ 基于代码上下文分析RAG组件:
108
+
109
+ ```
110
+ 模型输入:
111
+ {
112
+ "framework": "langchain",
113
+ "codeContexts": "
114
+ # rag.py
115
+ vectorstore = Chroma.from_documents(
116
+ documents=splits,
117
+ embedding=OpenAIEmbeddings(),
118
+ persist_directory='./chroma_db'
119
+ )
120
+ retriever = vectorstore.as_retriever(
121
+ search_type='similarity',
122
+ search_kwargs={'k': 4}
123
+ )
124
+ ",
125
+ "task": "分析RAG组件配置,输出JSON格式"
126
+ }
127
+ ```
128
+
129
+ ### Step 4: 分析检索策略(模型)
130
+
131
+ 分析检索增强策略:
132
+
133
+ ```
134
+ 关注点:
135
+ - 基础检索:相似度、MMR、混合检索
136
+ - 增强策略:重排序、查询改写、多查询
137
+ - 过滤条件:元数据过滤、时间范围
138
+ ```
139
+
140
+ ### Step 5: 输出结果
141
+
142
+ 汇总RAG分析结果,包括评测建议。
143
+
144
+ ## Contextual Retrieval
145
+
146
+ 根据业界最佳实践,可采用 Contextual Retrieval 技术来提升检索质量,要点如下:
147
+
148
+ ### 问题背景
149
+
150
+ 传统RAG在对文档分块并编码时,单个分块会丢失其所在文档的上下文,导致系统常常无法从知识库中检索到相关信息。
151
+
152
+ ### 解决方案:Contextual Retrieval
153
+
154
+ 使用两种子技术:
155
+ 1. **Contextual Embeddings**:上下文嵌入——在向量化之前,为每个分块添加说明其所在文档上下文的前缀
156
+ 2. **Contextual BM25**:上下文BM25——对添加了上下文前缀的分块建立BM25关键词索引
157
+
158
+ **效果**:显著提升检索质量。
159
+
160
+ ### 何时使用
161
+
162
+ - 知识库小于200,000 tokens(约500页)→ 无需检索,直接将整个知识库放入长prompt;超过该规模时再使用Contextual Retrieval
163
+ - 直接使用长prompt时,配合prompt caching可以降低成本和延迟
164
+
165
+ ### 检索策略类型
166
+
167
+ | 类型 | 说明 | 适用场景 |
168
+ |------|------|----------|
169
+ | **similarity** | 相似度检索 | 简单问答 |
170
+ | **mmr** | 最大边际相关性 | 多样性需求 |
171
+ | **similarity_score_threshold** | 带阈值过滤 | 高精度需求 |
172
+ | **hybrid** | 向量+关键词混合 | 精确匹配需求 |
173
+ | **multi_query** | 多查询扩展 | 复杂问题 |
174
+ | **rerank** | 重排序 | 高质量需求 |
175
+ | **contextual** | 上下文增强检索 | 复杂文档检索 |
176
+
177
+ ## 分块策略优化
178
+
179
+ 根据内容类型选择合适的分块大小,并结合下列分块优化技术:
180
+
181
+ ### 分块大小选择
182
+
183
+ | 场景 | 建议chunk size | 理由 |
184
+ |------|---------------|------|
185
+ | 代码 | 100-200 tokens | 保持函数/类完整 |
186
+ | 短文档 | 300-500 tokens | 保持语义完整 |
187
+ | 长文档 | 500-1000 tokens | 平衡精度和覆盖 |
188
+ | 复杂结构 | 可变大小 | 按章节/段落 |
189
+
190
+ ### 分块优化技术
191
+
192
+ 1. **重叠**:chunk_overlap建议为chunk_size的10-20%
193
+ 2. **父子索引**:小chunk用于检索,大chunk用于生成
194
+ 3. **元数据**:添加来源、标题、摘要等元数据
195
+ 4. **上下文前缀**:为每个chunk添加文档上下文
196
+
197
+ ## 向量存储选型原则
198
+
199
+ | 场景 | 推荐选择 | 理由 |
200
+ |------|----------|------|
201
+ | 本地开发、轻量级 | 轻量级向量库 | 易于集成、无需额外服务 |
202
+ | 生产环境、需要扩展 | 云端向量数据库 | 托管服务、自动扩展 |
203
+ | 需要复杂过滤 | 高性能向量库 | 过滤能力强 |
204
+ | 大规模数据 | 分布式方案 | 水平扩展能力 |
205
+
206
+ ## RAG评估维度
207
+
208
+ 除输出中的基础评测维度外,可从检索质量、生成质量、上下文优化三个方面扩展评估:
209
+
210
+ ### 检索质量评估
211
+
212
+ | 指标 | 说明 | 测量方法 |
213
+ |------|------|----------|
214
+ | **Precision@K** | Top-K结果中相关文档比例 | 标注数据 |
215
+ | **Recall@K** | 相关文档被召回的比例 | 标注数据 |
216
+ | **MRR** | 第一个相关文档的排名倒数 | 自动计算 |
217
+ | **NDCG** | 归一化折损累计增益 | 标注数据 |
218
+
219
+ ### 生成质量评估
220
+
221
+ | 指标 | 说明 | 测量方法 |
222
+ |------|------|----------|
223
+ | **Context Relevance** | 检索内容与问题的相关程度 | LLM评估 |
224
+ | **Answer Faithfulness** | 回答与检索内容的一致性 | LLM评估 |
225
+ | **Answer Relevance** | 回答与问题的相关程度 | LLM评估 |
226
+ | **Hallucination Rate** | 幻觉内容比例 | 事实核查 |
227
+
228
+ ### 上下文优化评估
229
+
230
+ | 指标 | 说明 | 测量方法 |
231
+ |------|------|----------|
232
+ | **Context Utilization** | 模型利用检索内容的程度 | 日志分析 |
233
+ | **Chunk Quality** | 分块是否保留完整语义 | 人工评估 |
234
+ | **Retrieval Latency** | 检索延迟 | 自动统计 |
235
+
236
+ ## 调用时机
237
+
238
+ - 检测到RAG应用组件时
239
+ - AI功能开发前的设计阶段
240
+ - 需要理解现有RAG系统时
241
+
242
+ ## 示例
243
+
244
+ **输入**:
245
+ ```
246
+ 项目目录:/project
247
+ 向量存储:ChromaDB(已检测)
248
+ LLM:OpenAI(已检测)
249
+ ```
250
+
251
+ **Grep搜索**:
252
+ ```
253
+ 找到文件:rag/pipeline.py, rag/embeddings.py, rag/retriever.py
254
+ ```
255
+
256
+ **代码上下文**:
257
+ ```python
258
+ # rag/pipeline.py
259
+ from langchain.document_loaders import PyPDFLoader
260
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
261
+ from langchain.embeddings import OpenAIEmbeddings
262
+ from langchain.vectorstores import Chroma
263
+ from langchain.chains import RetrievalQA
264
+ from langchain.chat_models import ChatOpenAI
265
+ loader = PyPDFLoader("docs.pdf")
266
+ documents = loader.load()
267
+
268
+ text_splitter = RecursiveCharacterTextSplitter(
269
+ chunk_size=500,
270
+ chunk_overlap=50
271
+ )
272
+ splits = text_splitter.split_documents(documents)
273
+
274
+ vectorstore = Chroma.from_documents(
275
+ documents=splits,
276
+ embedding=OpenAIEmbeddings(),
277
+ persist_directory="./chroma_db"
278
+ )
279
+
280
+ retriever = vectorstore.as_retriever(
281
+ search_type="similarity",
282
+ search_kwargs={"k": 4}
283
+ )
284
+
285
+ qa_chain = RetrievalQA.from_chain_type(
286
+ llm=ChatOpenAI(model="gpt-4"),
287
+ retriever=retriever,
288
+ return_source_documents=True
289
+ )
290
+ ```
291
+
292
+ **输出**:
293
+ ```json
294
+ {
295
+ "architecture": {
296
+ "type": "standard-rag",
297
+ "description": "标准RAG:PDF加载 → 分块 → 向量化 → Chroma存储 → 相似度检索 → GPT-4生成"
298
+ },
299
+ "components": {
300
+ "documentLoader": {
301
+ "type": "PyPDFLoader",
302
+ "formats": ["pdf"]
303
+ },
304
+ "textSplitter": {
305
+ "type": "RecursiveCharacterTextSplitter",
306
+ "chunkSize": 500,
307
+ "chunkOverlap": 50
308
+ },
309
+ "embeddings": {
310
+ "type": "OpenAIEmbeddings"
311
+ },
312
+ "vectorStore": {
313
+ "type": "Chroma",
314
+ "persistDirectory": "./chroma_db"
315
+ },
316
+ "retriever": {
317
+ "type": "similarity",
318
+ "k": 4
319
+ },
320
+ "llm": {
321
+ "type": "ChatOpenAI",
322
+ "model": "gpt-4"
323
+ }
324
+ },
325
+ "retrievalStrategy": {
326
+ "type": "similarity",
327
+ "enhancements": [],
328
+ "description": "基础相似度检索,返回top-4文档"
329
+ },
330
+ "evaluationNeeds": {
331
+ "dimensions": ["检索准确率", "回答相关性", "来源引用准确性"],
332
+ "suggestions": [
333
+ "测试PDF文档的解析质量",
334
+ "测试不同chunk大小对检索效果的影响",
335
+ "测试问答准确性"
336
+ ]
337
+ }
338
+ }
339
+ ```