code-abyss 1.6.15 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.js +25 -4
- package/package.json +2 -2
- package/skills/SKILL.md +24 -16
- package/skills/domains/ai/SKILL.md +2 -2
- package/skills/domains/ai/prompt-and-eval.md +279 -0
- package/skills/domains/architecture/SKILL.md +2 -3
- package/skills/domains/architecture/security-arch.md +87 -0
- package/skills/domains/data-engineering/SKILL.md +188 -26
- package/skills/domains/development/SKILL.md +1 -4
- package/skills/domains/devops/SKILL.md +3 -5
- package/skills/domains/devops/performance.md +63 -0
- package/skills/domains/devops/testing.md +97 -0
- package/skills/domains/frontend-design/SKILL.md +12 -3
- package/skills/domains/frontend-design/claymorphism/SKILL.md +117 -0
- package/skills/domains/frontend-design/claymorphism/references/tokens.css +52 -0
- package/skills/domains/frontend-design/engineering.md +287 -0
- package/skills/domains/frontend-design/glassmorphism/SKILL.md +138 -0
- package/skills/domains/frontend-design/glassmorphism/references/tokens.css +32 -0
- package/skills/domains/frontend-design/liquid-glass/SKILL.md +135 -0
- package/skills/domains/frontend-design/liquid-glass/references/tokens.css +81 -0
- package/skills/domains/frontend-design/neubrutalism/SKILL.md +141 -0
- package/skills/domains/frontend-design/neubrutalism/references/tokens.css +44 -0
- package/skills/domains/infrastructure/SKILL.md +174 -34
- package/skills/domains/mobile/SKILL.md +211 -21
- package/skills/domains/orchestration/SKILL.md +1 -0
- package/skills/domains/security/SKILL.md +4 -6
- package/skills/domains/security/blue-team.md +57 -0
- package/skills/domains/security/red-team.md +54 -0
- package/skills/domains/security/threat-intel.md +50 -0
- package/skills/orchestration/multi-agent/SKILL.md +195 -46
- package/skills/run_skill.js +134 -0
- package/skills/tools/gen-docs/SKILL.md +6 -4
- package/skills/tools/gen-docs/scripts/doc_generator.js +349 -0
- package/skills/tools/verify-change/SKILL.md +8 -6
- package/skills/tools/verify-change/scripts/change_analyzer.js +270 -0
- package/skills/tools/verify-module/SKILL.md +6 -4
- package/skills/tools/verify-module/scripts/module_scanner.js +145 -0
- package/skills/tools/verify-quality/SKILL.md +5 -3
- package/skills/tools/verify-quality/scripts/quality_checker.js +276 -0
- package/skills/tools/verify-security/SKILL.md +7 -5
- package/skills/tools/verify-security/scripts/security_scanner.js +133 -0
- package/skills/domains/COVERAGE_PLAN.md +0 -232
- package/skills/domains/ai/model-evaluation.md +0 -790
- package/skills/domains/ai/prompt-engineering.md +0 -703
- package/skills/domains/architecture/compliance.md +0 -299
- package/skills/domains/architecture/data-security.md +0 -184
- package/skills/domains/data-engineering/data-pipeline.md +0 -762
- package/skills/domains/data-engineering/data-quality.md +0 -894
- package/skills/domains/data-engineering/stream-processing.md +0 -791
- package/skills/domains/development/dart.md +0 -963
- package/skills/domains/development/kotlin.md +0 -834
- package/skills/domains/development/php.md +0 -659
- package/skills/domains/development/swift.md +0 -755
- package/skills/domains/devops/e2e-testing.md +0 -914
- package/skills/domains/devops/performance-testing.md +0 -734
- package/skills/domains/devops/testing-strategy.md +0 -667
- package/skills/domains/frontend-design/build-tools.md +0 -743
- package/skills/domains/frontend-design/performance.md +0 -734
- package/skills/domains/frontend-design/testing.md +0 -699
- package/skills/domains/infrastructure/gitops.md +0 -735
- package/skills/domains/infrastructure/iac.md +0 -855
- package/skills/domains/infrastructure/kubernetes.md +0 -1018
- package/skills/domains/mobile/android-dev.md +0 -979
- package/skills/domains/mobile/cross-platform.md +0 -795
- package/skills/domains/mobile/ios-dev.md +0 -931
- package/skills/domains/security/secrets-management.md +0 -834
- package/skills/domains/security/supply-chain.md +0 -931
- package/skills/domains/security/threat-modeling.md +0 -828
- package/skills/run_skill.py +0 -88
- package/skills/tests/README.md +0 -225
- package/skills/tests/SUMMARY.md +0 -362
- package/skills/tests/__init__.py +0 -3
- package/skills/tests/test_change_analyzer.py +0 -558
- package/skills/tests/test_doc_generator.py +0 -538
- package/skills/tests/test_module_scanner.py +0 -376
- package/skills/tests/test_quality_checker.py +0 -516
- package/skills/tests/test_security_scanner.py +0 -426
- package/skills/tools/gen-docs/scripts/doc_generator.py +0 -491
- package/skills/tools/verify-change/scripts/change_analyzer.py +0 -529
- package/skills/tools/verify-module/scripts/module_scanner.py +0 -321
- package/skills/tools/verify-quality/scripts/quality_checker.py +0 -481
- package/skills/tools/verify-security/scripts/security_scanner.py +0 -368
package/bin/install.js
CHANGED
|
@@ -4,8 +4,16 @@ const fs = require('fs');
|
|
|
4
4
|
const path = require('path');
|
|
5
5
|
const os = require('os');
|
|
6
6
|
|
|
7
|
-
const
|
|
7
|
+
const pkg = require(path.join(__dirname, '..', 'package.json'));
|
|
8
|
+
const VERSION = pkg.version;
|
|
8
9
|
const HOME = os.homedir();
|
|
10
|
+
|
|
11
|
+
// ── Node.js 版本检查 ──
|
|
12
|
+
const MIN_NODE = pkg.engines?.node?.match(/(\d+)/)?.[1] || '18';
|
|
13
|
+
if (parseInt(process.versions.node) < parseInt(MIN_NODE)) {
|
|
14
|
+
console.error(`\x1b[31m✘ 需要 Node.js >= ${MIN_NODE},当前: ${process.versions.node}\x1b[0m`);
|
|
15
|
+
process.exit(1);
|
|
16
|
+
}
|
|
9
17
|
const SKIP = ['__pycache__', '.pyc', '.pyo', '.egg-info', '.DS_Store', 'Thumbs.db', '.git'];
|
|
10
18
|
const PKG_ROOT = fs.realpathSync(path.join(__dirname, '..'));
|
|
11
19
|
|
|
@@ -209,6 +217,10 @@ function runUninstall(tgt) {
|
|
|
209
217
|
if (!fs.existsSync(manifestPath)) { fail(`未找到安装记录: ${manifestPath}`); process.exit(1); }
|
|
210
218
|
|
|
211
219
|
const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
|
|
220
|
+
if (manifest.manifest_version && manifest.manifest_version > 1) {
|
|
221
|
+
fail(`manifest 版本 ${manifest.manifest_version} 不兼容,请升级 code-abyss 后再卸载`);
|
|
222
|
+
process.exit(1);
|
|
223
|
+
}
|
|
212
224
|
divider(`卸载 Code Abyss v${manifest.version}`);
|
|
213
225
|
|
|
214
226
|
(manifest.installed || []).forEach(f => {
|
|
@@ -245,7 +257,7 @@ function installCore(tgt) {
|
|
|
245
257
|
{ src: 'skills', dest: 'skills' }
|
|
246
258
|
].filter(f => f.dest !== null);
|
|
247
259
|
|
|
248
|
-
const manifest = { version: VERSION, target: tgt, timestamp: new Date().toISOString(), installed: [], backups: [] };
|
|
260
|
+
const manifest = { manifest_version: 1, version: VERSION, target: tgt, timestamp: new Date().toISOString(), installed: [], backups: [] };
|
|
249
261
|
|
|
250
262
|
filesToInstall.forEach(({ src, dest }) => {
|
|
251
263
|
const srcPath = path.join(PKG_ROOT, src);
|
|
@@ -349,6 +361,7 @@ async function installCcline(ctx) {
|
|
|
349
361
|
const { execSync } = require('child_process');
|
|
350
362
|
const cclineDir = path.join(HOME, '.claude', 'ccline');
|
|
351
363
|
const cclineBin = path.join(cclineDir, process.platform === 'win32' ? 'ccline.exe' : 'ccline');
|
|
364
|
+
const errors = [];
|
|
352
365
|
|
|
353
366
|
// 1. 检测是否已有二进制
|
|
354
367
|
let hasBin = fs.existsSync(cclineBin);
|
|
@@ -366,8 +379,9 @@ async function installCcline(ctx) {
|
|
|
366
379
|
else {
|
|
367
380
|
try { execSync('ccline --version', { stdio: 'pipe' }); hasBin = true; ok('ccline 安装成功 (全局)'); } catch (e) {}
|
|
368
381
|
}
|
|
382
|
+
if (!hasBin) errors.push('ccline 二进制安装后仍未检测到');
|
|
369
383
|
} catch (e) {
|
|
370
|
-
|
|
384
|
+
errors.push(`npm install -g @cometix/ccline 失败: ${e.message}`);
|
|
371
385
|
info(`手动安装: ${c.cyn('npm install -g @cometix/ccline')}`);
|
|
372
386
|
info(`或下载: ${c.cyn('https://github.com/Haleclipse/CCometixLine/releases')}`);
|
|
373
387
|
}
|
|
@@ -392,7 +406,7 @@ async function installCcline(ctx) {
|
|
|
392
406
|
// 无打包配置,回退到 ccline --init
|
|
393
407
|
if (hasBin && !fs.existsSync(targetConfig)) {
|
|
394
408
|
try { execSync('ccline --init', { stdio: 'inherit' }); ok('ccline 默认配置已生成'); }
|
|
395
|
-
catch (e) {
|
|
409
|
+
catch (e) { errors.push(`ccline --init 失败: ${e.message}`); }
|
|
396
410
|
}
|
|
397
411
|
}
|
|
398
412
|
|
|
@@ -401,6 +415,13 @@ async function installCcline(ctx) {
|
|
|
401
415
|
ok(`statusLine → ${c.cyn(CCLINE_STATUS_LINE.statusLine.command)}`);
|
|
402
416
|
fs.writeFileSync(ctx.settingsPath, JSON.stringify(ctx.settings, null, 2) + '\n');
|
|
403
417
|
|
|
418
|
+
// 5. 汇总报告
|
|
419
|
+
if (errors.length > 0) {
|
|
420
|
+
console.log('');
|
|
421
|
+
warn(c.b(`ccline 安装有 ${errors.length} 个问题:`));
|
|
422
|
+
errors.forEach(e => fail(` ${e}`));
|
|
423
|
+
}
|
|
424
|
+
|
|
404
425
|
console.log('');
|
|
405
426
|
warn(`需要 ${c.b('Nerd Font')} 字体才能正确显示图标`);
|
|
406
427
|
info(`推荐: FiraCode Nerd Font / JetBrainsMono Nerd Font`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "code-abyss",
|
|
3
|
-
"version": "1.6.15",
|
|
3
|
+
"version": "1.7.0",
|
|
4
4
|
"description": "邪修红尘仙·宿命深渊 - 一键为 Claude Code / Codex CLI 注入邪修人格与安全工程知识体系",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"claude",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"node": ">=18.0.0"
|
|
37
37
|
},
|
|
38
38
|
"scripts": {
|
|
39
|
-
"test": "
|
|
39
|
+
"test": "echo \"No tests configured yet\""
|
|
40
40
|
},
|
|
41
41
|
"dependencies": {
|
|
42
42
|
"@inquirer/prompts": "^7.10.1"
|
package/skills/SKILL.md
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: sage
|
|
3
3
|
description: 邪修红尘仙·神通秘典总纲。智能路由到专业秘典。当魔尊需要任何开发、安全、架构、DevOps、AI 相关能力时,通过此入口路由到最匹配的专业秘典。
|
|
4
|
+
license: MIT
|
|
4
5
|
user-invocable: true
|
|
5
6
|
disable-model-invocation: false
|
|
6
7
|
---
|
|
@@ -12,13 +13,16 @@ disable-model-invocation: false
|
|
|
12
13
|
```
|
|
13
14
|
skills/
|
|
14
15
|
├── domains/ # 知识域秘典
|
|
15
|
-
│ ├── security/ # 攻防秘典
|
|
16
|
-
│ ├── development/ #
|
|
17
|
-
│ ├── architecture/ # 阵法秘典
|
|
18
|
-
│ ├── devops/ # 炼器秘典
|
|
19
|
-
│ ├── ai/ # 丹鼎秘典
|
|
20
|
-
│ ├── frontend-design/ # 美学秘典
|
|
21
|
-
│
|
|
16
|
+
│ ├── security/ # 攻防秘典 (7篇)
|
|
17
|
+
│ ├── development/ # 符箓秘典 (7篇)
|
|
18
|
+
│ ├── architecture/ # 阵法秘典 (5篇)
|
|
19
|
+
│ ├── devops/ # 炼器秘典 (7篇)
|
|
20
|
+
│ ├── ai/ # 丹鼎秘典 (4篇)
|
|
21
|
+
│ ├── frontend-design/ # 美学秘典 (8篇)
|
|
22
|
+
│ ├── data-engineering/ # 数据工程 (合并版)
|
|
23
|
+
│ ├── infrastructure/ # 基础设施 (合并版)
|
|
24
|
+
│ ├── mobile/ # 移动开发 (合并版)
|
|
25
|
+
│ └── orchestration/ # 协同秘典
|
|
22
26
|
├── tools/ # 工具类秘典
|
|
23
27
|
│ ├── verify-module/
|
|
24
28
|
│ ├── verify-security/
|
|
@@ -27,8 +31,7 @@ skills/
|
|
|
27
31
|
│ └── gen-docs/
|
|
28
32
|
├── orchestration/ # 协同执行引擎
|
|
29
33
|
│ └── multi-agent/
|
|
30
|
-
├──
|
|
31
|
-
├── run_skill.py
|
|
34
|
+
├── run_skill.js
|
|
32
35
|
└── SKILL.md
|
|
33
36
|
```
|
|
34
37
|
|
|
@@ -74,12 +77,12 @@ skills/
|
|
|
74
77
|
|
|
75
78
|
| 秘典 | 触发词 | 化身 | 说明 |
|
|
76
79
|
|------|--------|------|------|
|
|
77
|
-
| `red-team` | 渗透、红队、攻击链、C2
|
|
80
|
+
| `red-team` | 渗透、红队、攻击链、C2、横向移动、供应链 | 🔥 赤焰 | 红队攻击技术(含供应链安全) |
|
|
78
81
|
| `pentest` | 渗透测试、Web安全、API安全、漏洞挖掘 | 🔥 赤焰 | 全栈渗透测试 |
|
|
79
82
|
| `code-audit` | 代码审计、安全审计、危险函数、污点分析 | 🔥 赤焰 | 代码安全审计 |
|
|
80
83
|
| `vuln-research` | 漏洞研究、二进制、逆向、Exploit | 🔥 赤焰 | 漏洞研究与利用 |
|
|
81
|
-
| `blue-team` | 蓝队、检测、SOC
|
|
82
|
-
| `threat-intel` | 威胁情报、OSINT
|
|
84
|
+
| `blue-team` | 蓝队、检测、SOC、应急响应、取证、密钥管理 | ❄ 玄冰 | 蓝队防御技术(含密钥管理) |
|
|
85
|
+
| `threat-intel` | 威胁情报、OSINT、威胁狩猎、威胁建模 | 👁 天眼 | 威胁情报分析(含威胁建模) |
|
|
83
86
|
|
|
84
87
|
---
|
|
85
88
|
|
|
@@ -102,12 +105,10 @@ skills/
|
|
|
102
105
|
| 秘典 | 触发词 | 说明 |
|
|
103
106
|
|------|--------|------|
|
|
104
107
|
| `api-design` | API设计、RESTful、GraphQL、OpenAPI | API 设计规范 |
|
|
105
|
-
| `security-arch` |
|
|
108
|
+
| `security-arch` | 安全架构、零信任、IAM、数据安全、合规、GDPR | 安全架构设计(含数据安全与合规) |
|
|
106
109
|
| `cloud-native` | 云原生、容器、Kubernetes、Serverless | 云原生架构 |
|
|
107
|
-
| `data-security` | 数据安全、加密、隐私、合规 | 数据安全架构 |
|
|
108
110
|
| `message-queue` | 消息队列、Kafka、RabbitMQ、事件驱动、CQRS | 消息队列架构 |
|
|
109
111
|
| `caching` | 缓存、Redis、CDN、缓存穿透、缓存雪崩 | 缓存策略设计 |
|
|
110
|
-
| `compliance` | 合规、GDPR、SOC2、审计、数据治理 | 合规审计 |
|
|
111
112
|
|
|
112
113
|
---
|
|
113
114
|
|
|
@@ -129,8 +130,10 @@ skills/
|
|
|
129
130
|
|
|
130
131
|
| 秘典 | 触发词 | 说明 |
|
|
131
132
|
|------|--------|------|
|
|
132
|
-
| `agent-dev` | Agent、LLM应用、RAG
|
|
133
|
+
| `agent-dev` | Agent、LLM应用、RAG | AI Agent 开发 |
|
|
133
134
|
| `llm-security` | LLM安全、提示注入、AI红队 | LLM 安全测试 |
|
|
135
|
+
| `rag-system` | RAG、检索增强、向量数据库 | RAG 系统设计 |
|
|
136
|
+
| `prompt-and-eval` | Prompt工程、模型评估、基准测试 | Prompt 工程与模型评估 |
|
|
134
137
|
|
|
135
138
|
---
|
|
136
139
|
|
|
@@ -141,6 +144,11 @@ skills/
|
|
|
141
144
|
| `ui-aesthetics` | UI美学、色彩、排版、间距、设计令牌、暗色模式 | UI 美学设计 |
|
|
142
145
|
| `component-patterns` | 组件模式、布局、响应式、动画、表单、卡片 | 组件设计模式 |
|
|
143
146
|
| `ux-principles` | UX原则、可用性、无障碍、用户流程、反馈 | UX 设计原则 |
|
|
147
|
+
| `frontend-engineering` | 构建工具、前端测试、性能优化、Vite、Webpack | 前端工程化 |
|
|
148
|
+
| `claymorphism` | Claymorphism、软陶、大圆角、双内阴影 | 软陶设计风格 |
|
|
149
|
+
| `glassmorphism` | Glassmorphism、毛玻璃、模糊、透明 | 毛玻璃设计风格 |
|
|
150
|
+
| `neubrutalism` | Neubrutalism、粗野、粗边框、高饱和 | 新粗野主义风格 |
|
|
151
|
+
| `liquid-glass` | Liquid Glass、Apple、半透明、深度感知 | Apple 液态玻璃风格 |
|
|
144
152
|
|
|
145
153
|
---
|
|
146
154
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ai
|
|
3
3
|
description: AI/LLM 能力索引。Agent 开发、LLM 安全、RAG 系统。当用户提到 AI、LLM、Agent、RAG、Prompt 时路由到此。
|
|
4
|
+
license: MIT
|
|
4
5
|
---
|
|
5
6
|
|
|
6
7
|
# 丹鼎秘典 · AI/LLM 能力中枢
|
|
@@ -12,8 +13,7 @@ description: AI/LLM 能力索引。Agent 开发、LLM 安全、RAG 系统。当
|
|
|
12
13
|
| [agent-dev](agent-dev.md) | Agent 开发 | 多 Agent 编排、工具调用、RAG |
|
|
13
14
|
| [llm-security](llm-security.md) | LLM 安全 | Prompt 注入、越狱防护、输出安全 |
|
|
14
15
|
| [rag-system](rag-system.md) | RAG 系统 | 向量数据库、检索策略、重排算法 |
|
|
15
|
-
| [prompt-
|
|
16
|
-
| [model-evaluation](model-evaluation.md) | 模型评估 | RAGAS、LLM-as-Judge、基准测试 |
|
|
16
|
+
| [prompt-and-eval](prompt-and-eval.md) | Prompt 工程与模型评估 | Few-shot、CoT、ReAct、RAGAS、LLM-as-Judge |
|
|
17
17
|
|
|
18
18
|
## AI 工程原则
|
|
19
19
|
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: prompt-and-eval
|
|
3
|
+
description: Prompt 工程与模型评估。Prompt 模式(Zero-shot、Few-shot、CoT、ReAct、ToT)、模板设计、RAGAS、LLM-as-Judge、基准测试、A/B 测试、持续监控。当用户提到 Prompt 工程、Few-shot、CoT、模型评估、RAGAS、LLM-as-Judge、基准测试时使用。
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Prompt 工程与模型评估
|
|
7
|
+
|
|
8
|
+
## 一、Prompt 模式
|
|
9
|
+
|
|
10
|
+
### 模式对比
|
|
11
|
+
|
|
12
|
+
| 模式 | 复杂度 | 准确性 | Token 消耗 | 适用场景 |
|
|
13
|
+
|------|--------|--------|------------|----------|
|
|
14
|
+
| Zero-shot | 低 | 中 | 低 | 简单任务、通用问题 |
|
|
15
|
+
| Few-shot | 中 | 高 | 中 | 格式化输出、分类 |
|
|
16
|
+
| CoT | 中 | 高 | 中 | 推理、数学、逻辑 |
|
|
17
|
+
| Self-Consistency | 高 | 极高 | 高 | 关键决策 |
|
|
18
|
+
| ToT | 极高 | 极高 | 极高 | 复杂规划 |
|
|
19
|
+
| ReAct | 高 | 高 | 高 | 工具调用、Agent |
|
|
20
|
+
|
|
21
|
+
### Zero-shot
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
# 关键:清晰指令 + 角色设定 + 输出格式
|
|
25
|
+
prompt = """
|
|
26
|
+
你是一位资深安全工程师。
|
|
27
|
+
任务: 将以下文本分类为正面、负面或中性。
|
|
28
|
+
输入: {text}
|
|
29
|
+
输出格式: JSON {"sentiment": "...", "confidence": 0.0-1.0}
|
|
30
|
+
"""
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Few-shot
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
# 关键:2-5 个高质量示例 + 语义相似度选择
|
|
37
|
+
prompt = """
|
|
38
|
+
将评论分类:
|
|
39
|
+
|
|
40
|
+
评论: 音质很棒,佩戴舒适。 → 正面
|
|
41
|
+
评论: 电池续航太差。 → 负面
|
|
42
|
+
评论: {new_review} →
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
# 动态示例选择(LangChain)
|
|
46
|
+
selector = SemanticSimilarityExampleSelector.from_examples(
|
|
47
|
+
examples, OpenAIEmbeddings(), Chroma, k=2
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Chain-of-Thought (CoT)
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
# Zero-shot CoT — 魔法咒语
|
|
55
|
+
prompt = f"问题: {question}\n\n让我们一步步思考:"
|
|
56
|
+
|
|
57
|
+
# Self-Consistency — 多路投票
|
|
58
|
+
answers = [extract_answer(llm.predict(prompt, temperature=0.7)) for _ in range(5)]
|
|
59
|
+
final = Counter(answers).most_common(1)[0][0]
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### ReAct
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
# Thought → Action → Observation 循环
|
|
66
|
+
prompt = """
|
|
67
|
+
工具: Search[query], Calculate[expr], Finish[answer]
|
|
68
|
+
|
|
69
|
+
Thought: 我需要查询埃菲尔铁塔高度
|
|
70
|
+
Action: Search[埃菲尔铁塔高度]
|
|
71
|
+
Observation: 330 米
|
|
72
|
+
Thought: 现在知道答案了
|
|
73
|
+
Action: Finish[330 米]
|
|
74
|
+
"""
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Tree-of-Thoughts (ToT)
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
# 生成多条思路 → 评估打分 → Beam Search 选最优 → 递归扩展
|
|
81
|
+
class TreeOfThoughts:
|
|
82
|
+
def solve(self, problem):
|
|
83
|
+
thoughts = self._generate(problem, n=3)
|
|
84
|
+
scored = self._evaluate(problem, thoughts)
|
|
85
|
+
best = sorted(scored, key=lambda x: x[1], reverse=True)[:self.beam_width]
|
|
86
|
+
# 递归深入最佳路径
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## 二、Prompt 设计技巧
|
|
90
|
+
|
|
91
|
+
### 模板结构
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
messages = [
|
|
95
|
+
{"role": "system", "content": "角色 + 能力边界 + 输出约束"},
|
|
96
|
+
{"role": "user", "content": "### 指令\n{task}\n### 输入\n{input}\n### 输出格式\n{format}"},
|
|
97
|
+
]
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 优化原则
|
|
101
|
+
|
|
102
|
+
| 原则 | 做 | 不做 |
|
|
103
|
+
|------|-----|------|
|
|
104
|
+
| 清晰性 | 具体、可执行、有约束 | 模糊指令 |
|
|
105
|
+
| 结构化 | 分隔符、编号、格式 | 大段文字 |
|
|
106
|
+
| 示例驱动 | 2-5 个高质量示例 | 无示例 |
|
|
107
|
+
| 分步指令 | 步骤 1/2/3 | 一句话包办 |
|
|
108
|
+
| 约束边界 | 说明要做和不做什么 | 无限制 |
|
|
109
|
+
|
|
110
|
+
### 高级技巧
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# 元提示 — 用 LLM 生成 Prompt
|
|
114
|
+
meta = "你是 Prompt 专家。为以下任务生成最优 Prompt: {task}"
|
|
115
|
+
|
|
116
|
+
# 自我批评 — 生成 → 批评 → 改进
|
|
117
|
+
answer = llm(question)
|
|
118
|
+
critique = llm(f"批评: {answer}")
|
|
119
|
+
improved = llm(f"基于批评改进。原答案: {answer}\n批评: {critique}")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Prompt 模板速查
|
|
123
|
+
|
|
124
|
+
```yaml
|
|
125
|
+
代码生成: "生成 {lang} 代码: {desc}。要求: 最佳实践 + 注释 + 异常处理"
|
|
126
|
+
文本摘要: "总结为 {n} 字: {text}。保留关键信息,语言简洁"
|
|
127
|
+
数据提取: "从文本提取 {fields},输出 JSON: {text}"
|
|
128
|
+
NL2SQL: "将自然语言转 SQL: {query}。表结构: {schema}"
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## 三、模型评估
|
|
132
|
+
|
|
133
|
+
### 评估维度
|
|
134
|
+
|
|
135
|
+
| 维度 | 指标 | 适用场景 |
|
|
136
|
+
|------|------|----------|
|
|
137
|
+
| 准确性 | Accuracy, F1, Precision, Recall | 分类、NER |
|
|
138
|
+
| 相关性 | Relevance, Context Precision | RAG、检索 |
|
|
139
|
+
| 忠实性 | Faithfulness, Hallucination Rate | 生成任务 |
|
|
140
|
+
| 效率 | Latency P95, Throughput, Cost/1K | 生产部署 |
|
|
141
|
+
|
|
142
|
+
### RAGAS 框架
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from ragas import evaluate
|
|
146
|
+
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
|
|
147
|
+
|
|
148
|
+
dataset = Dataset.from_dict({
|
|
149
|
+
"question": questions,
|
|
150
|
+
"answer": answers,
|
|
151
|
+
"contexts": contexts,
|
|
152
|
+
"ground_truth": ground_truths,
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
result = evaluate(dataset, metrics=[
|
|
156
|
+
faithfulness, # 答案是否基于上下文(0-1)
|
|
157
|
+
answer_relevancy, # 答案与问题相关度(0-1)
|
|
158
|
+
context_precision, # 检索上下文中相关信息比例(0-1)
|
|
159
|
+
context_recall, # 上下文是否包含所需全部信息(0-1)
|
|
160
|
+
])
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### LLM-as-Judge
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
class LLMJudge:
|
|
167
|
+
def evaluate(self, question, answer, criteria):
|
|
168
|
+
prompt = f"""
|
|
169
|
+
评估答案质量(1-5 分):
|
|
170
|
+
问题: {question}
|
|
171
|
+
答案: {answer}
|
|
172
|
+
标准: {criteria}
|
|
173
|
+
|
|
174
|
+
输出 JSON: {{"accuracy": N, "completeness": N, "clarity": N, "overall": N, "feedback": "..."}}
|
|
175
|
+
"""
|
|
176
|
+
return json.loads(self.llm.predict(prompt))
|
|
177
|
+
|
|
178
|
+
# 成对比较 + ELO 排名
|
|
179
|
+
def pairwise(q, a, b):
|
|
180
|
+
# 返回 {"winner": "A"|"B", "confidence": 0-1}
|
|
181
|
+
...
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### 基准测试速查
|
|
185
|
+
|
|
186
|
+
| 基准 | 评估能力 | 核心指标 |
|
|
187
|
+
|------|----------|----------|
|
|
188
|
+
| MMLU | 多任务语言理解 | Accuracy |
|
|
189
|
+
| HumanEval | 代码生成 | Pass@k |
|
|
190
|
+
| GSM8K | 数学推理 | Accuracy (CoT) |
|
|
191
|
+
| 自定义 | 业务场景 | 加权评分 + 延迟 |
|
|
192
|
+
|
|
193
|
+
### 检索指标
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
def evaluate_retrieval(retrieved, relevant, k=5):
|
|
197
|
+
precision_at_k = len(set(retrieved[:k]) & set(relevant)) / k
|
|
198
|
+
recall_at_k = len(set(retrieved[:k]) & set(relevant)) / len(relevant)
|
|
199
|
+
# MRR: 第一个相关文档的倒数排名
|
|
200
|
+
# NDCG: 归一化折损累积增益
|
|
201
|
+
return {"precision@k": precision_at_k, "recall@k": recall_at_k, "mrr": mrr, "ndcg": ndcg}
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 生成指标
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
# ROUGE: 摘要质量(rouge-1, rouge-2, rouge-l)
|
|
208
|
+
# BLEU: 翻译质量
|
|
209
|
+
from rouge import Rouge
|
|
210
|
+
rouge_scores = Rouge().get_scores(predictions, references, avg=True)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## 四、A/B 测试与监控
|
|
214
|
+
|
|
215
|
+
### A/B 测试
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
class ABTest:
|
|
219
|
+
def __init__(self, variants): # [Variant(name, model, ratio)]
|
|
220
|
+
self.variants = variants
|
|
221
|
+
|
|
222
|
+
def get_variant(self, user_id):
|
|
223
|
+
# 一致性哈希分流
|
|
224
|
+
return self.variants[hash(user_id) % 100 < cumulative_ratio]
|
|
225
|
+
|
|
226
|
+
def check_significance(self, a_scores, b_scores, alpha=0.05):
|
|
227
|
+
t_stat, p_value = stats.ttest_ind(a_scores, b_scores)
|
|
228
|
+
cohens_d = (mean(a_scores) - mean(b_scores)) / pooled_std
|
|
229
|
+
return {"p_value": p_value, "significant": p_value < alpha, "effect": cohens_d}
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### 持续监控
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
from prometheus_client import Counter, Histogram, Gauge
|
|
236
|
+
|
|
237
|
+
request_count = Counter('llm_requests_total', 'Total', ['model', 'status'])
|
|
238
|
+
latency = Histogram('llm_latency_seconds', 'Latency', ['model'])
|
|
239
|
+
quality = Gauge('llm_quality_score', 'Quality', ['model'])
|
|
240
|
+
|
|
241
|
+
# 异常检测: Z-score > 2.0 触发告警
|
|
242
|
+
class AnomalyDetector:
|
|
243
|
+
def check(self, value):
|
|
244
|
+
z = abs((value - mean(self.window)) / std(self.window))
|
|
245
|
+
return z > self.threshold
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## 五、Checklist
|
|
249
|
+
|
|
250
|
+
### Prompt 工程
|
|
251
|
+
|
|
252
|
+
- 清晰指令 + 角色设定 + 输出格式约束
|
|
253
|
+
- 复杂任务用 CoT / ReAct
|
|
254
|
+
- 关键决策用 Self-Consistency 多路投票
|
|
255
|
+
- 版本管理 Prompt,A/B 测试对比效果
|
|
256
|
+
- 迭代优化:测试 → 分析 → 改进
|
|
257
|
+
|
|
258
|
+
### 模型评估
|
|
259
|
+
|
|
260
|
+
- 多维度评估:准确性 + 相关性 + 忠实性 + 效率
|
|
261
|
+
- RAG 用 RAGAS 四指标
|
|
262
|
+
- 自动评估 LLM-as-Judge + 定期人工抽检
|
|
263
|
+
- 标准基准(MMLU/HumanEval)+ 业务自定义基准
|
|
264
|
+
- 上线前 A/B 测试,上线后持续监控 + 异常告警
|
|
265
|
+
- 反馈闭环:收集用户反馈持续改进
|
|
266
|
+
|
|
267
|
+
## 工具速查
|
|
268
|
+
|
|
269
|
+
| 工具 | 用途 |
|
|
270
|
+
|------|------|
|
|
271
|
+
| RAGAS | RAG 专用评估 |
|
|
272
|
+
| LangSmith | LLM 应用监控 |
|
|
273
|
+
| Phoenix | 可观测性平台 |
|
|
274
|
+
| LangChain | Prompt 模板管理 |
|
|
275
|
+
| Guidance | 结构化生成 |
|
|
276
|
+
| OpenAI Evals | 模型评估框架 |
|
|
277
|
+
| W&B | 实验追踪 |
|
|
278
|
+
|
|
279
|
+
---
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: architecture
|
|
3
3
|
description: 架构设计能力索引。API设计、安全架构、云原生、数据安全。当用户提到架构、设计、API、云原生时路由到此。
|
|
4
|
+
license: MIT
|
|
4
5
|
---
|
|
5
6
|
|
|
6
7
|
# 🏗 阵法秘典 · 架构设计能力中枢
|
|
@@ -11,12 +12,10 @@ description: 架构设计能力索引。API设计、安全架构、云原生、
|
|
|
11
12
|
| Skill | 定位 | 核心能力 |
|
|
12
13
|
|-------|------|----------|
|
|
13
14
|
| [api-design](api-design.md) | API 设计 | RESTful、GraphQL、OpenAPI |
|
|
14
|
-
| [security-arch](security-arch.md) | 安全架构 | 零信任、IAM
|
|
15
|
+
| [security-arch](security-arch.md) | 安全架构 | 零信任、IAM、威胁建模、数据安全、合规审计 |
|
|
15
16
|
| [cloud-native](cloud-native.md) | 云原生 | 容器、K8s、Serverless |
|
|
16
|
-
| [data-security](data-security.md) | 数据安全 | 加密、隐私、合规 |
|
|
17
17
|
| [message-queue](message-queue.md) | 消息队列 | Kafka、RabbitMQ、事件驱动 |
|
|
18
18
|
| [caching](caching.md) | 缓存策略 | Redis、CDN、缓存一致性 |
|
|
19
|
-
| [compliance](compliance.md) | 合规审计 | GDPR、SOC2、审计日志 |
|
|
20
19
|
|
|
21
20
|
## 架构原则
|
|
22
21
|
|
|
@@ -208,3 +208,90 @@ E - Elevation of Privilege (权限提升):
|
|
|
208
208
|
- [ ] WAF
|
|
209
209
|
```
|
|
210
210
|
|
|
211
|
+
## 数据安全
|
|
212
|
+
|
|
213
|
+
### 数据分类
|
|
214
|
+
| 级别 | 类型 | 保护措施 | 示例 |
|
|
215
|
+
|------|------|----------|------|
|
|
216
|
+
| 公开 | Public | 无特殊要求 | 产品文档 |
|
|
217
|
+
| 内部 | Internal | 访问控制 | 内部Wiki |
|
|
218
|
+
| 机密 | Confidential | 加密+审计 | 客户数据 |
|
|
219
|
+
| 受限 | Restricted | 加密+审计+MFA | 密钥、PII |
|
|
220
|
+
|
|
221
|
+
### 加密要求
|
|
222
|
+
```yaml
|
|
223
|
+
传输加密:
|
|
224
|
+
- TLS 1.2+(禁用 1.0/1.1)
|
|
225
|
+
- 推荐: TLS_AES_256_GCM_SHA384 / TLS_CHACHA20_POLY1305_SHA256
|
|
226
|
+
- HSTS + 证书管理
|
|
227
|
+
|
|
228
|
+
存储加密:
|
|
229
|
+
- AES-256-GCM(对称)+ 密钥与数据分离(KMS/Vault)+ 定期轮换
|
|
230
|
+
|
|
231
|
+
密码存储:
|
|
232
|
+
- bcrypt (rounds>=12) 或 argon2,禁止 MD5/SHA1
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### 隐私保护
|
|
236
|
+
```yaml
|
|
237
|
+
数据脱敏: 姓名(张**) / 手机(138****1234) / 邮箱(z***@x.com)
|
|
238
|
+
数据最小化: 只收集必要数据 / 限制保留期限 / 定期清理 / 匿名化
|
|
239
|
+
生命周期: 创建(分类)→存储(加密)→使用(审计)→共享(脱敏)→归档(压缩)→销毁(安全删除)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### 数据安全检查清单
|
|
243
|
+
```yaml
|
|
244
|
+
- [ ] 数据资产清单 + 敏感数据识别 + 数据流映射
|
|
245
|
+
- [ ] 传输加密 + 存储加密 + 访问控制 + 数据脱敏
|
|
246
|
+
- [ ] 访问日志 + 异常检测 + DLP
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## 合规审计
|
|
250
|
+
|
|
251
|
+
### 合规框架速查
|
|
252
|
+
| 框架 | 适用范围 | 核心要求 | 处罚 |
|
|
253
|
+
|------|----------|----------|------|
|
|
254
|
+
| GDPR | 欧盟用户数据 | 数据保护、用户权利 | 全球年营收4%或2000万欧(取高者) |
|
|
255
|
+
| SOC 2 | SaaS/云服务 | 安全、可用、处理完整性、机密、隐私 | 失去客户信任 |
|
|
256
|
+
| HIPAA | 医疗健康数据 | PHI保护 | $100-$50K/次,同类违规年度上限 $1.5M |
|
|
257
|
+
| PCI DSS | 支付卡数据 | 持卡人数据保护 | $5K-$100K/月 |
|
|
258
|
+
|
|
259
|
+
### GDPR 用户权利 (DSAR)
|
|
260
|
+
| 权利 | API | SLA |
|
|
261
|
+
|------|-----|-----|
|
|
262
|
+
| 访问权 | `GET /users/{id}/data-export` | 30天 |
|
|
263
|
+
| 删除权 | `DELETE /users/{id}/data` | 30天 |
|
|
264
|
+
| 可携带权 | `GET /users/{id}/data-export?format=json` | 30天 |
|
|
265
|
+
| 限制处理 | `POST /users/{id}/restrict` | 72小时 |
|
|
266
|
+
|
|
267
|
+
### SOC 2 关键控制
|
|
268
|
+
```yaml
|
|
269
|
+
访问控制: MFA强制 + RBAC + 最小权限 + 季度审查 + 离职即撤权
|
|
270
|
+
变更管理: PR审查 + 分环境部署 + 审批流程 + 回滚方案
|
|
271
|
+
监控告警: 安全事件监控 + 异常登录检测 + 数据访问审计
|
|
272
|
+
事件响应: IR计划文档化 + 定期演练 + 72小时通知 + 事后复盘
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### 审计日志要求
|
|
276
|
+
```yaml
|
|
277
|
+
必须审计: 登录/MFA/密码变更 | 权限/角色变更 | 敏感数据访问/导出/删除 | 配置/部署变更
|
|
278
|
+
存储: 不可篡改(WORM) + 加密 + 异地备份
|
|
279
|
+
保留: 安全事件>=1年 / 访问日志>=90天 / 变更>=3年 / 合规审计>=7年
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### 合规即代码 (OPA)
|
|
283
|
+
```rego
|
|
284
|
+
deny[msg] {
|
|
285
|
+
input.resource_type == "aws_s3_bucket"
|
|
286
|
+
input.resource.acl == "public-read"
|
|
287
|
+
msg := sprintf("S3 %s must not be public", [input.resource.name])
|
|
288
|
+
}
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### 合规检查清单
|
|
292
|
+
```yaml
|
|
293
|
+
GDPR: 隐私政策 + 同意管理 + DSAR(30天) + 加密 + 保留策略 + 泄露通知(72h)
|
|
294
|
+
SOC2: 访问控制+MFA + 变更管理 + IR计划 + 漏洞管理 + 安全培训
|
|
295
|
+
审计: 日志覆盖关键操作 + 不可篡改 + 保留期限合规
|
|
296
|
+
```
|
|
297
|
+
|