kc-beta 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/context.js +8 -4
- package/src/agent/engine.js +65 -9
- package/src/agent/pipelines/initializer.js +53 -8
- package/src/agent/session-state.js +1 -0
- package/src/agent/skill-loader.js +13 -1
- package/src/agent/tools/document-parse.js +104 -21
- package/src/agent/tools/document-search.js +24 -8
- package/src/agent/tools/sandbox-exec.js +16 -5
- package/src/agent/tools/workspace-file.js +47 -20
- package/src/agent/workspace.js +24 -1
- package/src/cli/components.js +8 -1
- package/src/cli/config.js +100 -6
- package/src/cli/index.js +14 -1
- package/src/cli/onboard.js +70 -1
- package/src/config.js +43 -3
- package/src/model-tiers.json +153 -0
- package/src/providers.js +63 -66
- package/template/AGENT.md +20 -0
- package/template/skills/en/meta/compliance-judgment/SKILL.md +10 -42
- package/template/skills/en/meta/document-chunking/SKILL.md +32 -0
- package/template/skills/en/meta/document-parsing/SKILL.md +11 -18
- package/template/skills/en/meta/entity-extraction/SKILL.md +13 -28
- package/template/skills/en/meta/tree-processing/SKILL.md +19 -1
- package/template/skills/en/meta-meta/auto-model-selection/SKILL.md +53 -0
- package/template/skills/en/meta-meta/pdf-review-dashboard/SKILL.md +57 -0
- package/template/skills/en/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
- package/template/skills/en/meta-meta/rule-extraction/SKILL.md +24 -1
- package/template/skills/en/meta-meta/skill-authoring/SKILL.md +6 -0
- package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +4 -0
- package/template/skills/zh/meta/compliance-judgment/SKILL.md +41 -262
- package/template/skills/zh/meta/document-chunking/SKILL.md +32 -0
- package/template/skills/zh/meta/document-parsing/SKILL.md +65 -132
- package/template/skills/zh/meta/entity-extraction/SKILL.md +68 -230
- package/template/skills/zh/meta/tree-processing/SKILL.md +82 -194
- package/template/skills/zh/meta-meta/auto-model-selection/SKILL.md +51 -0
- package/template/skills/zh/meta-meta/pdf-review-dashboard/SKILL.md +55 -0
- package/template/skills/zh/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
- package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +79 -164
- package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +64 -185
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +95 -216
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
|
|
3
|
+
|
|
4
|
+
"siliconflow": {
|
|
5
|
+
"conductor": "Pro/zai-org/GLM-5",
|
|
6
|
+
"llm": {
|
|
7
|
+
"tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
|
|
8
|
+
"tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
|
|
9
|
+
"tier3": "Qwen/Qwen3.5-122B-A10B",
|
|
10
|
+
"tier4": "Qwen/Qwen3.5-35B-A3B"
|
|
11
|
+
},
|
|
12
|
+
"vlm": {
|
|
13
|
+
"tier1": "Pro/Qwen/Qwen2.5-VL-72B-Instruct",
|
|
14
|
+
"tier2": "Qwen/Qwen2.5-VL-32B-Instruct",
|
|
15
|
+
"tier3": "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
|
|
19
|
+
"aliyun": {
|
|
20
|
+
"conductor": "qwen3.6-plus",
|
|
21
|
+
"llm": {
|
|
22
|
+
"tier1": "qwen3.6-plus",
|
|
23
|
+
"tier2": "",
|
|
24
|
+
"tier3": "",
|
|
25
|
+
"tier4": ""
|
|
26
|
+
},
|
|
27
|
+
"vlm": {
|
|
28
|
+
"tier1": "qwen-vl-max",
|
|
29
|
+
"tier2": "qwen-vl-plus",
|
|
30
|
+
"tier3": ""
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
|
|
34
|
+
"volcanocloud": {
|
|
35
|
+
"conductor": "doubao-seed-2-0-pro-260215",
|
|
36
|
+
"llm": {
|
|
37
|
+
"tier1": "doubao-seed-2-0-pro-260215, deepseek-v3-2-251201",
|
|
38
|
+
"tier2": "glm-4-7-251222, doubao-1-5-pro-32k-250115",
|
|
39
|
+
"tier3": "doubao-seed-2-0-mini-260215",
|
|
40
|
+
"tier4": "doubao-seed-2-0-lite-260215, doubao-1-5-lite-32k-250115"
|
|
41
|
+
},
|
|
42
|
+
"vlm": {
|
|
43
|
+
"tier1": "doubao-vision-pro-32k-241028",
|
|
44
|
+
"tier2": "doubao-vision-lite-32k-241028",
|
|
45
|
+
"tier3": ""
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
|
|
49
|
+
"anthropic": {
|
|
50
|
+
"conductor": "claude-sonnet-4-20250514",
|
|
51
|
+
"llm": {
|
|
52
|
+
"tier1": "claude-sonnet-4-20250514",
|
|
53
|
+
"tier2": "claude-sonnet-4-20250514",
|
|
54
|
+
"tier3": "claude-haiku-4-5-20251001",
|
|
55
|
+
"tier4": "claude-haiku-4-5-20251001"
|
|
56
|
+
},
|
|
57
|
+
"vlm": {
|
|
58
|
+
"tier1": "claude-sonnet-4-20250514",
|
|
59
|
+
"tier2": "claude-haiku-4-5-20251001",
|
|
60
|
+
"tier3": "claude-haiku-4-5-20251001"
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
|
|
64
|
+
"openai": {
|
|
65
|
+
"conductor": "gpt-4o",
|
|
66
|
+
"llm": {
|
|
67
|
+
"tier1": "gpt-4o",
|
|
68
|
+
"tier2": "gpt-4o-mini",
|
|
69
|
+
"tier3": "gpt-4o-mini",
|
|
70
|
+
"tier4": "gpt-4o-mini"
|
|
71
|
+
},
|
|
72
|
+
"vlm": {
|
|
73
|
+
"tier1": "gpt-4o",
|
|
74
|
+
"tier2": "gpt-4o-mini",
|
|
75
|
+
"tier3": "gpt-4o-mini"
|
|
76
|
+
}
|
|
77
|
+
},
|
|
78
|
+
|
|
79
|
+
"zhipu": {
|
|
80
|
+
"conductor": "glm-4-plus",
|
|
81
|
+
"llm": {
|
|
82
|
+
"tier1": "glm-4-plus",
|
|
83
|
+
"tier2": "glm-4-air",
|
|
84
|
+
"tier3": "glm-4-flash",
|
|
85
|
+
"tier4": "glm-4-flash"
|
|
86
|
+
},
|
|
87
|
+
"vlm": {
|
|
88
|
+
"tier1": "glm-4v-plus",
|
|
89
|
+
"tier2": "glm-4v",
|
|
90
|
+
"tier3": "glm-4v-flash"
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
|
|
94
|
+
"minimax": {
|
|
95
|
+
"conductor": "MiniMax-M2.5",
|
|
96
|
+
"llm": {
|
|
97
|
+
"tier1": "MiniMax-M2.5",
|
|
98
|
+
"tier2": "MiniMax-M2.5",
|
|
99
|
+
"tier3": "MiniMax-M1",
|
|
100
|
+
"tier4": "MiniMax-M1"
|
|
101
|
+
},
|
|
102
|
+
"vlm": {
|
|
103
|
+
"tier1": "",
|
|
104
|
+
"tier2": "",
|
|
105
|
+
"tier3": ""
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
|
|
109
|
+
"openrouter": {
|
|
110
|
+
"conductor": "anthropic/claude-sonnet-4-20250514",
|
|
111
|
+
"llm": {
|
|
112
|
+
"tier1": "anthropic/claude-sonnet-4-20250514",
|
|
113
|
+
"tier2": "google/gemini-2.5-flash",
|
|
114
|
+
"tier3": "google/gemini-2.5-flash",
|
|
115
|
+
"tier4": "google/gemini-2.5-flash"
|
|
116
|
+
},
|
|
117
|
+
"vlm": {
|
|
118
|
+
"tier1": "anthropic/claude-sonnet-4-20250514",
|
|
119
|
+
"tier2": "google/gemini-2.5-flash",
|
|
120
|
+
"tier3": "google/gemini-2.5-flash"
|
|
121
|
+
}
|
|
122
|
+
},
|
|
123
|
+
|
|
124
|
+
"bedrock": {
|
|
125
|
+
"conductor": "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
126
|
+
"llm": {
|
|
127
|
+
"tier1": "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
128
|
+
"tier2": "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
129
|
+
"tier3": "anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
130
|
+
"tier4": "anthropic.claude-haiku-4-5-20251001-v1:0"
|
|
131
|
+
},
|
|
132
|
+
"vlm": {
|
|
133
|
+
"tier1": "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
134
|
+
"tier2": "anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
135
|
+
"tier3": "anthropic.claude-haiku-4-5-20251001-v1:0"
|
|
136
|
+
}
|
|
137
|
+
},
|
|
138
|
+
|
|
139
|
+
"custom": {
|
|
140
|
+
"conductor": "",
|
|
141
|
+
"llm": {
|
|
142
|
+
"tier1": "",
|
|
143
|
+
"tier2": "",
|
|
144
|
+
"tier3": "",
|
|
145
|
+
"tier4": ""
|
|
146
|
+
},
|
|
147
|
+
"vlm": {
|
|
148
|
+
"tier1": "",
|
|
149
|
+
"tier2": "",
|
|
150
|
+
"tier3": ""
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
package/src/providers.js
CHANGED
|
@@ -2,9 +2,32 @@
|
|
|
2
2
|
* Provider registry for Multi-LLM support.
|
|
3
3
|
* Centralizes provider metadata, default models, and model classification.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
5
|
+
* Model tier assignments (LLM + VLM) are loaded from model-tiers.json
|
|
6
|
+
* so they can be updated without touching code.
|
|
6
7
|
*/
|
|
7
8
|
|
|
9
|
+
import { readFileSync } from "node:fs";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import { dirname, join } from "node:path";
|
|
12
|
+
|
|
13
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
14
|
+
const __dirname = dirname(__filename);
|
|
15
|
+
|
|
16
|
+
/** @type {Record<string, {conductor: string, llm: Record<string,string>, vlm: Record<string,string>}>} */
|
|
17
|
+
let MODEL_TIERS;
|
|
18
|
+
try {
|
|
19
|
+
MODEL_TIERS = JSON.parse(
|
|
20
|
+
readFileSync(join(__dirname, "model-tiers.json"), "utf-8")
|
|
21
|
+
);
|
|
22
|
+
} catch {
|
|
23
|
+
MODEL_TIERS = {};
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/** Helper: get the tier config for a provider; falls back to an empty config (blank conductor, no LLM/VLM tiers) when nothing was loaded for that provider. */
|
|
27
|
+
function getTierConfig(providerId) {
|
|
28
|
+
return MODEL_TIERS[providerId] || { conductor: "", llm: {}, vlm: {} };
|
|
29
|
+
}
|
|
30
|
+
|
|
8
31
|
const PROVIDERS = [
|
|
9
32
|
{
|
|
10
33
|
id: "siliconflow",
|
|
@@ -13,13 +36,9 @@ const PROVIDERS = [
|
|
|
13
36
|
authType: "bearer",
|
|
14
37
|
apiFormat: "openai",
|
|
15
38
|
modelsEndpoint: "/models",
|
|
16
|
-
defaultModel: "glm-5",
|
|
17
|
-
defaultTiers:
|
|
18
|
-
|
|
19
|
-
tier2: "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
|
|
20
|
-
tier3: "Qwen/Qwen3.5-122B-A10B",
|
|
21
|
-
tier4: "Qwen/Qwen3.5-35B-A3B",
|
|
22
|
-
},
|
|
39
|
+
defaultModel: getTierConfig("siliconflow").conductor || "glm-5",
|
|
40
|
+
defaultTiers: getTierConfig("siliconflow").llm,
|
|
41
|
+
defaultVlm: getTierConfig("siliconflow").vlm,
|
|
23
42
|
labels: {
|
|
24
43
|
en: "SiliconFlow (recommended for China)",
|
|
25
44
|
zh: "SiliconFlow(国内推荐)",
|
|
@@ -35,13 +54,9 @@ const PROVIDERS = [
|
|
|
35
54
|
apiFormat: "openai",
|
|
36
55
|
modelsEndpoint: null, // Aliyun coding plan doesn't support /models
|
|
37
56
|
supportsCodingPlanKey: true,
|
|
38
|
-
defaultModel: "
|
|
39
|
-
defaultTiers:
|
|
40
|
-
|
|
41
|
-
tier2: "",
|
|
42
|
-
tier3: "",
|
|
43
|
-
tier4: "",
|
|
44
|
-
},
|
|
57
|
+
defaultModel: getTierConfig("aliyun").conductor || "qwen3.6-plus",
|
|
58
|
+
defaultTiers: getTierConfig("aliyun").llm,
|
|
59
|
+
defaultVlm: getTierConfig("aliyun").vlm,
|
|
45
60
|
// Curated model list (coding plan doesn't have /models endpoint)
|
|
46
61
|
curatedModels: [
|
|
47
62
|
{ id: "qwen3.6-plus", ownedBy: "qwen" },
|
|
@@ -66,13 +81,9 @@ const PROVIDERS = [
|
|
|
66
81
|
authType: "bearer",
|
|
67
82
|
apiFormat: "openai",
|
|
68
83
|
modelsEndpoint: null, // VolcanoCloud coding plan — use curated list
|
|
69
|
-
defaultModel: "
|
|
70
|
-
defaultTiers:
|
|
71
|
-
|
|
72
|
-
tier2: "glm-4-7-251222, doubao-1-5-pro-32k-250115",
|
|
73
|
-
tier3: "doubao-seed-2-0-mini-260215",
|
|
74
|
-
tier4: "doubao-seed-2-0-lite-260215, doubao-1-5-lite-32k-250115",
|
|
75
|
-
},
|
|
84
|
+
defaultModel: getTierConfig("volcanocloud").conductor || "doubao-seed-2-0-pro-260215",
|
|
85
|
+
defaultTiers: getTierConfig("volcanocloud").llm,
|
|
86
|
+
defaultVlm: getTierConfig("volcanocloud").vlm,
|
|
76
87
|
curatedModels: [
|
|
77
88
|
{ id: "doubao-seed-2-0-pro-260215", ownedBy: "bytedance" },
|
|
78
89
|
{ id: "deepseek-v3-2-251201", ownedBy: "deepseek" },
|
|
@@ -94,13 +105,9 @@ const PROVIDERS = [
|
|
|
94
105
|
authType: "x-api-key",
|
|
95
106
|
apiFormat: "anthropic",
|
|
96
107
|
modelsEndpoint: null, // Use curated list
|
|
97
|
-
defaultModel: "claude-sonnet-4-20250514",
|
|
98
|
-
defaultTiers:
|
|
99
|
-
|
|
100
|
-
tier2: "claude-sonnet-4-20250514",
|
|
101
|
-
tier3: "claude-haiku-4-5-20251001",
|
|
102
|
-
tier4: "claude-haiku-4-5-20251001",
|
|
103
|
-
},
|
|
108
|
+
defaultModel: getTierConfig("anthropic").conductor || "claude-sonnet-4-20250514",
|
|
109
|
+
defaultTiers: getTierConfig("anthropic").llm,
|
|
110
|
+
defaultVlm: getTierConfig("anthropic").vlm,
|
|
104
111
|
curatedModels: [
|
|
105
112
|
{ id: "claude-opus-4-20250514", ownedBy: "anthropic" },
|
|
106
113
|
{ id: "claude-sonnet-4-20250514", ownedBy: "anthropic" },
|
|
@@ -118,13 +125,9 @@ const PROVIDERS = [
|
|
|
118
125
|
authType: "bearer",
|
|
119
126
|
apiFormat: "openai",
|
|
120
127
|
modelsEndpoint: "/models",
|
|
121
|
-
defaultModel: "gpt-4o",
|
|
122
|
-
defaultTiers:
|
|
123
|
-
|
|
124
|
-
tier2: "gpt-4o-mini",
|
|
125
|
-
tier3: "gpt-4o-mini",
|
|
126
|
-
tier4: "gpt-4o-mini",
|
|
127
|
-
},
|
|
128
|
+
defaultModel: getTierConfig("openai").conductor || "gpt-4o",
|
|
129
|
+
defaultTiers: getTierConfig("openai").llm,
|
|
130
|
+
defaultVlm: getTierConfig("openai").vlm,
|
|
128
131
|
labels: {
|
|
129
132
|
en: "OpenAI",
|
|
130
133
|
zh: "OpenAI",
|
|
@@ -137,13 +140,9 @@ const PROVIDERS = [
|
|
|
137
140
|
authType: "bearer",
|
|
138
141
|
apiFormat: "openai",
|
|
139
142
|
modelsEndpoint: "/models",
|
|
140
|
-
defaultModel: "glm-4-plus",
|
|
141
|
-
defaultTiers:
|
|
142
|
-
|
|
143
|
-
tier2: "glm-4-air",
|
|
144
|
-
tier3: "glm-4-flash",
|
|
145
|
-
tier4: "glm-4-flash",
|
|
146
|
-
},
|
|
143
|
+
defaultModel: getTierConfig("zhipu").conductor || "glm-4-plus",
|
|
144
|
+
defaultTiers: getTierConfig("zhipu").llm,
|
|
145
|
+
defaultVlm: getTierConfig("zhipu").vlm,
|
|
147
146
|
labels: {
|
|
148
147
|
en: "Zhipu GLM",
|
|
149
148
|
zh: "智谱 GLM",
|
|
@@ -156,13 +155,9 @@ const PROVIDERS = [
|
|
|
156
155
|
authType: "bearer",
|
|
157
156
|
apiFormat: "openai",
|
|
158
157
|
modelsEndpoint: "/models",
|
|
159
|
-
defaultModel: "MiniMax-M2.5",
|
|
160
|
-
defaultTiers:
|
|
161
|
-
|
|
162
|
-
tier2: "MiniMax-M2.5",
|
|
163
|
-
tier3: "MiniMax-M1",
|
|
164
|
-
tier4: "MiniMax-M1",
|
|
165
|
-
},
|
|
158
|
+
defaultModel: getTierConfig("minimax").conductor || "MiniMax-M2.5",
|
|
159
|
+
defaultTiers: getTierConfig("minimax").llm,
|
|
160
|
+
defaultVlm: getTierConfig("minimax").vlm,
|
|
166
161
|
labels: {
|
|
167
162
|
en: "MiniMax",
|
|
168
163
|
zh: "MiniMax",
|
|
@@ -175,13 +170,9 @@ const PROVIDERS = [
|
|
|
175
170
|
authType: "bearer",
|
|
176
171
|
apiFormat: "openai",
|
|
177
172
|
modelsEndpoint: "/models",
|
|
178
|
-
defaultModel: "anthropic/claude-sonnet-4-20250514",
|
|
179
|
-
defaultTiers:
|
|
180
|
-
|
|
181
|
-
tier2: "google/gemini-2.5-flash",
|
|
182
|
-
tier3: "google/gemini-2.5-flash",
|
|
183
|
-
tier4: "google/gemini-2.5-flash",
|
|
184
|
-
},
|
|
173
|
+
defaultModel: getTierConfig("openrouter").conductor || "anthropic/claude-sonnet-4-20250514",
|
|
174
|
+
defaultTiers: getTierConfig("openrouter").llm,
|
|
175
|
+
defaultVlm: getTierConfig("openrouter").vlm,
|
|
185
176
|
labels: {
|
|
186
177
|
en: "OpenRouter",
|
|
187
178
|
zh: "OpenRouter",
|
|
@@ -194,13 +185,9 @@ const PROVIDERS = [
|
|
|
194
185
|
authType: "aws-sigv4",
|
|
195
186
|
apiFormat: "anthropic",
|
|
196
187
|
modelsEndpoint: null,
|
|
197
|
-
defaultModel: "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
198
|
-
defaultTiers:
|
|
199
|
-
|
|
200
|
-
tier2: "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
201
|
-
tier3: "anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
202
|
-
tier4: "anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
203
|
-
},
|
|
188
|
+
defaultModel: getTierConfig("bedrock").conductor || "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
189
|
+
defaultTiers: getTierConfig("bedrock").llm,
|
|
190
|
+
defaultVlm: getTierConfig("bedrock").vlm,
|
|
204
191
|
labels: {
|
|
205
192
|
en: "AWS Bedrock (not yet supported)",
|
|
206
193
|
zh: "AWS Bedrock(暂未支持)",
|
|
@@ -213,8 +200,9 @@ const PROVIDERS = [
|
|
|
213
200
|
authType: "bearer",
|
|
214
201
|
apiFormat: "openai",
|
|
215
202
|
modelsEndpoint: "/models",
|
|
216
|
-
defaultModel: "",
|
|
217
|
-
defaultTiers:
|
|
203
|
+
defaultModel: getTierConfig("custom").conductor || "",
|
|
204
|
+
defaultTiers: getTierConfig("custom").llm,
|
|
205
|
+
defaultVlm: getTierConfig("custom").vlm,
|
|
218
206
|
labels: {
|
|
219
207
|
en: "Custom (enter base URL)",
|
|
220
208
|
zh: "自定义(输入接口地址)",
|
|
@@ -368,3 +356,12 @@ export function getCuratedModels(providerId) {
|
|
|
368
356
|
const provider = getProviderById(providerId);
|
|
369
357
|
return provider?.curatedModels || null;
|
|
370
358
|
}
|
|
359
|
+
|
|
360
|
+
/**
|
|
361
|
+
* Get the raw model tier config for a provider (from model-tiers.json); unknown providers yield an empty config (blank conductor, no llm/vlm tiers).
|
|
362
|
+
* @param {string} providerId
|
|
363
|
+
* @returns {{ conductor: string, llm: Record<string,string>, vlm: Record<string,string> }}
|
|
364
|
+
*/
|
|
365
|
+
export function getModelTierConfig(providerId) {
|
|
366
|
+
return getTierConfig(providerId);
|
|
367
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# AGENT.md — Project Context
|
|
2
|
+
|
|
3
|
+
This file is your per-project memory. Update it as you learn about the project.
|
|
4
|
+
The content here is injected into your system prompt on every turn.
|
|
5
|
+
|
|
6
|
+
## Project
|
|
7
|
+
|
|
8
|
+
<!-- What domain? What regulations? What documents? Fill this in during bootstrap. -->
|
|
9
|
+
|
|
10
|
+
## Decisions
|
|
11
|
+
|
|
12
|
+
<!-- Key decisions made with the developer user. Rule granularity, accuracy targets, model choices, scope boundaries. -->
|
|
13
|
+
|
|
14
|
+
## Domain Notes
|
|
15
|
+
|
|
16
|
+
<!-- Terminology, document formats, naming conventions, edge cases specific to this domain. -->
|
|
17
|
+
|
|
18
|
+
## User Preferences
|
|
19
|
+
|
|
20
|
+
<!-- How the developer user prefers to communicate. Reporting format, language, level of detail. -->
|
|
@@ -9,53 +9,17 @@ Judgment is the moment of truth. You have the extracted entity. You have the rul
|
|
|
9
9
|
|
|
10
10
|
## The Judgment Spectrum
|
|
11
11
|
|
|
12
|
-
Rules
|
|
12
|
+
Rules range from trivially deterministic to deeply semantic. Pick the right tool for each rule.
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
**Deterministic** — threshold checks, format validation, date arithmetic, cross-field consistency. Pure Python: free, instant, deterministic.
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
**Semantic** — adequacy, completeness, consistency, compliance with templates, detecting misleading or suggestive language, assessing whether a description is fair and balanced. These require language understanding — use worker LLM.
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
```python
|
|
20
|
-
result = "pass" if extracted_ratio >= 8.0 else "fail"
|
|
21
|
-
```
|
|
22
|
-
- **Format validation**: "The loan number must match pattern XX-YYYY-ZZZZZZ."
|
|
23
|
-
```python
|
|
24
|
-
result = "pass" if re.match(r"[A-Z]{2}-\d{4}-\d{6}", loan_number) else "fail"
|
|
25
|
-
```
|
|
26
|
-
- **Date arithmetic**: "The contract must be signed within 30 days of application."
|
|
27
|
-
```python
|
|
28
|
-
result = "pass" if (sign_date - app_date).days <= 30 else "fail"
|
|
29
|
-
```
|
|
30
|
-
- **Cross-field consistency**: "The total must equal the sum of line items."
|
|
31
|
-
```python
|
|
32
|
-
result = "pass" if abs(total - sum(items)) < 0.01 else "fail"
|
|
33
|
-
```
|
|
18
|
+
Many real compliance rules require semantic judgment. "The risk disclosure must adequately describe the key risks" cannot be checked with regex or Python. "The contract description must not be misleading or suggestive" requires deep language understanding. Use worker LLM for these without hesitation.
|
|
34
19
|
|
|
35
|
-
|
|
20
|
+
Some rules combine both: extract a number (deterministic), compare to threshold (deterministic), then assess the explanation if borderline (semantic). The mix depends on the rule.
|
|
36
21
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
Rules requiring language understanding:
|
|
40
|
-
|
|
41
|
-
- **Adequacy**: "The risk disclosure must adequately describe the key risks."
|
|
42
|
-
- **Completeness**: "The management discussion must address financial performance, strategic outlook, and market conditions."
|
|
43
|
-
- **Consistency**: "The executive summary must be consistent with the detailed findings."
|
|
44
|
-
- **Compliance with template**: "The report must follow the format specified in Regulation Appendix A."
|
|
45
|
-
|
|
46
|
-
For these, design an LLM prompt:
|
|
47
|
-
1. Provide the rule text (what constitutes compliance).
|
|
48
|
-
2. Provide the extracted content (what the document says).
|
|
49
|
-
3. Ask for a structured verdict: pass/fail, reasoning, and comment.
|
|
50
|
-
4. Ask the model to be conservative — flag as fail only when clearly non-compliant. When truly ambiguous, use a "partial" or "uncertain" result rather than a hard fail.
|
|
51
|
-
|
|
52
|
-
### Hybrid Judgments (Most Common)
|
|
53
|
-
|
|
54
|
-
Most rules combine deterministic and semantic elements:
|
|
55
|
-
- Extract the number (regex) → compare to threshold (Python) → if borderline, assess the explanation (LLM).
|
|
56
|
-
- Check that a section exists (deterministic) → check that it covers required topics (semantic).
|
|
57
|
-
|
|
58
|
-
Design the pipeline to run cheap steps first. Only invoke the LLM when the deterministic check is insufficient.
|
|
22
|
+
The right method is whatever achieves the required accuracy at the lowest cost. Simple threshold checks don't need an LLM. Semantic assessments don't benefit from Python. Most projects will have a mix — let the nature of each rule determine the method.
|
|
59
23
|
|
|
60
24
|
## Output Format
|
|
61
25
|
|
|
@@ -80,6 +44,10 @@ For each rule × document combination:
|
|
|
80
44
|
- **error**: Something went wrong during extraction or judgment (parsing failure, API error). Needs investigation.
|
|
81
45
|
- **uncertain**: The judgment is ambiguous. May need human review.
|
|
82
46
|
|
|
47
|
+
**Design exit criteria first:** Before writing judgment logic for a rule, define the exit conditions: what constitutes pass, what constitutes fail, what triggers escalation to human, how to handle empty/missing values, what value ranges are valid. Explicit exit criteria prevent ambiguous or inconsistent judgment.
|
|
48
|
+
|
|
49
|
+
**Prompt design:** Design prompts for what you want, not against what you don't want. Telling the model "Don't include reasoning" is less reliable than extracting the verdict from structured output in postprocessing. Use output filtering instead of prompt negation.
|
|
50
|
+
|
|
83
51
|
**Comments:**
|
|
84
52
|
- Required only when result is `fail`. Skip for `pass` unless the developer user specifically requests pass comments.
|
|
85
53
|
- Be concise and factual: "Capital adequacy ratio is 7.2%, below the regulatory minimum of 8.0%."
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: document-chunking
|
|
3
|
+
description: >
|
|
4
|
+
Fast, cheap chunking for processing batches of sample and input documents.
|
|
5
|
+
Use when you need to split documents into manageable pieces for initial observation,
|
|
6
|
+
data sensibility checks, or feeding to extraction workflows. Not for production
|
|
7
|
+
verification chunking — for that, use tree-processing to design a tailored chunking script.
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Document Chunking
|
|
11
|
+
|
|
12
|
+
Split documents into pieces for downstream processing. This is the fast, cheap version — for batch processing of samples and inputs, not for precision verification workflows.
|
|
13
|
+
|
|
14
|
+
## Methods
|
|
15
|
+
|
|
16
|
+
**Page-level splits** — simplest. Each page is a chunk. Works for most document processing where you need to iterate over content.
|
|
17
|
+
|
|
18
|
+
**Fixed-size chunks** — split by character/token count with overlap. Good for search and initial observation. Typical: 2000-4000 chars with 200 char overlap.
|
|
19
|
+
|
|
20
|
+
**Header-based splits** — detect section headers and split at boundaries. Preserves semantic units. Use regex patterns for the document's header convention.
|
|
21
|
+
|
|
22
|
+
## When to Use What
|
|
23
|
+
|
|
24
|
+
Pick the simplest method that serves the task:
|
|
25
|
+
- Batch document observation → page-level
|
|
26
|
+
- Full-text search index → fixed-size with overlap
|
|
27
|
+
- Section-level extraction → header-based
|
|
28
|
+
- Table of contents available → parse TOC for structure
|
|
29
|
+
|
|
30
|
+
## Relationship to tree-processing
|
|
31
|
+
|
|
32
|
+
This skill is for quick, cheap chunking during exploration and batch processing. When you need production-grade chunking for verification workflows — where the chunking mechanism must be precise, consistent, and coded as a script — use `tree-processing` instead.
|
|
@@ -12,28 +12,21 @@ Parsing is the foundation. If the text is wrong, everything downstream is wrong.
|
|
|
12
12
|
Start with the simplest parser. Escalate only when necessary. This is not about saving money — it is about producing the most reliable output. Simple parsers have fewer failure modes.
|
|
13
13
|
|
|
14
14
|
### Level 1: Direct Text Extraction
|
|
15
|
-
- Tool:
|
|
15
|
+
- Tool: pdfjs-dist or similar PDF text extraction.
|
|
16
16
|
- When: Well-formed digital PDFs with embedded text. This covers most modern business documents.
|
|
17
17
|
- Output: Raw text with basic structure preserved (paragraphs, basic formatting).
|
|
18
18
|
- Limitations: Tables may come out as messy text. Charts and images are invisible. Scanned PDFs produce nothing.
|
|
19
19
|
|
|
20
|
-
### Level 2:
|
|
21
|
-
- Tool:
|
|
22
|
-
- When: Level 1 produces
|
|
23
|
-
- Output:
|
|
24
|
-
-
|
|
25
|
-
|
|
26
|
-
### Level 3:
|
|
27
|
-
- Tool:
|
|
28
|
-
- When:
|
|
29
|
-
-
|
|
30
|
-
- Limitations: Slower, costs API calls, may introduce OCR errors.
|
|
31
|
-
|
|
32
|
-
### Level 4: Vision Model Interpretation
|
|
33
|
-
- Tool: High-capability vision models (OCR_MODEL_TIER1).
|
|
34
|
-
- When: Complex tables that text extraction cannot parse correctly, charts that need data point extraction, mixed text-and-image layouts.
|
|
35
|
-
- Output: Structured interpretation of visual content (table as markdown, chart data as JSON).
|
|
36
|
-
- Limitations: Expensive, slow. Reserve for when the visual content genuinely needs interpretation.
|
|
20
|
+
### Level 2: Provider VLM (Vision Language Model)
|
|
21
|
+
- Tool: VLMs from the configured provider (VLM_TIER3 for cheap OCR, VLM_TIER1 for complex interpretation).
|
|
22
|
+
- When: Level 1 produces garbled/incomplete text, scanned PDFs, image-based PDFs.
|
|
23
|
+
- Output: Recognized text from page images, or structured interpretation (table as markdown, chart data as JSON).
|
|
24
|
+
- Calling a provider VLM is more convenient and reliable than deploying local OCR. Use the cheapest VLM tier first; escalate to a more capable tier for complex tables/charts.
|
|
25
|
+
|
|
26
|
+
### Level 3: MineRU API or Local Tools (Optional)
|
|
27
|
+
- Tool: MineRU API, pdfplumber, or locally deployed OCR — if configured.
|
|
28
|
+
- When: Provider VLM is unavailable or too expensive for batch processing.
|
|
29
|
+
- These are optional fallbacks. Most users will use Level 1 + Level 2.
|
|
37
30
|
|
|
38
31
|
## Quality Detection
|
|
39
32
|
|
|
@@ -33,40 +33,21 @@ The value could be anywhere, or the rule applies to the document as a whole.
|
|
|
33
33
|
|
|
34
34
|
## Method Selection
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
Extraction method selection is a cost-accuracy search. The goal is finding the cheapest method that meets the accuracy threshold. Regex is the smallest, cheapest "model" — zero cost, instant, deterministic. Worker LLM is more capable but costs tokens and time. Any search strategy is valid: try the cheapest first and escalate, try the most capable first and downgrade, bisect, or jump directly to a known-good method based on past experience in AGENT.md.
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
### Available Methods
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
- **Percentages**: `\d+\.?\d*\s*%`
|
|
43
|
-
- **Identifiers**: Loan numbers, registration codes, ID numbers — usually fixed formats.
|
|
44
|
-
- **Specific phrases**: "I hereby agree" or "本人同意" — exact string matching.
|
|
40
|
+
**Regex / Python** — Cost: zero. Speed: instant. Deterministic.
|
|
41
|
+
Works well for: dates, monetary amounts, percentages, identifiers, fixed phrases, any value with a predictable format.
|
|
45
42
|
|
|
46
|
-
|
|
43
|
+
**Worker LLM** — Cost: API tokens. Speed: seconds. Semantic understanding.
|
|
44
|
+
Works well for: contextual interpretation, conditional values, semantic matching, ambiguous structures, suggestive or misleading language detection, table interpretation, anything requiring understanding rather than pattern matching.
|
|
47
45
|
|
|
48
|
-
|
|
46
|
+
Many real verification tasks require semantic understanding — "is this description misleading?", "does this clause adequately disclose risk?", "is this guarantor's business description consistent with their stated industry?" — regex cannot handle these. Use worker LLM without hesitation for such tasks.
|
|
49
47
|
|
|
50
|
-
|
|
48
|
+
### The Search
|
|
51
49
|
|
|
52
|
-
|
|
53
|
-
- **Conditional values**: "the interest rate, including any adjustments" — requires understanding what constitutes an adjustment.
|
|
54
|
-
- **Semantic matching**: "adequate risk disclosure" — requires judgment about what text constitutes risk disclosure.
|
|
55
|
-
- **Table interpretation**: When a table's structure is not uniform and regex cannot reliably extract cells.
|
|
56
|
-
|
|
57
|
-
Design the LLM prompt to:
|
|
58
|
-
1. Include the narrowed context (from tree processing).
|
|
59
|
-
2. Specify exactly what to extract.
|
|
60
|
-
3. Define the output format (JSON with named fields).
|
|
61
|
-
4. Provide one example if the extraction is non-obvious.
|
|
62
|
-
|
|
63
|
-
### Hybrid Approach
|
|
64
|
-
|
|
65
|
-
Often the best strategy:
|
|
66
|
-
1. Use regex to extract candidates (fast, catches obvious matches).
|
|
67
|
-
2. If regex finds a confident match, use it.
|
|
68
|
-
3. If regex fails or is uncertain, fall back to LLM extraction.
|
|
69
|
-
4. Use LLM to validate regex results when confidence matters.
|
|
50
|
+
If a method's results fall below the accuracy threshold, try a different method or a more capable model. If regex meets the accuracy threshold, keep it — it's free. If regex produces results below the threshold, escalate to worker LLM. If a cheap worker LLM isn't accurate enough, try a more capable tier. Record what works for each extraction type in AGENT.md for future reference.
|
|
70
51
|
|
|
71
52
|
## Schema Design
|
|
72
53
|
|
|
@@ -118,6 +99,10 @@ Every extraction should carry a confidence estimate:
|
|
|
118
99
|
|
|
119
100
|
These are starting points. Calibrate based on actual accuracy (see `confidence-system`).
|
|
120
101
|
|
|
102
|
+
## Prompt Design: Ask For What You Want
|
|
103
|
+
|
|
104
|
+
Design prompts for what you want, not against what you don't want. "Don't include explanations" in a prompt is less reliable than stripping non-JSON text from the output in postprocessing. If you need to tell the LLM not to do something, use output filtering instead of prompt negation.
|
|
105
|
+
|
|
121
106
|
## Fitting Worker LLM Context
|
|
122
107
|
|
|
123
108
|
When designing extraction for worker LLM workflows:
|
|
@@ -1,12 +1,30 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: tree-processing
|
|
3
|
-
description:
|
|
3
|
+
description: >
|
|
4
|
+
Design production-grade document chunking mechanisms for verification workflows. Use when
|
|
5
|
+
building the chunking step of a workflow that will run repeatedly on many documents.
|
|
6
|
+
The approach: observe sample documents, find structural patterns, write a chunking script
|
|
7
|
+
in code, and run that script in production. Also use for navigating large documents via
|
|
8
|
+
hierarchical structure when a rule targets a specific section.
|
|
9
|
+
For quick, cheap batch chunking during exploration, use document-chunking instead.
|
|
4
10
|
---
|
|
5
11
|
|
|
6
12
|
# Tree Processing
|
|
7
13
|
|
|
8
14
|
Most verification rules do not need the entire document. They need a specific section, a specific table, a specific disclosure. The tree is your map for navigating large documents efficiently.
|
|
9
15
|
|
|
16
|
+
## Production Chunking Methodology
|
|
17
|
+
|
|
18
|
+
For verification workflows that process many documents, the chunking mechanism must be precise, consistent, and fast. The approach:
|
|
19
|
+
|
|
20
|
+
1. **Observe**: Read 3-5 sample documents. Note their structure — headers, numbering, section patterns.
|
|
21
|
+
2. **Find patterns**: Identify what's consistent (header format, numbering convention, TOC structure).
|
|
22
|
+
3. **Write code**: Design a chunking script (regex-based splitter, header detector, TOC parser) that captures the pattern.
|
|
23
|
+
4. **Test**: Run the script on samples. Verify it produces correct, consistent chunks.
|
|
24
|
+
5. **Deploy**: The script runs in production workflows. It's deterministic, free, and fast.
|
|
25
|
+
|
|
26
|
+
This is different from `document-chunking` (quick, cheap splits for exploration). Production chunking is a one-time design effort that pays off across all documents of the same type.
|
|
27
|
+
|
|
10
28
|
## Why Trees
|
|
11
29
|
|
|
12
30
|
Two reasons:
|