kc-beta 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/package.json +1 -1
  2. package/src/agent/context.js +8 -4
  3. package/src/agent/engine.js +65 -9
  4. package/src/agent/pipelines/initializer.js +53 -8
  5. package/src/agent/session-state.js +1 -0
  6. package/src/agent/skill-loader.js +13 -1
  7. package/src/agent/tools/document-parse.js +104 -21
  8. package/src/agent/tools/document-search.js +24 -8
  9. package/src/agent/tools/sandbox-exec.js +16 -5
  10. package/src/agent/tools/workspace-file.js +47 -20
  11. package/src/agent/workspace.js +24 -1
  12. package/src/cli/components.js +8 -1
  13. package/src/cli/config.js +100 -6
  14. package/src/cli/index.js +14 -1
  15. package/src/cli/onboard.js +70 -1
  16. package/src/config.js +43 -3
  17. package/src/model-tiers.json +153 -0
  18. package/src/providers.js +63 -66
  19. package/template/AGENT.md +20 -0
  20. package/template/skills/en/meta/compliance-judgment/SKILL.md +10 -42
  21. package/template/skills/en/meta/document-chunking/SKILL.md +32 -0
  22. package/template/skills/en/meta/document-parsing/SKILL.md +11 -18
  23. package/template/skills/en/meta/entity-extraction/SKILL.md +13 -28
  24. package/template/skills/en/meta/tree-processing/SKILL.md +19 -1
  25. package/template/skills/en/meta-meta/auto-model-selection/SKILL.md +53 -0
  26. package/template/skills/en/meta-meta/pdf-review-dashboard/SKILL.md +57 -0
  27. package/template/skills/en/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
  28. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +24 -1
  29. package/template/skills/en/meta-meta/skill-authoring/SKILL.md +6 -0
  30. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +4 -0
  31. package/template/skills/zh/meta/compliance-judgment/SKILL.md +41 -262
  32. package/template/skills/zh/meta/document-chunking/SKILL.md +32 -0
  33. package/template/skills/zh/meta/document-parsing/SKILL.md +65 -132
  34. package/template/skills/zh/meta/entity-extraction/SKILL.md +68 -230
  35. package/template/skills/zh/meta/tree-processing/SKILL.md +82 -194
  36. package/template/skills/zh/meta-meta/auto-model-selection/SKILL.md +51 -0
  37. package/template/skills/zh/meta-meta/pdf-review-dashboard/SKILL.md +55 -0
  38. package/template/skills/zh/meta-meta/pdf-review-dashboard/scripts/generate_review.js +262 -0
  39. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +79 -164
  40. package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +64 -185
  41. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +95 -216
@@ -0,0 +1,153 @@
1
+ {
2
+ "_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
3
+
4
+ "siliconflow": {
5
+ "conductor": "Pro/zai-org/GLM-5",
6
+ "llm": {
7
+ "tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
8
+ "tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
9
+ "tier3": "Qwen/Qwen3.5-122B-A10B",
10
+ "tier4": "Qwen/Qwen3.5-35B-A3B"
11
+ },
12
+ "vlm": {
13
+ "tier1": "Pro/Qwen/Qwen2.5-VL-72B-Instruct",
14
+ "tier2": "Qwen/Qwen2.5-VL-32B-Instruct",
15
+ "tier3": "Qwen/Qwen2.5-VL-7B-Instruct"
16
+ }
17
+ },
18
+
19
+ "aliyun": {
20
+ "conductor": "qwen3.6-plus",
21
+ "llm": {
22
+ "tier1": "qwen3.6-plus",
23
+ "tier2": "",
24
+ "tier3": "",
25
+ "tier4": ""
26
+ },
27
+ "vlm": {
28
+ "tier1": "qwen-vl-max",
29
+ "tier2": "qwen-vl-plus",
30
+ "tier3": ""
31
+ }
32
+ },
33
+
34
+ "volcanocloud": {
35
+ "conductor": "doubao-seed-2-0-pro-260215",
36
+ "llm": {
37
+ "tier1": "doubao-seed-2-0-pro-260215, deepseek-v3-2-251201",
38
+ "tier2": "glm-4-7-251222, doubao-1-5-pro-32k-250115",
39
+ "tier3": "doubao-seed-2-0-mini-260215",
40
+ "tier4": "doubao-seed-2-0-lite-260215, doubao-1-5-lite-32k-250115"
41
+ },
42
+ "vlm": {
43
+ "tier1": "doubao-vision-pro-32k-241028",
44
+ "tier2": "doubao-vision-lite-32k-241028",
45
+ "tier3": ""
46
+ }
47
+ },
48
+
49
+ "anthropic": {
50
+ "conductor": "claude-sonnet-4-20250514",
51
+ "llm": {
52
+ "tier1": "claude-sonnet-4-20250514",
53
+ "tier2": "claude-sonnet-4-20250514",
54
+ "tier3": "claude-haiku-4-5-20251001",
55
+ "tier4": "claude-haiku-4-5-20251001"
56
+ },
57
+ "vlm": {
58
+ "tier1": "claude-sonnet-4-20250514",
59
+ "tier2": "claude-haiku-4-5-20251001",
60
+ "tier3": "claude-haiku-4-5-20251001"
61
+ }
62
+ },
63
+
64
+ "openai": {
65
+ "conductor": "gpt-4o",
66
+ "llm": {
67
+ "tier1": "gpt-4o",
68
+ "tier2": "gpt-4o-mini",
69
+ "tier3": "gpt-4o-mini",
70
+ "tier4": "gpt-4o-mini"
71
+ },
72
+ "vlm": {
73
+ "tier1": "gpt-4o",
74
+ "tier2": "gpt-4o-mini",
75
+ "tier3": "gpt-4o-mini"
76
+ }
77
+ },
78
+
79
+ "zhipu": {
80
+ "conductor": "glm-4-plus",
81
+ "llm": {
82
+ "tier1": "glm-4-plus",
83
+ "tier2": "glm-4-air",
84
+ "tier3": "glm-4-flash",
85
+ "tier4": "glm-4-flash"
86
+ },
87
+ "vlm": {
88
+ "tier1": "glm-4v-plus",
89
+ "tier2": "glm-4v",
90
+ "tier3": "glm-4v-flash"
91
+ }
92
+ },
93
+
94
+ "minimax": {
95
+ "conductor": "MiniMax-M2.5",
96
+ "llm": {
97
+ "tier1": "MiniMax-M2.5",
98
+ "tier2": "MiniMax-M2.5",
99
+ "tier3": "MiniMax-M1",
100
+ "tier4": "MiniMax-M1"
101
+ },
102
+ "vlm": {
103
+ "tier1": "",
104
+ "tier2": "",
105
+ "tier3": ""
106
+ }
107
+ },
108
+
109
+ "openrouter": {
110
+ "conductor": "anthropic/claude-sonnet-4-20250514",
111
+ "llm": {
112
+ "tier1": "anthropic/claude-sonnet-4-20250514",
113
+ "tier2": "google/gemini-2.5-flash",
114
+ "tier3": "google/gemini-2.5-flash",
115
+ "tier4": "google/gemini-2.5-flash"
116
+ },
117
+ "vlm": {
118
+ "tier1": "anthropic/claude-sonnet-4-20250514",
119
+ "tier2": "google/gemini-2.5-flash",
120
+ "tier3": "google/gemini-2.5-flash"
121
+ }
122
+ },
123
+
124
+ "bedrock": {
125
+ "conductor": "anthropic.claude-sonnet-4-20250514-v1:0",
126
+ "llm": {
127
+ "tier1": "anthropic.claude-sonnet-4-20250514-v1:0",
128
+ "tier2": "anthropic.claude-sonnet-4-20250514-v1:0",
129
+ "tier3": "anthropic.claude-haiku-4-5-20251001-v1:0",
130
+ "tier4": "anthropic.claude-haiku-4-5-20251001-v1:0"
131
+ },
132
+ "vlm": {
133
+ "tier1": "anthropic.claude-sonnet-4-20250514-v1:0",
134
+ "tier2": "anthropic.claude-haiku-4-5-20251001-v1:0",
135
+ "tier3": "anthropic.claude-haiku-4-5-20251001-v1:0"
136
+ }
137
+ },
138
+
139
+ "custom": {
140
+ "conductor": "",
141
+ "llm": {
142
+ "tier1": "",
143
+ "tier2": "",
144
+ "tier3": "",
145
+ "tier4": ""
146
+ },
147
+ "vlm": {
148
+ "tier1": "",
149
+ "tier2": "",
150
+ "tier3": ""
151
+ }
152
+ }
153
+ }
package/src/providers.js CHANGED
@@ -2,9 +2,32 @@
2
2
  * Provider registry for Multi-LLM support.
3
3
  * Centralizes provider metadata, default models, and model classification.
4
4
  *
5
- * Aligned with kc_reborn/platform/backend/app/providers.py
5
+ * Model tier assignments (LLM + VLM) are loaded from model-tiers.json
6
+ * so they can be updated without touching code.
6
7
  */
7
8
 
9
+ import { readFileSync } from "node:fs";
10
+ import { fileURLToPath } from "node:url";
11
+ import { dirname, join } from "node:path";
12
+
13
+ const __filename = fileURLToPath(import.meta.url);
14
+ const __dirname = dirname(__filename);
15
+
16
+ /** @type {Record<string, {conductor: string, llm: Record<string,string>, vlm: Record<string,string>}>} */
17
+ let MODEL_TIERS;
18
+ try {
19
+ MODEL_TIERS = JSON.parse(
20
+ readFileSync(join(__dirname, "model-tiers.json"), "utf-8")
21
+ );
22
+ } catch {
23
+ MODEL_TIERS = {};
24
+ }
25
+
26
+ /** Helper: get tier config for a provider, with fallbacks */
27
+ function getTierConfig(providerId) {
28
+ return MODEL_TIERS[providerId] || { conductor: "", llm: {}, vlm: {} };
29
+ }
30
+
8
31
  const PROVIDERS = [
9
32
  {
10
33
  id: "siliconflow",
@@ -13,13 +36,9 @@ const PROVIDERS = [
13
36
  authType: "bearer",
14
37
  apiFormat: "openai",
15
38
  modelsEndpoint: "/models",
16
- defaultModel: "glm-5",
17
- defaultTiers: {
18
- tier1: "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
19
- tier2: "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
20
- tier3: "Qwen/Qwen3.5-122B-A10B",
21
- tier4: "Qwen/Qwen3.5-35B-A3B",
22
- },
39
+ defaultModel: getTierConfig("siliconflow").conductor || "glm-5",
40
+ defaultTiers: getTierConfig("siliconflow").llm,
41
+ defaultVlm: getTierConfig("siliconflow").vlm,
23
42
  labels: {
24
43
  en: "SiliconFlow (recommended for China)",
25
44
  zh: "SiliconFlow(国内推荐)",
@@ -35,13 +54,9 @@ const PROVIDERS = [
35
54
  apiFormat: "openai",
36
55
  modelsEndpoint: null, // Aliyun coding plan doesn't support /models
37
56
  supportsCodingPlanKey: true,
38
- defaultModel: "glm-5",
39
- defaultTiers: {
40
- tier1: "qwen3.6-plus",
41
- tier2: "",
42
- tier3: "",
43
- tier4: "",
44
- },
57
+ defaultModel: getTierConfig("aliyun").conductor || "qwen3.6-plus",
58
+ defaultTiers: getTierConfig("aliyun").llm,
59
+ defaultVlm: getTierConfig("aliyun").vlm,
45
60
  // Curated model list (coding plan doesn't have /models endpoint)
46
61
  curatedModels: [
47
62
  { id: "qwen3.6-plus", ownedBy: "qwen" },
@@ -66,13 +81,9 @@ const PROVIDERS = [
66
81
  authType: "bearer",
67
82
  apiFormat: "openai",
68
83
  modelsEndpoint: null, // VolcanoCloud coding plan — use curated list
69
- defaultModel: "glm-5",
70
- defaultTiers: {
71
- tier1: "doubao-seed-2-0-pro-260215, deepseek-v3-2-251201",
72
- tier2: "glm-4-7-251222, doubao-1-5-pro-32k-250115",
73
- tier3: "doubao-seed-2-0-mini-260215",
74
- tier4: "doubao-seed-2-0-lite-260215, doubao-1-5-lite-32k-250115",
75
- },
84
+ defaultModel: getTierConfig("volcanocloud").conductor || "doubao-seed-2-0-pro-260215",
85
+ defaultTiers: getTierConfig("volcanocloud").llm,
86
+ defaultVlm: getTierConfig("volcanocloud").vlm,
76
87
  curatedModels: [
77
88
  { id: "doubao-seed-2-0-pro-260215", ownedBy: "bytedance" },
78
89
  { id: "deepseek-v3-2-251201", ownedBy: "deepseek" },
@@ -94,13 +105,9 @@ const PROVIDERS = [
94
105
  authType: "x-api-key",
95
106
  apiFormat: "anthropic",
96
107
  modelsEndpoint: null, // Use curated list
97
- defaultModel: "claude-sonnet-4-20250514",
98
- defaultTiers: {
99
- tier1: "claude-sonnet-4-20250514",
100
- tier2: "claude-sonnet-4-20250514",
101
- tier3: "claude-haiku-4-5-20251001",
102
- tier4: "claude-haiku-4-5-20251001",
103
- },
108
+ defaultModel: getTierConfig("anthropic").conductor || "claude-sonnet-4-20250514",
109
+ defaultTiers: getTierConfig("anthropic").llm,
110
+ defaultVlm: getTierConfig("anthropic").vlm,
104
111
  curatedModels: [
105
112
  { id: "claude-opus-4-20250514", ownedBy: "anthropic" },
106
113
  { id: "claude-sonnet-4-20250514", ownedBy: "anthropic" },
@@ -118,13 +125,9 @@ const PROVIDERS = [
118
125
  authType: "bearer",
119
126
  apiFormat: "openai",
120
127
  modelsEndpoint: "/models",
121
- defaultModel: "gpt-4o",
122
- defaultTiers: {
123
- tier1: "gpt-4o",
124
- tier2: "gpt-4o-mini",
125
- tier3: "gpt-4o-mini",
126
- tier4: "gpt-4o-mini",
127
- },
128
+ defaultModel: getTierConfig("openai").conductor || "gpt-4o",
129
+ defaultTiers: getTierConfig("openai").llm,
130
+ defaultVlm: getTierConfig("openai").vlm,
128
131
  labels: {
129
132
  en: "OpenAI",
130
133
  zh: "OpenAI",
@@ -137,13 +140,9 @@ const PROVIDERS = [
137
140
  authType: "bearer",
138
141
  apiFormat: "openai",
139
142
  modelsEndpoint: "/models",
140
- defaultModel: "glm-4-plus",
141
- defaultTiers: {
142
- tier1: "glm-4-plus",
143
- tier2: "glm-4-air",
144
- tier3: "glm-4-flash",
145
- tier4: "glm-4-flash",
146
- },
143
+ defaultModel: getTierConfig("zhipu").conductor || "glm-4-plus",
144
+ defaultTiers: getTierConfig("zhipu").llm,
145
+ defaultVlm: getTierConfig("zhipu").vlm,
147
146
  labels: {
148
147
  en: "Zhipu GLM",
149
148
  zh: "智谱 GLM",
@@ -156,13 +155,9 @@ const PROVIDERS = [
156
155
  authType: "bearer",
157
156
  apiFormat: "openai",
158
157
  modelsEndpoint: "/models",
159
- defaultModel: "MiniMax-M2.5",
160
- defaultTiers: {
161
- tier1: "MiniMax-M2.5",
162
- tier2: "MiniMax-M2.5",
163
- tier3: "MiniMax-M1",
164
- tier4: "MiniMax-M1",
165
- },
158
+ defaultModel: getTierConfig("minimax").conductor || "MiniMax-M2.5",
159
+ defaultTiers: getTierConfig("minimax").llm,
160
+ defaultVlm: getTierConfig("minimax").vlm,
166
161
  labels: {
167
162
  en: "MiniMax",
168
163
  zh: "MiniMax",
@@ -175,13 +170,9 @@ const PROVIDERS = [
175
170
  authType: "bearer",
176
171
  apiFormat: "openai",
177
172
  modelsEndpoint: "/models",
178
- defaultModel: "anthropic/claude-sonnet-4-20250514",
179
- defaultTiers: {
180
- tier1: "anthropic/claude-sonnet-4-20250514",
181
- tier2: "google/gemini-2.5-flash",
182
- tier3: "google/gemini-2.5-flash",
183
- tier4: "google/gemini-2.5-flash",
184
- },
173
+ defaultModel: getTierConfig("openrouter").conductor || "anthropic/claude-sonnet-4-20250514",
174
+ defaultTiers: getTierConfig("openrouter").llm,
175
+ defaultVlm: getTierConfig("openrouter").vlm,
185
176
  labels: {
186
177
  en: "OpenRouter",
187
178
  zh: "OpenRouter",
@@ -194,13 +185,9 @@ const PROVIDERS = [
194
185
  authType: "aws-sigv4",
195
186
  apiFormat: "anthropic",
196
187
  modelsEndpoint: null,
197
- defaultModel: "anthropic.claude-sonnet-4-20250514-v1:0",
198
- defaultTiers: {
199
- tier1: "anthropic.claude-sonnet-4-20250514-v1:0",
200
- tier2: "anthropic.claude-sonnet-4-20250514-v1:0",
201
- tier3: "anthropic.claude-haiku-4-5-20251001-v1:0",
202
- tier4: "anthropic.claude-haiku-4-5-20251001-v1:0",
203
- },
188
+ defaultModel: getTierConfig("bedrock").conductor || "anthropic.claude-sonnet-4-20250514-v1:0",
189
+ defaultTiers: getTierConfig("bedrock").llm,
190
+ defaultVlm: getTierConfig("bedrock").vlm,
204
191
  labels: {
205
192
  en: "AWS Bedrock (not yet supported)",
206
193
  zh: "AWS Bedrock(暂未支持)",
@@ -213,8 +200,9 @@ const PROVIDERS = [
213
200
  authType: "bearer",
214
201
  apiFormat: "openai",
215
202
  modelsEndpoint: "/models",
216
- defaultModel: "",
217
- defaultTiers: { tier1: "", tier2: "", tier3: "", tier4: "" },
203
+ defaultModel: getTierConfig("custom").conductor || "",
204
+ defaultTiers: getTierConfig("custom").llm,
205
+ defaultVlm: getTierConfig("custom").vlm,
218
206
  labels: {
219
207
  en: "Custom (enter base URL)",
220
208
  zh: "自定义(输入接口地址)",
@@ -368,3 +356,12 @@ export function getCuratedModels(providerId) {
368
356
  const provider = getProviderById(providerId);
369
357
  return provider?.curatedModels || null;
370
358
  }
359
+
360
+ /**
361
+ * Get the raw model tier config for a provider (from model-tiers.json).
362
+ * @param {string} providerId
363
+ * @returns {{ conductor: string, llm: Record<string,string>, vlm: Record<string,string> }}
364
+ */
365
+ export function getModelTierConfig(providerId) {
366
+ return getTierConfig(providerId);
367
+ }
@@ -0,0 +1,20 @@
1
+ # AGENT.md — Project Context
2
+
3
+ This file is your per-project memory. Update it as you learn about the project.
4
+ The content here is injected into your system prompt on every turn.
5
+
6
+ ## Project
7
+
8
+ <!-- What domain? What regulations? What documents? Fill this in during bootstrap. -->
9
+
10
+ ## Decisions
11
+
12
+ <!-- Key decisions made with the developer user. Rule granularity, accuracy targets, model choices, scope boundaries. -->
13
+
14
+ ## Domain Notes
15
+
16
+ <!-- Terminology, document formats, naming conventions, edge cases specific to this domain. -->
17
+
18
+ ## User Preferences
19
+
20
+ <!-- How the developer user prefers to communicate. Reporting format, language, level of detail. -->
@@ -9,53 +9,17 @@ Judgment is the moment of truth. You have the extracted entity. You have the rul
9
9
 
10
10
  ## The Judgment Spectrum
11
11
 
12
- Rules fall on a spectrum from fully deterministic to fully semantic:
12
+ Rules range from trivially deterministic to deeply semantic. Pick the right tool for each rule.
13
13
 
14
- ### Deterministic Judgments (Use Python)
14
+ **Deterministic** — threshold checks, format validation, date arithmetic, cross-field consistency. Pure Python: free, instant, deterministic.
15
15
 
16
- Rules with clear, computable criteria:
16
+ **Semantic** — adequacy, completeness, consistency, compliance with templates, detecting misleading or suggestive language, assessing whether a description is fair and balanced. These require language understanding — use worker LLM.
17
17
 
18
- - **Threshold checks**: "The capital adequacy ratio must be >= 8%."
19
- ```python
20
- result = "pass" if extracted_ratio >= 8.0 else "fail"
21
- ```
22
- - **Format validation**: "The loan number must match pattern XX-YYYY-ZZZZZZ."
23
- ```python
24
- result = "pass" if re.match(r"[A-Z]{2}-\d{4}-\d{6}", loan_number) else "fail"
25
- ```
26
- - **Date arithmetic**: "The contract must be signed within 30 days of application."
27
- ```python
28
- result = "pass" if (sign_date - app_date).days <= 30 else "fail"
29
- ```
30
- - **Cross-field consistency**: "The total must equal the sum of line items."
31
- ```python
32
- result = "pass" if abs(total - sum(items)) < 0.01 else "fail"
33
- ```
18
+ Many real compliance rules require semantic judgment. "The risk disclosure must adequately describe the key risks" cannot be checked with regex or Python. "The contract description must not be misleading or suggestive" requires deep language understanding. Use worker LLM for these without hesitation.
34
19
 
35
- These are best implemented as pure Python. They are free, instant, and deterministic. When possible, prefer this form.
20
+ Some rules combine both: extract a number (deterministic), compare to threshold (deterministic), then assess the explanation if borderline (semantic). The mix depends on the rule.
36
21
 
37
- ### Semantic Judgments (Use LLM)
38
-
39
- Rules requiring language understanding:
40
-
41
- - **Adequacy**: "The risk disclosure must adequately describe the key risks."
42
- - **Completeness**: "The management discussion must address financial performance, strategic outlook, and market conditions."
43
- - **Consistency**: "The executive summary must be consistent with the detailed findings."
44
- - **Compliance with template**: "The report must follow the format specified in Regulation Appendix A."
45
-
46
- For these, design an LLM prompt:
47
- 1. Provide the rule text (what constitutes compliance).
48
- 2. Provide the extracted content (what the document says).
49
- 3. Ask for a structured verdict: pass/fail, reasoning, and comment.
50
- 4. Ask the model to be conservative — flag as fail only when clearly non-compliant. When truly ambiguous, use a "partial" or "uncertain" result rather than a hard fail.
51
-
52
- ### Hybrid Judgments (Most Common)
53
-
54
- Most rules combine deterministic and semantic elements:
55
- - Extract the number (regex) → compare to threshold (Python) → if borderline, assess the explanation (LLM).
56
- - Check that a section exists (deterministic) → check that it covers required topics (semantic).
57
-
58
- Design the pipeline to run cheap steps first. Only invoke the LLM when the deterministic check is insufficient.
22
+ The right method is whatever achieves accuracy at lowest cost. Simple threshold checks don't need LLM. Semantic assessments don't benefit from Python. Most projects will have a mix — let the nature of each rule determine the method.
59
23
 
60
24
  ## Output Format
61
25
 
@@ -80,6 +44,10 @@ For each rule × document combination:
80
44
  - **error**: Something went wrong during extraction or judgment (parsing failure, API error). Needs investigation.
81
45
  - **uncertain**: The judgment is ambiguous. May need human review.
82
46
 
47
+ **Design exit criteria first:** Before writing judgment logic for a rule, define the exit conditions: what constitutes pass, what constitutes fail, what triggers escalation to human, how to handle empty/missing values, what value ranges are valid. Explicit exit criteria prevent ambiguous or inconsistent judgment.
48
+
49
+ **Prompt design:** Design prompts for what you want, not against what you don't want. "Don't include reasoning" is less reliable than extracting the verdict from structured output in postprocessing. Use output filtering instead of prompt negation.
50
+
83
51
  **Comments:**
84
52
  - Required only when result is `fail`. Skip for `pass` unless the developer user specifically requests pass comments.
85
53
  - Be concise and factual: "Capital adequacy ratio is 7.2%, below the regulatory minimum of 8.0%."
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: document-chunking
3
+ description: >
4
+ Fast, cheap chunking for processing batches of sample and input documents.
5
+ Use when you need to split documents into manageable pieces for initial observation,
6
+ data sensibility checks, or feeding to extraction workflows. Not for production
7
+ verification chunking — for that, use tree-processing to design a tailored chunking script.
8
+ ---
9
+
10
+ # Document Chunking
11
+
12
+ Split documents into pieces for downstream processing. This is the fast, cheap version — for batch processing of samples and inputs, not for precision verification workflows.
13
+
14
+ ## Methods
15
+
16
+ **Page-level splits** — simplest. Each page is a chunk. Works for most document processing where you need to iterate over content.
17
+
18
+ **Fixed-size chunks** — split by character/token count with overlap. Good for search and initial observation. Typical: 2000-4000 chars with 200 char overlap.
19
+
20
+ **Header-based splits** — detect section headers and split at boundaries. Preserves semantic units. Use regex patterns for the document's header convention.
21
+
22
+ ## When to Use What
23
+
24
+ Pick the simplest method that serves the task:
25
+ - Batch document observation → page-level
26
+ - Full-text search index → fixed-size with overlap
27
+ - Section-level extraction → header-based
28
+ - Table of contents available → parse TOC for structure
29
+
30
+ ## Relationship to tree-processing
31
+
32
+ This skill is for quick, cheap chunking during exploration and batch processing. When you need production-grade chunking for verification workflows — where the chunking mechanism must be precise, consistent, and coded as a script — use `tree-processing` instead.
@@ -12,28 +12,21 @@ Parsing is the foundation. If the text is wrong, everything downstream is wrong.
12
12
  Start with the simplest parser. Escalate only when necessary. This is not about saving money — it is about producing the most reliable output. Simple parsers have fewer failure modes.
13
13
 
14
14
  ### Level 1: Direct Text Extraction
15
- - Tool: pymupdf (PyMuPDF) or similar PDF text extraction.
15
+ - Tool: pdfjs-dist or similar PDF text extraction.
16
16
  - When: Well-formed digital PDFs with embedded text. This covers most modern business documents.
17
17
  - Output: Raw text with basic structure preserved (paragraphs, basic formatting).
18
18
  - Limitations: Tables may come out as messy text. Charts and images are invisible. Scanned PDFs produce nothing.
19
19
 
20
- ### Level 2: Layout-Aware Extraction
21
- - Tool: pdfplumber or similar layout-aware parser.
22
- - When: Level 1 produces messy table output, or when preserving spatial layout matters (forms, multi-column documents).
23
- - Output: Text with table detection and cell-level extraction.
24
- - Limitations: Still text-based. Cannot handle scanned content.
25
-
26
- ### Level 3: OCR
27
- - Tool: Vision-capable models from OCR_MODEL_TIER in `.env` (PaddleOCR-VL, GLM-4.6V, etc.).
28
- - When: Scanned PDFs, image-based PDFs, or PDFs where Level 1-2 produce garbled/incomplete text.
29
- - Output: Recognized text from images.
30
- - Limitations: Slower, costs API calls, may introduce OCR errors.
31
-
32
- ### Level 4: Vision Model Interpretation
33
- - Tool: High-capability vision models (OCR_MODEL_TIER1).
34
- - When: Complex tables that text extraction cannot parse correctly, charts that need data point extraction, mixed text-and-image layouts.
35
- - Output: Structured interpretation of visual content (table as markdown, chart data as JSON).
36
- - Limitations: Expensive, slow. Reserve for when the visual content genuinely needs interpretation.
20
+ ### Level 2: Provider VLM (Vision Language Model)
21
+ - Tool: VLM models from configured provider (VLM_TIER3 for cheap OCR, VLM_TIER1 for complex interpretation).
22
+ - When: Level 1 produces garbled/incomplete text, scanned PDFs, image-based PDFs.
23
+ - Output: Recognized text from page images, or structured interpretation (table as markdown, chart data as JSON).
24
+ - Calling a provider VLM is more convenient and reliable than deploying local OCR. Use the cheapest VLM tier first; escalate to a more capable tier for complex tables/charts.
25
+
26
+ ### Level 3: MinerU API or Local Tools (Optional)
27
+ - Tool: MinerU API, pdfplumber, or locally deployed OCR if configured.
28
+ - When: Provider VLM is unavailable or too expensive for batch processing.
29
+ - These are optional fallbacks. Most users will use Level 1 + Level 2.
37
30
 
38
31
  ## Quality Detection
39
32
 
@@ -33,40 +33,21 @@ The value could be anywhere, or the rule applies to the document as a whole.
33
33
 
34
34
  ## Method Selection
35
35
 
36
- ### Regex / Python (Cost: zero, Speed: instant)
36
+ Extraction method selection is a cost-accuracy search. The goal is finding the cheapest method that meets the accuracy threshold. Regex is the smallest, cheapest "model" — zero cost, instant, deterministic. Worker LLM is more capable but costs tokens and time. Any search strategy is valid: try the cheapest first and escalate, try the most capable first and downgrade, bisect, or jump directly to a known-good method based on past experience in AGENT.md.
37
37
 
38
- Use when the entity has a predictable format:
38
+ ### Available Methods
39
39
 
40
- - **Dates**: `\d{4}[-/年]\d{1,2}[-/月]\d{1,2}[日]?` or specific patterns for the document type.
41
- - **Monetary amounts**: `[\d,]+\.?\d*\s*(元|万元|亿元|USD|RMB)` or similar.
42
- - **Percentages**: `\d+\.?\d*\s*%`
43
- - **Identifiers**: Loan numbers, registration codes, ID numbers — usually fixed formats.
44
- - **Specific phrases**: "I hereby agree" or "本人同意" — exact string matching.
40
+ **Regex / Python** — Cost: zero. Speed: instant. Deterministic.
41
+ Works well for: dates, monetary amounts, percentages, identifiers, fixed phrases, any value with a predictable format.
45
42
 
46
- Build and test the regex on sample documents. A good regex is better than a good LLM prompt for structured values — it is faster, deterministic, and free.
43
+ **Worker LLM** — Cost: API tokens. Speed: seconds. Semantic understanding.
44
+ Works well for: contextual interpretation, conditional values, semantic matching, ambiguous structures, suggestive or misleading language detection, table interpretation, anything requiring understanding rather than pattern matching.
47
45
 
48
- ### LLM Extraction (Cost: API call, Speed: seconds)
46
+ Many real verification tasks require semantic understanding — "is this description misleading?", "does this clause adequately disclose risk?", "is this guarantor's business description consistent with their stated industry?" — regex cannot handle these. Use worker LLM without hesitation for such tasks.
49
47
 
50
- Use when the entity requires understanding:
48
+ ### The Search
51
49
 
52
- - **Named entities in context**: "the guarantor's main business" requires understanding who the guarantor is and which text describes their business.
53
- - **Conditional values**: "the interest rate, including any adjustments" — requires understanding what constitutes an adjustment.
54
- - **Semantic matching**: "adequate risk disclosure" — requires judgment about what text constitutes risk disclosure.
55
- - **Table interpretation**: When a table's structure is not uniform and regex cannot reliably extract cells.
56
-
57
- Design the LLM prompt to:
58
- 1. Include the narrowed context (from tree processing).
59
- 2. Specify exactly what to extract.
60
- 3. Define the output format (JSON with named fields).
61
- 4. Provide one example if the extraction is non-obvious.
62
-
63
- ### Hybrid Approach
64
-
65
- Often the best strategy:
66
- 1. Use regex to extract candidates (fast, catches obvious matches).
67
- 2. If regex finds a confident match, use it.
68
- 3. If regex fails or is uncertain, fall back to LLM extraction.
69
- 4. Use LLM to validate regex results when confidence matters.
50
+ If a method's results fall below the accuracy threshold, try a different method or a more capable model. If regex works and meets the accuracy threshold, keep it — it's free. If regex produces results below threshold, escalate to worker LLM. If a cheap worker LLM isn't accurate enough, try a more capable tier. Record what works for each extraction type in AGENT.md for future reference.
70
51
 
71
52
  ## Schema Design
72
53
 
@@ -118,6 +99,10 @@ Every extraction should carry a confidence estimate:
118
99
 
119
100
  These are starting points. Calibrate based on actual accuracy (see `confidence-system`).
120
101
 
102
+ ## Prompt Design: Ask For What You Want
103
+
104
+ Design prompts for what you want, not against what you don't want. "Don't include explanations" in a prompt is less reliable than stripping non-JSON text from the output in postprocessing. If you need to tell the LLM not to do something, use output filtering instead of prompt negation.
105
+
121
106
  ## Fitting Worker LLM Context
122
107
 
123
108
  When designing extraction for worker LLM workflows:
@@ -1,12 +1,30 @@
1
1
  ---
2
2
  name: tree-processing
3
- description: Build hierarchical document trees and navigate to specific chapters or sections required by verification rules. Use when a rule targets a specific part of a document (e.g., "Chapter 3 must contain..."), when documents are too long for a single LLM context window, or when you need to find where a specific entity lives within a large document. Implements the "onion peeler" approach for chunking. Also use for documents over 100 pages where full-document processing is impractical.
3
+ description: >
4
+ Design production-grade document chunking mechanisms for verification workflows. Use when
5
+ building the chunking step of a workflow that will run repeatedly on many documents.
6
+ The approach: observe sample documents, find structural patterns, write a chunking script
7
+ in code; that script then runs in production. Also use for navigating large documents via
8
+ hierarchical structure when a rule targets a specific section.
9
+ For quick, cheap batch chunking during exploration, use document-chunking instead.
4
10
  ---
5
11
 
6
12
  # Tree Processing
7
13
 
8
14
  Most verification rules do not need the entire document. They need a specific section, a specific table, a specific disclosure. The tree is your map for navigating large documents efficiently.
9
15
 
16
+ ## Production Chunking Methodology
17
+
18
+ For verification workflows that process many documents, the chunking mechanism must be precise, consistent, and fast. The approach:
19
+
20
+ 1. **Observe**: Read 3-5 sample documents. Note their structure — headers, numbering, section patterns.
21
+ 2. **Find patterns**: Identify what's consistent (header format, numbering convention, TOC structure).
22
+ 3. **Write code**: Design a chunking script (regex-based splitter, header detector, TOC parser) that captures the pattern.
23
+ 4. **Test**: Run the script on samples. Verify it produces correct, consistent chunks.
24
+ 5. **Deploy**: The script runs in production workflows. It's deterministic, free, and fast.
25
+
26
+ This is different from `document-chunking` (quick, cheap splits for exploration). Production chunking is a one-time design effort that pays off across all documents of the same type.
27
+
10
28
  ## Why Trees
11
29
 
12
30
  Two reasons: