kc-beta 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QUICKSTART.md +149 -0
- package/README.md +207 -0
- package/package.json +12 -2
- package/src/agent/engine.js +89 -0
- package/src/agent/task-manager.js +186 -0
- package/src/agent/tools/document-parse.js +3 -8
- package/src/cli/components.js +34 -0
- package/src/cli/index.js +25 -1
package/QUICKSTART.md
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# KC Agent CLI (Beta) — Quickstart
|
|
2
|
+
|
|
3
|
+
## Install
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
npm install -g kc-beta
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Requires **Node.js 20+**.
|
|
10
|
+
|
|
11
|
+
## Setup
|
|
12
|
+
|
|
13
|
+
Run the onboarding wizard to configure your LLM provider and API keys:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
kc-beta onboard
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
You'll be prompted to choose:
|
|
20
|
+
|
|
21
|
+
1. **Language** — English or 中文
|
|
22
|
+
2. **LLM Provider** — SiliconFlow, Aliyun Bailian, Anthropic, OpenAI, VolcanoCloud, Zhipu GLM, MiniMax, OpenRouter, or custom
|
|
23
|
+
3. **API Key** — your provider API key (supports both API keys and Aliyun/VolcanoCloud coding plan keys)
|
|
24
|
+
4. **Model Discovery** — KC auto-discovers available models via API or curated lists and suggests tier assignments with capability-based ranking
|
|
25
|
+
5. **Conductor Model** — the main model that drives the agent
|
|
26
|
+
6. **Worker LLM Tiers** — tier1 (best) through tier4 (cheapest) for verification tasks
|
|
27
|
+
7. **VLM Tiers** — vision models for OCR/document parsing (tier1-3)
|
|
28
|
+
8. **Worker Provider** (optional) — use a different provider for worker LLMs (defaults to conductor provider)
|
|
29
|
+
|
|
30
|
+
Config is saved to `~/.kc_agent/config.json` and shared across projects.
|
|
31
|
+
|
|
32
|
+
### Edit Settings Later
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
kc-beta config
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Category-based editor for: LLM provider, model tiers, VLM tiers, worker LLM provider, quality thresholds, language.
|
|
39
|
+
|
|
40
|
+
## Create a Project
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
kc-beta init my-project
|
|
44
|
+
kc-beta init my-project --lang=zh # Chinese skills
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
This creates a workspace with:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
my-project/
|
|
51
|
+
.env # Project-level config (overrides global)
|
|
52
|
+
Rules/ # Put regulation documents here
|
|
53
|
+
Samples/ # Put sample documents here
|
|
54
|
+
Input/ # Production batches
|
|
55
|
+
Output/ # Verification results
|
|
56
|
+
skills/ # Meta-methodology skills (en or zh)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Start the Agent
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
kc-beta # default language from config
|
|
63
|
+
kc-beta --en # this session in English (does not change config)
|
|
64
|
+
kc-beta --zh # this session in Chinese (does not change config)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Launch from your project directory — KC has full read/write access to the folder you launch from:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
cd my-project
|
|
71
|
+
kc-beta
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
The agent starts in **BOOTSTRAP** phase. It will:
|
|
75
|
+
|
|
76
|
+
1. Set up the workspace structure
|
|
77
|
+
2. Detect regulations and samples in your project directory
|
|
78
|
+
3. Ask about your verification scenario
|
|
79
|
+
|
|
80
|
+
Once regulations and samples are in place, the agent advances through 6 phases automatically:
|
|
81
|
+
|
|
82
|
+
| Phase | What happens |
|
|
83
|
+
|-------|-------------|
|
|
84
|
+
| **BOOTSTRAP** | Workspace setup, understand the scenario |
|
|
85
|
+
| **EXTRACTION** | Decompose regulations into atomic rules |
|
|
86
|
+
| **SKILL_AUTHORING** | Write verification skills for each rule |
|
|
87
|
+
| **SKILL_TESTING** | Test skills, iterate via evolution loop |
|
|
88
|
+
| **DISTILLATION** | Convert skills to worker LLM workflows |
|
|
89
|
+
| **PRODUCTION_QC** | Run workflows on production docs with QC |
|
|
90
|
+
|
|
91
|
+
## Slash Commands
|
|
92
|
+
|
|
93
|
+
| Command | Description |
|
|
94
|
+
|---------|-------------|
|
|
95
|
+
| `/help` | Show available commands |
|
|
96
|
+
| `/status` | Session info, model, phase, context usage |
|
|
97
|
+
| `/clear` | Clear conversation history |
|
|
98
|
+
| `/compact` | Summarize older messages to reduce context usage |
|
|
99
|
+
| `/sessions` | List all sessions |
|
|
100
|
+
| `/resume <name>` | Resume a previous session (restores phase + pipeline state) |
|
|
101
|
+
| `/rename <name>` | Rename current session |
|
|
102
|
+
| `/exit` | Save state and quit |
|
|
103
|
+
|
|
104
|
+
## Keyboard Shortcuts
|
|
105
|
+
|
|
106
|
+
- **Enter** — Send message
|
|
107
|
+
- **Ctrl+C** — Clear queue (if streaming) or save & exit
|
|
108
|
+
- **Ctrl+D** — Save & exit
|
|
109
|
+
|
|
110
|
+
## Status Bar
|
|
111
|
+
|
|
112
|
+
The status bar shows:
|
|
113
|
+
- Session ID and current phase
|
|
114
|
+
- **Context usage**: `CTX: 45.2k/200k (23%)` — turns green/yellow/red as context fills
|
|
115
|
+
|
|
116
|
+
## Per-Project Config
|
|
117
|
+
|
|
118
|
+
Override global settings in your project's `.env`:
|
|
119
|
+
|
|
120
|
+
```env
|
|
121
|
+
LLM_API_KEY=sk-xxx
|
|
122
|
+
LLM_BASE_URL=https://api.siliconflow.cn/v1
|
|
123
|
+
|
|
124
|
+
TIER1=Pro/zai-org/GLM-5
|
|
125
|
+
TIER2=
|
|
126
|
+
TIER3=
|
|
127
|
+
TIER4=
|
|
128
|
+
|
|
129
|
+
SKILL_ACCURACY=0.9
|
|
130
|
+
WORKFLOW_ACCURACY=0.9
|
|
131
|
+
MAX_ITERATIONS=20
|
|
132
|
+
|
|
133
|
+
# Optional: web search via Tavily
|
|
134
|
+
TAVILY_API_KEY=tvly-xxx
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Legacy keys (`SILICONFLOW_API_KEY`, `SILICONFLOW_BASE_URL`) are still accepted for backward compatibility.
|
|
138
|
+
|
|
139
|
+
## Web Search
|
|
140
|
+
|
|
141
|
+
KC can search the web using Tavily when information is not available in your provided documents. Set `TAVILY_API_KEY` in your `.env` or global config. KC prioritizes your domain documents over web results.
|
|
142
|
+
|
|
143
|
+
## Troubleshooting
|
|
144
|
+
|
|
145
|
+
- **"No API key configured"** — Run `kc-beta onboard` first
|
|
146
|
+
- **Connection errors** — Check your API key and base URL. KC retries up to 10 times with exponential backoff on transient failures.
|
|
147
|
+
- **Context too long** — Use `/compact` to summarize older messages, or let automatic windowing handle it
|
|
148
|
+
- **Resume after crash** — Use `/resume <session-name>` to pick up where you left off
|
|
149
|
+
- **Node version** — Requires Node.js 20+. Check with `node --version`
|
package/README.md
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# KC Agent CLI (`kc-beta`)
|
|
2
|
+
|
|
3
|
+
> Build, distill, and run document verification systems with an LLM agent.
|
|
4
|
+
> Pure Node.js. One binary. Bring your own model.
|
|
5
|
+
|
|
6
|
+
KC is a coding agent purpose-built for **rule-based document verification**:
|
|
7
|
+
read a regulation, decompose it into atomic verification rules, write skills
|
|
8
|
+
to check each rule against sample documents, and (optionally) distill those
|
|
9
|
+
skills into cheap worker-LLM workflows for production batch processing.
|
|
10
|
+
|
|
11
|
+
It is designed for the developer at a bank, insurer, or law firm who needs
|
|
12
|
+
to verify hundreds of documents against dozens of compliance rules — and
|
|
13
|
+
wants the system to be transparent, testable, and ownable.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Quick Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm install -g kc-beta
|
|
21
|
+
kc-beta onboard # configure provider + API key
|
|
22
|
+
cd my-project # a folder containing rules/ and samples/
|
|
23
|
+
kc-beta # launch the agent
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Requires **Node.js 20+**. See [QUICKSTART.md](./QUICKSTART.md) for the full setup walkthrough.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## What It Does
|
|
31
|
+
|
|
32
|
+
KC drives a single coding agent through six phases:
|
|
33
|
+
|
|
34
|
+
| Phase | What it does |
|
|
35
|
+
|-------|-------------|
|
|
36
|
+
| **BOOTSTRAP** | Set up the workspace, detect rules/samples in your project |
|
|
37
|
+
| **EXTRACTION** | Decompose regulation documents into atomic, testable rules |
|
|
38
|
+
| **SKILL_AUTHORING** | Write a verification skill for each rule (Anthropic skill-creator format) |
|
|
39
|
+
| **SKILL_TESTING** | Run skills on samples, iterate via the evolution loop |
|
|
40
|
+
| **DISTILLATION** | Convert proven skills into cheap worker-LLM workflows |
|
|
41
|
+
| **PRODUCTION_QC** | Run workflows on production batches with confidence-based sampling |
|
|
42
|
+
|
|
43
|
+
The conductor LLM (your main model) drives all reasoning. Worker LLM tools
|
|
44
|
+
are gated to DISTILL phases only, so the build phase is always grounded in
|
|
45
|
+
high-quality output.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Architecture
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
~/.kc_agent/
|
|
53
|
+
config.json # provider, API key, model tiers
|
|
54
|
+
workspaces/<sessionId>/ # KC's working files
|
|
55
|
+
rules/, rule_skills/, workflows/, samples/, output/, logs/
|
|
56
|
+
AGENT.md # per-project context (KC can edit)
|
|
57
|
+
tasks.json # ralph-loop task list
|
|
58
|
+
session-state.json # phase + pipeline state for /resume
|
|
59
|
+
|
|
60
|
+
your-project/ # where you launched kc-beta
|
|
61
|
+
rules/ # source regulations (KC reads with scope="project")
|
|
62
|
+
samples/ # sample documents
|
|
63
|
+
Output/ # KC writes user-facing reports here
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Dual-directory design.** KC has full read/write to its own workspace plus
|
|
67
|
+
*scoped* read/write to your project directory. Source files stay in your
|
|
68
|
+
project; KC's working artifacts stay in `~/.kc_agent/workspaces/`.
|
|
69
|
+
|
|
70
|
+
**Phase-gated tools.** Worker LLM, workflow runner, tier downgrade, and QC
|
|
71
|
+
sampling tools only register during DISTILL phases. BUILD phases force the
|
|
72
|
+
conductor to do the intellectual work directly — the results are the
|
|
73
|
+
ground-truth baseline for distillation.
|
|
74
|
+
|
|
75
|
+
**Skills as first-class deliverables.** Every rule produces a self-contained
|
|
76
|
+
skill folder (SKILL.md + scripts + references + samples). For complex rules
|
|
77
|
+
that worker LLMs can't reliably handle, the skill itself — run by a capable
|
|
78
|
+
agent — is the production solution.
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Provider Support
|
|
83
|
+
|
|
84
|
+
10 providers configured out of the box:
|
|
85
|
+
|
|
86
|
+
- **SiliconFlow** (default, recommended for China)
|
|
87
|
+
- **Aliyun Bailian** (with coding-plan key support)
|
|
88
|
+
- **VolcanoCloud** (ByteDance Doubao)
|
|
89
|
+
- **Anthropic** (Messages API native)
|
|
90
|
+
- **OpenAI**
|
|
91
|
+
- **Zhipu GLM**
|
|
92
|
+
- **MiniMax**
|
|
93
|
+
- **OpenRouter**
|
|
94
|
+
- **AWS Bedrock** (stub)
|
|
95
|
+
- **Custom** (any OpenAI-compatible endpoint)
|
|
96
|
+
|
|
97
|
+
Model assignments live in [`src/model-tiers.json`](./src/model-tiers.json) —
|
|
98
|
+
edit directly to update tier-1 through tier-4 LLM and tier-1 through tier-3
|
|
99
|
+
VLM (vision) models per provider, no code changes needed.
|
|
100
|
+
|
|
101
|
+
You can use **separate providers** for the conductor and worker LLMs (e.g.,
|
|
102
|
+
Anthropic conductor + SiliconFlow workers).
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Ralph-Loop Autonomous Execution
|
|
107
|
+
|
|
108
|
+
When KC extracts rules, it automatically generates a per-rule task list and
|
|
109
|
+
processes them one at a time. Between tasks the conductor's context is
|
|
110
|
+
compacted aggressively, so context stays bounded even with 50+ rules.
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
SKILL_AUTHORING [████████░░░░] 8/12
|
|
114
|
+
✓ R001 Registered capital check
|
|
115
|
+
✓ R002 Net asset adequacy
|
|
116
|
+
▸ R003 Related-party disclosure ← current
|
|
117
|
+
· R004 Risk capital calculation
|
|
118
|
+
...
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Use `/tasks` to see the full list. The agent decides *how* to do each task;
|
|
122
|
+
the task manager only tells it *what's next*.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Slash Commands
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
/help Show available commands
|
|
130
|
+
/status Session, model, phase, context usage
|
|
131
|
+
/tasks Show task list and progress
|
|
132
|
+
/clear Clear conversation (workspace preserved)
|
|
133
|
+
/compact Summarize older messages via the conductor
|
|
134
|
+
/sessions List all sessions
|
|
135
|
+
/resume <name> Resume a previous session
|
|
136
|
+
/rename <name> Rename current session
|
|
137
|
+
/exit Save state and quit
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
`--en` / `--zh` flags override language for one session without writing config.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Optional Plugins
|
|
145
|
+
|
|
146
|
+
Some heavyweight features ship as **meta-meta skills** the agent invokes on
|
|
147
|
+
demand, rather than always-on dependencies:
|
|
148
|
+
|
|
149
|
+
- **`pdf-review-dashboard`** — Two-column HTML dashboard (PDF on the left,
|
|
150
|
+
verification results on the right, click-to-jump) for manual review and
|
|
151
|
+
ground-truth collection.
|
|
152
|
+
- **`auto-model-selection`** — Use [Context7](https://github.com/upstash/context7)
|
|
153
|
+
CLI to fetch current model listings when the bundled `model-tiers.json`
|
|
154
|
+
is stale or you've switched providers.
|
|
155
|
+
|
|
156
|
+
Both are bundled in `template/skills/{en,zh}/meta-meta/` and discovered by
|
|
157
|
+
the skill loader at startup.
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Configuration
|
|
162
|
+
|
|
163
|
+
Global config: `~/.kc_agent/config.json` (set by `kc-beta onboard`).
|
|
164
|
+
Per-project override: `<project>/.env`.
|
|
165
|
+
|
|
166
|
+
Edit anytime with the category-based editor:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
kc-beta config
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Categories: LLM Provider, Model Tiers, VLM Tiers, Worker LLM Provider,
|
|
173
|
+
Quality Thresholds, Language.
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## Documentation
|
|
178
|
+
|
|
179
|
+
- [QUICKSTART.md](./QUICKSTART.md) — full setup and slash command reference
|
|
180
|
+
- [DEV_LOG.md](./DEV_LOG.md) — release history and design rationale
|
|
181
|
+
- [docs/global_update_design_v3.md](./docs/global_update_design_v3.md) — v3 design plan and progress tracker
|
|
182
|
+
- [docs/initial_spec_draft.md](./docs/initial_spec_draft.md) — original architectural spec
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Status
|
|
187
|
+
|
|
188
|
+
**v0.3.1 — beta.** Production-readiness update covering the seven blocks
|
|
189
|
+
of the v3 design plan: dual-directory permissions, AGENT.md per-project
|
|
190
|
+
context, PDF review dashboard skill, parsing/extraction skill rewrites,
|
|
191
|
+
production-experience meta-skill polish, model-tier baseline + Context7
|
|
192
|
+
plugin, and ralph-loop autonomous task execution.
|
|
193
|
+
|
|
194
|
+
We are inviting a small group of developer users to test before public launch.
|
|
195
|
+
Bug reports and PRs welcome at <https://github.com/kitchen-engineer42/kc-cli>.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## License
|
|
200
|
+
|
|
201
|
+
MIT. Bundled meta-skills under `template/skills/` are proprietary —
|
|
202
|
+
distributed via npm but not open-source. See `template/skills/LICENSE` for
|
|
203
|
+
terms.
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
*Built by Memium / kitchen-engineer42.*
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kc-beta",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "KC Agent — LLM document verification agent (pure Node.js CLI)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -9,8 +9,18 @@
|
|
|
9
9
|
"files": [
|
|
10
10
|
"bin/",
|
|
11
11
|
"src/",
|
|
12
|
-
"template/"
|
|
12
|
+
"template/",
|
|
13
|
+
"README.md",
|
|
14
|
+
"QUICKSTART.md"
|
|
13
15
|
],
|
|
16
|
+
"homepage": "https://github.com/kitchen-engineer42/kc-cli",
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "git+https://github.com/kitchen-engineer42/kc-cli.git"
|
|
20
|
+
},
|
|
21
|
+
"bugs": {
|
|
22
|
+
"url": "https://github.com/kitchen-engineer42/kc-cli/issues"
|
|
23
|
+
},
|
|
14
24
|
"engines": {
|
|
15
25
|
"node": ">=20.0.0"
|
|
16
26
|
},
|
package/src/agent/engine.js
CHANGED
|
@@ -22,6 +22,7 @@ import { TierDowngradeTool } from "./tools/tier-downgrade.js";
|
|
|
22
22
|
import { AgentTool } from "./tools/agent-tool.js";
|
|
23
23
|
import { WebSearchTool } from "./tools/web-search.js";
|
|
24
24
|
import { SkillLoader } from "./skill-loader.js";
|
|
25
|
+
import { TaskManager } from "./task-manager.js";
|
|
25
26
|
import { Phase } from "./pipelines/index.js";
|
|
26
27
|
import { ProjectInitializer } from "./pipelines/initializer.js";
|
|
27
28
|
import { RuleExtractionPipeline } from "./pipelines/extraction.js";
|
|
@@ -76,6 +77,9 @@ export class AgentEngine {
|
|
|
76
77
|
// Session state persistence
|
|
77
78
|
this.sessionState = new SessionState(this.workspace.cwd);
|
|
78
79
|
|
|
80
|
+
// Task manager (ralph-loop)
|
|
81
|
+
this.taskManager = new TaskManager(this.workspace.cwd);
|
|
82
|
+
|
|
79
83
|
// Build all tool instances (but register phase-appropriate ones)
|
|
80
84
|
this._buildTools = this._createAllTools();
|
|
81
85
|
this._phaseSummaries = [];
|
|
@@ -199,6 +203,11 @@ export class AgentEngine {
|
|
|
199
203
|
`Write user-facing exports (reports, results) to the project directory when the user asks.`,
|
|
200
204
|
);
|
|
201
205
|
}
|
|
206
|
+
|
|
207
|
+
// Task progress (ralph-loop)
|
|
208
|
+
const taskContext = this.taskManager.describeForContext();
|
|
209
|
+
if (taskContext) lines.push("", taskContext);
|
|
210
|
+
|
|
202
211
|
return lines.join("\n");
|
|
203
212
|
}
|
|
204
213
|
|
|
@@ -489,6 +498,10 @@ export class AgentEngine {
|
|
|
489
498
|
});
|
|
490
499
|
this.currentPhase = pEvent.nextPhase;
|
|
491
500
|
this._registerToolsForPhase(this.currentPhase);
|
|
501
|
+
|
|
502
|
+
// Ralph-loop: create per-rule tasks for the new phase
|
|
503
|
+
this._createTasksForPhase(this.currentPhase);
|
|
504
|
+
|
|
492
505
|
this.saveState();
|
|
493
506
|
}
|
|
494
507
|
yield new AgentEvent({
|
|
@@ -506,4 +519,80 @@ export class AgentEngine {
|
|
|
506
519
|
}
|
|
507
520
|
}
|
|
508
521
|
}
|
|
522
|
+
|
|
523
|
+
/**
|
|
524
|
+
* Create per-rule tasks when entering a new phase.
|
|
525
|
+
* Reads the rule catalog and creates one task per rule for the given phase.
|
|
526
|
+
*/
|
|
527
|
+
_createTasksForPhase(phase) {
|
|
528
|
+
const catalogPath = path.join(this.workspace.cwd, "rules", "catalog.json");
|
|
529
|
+
if (!fs.existsSync(catalogPath)) return;
|
|
530
|
+
|
|
531
|
+
try {
|
|
532
|
+
const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
533
|
+
const rules = Array.isArray(catalog) ? catalog : [];
|
|
534
|
+
if (rules.length > 0) {
|
|
535
|
+
this.taskManager.createRuleTasks(rules, phase);
|
|
536
|
+
}
|
|
537
|
+
} catch { /* skip if catalog can't be read */ }
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
/**
|
|
541
|
+
* Ralph-loop: run a turn, then auto-continue through pending tasks.
|
|
542
|
+
* Compacts context aggressively between tasks to prevent context blowup.
|
|
543
|
+
* If no tasks exist, behaves identically to runTurn().
|
|
544
|
+
*
|
|
545
|
+
* @param {string} userMessage
|
|
546
|
+
* @yields {AgentEvent}
|
|
547
|
+
*/
|
|
548
|
+
async *runTaskLoop(userMessage) {
|
|
549
|
+
// Run the initial turn (user's request)
|
|
550
|
+
yield* this.runTurn(userMessage);
|
|
551
|
+
|
|
552
|
+
// Auto-continue through pending tasks
|
|
553
|
+
while (this.taskManager.getNextPending()) {
|
|
554
|
+
// Context safety: force compaction if above 70%, or light compaction if history is long
|
|
555
|
+
const stats = this.getContextStats();
|
|
556
|
+
if (stats.percentage > 70) {
|
|
557
|
+
await this.compact();
|
|
558
|
+
} else if (this.history.messages.length > 15) {
|
|
559
|
+
await this.compact({ recentCount: 8 });
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
const task = this.taskManager.getNextPending();
|
|
563
|
+
this.taskManager.updateTask(task.id, { status: "in_progress" });
|
|
564
|
+
|
|
565
|
+
// Yield task progress event for TUI
|
|
566
|
+
yield new AgentEvent({
|
|
567
|
+
type: "task_progress",
|
|
568
|
+
data: {
|
|
569
|
+
taskId: task.id,
|
|
570
|
+
title: task.title,
|
|
571
|
+
ruleId: task.ruleId,
|
|
572
|
+
status: "in_progress",
|
|
573
|
+
progress: this.taskManager.progress,
|
|
574
|
+
},
|
|
575
|
+
});
|
|
576
|
+
|
|
577
|
+
// Synthesize a task-focused prompt
|
|
578
|
+
const taskPrompt = `Continue with next task: ${task.title}` +
|
|
579
|
+
(task.ruleId ? ` (rule: ${task.ruleId})` : "");
|
|
580
|
+
|
|
581
|
+
yield* this.runTurn(taskPrompt);
|
|
582
|
+
|
|
583
|
+
this.taskManager.updateTask(task.id, { status: "completed" });
|
|
584
|
+
this.taskManager.save();
|
|
585
|
+
this.saveState();
|
|
586
|
+
|
|
587
|
+
yield new AgentEvent({
|
|
588
|
+
type: "task_progress",
|
|
589
|
+
data: {
|
|
590
|
+
taskId: task.id,
|
|
591
|
+
title: task.title,
|
|
592
|
+
status: "completed",
|
|
593
|
+
progress: this.taskManager.progress,
|
|
594
|
+
},
|
|
595
|
+
});
|
|
596
|
+
}
|
|
597
|
+
}
|
|
509
598
|
}
|
package/src/agent/task-manager.js
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Manages a per-session task list for ralph-loop style autonomous execution.
 * Tasks are generated from KC's rule catalog — each rule becomes a task.
 * Persisted as a JSON array to `tasks.json` inside the workspace directory.
 *
 * Task statuses used by this class: "pending", "in_progress", "completed",
 * "failed".
 *
 * NOTE(review): a task that is still "in_progress" when the list is reloaded
 * is not returned by getNextPending() — confirm whether resuming a session
 * should reset such tasks to "pending".
 */
export class TaskManager {
  /**
   * @param {string} workspacePath - Session workspace directory
   */
  constructor(workspacePath) {
    this._path = path.join(workspacePath, "tasks.json");
    this._tasks = [];
    this._load();
  }

  // --- Task CRUD ---

  /**
   * Add a task to the list and persist it. A task whose id already exists is
   * ignored.
   * @param {{ id: string, title: string, phase: string, ruleId?: string }} task
   */
  addTask({ id, title, phase, ruleId }) {
    if (this._insert({ id, title, phase, ruleId })) this.save();
  }

  /**
   * Append a new pending task in memory without writing to disk.
   * @param {{ id: string, title: string, phase: string, ruleId?: string }} task
   * @returns {boolean} true when the task was added, false for a duplicate id
   */
  _insert({ id, title, phase, ruleId }) {
    // Don't add duplicates
    if (this._tasks.some((t) => t.id === id)) return false;
    this._tasks.push({
      id,
      title,
      phase,
      ruleId: ruleId || null,
      status: "pending",
      summary: null,
      createdAt: new Date().toISOString(),
      completedAt: null,
    });
    return true;
  }

  /**
   * Update a task's status and optional summary, then persist the list.
   * Unknown ids are ignored.
   * @param {string} id
   * @param {{ status?: string, summary?: string }} updates
   */
  updateTask(id, { status, summary } = {}) {
    const task = this._tasks.find((t) => t.id === id);
    if (!task) return;
    if (status) {
      task.status = status;
      // completedAt is only meaningful for terminal states; clear it when a
      // task is moved back to pending / in_progress (e.g. a retry).
      task.completedAt =
        status === "completed" || status === "failed"
          ? new Date().toISOString()
          : null;
    }
    if (summary !== undefined) task.summary = summary;
    this.save();
  }

  /**
   * Get the next pending task (first in list order).
   * @returns {object|null}
   */
  getNextPending() {
    return this._tasks.find((t) => t.status === "pending") || null;
  }

  /**
   * Get all tasks.
   * @returns {Array} shallow copy of the task list
   */
  getAllTasks() {
    return [...this._tasks];
  }

  /**
   * Check if there are any tasks at all.
   */
  get hasTasks() {
    return this._tasks.length > 0;
  }

  // --- Bulk creation from rule catalog ---

  /**
   * Create one task per rule for a given phase.
   * Reads rules from the provided array (typically from rules/catalog.json).
   * The rule id is taken from `id` or `rule_id`; entries without either (or
   * that are not objects) are skipped, because they cannot be given a stable
   * task id. The list is written to disk once, after all rules were
   * processed, and only if something was added.
   * @param {Array<{id: string, title?: string, description?: string}>} rules
   * @param {string} phase - The phase these tasks belong to
   */
  createRuleTasks(rules, phase) {
    if (!Array.isArray(rules)) return;

    let added = false;
    for (const rule of rules) {
      const ruleId = rule?.id || rule?.rule_id;
      if (!ruleId) continue;
      const title = rule.title || rule.description || ruleId;
      const inserted = this._insert({
        id: `${ruleId}-${phase}`,
        title: `${title}`,
        phase,
        ruleId,
      });
      added = added || inserted;
    }
    if (added) this.save();
  }

  // --- Progress ---

  /**
   * Count tasks per status.
   * @returns {{ total: number, completed: number, inProgress: number, pending: number, failed: number }}
   */
  get progress() {
    const counts = {
      total: this._tasks.length,
      completed: 0,
      inProgress: 0,
      pending: 0,
      failed: 0,
    };
    for (const { status } of this._tasks) {
      if (status === "completed") counts.completed += 1;
      else if (status === "in_progress") counts.inProgress += 1;
      else if (status === "failed") counts.failed += 1;
      else if (status === "pending") counts.pending += 1;
    }
    return counts;
  }

  /**
   * Format task list for injection into system prompt context.
   * Compact checklist — not conversation history.
   * @returns {string} empty string when there are no tasks
   */
  describeForContext() {
    if (this._tasks.length === 0) return "";

    const { total, completed } = this.progress;
    const current = this._tasks.find((t) => t.status === "in_progress");
    const currentPhase = current?.phase || this._tasks.find((t) => t.status === "pending")?.phase || "";

    const lines = [
      `## Task Progress`,
      // Fall back to the task id so a task without a rule never prints "null".
      `${completed}/${total} completed${currentPhase ? ` | Phase: ${currentPhase}` : ""}${current ? ` | Current: ${current.ruleId || current.id} — ${current.title}` : ""}`,
      "",
    ];

    for (const t of this._tasks) {
      const mark = t.status === "completed" ? "[x]"
        : t.status === "in_progress" ? "[>]"
        : t.status === "failed" ? "[!]"
        : "[ ]";
      const arrow = t.status === "in_progress" ? " <-- current" : "";
      lines.push(`- ${mark} ${t.ruleId || t.id}: ${t.title}${arrow}`);
    }

    return lines.join("\n");
  }

  /**
   * Format for /tasks slash command (more detailed than context injection).
   * @returns {string}
   */
  formatForDisplay() {
    if (this._tasks.length === 0) return "No tasks. Tasks are created when rules are extracted.";

    const { total, completed, pending, failed } = this.progress;
    const lines = [
      `Tasks: ${completed}/${total} completed${failed ? `, ${failed} failed` : ""}, ${pending} pending`,
      "",
    ];

    for (const t of this._tasks) {
      const icon = t.status === "completed" ? "✓"
        : t.status === "in_progress" ? "▸"
        : t.status === "failed" ? "✗"
        : "·";
      lines.push(` ${icon} ${t.ruleId || t.id} ${t.title} (${t.status})`);
    }

    return lines.join("\n");
  }

  // --- Persistence ---

  /**
   * Write the full task list to tasks.json (pretty-printed JSON array).
   */
  save() {
    fs.writeFileSync(this._path, JSON.stringify(this._tasks, null, 2), "utf-8");
  }

  /**
   * Load tasks.json if it exists. Unreadable files, invalid JSON, and
   * payloads that are not an array all result in an empty list; array
   * entries that are not plain objects are dropped so later lookups on
   * `t.id` / `t.status` cannot throw.
   */
  _load() {
    if (!fs.existsSync(this._path)) return;
    try {
      const parsed = JSON.parse(fs.readFileSync(this._path, "utf-8"));
      this._tasks = Array.isArray(parsed)
        ? parsed.filter((t) => t !== null && typeof t === "object" && !Array.isArray(t))
        : [];
    } catch {
      this._tasks = [];
    }
  }
}
|
package/src/agent/tools/document-parse.js
CHANGED
|
@@ -178,17 +178,12 @@ export class DocumentParseTool extends BaseTool {
|
|
|
178
178
|
const page = await doc.getPage(i + 1);
|
|
179
179
|
const viewport = page.getViewport({ scale: 2.0 }); // Higher res for OCR
|
|
180
180
|
|
|
181
|
-
//
|
|
181
|
+
// Render to PNG via node-canvas if available; otherwise skip VLM and let
|
|
182
|
+
// the escalation chain fall through to MineRU.
|
|
182
183
|
let imageBase64;
|
|
183
184
|
try {
|
|
184
|
-
// In Node.js, pdfjs can render to a canvas-like object
|
|
185
|
-
// We'll use the simpler approach: convert page to image via the API
|
|
186
185
|
const { createCanvas } = await import("canvas").catch(() => ({ createCanvas: null }));
|
|
187
|
-
if (!createCanvas)
|
|
188
|
-
// No canvas available — fall back to sending raw text content hint + page number
|
|
189
|
-
pages.push(`--- Page ${i + 1} (VLM) ---`);
|
|
190
|
-
continue;
|
|
191
|
-
}
|
|
186
|
+
if (!createCanvas) return null;
|
|
192
187
|
const canvas = createCanvas(viewport.width, viewport.height);
|
|
193
188
|
const ctx = canvas.getContext("2d");
|
|
194
189
|
await page.render({ canvasContext: ctx, viewport }).promise;
|
package/src/cli/components.js
CHANGED
|
@@ -52,6 +52,40 @@ export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
|
|
|
52
52
|
);
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
+
// --- Task dashboard (ralph-loop) ---
|
|
56
|
+
|
|
57
|
+
/**
 * Bordered task panel: a progress bar line followed by a window of at most
 * eight tasks around the one currently in progress, plus a "... N more" line
 * when the list is longer than the window.
 *
 * @param {object} props
 * @param {Array<{id: string, ruleId?: string|null, title: string, status: string}>} props.tasks
 *   Full task list; nothing is rendered when it is empty or missing.
 * @param {{ total?: number, completed?: number }|null} [props.progress]
 *   Counts for the progress bar. Missing values are derived from `tasks`.
 * @returns {object|null} element built with `h`, or null when there are no tasks
 */
export function TaskDashboard({ tasks, progress }) {
  if (!tasks || tasks.length === 0) return null;

  const maxVisible = 8;
  const barWidth = 20;
  const labelWidth = 50;

  // Fall back to counting the list so the header never reads "0/0" while
  // tasks are being displayed.
  const total = progress?.total ?? tasks.length;
  const completed = progress?.completed ?? tasks.filter((t) => t.status === "completed").length;

  // Clamp to [0, barWidth]: String.prototype.repeat throws a RangeError on a
  // negative count, which would happen if completed ever exceeded total.
  const ratio = total > 0 ? completed / total : 0;
  const filled = Math.min(barWidth, Math.max(0, Math.round(ratio * barWidth)));
  const bar = "\u2588".repeat(filled) + "\u2591".repeat(barWidth - filled);

  // Show at most `maxVisible` tasks — current + a few before/after.
  // With no task in progress (findIndex === -1) the window starts at 0.
  const currentIdx = tasks.findIndex((t) => t.status === "in_progress");
  const startIdx = Math.max(0, Math.min(currentIdx - 2, tasks.length - maxVisible));
  const visible = tasks.slice(startIdx, startIdx + maxVisible);
  const hasMore = tasks.length > maxVisible;

  return h(Box, { flexDirection: "column", marginLeft: 2, marginBottom: 1, borderStyle: "single", borderColor: "gray", paddingLeft: 1, paddingRight: 1 },
    h(Text, { dimColor: true }, `Tasks [${bar}] ${completed}/${total}`),
    ...visible.map((t) => {
      const icon = t.status === "completed" ? "\u2713"
        : t.status === "in_progress" ? "\u25b8"
        : t.status === "failed" ? "\u2717"
        : "\u00b7";
      const color = t.status === "completed" ? "green"
        : t.status === "in_progress" ? "cyan"
        : t.status === "failed" ? "red"
        : "gray";
      const label = `${t.ruleId || t.id} ${t.title}`;
      return h(Text, { key: t.id, color }, ` ${icon} ${label.slice(0, labelWidth)}`);
    }),
    hasMore ? h(Text, { dimColor: true }, ` ... ${tasks.length - maxVisible} more`) : null,
  );
}
|
|
88
|
+
|
|
55
89
|
// --- Welcome banner ---
|
|
56
90
|
|
|
57
91
|
export function WelcomeBanner({ projectDir } = {}) {
|
package/src/cli/index.js
CHANGED
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
StatusBar,
|
|
11
11
|
CookingSpinner,
|
|
12
12
|
ToolBlock,
|
|
13
|
+
TaskDashboard,
|
|
13
14
|
HRule,
|
|
14
15
|
InputPrompt,
|
|
15
16
|
} from "./components.js";
|
|
@@ -32,6 +33,8 @@ function App({ engine, config }) {
|
|
|
32
33
|
const [spinnerStatus, setSpinnerStatus] = useState(null);
|
|
33
34
|
const [contextTokens, setContextTokens] = useState(0);
|
|
34
35
|
const [contextLimit, setContextLimit] = useState(config.kcContextLimit || 200000);
|
|
36
|
+
const [taskList, setTaskList] = useState([]);
|
|
37
|
+
const [taskProgress, setTaskProgress] = useState(null);
|
|
35
38
|
|
|
36
39
|
const engineRef = useRef(engine);
|
|
37
40
|
const streamingRef = useRef(false);
|
|
@@ -60,7 +63,7 @@ function App({ engine, config }) {
|
|
|
60
63
|
let accumulated = "";
|
|
61
64
|
|
|
62
65
|
try {
|
|
63
|
-
for await (const event of engineRef.current.
|
|
66
|
+
for await (const event of engineRef.current.runTaskLoop(text)) {
|
|
64
67
|
switch (event.type) {
|
|
65
68
|
case "text_delta":
|
|
66
69
|
accumulated += event.text ?? "";
|
|
@@ -110,6 +113,16 @@ function App({ engine, config }) {
|
|
|
110
113
|
break;
|
|
111
114
|
}
|
|
112
115
|
|
|
116
|
+
case "task_progress": {
|
|
117
|
+
const tp = event.data;
|
|
118
|
+
setTaskList(engineRef.current.taskManager.getAllTasks());
|
|
119
|
+
setTaskProgress(tp.progress);
|
|
120
|
+
if (tp.status === "in_progress") {
|
|
121
|
+
setSpinnerStatus(`Task: ${tp.title}`);
|
|
122
|
+
}
|
|
123
|
+
break;
|
|
124
|
+
}
|
|
125
|
+
|
|
113
126
|
case "error":
|
|
114
127
|
addMessage({ role: "system", content: `Error: ${event.message ?? "Unknown error"}` });
|
|
115
128
|
break;
|
|
@@ -144,6 +157,7 @@ function App({ engine, config }) {
|
|
|
144
157
|
"Commands:\n" +
|
|
145
158
|
" /help Show this help\n" +
|
|
146
159
|
" /status Show session info, model, phase, workspace\n" +
|
|
160
|
+
" /tasks Show task progress\n" +
|
|
147
161
|
" /clear Clear conversation history (keep workspace)\n" +
|
|
148
162
|
" /compact Summarize older messages to reduce context\n" +
|
|
149
163
|
" /sessions List all sessions\n" +
|
|
@@ -172,6 +186,13 @@ function App({ engine, config }) {
|
|
|
172
186
|
return true;
|
|
173
187
|
}
|
|
174
188
|
|
|
189
|
+
case "/tasks":
|
|
190
|
+
addMessage({
|
|
191
|
+
role: "system",
|
|
192
|
+
content: engineRef.current.taskManager.formatForDisplay(),
|
|
193
|
+
});
|
|
194
|
+
return true;
|
|
195
|
+
|
|
175
196
|
case "/clear":
|
|
176
197
|
engineRef.current.history = new ConversationHistory(engineRef.current.workspace.cwd);
|
|
177
198
|
setMessages([]);
|
|
@@ -323,6 +344,9 @@ function App({ engine, config }) {
|
|
|
323
344
|
// Welcome banner
|
|
324
345
|
showWelcome ? h(WelcomeBanner, { projectDir: config.projectDir }) : null,
|
|
325
346
|
|
|
347
|
+
// Task dashboard (ralph-loop)
|
|
348
|
+
taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
|
|
349
|
+
|
|
326
350
|
// Message history
|
|
327
351
|
...messages.map((msg, i) => {
|
|
328
352
|
if (msg.role === "user") {
|