krim 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- krim-0.3.0/LICENSE +21 -0
- krim-0.3.0/PKG-INFO +334 -0
- krim-0.3.0/README.md +300 -0
- krim-0.3.0/krim/__init__.py +3 -0
- krim-0.3.0/krim/__main__.py +584 -0
- krim-0.3.0/krim/agent.py +571 -0
- krim-0.3.0/krim/compaction.py +172 -0
- krim-0.3.0/krim/config.py +174 -0
- krim-0.3.0/krim/context.py +161 -0
- krim-0.3.0/krim/git.py +102 -0
- krim-0.3.0/krim/mcp.py +241 -0
- krim-0.3.0/krim/models/__init__.py +76 -0
- krim-0.3.0/krim/models/base.py +32 -0
- krim-0.3.0/krim/models/claude.py +106 -0
- krim-0.3.0/krim/models/openai.py +109 -0
- krim-0.3.0/krim/models/vertex.py +111 -0
- krim-0.3.0/krim/prompt.py +66 -0
- krim-0.3.0/krim/py.typed +0 -0
- krim-0.3.0/krim/retry.py +60 -0
- krim-0.3.0/krim/safety.py +62 -0
- krim-0.3.0/krim/skills.py +164 -0
- krim-0.3.0/krim/tools/__init__.py +29 -0
- krim-0.3.0/krim/tools/base.py +34 -0
- krim-0.3.0/krim/tools/bash.py +116 -0
- krim-0.3.0/krim/tools/edit.py +300 -0
- krim-0.3.0/krim/tools/glob.py +72 -0
- krim-0.3.0/krim/tools/grep.py +110 -0
- krim-0.3.0/krim/tools/read.py +111 -0
- krim-0.3.0/krim/tools/skill.py +80 -0
- krim-0.3.0/krim/tools/submit.py +44 -0
- krim-0.3.0/krim/tools/write.py +32 -0
- krim-0.3.0/krim/truncate.py +24 -0
- krim-0.3.0/krim/ui.py +134 -0
- krim-0.3.0/krim/worktree.py +148 -0
- krim-0.3.0/krim.egg-info/PKG-INFO +334 -0
- krim-0.3.0/krim.egg-info/SOURCES.txt +42 -0
- krim-0.3.0/krim.egg-info/dependency_links.txt +1 -0
- krim-0.3.0/krim.egg-info/entry_points.txt +2 -0
- krim-0.3.0/krim.egg-info/requires.txt +10 -0
- krim-0.3.0/krim.egg-info/top_level.txt +2 -0
- krim-0.3.0/krim_harbor/__init__.py +5 -0
- krim-0.3.0/krim_harbor/agent.py +429 -0
- krim-0.3.0/pyproject.toml +53 -0
- krim-0.3.0/setup.cfg +4 -0
krim-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 batteryhob
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
krim-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: krim
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Thin CLI agent. Trust the model, keep the harness light.
|
|
5
|
+
Author: batteryhob
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/batteryhob/KRIM
|
|
8
|
+
Project-URL: Repository, https://github.com/batteryhob/KRIM
|
|
9
|
+
Project-URL: Issues, https://github.com/batteryhob/KRIM/issues
|
|
10
|
+
Keywords: cli,agent,coding-agent,llm,anthropic,openai
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: anthropic[vertex]>=0.39.0
|
|
26
|
+
Requires-Dist: openai>=1.50.0
|
|
27
|
+
Requires-Dist: rich>=13.0.0
|
|
28
|
+
Requires-Dist: prompt_toolkit>=3.0.0
|
|
29
|
+
Provides-Extra: harbor
|
|
30
|
+
Requires-Dist: harbor>=0.1.0; extra == "harbor"
|
|
31
|
+
Provides-Extra: vertex
|
|
32
|
+
Requires-Dist: anthropic[vertex]>=0.39.0; extra == "vertex"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
<table align="center"><tr><td>
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
██╗ ██╗██████╗ ██╗███╗ ███╗
|
|
39
|
+
● ● ██║ ██╔╝██╔══██╗██║████╗ ████║
|
|
40
|
+
█████╔╝ ██████╔╝██║██╔████╔██║
|
|
41
|
+
▄████◣◢████▄ ██╔═██╗ ██╔══██╗██║██║╚██╔╝██║
|
|
42
|
+
◥▄████▀▀ ▀▀████▄◤ ██║ ██╗██║ ██║██║██║ ╚═╝ ██║
|
|
43
|
+
▀▀ ▀▀ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝╚═╝ ╚═╝
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
</td></tr></table>
|
|
47
|
+
|
|
48
|
+
<div align="center">
|
|
49
|
+
|
|
50
|
+
**Trust the model. Keep the harness light.**
|
|
51
|
+
|
|
52
|
+
[Terminal-Bench 2.0 leaderboard](https://www.tbench.ai/leaderboard/terminal-bench/2.0)
|
|
53
|
+
[Python](https://www.python.org/downloads/)
|
|
54
|
+
[License: MIT](LICENSE)
|
|
55
|
+
|
|
56
|
+
*A lightweight CLI coding agent. 6 tools, single loop, no sub-agents.*
|
|
57
|
+
|
|
58
|
+
[Installation](#installation) • [Usage](#usage) • [Benchmark](#benchmark) • [Architecture](#architecture)
|
|
59
|
+
|
|
60
|
+
</div>
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Philosophy
|
|
65
|
+
|
|
66
|
+
| Principle | Description |
|
|
67
|
+
|-----------|-------------|
|
|
68
|
+
| **Trust the model** | Minimal system prompt. No scaffolding, no CoT wrappers, no sub-agents. Let the model decide. |
|
|
69
|
+
| **One file, one job** | Every module does exactly one thing. Largest file is under 600 lines. Average under 150. |
|
|
70
|
+
| **Safe by default, always escapable** | deny > allow > ask. Every guardrail has an off switch. |
|
|
71
|
+
|
|
72
|
+
## Benchmark
|
|
73
|
+
|
|
74
|
+
<div align="center">
|
|
75
|
+
|
|
76
|
+
### Terminal-Bench 2.0
|
|
77
|
+
|
|
78
|
+
| Metric | Score |
|
|
79
|
+
|--------|-------|
|
|
80
|
+
| **Pass Rate** | 64/89 (71.9%) |
|
|
81
|
+
| **Model** | Claude Opus 4.6 |
|
|
82
|
+
| **Provider** | Vertex AI |
|
|
83
|
+
|
|
84
|
+
</div>
|
|
85
|
+
|
|
86
|
+
## Installation
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install -e .
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Requires Python 3.9+ and an API key for your chosen provider:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Claude (Anthropic)
|
|
96
|
+
export ANTHROPIC_API_KEY="sk-..."
|
|
97
|
+
|
|
98
|
+
# OpenAI
|
|
99
|
+
export OPENAI_API_KEY="sk-..."
|
|
100
|
+
|
|
101
|
+
# Vertex AI (Google Cloud - recommended for high-volume)
|
|
102
|
+
export VERTEXAI_LOCATION="us-east5"
|
|
103
|
+
export GOOGLE_CLOUD_PROJECT="your-project-id"
|
|
104
|
+
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Usage
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Run krim (shows provider selection menu)
|
|
111
|
+
krim
|
|
112
|
+
# Select provider:
|
|
113
|
+
# [1] claude (Anthropic Claude)
|
|
114
|
+
# [2] openai (OpenAI)
|
|
115
|
+
# [3] vertex_ai (Google Vertex AI)
|
|
116
|
+
|
|
117
|
+
# Single prompt (also shows provider menu)
|
|
118
|
+
krim "fix the failing test in test_api.py"
|
|
119
|
+
|
|
120
|
+
# Skip menu with --provider flag
|
|
121
|
+
krim --provider claude "refactor this"
|
|
122
|
+
krim --provider openai "add tests"
|
|
123
|
+
krim --provider vertex_ai "deploy to prod"
|
|
124
|
+
|
|
125
|
+
# Power user
|
|
126
|
+
krim --max-turns 20 "large refactor"
|
|
127
|
+
krim --skill deploy "ship it"
|
|
128
|
+
krim --auto-commit "fix and commit"
|
|
129
|
+
krim --no-safety "run anything"
|
|
130
|
+
krim --verbose "debug mode"
|
|
131
|
+
krim --log-dir ./logs "save conversation"
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Ralph Wiggum Loop
|
|
135
|
+
|
|
136
|
+
Run the agent repeatedly until a condition passes. Each iteration starts fresh — state lives in git, not LLM memory.
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# Loop until pytest passes (max 20 iterations)
|
|
140
|
+
krim --loop --exit-on "pytest" "implement the PRD"
|
|
141
|
+
|
|
142
|
+
# Limit iterations
|
|
143
|
+
krim --loop --max-iterations 10 --exit-on "ruff check ." "fix all lint errors"
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Parallel Worktrees
|
|
147
|
+
|
|
148
|
+
Leverage git worktrees to run multiple independent tasks simultaneously.
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# 3 agents work in parallel, auto-merge on completion
|
|
152
|
+
krim --parallel "add auth" "add payments" "add tests"
|
|
153
|
+
|
|
154
|
+
# Keep branches separate for manual review
|
|
155
|
+
krim --parallel --no-merge "feature A" "feature B"
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Tools
|
|
159
|
+
|
|
160
|
+
| Tool | Description |
|
|
161
|
+
|------|-------------|
|
|
162
|
+
| `bash` | Shell execution with persistent cwd. Safety rules enforced. |
|
|
163
|
+
| `read` | File reading with line numbers. Supports offset/limit for large files. |
|
|
164
|
+
| `write` | File writing with auto parent directory creation. |
|
|
165
|
+
| `edit` | String replacement with exact → whitespace-normalized → fuzzy matching (0.8 threshold). |
|
|
166
|
+
| `grep` | Regex content search. Uses ripgrep when available, Python fallback otherwise. |
|
|
167
|
+
| `glob` | File pattern matching (e.g., `**/*.py`). Results sorted by modification time. |
|
|
168
|
+
|
|
169
|
+
## Architecture
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
krim/
|
|
173
|
+
├── __main__.py # CLI entry, arg parsing, interactive/loop/parallel modes
|
|
174
|
+
├── agent.py # Core agent loop, doom loop detection, stats
|
|
175
|
+
├── ui.py # Banner, prompt_toolkit input
|
|
176
|
+
├── prompt.py # System prompt builder
|
|
177
|
+
├── config.py # Hierarchical config loader
|
|
178
|
+
├── context.py # Environment context (cwd, git, file tree)
|
|
179
|
+
├── safety.py # Bash command safety rules
|
|
180
|
+
├── compaction.py # Token tracking, conversation compression
|
|
181
|
+
├── truncate.py # Output truncation (head/tail)
|
|
182
|
+
├── retry.py # Exponential backoff
|
|
183
|
+
├── git.py # Auto-commit, undo, selective staging
|
|
184
|
+
├── worktree.py # Git worktree management for parallel execution
|
|
185
|
+
├── skills.py # Skill discovery and injection
|
|
186
|
+
├── mcp.py # MCP client (stdio, JSON-RPC)
|
|
187
|
+
├── models/
|
|
188
|
+
│ ├── base.py # Abstract Model, ToolCall, ModelResponse
|
|
189
|
+
│ ├── claude.py # Anthropic Claude provider
|
|
190
|
+
│ ├── openai.py # OpenAI provider
|
|
191
|
+
│ └── vertex.py # Vertex AI provider
|
|
192
|
+
└── tools/
|
|
193
|
+
├── base.py # Abstract Tool with auto schema generation
|
|
194
|
+
├── bash.py # Shell execution, persistent cwd
|
|
195
|
+
├── read.py # File reading with line numbers
|
|
196
|
+
├── write.py # File writing
|
|
197
|
+
├── edit.py # Fuzzy string replacement
|
|
198
|
+
├── grep.py # Regex search (ripgrep + fallback)
|
|
199
|
+
└── glob.py # File pattern matching
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Loop Behavior
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
User Input
|
|
206
|
+
↓
|
|
207
|
+
[System Prompt + Context + History]
|
|
208
|
+
↓
|
|
209
|
+
Model → Text response? → Done
|
|
210
|
+
→ Tool calls? → Execute each → Append results → Loop
|
|
211
|
+
→ Doom loop 1st? → Recovery nudge (keep tools, suggest different strategy)
|
|
212
|
+
→ Doom loop 2nd? → Force exit with summary
|
|
213
|
+
→ Near max turns? → Inject last-turn warning
|
|
214
|
+
→ Max turns hit? → Force exit with wrap-up request
|
|
215
|
+
→ Token limit? → Compress history and continue
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Single loop. No routing, no planning phase, no sub-agents.
|
|
219
|
+
|
|
220
|
+
### Double Confirmation
|
|
221
|
+
|
|
222
|
+
When the agent responds with text only (no tool calls), KRIM asks for confirmation before exiting. This prevents premature exits when the agent says "I'll do X" but doesn't actually call a tool.
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
Agent: "I've completed the task."
|
|
226
|
+
KRIM: "Confirm by saying 'Task complete' or continue working."
|
|
227
|
+
Agent: "Task complete."
|
|
228
|
+
KRIM: [exits]
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
If the agent says "I haven't finished yet" or "Let me continue", KRIM resets and lets it keep working.
|
|
232
|
+
|
|
233
|
+
## Configuration
|
|
234
|
+
|
|
235
|
+
Hierarchy: `~/.krim/` (global) < `.krim/` (project) < CLI flags.
|
|
236
|
+
|
|
237
|
+
```
|
|
238
|
+
.krim/
|
|
239
|
+
├── config.json # Settings
|
|
240
|
+
├── KRIM.md # Instructions injected into system prompt
|
|
241
|
+
├── mcp.json # MCP server config
|
|
242
|
+
├── rules/
|
|
243
|
+
│ └── *.md # Additional rules
|
|
244
|
+
└── skills/
|
|
245
|
+
└── <name>/
|
|
246
|
+
└── SKILL.md # Skill instructions
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### config.json
|
|
250
|
+
|
|
251
|
+
```json
|
|
252
|
+
{
|
|
253
|
+
"max_turns": 10,
|
|
254
|
+
"auto_commit": false,
|
|
255
|
+
"ask_by_default": true,
|
|
256
|
+
"allow_commands": ["ls", "cat", "grep", "git status", "git diff", "pytest"],
|
|
257
|
+
"deny_patterns": ["rm -rf /", "> /dev/sda", "mkfs."],
|
|
258
|
+
"max_tokens": 16384,
|
|
259
|
+
"bash_timeout": 120,
|
|
260
|
+
"token_limit": 120000
|
|
261
|
+
}
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
**Default models** (used when `--model` is not specified):
|
|
265
|
+
| Provider | Default Model |
|
|
266
|
+
|----------|---------------|
|
|
267
|
+
| claude | claude-opus-4-6 |
|
|
268
|
+
| openai | gpt-5.2 |
|
|
269
|
+
| vertex_ai | claude-opus-4-6 |
|
|
270
|
+
|
|
271
|
+
### Skills
|
|
272
|
+
|
|
273
|
+
Reusable prompt packages. Activate with `--skill <name>`:
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
krim --skill deploy "ship the new feature"
|
|
277
|
+
krim --list-skills
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**SKILL.md format** ([Anthropic standard](https://github.com/anthropics/skills)):
|
|
281
|
+
|
|
282
|
+
```markdown
|
|
283
|
+
---
|
|
284
|
+
name: my-skill
|
|
285
|
+
description: What this skill does and when to use it
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
# Skill Content
|
|
289
|
+
|
|
290
|
+
Instructions in markdown...
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
### MCP
|
|
294
|
+
|
|
295
|
+
Connect external tool servers via [Model Context Protocol](https://modelcontextprotocol.io):
|
|
296
|
+
|
|
297
|
+
```json
|
|
298
|
+
{
|
|
299
|
+
"mcpServers": {
|
|
300
|
+
"web-search": {
|
|
301
|
+
"command": ["node", "search-server/index.js"],
|
|
302
|
+
"env": { "API_KEY": "..." }
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## Safety
|
|
309
|
+
|
|
310
|
+
Commands pass through 3-stage checks: **deny > allow > ask**.
|
|
311
|
+
|
|
312
|
+
- **deny**: Substring match on dangerous patterns. Blocked immediately.
|
|
313
|
+
- **allow**: Word-boundary match on safe prefixes. Auto-approved.
|
|
314
|
+
- **ask**: Everything else prompts for user confirmation.
|
|
315
|
+
|
|
316
|
+
Disable with `--no-safety` or `"ask_by_default": false` in config.
|
|
317
|
+
|
|
318
|
+
## Interactive Commands
|
|
319
|
+
|
|
320
|
+
```
|
|
321
|
+
/help Show command help
|
|
322
|
+
/tokens Token usage and stats
|
|
323
|
+
/compact Force context compression
|
|
324
|
+
/config Show current configuration
|
|
325
|
+
/undo Revert last krim commit
|
|
326
|
+
/verbose Toggle verbose output
|
|
327
|
+
exit Quit
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
Arrow keys for history. Tab for command completion.
|
|
331
|
+
|
|
332
|
+
## License
|
|
333
|
+
|
|
334
|
+
MIT
|
krim-0.3.0/README.md
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
<table align="center"><tr><td>
|
|
2
|
+
|
|
3
|
+
```
|
|
4
|
+
██╗ ██╗██████╗ ██╗███╗ ███╗
|
|
5
|
+
● ● ██║ ██╔╝██╔══██╗██║████╗ ████║
|
|
6
|
+
█████╔╝ ██████╔╝██║██╔████╔██║
|
|
7
|
+
▄████◣◢████▄ ██╔═██╗ ██╔══██╗██║██║╚██╔╝██║
|
|
8
|
+
◥▄████▀▀ ▀▀████▄◤ ██║ ██╗██║ ██║██║██║ ╚═╝ ██║
|
|
9
|
+
▀▀ ▀▀ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝╚═╝ ╚═╝
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
</td></tr></table>
|
|
13
|
+
|
|
14
|
+
<div align="center">
|
|
15
|
+
|
|
16
|
+
**Trust the model. Keep the harness light.**
|
|
17
|
+
|
|
18
|
+
[Terminal-Bench 2.0 leaderboard](https://www.tbench.ai/leaderboard/terminal-bench/2.0)
|
|
19
|
+
[Python](https://www.python.org/downloads/)
|
|
20
|
+
[License: MIT](LICENSE)
|
|
21
|
+
|
|
22
|
+
*A lightweight CLI coding agent. 6 tools, single loop, no sub-agents.*
|
|
23
|
+
|
|
24
|
+
[Installation](#installation) • [Usage](#usage) • [Benchmark](#benchmark) • [Architecture](#architecture)
|
|
25
|
+
|
|
26
|
+
</div>
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Philosophy
|
|
31
|
+
|
|
32
|
+
| Principle | Description |
|
|
33
|
+
|-----------|-------------|
|
|
34
|
+
| **Trust the model** | Minimal system prompt. No scaffolding, no CoT wrappers, no sub-agents. Let the model decide. |
|
|
35
|
+
| **One file, one job** | Every module does exactly one thing. Largest file is under 600 lines. Average under 150. |
|
|
36
|
+
| **Safe by default, always escapable** | deny > allow > ask. Every guardrail has an off switch. |
|
|
37
|
+
|
|
38
|
+
## Benchmark
|
|
39
|
+
|
|
40
|
+
<div align="center">
|
|
41
|
+
|
|
42
|
+
### Terminal-Bench 2.0
|
|
43
|
+
|
|
44
|
+
| Metric | Score |
|
|
45
|
+
|--------|-------|
|
|
46
|
+
| **Pass Rate** | 64/89 (71.9%) |
|
|
47
|
+
| **Model** | Claude Opus 4.6 |
|
|
48
|
+
| **Provider** | Vertex AI |
|
|
49
|
+
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install -e .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Requires Python 3.9+ and an API key for your chosen provider:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Claude (Anthropic)
|
|
62
|
+
export ANTHROPIC_API_KEY="sk-..."
|
|
63
|
+
|
|
64
|
+
# OpenAI
|
|
65
|
+
export OPENAI_API_KEY="sk-..."
|
|
66
|
+
|
|
67
|
+
# Vertex AI (Google Cloud - recommended for high-volume)
|
|
68
|
+
export VERTEXAI_LOCATION="us-east5"
|
|
69
|
+
export GOOGLE_CLOUD_PROJECT="your-project-id"
|
|
70
|
+
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Usage
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Run krim (shows provider selection menu)
|
|
77
|
+
krim
|
|
78
|
+
# Select provider:
|
|
79
|
+
# [1] claude (Anthropic Claude)
|
|
80
|
+
# [2] openai (OpenAI)
|
|
81
|
+
# [3] vertex_ai (Google Vertex AI)
|
|
82
|
+
|
|
83
|
+
# Single prompt (also shows provider menu)
|
|
84
|
+
krim "fix the failing test in test_api.py"
|
|
85
|
+
|
|
86
|
+
# Skip menu with --provider flag
|
|
87
|
+
krim --provider claude "refactor this"
|
|
88
|
+
krim --provider openai "add tests"
|
|
89
|
+
krim --provider vertex_ai "deploy to prod"
|
|
90
|
+
|
|
91
|
+
# Power user
|
|
92
|
+
krim --max-turns 20 "large refactor"
|
|
93
|
+
krim --skill deploy "ship it"
|
|
94
|
+
krim --auto-commit "fix and commit"
|
|
95
|
+
krim --no-safety "run anything"
|
|
96
|
+
krim --verbose "debug mode"
|
|
97
|
+
krim --log-dir ./logs "save conversation"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Ralph Wiggum Loop
|
|
101
|
+
|
|
102
|
+
Run the agent repeatedly until a condition passes. Each iteration starts fresh — state lives in git, not LLM memory.
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
# Loop until pytest passes (max 20 iterations)
|
|
106
|
+
krim --loop --exit-on "pytest" "implement the PRD"
|
|
107
|
+
|
|
108
|
+
# Limit iterations
|
|
109
|
+
krim --loop --max-iterations 10 --exit-on "ruff check ." "fix all lint errors"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Parallel Worktrees
|
|
113
|
+
|
|
114
|
+
Leverage git worktrees to run multiple independent tasks simultaneously.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# 3 agents work in parallel, auto-merge on completion
|
|
118
|
+
krim --parallel "add auth" "add payments" "add tests"
|
|
119
|
+
|
|
120
|
+
# Keep branches separate for manual review
|
|
121
|
+
krim --parallel --no-merge "feature A" "feature B"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Tools
|
|
125
|
+
|
|
126
|
+
| Tool | Description |
|
|
127
|
+
|------|-------------|
|
|
128
|
+
| `bash` | Shell execution with persistent cwd. Safety rules enforced. |
|
|
129
|
+
| `read` | File reading with line numbers. Supports offset/limit for large files. |
|
|
130
|
+
| `write` | File writing with auto parent directory creation. |
|
|
131
|
+
| `edit` | String replacement with exact → whitespace-normalized → fuzzy matching (0.8 threshold). |
|
|
132
|
+
| `grep` | Regex content search. Uses ripgrep when available, Python fallback otherwise. |
|
|
133
|
+
| `glob` | File pattern matching (e.g., `**/*.py`). Results sorted by modification time. |
|
|
134
|
+
|
|
135
|
+
## Architecture
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
krim/
|
|
139
|
+
├── __main__.py # CLI entry, arg parsing, interactive/loop/parallel modes
|
|
140
|
+
├── agent.py # Core agent loop, doom loop detection, stats
|
|
141
|
+
├── ui.py # Banner, prompt_toolkit input
|
|
142
|
+
├── prompt.py # System prompt builder
|
|
143
|
+
├── config.py # Hierarchical config loader
|
|
144
|
+
├── context.py # Environment context (cwd, git, file tree)
|
|
145
|
+
├── safety.py # Bash command safety rules
|
|
146
|
+
├── compaction.py # Token tracking, conversation compression
|
|
147
|
+
├── truncate.py # Output truncation (head/tail)
|
|
148
|
+
├── retry.py # Exponential backoff
|
|
149
|
+
├── git.py # Auto-commit, undo, selective staging
|
|
150
|
+
├── worktree.py # Git worktree management for parallel execution
|
|
151
|
+
├── skills.py # Skill discovery and injection
|
|
152
|
+
├── mcp.py # MCP client (stdio, JSON-RPC)
|
|
153
|
+
├── models/
|
|
154
|
+
│ ├── base.py # Abstract Model, ToolCall, ModelResponse
|
|
155
|
+
│ ├── claude.py # Anthropic Claude provider
|
|
156
|
+
│ ├── openai.py # OpenAI provider
|
|
157
|
+
│ └── vertex.py # Vertex AI provider
|
|
158
|
+
└── tools/
|
|
159
|
+
├── base.py # Abstract Tool with auto schema generation
|
|
160
|
+
├── bash.py # Shell execution, persistent cwd
|
|
161
|
+
├── read.py # File reading with line numbers
|
|
162
|
+
├── write.py # File writing
|
|
163
|
+
├── edit.py # Fuzzy string replacement
|
|
164
|
+
├── grep.py # Regex search (ripgrep + fallback)
|
|
165
|
+
└── glob.py # File pattern matching
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Loop Behavior
|
|
169
|
+
|
|
170
|
+
```
|
|
171
|
+
User Input
|
|
172
|
+
↓
|
|
173
|
+
[System Prompt + Context + History]
|
|
174
|
+
↓
|
|
175
|
+
Model → Text response? → Done
|
|
176
|
+
→ Tool calls? → Execute each → Append results → Loop
|
|
177
|
+
→ Doom loop 1st? → Recovery nudge (keep tools, suggest different strategy)
|
|
178
|
+
→ Doom loop 2nd? → Force exit with summary
|
|
179
|
+
→ Near max turns? → Inject last-turn warning
|
|
180
|
+
→ Max turns hit? → Force exit with wrap-up request
|
|
181
|
+
→ Token limit? → Compress history and continue
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Single loop. No routing, no planning phase, no sub-agents.
|
|
185
|
+
|
|
186
|
+
### Double Confirmation
|
|
187
|
+
|
|
188
|
+
When the agent responds with text only (no tool calls), KRIM asks for confirmation before exiting. This prevents premature exits when the agent says "I'll do X" but doesn't actually call a tool.
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
Agent: "I've completed the task."
|
|
192
|
+
KRIM: "Confirm by saying 'Task complete' or continue working."
|
|
193
|
+
Agent: "Task complete."
|
|
194
|
+
KRIM: [exits]
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
If the agent says "I haven't finished yet" or "Let me continue", KRIM resets and lets it keep working.
|
|
198
|
+
|
|
199
|
+
## Configuration
|
|
200
|
+
|
|
201
|
+
Hierarchy: `~/.krim/` (global) < `.krim/` (project) < CLI flags.
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
.krim/
|
|
205
|
+
├── config.json # Settings
|
|
206
|
+
├── KRIM.md # Instructions injected into system prompt
|
|
207
|
+
├── mcp.json # MCP server config
|
|
208
|
+
├── rules/
|
|
209
|
+
│ └── *.md # Additional rules
|
|
210
|
+
└── skills/
|
|
211
|
+
└── <name>/
|
|
212
|
+
└── SKILL.md # Skill instructions
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### config.json
|
|
216
|
+
|
|
217
|
+
```json
|
|
218
|
+
{
|
|
219
|
+
"max_turns": 10,
|
|
220
|
+
"auto_commit": false,
|
|
221
|
+
"ask_by_default": true,
|
|
222
|
+
"allow_commands": ["ls", "cat", "grep", "git status", "git diff", "pytest"],
|
|
223
|
+
"deny_patterns": ["rm -rf /", "> /dev/sda", "mkfs."],
|
|
224
|
+
"max_tokens": 16384,
|
|
225
|
+
"bash_timeout": 120,
|
|
226
|
+
"token_limit": 120000
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
**Default models** (used when `--model` is not specified):
|
|
231
|
+
| Provider | Default Model |
|
|
232
|
+
|----------|---------------|
|
|
233
|
+
| claude | claude-opus-4-6 |
|
|
234
|
+
| openai | gpt-5.2 |
|
|
235
|
+
| vertex_ai | claude-opus-4-6 |
|
|
236
|
+
|
|
237
|
+
### Skills
|
|
238
|
+
|
|
239
|
+
Reusable prompt packages. Activate with `--skill <name>`:
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
krim --skill deploy "ship the new feature"
|
|
243
|
+
krim --list-skills
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
**SKILL.md format** ([Anthropic standard](https://github.com/anthropics/skills)):
|
|
247
|
+
|
|
248
|
+
```markdown
|
|
249
|
+
---
|
|
250
|
+
name: my-skill
|
|
251
|
+
description: What this skill does and when to use it
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
# Skill Content
|
|
255
|
+
|
|
256
|
+
Instructions in markdown...
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### MCP
|
|
260
|
+
|
|
261
|
+
Connect external tool servers via [Model Context Protocol](https://modelcontextprotocol.io):
|
|
262
|
+
|
|
263
|
+
```json
|
|
264
|
+
{
|
|
265
|
+
"mcpServers": {
|
|
266
|
+
"web-search": {
|
|
267
|
+
"command": ["node", "search-server/index.js"],
|
|
268
|
+
"env": { "API_KEY": "..." }
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## Safety
|
|
275
|
+
|
|
276
|
+
Commands pass through 3-stage checks: **deny > allow > ask**.
|
|
277
|
+
|
|
278
|
+
- **deny**: Substring match on dangerous patterns. Blocked immediately.
|
|
279
|
+
- **allow**: Word-boundary match on safe prefixes. Auto-approved.
|
|
280
|
+
- **ask**: Everything else prompts for user confirmation.
|
|
281
|
+
|
|
282
|
+
Disable with `--no-safety` or `"ask_by_default": false` in config.
|
|
283
|
+
|
|
284
|
+
## Interactive Commands
|
|
285
|
+
|
|
286
|
+
```
|
|
287
|
+
/help Show command help
|
|
288
|
+
/tokens Token usage and stats
|
|
289
|
+
/compact Force context compression
|
|
290
|
+
/config Show current configuration
|
|
291
|
+
/undo Revert last krim commit
|
|
292
|
+
/verbose Toggle verbose output
|
|
293
|
+
exit Quit
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
Arrow keys for history. Tab for command completion.
|
|
297
|
+
|
|
298
|
+
## License
|
|
299
|
+
|
|
300
|
+
MIT
|