kadmon 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kadmon-0.1.0/.github/workflows/ci.yml +16 -0
- kadmon-0.1.0/.github/workflows/release.yml +39 -0
- kadmon-0.1.0/.gitignore +16 -0
- kadmon-0.1.0/AGENTS.md +111 -0
- kadmon-0.1.0/LICENSE +21 -0
- kadmon-0.1.0/PKG-INFO +232 -0
- kadmon-0.1.0/README.md +196 -0
- kadmon-0.1.0/dev +54 -0
- kadmon-0.1.0/docs/autonomous-context-plan.md +440 -0
- kadmon-0.1.0/kadmon/__init__.py +1 -0
- kadmon-0.1.0/kadmon/__main__.py +5 -0
- kadmon-0.1.0/kadmon/agent/__init__.py +1 -0
- kadmon-0.1.0/kadmon/agent/backtrack.py +72 -0
- kadmon-0.1.0/kadmon/agent/checkpoint.py +71 -0
- kadmon-0.1.0/kadmon/agent/context.py +52 -0
- kadmon-0.1.0/kadmon/agent/handoff.py +175 -0
- kadmon-0.1.0/kadmon/agent/loop.py +255 -0
- kadmon-0.1.0/kadmon/agent/planner.py +92 -0
- kadmon-0.1.0/kadmon/agent/prompts.py +58 -0
- kadmon-0.1.0/kadmon/agent/pruner.py +102 -0
- kadmon-0.1.0/kadmon/agent/recovery.py +52 -0
- kadmon-0.1.0/kadmon/cli.py +249 -0
- kadmon-0.1.0/kadmon/config.py +24 -0
- kadmon-0.1.0/kadmon/eval/__init__.py +4 -0
- kadmon-0.1.0/kadmon/eval/harness.py +127 -0
- kadmon-0.1.0/kadmon/eval/polyglot.py +493 -0
- kadmon-0.1.0/kadmon/human/__init__.py +8 -0
- kadmon-0.1.0/kadmon/human/channel.py +220 -0
- kadmon-0.1.0/kadmon/index/__init__.py +4 -0
- kadmon-0.1.0/kadmon/index/db.py +129 -0
- kadmon-0.1.0/kadmon/index/parser.py +195 -0
- kadmon-0.1.0/kadmon/index/updater.py +63 -0
- kadmon-0.1.0/kadmon/memory/__init__.py +3 -0
- kadmon-0.1.0/kadmon/memory/librarian.py +153 -0
- kadmon-0.1.0/kadmon/memory/read_cache.py +23 -0
- kadmon-0.1.0/kadmon/memory/session_tracker.py +154 -0
- kadmon-0.1.0/kadmon/providers/__init__.py +7 -0
- kadmon-0.1.0/kadmon/providers/anthropic.py +69 -0
- kadmon-0.1.0/kadmon/providers/base.py +33 -0
- kadmon-0.1.0/kadmon/providers/bedrock.py +70 -0
- kadmon-0.1.0/kadmon/tools/__init__.py +40 -0
- kadmon-0.1.0/kadmon/tools/ask_human.py +71 -0
- kadmon-0.1.0/kadmon/tools/base.py +43 -0
- kadmon-0.1.0/kadmon/tools/file_io.py +218 -0
- kadmon-0.1.0/kadmon/tools/plan.py +123 -0
- kadmon-0.1.0/kadmon/tools/references.py +127 -0
- kadmon-0.1.0/kadmon/tools/search.py +65 -0
- kadmon-0.1.0/kadmon/tools/shell.py +43 -0
- kadmon-0.1.0/kadmon/tools/skeleton.py +118 -0
- kadmon-0.1.0/kadmon/tools/submit.py +32 -0
- kadmon-0.1.0/npm/README.md +35 -0
- kadmon-0.1.0/npm/bin/kadmon.mjs +89 -0
- kadmon-0.1.0/npm/package.json +23 -0
- kadmon-0.1.0/publish +49 -0
- kadmon-0.1.0/pyproject.toml +59 -0
- kadmon-0.1.0/tests/__init__.py +0 -0
- kadmon-0.1.0/tests/test_agent.py +82 -0
- kadmon-0.1.0/tests/test_eval.py +33 -0
- kadmon-0.1.0/tests/test_handoff.py +119 -0
- kadmon-0.1.0/tests/test_human.py +130 -0
- kadmon-0.1.0/tests/test_memory.py +168 -0
- kadmon-0.1.0/tests/test_planning.py +236 -0
- kadmon-0.1.0/tests/test_pruner.py +126 -0
- kadmon-0.1.0/tests/test_read_cache.py +79 -0
- kadmon-0.1.0/tests/test_recovery.py +52 -0
- kadmon-0.1.0/tests/test_references.py +80 -0
- kadmon-0.1.0/tests/test_skeleton.py +45 -0
- kadmon-0.1.0/tests/test_tools.py +134 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
on: [push, pull_request]
|
|
3
|
+
jobs:
|
|
4
|
+
test:
|
|
5
|
+
runs-on: ubuntu-latest
|
|
6
|
+
strategy:
|
|
7
|
+
matrix:
|
|
8
|
+
python-version: ['3.11', '3.12']
|
|
9
|
+
steps:
|
|
10
|
+
- uses: actions/checkout@v4
|
|
11
|
+
- uses: actions/setup-python@v5
|
|
12
|
+
with:
|
|
13
|
+
python-version: ${{ matrix.python-version }}
|
|
14
|
+
- run: pip install -e '.[dev]'
|
|
15
|
+
- run: ruff check kadmon/ tests/
|
|
16
|
+
- run: pytest tests/ -v
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish-pypi:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.12"
|
|
18
|
+
- run: pip install build
|
|
19
|
+
- run: python -m build
|
|
20
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
21
|
+
|
|
22
|
+
publish-npm:
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
permissions:
|
|
25
|
+
contents: read
|
|
26
|
+
id-token: write
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
- uses: actions/setup-node@v4
|
|
30
|
+
with:
|
|
31
|
+
node-version: "22"
|
|
32
|
+
registry-url: "https://registry.npmjs.org"
|
|
33
|
+
- name: Sync version from tag
|
|
34
|
+
run: |
|
|
35
|
+
VERSION=${GITHUB_REF#refs/tags/v}
|
|
36
|
+
cd npm
|
|
37
|
+
npm version $VERSION --no-git-tag-version
|
|
38
|
+
- name: Publish with provenance
|
|
39
|
+
run: cd npm && npm publish --provenance --access public
|
kadmon-0.1.0/.gitignore
ADDED
kadmon-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
Guidelines for AI agents contributing to this repository.
|
|
4
|
+
|
|
5
|
+
## Build & Test Loop
|
|
6
|
+
|
|
7
|
+
Every change must follow this cycle:
|
|
8
|
+
|
|
9
|
+
1. **Understand** — Read relevant code before modifying. Use `grep_search` or `find_symbols` to locate what you need. Never guess at file contents.
|
|
10
|
+
2. **Implement** — Make minimal, focused changes. One concern per edit.
|
|
11
|
+
3. **Lint** — Run `ruff check kadmon/ tests/` and fix any issues.
|
|
12
|
+
4. **Test** — Run `pytest tests/ -v`. All tests must pass. If you added new functionality, add tests for it.
|
|
13
|
+
5. **Commit** — Use conventional commits (see below). Only commit when lint + tests pass.
|
|
14
|
+
|
|
15
|
+
Do not skip steps. Do not commit broken code.
|
|
16
|
+
|
|
17
|
+
## Development Setup
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install -e ".[dev]"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Commands
|
|
24
|
+
|
|
25
|
+
| Task | Command |
|
|
26
|
+
|------|---------|
|
|
27
|
+
| Lint | `ruff check kadmon/ tests/` |
|
|
28
|
+
| Lint fix | `ruff check --fix kadmon/ tests/` |
|
|
29
|
+
| Test | `pytest tests/ -v` |
|
|
30
|
+
| Run agent | `kadmon run --task "..." --repo /path/to/repo` |
|
|
31
|
+
| Run eval | `kadmon eval --dataset instances.json --limit 10` |
|
|
32
|
+
|
|
33
|
+
## Commit Convention
|
|
34
|
+
|
|
35
|
+
Use [Conventional Commits](https://www.conventionalcommits.org/):
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
type(scope): description
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Types: `feat`, `fix`, `refactor`, `test`, `docs`, `chore`, `ci`
|
|
42
|
+
|
|
43
|
+
Scopes: `agent`, `tools`, `providers`, `eval`, `index`, `memory`, `cli`
|
|
44
|
+
|
|
45
|
+
Examples:
|
|
46
|
+
- `feat(tools): add grep_search with ripgrep fallback`
|
|
47
|
+
- `fix(agent): prevent infinite loop on repeated tool errors`
|
|
48
|
+
- `test(tools): add edit_file edge case coverage`
|
|
49
|
+
- `docs: update README with eval instructions`
|
|
50
|
+
|
|
51
|
+
Rules:
|
|
52
|
+
- One logical change per commit
|
|
53
|
+
- Implementation + its direct tests belong in the same commit
|
|
54
|
+
- Keep commits atomic — each should build and pass tests independently
|
|
55
|
+
- Never bundle unrelated changes
|
|
56
|
+
|
|
57
|
+
## Code Style
|
|
58
|
+
|
|
59
|
+
- Python 3.11+, type hints on all public functions
|
|
60
|
+
- Use `pathlib.Path` for filesystem operations
|
|
61
|
+
- Use dataclasses or pydantic models for structured data
|
|
62
|
+
- Keep functions short (<40 lines). Extract helpers.
|
|
63
|
+
- Error messages should be actionable (tell the user what to do next)
|
|
64
|
+
- No `# type: ignore`, no `Any` unless unavoidable
|
|
65
|
+
|
|
66
|
+
## Architecture Rules
|
|
67
|
+
|
|
68
|
+
- **Tools** are the agent's interface to the world. Optimize tool output for LLM consumption (concise, structured, actionable errors).
|
|
69
|
+
- **Providers** are stateless adapters. No business logic in providers.
|
|
70
|
+
- **Agent loop** is single-threaded ReAct. Keep it simple.
|
|
71
|
+
- **No frameworks** (no LangChain, no LangGraph). Use provider SDKs directly.
|
|
72
|
+
- All paths are resolved relative to `repo_root`. Validate that resolved paths do not escape it.
|
|
73
|
+
|
|
74
|
+
## Adding a New Tool
|
|
75
|
+
|
|
76
|
+
1. Create `kadmon/tools/your_tool.py` with a class extending `Tool`
|
|
77
|
+
2. Define `name`, `description`, `parameters` (JSON Schema)
|
|
78
|
+
3. Implement `execute(**kwargs) -> ToolResult`
|
|
79
|
+
4. Register in `kadmon/tools/__init__.py` inside `create_default_registry()`
|
|
80
|
+
5. Add tests in `tests/test_tools.py`
|
|
81
|
+
6. Run lint + tests before committing
|
|
82
|
+
|
|
83
|
+
## Adding a New Provider
|
|
84
|
+
|
|
85
|
+
1. Create `kadmon/providers/your_provider.py` implementing `LLMProvider` protocol
|
|
86
|
+
2. Handle message format conversion (internal ↔ provider-specific)
|
|
87
|
+
3. Add retry logic for transient errors
|
|
88
|
+
4. Register in provider factory
|
|
89
|
+
5. Add tests with mocked API responses
|
|
90
|
+
|
|
91
|
+
## File Structure
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
kadmon/
|
|
95
|
+
├── agent/ # Core loop, context management, planning
|
|
96
|
+
├── providers/ # LLM provider implementations
|
|
97
|
+
├── tools/ # Agent tools (file I/O, search, shell, etc.)
|
|
98
|
+
├── eval/ # Benchmark harnesses (Aider Polyglot, SWE-bench)
|
|
99
|
+
├── index/ # Tree-sitter symbol index (SQLite)
|
|
100
|
+
├── memory/ # Cross-session memory (librarian, session tracker, read cache)
|
|
101
|
+
└── cli.py # CLI entry point
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## What Not To Do
|
|
105
|
+
|
|
106
|
+
- Don't add dependencies without justification
|
|
107
|
+
- Don't add async unless the specific module requires it
|
|
108
|
+
- Don't modify the core loop without understanding the full message flow
|
|
109
|
+
- Don't delete or skip tests to make the build pass
|
|
110
|
+
- Don't leave debug prints or commented-out code
|
|
111
|
+
- Don't make changes outside the scope of your task
|
kadmon-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ayuan153
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
kadmon-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kadmon
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An autonomous coding agent that manages its own context across sessions
|
|
5
|
+
Project-URL: Homepage, https://github.com/ayuan153/kadmon
|
|
6
|
+
Project-URL: Repository, https://github.com/ayuan153/kadmon
|
|
7
|
+
Project-URL: Issues, https://github.com/ayuan153/kadmon/issues
|
|
8
|
+
Author: ayuan153
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent,ai,autonomous,coding,llm
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: anthropic>=0.40.0
|
|
23
|
+
Requires-Dist: click>=8.0.0
|
|
24
|
+
Requires-Dist: openai>=1.50.0
|
|
25
|
+
Requires-Dist: pydantic>=2.0.0
|
|
26
|
+
Requires-Dist: rich>=13.0.0
|
|
27
|
+
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
28
|
+
Requires-Dist: tree-sitter-python>=0.23.0
|
|
29
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
30
|
+
Requires-Dist: tree-sitter>=0.23.0
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff>=0.5.0; extra == 'dev'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# Kadmon
|
|
38
|
+
|
|
39
|
+
An autonomous coding agent that manages its own context, asks clarifying questions, and hands off between sessions without human intervention. Scores 100% on the Aider Polyglot (Python) benchmark.
|
|
40
|
+
|
|
41
|
+
## Install
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install kadmon
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Getting Started
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Interactive setup — picks your provider, configures credentials, tests the connection
|
|
51
|
+
kadmon init
|
|
52
|
+
|
|
53
|
+
# Run on a repo
|
|
54
|
+
cd your-project
|
|
55
|
+
kadmon run --task "Fix the failing test in test_auth.py"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
`kadmon init` walks you through:
|
|
59
|
+
1. Choose provider (Bedrock, Anthropic, OpenAI)
|
|
60
|
+
2. Configure credentials (AWS profile, API key, etc.)
|
|
61
|
+
3. Test the connection
|
|
62
|
+
4. Save to `.kadmon/config.toml`
|
|
63
|
+
|
|
64
|
+
### Manual Provider Setup
|
|
65
|
+
|
|
66
|
+
If you prefer to skip `kadmon init`:
|
|
67
|
+
|
|
68
|
+
**AWS Bedrock** (default):
|
|
69
|
+
```bash
|
|
70
|
+
# Any standard AWS credential method works (SSO, env vars, profiles)
|
|
71
|
+
export AWS_PROFILE=your-profile
|
|
72
|
+
export AWS_REGION=us-east-1
|
|
73
|
+
kadmon run --task "..." --provider bedrock --model us.anthropic.claude-sonnet-4-6
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Anthropic Direct**:
|
|
77
|
+
```bash
|
|
78
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
79
|
+
kadmon run --task "..." --provider anthropic --model claude-sonnet-4-20250514
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**OpenAI**:
|
|
83
|
+
```bash
|
|
84
|
+
export OPENAI_API_KEY=sk-...
|
|
85
|
+
kadmon run --task "..." --provider openai --model gpt-4o
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## What Makes Kadmon Different
|
|
89
|
+
|
|
90
|
+
Most coding agents are "very talented junior engineers" — they need constant supervision and context management. Kadmon is designed to be piloted like a **team lead manages a senior engineer**:
|
|
91
|
+
|
|
92
|
+
1. **No guessing** — asks clarifying questions when requirements are ambiguous (not for permission — for correctness)
|
|
93
|
+
2. **Rock climbing** — verifies each step before moving to the next, never sprints into the void
|
|
94
|
+
3. **Self-managing context** — detects when its context is degrading, writes a handoff doc, and continues in a fresh session automatically
|
|
95
|
+
|
|
96
|
+
### Autonomous Context Management
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
.kadmon/
|
|
100
|
+
├── config.toml # Provider config, preferences
|
|
101
|
+
├── library/ # Persistent knowledge (survives across sessions)
|
|
102
|
+
│ ├── architecture.md # Project structure notes
|
|
103
|
+
│ ├── conventions.md # Patterns, gotchas
|
|
104
|
+
│ └── tasks/current.md # Active task state
|
|
105
|
+
├── session.json # What's in flight right now
|
|
106
|
+
├── handoffs/ # Handoff docs (context continuity)
|
|
107
|
+
└── symbols.db # Code structure index (tree-sitter)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The agent automatically:
|
|
111
|
+
- Loads relevant library context on startup (cold start)
|
|
112
|
+
- Saves learnings after each completed step
|
|
113
|
+
- Detects context degradation (token budget, loops, quality drop)
|
|
114
|
+
- Writes a structured handoff and resets — no human intervention needed
|
|
115
|
+
|
|
116
|
+
## Local Development
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Clone and install in dev mode
|
|
120
|
+
git clone https://github.com/ayuan153/kadmon.git
|
|
121
|
+
cd kadmon
|
|
122
|
+
pip install -e ".[dev]"
|
|
123
|
+
|
|
124
|
+
# Run tests
|
|
125
|
+
./dev test
|
|
126
|
+
|
|
127
|
+
# Lint
|
|
128
|
+
./dev lint
|
|
129
|
+
|
|
130
|
+
# Run kadmon against a local repo
|
|
131
|
+
./dev run "Fix the bug in parser.py"
|
|
132
|
+
|
|
133
|
+
# Benchmark (5 Python exercises, quick smoke test)
|
|
134
|
+
./dev bench
|
|
135
|
+
|
|
136
|
+
# Full benchmark (225 exercises, all languages)
|
|
137
|
+
./dev bench-full
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Dev Script Reference
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
./dev bench [N] # N Python exercises (default: 5)
|
|
144
|
+
./dev bench-full # All 225 exercises, 6 languages
|
|
145
|
+
./dev run "task" # Run kadmon on current repo
|
|
146
|
+
./dev test # pytest
|
|
147
|
+
./dev lint # ruff
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Running Against Your Own Code
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
# From any repo:
|
|
154
|
+
kadmon run --task "Add input validation to the create_user endpoint"
|
|
155
|
+
|
|
156
|
+
# With planning disabled (faster, simpler loop — good for debugging):
|
|
157
|
+
kadmon run --task "Fix the typo in README.md" --no-planning
|
|
158
|
+
|
|
159
|
+
# In yolo mode (no tool approval gates):
|
|
160
|
+
kadmon run --task "Refactor the auth module" --mode yolo
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Benchmarking
|
|
164
|
+
|
|
165
|
+
### Aider Polyglot
|
|
166
|
+
|
|
167
|
+
225 Exercism exercises across Python, JavaScript, Go, Rust, Java, C++.
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# Quick smoke test (~$1)
|
|
171
|
+
kadmon bench --languages python --limit 5
|
|
172
|
+
|
|
173
|
+
# Full Python
|
|
174
|
+
kadmon bench --languages python
|
|
175
|
+
|
|
176
|
+
# All languages, 10 parallel workers
|
|
177
|
+
kadmon bench -j 10
|
|
178
|
+
|
|
179
|
+
# Sequential (live timer, good for debugging)
|
|
180
|
+
kadmon bench --limit 5 -j 1
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Results: `eval_results/polyglot/summary.json`
|
|
184
|
+
|
|
185
|
+
### SWE-bench
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
kadmon eval --dataset swe_bench_verified_mini.json --limit 10
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Architecture
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
kadmon/
|
|
195
|
+
├── agent/ # ReAct loop, planning, backtracking, handoff, pruner
|
|
196
|
+
├── providers/ # LLM providers (Bedrock, Anthropic, OpenAI)
|
|
197
|
+
├── tools/ # file I/O, search, shell, skeleton, references, plan, ask_human
|
|
198
|
+
├── human/ # Question batching, CLI/webhook channels
|
|
199
|
+
├── eval/ # Benchmark harnesses (Aider Polyglot, SWE-bench)
|
|
200
|
+
├── index/ # Tree-sitter symbol index (SQLite)
|
|
201
|
+
└── memory/ # Librarian, session tracker, read cache
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Key design:
|
|
205
|
+
- **Single-threaded ReAct loop** with architect/editor phase separation
|
|
206
|
+
- **No frameworks** — provider SDKs directly, minimal core
|
|
207
|
+
- **Autonomous handoff** — detects context degradation, resets with continuity
|
|
208
|
+
- **File-based memory** — `.kadmon/library/` persists knowledge across sessions
|
|
209
|
+
- **Ambiguity resolution** — `ask_human` tool for genuine uncertainty (not permission)
|
|
210
|
+
|
|
211
|
+
## Configuration
|
|
212
|
+
|
|
213
|
+
All defaults in `kadmon/config.py`:
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
DEFAULT_MODEL = "us.anthropic.claude-sonnet-4-6"
|
|
217
|
+
DEFAULT_PROVIDER = "bedrock"
|
|
218
|
+
DEFAULT_REGION = "us-east-1"
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Per-project config at `.kadmon/config.toml` (created by `kadmon init`).
|
|
222
|
+
|
|
223
|
+
## Contributing
|
|
224
|
+
|
|
225
|
+
See [AGENTS.md](AGENTS.md) for AI contribution guidelines. Key rules:
|
|
226
|
+
- Understand → Implement → Lint → Test → Commit (no skipping)
|
|
227
|
+
- Conventional commits with scopes
|
|
228
|
+
- One concern per commit
|
|
229
|
+
|
|
230
|
+
## License
|
|
231
|
+
|
|
232
|
+
MIT
|
kadmon-0.1.0/README.md
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# Kadmon
|
|
2
|
+
|
|
3
|
+
An autonomous coding agent that manages its own context, asks clarifying questions, and hands off between sessions without human intervention. Scores 100% on the Aider Polyglot (Python) benchmark.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install kadmon
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Getting Started
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Interactive setup — picks your provider, configures credentials, tests the connection
|
|
15
|
+
kadmon init
|
|
16
|
+
|
|
17
|
+
# Run on a repo
|
|
18
|
+
cd your-project
|
|
19
|
+
kadmon run --task "Fix the failing test in test_auth.py"
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
`kadmon init` walks you through:
|
|
23
|
+
1. Choose provider (Bedrock, Anthropic, OpenAI)
|
|
24
|
+
2. Configure credentials (AWS profile, API key, etc.)
|
|
25
|
+
3. Test the connection
|
|
26
|
+
4. Save to `.kadmon/config.toml`
|
|
27
|
+
|
|
28
|
+
### Manual Provider Setup
|
|
29
|
+
|
|
30
|
+
If you prefer to skip `kadmon init`:
|
|
31
|
+
|
|
32
|
+
**AWS Bedrock** (default):
|
|
33
|
+
```bash
|
|
34
|
+
# Any standard AWS credential method works (SSO, env vars, profiles)
|
|
35
|
+
export AWS_PROFILE=your-profile
|
|
36
|
+
export AWS_REGION=us-east-1
|
|
37
|
+
kadmon run --task "..." --provider bedrock --model us.anthropic.claude-sonnet-4-6
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
**Anthropic Direct**:
|
|
41
|
+
```bash
|
|
42
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
43
|
+
kadmon run --task "..." --provider anthropic --model claude-sonnet-4-20250514
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**OpenAI**:
|
|
47
|
+
```bash
|
|
48
|
+
export OPENAI_API_KEY=sk-...
|
|
49
|
+
kadmon run --task "..." --provider openai --model gpt-4o
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## What Makes Kadmon Different
|
|
53
|
+
|
|
54
|
+
Most coding agents are "very talented junior engineers" — they need constant supervision and context management. Kadmon is designed to be piloted like a **team lead manages a senior engineer**:
|
|
55
|
+
|
|
56
|
+
1. **No guessing** — asks clarifying questions when requirements are ambiguous (not for permission — for correctness)
|
|
57
|
+
2. **Rock climbing** — verifies each step before moving to the next, never sprints into the void
|
|
58
|
+
3. **Self-managing context** — detects when its context is degrading, writes a handoff doc, and continues in a fresh session automatically
|
|
59
|
+
|
|
60
|
+
### Autonomous Context Management
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
.kadmon/
|
|
64
|
+
├── config.toml # Provider config, preferences
|
|
65
|
+
├── library/ # Persistent knowledge (survives across sessions)
|
|
66
|
+
│ ├── architecture.md # Project structure notes
|
|
67
|
+
│ ├── conventions.md # Patterns, gotchas
|
|
68
|
+
│ └── tasks/current.md # Active task state
|
|
69
|
+
├── session.json # What's in flight right now
|
|
70
|
+
├── handoffs/ # Handoff docs (context continuity)
|
|
71
|
+
└── symbols.db # Code structure index (tree-sitter)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
The agent automatically:
|
|
75
|
+
- Loads relevant library context on startup (cold start)
|
|
76
|
+
- Saves learnings after each completed step
|
|
77
|
+
- Detects context degradation (token budget, loops, quality drop)
|
|
78
|
+
- Writes a structured handoff and resets — no human intervention needed
|
|
79
|
+
|
|
80
|
+
## Local Development
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# Clone and install in dev mode
|
|
84
|
+
git clone https://github.com/ayuan153/kadmon.git
|
|
85
|
+
cd kadmon
|
|
86
|
+
pip install -e ".[dev]"
|
|
87
|
+
|
|
88
|
+
# Run tests
|
|
89
|
+
./dev test
|
|
90
|
+
|
|
91
|
+
# Lint
|
|
92
|
+
./dev lint
|
|
93
|
+
|
|
94
|
+
# Run kadmon against a local repo
|
|
95
|
+
./dev run "Fix the bug in parser.py"
|
|
96
|
+
|
|
97
|
+
# Benchmark (5 Python exercises, quick smoke test)
|
|
98
|
+
./dev bench
|
|
99
|
+
|
|
100
|
+
# Full benchmark (225 exercises, all languages)
|
|
101
|
+
./dev bench-full
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Dev Script Reference
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
./dev bench [N] # N Python exercises (default: 5)
|
|
108
|
+
./dev bench-full # All 225 exercises, 6 languages
|
|
109
|
+
./dev run "task" # Run kadmon on current repo
|
|
110
|
+
./dev test # pytest
|
|
111
|
+
./dev lint # ruff
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Running Against Your Own Code
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# From any repo:
|
|
118
|
+
kadmon run --task "Add input validation to the create_user endpoint"
|
|
119
|
+
|
|
120
|
+
# With planning disabled (faster, simpler loop — good for debugging):
|
|
121
|
+
kadmon run --task "Fix the typo in README.md" --no-planning
|
|
122
|
+
|
|
123
|
+
# In yolo mode (no tool approval gates):
|
|
124
|
+
kadmon run --task "Refactor the auth module" --mode yolo
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Benchmarking
|
|
128
|
+
|
|
129
|
+
### Aider Polyglot
|
|
130
|
+
|
|
131
|
+
225 Exercism exercises across Python, JavaScript, Go, Rust, Java, C++.
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Quick smoke test (~$1)
|
|
135
|
+
kadmon bench --languages python --limit 5
|
|
136
|
+
|
|
137
|
+
# Full Python
|
|
138
|
+
kadmon bench --languages python
|
|
139
|
+
|
|
140
|
+
# All languages, 10 parallel workers
|
|
141
|
+
kadmon bench -j 10
|
|
142
|
+
|
|
143
|
+
# Sequential (live timer, good for debugging)
|
|
144
|
+
kadmon bench --limit 5 -j 1
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Results: `eval_results/polyglot/summary.json`
|
|
148
|
+
|
|
149
|
+
### SWE-bench
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
kadmon eval --dataset swe_bench_verified_mini.json --limit 10
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Architecture
|
|
156
|
+
|
|
157
|
+
```
|
|
158
|
+
kadmon/
|
|
159
|
+
├── agent/ # ReAct loop, planning, backtracking, handoff, pruner
|
|
160
|
+
├── providers/ # LLM providers (Bedrock, Anthropic, OpenAI)
|
|
161
|
+
├── tools/ # file I/O, search, shell, skeleton, references, plan, ask_human
|
|
162
|
+
├── human/ # Question batching, CLI/webhook channels
|
|
163
|
+
├── eval/ # Benchmark harnesses (Aider Polyglot, SWE-bench)
|
|
164
|
+
├── index/ # Tree-sitter symbol index (SQLite)
|
|
165
|
+
└── memory/ # Librarian, session tracker, read cache
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Key design:
|
|
169
|
+
- **Single-threaded ReAct loop** with architect/editor phase separation
|
|
170
|
+
- **No frameworks** — provider SDKs directly, minimal core
|
|
171
|
+
- **Autonomous handoff** — detects context degradation, resets with continuity
|
|
172
|
+
- **File-based memory** — `.kadmon/library/` persists knowledge across sessions
|
|
173
|
+
- **Ambiguity resolution** — `ask_human` tool for genuine uncertainty (not permission)
|
|
174
|
+
|
|
175
|
+
## Configuration
|
|
176
|
+
|
|
177
|
+
All defaults in `kadmon/config.py`:
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
DEFAULT_MODEL = "us.anthropic.claude-sonnet-4-6"
|
|
181
|
+
DEFAULT_PROVIDER = "bedrock"
|
|
182
|
+
DEFAULT_REGION = "us-east-1"
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Per-project config at `.kadmon/config.toml` (created by `kadmon init`).
|
|
186
|
+
|
|
187
|
+
## Contributing
|
|
188
|
+
|
|
189
|
+
See [AGENTS.md](AGENTS.md) for AI contribution guidelines. Key rules:
|
|
190
|
+
- Understand → Implement → Lint → Test → Commit (no skipping)
|
|
191
|
+
- Conventional commits with scopes
|
|
192
|
+
- One concern per commit
|
|
193
|
+
|
|
194
|
+
## License
|
|
195
|
+
|
|
196
|
+
MIT
|
kadmon-0.1.0/dev
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Developer helper script for kadmon (benchmarks, agent runs, tests, lint); defaults to the 'kadmon' AWS profile.
|
|
3
|
+
# Usage:
|
|
4
|
+
# ./dev bench # 5 Python exercises (quick, ~$1)
|
|
5
|
+
# ./dev bench 20 # 20 Python exercises
|
|
6
|
+
# ./dev bench-full # All 225 exercises (~$20)
|
|
7
|
+
# ./dev run "Fix the bug" # Run on current repo
|
|
8
|
+
# ./dev test # Run unit tests
|
|
9
|
+
# ./dev lint # Run linter
|
|
10
|
+
|
|
11
|
+
set -euo pipefail
|
|
12
|
+
|
|
13
|
+
export AWS_PROFILE="${AWS_PROFILE:-kadmon}"
|
|
14
|
+
export AWS_REGION="${AWS_REGION:-us-east-1}"
|
|
15
|
+
MODEL="${KADMON_MODEL:-us.anthropic.claude-sonnet-4-6}"
|
|
16
|
+
|
|
17
|
+
case "${1:-help}" in
|
|
18
|
+
bench)
|
|
19
|
+
LIMIT="${2:-5}"
|
|
20
|
+
echo "Running $LIMIT Python exercises (profile=$AWS_PROFILE model=$MODEL)"
|
|
21
|
+
kadmon bench --languages python --limit "$LIMIT" --provider bedrock \
|
|
22
|
+
--aws-region "$AWS_REGION" --model "$MODEL"
|
|
23
|
+
;;
|
|
24
|
+
bench-full)
|
|
25
|
+
echo "Running full polyglot benchmark (225 exercises)"
|
|
26
|
+
kadmon bench --provider bedrock --aws-region "$AWS_REGION" --model "$MODEL"
|
|
27
|
+
;;
|
|
28
|
+
run)
|
|
29
|
+
shift
|
|
30
|
+
kadmon run --task "$*" --repo . --provider bedrock \
|
|
31
|
+
--aws-region "$AWS_REGION" --model "$MODEL"
|
|
32
|
+
;;
|
|
33
|
+
test)
|
|
34
|
+
pytest tests/ -v
|
|
35
|
+
;;
|
|
36
|
+
lint)
|
|
37
|
+
ruff check kadmon/ tests/
|
|
38
|
+
;;
|
|
39
|
+
help|*)
|
|
40
|
+
echo "Usage: ./dev <command>"
|
|
41
|
+
echo ""
|
|
42
|
+
echo "Commands:"
|
|
43
|
+
echo " bench [N] Run N Python exercises [default: 5]"
|
|
44
|
+
echo " bench-full Run all 225 exercises"
|
|
45
|
+
echo " run \"task\" Run kadmon on current repo"
|
|
46
|
+
echo " test Run unit tests"
|
|
47
|
+
echo " lint Run ruff linter"
|
|
48
|
+
echo ""
|
|
49
|
+
echo "Environment:"
|
|
50
|
+
echo " AWS_PROFILE AWS profile [default: kadmon]"
|
|
51
|
+
echo " AWS_REGION AWS region [default: us-east-1]"
|
|
52
|
+
echo " KADMON_MODEL Model ID [default: us.anthropic.claude-sonnet-4-6]"
|
|
53
|
+
;;
|
|
54
|
+
esac
|