kadmon 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. kadmon-0.1.0/.github/workflows/ci.yml +16 -0
  2. kadmon-0.1.0/.github/workflows/release.yml +39 -0
  3. kadmon-0.1.0/.gitignore +16 -0
  4. kadmon-0.1.0/AGENTS.md +111 -0
  5. kadmon-0.1.0/LICENSE +21 -0
  6. kadmon-0.1.0/PKG-INFO +232 -0
  7. kadmon-0.1.0/README.md +196 -0
  8. kadmon-0.1.0/dev +54 -0
  9. kadmon-0.1.0/docs/autonomous-context-plan.md +440 -0
  10. kadmon-0.1.0/kadmon/__init__.py +1 -0
  11. kadmon-0.1.0/kadmon/__main__.py +5 -0
  12. kadmon-0.1.0/kadmon/agent/__init__.py +1 -0
  13. kadmon-0.1.0/kadmon/agent/backtrack.py +72 -0
  14. kadmon-0.1.0/kadmon/agent/checkpoint.py +71 -0
  15. kadmon-0.1.0/kadmon/agent/context.py +52 -0
  16. kadmon-0.1.0/kadmon/agent/handoff.py +175 -0
  17. kadmon-0.1.0/kadmon/agent/loop.py +255 -0
  18. kadmon-0.1.0/kadmon/agent/planner.py +92 -0
  19. kadmon-0.1.0/kadmon/agent/prompts.py +58 -0
  20. kadmon-0.1.0/kadmon/agent/pruner.py +102 -0
  21. kadmon-0.1.0/kadmon/agent/recovery.py +52 -0
  22. kadmon-0.1.0/kadmon/cli.py +249 -0
  23. kadmon-0.1.0/kadmon/config.py +24 -0
  24. kadmon-0.1.0/kadmon/eval/__init__.py +4 -0
  25. kadmon-0.1.0/kadmon/eval/harness.py +127 -0
  26. kadmon-0.1.0/kadmon/eval/polyglot.py +493 -0
  27. kadmon-0.1.0/kadmon/human/__init__.py +8 -0
  28. kadmon-0.1.0/kadmon/human/channel.py +220 -0
  29. kadmon-0.1.0/kadmon/index/__init__.py +4 -0
  30. kadmon-0.1.0/kadmon/index/db.py +129 -0
  31. kadmon-0.1.0/kadmon/index/parser.py +195 -0
  32. kadmon-0.1.0/kadmon/index/updater.py +63 -0
  33. kadmon-0.1.0/kadmon/memory/__init__.py +3 -0
  34. kadmon-0.1.0/kadmon/memory/librarian.py +153 -0
  35. kadmon-0.1.0/kadmon/memory/read_cache.py +23 -0
  36. kadmon-0.1.0/kadmon/memory/session_tracker.py +154 -0
  37. kadmon-0.1.0/kadmon/providers/__init__.py +7 -0
  38. kadmon-0.1.0/kadmon/providers/anthropic.py +69 -0
  39. kadmon-0.1.0/kadmon/providers/base.py +33 -0
  40. kadmon-0.1.0/kadmon/providers/bedrock.py +70 -0
  41. kadmon-0.1.0/kadmon/tools/__init__.py +40 -0
  42. kadmon-0.1.0/kadmon/tools/ask_human.py +71 -0
  43. kadmon-0.1.0/kadmon/tools/base.py +43 -0
  44. kadmon-0.1.0/kadmon/tools/file_io.py +218 -0
  45. kadmon-0.1.0/kadmon/tools/plan.py +123 -0
  46. kadmon-0.1.0/kadmon/tools/references.py +127 -0
  47. kadmon-0.1.0/kadmon/tools/search.py +65 -0
  48. kadmon-0.1.0/kadmon/tools/shell.py +43 -0
  49. kadmon-0.1.0/kadmon/tools/skeleton.py +118 -0
  50. kadmon-0.1.0/kadmon/tools/submit.py +32 -0
  51. kadmon-0.1.0/npm/README.md +35 -0
  52. kadmon-0.1.0/npm/bin/kadmon.mjs +89 -0
  53. kadmon-0.1.0/npm/package.json +23 -0
  54. kadmon-0.1.0/publish +49 -0
  55. kadmon-0.1.0/pyproject.toml +59 -0
  56. kadmon-0.1.0/tests/__init__.py +0 -0
  57. kadmon-0.1.0/tests/test_agent.py +82 -0
  58. kadmon-0.1.0/tests/test_eval.py +33 -0
  59. kadmon-0.1.0/tests/test_handoff.py +119 -0
  60. kadmon-0.1.0/tests/test_human.py +130 -0
  61. kadmon-0.1.0/tests/test_memory.py +168 -0
  62. kadmon-0.1.0/tests/test_planning.py +236 -0
  63. kadmon-0.1.0/tests/test_pruner.py +126 -0
  64. kadmon-0.1.0/tests/test_read_cache.py +79 -0
  65. kadmon-0.1.0/tests/test_recovery.py +52 -0
  66. kadmon-0.1.0/tests/test_references.py +80 -0
  67. kadmon-0.1.0/tests/test_skeleton.py +45 -0
  68. kadmon-0.1.0/tests/test_tools.py +134 -0
@@ -0,0 +1,16 @@
1
+ name: CI
2
+ on: [push, pull_request]  # run on every push and every pull request
3
+ jobs:
4
+   test:
5
+     runs-on: ubuntu-latest
6
+     strategy:
7
+       matrix:
8
+         python-version: ['3.11', '3.12', '3.13']  # every Python version listed in the package classifiers
9
+     steps:
10
+       - uses: actions/checkout@v4
11
+       - uses: actions/setup-python@v5
12
+         with:
13
+           python-version: ${{ matrix.python-version }}
14
+       - run: pip install -e '.[dev]'  # editable install with the dev extra (pytest, ruff)
15
+       - run: ruff check kadmon/ tests/
16
+       - run: pytest tests/ -v
@@ -0,0 +1,39 @@
1
+ name: Release
2
+
3
+ on:
4
+   push:
5
+     tags:
6
+       - "v*"  # any tag starting with "v" triggers a release
7
+
8
+ jobs:
9
+   publish-pypi:
10
+     runs-on: ubuntu-latest
11
+     permissions:
12
+       id-token: write  # OIDC token consumed by the PyPI publish action
13
+     steps:
14
+       - uses: actions/checkout@v4
15
+       - uses: actions/setup-python@v5
16
+         with:
17
+           python-version: "3.12"
18
+       - run: pip install build
19
+       - run: python -m build
20
+       - uses: pypa/gh-action-pypi-publish@release/v1
21
+
22
+   publish-npm:
23
+     runs-on: ubuntu-latest
24
+     permissions:
25
+       contents: read
26
+       id-token: write  # required by `npm publish --provenance`
27
+     steps:
28
+       - uses: actions/checkout@v4
29
+       - uses: actions/setup-node@v4
30
+         with:
31
+           node-version: "22"
32
+           registry-url: "https://registry.npmjs.org"
33
+       - name: Sync version from tag  # --allow-same-version: package.json may already carry the tag's version
34
+         run: |
35
+           VERSION="${GITHUB_REF#refs/tags/v}"
36
+           cd npm
37
+           npm version "$VERSION" --no-git-tag-version --allow-same-version
38
+       - name: Publish with provenance  # NOTE(review): no NODE_AUTH_TOKEN is set; confirm the runner's npm supports tokenless (OIDC) publishing
39
+         run: cd npm && npm publish --provenance --access public
@@ -0,0 +1,16 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ .env
12
+ .pytest_cache/
13
+ .ruff_cache/
14
+ .mypy_cache/
15
+ .kadmon/
16
+ *.db
kadmon-0.1.0/AGENTS.md ADDED
@@ -0,0 +1,111 @@
1
+ # AGENTS.md
2
+
3
+ Guidelines for AI agents contributing to this repository.
4
+
5
+ ## Build & Test Loop
6
+
7
+ Every change must follow this cycle:
8
+
9
+ 1. **Understand** — Read relevant code before modifying. Use `grep_search` or `find_symbols` to locate what you need. Never guess at file contents.
10
+ 2. **Implement** — Make minimal, focused changes. One concern per edit.
11
+ 3. **Lint** — Run `ruff check kadmon/ tests/` and fix any issues.
12
+ 4. **Test** — Run `pytest tests/ -v`. All tests must pass. If you added new functionality, add tests for it.
13
+ 5. **Commit** — Use conventional commits (see below). Only commit when lint + tests pass.
14
+
15
+ Do not skip steps. Do not commit broken code.
16
+
17
+ ## Development Setup
18
+
19
+ ```bash
20
+ pip install -e ".[dev]"
21
+ ```
22
+
23
+ ## Commands
24
+
25
+ | Task | Command |
26
+ |------|---------|
27
+ | Lint | `ruff check kadmon/ tests/` |
28
+ | Lint fix | `ruff check --fix kadmon/ tests/` |
29
+ | Test | `pytest tests/ -v` |
30
+ | Run agent | `kadmon run --task "..." --repo /path/to/repo` |
31
+ | Run eval | `kadmon eval --dataset instances.json --limit 10` |
32
+
33
+ ## Commit Convention
34
+
35
+ Use [Conventional Commits](https://www.conventionalcommits.org/):
36
+
37
+ ```
38
+ type(scope): description
39
+ ```
40
+
41
+ Types: `feat`, `fix`, `refactor`, `test`, `docs`, `chore`, `ci`
42
+
43
+ Scopes: `agent`, `tools`, `providers`, `eval`, `index`, `memory`, `cli`
44
+
45
+ Examples:
46
+ - `feat(tools): add grep_search with ripgrep fallback`
47
+ - `fix(agent): prevent infinite loop on repeated tool errors`
48
+ - `test(tools): add edit_file edge case coverage`
49
+ - `docs: update README with eval instructions`
50
+
51
+ Rules:
52
+ - One logical change per commit
53
+ - Implementation + its direct tests belong in the same commit
54
+ - Keep commits atomic — each should build and pass tests independently
55
+ - Never bundle unrelated changes
56
+
57
+ ## Code Style
58
+
59
+ - Python 3.11+, type hints on all public functions
60
+ - Use `pathlib.Path` for filesystem operations
61
+ - Use dataclasses or pydantic models for structured data
62
+ - Keep functions short (<40 lines). Extract helpers.
63
+ - Error messages should be actionable (tell the user what to do next)
64
+ - No `# type: ignore`, no `Any` unless unavoidable
65
+
66
+ ## Architecture Rules
67
+
68
+ - **Tools** are the agent's interface to the world. Optimize tool output for LLM consumption (concise, structured, actionable errors).
69
+ - **Providers** are stateless adapters. No business logic in providers.
70
+ - **Agent loop** is single-threaded ReAct. Keep it simple.
71
+ - **No frameworks** (no LangChain, no LangGraph). Use provider SDKs directly.
72
+ - All paths are resolved relative to `repo_root`. Reject any path that escapes it.
73
+
74
+ ## Adding a New Tool
75
+
76
+ 1. Create `kadmon/tools/your_tool.py` with a class extending `Tool`
77
+ 2. Define `name`, `description`, `parameters` (JSON Schema)
78
+ 3. Implement `execute(**kwargs) -> ToolResult`
79
+ 4. Register in `kadmon/tools/__init__.py` inside `create_default_registry()`
80
+ 5. Add tests in `tests/test_tools.py`
81
+ 6. Run lint + tests before committing
82
+
83
+ ## Adding a New Provider
84
+
85
+ 1. Create `kadmon/providers/your_provider.py` implementing `LLMProvider` protocol
86
+ 2. Handle message format conversion (internal ↔ provider-specific)
87
+ 3. Add retry logic for transient errors
88
+ 4. Register in provider factory
89
+ 5. Add tests with mocked API responses
90
+
91
+ ## File Structure
92
+
93
+ ```
94
+ kadmon/
95
+ ├── agent/ # Core loop, context management, planning
96
+ ├── providers/ # LLM provider implementations
97
+ ├── tools/ # Agent tools (file I/O, search, shell, etc.)
98
+ ├── eval/ # Benchmark harnesses (Aider Polyglot, SWE-bench)
99
+ ├── index/ # Tree-sitter symbol index (SQLite)
100
+ ├── memory/ # Cross-session memory and handoff
101
+ └── cli.py # CLI entry point
102
+ ```
103
+
104
+ ## What Not To Do
105
+
106
+ - Don't add dependencies without justification
107
+ - Don't add async unless the specific module requires it
108
+ - Don't modify the core loop without understanding the full message flow
109
+ - Don't delete or skip tests to make the build pass
110
+ - Don't leave debug prints or commented-out code
111
+ - Don't make changes outside the scope of your task
kadmon-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ayuan153
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
kadmon-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: kadmon
3
+ Version: 0.1.0
4
+ Summary: An autonomous coding agent that manages its own context across sessions
5
+ Project-URL: Homepage, https://github.com/ayuan153/kadmon
6
+ Project-URL: Repository, https://github.com/ayuan153/kadmon
7
+ Project-URL: Issues, https://github.com/ayuan153/kadmon/issues
8
+ Author: ayuan153
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agent,ai,autonomous,coding,llm
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: anthropic>=0.40.0
23
+ Requires-Dist: click>=8.0.0
24
+ Requires-Dist: openai>=1.50.0
25
+ Requires-Dist: pydantic>=2.0.0
26
+ Requires-Dist: rich>=13.0.0
27
+ Requires-Dist: tree-sitter-javascript>=0.23.0
28
+ Requires-Dist: tree-sitter-python>=0.23.0
29
+ Requires-Dist: tree-sitter-typescript>=0.23.0
30
+ Requires-Dist: tree-sitter>=0.23.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
33
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
34
+ Requires-Dist: ruff>=0.5.0; extra == 'dev'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # Kadmon
38
+
39
+ An autonomous coding agent that manages its own context, asks clarifying questions, and hands off between sessions without human intervention. Scores 100% on the Python subset of the Aider Polyglot benchmark.
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ pip install kadmon
45
+ ```
46
+
47
+ ## Getting Started
48
+
49
+ ```bash
50
+ # Interactive setup — picks your provider, configures credentials, tests the connection
51
+ kadmon init
52
+
53
+ # Run on a repo
54
+ cd your-project
55
+ kadmon run --task "Fix the failing test in test_auth.py"
56
+ ```
57
+
58
+ `kadmon init` walks you through:
59
+ 1. Choose provider (Bedrock, Anthropic, OpenAI)
60
+ 2. Configure credentials (AWS profile, API key, etc.)
61
+ 3. Test the connection
62
+ 4. Save to `.kadmon/config.toml`
63
+
64
+ ### Manual Provider Setup
65
+
66
+ If you prefer to skip `kadmon init`:
67
+
68
+ **AWS Bedrock** (default):
69
+ ```bash
70
+ # Any standard AWS credential method works (SSO, env vars, profiles)
71
+ export AWS_PROFILE=your-profile
72
+ export AWS_REGION=us-east-1
73
+ kadmon run --task "..." --provider bedrock --model us.anthropic.claude-sonnet-4-6
74
+ ```
75
+
76
+ **Anthropic Direct**:
77
+ ```bash
78
+ export ANTHROPIC_API_KEY=sk-ant-...
79
+ kadmon run --task "..." --provider anthropic --model claude-sonnet-4-20250514
80
+ ```
81
+
82
+ **OpenAI**:
83
+ ```bash
84
+ export OPENAI_API_KEY=sk-...
85
+ kadmon run --task "..." --provider openai --model gpt-4o
86
+ ```
87
+
88
+ ## What Makes Kadmon Different
89
+
90
+ Most coding agents are "very talented junior engineers" — they need constant supervision and context management. Kadmon is designed to be directed the way a **team lead manages a senior engineer**:
91
+
92
+ 1. **No guessing** — asks clarifying questions when requirements are ambiguous (not for permission — for correctness)
93
+ 2. **Rock climbing** — verifies each step before moving to the next, never sprints into the void
94
+ 3. **Self-managing context** — detects when its context is degrading, writes a handoff doc, and continues in a fresh session automatically
95
+
96
+ ### Autonomous Context Management
97
+
98
+ ```
99
+ .kadmon/
100
+ ├── config.toml # Provider config, preferences
101
+ ├── library/ # Persistent knowledge (survives across sessions)
102
+ │ ├── architecture.md # Project structure notes
103
+ │ ├── conventions.md # Patterns, gotchas
104
+ │ └── tasks/current.md # Active task state
105
+ ├── session.json # What's in flight right now
106
+ ├── handoffs/ # Handoff docs (context continuity)
107
+ └── symbols.db # Code structure index (tree-sitter)
108
+ ```
109
+
110
+ The agent automatically:
111
+ - Loads relevant library context on startup (cold start)
112
+ - Saves learnings after each completed step
113
+ - Detects context degradation (token budget, loops, quality drop)
114
+ - Writes a structured handoff and resets — no human intervention needed
115
+
116
+ ## Local Development
117
+
118
+ ```bash
119
+ # Clone and install in dev mode
120
+ git clone https://github.com/ayuan153/kadmon.git
121
+ cd kadmon
122
+ pip install -e ".[dev]"
123
+
124
+ # Run tests
125
+ ./dev test
126
+
127
+ # Lint
128
+ ./dev lint
129
+
130
+ # Run kadmon against a local repo
131
+ ./dev run "Fix the bug in parser.py"
132
+
133
+ # Benchmark (5 Python exercises, quick smoke test)
134
+ ./dev bench
135
+
136
+ # Full benchmark (225 exercises, all languages)
137
+ ./dev bench-full
138
+ ```
139
+
140
+ ### Dev Script Reference
141
+
142
+ ```bash
143
+ ./dev bench [N] # N Python exercises (default: 5)
144
+ ./dev bench-full # All 225 exercises, 6 languages
145
+ ./dev run "task" # Run kadmon on current repo
146
+ ./dev test # pytest
147
+ ./dev lint # ruff
148
+ ```
149
+
150
+ ### Running Against Your Own Code
151
+
152
+ ```bash
153
+ # From any repo:
154
+ kadmon run --task "Add input validation to the create_user endpoint"
155
+
156
+ # With planning disabled (faster, simpler loop — good for debugging):
157
+ kadmon run --task "Fix the typo in README.md" --no-planning
158
+
159
+ # In yolo mode (no tool approval gates):
160
+ kadmon run --task "Refactor the auth module" --mode yolo
161
+ ```
162
+
163
+ ## Benchmarking
164
+
165
+ ### Aider Polyglot
166
+
167
+ 225 Exercism exercises across Python, JavaScript, Go, Rust, Java, C++.
168
+
169
+ ```bash
170
+ # Quick smoke test (~$1)
171
+ kadmon bench --languages python --limit 5
172
+
173
+ # Full Python
174
+ kadmon bench --languages python
175
+
176
+ # All languages, 10 parallel workers
177
+ kadmon bench -j 10
178
+
179
+ # Sequential (live timer, good for debugging)
180
+ kadmon bench --limit 5 -j 1
181
+ ```
182
+
183
+ Results: `eval_results/polyglot/summary.json`
184
+
185
+ ### SWE-bench
186
+
187
+ ```bash
188
+ kadmon eval --dataset swe_bench_verified_mini.json --limit 10
189
+ ```
190
+
191
+ ## Architecture
192
+
193
+ ```
194
+ kadmon/
195
+ ├── agent/ # ReAct loop, planning, backtracking, handoff, pruner
196
+ ├── providers/ # LLM providers (Bedrock, Anthropic, OpenAI)
197
+ ├── tools/ # file I/O, search, shell, skeleton, references, plan, ask_human
198
+ ├── human/ # Question batching, CLI/webhook channels
199
+ ├── eval/ # Benchmark harnesses (Aider Polyglot, SWE-bench)
200
+ ├── index/ # Tree-sitter symbol index (SQLite)
201
+ └── memory/ # Librarian, session tracker, read cache
202
+ ```
203
+
204
+ Key design:
205
+ - **Single-threaded ReAct loop** with architect/editor phase separation
206
+ - **No frameworks** — provider SDKs directly, minimal core
207
+ - **Autonomous handoff** — detects context degradation, resets with continuity
208
+ - **File-based memory** — `.kadmon/library/` persists knowledge across sessions
209
+ - **Ambiguity resolution** — `ask_human` tool for genuine uncertainty (not permission)
210
+
211
+ ## Configuration
212
+
213
+ All defaults in `kadmon/config.py`:
214
+
215
+ ```python
216
+ DEFAULT_MODEL = "us.anthropic.claude-sonnet-4-6"
217
+ DEFAULT_PROVIDER = "bedrock"
218
+ DEFAULT_REGION = "us-east-1"
219
+ ```
220
+
221
+ Per-project config at `.kadmon/config.toml` (created by `kadmon init`).
222
+
223
+ ## Contributing
224
+
225
+ See [AGENTS.md](AGENTS.md) for AI contribution guidelines. Key rules:
226
+ - Understand → Implement → Lint → Test → Commit (no skipping)
227
+ - Conventional commits with scopes
228
+ - One concern per commit
229
+
230
+ ## License
231
+
232
+ MIT
kadmon-0.1.0/README.md ADDED
@@ -0,0 +1,196 @@
1
+ # Kadmon
2
+
3
+ An autonomous coding agent that manages its own context, asks clarifying questions, and hands off between sessions without human intervention. Scores 100% on the Python subset of the Aider Polyglot benchmark.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install kadmon
9
+ ```
10
+
11
+ ## Getting Started
12
+
13
+ ```bash
14
+ # Interactive setup — picks your provider, configures credentials, tests the connection
15
+ kadmon init
16
+
17
+ # Run on a repo
18
+ cd your-project
19
+ kadmon run --task "Fix the failing test in test_auth.py"
20
+ ```
21
+
22
+ `kadmon init` walks you through:
23
+ 1. Choose provider (Bedrock, Anthropic, OpenAI)
24
+ 2. Configure credentials (AWS profile, API key, etc.)
25
+ 3. Test the connection
26
+ 4. Save to `.kadmon/config.toml`
27
+
28
+ ### Manual Provider Setup
29
+
30
+ If you prefer to skip `kadmon init`:
31
+
32
+ **AWS Bedrock** (default):
33
+ ```bash
34
+ # Any standard AWS credential method works (SSO, env vars, profiles)
35
+ export AWS_PROFILE=your-profile
36
+ export AWS_REGION=us-east-1
37
+ kadmon run --task "..." --provider bedrock --model us.anthropic.claude-sonnet-4-6
38
+ ```
39
+
40
+ **Anthropic Direct**:
41
+ ```bash
42
+ export ANTHROPIC_API_KEY=sk-ant-...
43
+ kadmon run --task "..." --provider anthropic --model claude-sonnet-4-20250514
44
+ ```
45
+
46
+ **OpenAI**:
47
+ ```bash
48
+ export OPENAI_API_KEY=sk-...
49
+ kadmon run --task "..." --provider openai --model gpt-4o
50
+ ```
51
+
52
+ ## What Makes Kadmon Different
53
+
54
+ Most coding agents are "very talented junior engineers" — they need constant supervision and context management. Kadmon is designed to be directed the way a **team lead manages a senior engineer**:
55
+
56
+ 1. **No guessing** — asks clarifying questions when requirements are ambiguous (not for permission — for correctness)
57
+ 2. **Rock climbing** — verifies each step before moving to the next, never sprints into the void
58
+ 3. **Self-managing context** — detects when its context is degrading, writes a handoff doc, and continues in a fresh session automatically
59
+
60
+ ### Autonomous Context Management
61
+
62
+ ```
63
+ .kadmon/
64
+ ├── config.toml # Provider config, preferences
65
+ ├── library/ # Persistent knowledge (survives across sessions)
66
+ │ ├── architecture.md # Project structure notes
67
+ │ ├── conventions.md # Patterns, gotchas
68
+ │ └── tasks/current.md # Active task state
69
+ ├── session.json # What's in flight right now
70
+ ├── handoffs/ # Handoff docs (context continuity)
71
+ └── symbols.db # Code structure index (tree-sitter)
72
+ ```
73
+
74
+ The agent automatically:
75
+ - Loads relevant library context on startup (cold start)
76
+ - Saves learnings after each completed step
77
+ - Detects context degradation (token budget, loops, quality drop)
78
+ - Writes a structured handoff and resets — no human intervention needed
79
+
80
+ ## Local Development
81
+
82
+ ```bash
83
+ # Clone and install in dev mode
84
+ git clone https://github.com/ayuan153/kadmon.git
85
+ cd kadmon
86
+ pip install -e ".[dev]"
87
+
88
+ # Run tests
89
+ ./dev test
90
+
91
+ # Lint
92
+ ./dev lint
93
+
94
+ # Run kadmon against a local repo
95
+ ./dev run "Fix the bug in parser.py"
96
+
97
+ # Benchmark (5 Python exercises, quick smoke test)
98
+ ./dev bench
99
+
100
+ # Full benchmark (225 exercises, all languages)
101
+ ./dev bench-full
102
+ ```
103
+
104
+ ### Dev Script Reference
105
+
106
+ ```bash
107
+ ./dev bench [N] # N Python exercises (default: 5)
108
+ ./dev bench-full # All 225 exercises, 6 languages
109
+ ./dev run "task" # Run kadmon on current repo
110
+ ./dev test # pytest
111
+ ./dev lint # ruff
112
+ ```
113
+
114
+ ### Running Against Your Own Code
115
+
116
+ ```bash
117
+ # From any repo:
118
+ kadmon run --task "Add input validation to the create_user endpoint"
119
+
120
+ # With planning disabled (faster, simpler loop — good for debugging):
121
+ kadmon run --task "Fix the typo in README.md" --no-planning
122
+
123
+ # In yolo mode (no tool approval gates):
124
+ kadmon run --task "Refactor the auth module" --mode yolo
125
+ ```
126
+
127
+ ## Benchmarking
128
+
129
+ ### Aider Polyglot
130
+
131
+ 225 Exercism exercises across Python, JavaScript, Go, Rust, Java, C++.
132
+
133
+ ```bash
134
+ # Quick smoke test (~$1)
135
+ kadmon bench --languages python --limit 5
136
+
137
+ # Full Python
138
+ kadmon bench --languages python
139
+
140
+ # All languages, 10 parallel workers
141
+ kadmon bench -j 10
142
+
143
+ # Sequential (live timer, good for debugging)
144
+ kadmon bench --limit 5 -j 1
145
+ ```
146
+
147
+ Results: `eval_results/polyglot/summary.json`
148
+
149
+ ### SWE-bench
150
+
151
+ ```bash
152
+ kadmon eval --dataset swe_bench_verified_mini.json --limit 10
153
+ ```
154
+
155
+ ## Architecture
156
+
157
+ ```
158
+ kadmon/
159
+ ├── agent/ # ReAct loop, planning, backtracking, handoff, pruner
160
+ ├── providers/ # LLM providers (Bedrock, Anthropic, OpenAI)
161
+ ├── tools/ # file I/O, search, shell, skeleton, references, plan, ask_human
162
+ ├── human/ # Question batching, CLI/webhook channels
163
+ ├── eval/ # Benchmark harnesses (Aider Polyglot, SWE-bench)
164
+ ├── index/ # Tree-sitter symbol index (SQLite)
165
+ └── memory/ # Librarian, session tracker, read cache
166
+ ```
167
+
168
+ Key design:
169
+ - **Single-threaded ReAct loop** with architect/editor phase separation
170
+ - **No frameworks** — provider SDKs directly, minimal core
171
+ - **Autonomous handoff** — detects context degradation, resets with continuity
172
+ - **File-based memory** — `.kadmon/library/` persists knowledge across sessions
173
+ - **Ambiguity resolution** — `ask_human` tool for genuine uncertainty (not permission)
174
+
175
+ ## Configuration
176
+
177
+ All defaults in `kadmon/config.py`:
178
+
179
+ ```python
180
+ DEFAULT_MODEL = "us.anthropic.claude-sonnet-4-6"
181
+ DEFAULT_PROVIDER = "bedrock"
182
+ DEFAULT_REGION = "us-east-1"
183
+ ```
184
+
185
+ Per-project config at `.kadmon/config.toml` (created by `kadmon init`).
186
+
187
+ ## Contributing
188
+
189
+ See [AGENTS.md](AGENTS.md) for AI contribution guidelines. Key rules:
190
+ - Understand → Implement → Lint → Test → Commit (no skipping)
191
+ - Conventional commits with scopes
192
+ - One concern per commit
193
+
194
+ ## License
195
+
196
+ MIT
kadmon-0.1.0/dev ADDED
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env bash
2
+ # Developer helper for kadmon (benchmarks, agent runs, tests, lint); defaults to the 'kadmon' AWS profile.
3
+ # Usage:
4
+ #   ./dev bench              # 5 Python exercises (quick, ~$1)
5
+ #   ./dev bench 20           # 20 Python exercises
6
+ #   ./dev bench-full         # All 225 exercises (~$20)
7
+ #   ./dev run "Fix the bug"  # Run on current repo
8
+ #   ./dev test               # Run unit tests
9
+ #   ./dev lint               # Run linter
10
+
11
+ set -euo pipefail  # abort on errors, unset variables and failed pipeline stages
12
+
13
+ export AWS_PROFILE="${AWS_PROFILE:-kadmon}"  # environment values win over the defaults
14
+ export AWS_REGION="${AWS_REGION:-us-east-1}"
15
+ MODEL="${KADMON_MODEL:-us.anthropic.claude-sonnet-4-6}"
16
+
17
+ case "${1:-help}" in  # no argument behaves like "help"
18
+   bench)
19
+     LIMIT="${2:-5}"  # optional exercise count
20
+     echo "Running $LIMIT Python exercises (profile=$AWS_PROFILE model=$MODEL)"
21
+     kadmon bench --languages python --limit "$LIMIT" --provider bedrock \
22
+       --aws-region "$AWS_REGION" --model "$MODEL"
23
+     ;;
24
+   bench-full)
25
+     echo "Running full polyglot benchmark (225 exercises)"
26
+     kadmon bench --provider bedrock --aws-region "$AWS_REGION" --model "$MODEL"
27
+     ;;
28
+   run)
29
+     shift  # drop the subcommand; the remaining words form the task text
30
+     kadmon run --task "$*" --repo . --provider bedrock \
31
+       --aws-region "$AWS_REGION" --model "$MODEL"
32
+     ;;
33
+   test)
34
+     pytest tests/ -v
35
+     ;;
36
+   lint)
37
+     ruff check kadmon/ tests/
38
+     ;;
39
+   help|*)
40
+     echo "Usage: ./dev <command>"
41
+     printf '\nCommands:\n'
42
+     echo " bench [N] Run N Python exercises [default: 5]"
43
+     echo " bench-full Run all 225 exercises"
44
+     echo " run \"task\" Run kadmon on current repo"
45
+     echo " test Run unit tests"
46
+     echo " lint Run ruff linter"
47
+     printf '\nEnvironment:\n'
48
+     echo " AWS_PROFILE AWS profile [default: kadmon]"
49
+     echo " AWS_REGION AWS region [default: us-east-1]"
50
+     echo " KADMON_MODEL Model ID [default: us.anthropic.claude-sonnet-4-6]"
51
+     # Anything other than an explicit (or implied) "help" is a usage error: report failure to the caller.
52
+     [[ "${1:-help}" == help ]] || exit 1
53
+     ;;
54
+ esac