ada-agent 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +256 -0
- package/bench/README.md +88 -0
- package/bench/swebench.mjs +242 -0
- package/bin/ada-server.mjs +6 -0
- package/bin/ada.mjs +7 -0
- package/docs/agent-loop.svg +66 -0
- package/docs/architecture.md +139 -0
- package/docs/architecture.svg +73 -0
- package/docs/connectors.md +48 -0
- package/docs/integrations.md +59 -0
- package/docs/login-flow.svg +56 -0
- package/docs/orchestration.md +45 -0
- package/package.json +64 -0
- package/skills/accessibility/SKILL.md +23 -0
- package/skills/add-logging/SKILL.md +23 -0
- package/skills/add-metrics/SKILL.md +23 -0
- package/skills/adr/SKILL.md +24 -0
- package/skills/aesthetic-direction/SKILL.md +24 -0
- package/skills/agent-loop/SKILL.md +23 -0
- package/skills/alerting/SKILL.md +23 -0
- package/skills/alpha-compositing/SKILL.md +23 -0
- package/skills/android-compose/SKILL.md +23 -0
- package/skills/angular-module/SKILL.md +23 -0
- package/skills/ansible-playbook/SKILL.md +24 -0
- package/skills/api-docs/SKILL.md +24 -0
- package/skills/app-store-prep/SKILL.md +23 -0
- package/skills/architecture-diagram/SKILL.md +21 -0
- package/skills/architecture-doc/SKILL.md +24 -0
- package/skills/audit-log/SKILL.md +23 -0
- package/skills/authz-review/SKILL.md +23 -0
- package/skills/aws-lambda/SKILL.md +24 -0
- package/skills/bash-script/SKILL.md +23 -0
- package/skills/batch/SKILL.md +23 -0
- package/skills/bisect/SKILL.md +23 -0
- package/skills/bounding-box/SKILL.md +24 -0
- package/skills/branch-cleanup/SKILL.md +23 -0
- package/skills/bundle-analyze/SKILL.md +23 -0
- package/skills/cache/SKILL.md +23 -0
- package/skills/call-graph/SKILL.md +23 -0
- package/skills/canvas-debug/SKILL.md +23 -0
- package/skills/cdn-setup/SKILL.md +23 -0
- package/skills/changelog/SKILL.md +24 -0
- package/skills/cherry-pick/SKILL.md +23 -0
- package/skills/ci-setup/SKILL.md +23 -0
- package/skills/cleanup/SKILL.md +23 -0
- package/skills/cli-tool/SKILL.md +23 -0
- package/skills/cloudformation/SKILL.md +23 -0
- package/skills/code-examples/SKILL.md +24 -0
- package/skills/code-review/SKILL.md +23 -0
- package/skills/color-palette/SKILL.md +24 -0
- package/skills/color-space/SKILL.md +24 -0
- package/skills/comment-why/SKILL.md +23 -0
- package/skills/commit/SKILL.md +26 -0
- package/skills/complexity-audit/SKILL.md +23 -0
- package/skills/component/SKILL.md +23 -0
- package/skills/component-library/SKILL.md +23 -0
- package/skills/connect-github/SKILL.md +20 -0
- package/skills/connect-mcp/SKILL.md +21 -0
- package/skills/connect-postgres/SKILL.md +20 -0
- package/skills/connect-remote/SKILL.md +23 -0
- package/skills/connect-slack/SKILL.md +20 -0
- package/skills/contract-audit/SKILL.md +25 -0
- package/skills/contributing/SKILL.md +23 -0
- package/skills/cpp-raii/SKILL.md +23 -0
- package/skills/cron-job/SKILL.md +23 -0
- package/skills/cv-preprocess/SKILL.md +24 -0
- package/skills/dark-mode/SKILL.md +24 -0
- package/skills/dashboard/SKILL.md +23 -0
- package/skills/dashboard-ui/SKILL.md +23 -0
- package/skills/data-export/SKILL.md +23 -0
- package/skills/data-validation/SKILL.md +23 -0
- package/skills/dataframe/SKILL.md +23 -0
- package/skills/db-index/SKILL.md +24 -0
- package/skills/dead-code/SKILL.md +23 -0
- package/skills/debug/SKILL.md +24 -0
- package/skills/deck-review/SKILL.md +24 -0
- package/skills/dedupe/SKILL.md +23 -0
- package/skills/dedupe-deps/SKILL.md +23 -0
- package/skills/dependency-audit/SKILL.md +23 -0
- package/skills/dependency-update/SKILL.md +23 -0
- package/skills/deploy/SKILL.md +23 -0
- package/skills/design-system/SKILL.md +24 -0
- package/skills/design-tokens/SKILL.md +24 -0
- package/skills/diagram-as-code/SKILL.md +24 -0
- package/skills/diff-explain/SKILL.md +23 -0
- package/skills/django-view/SKILL.md +23 -0
- package/skills/doc-lint/SKILL.md +24 -0
- package/skills/docker-compose/SKILL.md +23 -0
- package/skills/dockerize/SKILL.md +23 -0
- package/skills/docstrings/SKILL.md +23 -0
- package/skills/dotfiles/SKILL.md +23 -0
- package/skills/dpi-scaling/SKILL.md +23 -0
- package/skills/e2e-test/SKILL.md +23 -0
- package/skills/embeddings/SKILL.md +23 -0
- package/skills/empty-states/SKILL.md +23 -0
- package/skills/env-setup/SKILL.md +23 -0
- package/skills/erc20/SKILL.md +24 -0
- package/skills/error-tracking/SKILL.md +23 -0
- package/skills/estimate/SKILL.md +23 -0
- package/skills/etl-pipeline/SKILL.md +24 -0
- package/skills/eval-harness/SKILL.md +23 -0
- package/skills/exif-orientation/SKILL.md +23 -0
- package/skills/explain-code/SKILL.md +23 -0
- package/skills/express-middleware/SKILL.md +23 -0
- package/skills/extract-function/SKILL.md +23 -0
- package/skills/faq/SKILL.md +24 -0
- package/skills/fastapi-endpoint/SKILL.md +23 -0
- package/skills/favicon/SKILL.md +23 -0
- package/skills/feature-engineering/SKILL.md +23 -0
- package/skills/few-shot/SKILL.md +23 -0
- package/skills/find-owner/SKILL.md +23 -0
- package/skills/firmware-driver/SKILL.md +23 -0
- package/skills/fix-flaky-tests/SKILL.md +23 -0
- package/skills/flutter-widget/SKILL.md +23 -0
- package/skills/font-rendering/SKILL.md +23 -0
- package/skills/form-validation/SKILL.md +23 -0
- package/skills/format/SKILL.md +23 -0
- package/skills/game-loop/SKILL.md +23 -0
- package/skills/gas-optimize/SKILL.md +25 -0
- package/skills/gdpr-review/SKILL.md +24 -0
- package/skills/github-actions/SKILL.md +23 -0
- package/skills/glossary/SKILL.md +24 -0
- package/skills/go-idioms/SKILL.md +23 -0
- package/skills/gpu-profile/SKILL.md +23 -0
- package/skills/graphify/SKILL.md +21 -0
- package/skills/graphql-resolver/SKILL.md +23 -0
- package/skills/grpc-service/SKILL.md +23 -0
- package/skills/guardrails/SKILL.md +23 -0
- package/skills/healthcheck/SKILL.md +23 -0
- package/skills/heisenbug/SKILL.md +23 -0
- package/skills/helm-chart/SKILL.md +24 -0
- package/skills/hero-section/SKILL.md +23 -0
- package/skills/html-email/SKILL.md +24 -0
- package/skills/html-form/SKILL.md +23 -0
- package/skills/html-sanitize/SKILL.md +23 -0
- package/skills/html-table/SKILL.md +23 -0
- package/skills/html-to-pdf/SKILL.md +23 -0
- package/skills/http-client/SKILL.md +23 -0
- package/skills/i18n/SKILL.md +23 -0
- package/skills/i2c-spi/SKILL.md +23 -0
- package/skills/image-decode/SKILL.md +24 -0
- package/skills/image-memory/SKILL.md +24 -0
- package/skills/image-perf/SKILL.md +24 -0
- package/skills/image-pipeline/SKILL.md +24 -0
- package/skills/image-upload/SKILL.md +24 -0
- package/skills/infra-cost/SKILL.md +24 -0
- package/skills/input-validation/SKILL.md +23 -0
- package/skills/issue-template/SKILL.md +23 -0
- package/skills/java-streams/SKILL.md +23 -0
- package/skills/k8s-manifest/SKILL.md +23 -0
- package/skills/kotlin-coroutines/SKILL.md +23 -0
- package/skills/landing-page/SKILL.md +24 -0
- package/skills/laravel-controller/SKILL.md +23 -0
- package/skills/lazy-load/SKILL.md +23 -0
- package/skills/license-check/SKILL.md +23 -0
- package/skills/license-header/SKILL.md +23 -0
- package/skills/lint-fix/SKILL.md +23 -0
- package/skills/llm-cost/SKILL.md +23 -0
- package/skills/lockfile-fix/SKILL.md +23 -0
- package/skills/low-power/SKILL.md +23 -0
- package/skills/makefile/SKILL.md +23 -0
- package/skills/man-page/SKILL.md +24 -0
- package/skills/mcp-server/SKILL.md +23 -0
- package/skills/memory-leak/SKILL.md +23 -0
- package/skills/mermaid-diagram/SKILL.md +23 -0
- package/skills/meta-tags/SKILL.md +23 -0
- package/skills/micro-interactions/SKILL.md +23 -0
- package/skills/migration/SKILL.md +23 -0
- package/skills/migration-guide/SKILL.md +24 -0
- package/skills/mkdocs-setup/SKILL.md +24 -0
- package/skills/mobile-permissions/SKILL.md +23 -0
- package/skills/mock-api/SKILL.md +23 -0
- package/skills/modernize/SKILL.md +23 -0
- package/skills/monorepo-setup/SKILL.md +23 -0
- package/skills/motion-design/SKILL.md +23 -0
- package/skills/n-plus-one/SKILL.md +23 -0
- package/skills/naming-review/SKILL.md +23 -0
- package/skills/nextjs-route/SKILL.md +23 -0
- package/skills/nginx-config/SKILL.md +23 -0
- package/skills/ocr-debug/SKILL.md +24 -0
- package/skills/onboard/SKILL.md +23 -0
- package/skills/onboarding-map/SKILL.md +23 -0
- package/skills/open-pr/SKILL.md +24 -0
- package/skills/openapi/SKILL.md +23 -0
- package/skills/opencv-debug/SKILL.md +24 -0
- package/skills/orm-model/SKILL.md +23 -0
- package/skills/owasp-check/SKILL.md +24 -0
- package/skills/page-transitions/SKILL.md +23 -0
- package/skills/pagination/SKILL.md +23 -0
- package/skills/perf-optimize/SKILL.md +23 -0
- package/skills/perf-profile/SKILL.md +23 -0
- package/skills/physics/SKILL.md +23 -0
- package/skills/pitch-deck/SKILL.md +24 -0
- package/skills/pixel-diff/SKILL.md +23 -0
- package/skills/ponytail/SKILL.md +46 -0
- package/skills/postmortem/SKILL.md +24 -0
- package/skills/pptx-deck/SKILL.md +23 -0
- package/skills/pptx-export/SKILL.md +23 -0
- package/skills/pptx-from-markdown/SKILL.md +23 -0
- package/skills/pptx-template/SKILL.md +24 -0
- package/skills/pr-review/SKILL.md +24 -0
- package/skills/precommit/SKILL.md +23 -0
- package/skills/pricing-page/SKILL.md +23 -0
- package/skills/project-overview/SKILL.md +22 -0
- package/skills/prompt-template/SKILL.md +23 -0
- package/skills/property-test/SKILL.md +23 -0
- package/skills/protobuf/SKILL.md +23 -0
- package/skills/py-async/SKILL.md +23 -0
- package/skills/py-typing/SKILL.md +23 -0
- package/skills/query-optimize/SKILL.md +23 -0
- package/skills/rag-pipeline/SKILL.md +23 -0
- package/skills/rails-resource/SKILL.md +23 -0
- package/skills/rate-limit/SKILL.md +23 -0
- package/skills/react-hooks/SKILL.md +23 -0
- package/skills/react-native-screen/SKILL.md +23 -0
- package/skills/react-perf/SKILL.md +23 -0
- package/skills/readme/SKILL.md +24 -0
- package/skills/rebase/SKILL.md +24 -0
- package/skills/refactor/SKILL.md +23 -0
- package/skills/regression-test/SKILL.md +23 -0
- package/skills/release-notes/SKILL.md +24 -0
- package/skills/rename-symbol/SKILL.md +23 -0
- package/skills/repro/SKILL.md +23 -0
- package/skills/resolve-conflicts/SKILL.md +23 -0
- package/skills/responsive/SKILL.md +23 -0
- package/skills/rest-endpoint/SKILL.md +23 -0
- package/skills/retro/SKILL.md +23 -0
- package/skills/rtos-task/SKILL.md +23 -0
- package/skills/runbook/SKILL.md +25 -0
- package/skills/rust-borrow/SKILL.md +23 -0
- package/skills/rust-unsafe-audit/SKILL.md +23 -0
- package/skills/sanitize/SKILL.md +23 -0
- package/skills/schema-design/SKILL.md +23 -0
- package/skills/screenshot-debug/SKILL.md +22 -0
- package/skills/scroll-animation/SKILL.md +23 -0
- package/skills/secret-scan/SKILL.md +23 -0
- package/skills/security-audit/SKILL.md +23 -0
- package/skills/security-review/SKILL.md +23 -0
- package/skills/seed-data/SKILL.md +23 -0
- package/skills/self-review/SKILL.md +23 -0
- package/skills/semantic-html/SKILL.md +23 -0
- package/skills/semver-bump/SKILL.md +24 -0
- package/skills/shader/SKILL.md +23 -0
- package/skills/shader-debug/SKILL.md +23 -0
- package/skills/simplify-conditionals/SKILL.md +23 -0
- package/skills/sitemap/SKILL.md +23 -0
- package/skills/skeleton-loader/SKILL.md +23 -0
- package/skills/slide-charts/SKILL.md +24 -0
- package/skills/slide-outline/SKILL.md +23 -0
- package/skills/snapshot-update/SKILL.md +23 -0
- package/skills/solidity-contract/SKILL.md +25 -0
- package/skills/speaker-notes/SKILL.md +23 -0
- package/skills/spike/SKILL.md +23 -0
- package/skills/split-file/SKILL.md +23 -0
- package/skills/spring-controller/SKILL.md +23 -0
- package/skills/sprite-anim/SKILL.md +23 -0
- package/skills/sql-report/SKILL.md +23 -0
- package/skills/squash/SKILL.md +24 -0
- package/skills/ssl-setup/SKILL.md +23 -0
- package/skills/stacktrace/SKILL.md +23 -0
- package/skills/static-site/SKILL.md +24 -0
- package/skills/structured-logging/SKILL.md +23 -0
- package/skills/svelte-store/SKILL.md +23 -0
- package/skills/swiftui-view/SKILL.md +23 -0
- package/skills/tailwind-theme/SKILL.md +24 -0
- package/skills/tcp-server/SKILL.md +23 -0
- package/skills/tdd/SKILL.md +23 -0
- package/skills/terraform-module/SKILL.md +24 -0
- package/skills/test-coverage/SKILL.md +23 -0
- package/skills/texture-debug/SKILL.md +23 -0
- package/skills/threat-model/SKILL.md +23 -0
- package/skills/thumbnail/SKILL.md +24 -0
- package/skills/todo-scan/SKILL.md +23 -0
- package/skills/tool-definition/SKILL.md +23 -0
- package/skills/trace-flow/SKILL.md +23 -0
- package/skills/tracing/SKILL.md +23 -0
- package/skills/train-model/SKILL.md +24 -0
- package/skills/tree-shake/SKILL.md +23 -0
- package/skills/ts-generics/SKILL.md +23 -0
- package/skills/ts-strict/SKILL.md +23 -0
- package/skills/tui-app/SKILL.md +23 -0
- package/skills/tutorial/SKILL.md +24 -0
- package/skills/type-tighten/SKILL.md +23 -0
- package/skills/typography/SKILL.md +24 -0
- package/skills/ui-bug-repro/SKILL.md +23 -0
- package/skills/ui-polish/SKILL.md +24 -0
- package/skills/ui-review/SKILL.md +24 -0
- package/skills/vendor/SKILL.md +23 -0
- package/skills/visual-diff-ci/SKILL.md +24 -0
- package/skills/visual-regression/SKILL.md +23 -0
- package/skills/vue-composition/SKILL.md +23 -0
- package/skills/web-component/SKILL.md +23 -0
- package/skills/web-fonts/SKILL.md +24 -0
- package/skills/web3-frontend/SKILL.md +25 -0
- package/skills/webgl-debug/SKILL.md +23 -0
- package/skills/webhook/SKILL.md +23 -0
- package/skills/websocket/SKILL.md +23 -0
- package/skills/write-tests/SKILL.md +19 -0
- package/src/client/agent.ts +803 -0
- package/src/client/background.ts +39 -0
- package/src/client/checkpoint.ts +48 -0
- package/src/client/cli.ts +1253 -0
- package/src/client/compaction.ts +86 -0
- package/src/client/extensions.ts +83 -0
- package/src/client/hooks.ts +40 -0
- package/src/client/image.ts +26 -0
- package/src/client/lsp.ts +0 -0
- package/src/client/mcp.ts +276 -0
- package/src/client/models-dev.ts +52 -0
- package/src/client/pkg.ts +41 -0
- package/src/client/platform.ts +94 -0
- package/src/client/prompts.ts +47 -0
- package/src/client/render.ts +138 -0
- package/src/client/session.ts +107 -0
- package/src/client/settings.ts +86 -0
- package/src/client/skill-router.ts +79 -0
- package/src/client/skills.ts +199 -0
- package/src/client/snapshot.ts +56 -0
- package/src/client/telemetry.ts +24 -0
- package/src/client/todos.ts +23 -0
- package/src/client/tools.ts +756 -0
- package/src/client/tui-mode.ts +41 -0
- package/src/client/tui.ts +224 -0
- package/src/sdk/index.ts +36 -0
- package/src/selfcheck.ts +364 -0
- package/src/server/config.ts +58 -0
- package/src/server/credentials.ts +89 -0
- package/src/server/identity.ts +58 -0
- package/src/server/index.ts +113 -0
- package/src/server/oauth.ts +93 -0
- package/src/server/providers/adapter.ts +25 -0
- package/src/server/providers/anthropic.ts +189 -0
- package/src/server/providers/openai-compat.ts +76 -0
- package/src/server/providers/registry.ts +31 -0
- package/src/server/router.ts +29 -0
- package/src/server/sse.ts +20 -0
- package/src/shared/types.ts +20 -0
- package/tsconfig.json +15 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Aditya
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# ada
|
|
2
|
+
|
|
3
|
+
[npm: ada-agent](https://www.npmjs.com/package/ada-agent)
|
|
4
|
+
[CI](https://github.com/black141312/ada/actions/workflows/ci.yml)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
[package.json](package.json)
|
|
7
|
+
|
|
8
|
+
A coding agent built from zero — a terminal client in the spirit of pi / Codex / Cursor,
|
|
9
|
+
paired with a backend that holds every provider key and speaks one wire
|
|
10
|
+
format to the client.
|
|
11
|
+
|
|
12
|
+

|
|
13
|
+
|
|
14
|
+
The client talks **only** OpenAI Chat Completions to the backend. The backend routes each request
|
|
15
|
+
to the right provider by model id and normalizes every provider back to that one format — so a new
|
|
16
|
+
model is **zero code**, and a new OpenAI-compatible provider is **two lines**.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Features
|
|
21
|
+
|
|
22
|
+
- **Agentic loop** — streams, calls tools, feeds results back, repeats until done.
|
|
23
|
+
- **Tools** — `read_file`, `write_file`, `edit_file` (exact-match), `apply_patch` (multi-file),
|
|
24
|
+
`bash`, `ls`, `grep` (uses `rg` if present), `glob`, `web_fetch`, `web_search`, `lsp_diagnostics`,
|
|
25
|
+
`ask_user` (clarifying questions).
|
|
26
|
+
- **Auto-format on edit** — written files are formatted with the project's formatter
|
|
27
|
+
(prettier/gofmt/rustfmt/ruff/shfmt) in trusted projects; off via `ADA_NO_FORMAT`.
|
|
28
|
+
- **LSP diagnostics** — `lsp_diagnostics` runs a language server (typescript-language-server,
|
|
29
|
+
pyright, gopls, rust-analyzer) and returns errors/warnings; servers are reused, trusted-project only.
|
|
30
|
+
- **Real PTY shell** — `bash` runs in a pseudo-terminal (node-pty), so TTY-only programs, colour, and
|
|
31
|
+
progress output behave; ANSI is stripped from what the model sees.
|
|
32
|
+
- **Two front-ends** — a classic readline REPL and an inline **TUI** (`--tui`) with a live "thinking"
|
|
33
|
+
spinner and Claude-style turn markers.
|
|
34
|
+
- **Permission modes — ask / plan / auto** — `/ask` confirms each tool, `/plan` is read-only (ada
|
|
35
|
+
plans, `/run` to execute), `/auto` runs freely (destructive `bash` still confirms). Each approval
|
|
36
|
+
states in plain words what it wants ("ada wants to run a shell command…") instead of raw args.
|
|
37
|
+
- **Skills that actually fire** — ~285 built-in skills; ada routes every request and **auto-applies**
|
|
38
|
+
a clearly-matching one (injecting its procedure), or suggests skills to load. See [Skills](#skills).
|
|
39
|
+
- **todos**, **checkpoint/undo** (revert the agent's edits), **protected paths**, **git worktrees**,
|
|
40
|
+
**workspace snapshots** (`/snapshot` `/restore`), **named agents**, and **subagents** (`spawn_agent`).
|
|
41
|
+
- **Sessions** — every turn is persisted; `--continue` / `--resume` to pick up where you left off.
|
|
42
|
+
- **Context compaction** — summarizes old turns automatically as context grows.
|
|
43
|
+
- **Sign in with GitHub or Google** (RFC 8628 device flow) — zero client config.
|
|
44
|
+
- **Extensible** — extensions (tools + hooks + commands), prompt templates, skills, and MCP servers.
|
|
45
|
+
- **No build step** — TypeScript run through `tsx`.
|
|
46
|
+
|
|
47
|
+
## Providers
|
|
48
|
+
|
|
49
|
+
The backend proxies any OpenAI-compatible upstream and translates the one that isn't (Anthropic):
|
|
50
|
+
|
|
51
|
+
| Provider | Models | Key env var |
|
|
52
|
+
|---|---|---|
|
|
53
|
+
| OpenAI | `gpt-*`, `o*` | `OPENAI_API_KEY` |
|
|
54
|
+
| Anthropic | `claude-*` | `ANTHROPIC_API_KEY` |
|
|
55
|
+
| Google Gemini | `gemini-*` | `GEMINI_API_KEY` |
|
|
56
|
+
| Mistral | `mistral-*` | `MISTRAL_API_KEY` |
|
|
57
|
+
| Groq | — | `GROQ_API_KEY` |
|
|
58
|
+
| DeepSeek | `deepseek-*` | `DEEPSEEK_API_KEY` |
|
|
59
|
+
| Together | — | `TOGETHER_API_KEY` |
|
|
60
|
+
| xAI (Grok) | `grok-*` | `XAI_API_KEY` |
|
|
61
|
+
| DashScope (Qwen) | — | `DASHSCOPE_API_KEY` |
|
|
62
|
+
| OpenRouter | everything else | `OPENROUTER_API_KEY` |
|
|
63
|
+
| **Ollama (local)** | `name:tag` (e.g. `qwen2.5-coder:latest`) | *keyless* |
|
|
64
|
+
|
|
65
|
+
Routing: a model id containing `:` → local Ollama; otherwise by prefix; an explicit `provider`
|
|
66
|
+
field always wins. Set only the keys you have — the rest stay dormant (vendor SDKs load lazily).
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Install
|
|
71
|
+
|
|
72
|
+
Requires **Node ≥ 18** (and a C/C++ toolchain, since `node-pty` builds natively).
|
|
73
|
+
|
|
74
|
+
**Run it without installing — `npx`:**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
npx ada-agent # the client (published to npm)
|
|
78
|
+
npx -p ada-agent ada-server # the backend (second bin in the same package)
|
|
79
|
+
# straight from source, no publish needed:
|
|
80
|
+
npx github:black141312/ada
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Install globally** (puts `ada` and `ada-server` on your PATH):
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
npm install -g ada-agent
|
|
87
|
+
ada
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**From a clone** (for hacking on it):
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
git clone https://github.com/black141312/ada.git
|
|
94
|
+
cd ada && npm install
|
|
95
|
+
npm link # global `ada` / `ada-server` · or `npm start`
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
> The published-npm commands work once a maintainer has run `npm publish`; the `github:` form works
|
|
99
|
+
> against the repo today.
|
|
100
|
+
|
|
101
|
+
## Quickstart
|
|
102
|
+
|
|
103
|
+
ada is two processes: a **backend** (holds keys, routes) and the **`ada`** client.
|
|
104
|
+
|
|
105
|
+
**Option A — local, no keys (Ollama):**
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# terminal 1: backend
|
|
109
|
+
ada-server # → http://localhost:8787
|
|
110
|
+
|
|
111
|
+
# terminal 2: the agent
|
|
112
|
+
ada # pick a local model and chat
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Option B — a cloud provider:**
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# terminal 1
|
|
119
|
+
export ANTHROPIC_API_KEY=sk-ant-... # and/or OPENAI_API_KEY, GEMINI_API_KEY, …
|
|
120
|
+
ada-server
|
|
121
|
+
|
|
122
|
+
# terminal 2
|
|
123
|
+
ada --model claude-opus-4-8
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Windows PowerShell: `$env:ANTHROPIC_API_KEY="sk-ant-..."`.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Using `ada`
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
ada # interactive; pick a model on first run
|
|
134
|
+
ada --tui # inline TUI front-end
|
|
135
|
+
ada --model <id> # start on a specific model
|
|
136
|
+
ada --list-models # everything your keys can reach (via the backend)
|
|
137
|
+
ada --continue # resume the most recent session
|
|
138
|
+
ada --resume # pick a session to resume
|
|
139
|
+
ada --yolo # auto-approve tool calls (skip prompts)
|
|
140
|
+
ada -p "fix the build" # one-shot: print the answer and exit
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Slash commands** (in a session): `/ask` · `/plan` · `/auto` · `/mode` (cycle the permission mode) ·
|
|
144
|
+
`/run` · `/model [id]` · `/models` · `/reasoning low|medium|high|off` ·
|
|
145
|
+
`/strategy react|single|plan|multi|toolsmith` · `/agent [name]` · `/todos` · `/undo` · `/snapshot` ·
|
|
146
|
+
`/restore` · `/jobs` · `/fork` · `/tree` · `/rewind` · `/compact` · `/context` · `/cost` ·
|
|
147
|
+
`/image <path>` · `/paste` · `/login` · `/logout` · `/exit`.
|
|
148
|
+
|
|
149
|
+
**Permission modes** — switch with `/ask` · `/plan` · `/auto` (or `/mode` to cycle); the current mode
|
|
150
|
+
shows in the prompt line. In **ask** mode each gated tool prompts with what it wants in plain words
|
|
151
|
+
(`ada wants to run a shell command…`) and one key: `[y]es` · `[a]uto` (run the rest without asking) ·
|
|
152
|
+
`[p]lan` · `[n]o`. **plan** is read-only — ada plans but won't edit; `/run` approves and executes.
|
|
153
|
+
**auto** runs tools without asking (destructive `bash` still confirms). `--yolo` starts in **auto**.
|
|
154
|
+
|
|
155
|
+
**Subcommands:** `ada mcp …` (connectors) · `ada skill add <url>` · `ada worktree add <name>` ·
|
|
156
|
+
`ada serve` (HTTP API) · `ada share` (view a session) · `ada acp` (editor bridge). See
|
|
157
|
+
[docs/integrations.md](docs/integrations.md) for the HTTP API, the typed SDK, and ACP.
|
|
158
|
+
|
|
159
|
+
**Orchestration strategies** — the harness runs pluggable agent architectures (`--strategy <name>`
|
|
160
|
+
or `/strategy`): `react` (default loop), `single` (one shot), `plan` (plan→execute), `multi`
|
|
161
|
+
(sub-agent fan-out), and `toolsmith` (read a connected integration's docs and have sub-agents author
|
|
162
|
+
skills for it). See [docs/orchestration.md](docs/orchestration.md).
|
|
163
|
+
|
|
164
|
+
**Sign in** (optional — identifies you to the backend): run `/login`, choose GitHub or Google, and
|
|
165
|
+
enter the device code in your browser. The token is stored locally and sent as your client key.
|
|
166
|
+
|
|
167
|
+
## Skills
|
|
168
|
+
|
|
169
|
+
ada ships with **~285 built-in skills** across ~30 categories — specialized instructions the model
|
|
170
|
+
pulls in only when a task needs them (progressive disclosure). ada **routes** every request with a
|
|
171
|
+
relevance ranker over names + descriptions: when one skill clearly fits, ada **auto-applies** it —
|
|
172
|
+
injecting its procedure so even a weak model follows it (announced as `↳ skill: <name>`); when the
|
|
173
|
+
match is ambiguous it just suggests them. The model can also browse with **`list_skills`** (by
|
|
174
|
+
`category`/`filter`), search with **`find_skill`** (ranked), and load one with **`use_skill`** — so
|
|
175
|
+
nothing bloats the prompt until it's used. A sample of the categories:
|
|
176
|
+
|
|
177
|
+
`git` · `review` · `testing` · `debugging` · `refactoring` · `docs` · `security` · `ci-cd` ·
|
|
178
|
+
`performance` · `database` · `api` · `frontend` · `ui-design` · `html` · `pptx` · `image` ·
|
|
179
|
+
`graphics` · `languages` · `frameworks` · `mobile` · `cloud` · `observability` · `data-ml` ·
|
|
180
|
+
`agent-llm` · `web3` · `networking` · `shell` · `connectors` · `compliance` · …
|
|
181
|
+
|
|
182
|
+
Examples: `commit`, `code-review`, `dockerize`, `migration`, `react-hooks`, `terraform-module`,
|
|
183
|
+
`rag-pipeline`, `security-audit`, `project-overview`, `architecture-diagram`, `graphify`, `ponytail`.
|
|
184
|
+
|
|
185
|
+
Add your own as `SKILL.md` files under `.ada/skills/<name>/` (project) or `~/.ada/skills/<name>/`
|
|
186
|
+
(global) — `---\ndescription: …\ncategory: …\n---` front-matter is all that's required. Project
|
|
187
|
+
skills override global ones, which in turn override the built-ins. Install remote ones with
|
|
188
|
+
`ada skill add <url>` (a `SKILL.md` or a JSON index); `ada skill list` shows them.
|
|
189
|
+
|
|
190
|
+
## Connectors (MCP)
|
|
191
|
+
|
|
192
|
+
ada reaches external tools and data through MCP servers. Browse the catalog and add one:
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
ada mcp # list the catalog (filesystem, github, postgres, slack, sentry, …)
|
|
196
|
+
ada mcp add github # write it into .ada/mcp.json, then set the token it prints
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Both **local stdio** servers (`{ command, args }`) and **remote HTTP** servers (`{ url, headers }`)
|
|
200
|
+
are supported; their tools appear as `<server>__<tool>`, approval-gated, in trusted projects. See
|
|
201
|
+
[docs/connectors.md](docs/connectors.md), or the `connectors` skill category for per-connector setup.
|
|
202
|
+
|
|
203
|
+
## Configuration
|
|
204
|
+
|
|
205
|
+
**Client** (`ada`):
|
|
206
|
+
|
|
207
|
+
| Env var | Default | Purpose |
|
|
208
|
+
|---|---|---|
|
|
209
|
+
| `ADA_BACKEND_URL` | `http://localhost:8787/v1` | Where the backend lives |
|
|
210
|
+
| `ADA_CLIENT_KEY` | stored login token, else `dev` | Bearer sent to the backend |
|
|
211
|
+
| `ADA_MODEL` | — | Default model id |
|
|
212
|
+
| `ADA_COMPACT_AT` | `100000` | Token estimate that triggers compaction |
|
|
213
|
+
| `ADA_AUTO_APPROVE` | — | `1` ⇒ behave like `--yolo` |
|
|
214
|
+
| `NO_COLOR` / `ADA_THEME` | — | Disable color / theme overrides |
|
|
215
|
+
|
|
216
|
+
**Backend** (`ada-server`):
|
|
217
|
+
|
|
218
|
+
| Env var | Default | Purpose |
|
|
219
|
+
|---|---|---|
|
|
220
|
+
| `ADA_PORT` | `8787` | Listen port |
|
|
221
|
+
| `ADA_CLIENT_KEYS` | *(unset = dev/no-auth)* | Comma-separated allowed client keys |
|
|
222
|
+
| `ADA_REQUIRE_LOGIN` / `ADA_ALLOWED_USERS` | — | Gate access to verified GitHub/Google users |
|
|
223
|
+
| `OLLAMA_BASE_URL` | `http://localhost:11434/v1` | Local Ollama endpoint |
|
|
224
|
+
| *(provider keys)* | — | See the [Providers](#providers) table |
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Develop
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
npm run typecheck # tsc --noEmit
|
|
232
|
+
npm run selfcheck # offline checks (tools, sessions, routing, parsers, TUI)
|
|
233
|
+
npm start # run the client from source
|
|
234
|
+
npm run server # run the backend from source
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
See **[docs/architecture.md](docs/architecture.md)** for the design (adapters, routing, request
|
|
238
|
+
flow, file layout), **[docs/orchestration.md](docs/orchestration.md)** for the agent strategies, and
|
|
239
|
+
**[docs/integrations.md](docs/integrations.md)** for the HTTP API / SDK / ACP.
|
|
240
|
+
|
|
241
|
+
## Benchmarks
|
|
242
|
+
|
|
243
|
+
ada can run **SWE-bench Verified** — it generates patches for real GitHub issues (one isolated repo
|
|
244
|
+
clone per task), emitting an official-format `predictions.jsonl` that the official `swebench` Docker
|
|
245
|
+
harness scores. Run `node bench/swebench.mjs --dataset … --model … --out runs/x`. See
|
|
246
|
+
**[bench/README.md](bench/README.md)** for the full flow (dataset, prereqs, scoring command).
|
|
247
|
+
|
|
248
|
+
## Contributing
|
|
249
|
+
|
|
250
|
+
Issues and PRs welcome — it's a small, no-build codebase. Run `npm run typecheck && npm run selfcheck`
|
|
251
|
+
before a PR and keep changes lean. See **[CONTRIBUTING.md](CONTRIBUTING.md)**; report vulnerabilities
|
|
252
|
+
via **[SECURITY.md](SECURITY.md)**.
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
[MIT](LICENSE) © 2026 Aditya
|
package/bench/README.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Benchmarking ada on SWE-bench Verified
|
|
2
|
+
|
|
3
|
+
ada can run **SWE-bench Verified** — give the agent a real GitHub issue, let it edit the repo, and
|
|
4
|
+
score whether the repo's test suite passes. This directory has the **generation** half (ada produces
|
|
5
|
+
patches); **scoring** is the official `swebench` Docker harness — we don't reimplement it, because
|
|
6
|
+
that's the only way to get correct, comparable numbers.
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
dataset (issues) ──▶ bench/swebench.mjs ──▶ predictions.jsonl ──▶ official swebench eval ──▶ resolved %
|
|
10
|
+
(ada edits the repo, (Docker: apply patch +
|
|
11
|
+
per isolated clone) test_patch, run tests)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Prerequisites
|
|
15
|
+
|
|
16
|
+
- **ada-server running with provider keys** — the harness drives `ada -p`, which needs the backend:
|
|
17
|
+
```bash
|
|
18
|
+
export ANTHROPIC_API_KEY=sk-ant-... # and/or OPENAI_API_KEY, etc.
|
|
19
|
+
ada-server # http://localhost:8787
|
|
20
|
+
```
|
|
21
|
+
- `git` + network (the harness clones each task repo; clones are cached under `~/.cache/ada-swebench`, or under `$ADA_SWEBENCH_CACHE` if that is set).
|
|
22
|
+
- For scoring: **Docker** and the **`swebench`** Python package (`pip install swebench`). Allow plenty
|
|
23
|
+
of disk — the official images are large.
|
|
24
|
+
|
|
25
|
+
## 1. Get the dataset
|
|
26
|
+
|
|
27
|
+
SWE-bench Verified (500 instances) lives on Hugging Face. Export it to JSONL once:
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
# pip install datasets
|
|
31
|
+
from datasets import load_dataset
|
|
32
|
+
load_dataset("princeton-nlp/SWE-bench_Verified", split="test").to_json("swe-bench-verified.jsonl")
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## 2. Generate predictions with ada
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# smoke test on 5 instances first
|
|
39
|
+
node bench/swebench.mjs --dataset swe-bench-verified.jsonl --model claude-opus-4-8 \
|
|
40
|
+
--out runs/opus --limit 5 --concurrency 2
|
|
41
|
+
|
|
42
|
+
# a specific instance (omit --instances and --limit to run the whole set)
|
|
43
|
+
node bench/swebench.mjs --dataset swe-bench-verified.jsonl --model claude-opus-4-8 \
|
|
44
|
+
--out runs/opus --instances astropy__astropy-12907
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
For each instance it clones the repo at `base_commit` into an isolated dir, hands ada the issue text
|
|
48
|
+
(`ada -p … --json`, auto-approve), captures `git diff` as the model patch, and appends an
|
|
49
|
+
official-format line to `runs/opus/predictions.jsonl`:
|
|
50
|
+
|
|
51
|
+
```json
|
|
52
|
+
{"instance_id": "...", "model_name_or_path": "claude-opus-4-8", "model_patch": "diff --git ..."}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
It also writes `meta.jsonl` (seconds, patch size, token/cost usage per instance). Re-running **resumes**
|
|
56
|
+
— instances already in `predictions.jsonl` are skipped. Flags: `--limit N`, `--instances a,b`,
|
|
57
|
+
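`--concurrency` (default 2), `--timeout` seconds per instance (default 1200), `--out <dir>` (default `runs/ada`), `--ada <path>` (ada entry script to run; defaults to the bundled `bin/ada.mjs`).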
|
|
58
|
+
|
|
59
|
+
Swap `--model` to compare models on the same tasks (`gpt-...`, `qwen2.5-coder:latest`, …) — ada routes
|
|
60
|
+
each to the right provider.
|
|
61
|
+
|
|
62
|
+
## 3. Score with the official harness
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
python -m swebench.harness.run_evaluation \
|
|
66
|
+
--dataset_name princeton-nlp/SWE-bench_Verified \
|
|
67
|
+
--predictions_path runs/opus/predictions.jsonl \
|
|
68
|
+
--max_workers 4 --run_id ada-opus
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
It applies each patch + the held-out `test_patch` in Docker, runs the `FAIL_TO_PASS` / `PASS_TO_PASS`
|
|
72
|
+
tests, and reports the **resolved rate** plus a per-instance breakdown.
|
|
73
|
+
|
|
74
|
+
## Notes & honest caveats
|
|
75
|
+
|
|
76
|
+
- ada is told **not to touch tests** (the grader supplies its own); the patch is whatever ada changed
|
|
77
|
+
in the source.
|
|
78
|
+
- An empty patch (ada gave up / errored) is still recorded — it just counts as unresolved.
|
|
79
|
+
- This measures ada's default `react` loop. Try `ADA_MODEL`, a different `--model`, or wire a
|
|
80
|
+
`--strategy` into the harness to compare setups.
|
|
81
|
+
- Other benchmarks (HumanEval, Aider polyglot) fit the same generate-then-score shape; ask and we'll
|
|
82
|
+
add a sibling script.
|
|
83
|
+
|
|
84
|
+
## Quick check
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
node bench/swebench.mjs --selftest # offline: validates the prompt/prediction/arg helpers
|
|
88
|
+
```
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// SWE-bench (Verified) prediction generator, driven by ada.
|
|
3
|
+
//
|
|
4
|
+
// This produces an **official-format** predictions.jsonl. It does NOT score — scoring is done by the
|
|
5
|
+
// official `swebench` harness in Docker (the only way to get correct, comparable numbers). See
|
|
6
|
+
// bench/README.md for the full flow (dataset, prereqs, the scoring command).
|
|
7
|
+
//
|
|
8
|
+
// For each instance: clone the task repo at its base commit into an isolated dir, hand ada the issue
|
|
9
|
+
// text (headless `ada -p --json`, auto-approve), then capture `git diff` as the model patch.
|
|
10
|
+
//
|
|
11
|
+
// node bench/swebench.mjs --dataset swe-bench-verified.jsonl --model claude-opus-4-8 \
|
|
12
|
+
// --out runs/opus [--limit 5] [--instances id1,id2] [--concurrency 2] [--timeout 1200]
|
|
13
|
+
// node bench/swebench.mjs --selftest # offline checks of the pure helpers
|
|
14
|
+
//
|
|
15
|
+
// Prereqs: a running `ada-server` with provider keys, `git`, network (clones the task repos).
|
|
16
|
+
|
|
17
|
+
import { spawn, spawnSync } from "node:child_process";
import { appendFileSync, existsSync, mkdirSync, readFileSync, renameSync, rmSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join, resolve } from "node:path";
import { fileURLToPath } from "node:url";
import assert from "node:assert/strict";
|
|
23
|
+
|
|
24
|
+
const HERE = dirname(fileURLToPath(import.meta.url));
|
|
25
|
+
const ADA_BIN = resolve(HERE, "..", "bin", "ada.mjs");
|
|
26
|
+
const CACHE = process.env.ADA_SWEBENCH_CACHE || join(homedir(), ".cache", "ada-swebench");
|
|
27
|
+
|
|
28
|
+
// ---------- pure helpers (covered by --selftest) ----------
|
|
29
|
+
|
|
30
|
+
/**
 * Parse the harness command-line flags.
 *
 * Recognised flags: `--selftest`, `--dataset <path>`, `--model <id>`, `--out <dir>`, `--limit <n>`,
 * `--instances <a,b,...>`, `--concurrency <n>`, `--timeout <seconds>`, `--ada <path>`.
 * Anything else is ignored.
 *
 * @param {string[]} argv - Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns {object} The parsed flags, seeded with the defaults
 *   `{ concurrency: 2, timeout: 1200, out: "runs/ada" }`.
 */
export function parseArgs(argv) {
  const f = { concurrency: 2, timeout: 1200, out: "runs/ada" };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--selftest") f.selftest = true;
    else if (a === "--dataset") f.dataset = argv[++i];
    else if (a === "--model") f.model = argv[++i];
    else if (a === "--out") f.out = argv[++i];
    else if (a === "--limit") f.limit = Number(argv[++i]);
    else if (a === "--instances") f.instances = String(argv[++i]).split(",").map((s) => s.trim()).filter(Boolean);
    else if (a === "--concurrency") f.concurrency = Math.max(1, Number(argv[++i]) || 1);
    else if (a === "--timeout") {
      // The timeout is later multiplied by 1000 and handed to setTimeout. A missing, non-numeric or
      // non-positive value would make that timer fire almost immediately and kill every ada run,
      // so only accept a finite positive number and otherwise keep the default.
      const seconds = Number(argv[++i]);
      if (Number.isFinite(seconds) && seconds > 0) f.timeout = seconds;
    } else if (a === "--ada") f.ada = argv[++i];
  }
  return f;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Build the instruction text handed to ada for one benchmark instance.
 *
 * @param {string} repo - Repository identifier shown to the agent (e.g. `owner/name`).
 * @param {string} problemStatement - The issue text to resolve.
 * @returns {string} The full prompt: context line, the issue, then the working guidelines.
 */
export function buildPrompt(repo, problemStatement) {
  const intro = `The repository \`${repo}\` is checked out in the current directory at the commit where this issue was filed. Resolve the issue by editing the source code.`;
  const guidelines = [
    "- Make the smallest change that fixes the issue.",
    "- Edit only library/source files. Do NOT add or modify tests — the grader supplies its own.",
    "- When the fix is complete and self-consistent, stop.",
  ];
  // Sections are separated by one blank line; the issue body follows its heading directly.
  return [intro, "", "ISSUE:", `${problemStatement}`, "", "Guidelines:", ...guidelines].join("\n");
}
|
|
58
|
+
|
|
59
|
+
/**
 * Serialise one prediction as a single JSON line (without the trailing newline).
 *
 * @param {string} instanceId - Value for the `instance_id` field.
 * @param {string} model - Value for the `model_name_or_path` field.
 * @param {string} patch - Value for the `model_patch` field.
 * @returns {string} The JSON-encoded record.
 */
export function predictionLine(instanceId, model, patch) {
  const record = {
    instance_id: instanceId,
    model_name_or_path: model,
    model_patch: patch,
  };
  return JSON.stringify(record);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Read a JSON Lines file: one JSON value per non-blank line.
 *
 * @param {string} path - File to read (UTF-8).
 * @returns {any[]} The parsed values, in file order.
 * @throws If the file cannot be read or a non-blank line is not valid JSON.
 */
function loadJsonl(path) {
  const rows = [];
  for (const rawLine of readFileSync(path, "utf8").split("\n")) {
    const text = rawLine.trim();
    // Blank lines (including the one after a trailing newline) are skipped.
    if (text) rows.push(JSON.parse(text));
  }
  return rows;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Collect the instance ids already present in a predictions file, so a re-run can skip them.
 *
 * @param {string} predPath - Path to `predictions.jsonl`; it may not exist yet.
 * @returns {Set<string>} Every truthy `instance_id` found (empty when the file is missing).
 */
export function doneIds(predPath) {
  const ids = new Set();
  if (existsSync(predPath)) {
    loadJsonl(predPath).forEach((row) => {
      if (row.instance_id) ids.add(row.instance_id);
    });
  }
  return ids;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Narrow the dataset to the requested instances.
 *
 * @param {object[]} all - Dataset rows, each with an `instance_id`.
 * @param {{ instances?: string[], limit?: number }} filters - Optional id allow-list and a cap on
 *   how many rows to keep. The allow-list is applied first, then the cap.
 * @returns {object[]} The selected rows in dataset order (`all` itself when no filter applies).
 */
export function selectInstances(all, { instances, limit }) {
  const wanted = instances?.length ? new Set(instances) : null;
  const matching = wanted ? all.filter((row) => wanted.has(row.instance_id)) : all;
  // A missing, zero, negative or NaN limit means "no cap".
  return limit && limit > 0 ? matching.slice(0, limit) : matching;
}
|
|
87
|
+
|
|
88
|
+
// ---------- git + ada (impure) ----------
|
|
89
|
+
|
|
90
|
+
const cloneLocks = new Map(); // repo → in-flight clone promise (don't clone the same repo twice)

/**
 * Run `git` synchronously and return the raw `spawnSync` result.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @param {object} [opts] - Extra `spawnSync` options; these override the default UTF-8 encoding.
 * @returns {object} The `spawnSync` result (`status`, `stdout`, `stderr`, ...). Failures are
 *   reported through the result, not thrown.
 */
function git(args, opts = {}) {
  const spawnOptions = { encoding: "utf8", ...opts };
  return spawnSync("git", args, spawnOptions);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Make sure a bare clone of `repo` exists in the cache and return its path.
 *
 * The clone is written to `<bare>.partial` and renamed into place only after `git clone` exits
 * successfully, so the final path never holds a half-finished clone, even if this process is
 * interrupted. Concurrent callers for the same repo share one clone.
 *
 * @param {string} repo - GitHub repository in `owner/name` form.
 * @returns {Promise<string>} Path of the bare clone.
 * @throws If `git clone` cannot be started or exits non-zero. A failed clone is not retried within
 *   the same run; later callers get the same rejection.
 */
async function ensureCache(repo) {
  // Check the lock before the filesystem: while a clone is in flight its result is only
  // trustworthy through the shared promise.
  const pending = cloneLocks.get(repo);
  if (pending) return pending;

  const bare = join(CACHE, `${repo.replace("/", "__")}.git`);
  if (existsSync(bare)) return bare;

  mkdirSync(CACHE, { recursive: true });
  const partial = `${bare}.partial`;
  // Drop leftovers from an earlier interrupted clone; git refuses to clone into a non-empty dir.
  rmSync(partial, { recursive: true, force: true });

  const cloning = new Promise((res, rej) => {
    const p = spawn("git", ["clone", "--bare", `https://github.com/${repo}.git`, partial], { stdio: "inherit" });
    p.on("error", rej);
    p.on("exit", (code) => {
      if (code !== 0) {
        rej(new Error(`clone ${repo} failed (${code})`));
        return;
      }
      try {
        renameSync(partial, bare); // publish the finished clone
        res(bare);
      } catch (e) {
        rej(e);
      }
    });
  });
  cloneLocks.set(repo, cloning);
  return cloning;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Create a fresh working tree for one instance at `baseCommit`.
 *
 * @param {string} repo - Repository passed to `ensureCache` to obtain the bare clone.
 * @param {string} baseCommit - Commit to check out (detached).
 * @param {string} dir - Working-tree directory; any existing content is removed first.
 * @throws If the cache clone, the shared clone or the checkout fails.
 */
async function prepInstance(repo, baseCommit, dir) {
  const bare = await ensureCache(repo);
  rmSync(dir, { recursive: true, force: true });

  const steps = [
    {
      // --shared: instance dirs reuse the cache's objects (cheap, isolated working trees). Safe
      // because we delete each dir before the cache is ever pruned.
      args: ["clone", "--shared", "--no-checkout", bare, dir],
      describe: (result) => `clone --shared failed: ${result.stderr}`,
    },
    {
      args: ["-C", dir, "checkout", "--detach", baseCommit],
      describe: (result) => `checkout ${baseCommit.slice(0, 8)} failed: ${result.stderr}`,
    },
  ];

  for (const { args, describe } of steps) {
    const result = git(args);
    if (result.status !== 0) throw new Error(describe(result));
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Capture every change in the working tree (including new and deleted files) as one patch.
 *
 * @param {string} dir - Working tree to diff.
 * @returns {string} Output of `git diff --cached` after staging everything; empty when nothing changed.
 * @throws If staging or diffing fails, so a truncated or missing diff is never mistaken for a
 *   genuinely empty patch.
 */
function diffPatch(dir) {
  const describeFailure = (r) => r.stderr || r.error?.message || `exit ${r.status}`;

  const staged = git(["-C", dir, "add", "-A"]);
  if (staged.status !== 0) throw new Error(`git add failed: ${describeFailure(staged)}`);

  // spawnSync's default output cap is small enough to cut off a large diff; raise it well
  // beyond any realistic patch size.
  const diff = git(["-C", dir, "diff", "--cached", "--no-color"], { maxBuffer: 256 * 1024 * 1024 });
  if (diff.status !== 0) throw new Error(`git diff failed: ${describeFailure(diff)}`);

  return diff.stdout ?? "";
}
|
|
128
|
+
|
|
129
|
+
/**
 * Run ada headlessly (`-p <prompt> --model <model> --json`) in `cwd` and summarise the outcome.
 *
 * The promise never rejects: spawn failures are reported as `code: -1`.
 *
 * @param {string} adaBin - Script to execute with the current Node binary.
 * @param {string} prompt - Prompt passed via `-p`.
 * @param {string} cwd - Working directory for the child process.
 * @param {string} model - Model id passed via `--model`.
 * @param {number} timeoutMs - Time after which the child is killed with SIGKILL.
 * @returns {Promise<{code: number|null, timedOut: boolean, usage: any, err: string}>}
 *   `usage` is the `usage` field of the last stdout line that starts with `{` (or `""`);
 *   `err` is the last 500 characters of stderr.
 */
function runAda(adaBin, prompt, cwd, model, timeoutMs) {
  return new Promise((res) => {
    const child = spawn(process.execPath, [adaBin, "-p", prompt, "--model", model, "--json"], { cwd, env: process.env });
    let out = "";
    let err = "";
    let timedOut = false;
    let settled = false;

    // Resolve exactly once, whichever event gets here first.
    const finish = (code) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      let usage = "";
      const line = out.split("\n").reverse().find((l) => l.trim().startsWith("{"));
      try {
        usage = line ? JSON.parse(line).usage ?? "" : "";
      } catch {
        /* ignore */
      }
      res({ code, timedOut, usage, err: err.slice(-500) });
    };

    const timer = setTimeout(() => {
      timedOut = true;
      child.kill("SIGKILL");
      // The child may already be gone while something else still holds its pipes open; in that
      // case "close" might never arrive, so settle here.
      if (child.exitCode !== null || child.signalCode !== null) finish(child.exitCode);
    }, timeoutMs);

    child.stdout.on("data", (d) => (out += d));
    child.stderr.on("data", (d) => (err += d));

    // "exit" can fire while stdout/stderr still hold unread data, which would drop the trailing
    // JSON usage line. "close" fires only after the streams have ended, so parse there.
    child.on("close", (code) => finish(code));
    // After a timeout the output is not needed, so don't wait for the pipes to drain.
    child.on("exit", (code) => {
      if (timedOut) finish(code);
    });
    child.on("error", (e) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      res({ code: -1, timedOut, usage: "", err: String(e) });
    });
  });
}
|
|
158
|
+
|
|
159
|
+
// ---------- run ----------
|
|
160
|
+
|
|
161
|
+
/**
 * Run `worker` over `items` with at most `n` calls in flight.
 *
 * @param {any[]} items - Work items, handed out in order.
 * @param {number} n - Maximum number of concurrent workers.
 * @param {(item: any, index: number) => Promise<any>} worker - Called once per item.
 * @returns {Promise<void>} Settles when every lane has drained; rejects if a worker rejects.
 */
async function pool(items, n, worker) {
  const entries = [...items.entries()];
  let cursor = 0;

  // Each lane keeps pulling the next unclaimed entry until none are left.
  const drain = async () => {
    while (cursor < entries.length) {
      const [index, item] = entries[cursor];
      cursor += 1;
      await worker(item, index);
    }
  };

  const laneCount = Math.min(n, entries.length);
  await Promise.all(Array.from({ length: laneCount }, () => drain()));
}
|
|
172
|
+
|
|
173
|
+
/**
 * Generate predictions for the selected instances.
 *
 * For each instance not already in `predictions.jsonl`: prepare a working tree, run ada on the
 * issue, capture the diff, then append one line to `predictions.jsonl` and one to `meta.jsonl`
 * in the output directory. A failing instance is still recorded, with an empty patch and the
 * error in `note`. Exits with status 2 when `--dataset` or `--model` is missing.
 *
 * @param {object} f - Flags as returned by `parseArgs`.
 * @returns {Promise<void>}
 */
async function main(f) {
  if (!f.dataset || !f.model) {
    console.error("usage: node bench/swebench.mjs --dataset <verified.jsonl> --model <id> [--out dir] [--limit N] [--instances a,b] [--concurrency 2] [--timeout 1200]");
    process.exit(2);
  }
  const adaBin = f.ada || ADA_BIN;
  const outDir = resolve(f.out);
  mkdirSync(outDir, { recursive: true });
  const predPath = join(outDir, "predictions.jsonl");
  const metaPath = join(outDir, "meta.jsonl");

  // Resume support: anything already written to predictions.jsonl is skipped.
  const already = doneIds(predPath);
  const todo = selectInstances(loadJsonl(f.dataset), f).filter((x) => !already.has(x.instance_id));
  console.error(`ada SWE-bench · model=${f.model} · ${todo.length} instances (${already.size} already done) · concurrency=${f.concurrency} → ${outDir}`);

  let done = 0;
  let nonEmpty = 0;
  await pool(todo, f.concurrency, async (inst) => {
    const dir = join(CACHE, "wt", inst.instance_id);
    const t0 = Date.now();
    let patch = "";
    let note = "";
    try {
      await prepInstance(inst.repo, inst.base_commit, dir);
      const r = await runAda(adaBin, buildPrompt(inst.repo, inst.problem_statement), dir, f.model, f.timeout * 1000);
      patch = diffPatch(dir);
      note = r.timedOut ? "timeout" : r.code === 0 ? `usage:${r.usage}` : `exit ${r.code}: ${r.err}`;
    } catch (e) {
      note = `error: ${e instanceof Error ? e.message : e}`;
    } finally {
      // Always remove the working tree, whether the instance succeeded or not.
      rmSync(dir, { recursive: true, force: true });
    }
    // Report real UTF-8 bytes: String#length counts UTF-16 code units and undercounts any patch
    // containing non-ASCII text.
    const patchBytes = Buffer.byteLength(patch, "utf8");
    appendFileSync(predPath, `${predictionLine(inst.instance_id, f.model, patch)}\n`);
    appendFileSync(metaPath, `${JSON.stringify({ instance_id: inst.instance_id, seconds: Math.round((Date.now() - t0) / 1000), patch_bytes: patchBytes, note })}\n`);
    done++;
    if (patch.trim()) nonEmpty++;
    console.error(` [${done}/${todo.length}] ${inst.instance_id} · ${patchBytes}B patch · ${note.slice(0, 60)}`);
  });

  console.error(`\nwrote ${predPath}\n${done} run, ${nonEmpty} produced a non-empty patch. Score with the official harness — see bench/README.md.`);
}
|
|
214
|
+
|
|
215
|
+
// ---------- selftest ----------
|
|
216
|
+
|
|
217
|
+
/**
 * Offline checks of the pure helpers (`parseArgs`, `buildPrompt`, `predictionLine`,
 * `selectInstances`). Throws on the first failed assertion; prints a confirmation otherwise.
 */
function runSelftest() {
  // parseArgs: every flag lands in the right field with the right type.
  const parsed = parseArgs(["--dataset", "d.jsonl", "--model", "m", "--limit", "3", "--instances", "x,y", "--concurrency", "4"]);
  assert.equal(parsed.dataset, "d.jsonl");
  assert.equal(parsed.model, "m");
  assert.equal(parsed.limit, 3);
  assert.deepEqual(parsed.instances, ["x", "y"]);
  assert.equal(parsed.concurrency, 4);

  // buildPrompt: repo, issue text and the no-tests rule must all be present.
  const prompt = buildPrompt("django/django", "Boom on empty queryset.");
  const mentionsRepo = prompt.includes("django/django");
  const mentionsIssue = prompt.includes("Boom on empty queryset.");
  const forbidsTests = /do not add or modify tests/i.test(prompt);
  assert.ok(mentionsRepo && mentionsIssue && forbidsTests, "prompt includes repo, issue, no-tests rule");

  // predictionLine: exactly the three expected keys.
  const record = JSON.parse(predictionLine("django__django-123", "claude-opus-4-8", "diff --git a b"));
  assert.deepEqual(Object.keys(record).sort(), ["instance_id", "model_name_or_path", "model_patch"]);
  assert.equal(record.instance_id, "django__django-123");

  // selectInstances: allow-list first, then the limit.
  const rows = ["a", "b", "c"].map((instance_id) => ({ instance_id }));
  const idsOf = (xs) => xs.map((x) => x.instance_id);
  assert.deepEqual(idsOf(selectInstances(rows, { instances: ["b", "c"], limit: 1 })), ["b"]);
  assert.deepEqual(idsOf(selectInstances(rows, { limit: 2 })), ["a", "b"]);

  console.log("swebench selftest OK");
}
|
|
239
|
+
|
|
240
|
+
// Entry point: `--selftest` runs the offline checks, anything else runs the generator.
const flags = parseArgs(process.argv.slice(2));
if (!flags.selftest) {
  await main(flags);
} else {
  runSelftest();
}
|
package/bin/ada.mjs
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// ada — terminal coding agent. Registers the tsx loader so the TypeScript client
|
|
3
|
+
// runs with no build step, then hands off to the CLI entrypoint (which self-runs).
|
|
4
|
+
import { register } from "tsx/esm/api";
|
|
5
|
+
|
|
6
|
+
// Install the tsx loader first so the TypeScript entrypoint below can be imported directly.
register();

// Resolve the CLI relative to this file, then load it; the entrypoint runs itself on import.
const cliEntry = new URL("../src/client/cli.ts", import.meta.url);
await import(cliEntry);
|