franky-agent 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. franky_agent-0.0.2/LICENSE +21 -0
  2. franky_agent-0.0.2/PKG-INFO +328 -0
  3. franky_agent-0.0.2/README.md +301 -0
  4. franky_agent-0.0.2/franky/__init__.py +24 -0
  5. franky_agent-0.0.2/franky/_install.py +113 -0
  6. franky_agent-0.0.2/franky/cli.py +615 -0
  7. franky_agent-0.0.2/franky/config.py +192 -0
  8. franky_agent-0.0.2/franky/container.py +450 -0
  9. franky_agent-0.0.2/franky/economics.py +199 -0
  10. franky_agent-0.0.2/franky/egress.py +88 -0
  11. franky_agent-0.0.2/franky/engine.py +286 -0
  12. franky_agent-0.0.2/franky/jira.py +176 -0
  13. franky_agent-0.0.2/franky/persona.md +12 -0
  14. franky_agent-0.0.2/franky/prompt.py +168 -0
  15. franky_agent-0.0.2/franky/task.py +114 -0
  16. franky_agent-0.0.2/franky/update_check.py +388 -0
  17. franky_agent-0.0.2/franky/userconfig.py +243 -0
  18. franky_agent-0.0.2/franky_agent.egg-info/PKG-INFO +328 -0
  19. franky_agent-0.0.2/franky_agent.egg-info/SOURCES.txt +38 -0
  20. franky_agent-0.0.2/franky_agent.egg-info/dependency_links.txt +1 -0
  21. franky_agent-0.0.2/franky_agent.egg-info/entry_points.txt +2 -0
  22. franky_agent-0.0.2/franky_agent.egg-info/requires.txt +8 -0
  23. franky_agent-0.0.2/franky_agent.egg-info/top_level.txt +1 -0
  24. franky_agent-0.0.2/pyproject.toml +51 -0
  25. franky_agent-0.0.2/setup.cfg +4 -0
  26. franky_agent-0.0.2/tests/test_cli.py +1009 -0
  27. franky_agent-0.0.2/tests/test_config.py +264 -0
  28. franky_agent-0.0.2/tests/test_container.py +551 -0
  29. franky_agent-0.0.2/tests/test_doc_coherence.py +172 -0
  30. franky_agent-0.0.2/tests/test_economics.py +226 -0
  31. franky_agent-0.0.2/tests/test_egress.py +127 -0
  32. franky_agent-0.0.2/tests/test_engine.py +243 -0
  33. franky_agent-0.0.2/tests/test_eval.py +258 -0
  34. franky_agent-0.0.2/tests/test_install.py +182 -0
  35. franky_agent-0.0.2/tests/test_jira.py +325 -0
  36. franky_agent-0.0.2/tests/test_prompt.py +181 -0
  37. franky_agent-0.0.2/tests/test_release.py +487 -0
  38. franky_agent-0.0.2/tests/test_task.py +145 -0
  39. franky_agent-0.0.2/tests/test_update_check.py +493 -0
  40. franky_agent-0.0.2/tests/test_userconfig.py +320 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Viet Tran
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,328 @@
1
+ Metadata-Version: 2.4
2
+ Name: franky-agent
3
+ Version: 0.0.2
4
+ Summary: Franky - a lean personal coding agent that builds in a hardened container and opens a PR.
5
+ Author: Viet Tran
6
+ License: MIT
7
+ Keywords: coding-agent,cli,docker,github,automation,pull-request
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Build Tools
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: click>=8.1
22
+ Requires-Dist: tomli>=2.0; python_version < "3.11"
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.0; extra == "dev"
25
+ Requires-Dist: ruff<0.16,>=0.15; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # Franky
29
+
30
+ A lean personal coding agent. Hand it a GitHub issue or a sentence, and it runs a
31
+ coding agent inside a fresh, hardened Docker container that clones the repo,
32
+ implements the change, and opens a pull request for you to review.
33
+
34
+ ```
35
+ franky build https://github.com/you/repo/issues/42
36
+ franky build jira FOO-123 --repo you/repo
37
+ franky build "add a --json flag to the export command" --repo you/repo
38
+ franky build "fix the flaky retry test" --repo you/repo --engine claude
39
+
40
+ # Got review comments or red CI on a Franky PR? Iterate on it with follow-up commits.
41
+ franky iterate https://github.com/you/repo/pull/42
42
+ ```
43
+
44
+ The agent is autonomous inside the container. The safety gate is four layers:
45
+ a hardened container, a default-deny egress allowlist (the container reaches only
46
+ your provider + GitHub + registries, via a creds-blind proxy), a fail-closed
47
+ trusted-repo allowlist, and the fact that Franky opens a PR rather than merging -
48
+ a human still reviews every change.
49
+
50
+ ## Why
51
+
52
+ Most coding-agent wrappers either lock you into one vendor or run the agent
53
+ straight on your machine with your real credentials and shell. Franky does
54
+ neither: the engine is pluggable, and the agent only ever runs inside a
55
+ throwaway container with a narrowly scoped token.
56
+
57
+ ## Engines
58
+
59
+ Franky is vendor-neutral. The engine that runs inside the container is pluggable;
60
+ all engines ship in the one image.
61
+
62
+ | Engine | CLI | Auth | Notes |
63
+ |--------|-----|------|-------|
64
+ | `pi` (default) | `@earendil-works/pi-coding-agent` | BYOK provider key | MIT, 15+ providers (OpenRouter, Anthropic, OpenAI, Ollama, ...) |
65
+ | `claude` | `@anthropic-ai/claude-code` | `CLAUDE_CODE_OAUTH_TOKEN` | Most capable; uses your Claude subscription |
66
+ | `codex` | `@openai/codex` | `CODEX_API_KEY` or `OPENAI_API_KEY` | OpenAI Codex headless (`codex exec`); API-key auth only |
67
+
68
+ Select with `--engine pi|claude|codex`, or set `FRANKY_ENGINE`. Resolution order:
69
+ `--engine` flag > `FRANKY_ENGINE` > default `pi`.
70
+
71
+ ## Install
72
+
73
+ Franky is published to PyPI as `franky-agent` (the installed command is `franky`):
74
+
75
+ ```
76
+ uv tool install franky-agent
77
+ # or pipx:
78
+ pipx install franky-agent
79
+ # or:
80
+ pip install franky-agent
81
+ ```
82
+
83
+ On first run the CLI pulls the version-pinned, public GHCR images
84
+ (`ghcr.io/vietlabs-work/franky:X.Y.Z` and `ghcr.io/vietlabs-work/franky-proxy:X.Y.Z`),
85
+ so all you need is Docker - no registry login. (Point `FRANKY_GHCR_REPO` at a different
86
+ namespace if you host the images elsewhere.)
87
+
88
+ To move to a newer release later, run `franky update` - it detects how you
89
+ installed (uv tool / pipx / pip) and reinstalls the latest version from PyPI via the same
90
+ manager. `franky update --force` reinstalls even when already current. (A dev
91
+ checkout updates via `git`; `franky update` is a no-op there.)
92
+
93
+ **For local development**, skip GHCR and point at local builds:
94
+
95
+ ```
96
+ docker build -t franky .
97
+ docker build -t franky-proxy proxy/
98
+ export FRANKY_IMAGE=franky
99
+ export FRANKY_PROXY_IMAGE=franky-proxy
100
+ ```
101
+
102
+ Franky is agent-agnostic to develop, not just to run: `AGENTS.md` is the canonical
103
+ agent guide (build/test commands, architecture, the load-bearing invariants, how to
104
+ add an engine), so Codex, Cursor, pi, or Claude Code all start with the same context.
105
+ `CLAUDE.md` is a symlink to it.
106
+
107
+ ## Quickstart
108
+
109
+ 1. Install Docker. Images are pulled automatically from GHCR on first run; for
110
+ local development only, build them manually instead (both are covered in Install above).
111
+ 2. Install Franky from a source checkout (or use one of the PyPI commands under Install above):
112
+ ```
113
+ python3 -m venv .venv && .venv/bin/pip install -e .
114
+ ```
115
+ 3. Configure credentials with the interactive wizard:
116
+ ```
117
+ franky config init
118
+ ```
119
+ This writes `~/.franky/config` (mode 0600) and walks you through engine selection,
120
+ `FRANKY_ALLOWED_REPOS`, `GH_TOKEN`, and engine creds. You can also set individual
121
+ keys later:
122
+ ```
123
+ franky config set FRANKY_ALLOWED_REPOS
124
+ franky config set GH_TOKEN # secret - entered at a hidden prompt
125
+ franky config list # view the file (secrets masked)
126
+ franky config path # show where the file lives
127
+ ```
128
+ At minimum you need:
129
+ - `FRANKY_ALLOWED_REPOS` - the trusted-repo allowlist (see below).
130
+ - `GH_TOKEN` - scoped to contents + pull_requests on those repos.
131
+ - the selected engine's creds (a provider key for `pi`,
132
+ `CLAUDE_CODE_OAUTH_TOKEN` for `claude`, or `CODEX_API_KEY` / `OPENAI_API_KEY`
133
+ for `codex`).
134
+ - for JIRA tasks: `JIRA_BASE_URL`, `JIRA_EMAIL`, `JIRA_API_TOKEN` (host-side only,
135
+ never forwarded into the container).
136
+ 4. Run:
137
+ ```
138
+ franky build <gh-issue-url | jira KEY | "prose"> [--repo owner/repo] [--engine pi|claude|codex] [--plan-first]
139
+ ```
140
+
141
+ Each run writes a redacted log to `tasks/<timestamp>.log` and prints the PR URL.
142
+
143
+ `--plan-first` adds an opt-in approval gate for sensitive targets: Franky runs a
144
+ read-only planning pass, prints the plan, and waits for explicit confirmation
145
+ before it builds or opens a PR. Decline (or run non-interactively) and nothing is
146
+ written. The default stays autonomous - the sandbox plus PR review is the gate.
147
+
148
+ `franky build` also does a quick (~1s, cached) check for a newer release and
149
+ prints a one-line hint if one exists - it never blocks the build. Silence it with
150
+ `FRANKY_NO_UPDATE_CHECK=1`, or set `FRANKY_AUTO_UPDATE=1` to auto-install the new
151
+ release for your next run. (Both are host-CLI only; neither reaches the container.)
152
+
153
+ ## Iterating on a PR
154
+
155
+ Franky is no longer one-shot. When a PR it opened gets review comments or a red CI
156
+ check, point it back at the PR and it responds with **additive follow-up commits** on
157
+ the same branch:
158
+
159
+ ```
160
+ franky iterate https://github.com/you/repo/pull/42 [--engine pi|claude|codex]
161
+ ```
162
+
163
+ It runs the **same hardened, egress-controlled container** as `franky build`, but instead
164
+ of starting fresh it checks out the PR's existing branch, reads the review comments and
165
+ failing checks with `gh` (in-container, already allowlisted), addresses them, runs the
166
+ tests green, and pushes. It **never force-pushes, never rewrites history, never opens a new
167
+ PR, and never merges** - a human still reviews every change. The PR URL carries the repo, so
168
+ there is no `--repo` flag, and the repo allowlist gates it exactly like `build`.
169
+
170
+ Unlike `build` (which prints the new PR URL to stdout), `iterate` opens no new PR - on a
171
+ clean run it writes only an economics summary and a labeled completion line to stderr, and
172
+ nothing to stdout. Review the existing PR for the new commits. The redacted transcript still
173
+ lands in `tasks/<timestamp>.log`.
174
+
175
+ `iterate` is intended for Franky's **own** PRs. As a guardrail it is instructed to confirm
176
+ the PR's head branch is a `franky/*` branch in the same repo (not a fork) before touching
177
+ anything, and to stop otherwise. This is a prompt-level guard in the same register as the
178
+ "never merge" rule (the agent is autonomous); the hard bounds remain the repo allowlist, the
179
+ egress cage, and PR-not-merge. See the Security section.
180
+
181
+ ## Security
182
+
183
+ Read this before pointing Franky at anything.
184
+
185
+ **Container hardening is load-bearing.** Because the agent runs autonomously
186
+ (claude with `--dangerously-skip-permissions`, codex with
187
+ `--dangerously-bypass-approvals-and-sandbox`, pi with its default tools), the
188
+ OS-level isolation is what bounds it, not tool-permission prompts. Franky runs the
189
+ container with:
190
+
191
+ - `--cap-drop=ALL`, then adds back only `CAP_SETUID`/`CAP_SETGID` (needed by the
192
+ rootless Docker daemon - see "Docker-in-Docker" below)
193
+ - `--read-only` root filesystem; writable paths only via `--tmpfs` (the clone, the
194
+ agent's HOME, and the rootless Docker data root, each owned by the non-root uid)
195
+ - `--pids-limit` and `--memory` caps (with `--memory-swap` = `--memory`, no swap)
196
+ - a non-root user (uid 1001) baked into the image
197
+ - **no Docker socket mount and no host bind mounts** - the repo is cloned inside
198
+ the container and the nested Docker daemon is rootless, so the agent never
199
+ touches your filesystem or your host's Docker daemon
200
+ - only the selected engine's required env vars passed in; nothing else
201
+
202
+ ### Docker-in-Docker (always on)
203
+
204
+ Many repos cannot run their test suite without Docker (compose-based integration
205
+ tests, testcontainers, a `docker build` step). So every Franky container runs its
206
+ **own rootless Docker daemon** - the agent can `docker build`, `docker compose up`
207
+ test infra, and run testcontainers entirely inside the sandbox. Nothing to enable;
208
+ it is always available.
209
+
210
+ This is rootless DinD (a daemon running as the non-root `franky` user inside its
211
+ own user namespace), **not** a mounted host Docker socket and **not** `--privileged`.
212
+ It needs a few specific, minimal relaxations of the locked profile, applied to every
213
+ task and verified on Docker Desktop for Mac:
214
+
215
+ - `--security-opt=no-new-privileges` is **dropped** (it blocks the setuid uid-map
216
+ helpers rootless Docker needs to start),
217
+ - `--security-opt=systempaths=unconfined` (unmasks `/proc` so the nested runtime can
218
+ mount it for inner containers - far narrower than `--privileged`/`seccomp=unconfined`),
219
+ - `CAP_SETUID`/`CAP_SETGID` added back on top of `--cap-drop=ALL`, and `/dev/net/tun`
220
+ for the rootless network stack.
221
+
222
+ The blast radius stays bounded by everything else (rootless user namespace, read-only
223
+ root, the egress cage below, no host FS, repo allowlist, PR-not-merge). The nested
224
+ daemon's image pulls and `docker build` fetches go **through the same egress proxy**
225
+ (it inherits `HTTP(S)_PROXY`), and inner containers have no route to the internet
226
+ except that proxy - verified: an off-allowlist `docker build` `FROM` or `RUN` fetch is
227
+ refused by the proxy, and a nested container's direct egress has no route out.
228
+
229
+ ### Egress control
230
+
231
+ The big v0 hole - a prompt-injected agent exfiltrating the creds it carries -
232
+ is now closed by a default-deny egress allowlist. The task container runs on a
233
+ Docker `--internal` network with NO route to the internet; its only peer is a
234
+ Squid proxy enforcing a domain allowlist.
235
+
236
+ ```
237
+ Docker --internal network (no internet route)
238
+ +-----------------------------------------------------------------+
239
+ | |
240
+ | [ task container ] --HTTP(S)_PROXY--> [ franky-proxy (Squid) ]-+--> allowlisted
241
+ | --dns 127.0.0.1 default-deny allowlist | hosts only
242
+ | (no creds on argv) (sees NO creds) |
243
+ +-----------------------------------------------------------------+
244
+ ```
245
+
246
+ - **Blind CONNECT, no creds at the proxy.** Egress is HTTPS-only (port 443):
247
+ Squid tunnels it with a blind CONNECT (no TLS termination), so it never sees the
248
+ bytes - your Claude token or BYOK key tunnel through encrypted and are never
249
+ visible to the proxy. Plain HTTP (port 80) is denied outright, so there is no
250
+ cleartext, proxy-visible path even to an allowlisted host.
251
+ - **DNS is killed in the task container** (`--dns 127.0.0.1`), so a hostile agent
252
+ cannot resolve or reach an off-allowlist host directly; only the proxy resolves.
253
+ - **Fail-closed.** Franky refuses to start the task unless the proxy is confirmed
254
+ healthy, and the proxy refuses to start with an empty or malformed allowlist.
255
+ - **The allowlist** covers: your engine's provider host (e.g. `api.anthropic.com`,
256
+ `openrouter.ai`, `api.openai.com`), GitHub (clone/push/PR), the npm + PyPI
257
+ registries, and - because
258
+ Docker-in-Docker is always on - a broad set of well-known **container image
259
+ registries** (Docker Hub + CDN, GHCR, GCR/Artifact Registry, `registry.k8s.io`,
260
+ Quay, ECR Public, MCR, GitLab, plus the CDNs they serve layer blobs from). Add
261
+ extra hosts with `FRANKY_EXTRA_ALLOWED_DOMAINS` (comma-separated).
262
+
263
+ **Residual risk.** The allowlisted hosts are high-trust, but the agent can still
264
+ reach GitHub, your model provider, the package registries, and the container
265
+ registries above - so a determined injection could still smuggle data to one of
266
+ those (e.g. a gist, an issue comment). Treat allowlisted destinations as trusted,
267
+ not inert. Two consequences of always-on DinD specifically:
268
+
269
+ - **Wider reachable set + a relaxed profile on every task** (incl. non-Docker ones):
270
+ the registry allowlist is broad (notably `.cloudfront.net`, a shared CDN), and the
271
+ hardening relaxations above apply universally. This is a deliberate trade for
272
+ "building/testing just works".
273
+ - **The agent can move its own creds into nested containers** (e.g. `docker run -e
274
+ GH_TOKEN ...`). The egress allowlist still bounds *where* anything can go and
275
+ PR-not-merge still bounds the damage, but the secret is no longer confined to a
276
+ single process. There is also no per-inner-container resource limit and no
277
+ cross-task concurrency cap - the outer `--memory`/`--pids-limit` caps (~8 GB; tmpfs image
278
+ storage is RAM-backed) bound one task's whole container tree.
279
+
280
+ v0 mitigations, still in force:
281
+
282
+ 1. **Fail-closed trusted-repo allowlist.** Franky refuses any repo not in
283
+ `FRANKY_ALLOWED_REPOS`, and refuses everything if that var is unset. This
284
+ limits injection to content you already trust.
285
+
286
+ The allowlist supports per-segment glob patterns (case-insensitive):
287
+ - `my-org/my-repo` - exact match
288
+ - `my-org/*` - every repo in `my-org`
289
+ - `my-org/team-*` - repos with a name prefix
290
+ - `*` - every repo the `GH_TOKEN` can reach (its **full scope** - a conscious opt-in,
291
+ not the default; use only if the token is already narrowly scoped)
292
+ 2. **Scope your tokens narrowly.** Give `GH_TOKEN` only contents + pull_requests
293
+ on the target repos. Prefer a low-spend or separate API key for `pi`.
294
+ 3. **PR, not merge.** Franky only opens PRs. You review before anything lands.
295
+ `franky iterate` follows the same rule: it only pushes additive commits to an
296
+ existing PR's branch (never force-push, never merge, never a new PR), and the
297
+ "act only on a `franky/*` branch in the same repo" check is prompt-level - so
298
+ point `iterate` only at PRs Franky itself opened, in an allowlisted repo.
299
+
300
+ **GitHub Actions warning.** Opening a PR can trigger workflows. A PR built from an
301
+ attacker-influenced issue could run attacker-influenced workflow code with your
302
+ repo's Actions secrets. Review workflow changes in the PR diff, and consider
303
+ requiring approval for workflow runs on PRs.
304
+
305
+ ## Evals
306
+
307
+ Agent quality is probabilistic, so changes to the persona, prompt, model, or profile
308
+ should be gated on a measured **pass-rate**, not a hunch. The eval harness runs a golden
309
+ task set through the *real* Franky flow N times and reports pass-rate, plus a comparison
310
+ mode that reports the delta between two configs (e.g. one engine vs another).
311
+
312
+ It is **opt-in and out-of-band** (like the manual egress check) - it needs real Docker +
313
+ creds + a throwaway sandbox repo, so it is not part of the fast hermetic unit suite. Point
314
+ `evals/tasks.json` at your sandbox repo and run:
315
+
316
+ ```
317
+ make eval ARGS="-n 3 --engine pi --compare-engine codex"
318
+ ```
319
+
320
+ See [`evals/README.md`](evals/README.md) for setup, the task schema, and the success
321
+ checkers.
322
+
323
+ ## Status
324
+
325
+ v0.0.2. Real end-to-end runs need live engine credentials, supplied out-of-band by
326
+ the operator. The pieces under test here are the container hardening, the egress
327
+ allowlist + proxy orchestration, the secret redaction, the trusted-repo allowlist,
328
+ and the engine abstraction.
@@ -0,0 +1,301 @@
1
+ # Franky
2
+
3
+ A lean personal coding agent. Hand it a GitHub issue or a sentence, and it runs a
4
+ coding agent inside a fresh, hardened Docker container that clones the repo,
5
+ implements the change, and opens a pull request for you to review.
6
+
7
+ ```
8
+ franky build https://github.com/you/repo/issues/42
9
+ franky build jira FOO-123 --repo you/repo
10
+ franky build "add a --json flag to the export command" --repo you/repo
11
+ franky build "fix the flaky retry test" --repo you/repo --engine claude
12
+
13
+ # Got review comments or red CI on a Franky PR? Iterate on it with follow-up commits.
14
+ franky iterate https://github.com/you/repo/pull/42
15
+ ```
16
+
17
+ The agent is autonomous inside the container. The safety gate is four layers:
18
+ a hardened container, a default-deny egress allowlist (the container reaches only
19
+ your provider + GitHub + registries, via a creds-blind proxy), a fail-closed
20
+ trusted-repo allowlist, and the fact that Franky opens a PR rather than merging -
21
+ a human still reviews every change.
22
+
23
+ ## Why
24
+
25
+ Most coding-agent wrappers either lock you into one vendor or run the agent
26
+ straight on your machine with your real credentials and shell. Franky does
27
+ neither: the engine is pluggable, and the agent only ever runs inside a
28
+ throwaway container with a narrowly scoped token.
29
+
30
+ ## Engines
31
+
32
+ Franky is vendor-neutral. The engine that runs inside the container is pluggable;
33
+ all engines ship in the one image.
34
+
35
+ | Engine | CLI | Auth | Notes |
36
+ |--------|-----|------|-------|
37
+ | `pi` (default) | `@earendil-works/pi-coding-agent` | BYOK provider key | MIT, 15+ providers (OpenRouter, Anthropic, OpenAI, Ollama, ...) |
38
+ | `claude` | `@anthropic-ai/claude-code` | `CLAUDE_CODE_OAUTH_TOKEN` | Most capable; uses your Claude subscription |
39
+ | `codex` | `@openai/codex` | `CODEX_API_KEY` or `OPENAI_API_KEY` | OpenAI Codex headless (`codex exec`); API-key auth only |
40
+
41
+ Select with `--engine pi|claude|codex`, or set `FRANKY_ENGINE`. Resolution order:
42
+ `--engine` flag > `FRANKY_ENGINE` > default `pi`.
43
+
44
+ ## Install
45
+
46
+ Franky is published to PyPI as `franky-agent` (the installed command is `franky`):
47
+
48
+ ```
49
+ uv tool install franky-agent
50
+ # or pipx:
51
+ pipx install franky-agent
52
+ # or:
53
+ pip install franky-agent
54
+ ```
55
+
56
+ On first run the CLI pulls the version-pinned, public GHCR images
57
+ (`ghcr.io/vietlabs-work/franky:X.Y.Z` and `ghcr.io/vietlabs-work/franky-proxy:X.Y.Z`),
58
+ so all you need is Docker - no registry login. (Point `FRANKY_GHCR_REPO` at a different
59
+ namespace if you host the images elsewhere.)
60
+
61
+ To move to a newer release later, run `franky update` - it detects how you
62
+ installed (uv tool / pipx / pip) and reinstalls the latest version from PyPI via the same
63
+ manager. `franky update --force` reinstalls even when already current. (A dev
64
+ checkout updates via `git`; `franky update` is a no-op there.)
65
+
66
+ **For local development**, skip GHCR and point at local builds:
67
+
68
+ ```
69
+ docker build -t franky .
70
+ docker build -t franky-proxy proxy/
71
+ export FRANKY_IMAGE=franky
72
+ export FRANKY_PROXY_IMAGE=franky-proxy
73
+ ```
74
+
75
+ Franky is agent-agnostic to develop, not just to run: `AGENTS.md` is the canonical
76
+ agent guide (build/test commands, architecture, the load-bearing invariants, how to
77
+ add an engine), so Codex, Cursor, pi, or Claude Code all start with the same context.
78
+ `CLAUDE.md` is a symlink to it.
79
+
80
+ ## Quickstart
81
+
82
+ 1. Install Docker. Images are pulled automatically from GHCR on first run; for
83
+ local development only, build them manually instead (both are covered in Install above).
84
+ 2. Install Franky from a source checkout (or use one of the PyPI commands under Install above):
85
+ ```
86
+ python3 -m venv .venv && .venv/bin/pip install -e .
87
+ ```
88
+ 3. Configure credentials with the interactive wizard:
89
+ ```
90
+ franky config init
91
+ ```
92
+ This writes `~/.franky/config` (mode 0600) and walks you through engine selection,
93
+ `FRANKY_ALLOWED_REPOS`, `GH_TOKEN`, and engine creds. You can also set individual
94
+ keys later:
95
+ ```
96
+ franky config set FRANKY_ALLOWED_REPOS
97
+ franky config set GH_TOKEN # secret - entered at a hidden prompt
98
+ franky config list # view the file (secrets masked)
99
+ franky config path # show where the file lives
100
+ ```
101
+ At minimum you need:
102
+ - `FRANKY_ALLOWED_REPOS` - the trusted-repo allowlist (see below).
103
+ - `GH_TOKEN` - scoped to contents + pull_requests on those repos.
104
+ - the selected engine's creds (a provider key for `pi`,
105
+ `CLAUDE_CODE_OAUTH_TOKEN` for `claude`, or `CODEX_API_KEY` / `OPENAI_API_KEY`
106
+ for `codex`).
107
+ - for JIRA tasks: `JIRA_BASE_URL`, `JIRA_EMAIL`, `JIRA_API_TOKEN` (host-side only,
108
+ never forwarded into the container).
109
+ 4. Run:
110
+ ```
111
+ franky build <gh-issue-url | jira KEY | "prose"> [--repo owner/repo] [--engine pi|claude|codex] [--plan-first]
112
+ ```
113
+
114
+ Each run writes a redacted log to `tasks/<timestamp>.log` and prints the PR URL.
115
+
116
+ `--plan-first` adds an opt-in approval gate for sensitive targets: Franky runs a
117
+ read-only planning pass, prints the plan, and waits for explicit confirmation
118
+ before it builds or opens a PR. Decline (or run non-interactively) and nothing is
119
+ written. The default stays autonomous - the sandbox plus PR review is the gate.
120
+
121
+ `franky build` also does a quick (~1s, cached) check for a newer release and
122
+ prints a one-line hint if one exists - it never blocks the build. Silence it with
123
+ `FRANKY_NO_UPDATE_CHECK=1`, or set `FRANKY_AUTO_UPDATE=1` to auto-install the new
124
+ release for your next run. (Both are host-CLI only; neither reaches the container.)
125
+
126
+ ## Iterating on a PR
127
+
128
+ Franky is no longer one-shot. When a PR it opened gets review comments or a red CI
129
+ check, point it back at the PR and it responds with **additive follow-up commits** on
130
+ the same branch:
131
+
132
+ ```
133
+ franky iterate https://github.com/you/repo/pull/42 [--engine pi|claude|codex]
134
+ ```
135
+
136
+ It runs the **same hardened, egress-controlled container** as `franky build`, but instead
137
+ of starting fresh it checks out the PR's existing branch, reads the review comments and
138
+ failing checks with `gh` (in-container, already allowlisted), addresses them, runs the
139
+ tests green, and pushes. It **never force-pushes, never rewrites history, never opens a new
140
+ PR, and never merges** - a human still reviews every change. The PR URL carries the repo, so
141
+ there is no `--repo` flag, and the repo allowlist gates it exactly like `build`.
142
+
143
+ Unlike `build` (which prints the new PR URL to stdout), `iterate` opens no new PR - on a
144
+ clean run it writes only an economics summary and a labeled completion line to stderr, and
145
+ nothing to stdout. Review the existing PR for the new commits. The redacted transcript still
146
+ lands in `tasks/<timestamp>.log`.
147
+
148
+ `iterate` is intended for Franky's **own** PRs. As a guardrail it is instructed to confirm
149
+ the PR's head branch is a `franky/*` branch in the same repo (not a fork) before touching
150
+ anything, and to stop otherwise. This is a prompt-level guard in the same register as the
151
+ "never merge" rule (the agent is autonomous); the hard bounds remain the repo allowlist, the
152
+ egress cage, and PR-not-merge. See the Security section.
153
+
154
+ ## Security
155
+
156
+ Read this before pointing Franky at anything.
157
+
158
+ **Container hardening is load-bearing.** Because the agent runs autonomously
159
+ (claude with `--dangerously-skip-permissions`, codex with
160
+ `--dangerously-bypass-approvals-and-sandbox`, pi with its default tools), the
161
+ OS-level isolation is what bounds it, not tool-permission prompts. Franky runs the
162
+ container with:
163
+
164
+ - `--cap-drop=ALL`, then adds back only `CAP_SETUID`/`CAP_SETGID` (needed by the
165
+ rootless Docker daemon - see "Docker-in-Docker" below)
166
+ - `--read-only` root filesystem; writable paths only via `--tmpfs` (the clone, the
167
+ agent's HOME, and the rootless Docker data root, each owned by the non-root uid)
168
+ - `--pids-limit` and `--memory` caps (with `--memory-swap` = `--memory`, no swap)
169
+ - a non-root user (uid 1001) baked into the image
170
+ - **no Docker socket mount and no host bind mounts** - the repo is cloned inside
171
+ the container and the nested Docker daemon is rootless, so the agent never
172
+ touches your filesystem or your host's Docker daemon
173
+ - only the selected engine's required env vars passed in; nothing else
174
+
175
+ ### Docker-in-Docker (always on)
176
+
177
+ Many repos cannot run their test suite without Docker (compose-based integration
178
+ tests, testcontainers, a `docker build` step). So every Franky container runs its
179
+ **own rootless Docker daemon** - the agent can `docker build`, `docker compose up`
180
+ test infra, and run testcontainers entirely inside the sandbox. Nothing to enable;
181
+ it is always available.
182
+
183
+ This is rootless DinD (a daemon running as the non-root `franky` user inside its
184
+ own user namespace), **not** a mounted host Docker socket and **not** `--privileged`.
185
+ It needs a few specific, minimal relaxations of the locked profile, applied to every
186
+ task and verified on Docker Desktop for Mac:
187
+
188
+ - `--security-opt=no-new-privileges` is **dropped** (it blocks the setuid uid-map
189
+ helpers rootless Docker needs to start),
190
+ - `--security-opt=systempaths=unconfined` (unmasks `/proc` so the nested runtime can
191
+ mount it for inner containers - far narrower than `--privileged`/`seccomp=unconfined`),
192
+ - `CAP_SETUID`/`CAP_SETGID` added back on top of `--cap-drop=ALL`, and `/dev/net/tun`
193
+ for the rootless network stack.
194
+
195
+ The blast radius stays bounded by everything else (rootless user namespace, read-only
196
+ root, the egress cage below, no host FS, repo allowlist, PR-not-merge). The nested
197
+ daemon's image pulls and `docker build` fetches go **through the same egress proxy**
198
+ (it inherits `HTTP(S)_PROXY`), and inner containers have no route to the internet
199
+ except that proxy - verified: an off-allowlist `docker build` `FROM` or `RUN` fetch is
200
+ refused by the proxy, and a nested container's direct egress has no route out.
201
+
202
+ ### Egress control
203
+
204
+ The big v0 hole - a prompt-injected agent exfiltrating the creds it carries -
205
+ is now closed by a default-deny egress allowlist. The task container runs on a
206
+ Docker `--internal` network with NO route to the internet; its only peer is a
207
+ Squid proxy enforcing a domain allowlist.
208
+
209
+ ```
210
+ Docker --internal network (no internet route)
211
+ +-----------------------------------------------------------------+
212
+ | |
213
+ | [ task container ] --HTTP(S)_PROXY--> [ franky-proxy (Squid) ]-+--> allowlisted
214
+ | --dns 127.0.0.1 default-deny allowlist | hosts only
215
+ | (no creds on argv) (sees NO creds) |
216
+ +-----------------------------------------------------------------+
217
+ ```
218
+
219
+ - **Blind CONNECT, no creds at the proxy.** Egress is HTTPS-only (port 443):
220
+ Squid tunnels it with a blind CONNECT (no TLS termination), so it never sees the
221
+ plaintext - your Claude token or BYOK key tunnels through encrypted and is never
222
+ visible to the proxy. Plain HTTP (port 80) is denied outright, so there is no
223
+ cleartext, proxy-visible path even to an allowlisted host.
224
+ - **DNS is killed in the task container** (`--dns 127.0.0.1`), so a hostile agent
225
+ cannot resolve or reach an off-allowlist host directly; only the proxy resolves.
226
+ - **Fail-closed.** Franky refuses to start the task unless the proxy is confirmed
227
+ healthy, and the proxy refuses to start with an empty or malformed allowlist.
228
+ - **The allowlist** covers: your engine's provider host (e.g. `api.anthropic.com`,
229
+ `openrouter.ai`, `api.openai.com`), GitHub (clone/push/PR), the npm + PyPI
230
+ registries, and - because
231
+ Docker-in-Docker is always on - a broad set of well-known **container image
232
+ registries** (Docker Hub + CDN, GHCR, GCR/Artifact Registry, `registry.k8s.io`,
233
+ Quay, ECR Public, MCR, GitLab, plus the CDNs they serve layer blobs from). Add
234
+ extra hosts with `FRANKY_EXTRA_ALLOWED_DOMAINS` (comma-separated).
235
+
236
+ **Residual risk.** The allowlisted hosts are high-trust, but the agent can still
237
+ reach GitHub, your model provider, the package registries, and the container
238
+ registries above - so a determined injection could still smuggle data to one of
239
+ those (e.g. a gist, an issue comment). Treat allowlisted destinations as trusted,
240
+ not inert. Two consequences of always-on DinD specifically:
241
+
242
+ - **Wider reachable set + a relaxed profile on every task** (incl. non-Docker ones):
243
+ the registry allowlist is broad (notably `.cloudfront.net`, a shared CDN), and the
244
+ hardening relaxations above apply universally. This is a deliberate trade for
245
+ "building/testing just works".
246
+ - **The agent can move its own creds into nested containers** (e.g. `docker run -e
247
+ GH_TOKEN ...`). The egress allowlist still bounds *where* anything can go and
248
+ PR-not-merge still bounds the damage, but the secret is no longer confined to a
249
+ single process. There is also no per-inner-container resource limit and no
250
+ cross-task concurrency cap - only the outer `--memory`/`--pids` cap (~8 GB; the tmpfs
251
+ image storage counts as RAM) bounds one task's whole container tree.
252
+
253
+ v0 mitigations, still in force:
254
+
255
+ 1. **Fail-closed trusted-repo allowlist.** Franky refuses any repo not in
256
+ `FRANKY_ALLOWED_REPOS`, and refuses everything if that var is unset. This
257
+ limits injection to content you already trust.
258
+
259
+ The allowlist supports per-segment glob patterns (case-insensitive):
260
+ - `my-org/my-repo` - exact match
261
+ - `my-org/*` - every repo in `my-org`
262
+ - `my-org/team-*` - repos with a name prefix
263
+ - `*` - every repo the `GH_TOKEN` can reach (its **full scope** - a conscious opt-in,
264
+ not the default; use only if the token is already narrowly scoped)
265
+ 2. **Scope your tokens narrowly.** Give `GH_TOKEN` only contents + pull_requests
266
+ on the target repos. Prefer a low-spend or separate API key for `pi`.
267
+ 3. **PR, not merge.** Franky only opens PRs. You review before anything lands.
268
+ `franky iterate` follows the same rule: it only pushes additive commits to an
269
+ existing PR's branch (never force-push, never merge, never a new PR), and the
270
+ "act only on a `franky/*` branch in the same repo" check is prompt-level - so
271
+ point `iterate` only at PRs Franky itself opened, in an allowlisted repo.
272
+
273
+ **GitHub Actions warning.** Opening a PR can trigger workflows. A PR built from an
274
+ attacker-influenced issue could run attacker-influenced workflow code with your
275
+ repo's Actions secrets. Review workflow changes in the PR diff, and consider
276
+ requiring approval for workflow runs on PRs.
277
+
278
+ ## Evals
279
+
280
+ Agent quality is probabilistic, so changes to the persona, prompt, model, or profile
281
+ should be gated on a measured **pass-rate**, not a hunch. The eval harness runs a golden
282
+ task set through the *real* Franky flow N times and reports pass-rate, plus a comparison
283
+ mode that reports the delta between two configs (e.g. one engine vs another).
284
+
285
+ It is **opt-in and out-of-band** (like the manual egress check) - it needs real Docker +
286
+ creds + a throwaway sandbox repo, so it is not part of the fast hermetic unit suite. Point
287
+ `evals/tasks.json` at your sandbox repo and run:
288
+
289
+ ```
290
+ make eval ARGS="-n 3 --engine pi --compare-engine codex"
291
+ ```
292
+
293
+ See [`evals/README.md`](evals/README.md) for setup, the task schema, and the success
294
+ checkers.
295
+
296
+ ## Status
297
+
298
+ v0.0.2. Real end-to-end runs need live engine credentials, supplied out-of-band by
299
+ the operator. The pieces under test here are the container hardening, the egress
300
+ allowlist + proxy orchestration, the secret redaction, the trusted-repo allowlist,
301
+ and the engine abstraction.
@@ -0,0 +1,24 @@
1
+ __version__ = "0.0.2"
2
+
3
+
4
def franky_version() -> str:
    """Return the version string of the running Franky.

    Installed package metadata is preferred: it is what
    `pip install franky-agent==X` pins and what the published image tag is
    keyed to. When metadata cannot be read (for example, running from a
    source checkout with no install), the module-level ``__version__`` is
    returned instead.

    If both values resolve but differ, that indicates a dev-environment skew
    (stale editable metadata vs a bumped ``__version__``); a warning is
    printed to stderr and the metadata value is returned.
    """
    import importlib.metadata
    import sys

    from ._install import DIST_NAME

    try:
        installed = importlib.metadata.version(DIST_NAME)
    except Exception:
        # No usable distribution metadata: fall back to the in-source constant.
        return __version__

    # Fast path: metadata and the source constant agree.
    if installed == __version__:
        return installed

    warning = (
        f"franky: version skew - metadata={installed}, __version__={__version__} (trusting metadata)"
    )
    print(warning, file=sys.stderr)
    return installed