@gajae-code/coding-agent 0.5.4 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/dist/types/cli/web-search-cli.d.ts +12 -0
  3. package/dist/types/commands/rlm.d.ts +10 -0
  4. package/dist/types/commands/web-search.d.ts +54 -0
  5. package/dist/types/config/keybindings.d.ts +10 -0
  6. package/dist/types/config/model-profiles.d.ts +2 -1
  7. package/dist/types/config/model-registry.d.ts +3 -0
  8. package/dist/types/config/models-config-schema.d.ts +3 -0
  9. package/dist/types/config/settings-schema.d.ts +61 -3
  10. package/dist/types/edit/notebook.d.ts +3 -0
  11. package/dist/types/eval/py/executor.d.ts +3 -0
  12. package/dist/types/eval/py/kernel.d.ts +3 -1
  13. package/dist/types/eval/py/runtime.d.ts +9 -1
  14. package/dist/types/exec/bash-executor.d.ts +4 -0
  15. package/dist/types/extensibility/custom-tools/types.d.ts +2 -0
  16. package/dist/types/extensibility/custom-tools/wrapper.d.ts +1 -0
  17. package/dist/types/extensibility/extensions/types.d.ts +2 -0
  18. package/dist/types/extensibility/extensions/wrapper.d.ts +1 -0
  19. package/dist/types/gjc-runtime/launch-tmux.d.ts +6 -0
  20. package/dist/types/gjc-runtime/session-state-sidecar.d.ts +14 -0
  21. package/dist/types/gjc-runtime/tmux-common.d.ts +6 -0
  22. package/dist/types/gjc-runtime/tmux-gc.d.ts +3 -3
  23. package/dist/types/gjc-runtime/tmux-sessions.d.ts +4 -0
  24. package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +18 -0
  25. package/dist/types/goals/state.d.ts +1 -1
  26. package/dist/types/goals/tools/goal-tool.d.ts +2 -0
  27. package/dist/types/main.d.ts +11 -0
  28. package/dist/types/modes/components/custom-editor.d.ts +4 -2
  29. package/dist/types/modes/components/custom-model-preset-wizard.d.ts +12 -0
  30. package/dist/types/modes/components/model-selector.d.ts +5 -2
  31. package/dist/types/modes/components/status-line.d.ts +4 -1
  32. package/dist/types/modes/controllers/input-controller.d.ts +3 -0
  33. package/dist/types/modes/controllers/selector-controller.d.ts +1 -0
  34. package/dist/types/modes/print-mode.d.ts +6 -0
  35. package/dist/types/modes/rpc/rpc-client.d.ts +21 -0
  36. package/dist/types/modes/rpc/rpc-socket-security.d.ts +7 -0
  37. package/dist/types/modes/rpc/rpc-types.d.ts +13 -0
  38. package/dist/types/modes/shared/agent-wire/command-dispatch.d.ts +2 -0
  39. package/dist/types/modes/shared/agent-wire/unattended-session.d.ts +1 -0
  40. package/dist/types/rlm/artifacts.d.ts +9 -0
  41. package/dist/types/rlm/complete-research-tool.d.ts +35 -0
  42. package/dist/types/rlm/data-context.d.ts +6 -0
  43. package/dist/types/rlm/index.d.ts +35 -0
  44. package/dist/types/rlm/notebook.d.ts +12 -0
  45. package/dist/types/rlm/preset.d.ts +23 -0
  46. package/dist/types/rlm/python-tool.d.ts +16 -0
  47. package/dist/types/rlm/report.d.ts +14 -0
  48. package/dist/types/rlm/types.d.ts +37 -0
  49. package/dist/types/sdk.d.ts +7 -0
  50. package/dist/types/session/agent-session.d.ts +21 -0
  51. package/dist/types/tools/bash-allowed-prefixes.d.ts +6 -1
  52. package/dist/types/tools/browser/attach.d.ts +19 -3
  53. package/dist/types/tools/browser/registry.d.ts +15 -0
  54. package/dist/types/tools/browser/render.d.ts +3 -0
  55. package/dist/types/tools/browser.d.ts +18 -1
  56. package/dist/types/tools/computer/render.d.ts +17 -0
  57. package/dist/types/tools/computer.d.ts +465 -0
  58. package/dist/types/tools/index.d.ts +24 -1
  59. package/dist/types/tools/job.d.ts +13 -0
  60. package/dist/types/tools/tool-timeouts.d.ts +5 -0
  61. package/dist/types/web/search/index.d.ts +32 -2
  62. package/dist/types/web/search/providers/base.d.ts +22 -0
  63. package/dist/types/web/search/providers/xai.d.ts +64 -0
  64. package/dist/types/web/search/types.d.ts +11 -3
  65. package/package.json +7 -7
  66. package/src/cli/web-search-cli.ts +123 -8
  67. package/src/cli.ts +2 -0
  68. package/src/commands/rlm.ts +19 -0
  69. package/src/commands/web-search.ts +66 -0
  70. package/src/config/keybindings.ts +11 -0
  71. package/src/config/model-profiles.ts +11 -3
  72. package/src/config/model-registry.ts +55 -1
  73. package/src/config/models-config-schema.ts +1 -0
  74. package/src/config/settings-schema.ts +67 -1
  75. package/src/edit/notebook.ts +6 -2
  76. package/src/eval/py/executor.ts +8 -1
  77. package/src/eval/py/kernel.ts +9 -4
  78. package/src/eval/py/runtime.ts +153 -32
  79. package/src/exec/bash-executor.ts +10 -4
  80. package/src/extensibility/custom-tools/types.ts +2 -0
  81. package/src/extensibility/custom-tools/wrapper.ts +2 -0
  82. package/src/extensibility/extensions/types.ts +2 -0
  83. package/src/extensibility/extensions/wrapper.ts +1 -0
  84. package/src/gjc-runtime/launch-tmux.ts +129 -1
  85. package/src/gjc-runtime/session-state-sidecar.ts +61 -1
  86. package/src/gjc-runtime/tmux-common.ts +26 -2
  87. package/src/gjc-runtime/tmux-gc.ts +40 -27
  88. package/src/gjc-runtime/tmux-sessions.ts +13 -1
  89. package/src/gjc-runtime/ultragoal-runtime.ts +340 -18
  90. package/src/goals/runtime.ts +4 -3
  91. package/src/goals/state.ts +1 -1
  92. package/src/goals/tools/goal-tool.ts +16 -3
  93. package/src/internal-urls/docs-index.generated.ts +13 -9
  94. package/src/main.ts +28 -3
  95. package/src/modes/components/custom-editor.ts +13 -4
  96. package/src/modes/components/custom-model-preset-wizard.ts +293 -0
  97. package/src/modes/components/hook-selector.ts +1 -1
  98. package/src/modes/components/model-selector.ts +72 -29
  99. package/src/modes/components/skill-message.ts +62 -8
  100. package/src/modes/components/status-line.ts +13 -1
  101. package/src/modes/controllers/input-controller.ts +60 -11
  102. package/src/modes/controllers/selector-controller.ts +39 -0
  103. package/src/modes/interactive-mode.ts +1 -1
  104. package/src/modes/print-mode.ts +14 -4
  105. package/src/modes/rpc/rpc-client.ts +250 -80
  106. package/src/modes/rpc/rpc-mode.ts +6 -12
  107. package/src/modes/rpc/rpc-socket-security.ts +103 -0
  108. package/src/modes/rpc/rpc-types.ts +10 -0
  109. package/src/modes/shared/agent-wire/command-dispatch.ts +7 -0
  110. package/src/modes/shared/agent-wire/command-validation.ts +1 -0
  111. package/src/modes/shared/agent-wire/scopes.ts +1 -0
  112. package/src/modes/shared/agent-wire/unattended-session.ts +9 -0
  113. package/src/modes/utils/hotkeys-markdown.ts +4 -2
  114. package/src/modes/utils/ui-helpers.ts +2 -2
  115. package/src/prompts/goals/goal-continuation.md +1 -0
  116. package/src/prompts/goals/goal-mode-active.md +1 -0
  117. package/src/prompts/system/rlm-report-command.md +1 -0
  118. package/src/prompts/system/rlm-research.md +23 -0
  119. package/src/prompts/tools/bash.md +23 -2
  120. package/src/prompts/tools/browser.md +7 -3
  121. package/src/prompts/tools/computer.md +74 -0
  122. package/src/prompts/tools/goal.md +3 -0
  123. package/src/prompts/tools/job.md +9 -1
  124. package/src/prompts/tools/web-search.md +7 -0
  125. package/src/rlm/artifacts.ts +60 -0
  126. package/src/rlm/complete-research-tool.ts +163 -0
  127. package/src/rlm/data-context.ts +26 -0
  128. package/src/rlm/index.ts +339 -0
  129. package/src/rlm/notebook.ts +108 -0
  130. package/src/rlm/preset.ts +76 -0
  131. package/src/rlm/python-tool.ts +68 -0
  132. package/src/rlm/report.ts +70 -0
  133. package/src/rlm/types.ts +40 -0
  134. package/src/sdk.ts +12 -0
  135. package/src/session/agent-session.ts +48 -3
  136. package/src/slash-commands/builtin-registry.ts +17 -0
  137. package/src/tools/bash-allowed-prefixes.ts +84 -1
  138. package/src/tools/bash.ts +80 -13
  139. package/src/tools/browser/attach.ts +103 -3
  140. package/src/tools/browser/registry.ts +176 -2
  141. package/src/tools/browser/render.ts +9 -1
  142. package/src/tools/browser.ts +33 -0
  143. package/src/tools/computer/render.ts +78 -0
  144. package/src/tools/computer.ts +640 -0
  145. package/src/tools/index.ts +41 -1
  146. package/src/tools/job.ts +88 -5
  147. package/src/tools/json-tree.ts +42 -29
  148. package/src/tools/renderers.ts +2 -0
  149. package/src/tools/tool-timeouts.ts +1 -0
  150. package/src/web/search/index.ts +27 -2
  151. package/src/web/search/provider.ts +16 -1
  152. package/src/web/search/providers/base.ts +22 -0
  153. package/src/web/search/providers/xai.ts +511 -0
  154. package/src/web/search/render.ts +7 -0
  155. package/src/web/search/types.ts +11 -1
@@ -46,6 +46,7 @@ const RPC_COMMAND_SCOPE_REGISTRY: Record<RpcCommandType, BridgeCommandScope> = {
46
46
  set_todos: "control",
47
47
  set_host_tools: "host_tools",
48
48
  set_host_uri_schemes: "host_uri",
49
+ get_pending_workflow_gates: "message:read",
49
50
  set_model: "model",
50
51
  cycle_model: "model",
51
52
  get_available_models: "model",
@@ -181,6 +181,15 @@ export class UnattendedSessionControlPlane implements RpcUnattendedControlPlane,
181
181
  return this.#broker.resolve(response);
182
182
  }
183
183
 
184
+ listPendingGates(): import("../../rpc/rpc-types").RpcWorkflowGate[] {
185
+ const store = this.opts.store;
186
+ if (!store) return [];
187
+ return store
188
+ .list()
189
+ .filter(record => record.status === "pending")
190
+ .map(record => record.gate);
191
+ }
192
+
184
193
  emitGate(input: OpenGateInput): Promise<unknown> {
185
194
  if (!this.#broker) {
186
195
  return Promise.reject(new Error("cannot emit a workflow gate before unattended mode is negotiated"));
@@ -21,8 +21,9 @@ export function buildHotkeysMarkdown(bindings: HotkeysMarkdownBindings): string
21
21
  "**Editing**",
22
22
  "| Key | Action |",
23
23
  "|-----|--------|",
24
- "| `Enter` | Send message |",
25
- "| `Shift+Enter` / `Alt+Enter` | New line |",
24
+ "| `Enter` | Send / queue while busy |",
25
+ `| \`${appKey(bindings, "app.message.queue")}\` | Queue message for next turn |`,
26
+ "| `Shift+Enter` | New line |",
26
27
  "| `Ctrl+W` / `Option+Backspace` | Delete word backwards |",
27
28
  "| `Ctrl+U` | Delete to start of line |",
28
29
  "| `Ctrl+K` | Delete to end of line |",
@@ -45,6 +46,7 @@ export function buildHotkeysMarkdown(bindings: HotkeysMarkdownBindings): string
45
46
  `| \`${appKey(bindings, "app.plan.toggle")}\` | Toggle plan mode |`,
46
47
  `| \`${appKey(bindings, "app.history.search")}\` | Search prompt history |`,
47
48
  `| \`${appKey(bindings, "app.tools.expand")}\` | Toggle tool output expansion |`,
49
+ `| \`${appKey(bindings, "app.tool.backgroundFold")}\` twice | Fold supported foreground bash into a background job |`,
48
50
  `| \`${appKey(bindings, "app.thinking.toggle")}\` | Toggle thinking block visibility |`,
49
51
  `| \`${appKey(bindings, "app.editor.external")}\` | Edit message in external editor |`,
50
52
  `| \`${appKey(bindings, "app.clipboard.pasteImage")}\` | Paste image from clipboard |`,
@@ -559,11 +559,11 @@ export class UiHelpers {
559
559
 
560
560
  const followUpMessages: Array<{ message: string; label: string }> = [];
561
561
  for (const message of queuedMessages.followUp) {
562
- followUpMessages.push({ message, label: "Follow-up" });
562
+ followUpMessages.push({ message, label: "Queued" });
563
563
  }
564
564
  for (const entry of this.ctx.compactionQueuedMessages as CompactionQueuedMessage[]) {
565
565
  if (entry.mode === "followUp") {
566
- followUpMessages.push({ message: entry.text, label: "Follow-up" });
566
+ followUpMessages.push({ message: entry.text, label: "Queued" });
567
567
  }
568
568
  }
569
569
 
@@ -23,3 +23,4 @@ Before calling `goal({op:"complete"})`, you MUST perform a completion audit agai
23
23
  Call `goal({op:"complete"})` only when every deliverable has direct, current-state evidence proving it is satisfied. The completion call is a load-bearing claim; it ends the autonomous loop and surfaces a "done" report to the user.
24
24
 
25
25
  If the work is not done, just keep working. Do not narrate that you are continuing — execute.
26
+ If every outstanding deliverable is genuinely blocked on human input or action only the user can perform (e.g. the user must sing, record, edit, approve, or carry out a manual/physical step) and no further autonomous progress is possible, call `goal({op:"pause"})` to park the goal. This stops the autonomous continuation loop without falsely completing or dropping the objective. State the human blocker, pause, then stop. When the user later unblocks the work, they (or you) resume via `goal({op:"resume"})`.
@@ -12,6 +12,7 @@ Usage:
12
12
  Use the `goal` tool to inspect or complete the active goal:
13
13
  - `goal({op:"get"})` returns the current goal and usage state.
14
14
  - `goal({op:"complete"})` is only for verified completion.
15
+ - `goal({op:"pause"})` parks the goal when every outstanding deliverable is blocked on human input only the user can perform; it stops the autonomous continuation loop and is resumed with `goal({op:"resume"})`.
15
16
 
16
17
  You MUST keep the full objective intact across turns. Do not redefine success around a smaller, easier, or already-completed subset.
17
18
 
@@ -0,0 +1 @@
1
+ Call complete_research with final=false and a concise summary of the current notebook-backed findings. Do not mark the goal complete unless the full research objective is satisfied.
@@ -0,0 +1,23 @@
1
+ You are operating in **RLM research mode** — a Jupyter-notebook-style research session backed by a persistent Python kernel. Your job is to investigate a question or dataset through iterative, reproducible Python analysis and then synthesize what you found.
2
+
3
+ ## How you work
4
+
5
+ - Drive the investigation with the `python` tool. The kernel is **persistent**: variables, imports, and loaded data survive across calls, exactly like notebook cells. Build up state incrementally instead of re-running everything each time.
6
+ - Each `python` call is recorded as a notebook cell (code + output) in this session's `notebook.ipynb`. Write focused cells that each make one clear step of progress.
7
+ - Prefer the managed scientific stack (`numpy`, `pandas`, `matplotlib`, `polars`) when useful. If an additional package is missing, use the kernel's `%pip install ...` magic only when it is necessary for the investigation; that install cell is recorded in the notebook as provenance.
8
+ - Use `read` to inspect local files, `web_search` to gather external facts, and read-only `bash` only for simple inspection commands where shell-native views are materially useful (`grep`, `rg`, `tree`, `ls`, `pwd`, `wc`, `du`, `file`, `stat`). The `bash` surface is restricted to a single command with no pipelines, redirects, env overrides, command substitution, shell expansion, or write-capable flags. You do **not** have file-editing or arbitrary-mutation tools in this mode by design: keep all work inside the Python kernel, read-only inspection, and the notebook/report artifacts.
9
+ - RLM always runs under goal mode. Use `goal({"op":"get"})` to inspect the active research goal. When the research objective is actually satisfied and the report-worthy conclusions are grounded in notebook outputs, call `goal({"op":"complete"})`, then call `complete_research` with a concise final summary. Do not present the session as complete without both tool calls.
10
+
11
+ ## Evidence discipline
12
+
13
+ - Ground every claim in an actual cell output you can point to. Do not report a metric, finding, or conclusion you have not computed and seen.
14
+ - When a cell fails, read the error, fix the specific cause, and continue — do not paper over failures.
15
+ - Distinguish what the data shows from what you infer. State assumptions explicitly.
16
+
17
+ ## Data context
18
+
19
+ - If a `DATA.md` file (or a `--data` path) was provided, treat it as the authoritative description of the available data and honor it.
20
+
21
+ ## Reporting
22
+
23
+ - When the investigation is complete (or when asked), produce a clear Markdown research report covering the question, the method, the key findings with their supporting evidence, and the conclusions and caveats. The session's `report.md` is synthesized from your notebook and final summary.
@@ -2,27 +2,48 @@ Executes bash command in shell session for terminal operations like git, bun, ca
2
2
 
3
3
  <instruction>
4
4
  - Use `cwd` to set working directory, not `cd dir && …`
5
+ {{#when restrictionProfile "==" "read-only"}}
6
+ - Do not pass `env` overrides or `pty: true`; read-only bash rejects both.
7
+ - Shell control operators such as `;`, `|`, `&`, `<`, `>`, and command substitution are blocked.
8
+ - Internal URIs (`agent://`, `artifact://`, `rule://`, `local://`) are auto-resolved to filesystem paths without creating parent directories.
9
+ {{else}}
5
10
  - Prefer `env: { NAME: "…" }` for multiline, quote-heavy, or untrusted values; reference as `$NAME`
6
11
  - Quote variable expansions like `"$NAME"` to preserve exact content
7
12
  - PTY mode is opt-in: set `pty: true` only when the command needs a real terminal (e.g. `sudo`, `ssh` requiring user input); default is `false`
8
13
  - Use `;` only when later commands should run regardless of earlier failures
9
14
  - Internal URIs (`agent://`, `artifact://`, `rule://`, `local://`) are auto-resolved to filesystem paths
15
+ {{/when}}
10
16
  {{#if asyncEnabled}}
11
17
  - Use `async: true` for long-running commands when you don't need immediate output; the call returns a background job ID and the result is delivered automatically as a follow-up.
12
18
  {{/if}}
19
+ {{#if autoBackgroundEnabled}}
20
+ - In the interactive TUI, the user can press `Ctrl+B` twice while a supported managed foreground bash command is still running to fold it into a quiet background job. Do not instruct users to use raw shell `Ctrl+Z`/`bg` inside the GJC TUI; ownership and output routing are not safe there.
21
+ {{/if}}
13
22
  </instruction>
14
23
  {{#if restrictedAllowedPrefixes}}
15
- <restricted-role-agent-mode>
24
+ <restricted-bash-mode>
25
+ {{#when restrictionProfile "==" "read-only"}}
26
+ This session's bash tool is read-only. It accepts only simple, single-command inspections beginning with:
27
+ {{#each restrictedAllowedPrefixes}}
28
+ - `{{this}}`
29
+ {{/each}}
30
+ Shell control operators, command substitution, env overrides, redirects, pipelines, glob expansion, and known write-capable flags are blocked before execution. Use it only when an inspection command is materially better than `read`, `search`, or `find`.
31
+ {{else}}
16
32
  This session's bash tool is restricted. It only accepts commands beginning with:
17
33
  {{#each restrictedAllowedPrefixes}}
18
34
  - `{{this}}`
19
35
  {{/each}}
20
36
  Use it only for sanctioned GJC workflow CLI persistence or state read/write/contract operations; per-command env overrides and all other shell command shapes are blocked before execution.
21
- </restricted-role-agent-mode>
37
+ {{/when}}
38
+ </restricted-bash-mode>
22
39
  {{/if}}
23
40
 
24
41
  <critical>
42
+ {{#when restrictionProfile "==" "read-only"}}
43
+ - Use read-only bash only for approved inspection commands that are materially better than `read`, `search`, or `find`; the tool itself blocks non-approved commands and unsafe shell shapes.
44
+ {{else}}
25
45
  - NEVER use Linux coreutils (`cat`, `head`, `tail`, `less`, `more`, `ls`, `grep`, `rg`, `awk`, `sed`, `find`, `fd`, etc.) when a dedicated tool suffices — ALWAYS prefer `read`, `search`, `find`, `edit`, `write`.
46
+ {{/when}}
26
47
  - NEVER pipe through `| head -n N` or `| tail -n N` — output is already truncated with the full result available via `artifact://<id>`.
27
48
  - NEVER redirect with `2>&1` or `2>/dev/null` — stdout and stderr are already merged.
28
49
  </critical>
@@ -10,9 +10,10 @@ Drives a real Chromium tab with full puppeteer access via JS execution.
10
10
  - Tabs survive across `run` calls and across in-process subagents. Open once, reuse many times.
11
11
  - Browser kinds, selected by the `app` field on `open`:
12
12
  - default (no `app`) → headless Chromium with stealth patches.
13
- `app.path` → spawn an absolute binary (Electron/CDP). If a running instance already exposes a CDP port, it is reused; otherwise stale instances are killed and a fresh one is spawned. No stealth patches — never tamper with a real desktop app.
14
- - `app.cdp_url` → connect to an existing CDP endpoint (e.g. `http://127.0.0.1:9222`).
15
- `app.target` (with `path`/`cdp_url`) — substring matched against url+title to pick a BrowserWindow when the app exposes several.
13
+ - `app.path` → spawn an absolute binary (Electron/CDP). If a running instance already exposes a CDP port, it is reused; otherwise stale instances are killed and a fresh one is spawned. Do not use this for a daily Chrome profile; use `app.browser: "chrome"` instead.
14
+ - `app.browser: "chrome"` + `app.path` + `app.user_data_dir` + `app.profile_directory` → use an existing saved Chrome profile. GJC binds CDP to `127.0.0.1` on an ephemeral or `app.cdp_port` port, reuses a matching running profile only when it already exposes attachable localhost CDP, refuses a matching non-CDP running profile instead of killing/relaunching it, and kills only the Chrome process GJC launched. Externally-owned CDP is disconnect-only. `app.background`/`app.no_focus` add Chromium's `--no-startup-window` guard; focus avoidance is best-effort and platform-dependent.
15
+ - `app.cdp_url` → connect to an existing CDP endpoint (e.g. `http://127.0.0.1:9222`). For logged-in profiles, only expose CDP on localhost and treat the endpoint as full browser-account access.
16
+ - `app.target` (with `path`/`cdp_url`/Chrome profile) — substring matched against url+title to pick a BrowserWindow when the app exposes several.
16
17
  - Inside `run`, `tab` exposes high-level helpers; reach for `page` (raw puppeteer Page) when you need anything they don't cover.
17
18
  - `tab.goto(url, { waitUntil? })` — clears the element cache and navigates.
18
19
  - `tab.observe({ includeAll?, viewportOnly? })` — accessibility snapshot. Returns `{ url, title, viewport, scroll, elements: [{ id, role, name, value, states, … }] }`. Element ids are stable until the next observe/goto.
@@ -56,6 +57,9 @@ Drives a real Chromium tab with full puppeteer access via JS execution.
56
57
  # Attach to an existing Electron app
57
58
  `{"action":"open","name":"cursor","app":{"path":"/Applications/Cursor.app/Contents/MacOS/Cursor"}}`
58
59
 
60
+ # Use an existing Chrome profile in the background
61
+ `{"action":"open","name":"work-browser","app":{"browser":"chrome","path":"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome","user_data_dir":"~/Library/Application Support/Google/Chrome","profile_directory":"Profile 10","background":true,"no_focus":true,"target":"example.com"}}`
62
+
59
63
  # Close one tab (browser stays alive if other tabs reference it)
60
64
  `{"action":"close","name":"docs"}`
61
65
 
@@ -0,0 +1,74 @@
1
+ # computer
2
+
3
+ `computer` is available by default on supported Apple Silicon macOS. It controls the real desktop, so use it only when the task genuinely needs real desktop screenshot or input control.
4
+
5
+ ## Safety contract
6
+
7
+ - Disabled means disabled: when the tool is disabled (`computer.alwaysOn=false` with `computer.enabled` unset/false) or the platform is unsupported, every action including `screenshot` fails with `COMPUTER_DISABLED` and captures nothing.
8
+ - Callable only on Apple Silicon macOS (`arm64` darwin); available by default there, with `computer.alwaysOn=false` as the off-switch and `computer.enabled=true` as the manual enable path.
9
+ - Native execution remains supervisor-gated. If the stop/suspend supervisor is unavailable, stale, suspended, permissioned off, display-stale, or cancelled, the action fails closed with a `COMPUTER_*` code.
10
+ - Respect the user's stop/suspend request immediately. Do not loop desktop actions after a stop/suspend/error.
11
+ - The user can stop or suspend the session at any time with the configured kill-switch hotkey (default `Control+Option+Command+Escape`). If you see `COMPUTER_CANCELLED` or `COMPUTER_SUPERVISOR_NOT_LIVE`, stop and wait for the user.
12
+
13
+ ## Coordinate contract
14
+
15
+ Coordinates are screenshot pixels, not CSS pixels and not normalized fractions. Use the latest successful `screenshot` dimensions and origin/scale metadata as the coordinate frame. Do not guess coordinates outside the screenshot bounds.
16
+
17
+ When you send a sequence of actions in one `batch` call, pointer coordinates in later steps are validated against the most recent screenshot from that batch. If a coordinate is out of bounds, the batch stops and reports `COMPUTER_COORD_INVALID`. Always capture a fresh screenshot before acting if the display may have changed.
18
+
19
+ ## Actions
20
+
21
+ The model action object uses exactly these snake_case actions and fields:
22
+
23
+ - `screenshot` — capture the enabled desktop.
24
+ - `click` — `x`, `y`, optional `button` (`left`, `right`, `middle`).
25
+ - `double_click` — `x`, `y`, optional `button`.
26
+ - `move` — `x`, `y`, optional `button`.
27
+ - `drag` — `x`, `y`, `to_x`, `to_y`, optional `button`.
28
+ - `scroll` — `x`, `y`, `scroll_x`, `scroll_y`.
29
+ - `type` — `text`.
30
+ - `keypress` — `keys` string array.
31
+ - `wait` — `ms`.
32
+ - `batch` — `actions`: a non-empty array of the single actions above. Steps run in order and the result includes per-step status and the last screenshot captured inside the batch.
33
+
34
+ Shared optional fields: `timeout` seconds and `include_screenshot` for a bounded post-action screenshot when supported.
35
+
36
+ Do not use camelCase fields such as `doubleClick`, `toX`, `scrollX`, or `includeScreenshot` in the model action object.
37
+
38
+ ## Examples
39
+
40
+ Take a single screenshot:
41
+
42
+ ```json
43
+ { "action": "screenshot" }
44
+ ```
45
+
46
+ Click a coordinate from the latest screenshot:
47
+
48
+ ```json
49
+ { "action": "click", "x": 120, "y": 340 }
50
+ ```
51
+
52
+ Run a focused sequence in one batch — screenshot first, then act, so coordinates are validated:
53
+
54
+ ```json
55
+ {
56
+ "action": "batch",
57
+ "actions": [
58
+ { "action": "screenshot" },
59
+ { "action": "click", "x": 120, "y": 340 },
60
+ { "action": "type", "text": "hello" },
61
+ { "action": "keypress", "keys": ["Return"] }
62
+ ]
63
+ }
64
+ ```
65
+
66
+ ## Error recovery
67
+
68
+ - `COMPUTER_COORD_INVALID`: the coordinate was outside the latest screenshot bounds. Capture a fresh screenshot and re-derive coordinates.
69
+ - `COMPUTER_DISPLAY_STALE`: the display changed since the screenshot. Capture a fresh screenshot before acting.
70
+ - `COMPUTER_SUPERVISOR_NOT_LIVE` / `COMPUTER_SUSPENDED` / `COMPUTER_CANCELLED`: stop acting and wait for the user.
71
+ - `COMPUTER_PERMISSION_REQUIRED`: the host needs screen-recording or accessibility permission. Ask the user to grant it.
72
+ - `COMPUTER_DISABLED`: the tool is disabled or the host is unsupported. Do not retry.
73
+
74
+ After any error, resume with a fresh screenshot rather than guessing.
@@ -6,13 +6,16 @@ Use a single `op` field:
6
6
  - `resume` re-activates a paused goal so work can continue.
7
7
  - `complete` marks the goal complete after you have verified every deliverable against current evidence.
8
8
  - `drop` discards the current goal without completing it.
9
+ - `pause` parks an active goal without completing or dropping it. The autonomous continuation loop stops while the goal is paused, so the agent is not re-activated every turn. Use `pause` (not `drop`) when the goal is genuinely still alive but every outstanding deliverable is blocked on human input or action only the user can perform — e.g. the user must sing, record, edit, approve, or perform a manual/physical step — and no further autonomous progress is possible. A paused goal keeps its progress and is fully resumable via `resume`.
9
10
 
10
11
  Examples:
11
12
  - `goal({"op":"create","objective":"Implement feature X"})`
12
13
  - `goal({"op":"get"})`
13
14
  - `goal({"op":"resume"})`
15
+ - `goal({"op":"pause"})`
14
16
  - `goal({"op":"complete"})`
15
17
  - `goal({"op":"drop"})`
16
18
 
17
19
  Call `complete` only when the goal is actually done and verified.
18
20
  If `get` shows a paused goal, call `resume` before continuing work on it.
21
+ Do not `pause` as a substitute for `complete`; pause only when the outstanding work is human-blocked.
@@ -1,12 +1,20 @@
1
1
  Inspects, waits, or cancels async jobs.
2
2
 
3
- Background job results are delivered automatically when complete. Reach for this tool only when you need to intervene.
3
+ Background job results are delivered automatically when complete. Running job output stays quiet by default to avoid flooding the conversation; use `tail` when you explicitly want to show/reopen retained output. Reach for this tool only when you need to inspect or intervene.
4
+
5
+ In the interactive TUI, supported managed foreground bash can be folded into a background job by pressing `Ctrl+B` twice while it is running. Raw shell `Ctrl+Z`/`bg` is not the supported path inside GJC because it bypasses job ownership and output-routing contracts.
4
6
 
5
7
  # Operations
6
8
 
7
9
  ## `list: true`
8
10
  Use to inspect what's running.
9
11
 
12
+ ## `tail: [id, …]`
13
+ Show the retained output buffer for one or more background jobs without waiting.
14
+ - Use this to reopen/tail a backgrounded long-running bash/tool output after folding it away.
15
+ - Output is bounded by the manager retention window; stale cursors may report that only the retained tail is available.
16
+ - Prefer `tail` over polling when you only need to peek at progress, so the conversation can continue without flooding the TUI.
17
+
10
18
  ## `poll: [id, …]`
11
19
  Block until the specified jobs finish or the wait window elapses.
12
20
  - Use when you are genuinely blocked on a result and have no other work to do.
@@ -5,6 +5,13 @@ Searches the web for up-to-date information beyond knowledge cutoff.
5
5
  - You MUST include links for cited sources in the final response
6
6
  </instruction>
7
7
 
8
+ <xai>
9
+ - With provider `xai`, use `xai_search_mode: "web"` for normal web search, `"x"` for X/Twitter search, or `"web_and_x"` when both surfaces are relevant.
10
+ - xAI web filters: `allowed_domains` or `excluded_domains` (max 5, mutually exclusive), plus `enable_image_understanding` and `enable_image_search`.
11
+ - xAI X filters: `allowed_x_handles` or `excluded_x_handles` (max 20, mutually exclusive), `from_date`, `to_date`, `enable_image_understanding`, and `enable_video_understanding`.
12
+ - Use `no_inline_citations` with provider `xai` when the answer should omit inline citation markdown while still returning structured sources.
13
+ </xai>
14
+
8
15
  <caution>
9
16
  Searches are performed automatically within a single API call—no pagination or follow-up requests needed.
10
17
  </caution>
@@ -0,0 +1,60 @@
1
+ /**
2
+ * RLM session artifact layout under <cwd>/.gjc/rlm/<sessionId>/.
3
+ */
4
+ import * as fs from "node:fs/promises";
5
+ import * as path from "node:path";
6
+ import { readNotebookDocument } from "../edit/notebook";
7
+ import type { RlmArtifactPaths } from "./types";
8
+
9
/** Relative directory (below the working directory) that holds all RLM session folders. */
export const RLM_DIR_SEGMENT = path.join(".gjc", "rlm");

/** Session ids may contain only ASCII letters, digits, underscores and hyphens. */
const SESSION_ID_RE = /^[A-Za-z0-9_-]+$/;

/**
 * Report whether `sessionId` is acceptable as an RLM session directory name.
 *
 * An id is accepted when it is between 1 and 128 characters long and consists
 * solely of the characters allowed by `SESSION_ID_RE`.
 */
export function isValidRlmSessionId(sessionId: string): boolean {
	const { length } = sessionId;
	if (length === 0 || length > 128) return false;
	return SESSION_ID_RE.test(sessionId);
}
16
+
17
/**
 * Generate a filesystem-safe, sortable session id (timestamp + random suffix).
 *
 * The timestamp part is the ISO-8601 UTC rendering of `now` with `:` and `.`
 * removed, the `T` separator turned into `-`, and the trailing `Z` dropped
 * (e.g. `1970-01-01-000000000`). The suffix is always exactly six base-36
 * characters.
 *
 * @param now - Instant used for the timestamp part; defaults to the current time.
 * @returns An id of the form `<stamp>-<suffix>`.
 */
export function generateRlmSessionId(now: Date = new Date()): string {
	const stamp = now.toISOString().replace(/[:.]/g, "").replace("T", "-").replace("Z", "");
	// `Math.random().toString(36)` may have a short (or no) fractional part — e.g.
	// 0.5 renders as "0.i" and 0 as "0" — so pad to keep the suffix a fixed width
	// and avoid ids that end in a bare "-".
	const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
	return `${stamp}-${suffix}`;
}
23
+
24
/**
 * Compute where the artifacts of one RLM session live, i.e. the files and
 * sub-directory below `<cwd>/<RLM_DIR_SEGMENT>/<sessionId>/`.
 *
 * Nothing is created on disk; this only builds path strings.
 *
 * @throws Error when `sessionId` is rejected by `isValidRlmSessionId`.
 */
export function resolveRlmArtifactPaths(cwd: string, sessionId: string): RlmArtifactPaths {
	if (isValidRlmSessionId(sessionId)) {
		const sessionDir = path.join(cwd, RLM_DIR_SEGMENT, sessionId);
		const inSession = (name: string): string => path.join(sessionDir, name);
		return {
			dir: sessionDir,
			notebookPath: inSession("notebook.ipynb"),
			reportPath: inSession("report.md"),
			metadataPath: inSession("metadata.json"),
			agentSessionDir: inSession("agent-session"),
		};
	}
	throw new Error(`Invalid RLM session id: ${JSON.stringify(sessionId)}`);
}
37
+
38
/** Create the session directory, including any missing parent directories. */
export async function ensureRlmSessionDir(paths: RlmArtifactPaths): Promise<void> {
	const { dir } = paths;
	await fs.mkdir(dir, { recursive: true });
}
41
+
42
/**
 * Report whether a directory exists for the given session.
 *
 * Any failure while stat-ing the directory is treated as "does not exist".
 * An invalid `sessionId` still throws, because the paths are resolved before
 * the guarded section.
 */
export async function rlmSessionExists(cwd: string, sessionId: string): Promise<boolean> {
	const { dir } = resolveRlmArtifactPaths(cwd, sessionId);
	let isDir = false;
	try {
		isDir = (await fs.stat(dir)).isDirectory();
	} catch {
		// Best effort: a failed stat leaves the answer at `false`.
	}
	return isDir;
}
51
+
52
/**
 * Read the session's notebook document, or resolve to `undefined` when the
 * reader reports the file as missing.
 *
 * "Missing" is recognised by an `Error` whose message starts with
 * `File not found:`; every other failure is rethrown unchanged.
 */
export async function readRlmNotebookIfPresent(cwd: string, sessionId: string) {
	const { notebookPath } = resolveRlmArtifactPaths(cwd, sessionId);
	try {
		return await readNotebookDocument(notebookPath, notebookPath);
	} catch (error) {
		const missing = error instanceof Error && error.message.startsWith("File not found:");
		if (!missing) throw error;
		return undefined;
	}
}
@@ -0,0 +1,163 @@
1
+ /**
2
+ * RLM completion/report tool.
3
+ *
4
+ * This is the model-facing stop/report seam for autonomous research mode. It
5
+ * writes the deterministic report from the live notebook and, for final
6
+ * completion, marks the RLM controller complete so the existing agent loop can
7
+ * pause through CreateAgentSessionOptions.shouldPause.
8
+ */
9
+ import * as z from "zod/v4";
10
+ import type { NotebookCell, NotebookDocument } from "../edit/notebook";
11
+ import type { CustomTool } from "../extensibility/custom-tools/types";
12
+ import { ToolError } from "../tools/tool-errors";
13
+ import type { RlmNotebookWriter } from "./notebook";
14
+ import { synthesizeRlmReport } from "./report";
15
+ import type { RlmArtifactPaths } from "./types";
16
+
17
/** Name under which the completion/report tool is registered. */
export const RLM_COMPLETE_RESEARCH_TOOL_NAME = "complete_research";

/** Arguments accepted by the `complete_research` tool. */
const paramsSchema = z.object({
	// Required, non-empty summary text for the report.
	summary: z.string().min(1).describe("Concise final report summary grounded in notebook outputs and cited observations."),
	// Optional flag; the tool treats an omitted value as `true`.
	final: z.boolean().optional().describe("Set false to synthesize a draft report without ending the research session."),
});
29
+
30
/** Everything `writeRlmReport` needs to synthesize and persist a report. */
export interface RlmReportWriteInput {
	/** Session artifact paths; the report is written to `reportPath`. */
	paths: RlmArtifactPaths;
	/** Live notebook writer; it is flushed and its document feeds the report. */
	notebook: RlmNotebookWriter;
	/** Report title handed to the report synthesizer. */
	title: string;
	/** Optional summary text handed to the report synthesizer. */
	summary?: string;
	/** Optional data path handed to the report synthesizer. */
	dataPath?: string | null;
}
37
+
38
/** Report inputs plus the hooks that gate and signal final completion. */
export interface RlmCompleteResearchContext extends RlmReportWriteInput {
	/** Minimum number of successful code-cell runs required before a final report; values at or below zero disable the check. */
	minSuccessfulRuns?: number;
	/** Returns the current goal status; a defined value other than `"complete"` blocks finalization. */
	getGoalStatus?: () => string | undefined;
	/** Invoked with the summary after a final report has been written. */
	markCompleted?: (summary: string) => void;
}
43
+
44
/** Normalize a notebook text field (single string or list of chunks) to one string; `undefined` becomes `""`. */
function cellText(value: string | string[] | undefined): string {
	if (Array.isArray(value)) return value.join("");
	return value === undefined ? "" : value;
}
48
+
49
/**
 * Decide whether a notebook output entry marks a failed run.
 *
 * An entry counts as an error when its `output_type` is `"error"`, or when it
 * is a `"stream"` entry whose `name` is `"stderr"`. Non-object values are never
 * errors.
 */
function isErrorOutput(output: unknown): boolean {
	if (typeof output !== "object" || output === null) return false;
	const { output_type: kind, name } = output as Record<string, unknown>;
	if (kind === "error") return true;
	return kind === "stream" && name === "stderr";
}
54
+
55
/** True when the cell carries an `outputs` array with at least one entry. */
function hasAnyOutput(cell: NotebookCell): boolean {
	const { outputs } = cell;
	if (!Array.isArray(outputs)) return false;
	return outputs.length !== 0;
}
58
+
59
/**
 * Count the code cells that did not fail.
 *
 * A code cell is counted unless at least one of its outputs is classified as
 * an error by `isErrorOutput`; code cells without any output are counted too.
 * Non-code cells are ignored.
 */
export function countSuccessfulNotebookRuns(notebook: NotebookDocument): number {
	let successes = 0;
	for (const cell of notebook.cells) {
		if (cell.cell_type !== "code") continue;
		const failed = hasAnyOutput(cell) && (cell.outputs ?? []).some(isErrorOutput);
		if (!failed) successes += 1;
	}
	return successes;
}
66
+
67
/**
 * Render a notebook as plain text suitable for replaying earlier work.
 *
 * Markdown cells with non-blank content become `Markdown:` sections. Code
 * cells become numbered `Cell N:` sections containing the source in a
 * ```python fence followed by an optional `Output:` part. Only `stream` text
 * and `error` name/value pairs are included in the output part; other output
 * kinds are omitted. Sections are separated by a `---` rule.
 *
 * @param notebook - Notebook document to summarize.
 * @param maxChars - Length above which the text is cut and a truncation marker appended.
 */
export function summarizeNotebookForReplay(notebook: NotebookDocument, maxChars: number = 12_000): string {
	// Turn one raw output entry into display text ("" when there is nothing to show).
	const renderOutput = (output: unknown): string => {
		if (!output || typeof output !== "object") return "";
		const record = output as Record<string, unknown>;
		switch (record.output_type) {
			case "stream":
				return cellText(record.text as string | string[] | undefined).trimEnd();
			case "error":
				return [record.ename, record.evalue].filter(Boolean).join(": ");
			default:
				return "";
		}
	};

	const sections: string[] = [];
	let codeCellNumber = 0;
	for (const cell of notebook.cells) {
		if (cell.cell_type === "markdown") {
			const body = cellText(cell.source).trim();
			if (body.length > 0) sections.push(`Markdown:\n${body}`);
		} else if (cell.cell_type === "code") {
			codeCellNumber += 1;
			const source = cellText(cell.source).trimEnd();
			const rendered = (cell.outputs ?? [])
				.map(output => renderOutput(output))
				.filter(text => text.length > 0)
				.join("\n");

			const lines: string[] = [`Cell ${codeCellNumber}:`, "```python"];
			// An empty source contributes no line between the fences.
			if (source) lines.push(source);
			lines.push("```");
			if (rendered) lines.push(`Output:\n${rendered}`);
			sections.push(lines.join("\n"));
		}
	}

	const replay = sections.join("\n\n---\n\n");
	if (replay.length > maxChars) {
		return `${replay.slice(0, maxChars)}\n\n...[prior notebook replay truncated]`;
	}
	return replay;
}
99
+
100
/**
 * Flush the notebook, synthesize the report from its document, and write the
 * result to `paths.reportPath`.
 *
 * @returns The report text that was written.
 */
export async function writeRlmReport(input: RlmReportWriteInput): Promise<string> {
	// Flush first so the report is built from the notebook state after pending writes.
	await input.notebook.flush();
	const { title, summary, notebook, dataPath, paths } = input;
	const markdown = synthesizeRlmReport({ title, summary, notebook: notebook.document, dataPath });
	await Bun.write(paths.reportPath, markdown);
	return markdown;
}
111
+
112
/**
 * Build the `complete_research` tool bound to one RLM session context.
 *
 * On execution the tool writes the report from the live notebook. When the
 * call is final (the default), it first enforces two gates — the configured
 * minimum of successful runs and, if a goal status is available, that the
 * status is `"complete"` — and afterwards invokes `context.markCompleted`.
 * A draft call (`final: false`) skips both gates and the completion hook.
 *
 * @throws ToolError (from `execute`) when a final call fails either gate.
 */
export function createRlmCompleteResearchTool(context: RlmCompleteResearchContext): CustomTool<typeof paramsSchema> {
	return {
		name: RLM_COMPLETE_RESEARCH_TOOL_NAME,
		label: "Complete Research",
		description:
			'Synthesize the RLM report from the live notebook. For final completion, call goal({op:"complete"}) first, then call this tool with final=true (default). Use final=false for a draft /report without ending the session.',
		parameters: paramsSchema,
		strict: true,
		concurrency: "exclusive",
		async execute(_toolCallId, params) {
			const isFinal = params.final ?? true;
			const requiredRuns = Math.max(0, Math.floor(context.minSuccessfulRuns ?? 0));
			const successfulRuns = countSuccessfulNotebookRuns(context.notebook.document);

			if (isFinal) {
				// Gate 1: enough successful runs.
				if (requiredRuns > 0 && successfulRuns < requiredRuns) {
					throw new ToolError(
						`complete_research requires at least ${requiredRuns} successful Python run(s); current successful runs: ${successfulRuns}.`,
					);
				}
				// Gate 2: the goal, when its status is known, must already be complete.
				const goalStatus = context.getGoalStatus?.();
				if (goalStatus !== undefined && goalStatus !== "complete") {
					throw new ToolError(
						`complete_research finalization requires goal({op:"complete"}) first; current RLM goal status is ${goalStatus}.`,
					);
				}
			}

			const { paths, notebook, title, dataPath } = context;
			await writeRlmReport({ paths, notebook, title, summary: params.summary, dataPath });

			if (isFinal) context.markCompleted?.(params.summary);

			const action = isFinal ? "Final report synthesized" : "Draft report synthesized";
			return {
				content: [
					{
						type: "text",
						text: `${action}: ${paths.reportPath}\nSuccessful Python runs: ${successfulRuns}`,
					},
				],
			};
		},
	};
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Optional research data description loading for RLM mode.
3
+ *
4
+ * Precedence: an explicit --data <path> (required to exist) overrides the
5
+ * project-root DATA.md, which auto-loads when present and is silently skipped
6
+ * when absent.
7
+ */
8
+ import * as path from "node:path";
9
+
10
/** A loaded research data description. */
export interface RlmDataContext {
	/** Absolute path the content was loaded from. */
	path: string;
	/** Full text of the file at `path`. */
	content: string;
}
15
+
16
/**
 * Load the research data description for RLM mode.
 *
 * With a non-empty `dataFlag`, the file it names (resolved against `cwd`) is
 * read and must exist. Otherwise `<cwd>/DATA.md` is read when present.
 *
 * @returns The loaded path and text, or `null` when no flag was given and `DATA.md` is absent.
 * @throws Error when `dataFlag` is given but the file does not exist.
 */
export async function loadRlmDataContext(cwd: string, dataFlag: string | undefined): Promise<RlmDataContext | null> {
	let target: string;
	if (dataFlag) {
		target = path.resolve(cwd, dataFlag);
	} else {
		target = path.join(cwd, "DATA.md");
	}

	const file = Bun.file(target);
	if (await file.exists()) {
		return { path: target, content: await file.text() };
	}
	// Only an explicitly requested file is an error when missing.
	if (dataFlag) {
		throw new Error(`--data file not found: ${target}`);
	}
	return null;
}