@gajae-code/coding-agent 0.5.4 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/dist/types/cli/web-search-cli.d.ts +12 -0
  3. package/dist/types/commands/rlm.d.ts +10 -0
  4. package/dist/types/commands/web-search.d.ts +54 -0
  5. package/dist/types/config/keybindings.d.ts +10 -0
  6. package/dist/types/config/model-profiles.d.ts +2 -1
  7. package/dist/types/config/model-registry.d.ts +3 -0
  8. package/dist/types/config/models-config-schema.d.ts +3 -0
  9. package/dist/types/config/settings-schema.d.ts +61 -3
  10. package/dist/types/edit/notebook.d.ts +3 -0
  11. package/dist/types/eval/py/executor.d.ts +3 -0
  12. package/dist/types/eval/py/kernel.d.ts +3 -1
  13. package/dist/types/eval/py/runtime.d.ts +9 -1
  14. package/dist/types/exec/bash-executor.d.ts +4 -0
  15. package/dist/types/extensibility/custom-tools/types.d.ts +2 -0
  16. package/dist/types/extensibility/custom-tools/wrapper.d.ts +1 -0
  17. package/dist/types/extensibility/extensions/types.d.ts +2 -0
  18. package/dist/types/extensibility/extensions/wrapper.d.ts +1 -0
  19. package/dist/types/gjc-runtime/launch-tmux.d.ts +6 -0
  20. package/dist/types/gjc-runtime/session-state-sidecar.d.ts +14 -0
  21. package/dist/types/gjc-runtime/tmux-common.d.ts +6 -0
  22. package/dist/types/gjc-runtime/tmux-gc.d.ts +3 -3
  23. package/dist/types/gjc-runtime/tmux-sessions.d.ts +4 -0
  24. package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +18 -0
  25. package/dist/types/goals/state.d.ts +1 -1
  26. package/dist/types/goals/tools/goal-tool.d.ts +2 -0
  27. package/dist/types/main.d.ts +11 -0
  28. package/dist/types/modes/components/custom-editor.d.ts +4 -2
  29. package/dist/types/modes/components/custom-model-preset-wizard.d.ts +12 -0
  30. package/dist/types/modes/components/model-selector.d.ts +5 -2
  31. package/dist/types/modes/components/status-line.d.ts +4 -1
  32. package/dist/types/modes/controllers/input-controller.d.ts +3 -0
  33. package/dist/types/modes/controllers/selector-controller.d.ts +1 -0
  34. package/dist/types/modes/print-mode.d.ts +6 -0
  35. package/dist/types/modes/rpc/rpc-client.d.ts +21 -0
  36. package/dist/types/modes/rpc/rpc-socket-security.d.ts +7 -0
  37. package/dist/types/modes/rpc/rpc-types.d.ts +13 -0
  38. package/dist/types/modes/shared/agent-wire/command-dispatch.d.ts +2 -0
  39. package/dist/types/modes/shared/agent-wire/unattended-session.d.ts +1 -0
  40. package/dist/types/rlm/artifacts.d.ts +9 -0
  41. package/dist/types/rlm/complete-research-tool.d.ts +35 -0
  42. package/dist/types/rlm/data-context.d.ts +6 -0
  43. package/dist/types/rlm/index.d.ts +35 -0
  44. package/dist/types/rlm/notebook.d.ts +12 -0
  45. package/dist/types/rlm/preset.d.ts +23 -0
  46. package/dist/types/rlm/python-tool.d.ts +16 -0
  47. package/dist/types/rlm/report.d.ts +14 -0
  48. package/dist/types/rlm/types.d.ts +37 -0
  49. package/dist/types/sdk.d.ts +7 -0
  50. package/dist/types/session/agent-session.d.ts +21 -0
  51. package/dist/types/tools/bash-allowed-prefixes.d.ts +6 -1
  52. package/dist/types/tools/browser/attach.d.ts +19 -3
  53. package/dist/types/tools/browser/registry.d.ts +15 -0
  54. package/dist/types/tools/browser/render.d.ts +3 -0
  55. package/dist/types/tools/browser.d.ts +18 -1
  56. package/dist/types/tools/computer/render.d.ts +17 -0
  57. package/dist/types/tools/computer.d.ts +465 -0
  58. package/dist/types/tools/index.d.ts +24 -1
  59. package/dist/types/tools/job.d.ts +13 -0
  60. package/dist/types/tools/tool-timeouts.d.ts +5 -0
  61. package/dist/types/web/search/index.d.ts +32 -2
  62. package/dist/types/web/search/providers/base.d.ts +22 -0
  63. package/dist/types/web/search/providers/xai.d.ts +64 -0
  64. package/dist/types/web/search/types.d.ts +11 -3
  65. package/package.json +7 -7
  66. package/src/cli/web-search-cli.ts +123 -8
  67. package/src/cli.ts +2 -0
  68. package/src/commands/rlm.ts +19 -0
  69. package/src/commands/web-search.ts +66 -0
  70. package/src/config/keybindings.ts +11 -0
  71. package/src/config/model-profiles.ts +11 -3
  72. package/src/config/model-registry.ts +55 -1
  73. package/src/config/models-config-schema.ts +1 -0
  74. package/src/config/settings-schema.ts +67 -1
  75. package/src/edit/notebook.ts +6 -2
  76. package/src/eval/py/executor.ts +8 -1
  77. package/src/eval/py/kernel.ts +9 -4
  78. package/src/eval/py/runtime.ts +153 -32
  79. package/src/exec/bash-executor.ts +10 -4
  80. package/src/extensibility/custom-tools/types.ts +2 -0
  81. package/src/extensibility/custom-tools/wrapper.ts +2 -0
  82. package/src/extensibility/extensions/types.ts +2 -0
  83. package/src/extensibility/extensions/wrapper.ts +1 -0
  84. package/src/gjc-runtime/launch-tmux.ts +129 -1
  85. package/src/gjc-runtime/session-state-sidecar.ts +61 -1
  86. package/src/gjc-runtime/tmux-common.ts +26 -2
  87. package/src/gjc-runtime/tmux-gc.ts +40 -27
  88. package/src/gjc-runtime/tmux-sessions.ts +13 -1
  89. package/src/gjc-runtime/ultragoal-runtime.ts +340 -18
  90. package/src/goals/runtime.ts +4 -3
  91. package/src/goals/state.ts +1 -1
  92. package/src/goals/tools/goal-tool.ts +16 -3
  93. package/src/internal-urls/docs-index.generated.ts +13 -9
  94. package/src/main.ts +28 -3
  95. package/src/modes/components/custom-editor.ts +13 -4
  96. package/src/modes/components/custom-model-preset-wizard.ts +293 -0
  97. package/src/modes/components/hook-selector.ts +1 -1
  98. package/src/modes/components/model-selector.ts +72 -29
  99. package/src/modes/components/skill-message.ts +62 -8
  100. package/src/modes/components/status-line.ts +13 -1
  101. package/src/modes/controllers/input-controller.ts +60 -11
  102. package/src/modes/controllers/selector-controller.ts +39 -0
  103. package/src/modes/interactive-mode.ts +1 -1
  104. package/src/modes/print-mode.ts +14 -4
  105. package/src/modes/rpc/rpc-client.ts +250 -80
  106. package/src/modes/rpc/rpc-mode.ts +6 -12
  107. package/src/modes/rpc/rpc-socket-security.ts +103 -0
  108. package/src/modes/rpc/rpc-types.ts +10 -0
  109. package/src/modes/shared/agent-wire/command-dispatch.ts +7 -0
  110. package/src/modes/shared/agent-wire/command-validation.ts +1 -0
  111. package/src/modes/shared/agent-wire/scopes.ts +1 -0
  112. package/src/modes/shared/agent-wire/unattended-session.ts +9 -0
  113. package/src/modes/utils/hotkeys-markdown.ts +4 -2
  114. package/src/modes/utils/ui-helpers.ts +2 -2
  115. package/src/prompts/goals/goal-continuation.md +1 -0
  116. package/src/prompts/goals/goal-mode-active.md +1 -0
  117. package/src/prompts/system/rlm-report-command.md +1 -0
  118. package/src/prompts/system/rlm-research.md +23 -0
  119. package/src/prompts/tools/bash.md +23 -2
  120. package/src/prompts/tools/browser.md +7 -3
  121. package/src/prompts/tools/computer.md +74 -0
  122. package/src/prompts/tools/goal.md +3 -0
  123. package/src/prompts/tools/job.md +9 -1
  124. package/src/prompts/tools/web-search.md +7 -0
  125. package/src/rlm/artifacts.ts +60 -0
  126. package/src/rlm/complete-research-tool.ts +163 -0
  127. package/src/rlm/data-context.ts +26 -0
  128. package/src/rlm/index.ts +339 -0
  129. package/src/rlm/notebook.ts +108 -0
  130. package/src/rlm/preset.ts +76 -0
  131. package/src/rlm/python-tool.ts +68 -0
  132. package/src/rlm/report.ts +70 -0
  133. package/src/rlm/types.ts +40 -0
  134. package/src/sdk.ts +12 -0
  135. package/src/session/agent-session.ts +48 -3
  136. package/src/slash-commands/builtin-registry.ts +17 -0
  137. package/src/tools/bash-allowed-prefixes.ts +84 -1
  138. package/src/tools/bash.ts +80 -13
  139. package/src/tools/browser/attach.ts +103 -3
  140. package/src/tools/browser/registry.ts +176 -2
  141. package/src/tools/browser/render.ts +9 -1
  142. package/src/tools/browser.ts +33 -0
  143. package/src/tools/computer/render.ts +78 -0
  144. package/src/tools/computer.ts +640 -0
  145. package/src/tools/index.ts +41 -1
  146. package/src/tools/job.ts +88 -5
  147. package/src/tools/json-tree.ts +42 -29
  148. package/src/tools/renderers.ts +2 -0
  149. package/src/tools/tool-timeouts.ts +1 -0
  150. package/src/web/search/index.ts +27 -2
  151. package/src/web/search/provider.ts +16 -1
  152. package/src/web/search/providers/base.ts +22 -0
  153. package/src/web/search/providers/xai.ts +511 -0
  154. package/src/web/search/render.ts +7 -0
  155. package/src/web/search/types.ts +11 -1
@@ -0,0 +1,339 @@
1
+ /**
2
+ * RLM (research) mode entry point.
3
+ *
4
+ * Composes a research session over the existing agent/session loop (python
5
+ * kernel + read + web_search + read-only bash), optional DATA.md context, live
6
+ * notebook.ipynb, first-class complete_research report synthesis, autonomous
7
+ * goal-arg execution, and resumable .gjc/rlm/<session> artifacts.
8
+ */
9
+ import * as fs from "node:fs/promises";
10
+ import { getProjectDir } from "@gajae-code/utils";
11
+ import { type Args, parseArgs } from "../cli/args";
12
+ import { disposeKernelSessionsByOwner } from "../eval/py/executor";
13
+ import type { CustomTool } from "../extensibility/custom-tools/types";
14
+ import { type RlmPreset, runRootCommand } from "../main";
15
+ import rlmReportCommandPrompt from "../prompts/system/rlm-report-command.md" with { type: "text" };
16
+ import type { CreateAgentSessionOptions } from "../sdk";
17
+ import type { AgentSession } from "../session/agent-session";
18
+ import {
19
+ ensureRlmSessionDir,
20
+ generateRlmSessionId,
21
+ readRlmNotebookIfPresent,
22
+ resolveRlmArtifactPaths,
23
+ rlmSessionExists,
24
+ } from "./artifacts";
25
+ import {
26
+ countSuccessfulNotebookRuns,
27
+ createRlmCompleteResearchTool,
28
+ summarizeNotebookForReplay,
29
+ writeRlmReport,
30
+ } from "./complete-research-tool";
31
+ import { loadRlmDataContext, type RlmDataContext } from "./data-context";
32
+ import { RlmNotebookWriter } from "./notebook";
33
+ import { assertRlmToolAllowlist, buildRlmSystemPrompt, isRlmToolAllowed, RLM_READ_ONLY_BASH_PREFIXES } from "./preset";
34
+ import { createRlmPythonTool } from "./python-tool";
35
+ import type { RlmArtifactPaths, RlmSessionMetadata } from "./types";
36
+
37
/** RLM-owned command-line flags, separated from the arguments forwarded to the root command. */
interface ExtractedRlmFlags {
	/** Arguments that were not consumed by an RLM flag, in their original order. */
	rest: string[];
	/** Value supplied via `--data`, when present. */
	dataPath: string | undefined;
	/** Session id supplied via `--resume` / `-r`, when present. */
	resumeSessionId: string | undefined;
	/** Value supplied via `--min-successful-runs`; 0 when the flag is absent. */
	minSuccessfulRuns: number;
}
43
+
44
/** Inputs used by createRlmPreset to assemble the RLM session preset. */
export interface RlmPresetOptions {
	/** Custom tool registered as the research execution tool. */
	pythonTool: CustomTool;
	/** Optional custom tool registered alongside the python tool. */
	completeResearchTool?: CustomTool;
	/** Data context passed to the system prompt builder; `null` when there is none. */
	dataContext: RlmDataContext | null;
	/** Goal objective; when omitted a default objective is derived from the data context. */
	objective?: string;
	/** Prior-notebook replay text passed to the system prompt builder. */
	resumeContext?: string;
	/** Invoked with the session as soon as it has been created. */
	onSessionReady?: (session: AgentSession) => void;
}
52
+
53
/** Mutable state shared between runRlmCommand and the callbacks it hands to the tools and preset. */
interface RlmRunController {
	/** The live agent session, once the preset has reported it. */
	session: AgentSession | undefined;
	/** Set to true by the complete_research tool's `markCompleted` callback. */
	completed: boolean;
	/** Summary passed to `markCompleted`, if any. */
	finalSummary: string | undefined;
}
58
+
59
/**
 * Parse the value of a numeric CLI flag as a non-negative integer.
 *
 * Only plain decimal digits are accepted (surrounding whitespace is ignored).
 * Spellings that `Number()` would otherwise coerce, such as `0x10`, `1e3`,
 * `+5` or `5.0`, are rejected, as are values too large to be represented
 * exactly as a safe integer.
 *
 * @param name Flag name, used only in the error message.
 * @param value Raw flag value; `undefined` when no value followed the flag.
 * @returns The parsed integer.
 * @throws Error when the value is missing, blank, not decimal digits, or not a safe integer.
 */
function parseNonNegativeIntegerFlag(name: string, value: string | undefined): number {
	const text = value?.trim() ?? "";
	// Gate on a digits-only pattern first so Number() never sees hex/exponent/sign forms.
	const parsed = /^\d+$/.test(text) ? Number(text) : Number.NaN;
	if (!Number.isSafeInteger(parsed)) {
		throw new Error(`${name} requires a non-negative integer value.`);
	}
	return parsed;
}
69
+
70
/**
 * Pull RLM-owned flags out of argv; the remainder is forwarded to the root command.
 *
 * Recognized flags: `--data <path>` / `--data=<path>`, `--resume <id>` /
 * `-r <id>` / `--resume=<id>`, and `--min-successful-runs <n>` /
 * `--min-successful-runs=<n>`. Every other argument is kept, in order, in `rest`.
 *
 * In the space-separated form, `--data` and `--resume` reject a missing value
 * or one that starts with `-`, so a following flag is never consumed as the
 * value; use the `=` form for a value that begins with a dash. The `=` forms
 * reject an empty value.
 *
 * @param argv Raw arguments following `gjc rlm`.
 * @returns The extracted flag values plus the untouched remainder.
 * @throws Error when a recognized flag has a missing or invalid value.
 */
export function extractRlmFlags(argv: string[]): ExtractedRlmFlags {
	const rest: string[] = [];
	let dataPath: string | undefined;
	let resumeSessionId: string | undefined;
	let minSuccessfulRuns = 0;
	for (let i = 0; i < argv.length; i++) {
		const arg = argv[i];
		if (arg === "--data") {
			const next = argv[i + 1];
			if (!next || next.startsWith("-")) throw new Error("gjc rlm --data requires a path.");
			dataPath = next;
			i += 1;
		} else if (arg.startsWith("--data=")) {
			const value = arg.slice("--data=".length);
			if (value.length === 0) throw new Error("gjc rlm --data requires a path.");
			dataPath = value;
		} else if (arg === "--resume" || arg === "-r") {
			const next = argv[i + 1];
			if (!next || next.startsWith("-")) throw new Error("gjc rlm --resume requires an RLM session id.");
			resumeSessionId = next;
			i += 1;
		} else if (arg.startsWith("--resume=")) {
			const value = arg.slice("--resume=".length);
			// An empty id would otherwise survive the caller's `??` fallback as "".
			if (value.length === 0) throw new Error("gjc rlm --resume requires an RLM session id.");
			resumeSessionId = value;
		} else if (arg === "--min-successful-runs") {
			minSuccessfulRuns = parseNonNegativeIntegerFlag(arg, argv[i + 1]);
			i += 1;
		} else if (arg.startsWith("--min-successful-runs=")) {
			minSuccessfulRuns = parseNonNegativeIntegerFlag(
				"--min-successful-runs",
				arg.slice("--min-successful-runs=".length),
			);
		} else {
			rest.push(arg);
		}
	}
	return { dataPath, resumeSessionId, minSuccessfulRuns, rest };
}
104
+
105
/**
 * Extract only the `--data` flag from argv.
 *
 * @deprecated use extractRlmFlags; retained for tests and compatibility.
 * @returns The `--data` value (if any) and the arguments left after all RLM flags are removed.
 */
export function extractDataFlag(argv: string[]): { dataPath: string | undefined; rest: string[] } {
	const flags = extractRlmFlags(argv);
	return { dataPath: flags.dataPath, rest: flags.rest };
}
110
+
111
/**
 * Build the RLM preset: an options transform that pins the research tool
 * surface, plus a session-created hook that enters goal mode and enforces the
 * tool allowlist.
 *
 * @returns A preset whose `applyOptions` mutates the session options/settings
 * and whose `onSessionCreated` throws if a non-allowlisted tool is active.
 */
export function createRlmPreset({
	dataContext,
	pythonTool,
	completeResearchTool,
	objective,
	resumeContext,
	onSessionReady,
}: RlmPresetOptions): RlmPreset {
	const resolvedObjective = objective ?? buildRlmGoalObjective({ messages: [], dataContext });
	const customTools = [pythonTool];
	if (completeResearchTool) {
		customTools.push(completeResearchTool);
	}
	return {
		applyOptions: (options: CreateAgentSessionOptions, settings) => {
			// Prompt and tool surface.
			options.systemPrompt = buildRlmSystemPrompt(dataContext, resumeContext);
			options.customTools = customTools;
			options.toolNames = ["read", "web_search", "search_tool_bm25", "bash", "goal"];
			options.requireYieldTool = false;
			options.goalToolAllowedOps = ["get", "complete"];
			options.discoverableToolAllowedNames = [];

			// Bash is limited to the read-only prefix list.
			options.bashAllowedPrefixes = [...RLM_READ_ONLY_BASH_PREFIXES];
			options.bashRestrictionProfile = "read-only";

			// No skills, rules, or extensions inside the research surface.
			options.skills = [];
			options.rules = [];
			options.disableExtensionDiscovery = true;
			options.extensions = [];
			options.additionalExtensionPaths = [];
			options.preloadedExtensions = undefined;

			// Keep any existing slash commands and add `/report`.
			const inheritedCommands = options.slashCommands ?? [];
			options.slashCommands = [
				...inheritedCommands,
				{
					name: "report",
					description: "Synthesize a draft RLM report from the current notebook",
					content: rlmReportCommandPrompt,
					source: "rlm",
				},
			];

			// RLM always runs in goal mode; recipe injection stays outside the research surface.
			settings.override("goal.enabled", true);
			settings.override("tools.discoveryMode", "all");
			settings.override("recipe.enabled", false);
		},
		onSessionCreated: async (session: AgentSession) => {
			if (onSessionReady) {
				onSessionReady(session);
			}
			await ensureRlmGoalMode(session, resolvedObjective);
			// Hard boundary: fail launch if any non-allowlisted tool slipped into the active set.
			assertRlmToolAllowlist(session.getActiveToolNames());
		},
	};
}
159
+
160
/**
 * Make sure the session has a live goal, then restrict the active tools.
 *
 * An existing goal that is neither "complete" nor "dropped" is reused (and
 * resumed when goal mode is disabled or the goal is paused); otherwise a new
 * goal is created from `objective`. Afterwards the active tool set is reduced
 * to allowlisted tools, with `goal` always included.
 */
async function ensureRlmGoalMode(session: AgentSession, objective: string): Promise<void> {
	const state = session.getGoalModeState();
	const goal = state?.goal;
	if (goal && goal.status !== "complete" && goal.status !== "dropped") {
		const needsResume = goal.status === "paused" || !state?.enabled;
		if (needsResume) {
			await session.goalRuntime.resumeGoal();
		}
	} else {
		await session.goalRuntime.createGoal({ objective });
	}

	const activeNames = new Set(session.getActiveToolNames().filter(isRlmToolAllowed));
	activeNames.add("goal");
	await session.setActiveToolsByName([...activeNames]);
}
171
+
172
/**
 * Derive the goal objective for an RLM session.
 *
 * Non-blank messages are trimmed and joined with a blank line. When none
 * remain, a default objective is returned, which names the data context path
 * if a data context is present.
 */
export function buildRlmGoalObjective(input: {
	messages: readonly string[];
	dataContext: RlmDataContext | null;
}): string {
	const parts: string[] = [];
	for (const message of input.messages) {
		const trimmed = message.trim();
		if (trimmed) parts.push(trimmed);
	}
	if (parts.length > 0) {
		return parts.join("\n\n");
	}
	return input.dataContext
		? `Complete an RLM research session using data context ${input.dataContext.path}, grounding conclusions in notebook outputs and finishing with a report.`
		: "Complete this RLM research session, grounding conclusions in notebook outputs and finishing with a report.";
}
186
/**
 * Decide whether an RLM launch is autonomous: true for piped stdin, an
 * explicit `mode`, or `print === true`.
 */
export function isRlmAutonomousRun(parsed: Pick<Args, "print" | "mode" | "messages">, pipedStdin: boolean): boolean {
	if (pipedStdin) return true;
	if (parsed.mode !== undefined) return true;
	return parsed.print === true;
}
189
+
190
/**
 * Classify the launch and, for autonomous runs without an explicit `mode`,
 * switch the parsed args to print mode (mutates `parsed.print`).
 *
 * @returns true when the run is autonomous.
 */
export function prepareRlmLaunchMode(parsed: Args, pipedStdin: boolean): boolean {
	if (!isRlmAutonomousRun(parsed, pipedStdin)) {
		return false;
	}
	if (parsed.mode === undefined) {
		parsed.print = true;
	}
	return true;
}
197
+
198
/**
 * Best-effort read of the session's metadata.json.
 *
 * @returns The parsed JSON (cast, not validated), or `undefined` when the file
 * cannot be read or parsed.
 */
async function loadExistingMetadata(paths: RlmArtifactPaths): Promise<RlmSessionMetadata | undefined> {
	let parsed: unknown;
	try {
		parsed = await Bun.file(paths.metadataPath).json();
	} catch {
		return undefined;
	}
	return parsed as RlmSessionMetadata;
}
205
+
206
/**
 * Serialize the session metadata as pretty-printed JSON (2-space indent,
 * trailing newline) and write it to `input.paths.metadataPath`.
 */
async function writeRlmMetadata(input: {
	paths: RlmArtifactPaths;
	sessionId: string;
	createdAt: string;
	cwd: string;
	dataPath: string | null;
	cellCount: number;
	mode: "interactive" | "autonomous";
	resumedFrom: string | null;
	completedAt: string | null;
	finalSummary: string | null;
	minSuccessfulRuns: number;
	successfulRuns: number;
}): Promise<void> {
	const {
		paths,
		sessionId,
		createdAt,
		cwd,
		dataPath,
		cellCount,
		mode,
		resumedFrom,
		completedAt,
		finalSummary,
		minSuccessfulRuns,
		successfulRuns,
	} = input;
	// Field order here fixes the key order of the written JSON.
	const metadata: RlmSessionMetadata = {
		sessionId,
		createdAt,
		cwd,
		dataPath,
		cellCount,
		mode,
		resumedFrom,
		completedAt,
		finalSummary,
		minSuccessfulRuns,
		successfulRuns,
	};
	const serialized = JSON.stringify(metadata, null, 2);
	await Bun.write(paths.metadataPath, `${serialized}\n`);
}
235
+
236
/**
 * Entry point for `gjc rlm`.
 *
 * Extracts the RLM flags, resolves (or resumes) the session's artifact
 * directory, wires the python and complete_research tools to a shared notebook
 * writer, and runs the root command with the RLM preset. Afterwards the
 * retained python kernel is released and report.md / metadata.json are
 * written, whether or not the run succeeded.
 *
 * Finalization never hides the run's own failure: if the run threw, that error
 * is rethrown even when writing the artifacts also fails. The report and
 * metadata writes are attempted independently, and the first finalization
 * failure is thrown only when the run itself succeeded.
 *
 * @param argv Arguments following `gjc rlm`.
 * @throws Error when `--resume` names an unknown session, when the run or
 * finalization fails, or when an autonomous run ends without complete_research
 * having marked the session completed.
 */
export async function runRlmCommand(argv: string[]): Promise<void> {
	const cwd = getProjectDir();
	const { dataPath, resumeSessionId, minSuccessfulRuns, rest } = extractRlmFlags(argv);
	const dataContext = await loadRlmDataContext(cwd, dataPath);

	const sessionId = resumeSessionId ?? generateRlmSessionId();
	const paths = resolveRlmArtifactPaths(cwd, sessionId);
	if (resumeSessionId && !(await rlmSessionExists(cwd, resumeSessionId))) {
		throw new Error(`RLM session not found: ${resumeSessionId}`);
	}
	await ensureRlmSessionDir(paths);
	await fs.mkdir(paths.agentSessionDir, { recursive: true });

	const existingNotebook = resumeSessionId ? await readRlmNotebookIfPresent(cwd, sessionId) : undefined;
	const existingMetadata = await loadExistingMetadata(paths);
	const notebook = new RlmNotebookWriter(paths.notebookPath, existingNotebook);
	const pythonTool = createRlmPythonTool({
		cwd,
		sessionId,
		artifactsDir: paths.dir,
		notebook,
		managedWorkspaceVenv: true,
	});
	const controller: RlmRunController = { completed: false, finalSummary: undefined, session: undefined };
	const reportTitle = `RLM research session ${sessionId}`;
	const completeResearchTool = createRlmCompleteResearchTool({
		paths,
		notebook,
		title: reportTitle,
		dataPath: dataContext?.path ?? null,
		minSuccessfulRuns,
		// Guard `goal` as well: ensureRlmGoalMode treats it as possibly absent.
		getGoalStatus: () => controller.session?.getGoalModeState()?.goal?.status,
		markCompleted: summary => {
			controller.completed = true;
			controller.finalSummary = summary;
		},
	});

	const parsed = parseArgs(rest);
	parsed.sessionDir = paths.agentSessionDir;
	if (resumeSessionId) {
		parsed.continue = true;
	}
	// Piped stdin (non-TTY), explicit --print, and explicit --mode run as autonomous
	// research. Positional argv messages seed the interactive RLM shell, so
	// `gjc rlm "question"` still loads the TUI instead of being coerced into
	// print mode.
	const pipedStdin = process.stdin.isTTY === false;
	const autonomous = prepareRlmLaunchMode(parsed, pipedStdin);
	const resumeContext = existingNotebook ? summarizeNotebookForReplay(existingNotebook) : undefined;
	const preset = createRlmPreset({
		dataContext,
		pythonTool,
		completeResearchTool,
		objective: buildRlmGoalObjective({ messages: parsed.messages, dataContext }),
		resumeContext,
		onSessionReady: session => {
			controller.session = session;
		},
	});
	if (autonomous) {
		// Wrap the preset's option transform so the run pauses once research is completed.
		preset.applyOptions = ((applyOptions: RlmPreset["applyOptions"]) => (options, settings) => {
			applyOptions(options, settings);
			options.shouldPause = () => controller.completed;
		})(preset.applyOptions);
	}

	// Track failure with a flag: a thrown value may itself be `undefined`.
	let runFailed = false;
	let runError: unknown;
	try {
		await runRootCommand(parsed, rest, { rlmPreset: preset, suppressProcessExit: autonomous });
	} catch (error) {
		runFailed = true;
		runError = error;
	}

	// The RLM python tool owns a retained kernel keyed by `rlm:<sessionId>`; the
	// session's own dispose targets a different owner id, so release it here so
	// the persistent kernel subprocess is reaped on every exit path.
	await disposeKernelSessionsByOwner(`rlm:${sessionId}`).catch(() => {});

	// Attempt each artifact write independently and remember only the first failure.
	let finalizeFailed = false;
	let finalizeError: unknown;
	const attempt = async (step: () => Promise<unknown>): Promise<void> => {
		try {
			await step();
		} catch (error) {
			if (!finalizeFailed) {
				finalizeFailed = true;
				finalizeError = error;
			}
		}
	};
	await attempt(() =>
		writeRlmReport({
			paths,
			notebook,
			title: reportTitle,
			summary: controller.finalSummary,
			dataPath: dataContext?.path ?? null,
		}),
	);
	await attempt(() =>
		writeRlmMetadata({
			paths,
			sessionId,
			createdAt: existingMetadata?.createdAt ?? new Date().toISOString(),
			cwd,
			dataPath: dataContext?.path ?? null,
			cellCount: notebook.cellCount,
			mode: autonomous ? "autonomous" : "interactive",
			resumedFrom: resumeSessionId ?? null,
			completedAt: controller.completed ? new Date().toISOString() : null,
			finalSummary: controller.finalSummary ?? null,
			minSuccessfulRuns,
			successfulRuns: countSuccessfulNotebookRuns(notebook.document),
		}),
	);

	// The run's own error always wins over a finalization error.
	if (runFailed) {
		throw runError;
	}
	if (finalizeFailed) {
		throw finalizeError;
	}
	if (autonomous && !controller.completed) {
		throw new Error("RLM autonomous session ended before complete_research finalized the report.");
	}
}
@@ -0,0 +1,108 @@
1
+ /**
2
+ * RLM live notebook writer: appends executed cells to notebook.ipynb with a
3
+ * single per-session write queue and atomic temp-file-then-rename writes, then
4
+ * validates the persisted file via readNotebookDocument.
5
+ */
6
+ import * as fs from "node:fs/promises";
7
+ import * as path from "node:path";
8
+ import {
9
+ createEmptyNotebook,
10
+ createNotebookCell,
11
+ type NotebookDocument,
12
+ readNotebookDocument,
13
+ serializeNotebookDocument,
14
+ splitNotebookSource,
15
+ } from "../edit/notebook";
16
+ import type { RlmCellResult } from "./types";
17
+
18
/**
 * Convert a cell execution result into notebook output entries.
 *
 * Emits, in order: one stream entry for non-empty text output (named "stderr"
 * when the exit code is defined and non-zero, otherwise "stdout"), one
 * display_data entry per image/json display output, and stderr marker entries
 * for cancelled and truncated results. Other display output types are skipped.
 */
function buildCodeOutputs(result: RlmCellResult): unknown[] {
	const stderrMarker = (text: string) => ({ output_type: "stream", name: "stderr", text: [text] });
	const displayData = (mimeType: string, data: unknown) => ({
		output_type: "display_data",
		data: { [mimeType]: data },
		metadata: {},
	});

	const outputs: unknown[] = [];
	if (result.output.length > 0) {
		const failed = result.exitCode !== undefined && result.exitCode !== 0;
		outputs.push({
			output_type: "stream",
			name: failed ? "stderr" : "stdout",
			text: splitNotebookSource(result.output),
		});
	}
	for (const display of result.displayOutputs) {
		switch (display.type) {
			case "image":
				outputs.push(displayData(display.mimeType, display.data));
				break;
			case "json":
				outputs.push(displayData("application/json", display.data));
				break;
		}
	}
	if (result.cancelled) outputs.push(stderrMarker("[cell cancelled]\n"));
	if (result.truncated) outputs.push(stderrMarker("[output truncated]\n"));
	return outputs;
}
42
+
43
/**
 * Appends cells to an in-memory notebook document and persists the whole
 * document after every append.
 *
 * Writes are serialized through a single promise chain. Each write snapshots
 * the full document, so a later successful write supersedes an earlier failed
 * one. A failed write rejects only the promise returned for that append; it
 * does not stop later appends from being written.
 */
export class RlmNotebookWriter {
	readonly #notebookPath: string;
	readonly #document: NotebookDocument;
	#queue: Promise<void> = Promise.resolve();

	/**
	 * @param notebookPath Destination path of the notebook file.
	 * @param initial Existing document to continue appending to; a new empty notebook when omitted.
	 */
	constructor(notebookPath: string, initial?: NotebookDocument) {
		this.#notebookPath = notebookPath;
		this.#document = initial ?? createEmptyNotebook();
	}

	/** The live in-memory document (not a copy). */
	get document(): NotebookDocument {
		return this.#document;
	}

	/** Number of cells currently in the in-memory document. */
	get cellCount(): number {
		return this.#document.cells.length;
	}

	/** Append a markdown cell and persist the notebook. */
	appendMarkdown(source: string): Promise<void> {
		this.#document.cells.push(createNotebookCell("markdown", source));
		return this.#enqueueWrite();
	}

	/** Append a code cell with the next execution count and its outputs, then persist the notebook. */
	appendCode(code: string, result: RlmCellResult): Promise<void> {
		const cell = createNotebookCell("code", code);
		cell.execution_count = this.#nextExecutionCount();
		cell.outputs = buildCodeOutputs(result);
		this.#document.cells.push(cell);
		return this.#enqueueWrite();
	}

	/** Resolve once all queued writes have flushed; rejects if the most recent write failed. */
	flush(): Promise<void> {
		return this.#queue;
	}

	/** One more than the largest numeric execution_count already in the document. */
	#nextExecutionCount(): number {
		let max = 0;
		for (const cell of this.#document.cells) {
			const count = cell.execution_count;
			if (typeof count === "number" && count > max) max = count;
		}
		return max + 1;
	}

	#enqueueWrite(): Promise<void> {
		// Snapshot synchronously so the written content matches the document at append time.
		const snapshot = serializeNotebookDocument(this.#document);
		// Recover from an earlier failed write before chaining; otherwise a single
		// rejection would poison the queue and every later write would be skipped.
		const write = this.#queue.catch(() => {}).then(() => this.#atomicWrite(snapshot));
		this.#queue = write;
		return write;
	}

	/** Write to a uniquely named temp file in the target directory, rename it into place, then re-read to validate. */
	async #atomicWrite(content: string): Promise<void> {
		const dir = path.dirname(this.#notebookPath);
		const base = path.basename(this.#notebookPath);
		const tmp = path.join(dir, `.${base}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}.tmp`);
		try {
			await Bun.write(tmp, content);
			await fs.rename(tmp, this.#notebookPath);
		} catch (error) {
			// Remove the temp file whether the write or the rename failed, and keep
			// the original error even if cleanup itself fails.
			await fs.rm(tmp, { force: true }).catch(() => {});
			throw error;
		}
		// Post-write validation: surfaces corruption immediately.
		await readNotebookDocument(this.#notebookPath, this.#notebookPath);
	}
}
@@ -0,0 +1,76 @@
1
+ /**
2
+ * RLM research preset: the static research system prompt, the exact tool
3
+ * allowlist, and a hard boundary assertion that fails launch if any
4
+ * non-allowlisted tool is active.
5
+ */
6
+ import rlmResearchPrompt from "../prompts/system/rlm-research.md" with { type: "text" };
7
+ import type { RlmDataContext } from "./data-context";
8
+
9
+ /**
10
+ * RLM tool surface: `python` and `complete_research` are RLM-owned custom tools; `read` and `web_search` are the existing built-ins. `bash` is exposed
11
+ * through the read-only restriction profile below for inspection commands only,
12
+ * and `goal` is required so RLM sessions cannot finish without explicit goal
13
+ * completion. Everything else (edit, write, task, skill, browser, eval-js, ...) is excluded.
14
+ */
15
// Command prefixes handed to the bash tool as `bashAllowedPrefixes` in RLM mode.
// NOTE(review): ripgrep's `--pre <command>` runs an external program; confirm the
// prefix enforcement rejects it, otherwise `rg` is not strictly read-only.
export const RLM_READ_ONLY_BASH_PREFIXES: readonly string[] = ["grep", "rg", "tree", "ls", "pwd", "wc", "du", "file", "stat"];
26
/** Lower-case names of every tool that may be active in an RLM session. */
export const RLM_TOOL_ALLOWLIST: readonly string[] = ["python", "read", "web_search", "search_tool_bm25", "bash", "goal", "complete_research"];
35
+
36
/** True when `name`, compared case-insensitively, is on the RLM tool allowlist. */
export function isRlmToolAllowed(name: string): boolean {
	const normalized = name.toLowerCase();
	return RLM_TOOL_ALLOWLIST.some(allowed => allowed === normalized);
}
39
+
40
/**
 * Hard boundary check for the RLM tool surface.
 *
 * Intended to run once the session's tool registry is fully assembled and
 * before the session runs, so a tool leaked in by defaults, discovery, or
 * extensions fails the launch loudly.
 *
 * @param activeToolNames Names of the currently active tools.
 * @throws Error listing every active tool that is not on the allowlist.
 */
export function assertRlmToolAllowlist(activeToolNames: readonly string[]): void {
	const leaked: string[] = [];
	for (const name of activeToolNames) {
		if (!isRlmToolAllowed(name)) leaked.push(name);
	}
	if (leaked.length === 0) return;

	const violation = `RLM tool boundary violation: non-allowlisted active tool(s) [${leaked.join(", ")}]. `;
	const allowed = `RLM mode allows only: ${RLM_TOOL_ALLOWLIST.join(", ")}.`;
	throw new Error(violation + allowed);
}
54
+
55
/** The research prompt text (exported for testing / prompt assembly). */
export const RLM_RESEARCH_PROMPT: string = rlmResearchPrompt;

/**
 * Create the systemPrompt transform used for RLM sessions.
 *
 * The returned function leaves the default blocks in place and appends, in
 * order: the research prompt, a data-context block (when a data context is
 * given), and a prior-notebook replay block (when `resumeContext` is not blank).
 *
 * @param dataContext Data context to embed, or `null` for none.
 * @param resumeContext Optional replay text from a previous notebook.
 * @returns A function mapping the default prompt blocks to the RLM prompt blocks.
 */
export function buildRlmSystemPrompt(
	dataContext: RlmDataContext | null,
	resumeContext?: string,
): (defaultPrompt: string[]) => string[] {
	return function appendRlmBlocks(defaultPrompt: string[]): string[] {
		const dataBlocks = dataContext ? [`# Data context (from ${dataContext.path})\n\n${dataContext.content}`] : [];
		const hasReplay = resumeContext !== undefined && resumeContext.trim().length > 0;
		const replayBlocks = hasReplay ? [`# Prior notebook replay context\n\n${resumeContext}`] : [];
		return [...defaultPrompt, rlmResearchPrompt, ...dataBlocks, ...replayBlocks];
	};
}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * RLM `python` tool: the model-facing research execution tool. Wraps the shared
3
+ * persistent Python kernel executor and records every call as a notebook cell.
4
+ */
5
+ import type { AgentToolResult } from "@gajae-code/agent-core";
6
+ import { type Static, z } from "@gajae-code/ai";
7
+ import { executePython } from "../eval/py/executor";
8
+ import { RLM_MANAGED_PYTHON_PACKAGES } from "../eval/py/runtime";
9
+ import type { CustomTool } from "../extensibility/custom-tools/types";
10
+ import type { RlmNotebookWriter } from "./notebook";
11
+ import type { RlmCellResult } from "./types";
12
+
13
/** Name under which the RLM python tool is exposed to the model. */
export const RLM_PYTHON_TOOL_NAME = "python";

/** Per-session wiring for the RLM python tool. */
export interface RlmPythonToolContext {
	/** RLM session id; used to derive the kernel session and owner ids. */
	sessionId: string;
	/** Working directory passed to the Python executor. */
	cwd: string;
	/** Artifacts directory passed to the Python executor. */
	artifactsDir: string;
	/** Notebook writer that records each executed call as a code cell. */
	notebook: RlmNotebookWriter;
	/** When true, the executor is asked for a managed workspace venv seeded with the RLM package list. */
	managedWorkspaceVenv?: boolean;
}

/** Parameter schema for the python tool: a single `code` string. */
const paramsSchema = z.object({
	code: z.string().describe("Python source to execute in the persistent research kernel. State persists across calls."),
});
28
+
29
/**
 * Create the model-facing `python` tool for an RLM session.
 *
 * Each call executes the code through `executePython` using a session kernel
 * keyed by `rlm:<sessionId>`, appends the code and its result to the notebook,
 * and returns the text output (or "(no output)" when it is empty).
 */
export function createRlmPythonTool(rlm: RlmPythonToolContext): CustomTool<typeof paramsSchema> {
	return {
		name: RLM_PYTHON_TOOL_NAME,
		label: "Python",
		description:
			"Execute Python in the persistent research kernel. Variables, imports, and loaded data persist across calls like notebook cells. Every call is recorded as a cell in the session notebook.",
		parameters: paramsSchema,
		strict: true,
		concurrency: "exclusive",
		async execute(
			_toolCallId: string,
			params: Static<typeof paramsSchema>,
			_onUpdate,
			_ctx,
			signal?: AbortSignal,
		): Promise<AgentToolResult> {
			// The same id serves as both the kernel session id and the kernel owner id.
			const kernelId = `rlm:${rlm.sessionId}`;
			const runtimeOptions = rlm.managedWorkspaceVenv
				? { managedWorkspaceVenv: true, seedPackages: RLM_MANAGED_PYTHON_PACKAGES }
				: undefined;
			const result = await executePython(params.code, {
				cwd: rlm.cwd,
				kernelMode: "session",
				sessionId: kernelId,
				kernelOwnerId: kernelId,
				artifactsDir: rlm.artifactsDir,
				runtimeOptions,
				signal,
			});

			const { output, exitCode, cancelled, truncated, displayOutputs } = result;
			const cell: RlmCellResult = { output, exitCode, cancelled, truncated, displayOutputs };
			await rlm.notebook.appendCode(params.code, cell);

			return { content: [{ type: "text", text: output.length > 0 ? output : "(no output)" }] };
		},
	};
}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Deterministic RLM report synthesis: turns the accumulated notebook (plus an
3
+ * optional model-provided summary) into a Markdown research report.
4
+ */
5
+ import type { NotebookCell, NotebookDocument } from "../edit/notebook";
6
+
7
/** Input for synthesizeRlmReport. */
export interface RlmReportInput {
	/** Report heading. */
	title: string;
	/** Optional summary; rendered as a "Summary" section when not blank. */
	summary?: string;
	/** Notebook whose cells are rendered into the report. */
	notebook: NotebookDocument;
	/** Data context path; listed in the header when truthy. */
	dataPath?: string | null;
	/** Timestamp shown in the header; defaults to the current time in ISO format. */
	generatedAt?: string;
	/** Maximum characters of stream output shown per cell; defaults to 4000. */
	maxOutputChars?: number;
}

/** Flatten a notebook text field (string, string array, or absent) into a single string. */
function cellText(value: string | string[] | undefined): string {
	if (value === undefined) return "";
	return Array.isArray(value) ? value.join("") : value;
}

/** Concatenate the text of every `stream` output of a cell; other output types are ignored. */
function streamOutputText(cell: NotebookCell): string {
	const outputs = Array.isArray(cell.outputs) ? cell.outputs : [];
	const parts: string[] = [];
	for (const out of outputs) {
		if (out && typeof out === "object" && (out as Record<string, unknown>).output_type === "stream") {
			parts.push(cellText((out as Record<string, unknown>).text as string | string[] | undefined));
		}
	}
	return parts.join("");
}

/**
 * Choose a backtick fence for `content`: at least three backticks, and always
 * longer than the longest backtick run inside the content, so nothing in the
 * content can close the fence early.
 */
function fenceFor(content: string): string {
	let longestRun = 0;
	for (const run of content.match(/`+/g) ?? []) {
		if (run.length > longestRun) longestRun = run.length;
	}
	return "`".repeat(Math.max(3, longestRun + 1));
}

/**
 * Render the notebook (plus an optional summary) as a Markdown report.
 *
 * Layout: title, a header list (generation time, number of code cells,
 * optional data context), an optional "Summary" section, then a "Notebook"
 * section. Markdown cells are included verbatim; each code cell becomes a
 * numbered "Cell N" heading with its source in a python fence, followed by its
 * stream output (truncated to `maxOutputChars`) in a plain fence. Fences grow
 * beyond three backticks when the fenced text itself contains backtick runs.
 *
 * @returns The report text, ending with exactly one newline.
 */
export function synthesizeRlmReport(input: RlmReportInput): string {
	const generatedAt = input.generatedAt ?? new Date().toISOString();
	const maxOutput = input.maxOutputChars ?? 4000;
	const codeCells = input.notebook.cells.filter(cell => cell.cell_type === "code");

	const lines: string[] = [];
	lines.push(`# ${input.title}`, "");
	lines.push(`- Generated: ${generatedAt}`);
	lines.push(`- Cells executed: ${codeCells.length}`);
	if (input.dataPath) {
		lines.push(`- Data context: ${input.dataPath}`);
	}
	lines.push("");

	if (input.summary && input.summary.trim().length > 0) {
		lines.push("## Summary", "", input.summary.trim(), "");
	}

	lines.push("## Notebook", "");
	let codeIndex = 0;
	for (const cell of input.notebook.cells) {
		if (cell.cell_type === "markdown") {
			const text = cellText(cell.source).trim();
			if (text.length > 0) {
				lines.push(text, "");
			}
		} else if (cell.cell_type === "code") {
			codeIndex += 1;
			const source = cellText(cell.source).trimEnd();
			const sourceFence = fenceFor(source);
			lines.push(`### Cell ${codeIndex}`, "", `${sourceFence}python`, source, sourceFence, "");
			const output = streamOutputText(cell).trimEnd();
			if (output.length > 0) {
				const shown = output.length > maxOutput ? `${output.slice(0, maxOutput)}\n... [truncated]` : output;
				const outputFence = fenceFor(shown);
				lines.push(outputFence, shown, outputFence, "");
			}
		}
	}

	return `${lines.join("\n").trimEnd()}\n`;
}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Shared types for RLM (research) mode.
3
+ */
4
+ import type { KernelDisplayOutput } from "../eval/py/kernel";
5
+
6
/** Resolved on-disk locations for one RLM session's artifacts. */
export interface RlmArtifactPaths {
	/** Absolute session directory: <cwd>/.gjc/rlm/<sessionId>/ */
	dir: string;
	/** Directory for the underlying GJC conversation session files. */
	agentSessionDir: string;
	/** Absolute path to the live notebook.ipynb */
	notebookPath: string;
	/** Absolute path to the synthesized report.md */
	reportPath: string;
	/** Absolute path to the session metadata.json */
	metadataPath: string;
}
18
+
19
/** Outcome of a single RLM python cell execution. */
export interface RlmCellResult {
	/** Text output of the execution. */
	output: string;
	/** Rich display outputs produced by the kernel. */
	displayOutputs: KernelDisplayOutput[];
	/** Exit code when one was reported. */
	exitCode: number | undefined;
	/** Whether the execution was cancelled. */
	cancelled: boolean;
	/** Whether the output was truncated. */
	truncated: boolean;
}
27
+
28
/** Shape of an RLM session's metadata.json. */
export interface RlmSessionMetadata {
	/** RLM session id. */
	sessionId: string;
	/** ISO timestamp of session creation. */
	createdAt: string;
	/** Project directory the session ran in. */
	cwd: string;
	/** Data context path, or null when none was used. */
	dataPath: string | null;
	/** Number of notebook cells at the time of writing. */
	cellCount: number;
	/** How the session was launched. */
	mode?: "interactive" | "autonomous";
	/** Session id passed to `--resume`, or null for a fresh session. */
	resumedFrom?: string | null;
	/** ISO timestamp of completion, or null when the session did not complete. */
	completedAt?: string | null;
	/** Final summary recorded at completion, or null. */
	finalSummary?: string | null;
	/** Value of `--min-successful-runs` for the run. */
	minSuccessfulRuns?: number;
	/** Count of successful notebook runs at the time of writing. */
	successfulRuns?: number;
}