@oh-my-pi/pi-coding-agent 13.14.2 → 13.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/CHANGELOG.md +150 -0
  2. package/package.json +10 -8
  3. package/src/autoresearch/command-initialize.md +34 -0
  4. package/src/autoresearch/command-resume.md +17 -0
  5. package/src/autoresearch/contract.ts +332 -0
  6. package/src/autoresearch/dashboard.ts +447 -0
  7. package/src/autoresearch/git.ts +243 -0
  8. package/src/autoresearch/helpers.ts +458 -0
  9. package/src/autoresearch/index.ts +693 -0
  10. package/src/autoresearch/prompt.md +227 -0
  11. package/src/autoresearch/resume-message.md +16 -0
  12. package/src/autoresearch/state.ts +386 -0
  13. package/src/autoresearch/tools/init-experiment.ts +310 -0
  14. package/src/autoresearch/tools/log-experiment.ts +833 -0
  15. package/src/autoresearch/tools/run-experiment.ts +640 -0
  16. package/src/autoresearch/types.ts +218 -0
  17. package/src/cli/args.ts +8 -2
  18. package/src/cli/initial-message.ts +58 -0
  19. package/src/config/keybindings.ts +423 -212
  20. package/src/config/model-registry.ts +1 -0
  21. package/src/config/model-resolver.ts +57 -9
  22. package/src/config/settings-schema.ts +38 -10
  23. package/src/config/settings.ts +1 -4
  24. package/src/export/html/template.css +43 -13
  25. package/src/export/html/template.generated.ts +1 -1
  26. package/src/export/html/template.html +1 -0
  27. package/src/export/html/template.js +107 -0
  28. package/src/extensibility/extensions/types.ts +31 -8
  29. package/src/internal-urls/docs-index.generated.ts +1 -1
  30. package/src/lsp/index.ts +1 -1
  31. package/src/main.ts +44 -44
  32. package/src/mcp/oauth-discovery.ts +1 -1
  33. package/src/modes/acp/acp-agent.ts +957 -0
  34. package/src/modes/acp/acp-event-mapper.ts +531 -0
  35. package/src/modes/acp/acp-mode.ts +13 -0
  36. package/src/modes/acp/index.ts +2 -0
  37. package/src/modes/components/agent-dashboard.ts +5 -4
  38. package/src/modes/components/custom-editor.ts +53 -51
  39. package/src/modes/components/extensions/extension-dashboard.ts +2 -1
  40. package/src/modes/components/history-search.ts +2 -1
  41. package/src/modes/components/hook-editor.ts +2 -1
  42. package/src/modes/components/hook-input.ts +8 -7
  43. package/src/modes/components/hook-selector.ts +15 -10
  44. package/src/modes/components/keybinding-hints.ts +9 -9
  45. package/src/modes/components/login-dialog.ts +3 -3
  46. package/src/modes/components/mcp-add-wizard.ts +2 -1
  47. package/src/modes/components/model-selector.ts +14 -3
  48. package/src/modes/components/oauth-selector.ts +2 -1
  49. package/src/modes/components/session-selector.ts +2 -1
  50. package/src/modes/components/settings-selector.ts +2 -1
  51. package/src/modes/components/status-line-segment-editor.ts +2 -1
  52. package/src/modes/components/tree-selector.ts +3 -2
  53. package/src/modes/components/user-message-selector.ts +3 -8
  54. package/src/modes/components/user-message.ts +16 -0
  55. package/src/modes/controllers/extension-ui-controller.ts +89 -4
  56. package/src/modes/controllers/input-controller.ts +48 -29
  57. package/src/modes/controllers/mcp-command-controller.ts +1 -1
  58. package/src/modes/index.ts +1 -0
  59. package/src/modes/interactive-mode.ts +17 -5
  60. package/src/modes/print-mode.ts +1 -1
  61. package/src/modes/prompt-action-autocomplete.ts +7 -7
  62. package/src/modes/rpc/rpc-mode.ts +7 -2
  63. package/src/modes/rpc/rpc-types.ts +1 -0
  64. package/src/modes/theme/theme.ts +53 -44
  65. package/src/modes/types.ts +9 -2
  66. package/src/modes/utils/hotkeys-markdown.ts +20 -20
  67. package/src/modes/utils/keybinding-matchers.ts +21 -0
  68. package/src/modes/utils/ui-helpers.ts +1 -1
  69. package/src/patch/hashline.ts +139 -127
  70. package/src/patch/index.ts +77 -59
  71. package/src/patch/shared.ts +19 -11
  72. package/src/prompts/tools/hashline.md +43 -116
  73. package/src/sdk.ts +34 -17
  74. package/src/session/agent-session.ts +436 -86
  75. package/src/session/messages.ts +23 -0
  76. package/src/session/session-manager.ts +97 -31
  77. package/src/tools/ask.ts +56 -30
  78. package/src/tools/bash-interceptor.ts +1 -39
  79. package/src/tools/bash-skill-urls.ts +1 -1
  80. package/src/tools/browser.ts +1 -1
  81. package/src/tools/gemini-image.ts +1 -1
  82. package/src/tools/resolve.ts +1 -1
  83. package/src/utils/child-process.ts +88 -0
  84. package/src/utils/image-input.ts +11 -1
  85. package/src/web/search/providers/codex.ts +10 -3
@@ -0,0 +1,310 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import { StringEnum } from "@oh-my-pi/pi-ai";
4
+ import { Text } from "@oh-my-pi/pi-tui";
5
+ import { Type } from "@sinclair/typebox";
6
+ import type { ToolDefinition } from "../../extensibility/extensions";
7
+ import type { Theme } from "../../modes/theme/theme";
8
+ import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
9
+ import {
10
+ buildAutoresearchSegmentFingerprint,
11
+ contractListsEqual,
12
+ contractPathListsEqual,
13
+ loadAutoresearchScriptSnapshot,
14
+ readAutoresearchContract,
15
+ } from "../contract";
16
+ import {
17
+ inferMetricUnitFromName,
18
+ isAutoresearchShCommand,
19
+ readMaxExperiments,
20
+ readPendingRunSummary,
21
+ resolveWorkDir,
22
+ validateWorkDir,
23
+ } from "../helpers";
24
+ import { cloneExperimentState } from "../state";
25
+ import type { AutoresearchToolFactoryOptions, ExperimentState } from "../types";
26
+
27
/** Builds a string schema that carries only a description. */
const describedString = (description: string) => Type.String({ description });

/** Builds a string-array schema; the options object is handed to TypeBox untouched. */
const describedStringList = (listOptions: { description: string; minItems?: number }) =>
	Type.Array(Type.String(), listOptions);

/**
 * Parameter schema for the `init_experiment` tool.
 *
 * `name`, `metric_name`, `benchmark_command` and `scope_paths` are required;
 * the remaining fields are optional.
 */
const initExperimentSchema = Type.Object({
	name: describedString("Human-readable experiment name."),
	metric_name: describedString("Primary metric name shown in the dashboard."),
	metric_unit: Type.Optional(describedString("Unit for the primary metric, for example µs, ms, s, kb, or empty.")),
	direction: Type.Optional(
		StringEnum(["lower", "higher"], {
			description: "Whether lower or higher values are better. Defaults to lower.",
		}),
	),
	benchmark_command: describedString("Benchmark command recorded in autoresearch.md."),
	scope_paths: describedStringList({
		description: "Files in Scope from autoresearch.md. Must be non-empty.",
		minItems: 1,
	}),
	off_limits: Type.Optional(describedStringList({ description: "Off Limits paths from autoresearch.md." })),
	constraints: Type.Optional(describedStringList({ description: "Constraints from autoresearch.md." })),
});
62
+
63
/** Structured payload attached to a successful `init_experiment` result. */
type InitExperimentDetails = {
	/** Experiment state reported alongside the result text. */
	state: ExperimentState;
};
66
+
67
/** Builds the text-only tool result used for every `init_experiment` failure. */
function initExperimentError(message: string): { content: { type: "text"; text: string }[] } {
	return { content: [{ type: "text", text: `Error: ${message}` }] };
}

/**
 * Creates the `init_experiment` tool definition.
 *
 * When executed, the tool:
 * 1. validates the working directory and refuses to run while a pending run is unlogged;
 * 2. reads the autoresearch contract and script snapshot and reports any errors they carry;
 * 3. checks every supplied parameter against the contract and returns an `Error: …`
 *    text result on the first mismatch;
 * 4. writes a `config` line to `autoresearch.jsonl` in the working directory
 *    (overwriting when the state has no results, appending otherwise);
 * 5. commits the new values to the runtime state, bumping `currentSegment` when
 *    re-initializing, resets the per-run runtime fields, and refreshes the dashboard.
 *
 * @param options Factory options supplying the runtime accessor and the dashboard.
 * @returns The tool definition, including its call/result renderers.
 */
export function createInitExperimentTool(
	options: AutoresearchToolFactoryOptions,
): ToolDefinition<typeof initExperimentSchema, InitExperimentDetails> {
	return {
		name: "init_experiment",
		label: "Init Experiment",
		description:
			"Initialize or reset the autoresearch session for the current optimization target before the first logged run of a segment.",
		parameters: initExperimentSchema,
		defaultInactive: true,
		async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
			const workDirError = validateWorkDir(ctx.cwd);
			if (workDirError) {
				return initExperimentError(workDirError);
			}

			const runtime = options.getRuntime(ctx);
			const state = runtime.state;
			// Existing results mean this call starts a new segment instead of a fresh log.
			const isReinitializing = state.results.length > 0;
			const workDir = resolveWorkDir(ctx.cwd);
			const pendingRun = await readPendingRunSummary(workDir, collectLoggedRunNumbers(state.results));
			if (pendingRun) {
				return initExperimentError(
					`run #${pendingRun.runNumber} has not been logged yet. ` +
						"Call log_experiment before re-initializing the current segment.",
				);
			}

			const contractResult = readAutoresearchContract(workDir);
			const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
			const errors = [...contractResult.errors, ...scriptSnapshot.errors];
			if (errors.length > 0) {
				return initExperimentError(errors.join(" "));
			}

			const contract = contractResult.contract;
			const benchmarkContract = contract.benchmark;
			const expectedDirection = benchmarkContract.direction ?? "lower";
			const expectedMetricUnit = benchmarkContract.metricUnit;

			// Normalize once so validation, state and the JSONL config line all agree.
			const metricName = params.metric_name.trim();
			const benchmarkCommand = params.benchmark_command.trim();
			const metricUnit = params.metric_unit ?? "";
			const bestDirection = params.direction ?? "lower";

			if (benchmarkContract.command && !isAutoresearchShCommand(benchmarkContract.command)) {
				return initExperimentError(
					"Benchmark.command in autoresearch.md must invoke `autoresearch.sh` directly. " +
						"Move the real workload into `autoresearch.sh` and re-run init_experiment.",
				);
			}
			if (benchmarkContract.command !== benchmarkCommand) {
				return initExperimentError(
					"benchmark_command does not match autoresearch.md. " +
						`Expected: ${benchmarkContract.command ?? "(missing)"}\nReceived: ${params.benchmark_command}`,
				);
			}
			if (benchmarkContract.primaryMetric !== metricName) {
				return initExperimentError(
					"metric_name does not match autoresearch.md. " +
						`Expected: ${benchmarkContract.primaryMetric ?? "(missing)"}\nReceived: ${params.metric_name}`,
				);
			}
			if (metricUnit !== expectedMetricUnit) {
				return initExperimentError(
					"metric_unit does not match autoresearch.md. " +
						`Expected: ${expectedMetricUnit || "(empty)"}\nReceived: ${params.metric_unit ?? "(empty)"}`,
				);
			}
			if (bestDirection !== expectedDirection) {
				return initExperimentError(
					"direction does not match autoresearch.md. " +
						`Expected: ${expectedDirection}\nReceived: ${params.direction ?? "lower"}`,
				);
			}
			if (!contractPathListsEqual(params.scope_paths, contract.scopePaths)) {
				return initExperimentError(
					"scope_paths do not match autoresearch.md. " + `Expected: ${contract.scopePaths.join(", ")}`,
				);
			}
			if (!contractPathListsEqual(params.off_limits ?? [], contract.offLimits)) {
				return initExperimentError(
					"off_limits do not match autoresearch.md. " +
						`Expected: ${contract.offLimits.join(", ") || "(empty)"}`,
				);
			}
			if (!contractListsEqual(params.constraints ?? [], contract.constraints)) {
				return initExperimentError(
					"constraints do not match autoresearch.md. " +
						`Expected: ${contract.constraints.join(", ") || "(empty)"}`,
				);
			}

			const segmentFingerprint = buildAutoresearchSegmentFingerprint(contract, {
				benchmarkScript: scriptSnapshot.benchmarkScript,
				checksScript: scriptSnapshot.checksScript,
			});

			// Compute everything up front so a failing write leaves the state untouched.
			const maxExperiments = readMaxExperiments(ctx.cwd);
			const secondaryMetrics = benchmarkContract.secondaryMetrics.map(name => ({
				name,
				unit: inferMetricUnitFromName(name),
			}));
			const scopePaths = [...contract.scopePaths];
			const offLimits = [...contract.offLimits];
			const constraints = [...contract.constraints];

			const jsonlPath = path.join(workDir, "autoresearch.jsonl");
			const configLine = JSON.stringify({
				type: "config",
				name: params.name,
				metricName,
				metricUnit,
				bestDirection,
				benchmarkCommand,
				secondaryMetrics: secondaryMetrics.map(metric => metric.name),
				scopePaths,
				offLimits,
				constraints,
				segmentFingerprint,
			});

			// Earlier segments stay in the log on re-init; a first init starts the file over.
			if (isReinitializing) {
				fs.appendFileSync(jsonlPath, `${configLine}\n`);
			} else {
				fs.writeFileSync(jsonlPath, `${configLine}\n`);
			}

			// The config line is on disk; now commit the same values to the in-memory state.
			state.name = params.name;
			state.metricName = metricName;
			state.metricUnit = metricUnit;
			state.bestDirection = bestDirection;
			state.maxExperiments = maxExperiments;
			state.bestMetric = null;
			state.confidence = null;
			state.secondaryMetrics = secondaryMetrics;
			state.benchmarkCommand = benchmarkCommand;
			state.scopePaths = scopePaths;
			state.offLimits = offLimits;
			state.constraints = constraints;
			state.segmentFingerprint = segmentFingerprint;
			if (isReinitializing) {
				state.currentSegment += 1;
			}

			runtime.autoresearchMode = true;
			runtime.autoResumeArmed = true;
			runtime.lastAutoResumePendingRunNumber = null;
			runtime.lastRunChecks = null;
			runtime.lastRunDuration = null;
			runtime.lastRunAsi = null;
			runtime.lastRunArtifactDir = null;
			runtime.lastRunNumber = null;
			runtime.lastRunSummary = null;
			options.dashboard.updateWidget(ctx, runtime);
			options.dashboard.requestRender();

			const lines = [
				`Experiment initialized: ${state.name}`,
				`Metric: ${state.metricName} (${state.metricUnit || "unitless"}, ${state.bestDirection} is better)`,
				`Benchmark command: ${state.benchmarkCommand}`,
				`Working directory: ${workDir}`,
				`Files in Scope: ${state.scopePaths.join(", ")}`,
				isReinitializing
					? "Previous results remain in history. This starts a new segment and requires a fresh baseline."
					: "Now run the baseline experiment and log it.",
			];
			if (state.maxExperiments !== null) {
				lines.push(`Max iterations: ${state.maxExperiments}`);
			}

			return {
				content: [{ type: "text", text: lines.join("\n") }],
				details: { state: cloneExperimentState(state) },
			};
		},
		renderCall(args, _options, theme): Text {
			return new Text(renderInitCall(args.name, theme), 0, 0);
		},
		renderResult(result): Text {
			// Only the first text part is rendered; tabs are replaced before display.
			const text = replaceTabs(result.content.find(part => part.type === "text")?.text ?? "");
			return new Text(text, 0, 0);
		},
	};
}
297
+
298
/**
 * Formats the one-line call header for `init_experiment`.
 *
 * @param name Experiment name supplied to the tool call.
 * @param theme Theme used to colour the tool title and the name.
 * @returns The styled tool title followed by a space and the styled name
 *   (tabs replaced, then truncated with a width argument of 100).
 */
function renderInitCall(name: string, theme: Theme): string {
	const title = theme.fg("toolTitle", theme.bold("init_experiment"));
	const displayName = truncateToWidth(replaceTabs(name), 100);
	const label = theme.fg("accent", displayName);
	return `${title} ${label}`;
}
301
+
302
/**
 * Gathers the run numbers of all results that have one.
 *
 * @param results Logged experiment results.
 * @returns A set of every non-null `runNumber` found in `results`.
 */
function collectLoggedRunNumbers(results: ExperimentState["results"]): Set<number> {
	const logged = new Set<number>();
	results.forEach(entry => {
		const { runNumber } = entry;
		// Entries with a null run number are skipped.
		if (runNumber === null) {
			return;
		}
		logged.add(runNumber);
	});
	return logged;
}