@onyx-robotics/agent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ import type { LocalResearchBranchStartedRecord } from "../protocol"
2
+
3
+ import {
4
+ descriptionOption,
5
+ nameOption,
6
+ requireOption,
7
+ type Args,
8
+ } from "../lib/args"
9
+ import { emitEvent } from "../lib/events"
10
+ import {
11
+ gitBranchForName,
12
+ currentCommit,
13
+ git,
14
+ gitResult,
15
+ repoRoot,
16
+ } from "../lib/git"
17
+ import { appendBranchToMarkdown, type MetricDirection } from "../lib/markdown"
18
+ import { appendOutbox, readState, writeState } from "../lib/outbox"
19
+ import { branchStateKey, resolveProjectPath } from "../lib/project"
20
+ import { flushOutbox } from "../lib/sync"
21
+
22
/**
 * Creates the git branch for a research branch (or checks it out when it
 * already exists) and records the branch locally: a markdown entry, an outbox
 * record, the state file, and a `branch_created` event.
 *
 * Options read from `args`: `--metric` (required), `--unit` (optional) and
 * `--direction` (defaults to "maximize").
 *
 * @throws Error when `--direction` is neither "maximize" nor "minimize".
 */
export async function commandBranchCreate(args: Args) {
  const root = await repoRoot()
  const projectPath = await resolveProjectPath(root, args)
  const name = nameOption(args)
  const metricName = requireOption(args, "metric")
  const metricUnit = args.options.unit ?? null

  // Check the raw option value first, then treat it as a MetricDirection.
  const requestedDirection = args.options.direction ?? "maximize"
  if (requestedDirection !== "maximize" && requestedDirection !== "minimize") {
    throw new Error("--direction must be maximize or minimize")
  }
  const metricDirection = requestedDirection as MetricDirection

  const gitBranchName = gitBranchForName(name)
  // Captured before the checkout below.
  const baseCommitSha = await currentCommit(root)
  const lookup = await gitResult(["rev-parse", "--verify", gitBranchName], root)
  const checkoutArgs =
    lookup.code === 0
      ? ["checkout", gitBranchName]
      : ["checkout", "-b", gitBranchName]
  await git(checkoutArgs, root)

  const description = descriptionOption(args)
  // Metric fields shared by the markdown entry, the outbox record and state.
  const metric = { metricName, metricUnit, metricDirection }

  await appendBranchToMarkdown({
    root,
    projectPath,
    name,
    description,
    baseCommitSha,
    ...metric,
  })

  const record: LocalResearchBranchStartedRecord = {
    schemaVersion: 1,
    type: "branch_started",
    createdAt: new Date().toISOString(),
    name,
    description,
    gitBranchName,
    projectPath,
    baseCommitSha,
    ...metric,
  }
  await appendOutbox(root, record)

  const state = await readState(root)
  state.projectPath = projectPath
  const stateKey = branchStateKey(projectPath, name)
  // Keep whatever was already stored for this branch; overwrite known fields.
  state.branches[stateKey] = {
    ...state.branches[stateKey],
    projectPath,
    gitBranchName,
    baseCommitSha,
    description,
    ...metric,
  }
  await writeState(root, state)

  await emitEvent(root, {
    type: "branch_created",
    branchName: name,
    commitSha: baseCommitSha,
    message: gitBranchName,
  })
  console.log(`Created ${gitBranchName}`)
  console.log(`Base commit: ${baseCommitSha}`)

  // Best-effort: register the branch with the app now; it stays queued if offline.
  await flushOutbox(root, args, { quiet: true }).catch(() => {})
}
@@ -0,0 +1,432 @@
1
+ import type {
2
+ LocalResearchExperimentLoggedRecord,
3
+ LocalResearchHistoryRecord,
4
+ } from "../protocol"
5
+
6
+ import { readFile } from "node:fs/promises"
7
+
8
+ import { descriptionOption, optionalFlag, type Args } from "../lib/args"
9
+ import { emitEvent } from "../lib/events"
10
+ import { currentBranch, currentCommit, repoRoot } from "../lib/git"
11
+ import {
12
+ appendHistory,
13
+ experimentRecordToHistory,
14
+ readHistory,
15
+ } from "../lib/history"
16
+ import { branchMetadata, resolveBranchName } from "../lib/markdown"
17
+ import {
18
+ parseMetricLines,
19
+ primaryMetric,
20
+ summarizeOutput,
21
+ } from "../lib/metrics"
22
+ import {
23
+ appendOutbox,
24
+ clearLastRun,
25
+ clientRunRef,
26
+ readLastRun,
27
+ writeLastRun,
28
+ type LastRunRecord,
29
+ } from "../lib/outbox"
30
+ import { onyxPath, resolveProjectPath, scopedRoot } from "../lib/project"
31
+ import { pathExists, runProcess } from "../lib/process"
32
+ import { flushOutbox } from "../lib/sync"
33
+ import { renderExperimentTable } from "../lib/tui"
34
+
35
/** Status values allowed on a logged experiment record. */
type ExperimentStatus = LocalResearchExperimentLoggedRecord["status"]

/** The `checks` payload of a logged experiment record, minus null/undefined. */
type ChecksRecord = Exclude<
  LocalResearchExperimentLoggedRecord["checks"],
  null | undefined
>
37
+
38
/**
 * Flushes the outbox after a record was written locally. A failed flush is
 * reported as a warning instead of being rethrown.
 *
 * @param recordName Name shown in the warning when the sync fails.
 */
async function syncAfterRecord(root: string, args: Args, recordName: string) {
  const reportFailure = (error: unknown) => {
    const reason = error instanceof Error ? error.message : String(error)
    console.warn(`Recorded ${recordName} locally; sync failed: ${reason}`)
  }

  await flushOutbox(root, args).catch(reportFailure)
}
47
+
48
/**
 * Reads option `name` as a positive, finite number.
 *
 * @param fallback Returned unchanged when the option is not set.
 * @returns The numeric value of the option, or `fallback`.
 * @throws Error when the option is set but is not a positive finite number.
 */
function numberOption(args: Args, name: string, fallback: number) {
  const raw = args.options[name]
  if (raw === undefined) return fallback

  const numeric = Number(raw)
  const isPositiveFinite = Number.isFinite(numeric) && numeric > 0
  if (isPositiveFinite) return numeric

  throw new Error(`--${name} must be a positive number`)
}
57
+
58
/**
 * Checks that `value` is one of the known experiment statuses.
 *
 * @returns `value`, narrowed to `ExperimentStatus`.
 * @throws Error listing the accepted statuses when `value` is not one of them.
 */
function validateStatus(value: string): ExperimentStatus {
  switch (value) {
    case "queued":
    case "running":
    case "succeeded":
    case "failed":
    case "checks_failed":
    case "accepted":
    case "rejected":
      return value
    default:
      throw new Error(
        "--status must be queued, running, succeeded, failed, checks_failed, accepted, or rejected"
      )
  }
}
75
+
76
/**
 * Turns the `--agent-notes` value into a notes object.
 *
 * - Missing or empty input yields `{}`.
 * - Input that parses as a JSON object (not an array, not null) is returned
 *   as parsed.
 * - Anything else is kept as plain text under the `note` key.
 */
function parseAgentNotes(value?: string): Record<string, unknown> {
  if (!value) return {}

  let decoded: unknown
  try {
    decoded = JSON.parse(value)
  } catch {
    // Not JSON at all: keep it as a plain-text note.
    return { note: value }
  }

  const isPlainObject =
    typeof decoded === "object" && decoded !== null && !Array.isArray(decoded)
  return isPlainObject ? (decoded as Record<string, unknown>) : { note: value }
}
90
+
91
/**
 * Verifies that the eval script can be used for an experiment run.
 *
 * @param evalSh Path of the eval script to check.
 * @throws Error when the file does not exist, or when its text still contains
 *   the `ONYX_STUB_EVAL` marker.
 */
async function assertEvalReady(evalSh: string) {
  const present = await pathExists(evalSh)
  if (!present) {
    throw new Error(
      `Missing ${evalSh}. Create onyx/eval.sh before running experiments.`
    )
  }

  const script = await readFile(evalSh, "utf8")
  const stillStub = script.includes("ONYX_STUB_EVAL")
  if (stillStub) {
    throw new Error(
      `${evalSh} still contains ONYX_STUB_EVAL. Replace the stub with a real eval before running experiments.`
    )
  }
}
105
+
106
/**
 * Runs the project's `checks.sh` with bash, if that file exists.
 *
 * @returns `null` when there is no checks script; otherwise the check status
 *   ("timed_out", "passed" for exit code 0, or "failed"), the wall-clock
 *   duration in milliseconds, and a summary of the output (or `null` when the
 *   summary is empty).
 */
async function runChecks({
  root,
  projectPath,
  timeoutMs,
}: {
  root: string
  projectPath: string
  timeoutMs: number
}): Promise<ChecksRecord | null> {
  const script = onyxPath(root, projectPath, "checks.sh")
  const scriptPresent = await pathExists(script)
  if (!scriptPresent) return null

  const startedAtMs = Date.now()
  const outcome = await runProcess("bash", [script], {
    cwd: scopedRoot(root, projectPath),
    timeoutMs,
  })
  const durationMs = Date.now() - startedAtMs
  const outputSummary = summarizeOutput(outcome.stdout, outcome.stderr) || null

  // A timeout takes precedence over whatever exit code was reported.
  let status: ChecksRecord["status"]
  if (outcome.timedOut) {
    status = "timed_out"
  } else if (outcome.code === 0) {
    status = "passed"
  } else {
    status = "failed"
  }

  return { status, durationMs, outputSummary }
}
136
+
137
/**
 * Runs the project's `eval.sh` against the current commit and measures it.
 *
 * Steps, in order:
 * 1. Resolve project, branch and commit, and make sure the eval script is ready.
 * 2. Run the eval with bash (`--timeout`, in seconds, default 600) and parse
 *    its metric lines from stdout.
 * 3. If the eval exited 0, did not time out and produced the primary metric,
 *    run the checks script (`--checks-timeout`, in seconds, default 300).
 * 4. With `--no-log`, print metrics/status/checks as JSON and stop. Otherwise
 *    store the measurement as the last run and print a summary.
 *
 * `process.exitCode` is set to a non-zero value whenever the run did not
 * succeed (eval failure, timeout, missing metric, or checks not passed), in
 * both the `--no-log` and the logging path.
 */
export async function commandExpRun(args: Args) {
  const root = await repoRoot()
  const projectPath = await resolveProjectPath(root, args)
  const branchName = await resolveBranchName(root, args.options.branch)
  const gitBranchName = await currentBranch(root)
  const commitSha = await currentCommit(root)
  const branch = await branchMetadata({
    root,
    projectPath,
    branchName,
    gitBranchName,
  })
  const evalSh = onyxPath(root, projectPath, "eval.sh")
  await assertEvalReady(evalSh)

  // Timeout options are given in seconds; the process runner takes milliseconds.
  const timeoutMs = numberOption(args, "timeout", 600) * 1000
  const checksTimeoutMs = numberOption(args, "checks-timeout", 300) * 1000
  const started = new Date()
  const runRef = clientRunRef(branchName)
  await emitEvent(root, { type: "exp_run_started", branchName, commitSha })
  const result = await runProcess("bash", [evalSh], {
    cwd: scopedRoot(root, projectPath),
    timeoutMs,
  })
  const completed = new Date()
  const metrics = parseMetricLines(result.stdout, branch.metricName)
  const primary = primaryMetric(metrics, branch.metricName)
  // The eval only counts as successful when it also produced the primary metric.
  const benchmarkSucceeded =
    result.code === 0 && !result.timedOut && primary.value !== null
  await emitEvent(root, {
    type: "eval_finished",
    branchName,
    commitSha,
    message: `${primary.name}=${primary.value ?? "null"} (${benchmarkSucceeded ? "ok" : "failed"})`,
  })
  // Checks are skipped entirely when the eval itself did not succeed.
  const checks = benchmarkSucceeded
    ? await runChecks({ root, projectPath, timeoutMs: checksTimeoutMs })
    : null
  if (checks) {
    await emitEvent(root, {
      type: "checks_finished",
      branchName,
      commitSha,
      message: checks.status,
    })
  }
  const status: ExperimentStatus = !benchmarkSucceeded
    ? "failed"
    : checks && checks.status !== "passed"
      ? "checks_failed"
      : "succeeded"

  // One failure rule for both output modes: previously `--no-log` exited 0
  // when the eval exited 0 but produced no metric, or when the checks failed.
  const runFailed = !benchmarkSucceeded || status === "checks_failed"
  // Propagate the eval's own non-zero exit code when there is one; otherwise 1.
  const failureExitCode = result.code && result.code !== 0 ? result.code : 1

  const outputSummaryParts = [
    result.timedOut ? `Eval timed out after ${timeoutMs / 1000}s.` : "",
    result.code === 0 && primary.value === null
      ? `No METRIC line found for ${branch.metricName}.`
      : "",
    summarizeOutput(result.stdout, result.stderr),
  ].filter(Boolean)
  const outputSummary = outputSummaryParts.join("\n").slice(0, 4000) || null

  if (optionalFlag(args, "no-log")) {
    console.log(JSON.stringify({ metrics, status, checks }, null, 2))
    if (runFailed) process.exitCode = failureExitCode
    return
  }

  const record: LastRunRecord = {
    schemaVersion: 1,
    createdAt: completed.toISOString(),
    runRef,
    branchName,
    gitBranchName,
    projectPath,
    commitSha,
    status,
    primaryMetricName: primary.name,
    primaryMetricValue: primary.value,
    metrics,
    agentNotes: {},
    checks,
    durationMs: completed.getTime() - started.getTime(),
    startedAt: started.toISOString(),
    completedAt: completed.toISOString(),
    outputSummary,
  }
  await writeLastRun(root, record)
  await emitEvent(root, {
    type: "run_finished",
    branchName,
    commitSha,
    message: status,
  })
  console.log(
    `Measured ${commitSha.slice(0, 7)} (${primary.name}=${primary.value ?? "null"}, ${status}); runRef ${runRef}`
  )
  console.log("Run `onyx exp log --description <text>` to record this result.")

  if (runFailed) process.exitCode = failureExitCode
}
239
+
240
/**
 * Records an experiment result: appends it to the outbox and the local
 * history, emits an `exp_logged` event, then attempts a sync.
 *
 * Values come from, in order of precedence: explicit options (`--commit`,
 * `--metric-name`, `--metric`, `--status`, `--name`, `--agent-notes`), the
 * stored last run when it belongs to the same branch and project, and finally
 * defaults (current commit, the branch's metric name, status "succeeded").
 * A last run that was used is cleared afterwards.
 *
 * @throws Error when the metric value is not a finite number (an empty or
 *   whitespace-only `--metric` counts as invalid), when `--status` is unknown,
 *   or when "succeeded"/"accepted" is requested although the last run's checks
 *   did not pass.
 */
export async function commandExpLog(args: Args) {
  const root = await repoRoot()
  const projectPath = await resolveProjectPath(root, args)
  const branchName = await resolveBranchName(root, args.options.branch)
  const gitBranchName = await currentBranch(root)
  const lastRun = await readLastRun(root)
  // Only reuse the last run when it was measured for this branch and project.
  const usableLastRun =
    lastRun?.branchName === branchName && lastRun.projectPath === projectPath
      ? lastRun
      : null
  const commitSha =
    args.options.commit ??
    usableLastRun?.commitSha ??
    (await currentCommit(root))
  const branch = await branchMetadata({
    root,
    projectPath,
    branchName,
    gitBranchName,
  })
  const metricName =
    args.options["metric-name"] ??
    usableLastRun?.primaryMetricName ??
    branch.metricName

  const rawMetric = args.options.metric
  // Number("") and Number("  ") are 0, which would silently record a metric
  // of 0; treat an empty value as invalid instead.
  const metricValue =
    rawMetric === undefined
      ? (usableLastRun?.primaryMetricValue ?? null)
      : rawMetric.trim() === ""
        ? Number.NaN
        : Number(rawMetric)
  if (metricValue !== null && !Number.isFinite(metricValue)) {
    throw new Error("--metric must be a finite number")
  }
  const status = validateStatus(
    args.options.status ?? usableLastRun?.status ?? "succeeded"
  )
  const checks = usableLastRun?.checks ?? null
  // A run whose checks did not pass must not be recorded as a success.
  if (
    checks &&
    checks.status !== "passed" &&
    (status === "succeeded" || status === "accepted")
  ) {
    throw new Error(
      `Cannot record ${status}: checks ${checks.status}. Use --status checks_failed.`
    )
  }
  const completedAt = new Date().toISOString()
  const lastMetrics = usableLastRun?.metrics ?? {}
  // An explicit --metric overrides (or adds) the primary metric on top of the
  // measured ones; without it the measured metrics are kept as they are.
  const metrics =
    rawMetric === undefined || metricValue === null
      ? lastMetrics
      : { ...lastMetrics, [metricName]: metricValue }

  const record: LocalResearchExperimentLoggedRecord = {
    schemaVersion: 1,
    type: "experiment_logged",
    createdAt: completedAt,
    runRef: usableLastRun?.runRef ?? clientRunRef(branchName),
    branchName,
    name: args.options.name ?? `experiment-${commitSha.slice(0, 7)}`,
    description: descriptionOption(args),
    gitBranchName,
    projectPath,
    commitSha,
    status,
    primaryMetricName: metricName,
    primaryMetricValue: metricValue,
    metrics,
    agentNotes: parseAgentNotes(args.options["agent-notes"]),
    checks,
    durationMs: usableLastRun?.durationMs ?? null,
    startedAt: usableLastRun?.startedAt ?? null,
    completedAt: usableLastRun?.completedAt ?? completedAt,
    outputSummary: usableLastRun?.outputSummary ?? null,
  }
  await appendOutbox(root, record)
  // Permanent local history row; superseded by the canonical record on sync.
  // Still best-effort, but a failure is reported rather than hidden.
  await appendHistory(root, experimentRecordToHistory(record)).catch(
    (error: unknown) => {
      console.warn(
        `Recorded ${record.name} in the outbox; local history update failed: ${
          error instanceof Error ? error.message : String(error)
        }`
      )
    }
  )
  await emitEvent(root, {
    type: "exp_logged",
    branchName,
    commitSha,
    message: `${record.name} (${status})`,
  })
  console.log(`Recorded ${record.name} (${status})`)
  if (usableLastRun) await clearLastRun(root)

  await syncAfterRecord(root, args, record.name)
}
328
+
329
/**
 * Lists experiments from the local history cache (`.git/onyx/history.jsonl`).
 * Works fully offline; run `onyx sync` first to hydrate cross-branch canonical
 * history.
 *
 * Supports `--branch`, `--status` and `--grep` (case-insensitive regex over
 * name, description, agent notes and output summary) as filters, `--limit`
 * (default 50) and `--json` for machine-readable output. Results are shown
 * newest first.
 */
export async function commandExpList(args: Args) {
  const root = await repoRoot()
  const history = await readHistory(root)
  const records = history.records
  if (history.corrupt > 0) {
    console.warn(`Skipped ${history.corrupt} unreadable history record(s).`)
  }

  const rows: LocalResearchHistoryRecord[] = Array.from(records)

  // Surface a measured-but-unlogged run so the latest attempt is never hidden.
  const pending = await readLastRun(root)
  if (pending) {
    const alreadyListed = rows.some((row) => row.runRef === pending.runRef)
    if (!alreadyListed) {
      rows.push({
        schemaVersion: 1,
        source: "local",
        branchName: pending.branchName,
        gitBranchName: pending.gitBranchName,
        runRef: pending.runRef,
        commitSha: pending.commitSha,
        status: pending.status,
        name: `(unlogged) ${pending.commitSha.slice(0, 7)}`,
        description: null,
        primaryMetricName: pending.primaryMetricName,
        primaryMetricValue: pending.primaryMetricValue,
        metrics: pending.metrics,
        agentNotes: pending.agentNotes,
        checks: pending.checks ?? null,
        durationMs: pending.durationMs ?? null,
        startedAt: pending.startedAt ?? null,
        completedAt: pending.completedAt ?? null,
        createdAt: pending.createdAt,
      })
    }
  }

  // Collect one predicate per active filter; a row must satisfy all of them.
  const predicates: Array<(row: LocalResearchHistoryRecord) => boolean> = []

  const branchFilter = args.options.branch
  if (branchFilter) {
    predicates.push((row) => row.branchName === branchFilter)
  }

  if (args.options.status) {
    const wantedStatus = validateStatus(args.options.status)
    predicates.push((row) => row.status === wantedStatus)
  }

  const grepSource = args.options.grep
  if (grepSource) {
    let matcher: RegExp
    try {
      matcher = new RegExp(grepSource, "i")
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(`--grep is not a valid regex: ${reason}`)
    }
    predicates.push((row) => {
      const haystack = [
        row.name,
        row.description ?? "",
        JSON.stringify(row.agentNotes),
        row.outputSummary ?? "",
      ].join("\n")
      return matcher.test(haystack)
    })
  }

  const filtered = rows.filter((row) =>
    predicates.every((accepts) => accepts(row))
  )

  // Newest first for reading; the file itself stays branch-grouped.
  filtered.sort((a, b) => {
    if (a.createdAt < b.createdAt) return 1
    if (a.createdAt > b.createdAt) return -1
    return 0
  })
  const limit = numberOption(args, "limit", 50)
  const limited = filtered.slice(0, limit)

  if (optionalFlag(args, "json")) {
    console.log(JSON.stringify(limited, null, 2))
    return
  }

  if (limited.length === 0) {
    const emptyMessage =
      records.length === 0
        ? "No experiments recorded yet. Run `onyx sync` to hydrate from the Onyx app."
        : "No experiments matched the given filters."
    console.log(emptyMessage)
    return
  }

  const lines = renderExperimentTable(limited, {
    columns: process.stdout.columns ?? 120,
    color: process.stdout.isTTY ?? false,
    nowMs: Date.now(),
    // The branch column is redundant when filtering to a single branch.
    showBranch: !args.options.branch,
  })
  for (const line of lines) console.log(line)

  const hiddenCount = filtered.length - limited.length
  if (hiddenCount > 0) {
    console.log(`… ${hiddenCount} more; raise --limit to see all.`)
  }
}