@onyx-robotics/agent 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +72 -0
- package/bin/onyx.ts +4 -0
- package/package.json +52 -0
- package/scripts/install.sh +115 -0
- package/skills/onyx/SKILL.md +150 -0
- package/src/commands/agent.ts +23 -0
- package/src/commands/branch.ts +96 -0
- package/src/commands/exp.ts +432 -0
- package/src/commands/listen.ts +327 -0
- package/src/commands/login.ts +198 -0
- package/src/commands/profile.ts +112 -0
- package/src/commands/sync.ts +88 -0
- package/src/install.test.ts +38 -0
- package/src/lib/api.ts +227 -0
- package/src/lib/args.ts +68 -0
- package/src/lib/config.ts +148 -0
- package/src/lib/events.ts +97 -0
- package/src/lib/git.ts +57 -0
- package/src/lib/history.ts +272 -0
- package/src/lib/login.ts +233 -0
- package/src/lib/markdown.ts +148 -0
- package/src/lib/metrics.ts +41 -0
- package/src/lib/outbox.ts +173 -0
- package/src/lib/process.ts +73 -0
- package/src/lib/project.ts +42 -0
- package/src/lib/skill-content.ts +1 -0
- package/src/lib/skill.ts +50 -0
- package/src/lib/sync.ts +294 -0
- package/src/lib/tui.ts +364 -0
- package/src/main.ts +84 -0
- package/src/onyx.test.ts +952 -0
- package/src/onyx.ts +92 -0
- package/src/profile.test.ts +472 -0
- package/src/protocol/index.ts +2 -0
- package/src/protocol/local-research.ts +152 -0
- package/src/protocol/research.ts +75 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Extracts `METRIC name=value` lines from a process's stdout.
 *
 * Each line is trimmed and matched case-insensitively against
 * `METRIC <name>=<number>`; when a name repeats, the last value wins. If no
 * METRIC line yields a value and the whole trimmed output is a single number,
 * that number is returned under `fallbackName`.
 *
 * Literals that overflow to a non-finite number (for example `1e999`) are
 * ignored: `Infinity` serializes to `null` in JSON, so keeping it would
 * produce a metric that cannot round-trip through a JSON-encoded record.
 *
 * @param stdout - Raw standard output of the evaluated command.
 * @param fallbackName - Metric name used when the output is one bare number.
 * @returns A map from metric name to its finite numeric value (possibly empty).
 */
export function parseMetricLines(stdout: string, fallbackName = "score") {
  // Compiled once per call rather than once per line.
  const metricLine =
    /^METRIC\s+([A-Za-z0-9_.:-]+)=(-?\d+(?:\.\d+)?(?:e[+-]?\d+)?)$/i
  const bareNumber = /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i
  const metrics: Record<string, number> = {}

  for (const line of stdout.split("\n")) {
    // trim() also removes the "\r" left behind by CRLF output.
    const match = metricLine.exec(line.trim())
    if (!match) continue
    const value = Number(match[2])
    if (!Number.isFinite(value)) continue
    metrics[match[1]!] = value
  }

  if (Object.keys(metrics).length === 0) {
    const trimmed = stdout.trim()
    if (bareNumber.test(trimmed)) {
      const value = Number(trimmed)
      if (Number.isFinite(value)) {
        metrics[fallbackName] = value
      }
    }
  }

  return metrics
}
|
|
20
|
+
|
|
21
|
+
/**
 * Combines stdout and stderr into one short summary string.
 *
 * Each stream is trimmed and dropped when empty; when both remain they are
 * separated by a `--- stderr ---` line. The result is cut to its first 4000
 * characters.
 */
export function summarizeOutput(stdout: string, stderr: string) {
  const sections: string[] = []
  for (const stream of [stdout, stderr]) {
    const text = stream.trim()
    if (text) sections.push(text)
  }
  const combined = sections.join("\n--- stderr ---\n")
  return combined.slice(0, 4000)
}
|
|
27
|
+
|
|
28
|
+
/**
 * Picks the metric to treat as primary.
 *
 * Returns the metric called `preferredName` when the map has it as an own
 * key. Otherwise falls back to the first entry of the map, and when the map
 * is empty reports `preferredName` with a `null` value.
 */
export function primaryMetric(
  metrics: Record<string, number>,
  preferredName: string
): { name: string; value: number | null } {
  if (!Object.hasOwn(metrics, preferredName)) {
    const first = Object.entries(metrics)[0]
    if (first === undefined) {
      return { name: preferredName, value: null }
    }
    return { name: first[0], value: first[1] ?? null }
  }

  return { name: preferredName, value: metrics[preferredName] ?? null }
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import { mkdir, readFile, rename, unlink, writeFile } from "node:fs/promises"
|
|
2
|
+
import { randomUUID } from "node:crypto"
|
|
3
|
+
import { join } from "node:path"
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
localResearchRecordSchema,
|
|
7
|
+
type LocalResearchExperimentLoggedRecord,
|
|
8
|
+
type LocalResearchRecord,
|
|
9
|
+
} from "../protocol"
|
|
10
|
+
|
|
11
|
+
import { gitDir } from "./git"
|
|
12
|
+
|
|
13
|
+
/** Locally cached details for one branch entry of `CliState.branches`. */
type CliBranchState = {
  branchId?: string
  projectPath?: string
  gitBranchName?: string
  baseCommitSha?: string | null
  description?: string | null
  metricName?: string
  metricUnit?: string | null
  metricDirection?: "maximize" | "minimize"
}

/**
 * Shape of the CLI state persisted as JSON: the resolved project (id and
 * path, both optional) plus a map of per-branch entries keyed by string.
 */
export type CliState = {
  projectId?: string
  projectPath?: string
  branches: Record<string, CliBranchState>
}
|
|
30
|
+
|
|
31
|
+
/** Keys of a logged-experiment record that the last-run file drops or re-declares. */
type LastRunOmittedKey =
  | "type"
  | "schemaVersion"
  | "createdAt"
  | "name"
  | "description"

/**
 * A logged-experiment record without `type`, `name` and `description`, with
 * `schemaVersion` pinned to `1` and `createdAt` declared as a string.
 */
export type LastRunRecord = Omit<
  LocalResearchExperimentLoggedRecord,
  LastRunOmittedKey
> & {
  schemaVersion: 1
  createdAt: string
}
|
|
38
|
+
|
|
39
|
+
/**
 * Returns the `onyx` directory under the path reported by `gitDir(root)`,
 * creating it (and any missing parents) first.
 */
export async function onyxStateDir(root: string) {
  const repoGitDir = await gitDir(root)
  const stateDir = join(repoGitDir, "onyx")
  await mkdir(stateDir, { recursive: true })
  return stateDir
}
|
|
44
|
+
|
|
45
|
+
/** Path of the `outbox.jsonl` file inside the Onyx state directory. */
export async function outboxPath(root: string) {
  const stateDir = await onyxStateDir(root)
  return join(stateDir, "outbox.jsonl")
}
|
|
48
|
+
|
|
49
|
+
/** Path of the `state.json` file inside the Onyx state directory. */
export async function statePath(root: string) {
  const stateDir = await onyxStateDir(root)
  return join(stateDir, "state.json")
}
|
|
52
|
+
|
|
53
|
+
/** Path of the `last-run.json` file inside the Onyx state directory. */
export async function lastRunPath(root: string) {
  const stateDir = await onyxStateDir(root)
  return join(stateDir, "last-run.json")
}
|
|
56
|
+
|
|
57
|
+
/**
 * Creates an idempotency key of the form `local/<branchName>/<uuid>`.
 *
 * Every call yields a new random key; it is meant to be persisted with the
 * record and re-sent unchanged on every retry.
 */
export function clientRunRef(branchName: string) {
  const uniqueSuffix = randomUUID()
  return ["local", branchName, uniqueSuffix].join("/")
}
|
|
61
|
+
|
|
62
|
+
/**
 * Validates `record` against the record schema and appends it to the outbox
 * file as one JSON line.
 *
 * Validation runs first, so an invalid record throws before the outbox path
 * is resolved or the file is touched.
 */
export async function appendOutbox(root: string, record: LocalResearchRecord) {
  const checked = localResearchRecordSchema.parse(record)
  const target = await outboxPath(root)
  const line = `${JSON.stringify(checked)}\n`
  // flag "a" appends instead of truncating.
  await writeFile(target, line, { encoding: "utf8", flag: "a" })
}
|
|
69
|
+
|
|
70
|
+
/**
 * Loads the queued records from the outbox file.
 *
 * Lines that are not valid JSON, or that fail schema validation, are counted
 * in `corrupt` and skipped instead of throwing, so a crash in the middle of
 * an append never wedges the outbox. A file that cannot be read at all yields
 * an empty result.
 */
export async function readOutbox(
  root: string
): Promise<{ records: LocalResearchRecord[]; corrupt: number }> {
  let contents: string
  try {
    contents = await readFile(await outboxPath(root), "utf8")
  } catch {
    return { records: [], corrupt: 0 }
  }

  const records: LocalResearchRecord[] = []
  let corrupt = 0

  const entries = contents
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry.length > 0)

  for (const entry of entries) {
    let candidate: unknown
    try {
      candidate = JSON.parse(entry)
    } catch {
      corrupt += 1
      continue
    }

    const checked = localResearchRecordSchema.safeParse(candidate)
    if (!checked.success) {
      corrupt += 1
      continue
    }
    records.push(checked.data)
  }

  return { records, corrupt }
}
|
|
107
|
+
|
|
108
|
+
/**
 * Replaces the outbox with the still-pending records.
 *
 * The new content (one JSON line per record, or an empty file when nothing
 * is pending) is written to `<outbox>.tmp` and then renamed over the outbox,
 * so the outbox file is swapped in a single rename rather than rewritten in place.
 */
export async function rewriteOutbox(
  root: string,
  records: LocalResearchRecord[]
) {
  const target = await outboxPath(root)
  const serialized = records.map((record) => JSON.stringify(record)).join("\n")
  const payload = serialized === "" ? "" : `${serialized}\n`
  const staging = `${target}.tmp`
  await writeFile(staging, payload, "utf8")
  await rename(staging, target)
}
|
|
119
|
+
|
|
120
|
+
/**
 * Reads the persisted CLI state.
 *
 * Only `projectId`, `projectPath` and `branches` are taken from the file;
 * `branches` defaults to an empty map. Any failure (missing file, invalid
 * JSON, a `null` document) yields a fresh state with no branches.
 */
export async function readState(root: string): Promise<CliState> {
  try {
    const file = await statePath(root)
    const text = await readFile(file, "utf8")
    // Destructuring stays inside the try so a `null` document is also caught.
    const { projectId, projectPath, branches } = JSON.parse(
      text
    ) as Partial<CliState>
    return { projectId, projectPath, branches: branches ?? {} }
  } catch {
    return { branches: {} }
  }
}
|
|
134
|
+
|
|
135
|
+
/**
 * Persists the CLI state as pretty-printed JSON, writing to `<state>.tmp`
 * first and renaming it over the state file.
 */
export async function writeState(root: string, state: CliState) {
  const target = await statePath(root)
  const staging = `${target}.tmp`
  const serialized = JSON.stringify(state, null, 2)
  await writeFile(staging, `${serialized}\n`, "utf8")
  await rename(staging, target)
}
|
|
141
|
+
|
|
142
|
+
/**
 * Reads the last-run file.
 *
 * Returns the parsed document only when it is an object whose
 * `schemaVersion` is `1` and whose `runRef` is a string; no other fields are
 * checked. Returns `null` when the file is missing, unreadable, not JSON, or
 * fails that check.
 */
export async function readLastRun(root: string): Promise<LastRunRecord | null> {
  let candidate: unknown
  try {
    candidate = JSON.parse(await readFile(await lastRunPath(root), "utf8"))
  } catch {
    return null
  }

  if (candidate === null || typeof candidate !== "object") {
    return null
  }

  const fields = candidate as { schemaVersion?: unknown; runRef?: unknown }
  const looksValid =
    fields.schemaVersion === 1 && typeof fields.runRef === "string"
  return looksValid ? (candidate as LastRunRecord) : null
}
|
|
159
|
+
|
|
160
|
+
/**
 * Persists the last-run record as pretty-printed JSON, writing to
 * `<last-run>.tmp` first and renaming it over the last-run file.
 */
export async function writeLastRun(root: string, record: LastRunRecord) {
  const target = await lastRunPath(root)
  const staging = `${target}.tmp`
  const serialized = JSON.stringify(record, null, 2)
  await writeFile(staging, `${serialized}\n`, "utf8")
  await rename(staging, target)
}
|
|
166
|
+
|
|
167
|
+
/**
 * Deletes the last-run file. Any failure is ignored, including the file not
 * existing.
 */
export async function clearLastRun(root: string) {
  let target: string
  try {
    target = await lastRunPath(root)
  } catch {
    return
  }

  try {
    await unlink(target)
  } catch {
    // no last run to clear
  }
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { spawn } from "node:child_process"
|
|
2
|
+
import { stat } from "node:fs/promises"
|
|
3
|
+
|
|
4
|
+
/** Outcome of a child process started by `runProcess`. */
export type ProcessResult = {
  /** Exit code passed to the `close` event; `null` when none was reported. */
  code: number | null
  /** Everything the child wrote to stdout, decoded as UTF-8. */
  stdout: string
  /** Everything the child wrote to stderr, decoded as UTF-8. */
  stderr: string
  /** `true` when the timeout fired and a SIGTERM was sent to the child. */
  timedOut: boolean
}
|
|
10
|
+
|
|
11
|
+
/**
 * Spawns `command` with `args` and collects its output.
 *
 * stdin is ignored; stdout and stderr are buffered and decoded as UTF-8 once
 * the child's streams close. When `timeoutMs` is given, the child receives a
 * SIGTERM after that many milliseconds and the result is flagged `timedOut`.
 *
 * @returns A promise resolved on the child's `close` event; it rejects if the
 *   child emits `error` (for example when the command cannot be spawned).
 */
export function runProcess(
  command: string,
  args: string[],
  options: { cwd?: string; env?: NodeJS.ProcessEnv; timeoutMs?: number } = {}
): Promise<ProcessResult> {
  return new Promise((resolveProcess, reject) => {
    const { cwd, env, timeoutMs } = options
    const child = spawn(command, args, {
      cwd,
      env,
      stdio: ["ignore", "pipe", "pipe"],
    })

    const outChunks: Buffer[] = []
    const errChunks: Buffer[] = []
    let timedOut = false

    let timer: ReturnType<typeof setTimeout> | null = null
    if (timeoutMs !== undefined) {
      timer = setTimeout(() => {
        timedOut = true
        child.kill("SIGTERM")
      }, timeoutMs)
    }

    child.stdout.on("data", (chunk: Buffer) => {
      outChunks.push(chunk)
    })
    child.stderr.on("data", (chunk: Buffer) => {
      errChunks.push(chunk)
    })
    child.on("error", reject)
    child.on("close", (code) => {
      if (timer) clearTimeout(timer)
      const decode = (chunks: Buffer[]) => Buffer.concat(chunks).toString("utf8")
      resolveProcess({
        code,
        stdout: decode(outChunks),
        stderr: decode(errChunks),
        timedOut,
      })
    })
  })
}
|
|
47
|
+
|
|
48
|
+
/**
 * Runs a command and returns its trimmed stdout.
 *
 * @throws Error when the command exits with a non-zero (or missing) code or
 *   timed out. The message names the command line and carries the trimmed
 *   stderr, falling back to the trimmed stdout when stderr is empty.
 */
export async function commandOutput(
  command: string,
  args: string[],
  cwd?: string
) {
  const { code, timedOut, stdout, stderr } = await runProcess(command, args, {
    cwd,
  })

  const succeeded = code === 0 && !timedOut
  if (succeeded) {
    return stdout.trim()
  }

  const reason = timedOut ? "timed out" : stderr.trim() || stdout.trim()
  throw new Error(`${command} ${args.join(" ")} failed: ${reason}`)
}
|
|
65
|
+
|
|
66
|
+
/**
 * Reports whether `stat` succeeds for `path`. Any `stat` failure, not only a
 * missing entry, is reported as `false`.
 */
export async function pathExists(path: string) {
  return stat(path).then(
    () => true,
    () => false
  )
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { join } from "node:path"
|
|
2
|
+
|
|
3
|
+
import type { Args } from "./args"
|
|
4
|
+
import { readState } from "./outbox"
|
|
5
|
+
|
|
6
|
+
/** Directory name that `onyxPath` appends below the (optionally scoped) root. */
export const ONYX_DIR = "onyx"
|
|
7
|
+
|
|
8
|
+
/**
 * Normalizes a `--project-path` value into a root-relative path.
 *
 * Missing values become `""`. Surrounding whitespace and leading/trailing
 * forward slashes are stripped.
 *
 * @param value - Raw option or persisted value; `null`/`undefined` mean "none".
 * @returns The cleaned path, or `""` when no project path is set.
 * @throws Error when the path contains a NUL character or a `..` segment.
 */
export function normalizeProjectPath(value?: string | null) {
  const path = (value ?? "").trim().replace(/^\/+|\/+$/g, "")
  // Split on both separators: `path.join` treats "\" as a separator on
  // Windows, so "..\\x" must be rejected just like "../x".
  const segments = path.split(/[\\/]/)
  if (path.includes("\0") || segments.includes("..")) {
    throw new Error("--project-path must be a relative path without '..'")
  }
  return path
}
|
|
18
|
+
|
|
19
|
+
/**
 * Resolves the effective project path: the `--project-path` option when it
 * was given, otherwise the `projectPath` stored in the persisted state. The
 * chosen value is normalized before being returned.
 */
export async function resolveProjectPath(root: string, args: Args) {
  const explicit = args.options["project-path"]
  if (explicit === undefined) {
    const { projectPath } = await readState(root)
    return normalizeProjectPath(projectPath)
  }

  return normalizeProjectPath(explicit)
}
|
|
27
|
+
|
|
28
|
+
/**
 * Returns `root` joined with `projectPath`, or `root` unchanged when
 * `projectPath` is empty.
 */
export function scopedRoot(root: string, projectPath: string) {
  if (!projectPath) {
    return root
  }
  return join(root, projectPath)
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds a path inside the Onyx directory of a project:
 * `<scoped root>/<ONYX_DIR>/<...segments>`.
 */
export function onyxPath(
  root: string,
  projectPath: string,
  ...segments: string[]
) {
  const base = scopedRoot(root, projectPath)
  return join(base, ONYX_DIR, ...segments)
}
|
|
39
|
+
|
|
40
|
+
/**
 * Key for a branch entry in the CLI state: `<projectPath>:<branchName>`, or
 * just `branchName` when `projectPath` is empty.
 */
export function branchStateKey(projectPath: string, branchName: string) {
  if (!projectPath) {
    return branchName
  }
  return [projectPath, branchName].join(":")
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export const ONYX_SKILL_MARKDOWN = "---\nname: onyx\ndescription: Drive the Onyx auto research workflow end-to-end. Use when asked to start, resume, or continue Onyx experiments, run auto research, optimize a metric, work on an Onyx branch, /onyx anything, or keep the Onyx platform updated from local research. Handles setup, the autonomous experiment loop, and recording every attempt — successful or failed — to the Onyx platform (queued in a local outbox when offline).\n---\n\n# Onyx Research\n\nDrive an autonomous research loop using the `onyx` CLI as the substrate. You own reasoning, edits, commits, durable notes, and the experiment records the Onyx platform tracks.\n\n## Setup\n\n1. Ask (or infer):\n - **Goal**\n - **Evaluation**\n - **Metric**, **unit**, **direction** (`maximize` / `minimize`)\n - **Files in scope**\n - **Constraints**\n - **Stop conditions** - eg. `stop after N iterations`, `for 30 minutes`, `until <condition>`, default is no stop condition, loop forever until manually stopped by user\n2. `onyx branch create --name <slug> --metric <name> --unit <unit> --direction <maximize/minimize> --description <goal>`\n - Add `--project-path <projectPath>` when the Onyx project is scoped to a subdirectory.\n3. Read the source files. Understand the workload deeply before writing anything.\n4. Write `<projectPath>/onyx/onyx.md` and `<projectPath>/onyx/eval.sh` (see below). Optionally write `<projectPath>/onyx/checks.sh` when correctness constraints require it. Commit these files.\n5. Run a baseline with `onyx exp run`, then record it with `onyx exp log --description \"baseline\" --agent-notes '<json>'`, then start looping immediately.\n\n### `onyx.md`\n\nThis is the heart of the session. A fresh agent with no context should be able to read this file and run the loop effectively. 
Invest time making it excellent.\n\n```markdown\n# Onyx Research: <goal>\n\n## Objective\n\n<Specific description of what we're optimizing and the workload.>\n\n## Metrics\n\n- **Primary**: <name>, <unit>, <direction> - the optimization target\n- **Secondary**: - independent tradeoff monitors\n - <name>, <unit>, <direction>\n - <name>, <unit>, <direction>\n - ...\n\n## How to Run\n\n`./onyx/eval.sh` - outputs `METRIC name=number` lines.\n\n## Files in Scope\n\n<Every file the agent may modify, with a brief note on what it does.>\n\n## Off Limits\n\n<What must NOT be touched.>\n\n## Constraints\n\n<Hard rules: tests must pass, no new deps, etc.>\n\n## What's Been Tried\n\n<High-level strategy notes only: key wins, dead-end THEMES, and architectural\ninsights. Do not list individual experiments here - the full per-experiment\nrecord lives in `onyx exp list` (searchable offline).>\n```\n\nUpdate `onyx.md` periodically - especially the \"What's Been Tried\" section - so resuming agents have strategic context. For the detailed record of individual attempts, rely on `onyx exp list` instead of duplicating it in `onyx.md`:\n\n- `onyx exp list --limit 20` - recent experiments (newest first) with status and metric.\n- `onyx exp list --grep <regex>` - search names, descriptions, agent notes, and output summaries; e.g. `onyx exp list --grep 'cache|memoiz'` before trying a caching idea.\n- `onyx exp list --status failed --json` - full records (agent notes included) for post-mortems.\n\nThe history cache is hydrated from the Onyx app on `onyx sync`, so after a fresh clone run `onyx sync` once to pull the cross-branch history.\n\n### `eval.sh`\n\nBash script (`set -euo pipefail`) that: pre-checks fast (syntax errors in <1s), runs the benchmark, and outputs structured lines to stdout. 
Keep the script fast - every second is multiplied by hundreds of experiment runs.\n\n#### Structured output\n\n- `METRIC name=value` - primary metric (must match `onyx branch create`'s `metric name`) and any secondary metrics. Parsed automatically by `onyx exp run`.\n\n#### Design the script to inform optimization\n\nThe script should output **whatever data helps you make better decisions in the next iteration.** Think about what you'll need to see after each experiment run to know where to focus:\n\n- Phase timings when the workload has distinct stages\n- Error counts, failure categories, or test names when checks can fail in different ways\n- Memory usage, cache hit rates, or other runtime diagnostics when relevant\n- Anything domain-specific that would help localize regressions or identify bottlenecks\n\nThe script runs the same code every iteration - but you can **update it during the loop** if you discover you need more signal. Add instrumentation as you learn what matters.\n\n#### Agent experiment side notes via `onyx exp log`\n\nUse `onyx exp log`'s `--agent-notes` flag to annotate each experiment run with **whatever would help the next iteration make a better decision.** Free-form key/value pairs - you decide what's worth recording. Don't repeat the description or raw output; capture what you'd lose after a context reset.\n\nAnnotate failures and crashes heavily. If you don't capture what you tried and why it failed, future iterations will waste time re-discovering the same dead ends.\n\n### `checks.sh` (optional)\n\nBash script (`set -euo pipefail`) for backpressure/correctness checks: tests, types, lint, etc. 
**Only create this file when the user's constraints require correctness validation** (e.g., \"tests must pass\", \"types must check\").\n\nWhen this file exists:\n\n- Runs automatically after every **passing** benchmark in `onyx exp run`.\n- If checks fail, `onyx exp run` reports it clearly - log as `checks_failed`.\n- Its execution time does **NOT** affect the primary metric.\n- A `checks_failed` result is recorded, but never becomes best.\n- Has a separate timeout (default 300s, configurable via `onyx exp run --checks-timeout <seconds>`).\n\nWhen this file does **not** exist, everything behaves exactly as before - no changes to the loop.\n\n**Keep output minimal.** Only the last 80 lines of checks output are fed back to the agent on failure. Suppress verbose progress/success output and let only errors through. This keeps context lean and helps the agent pinpoint what broke.\n\n```bash\n#!/bin/bash\nset -euo pipefail\n# Example: run tests and typecheck - suppress success output, only show errors\npnpm test --run --reporter=dot 2>&1 | tail -50\npnpm typecheck 2>&1 | grep -i error || true\n```\n\n## Loop Rules\n\n**LOOP FOREVER.** Never ask \"should I continue?\" - the user expects autonomous work.\n\n- **Primary metric is king.** Improved -> build the next experiment from that result. Worse/equal -> leave it recorded and build from the prior best instead. Secondary metrics rarely affect this.\n- **Annotate every run with `--agent-notes`.** Record what you learned - not what you did. What would help the next iteration or a fresh agent resuming this session? Notes are searchable later via `onyx exp list --grep`.\n- **Simpler is better.** Removing code for equal perf = good. Ugly complexity for tiny gain = probably not worth building on.\n- **Don't thrash.** Repeatedly returning to the same idea? Try something structurally different.\n- **Crashes:** fix if trivial, otherwise log and move on. 
Don't over-invest.\n- **Think longer when stuck.** Re-read source files, study the profiling data, reason about what the CPU is actually doing. The best ideas come from deep understanding, not from trying random variations.\n- **Resuming:** if `onyx.md` exists, read it + git log + `onyx status` + `onyx exp list --limit 20`, continue looping.\n\n**NEVER STOP.** The user may be away for hours. Keep going until interrupted.\n\n## Git Rules\n\nOnyx is append-only: commit every attempt forward on `onyx/{name}`. Do not use `git reset --hard`, auto-revert, or force-push. Experiment metadata is canonical in the Onyx app/API; `.git/onyx/outbox.jsonl` is only an offline retry queue.\n\n## Ideas Backlog\n\nWhen you discover complex but promising optimizations that you won't pursue right now, **append them as bullets to `onyx.ideas.md`**. Don't let good ideas get lost.\n\nOn resume (context limit, crash), check `onyx.ideas.md` - prune stale/tried entries, experiment with the rest. When all paths are exhausted, delete the file and write a final summary.\n\n## User Messages During Experiments\n\nIf the user sends a message while an experiment is running, finish the current `onyx exp run` + `onyx exp log` cycle first, then incorporate their feedback in the next iteration. Don't abandon a running experiment.\n"
|
package/src/lib/skill.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises"
|
|
2
|
+
import { homedir } from "node:os"
|
|
3
|
+
import { dirname, join } from "node:path"
|
|
4
|
+
import { fileURLToPath } from "node:url"
|
|
5
|
+
|
|
6
|
+
import { ONYX_SKILL_MARKDOWN } from "./skill-content"
|
|
7
|
+
|
|
8
|
+
/** Skill name; `installOnyxSkill` uses it as the directory that holds `SKILL.md`. */
export const ONYX_SKILL_NAME = "onyx"
|
|
9
|
+
|
|
10
|
+
/** Default directory skills are installed under: `<home>/.onyx/skills`. */
export function defaultSkillInstallRoot() {
  const home = homedir()
  return join(home, ".onyx", "skills")
}
|
|
13
|
+
|
|
14
|
+
/**
 * Filesystem path of the packaged `skills/onyx/SKILL.md`, resolved two
 * directories above this module's own URL.
 */
export function packagedSkillPath() {
  const skillUrl = new URL("../../skills/onyx/SKILL.md", import.meta.url)
  return fileURLToPath(skillUrl)
}
|
|
17
|
+
|
|
18
|
+
/**
 * Returns the skill markdown: the packaged `SKILL.md` when it can be read,
 * otherwise the copy embedded in `ONYX_SKILL_MARKDOWN`.
 */
async function readPackagedSkill() {
  let markdown: string
  try {
    markdown = await readFile(packagedSkillPath(), "utf8")
  } catch {
    markdown = ONYX_SKILL_MARKDOWN
  }
  return markdown
}
|
|
25
|
+
|
|
26
|
+
/**
 * Writes the Onyx skill to `<dir>/<ONYX_SKILL_NAME>/SKILL.md`, creating the
 * directory when needed.
 *
 * @param options.dir - Install root; defaults to `defaultSkillInstallRoot()`.
 * @param options.quiet - When `true`, the confirmation line is not printed.
 * @returns The path of the written `SKILL.md`.
 */
export async function installOnyxSkill(
  options: {
    dir?: string
    quiet?: boolean
  } = {}
) {
  const { dir, quiet = false } = options
  const installRoot = dir ?? defaultSkillInstallRoot()
  const target = join(installRoot, ONYX_SKILL_NAME, "SKILL.md")

  await mkdir(dirname(target), { recursive: true })
  const markdown = await readPackagedSkill()
  await writeFile(target, markdown, "utf8")

  if (!quiet) {
    console.log(`Installed Onyx agent skill to ${target}`)
  }
  return target
}
|
|
42
|
+
|
|
43
|
+
/**
 * Describes where the skill content comes from: the packaged `SKILL.md`
 * path when that file can be read, otherwise the marker `embedded:onyx`.
 */
export async function displaySkillPath() {
  let location: string
  try {
    const candidate = packagedSkillPath()
    // Read purely as a readability probe; the content itself is discarded.
    await readFile(candidate, "utf8")
    location = candidate
  } catch {
    location = "embedded:onyx"
  }
  return location
}
|
package/src/lib/sync.ts
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
LocalResearchExperimentLoggedRecord,
|
|
3
|
+
LocalResearchBranchStartedRecord,
|
|
4
|
+
LocalResearchRecord,
|
|
5
|
+
} from "../protocol"
|
|
6
|
+
|
|
7
|
+
import type { Args } from "./args"
|
|
8
|
+
import {
|
|
9
|
+
ApiError,
|
|
10
|
+
listProjectBranches,
|
|
11
|
+
reportExperiment,
|
|
12
|
+
resolveProject,
|
|
13
|
+
upsertBranch,
|
|
14
|
+
type ApiProject,
|
|
15
|
+
} from "./api"
|
|
16
|
+
import { emitEvent } from "./events"
|
|
17
|
+
import { pushBranch } from "./git"
|
|
18
|
+
import { applyHistorySyncUpdates, type HistorySyncUpdate } from "./history"
|
|
19
|
+
import { branchMetadata } from "./markdown"
|
|
20
|
+
import { branchStateKey, normalizeProjectPath } from "./project"
|
|
21
|
+
import {
|
|
22
|
+
readOutbox,
|
|
23
|
+
readState,
|
|
24
|
+
rewriteOutbox,
|
|
25
|
+
writeState,
|
|
26
|
+
type CliState,
|
|
27
|
+
} from "./outbox"
|
|
28
|
+
|
|
29
|
+
/** Summary returned by `flushOutbox`. */
export type FlushResult = {
  /** Number of queued records that were sent successfully. */
  flushed: number
  /** Number of records still queued after the flush. */
  pending: number
  /** `true` when the project could not be resolved, so nothing was sent. */
  offline: boolean
}
|
|
34
|
+
|
|
35
|
+
/**
 * Returns a copy of the record's metrics without the primary metric. The
 * record itself is not modified.
 */
function secondaryMetrics(
  record: LocalResearchExperimentLoggedRecord
): Record<string, number> {
  const entries = Object.entries({ ...record.metrics })
  return Object.fromEntries(
    entries.filter(([name]) => name !== record.primaryMetricName)
  )
}
|
|
40
|
+
|
|
41
|
+
/**
 * Resolves the server-side id of a branch.
 *
 * Returns the id cached in `state.branches` when present. Otherwise reads the
 * branch's metadata via `branchMetadata`, upserts the branch through the API,
 * caches the returned id in `state` (mutating it) and returns it.
 */
async function upsertBranchFromMetadata(params: {
  projectId: string
  root: string
  branchName: string
  gitBranchName: string
  state: CliState
  args: Args
}): Promise<string> {
  const { projectId, root, branchName, gitBranchName, state, args } = params
  const projectPath = state.projectPath ?? ""
  const key = branchStateKey(projectPath, branchName)

  const knownId = state.branches[key]?.branchId
  if (knownId) {
    return knownId
  }

  const meta = await branchMetadata({
    root,
    projectPath,
    branchName,
    gitBranchName,
  })
  const { id } = await upsertBranch(
    projectId,
    {
      name: branchName,
      description: meta.description ?? undefined,
      gitBranchName,
      baseCommitSha: meta.baseCommitSha ?? undefined,
      metricName: meta.metricName,
      metricUnit: meta.metricUnit ?? undefined,
      metricDirection: meta.metricDirection,
    },
    args
  )

  state.branches[key] = { ...state.branches[key], branchId: id }
  return id
}
|
|
83
|
+
|
|
84
|
+
/**
 * Sends a queued `branch_started` record: upserts the branch through the API
 * and stores the returned id together with the record's metadata in
 * `state.branches`.
 *
 * When the record carries a `projectPath`, it first replaces
 * `state.projectPath`.
 */
async function flushBranchStarted(params: {
  projectId: string
  record: LocalResearchBranchStartedRecord
  state: CliState
  args: Args
}) {
  const { projectId, record, state, args } = params

  if (record.projectPath !== undefined) {
    state.projectPath = record.projectPath
  }
  const projectPath = state.projectPath ?? ""

  const { id: branchId } = await upsertBranch(
    projectId,
    {
      name: record.name,
      description: record.description ?? undefined,
      gitBranchName: record.gitBranchName,
      baseCommitSha: record.baseCommitSha ?? undefined,
      metricName: record.metricName,
      metricUnit: record.metricUnit ?? undefined,
      metricDirection: record.metricDirection,
    },
    args
  )

  const key = branchStateKey(projectPath, record.name)
  const previous = state.branches[key]
  state.branches[key] = {
    ...previous,
    branchId,
    projectPath,
    gitBranchName: record.gitBranchName,
    baseCommitSha: record.baseCommitSha ?? null,
    description: record.description ?? null,
    metricName: record.metricName,
    metricUnit: record.metricUnit ?? null,
    metricDirection: record.metricDirection,
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Sends a queued experiment record: resolves (or upserts) its branch, then
 * reports the experiment through the API.
 *
 * When the record carries a `projectPath`, it first replaces
 * `state.projectPath`. Nullish optional fields are sent as `undefined`.
 *
 * @returns The server-assigned sequence number and experiment id, plus the
 *   branch id, for stamping onto the local history cache.
 */
async function flushExperiment(params: {
  projectId: string
  root: string
  record: LocalResearchExperimentLoggedRecord
  state: CliState
  args: Args
}): Promise<HistorySyncUpdate> {
  const { projectId, root, record, state, args } = params

  if (record.projectPath !== undefined) {
    state.projectPath = record.projectPath
  }

  const branchId = await upsertBranchFromMetadata({
    projectId,
    root,
    branchName: record.branchName,
    gitBranchName: record.gitBranchName,
    state,
    args,
  })

  const { sequenceNumber, id: experimentId } = await reportExperiment(
    branchId,
    {
      name: record.name,
      description: record.description ?? undefined,
      runRef: record.runRef,
      commitSha: record.commitSha,
      status: record.status,
      primaryMetricName: record.primaryMetricName,
      primaryMetricValue: record.primaryMetricValue ?? undefined,
      secondaryMetrics: secondaryMetrics(record),
      artifactRefs: {},
      agentNotes: record.agentNotes,
      checks: record.checks ?? undefined,
      durationMs: record.durationMs ?? undefined,
      outputSummary: record.outputSummary ?? undefined,
      startedAt: record.startedAt ?? undefined,
      completedAt: record.completedAt ?? undefined,
    },
    args
  )

  return { sequenceNumber, experimentId, branchId }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Replays the local outbox to the Onyx API.
 *
 * Steps, in order: read the outbox; settle and persist the project path
 * (option, else first queued record, else stored state); resolve the project;
 * push every referenced git branch so reported commits are reachable;
 * pre-populate known branch ids; then send each record. Experiments are
 * reported idempotently (the server dedups by `runRef`).
 *
 * A 409 means the commit is not on the mirror yet (push/propagation lag); the
 * record stays queued silently and is retried on the next flush. Any other
 * error also keeps the record queued and is surfaced as a warning. When the
 * project cannot be resolved the flush is treated as offline and the outbox
 * is left untouched.
 *
 * @param options.quiet - Suppresses all console output when `true`.
 * @returns Counts of sent and still-pending records, and the offline flag.
 */
export async function flushOutbox(
  root: string,
  args: Args,
  options: { quiet?: boolean } = {}
): Promise<FlushResult> {
  const verbose = !options.quiet
  const describe = (error: unknown) =>
    error instanceof Error ? error.message : String(error)

  const { records, corrupt } = await readOutbox(root)
  if (verbose && corrupt > 0) {
    console.warn(`Skipped ${corrupt} unreadable outbox record(s).`)
  }
  if (records.length === 0) {
    return { flushed: 0, pending: 0, offline: false }
  }

  const state = await readState(root)
  const requestedProjectPath = normalizeProjectPath(
    args.options["project-path"]
  )
  const firstScopedRecord = records.find(
    (record) => record.projectPath !== undefined
  )
  const queuedProjectPath = firstScopedRecord?.projectPath
  // `||` on purpose: an empty string means "no project path" here.
  state.projectPath =
    requestedProjectPath || queuedProjectPath || state.projectPath
  await writeState(root, state)

  let project: ApiProject
  try {
    project = await resolveProject(root, args)
  } catch (error) {
    if (verbose) {
      console.log(
        `${records.length} record(s) queued locally; not synced (${describe(
          error
        )})`
      )
    }
    return { flushed: 0, pending: records.length, offline: true }
  }

  state.projectId = project.id

  // Push every referenced branch up front so reported commits are reachable.
  const referencedBranches = new Set(
    records.map((record) => record.gitBranchName)
  )
  for (const gitBranch of referencedBranches) {
    try {
      await pushBranch(root, gitBranch)
    } catch {
      // Keep going: the report 409s and is retried if the commit is unreachable.
    }
  }

  // Pre-populate branch ids so concurrently-created branches resolve without upsert.
  try {
    const knownBranches = await listProjectBranches(project.id, args)
    for (const known of knownBranches) {
      const key = branchStateKey(state.projectPath ?? "", known.name)
      state.branches[key] = { ...state.branches[key], branchId: known.id }
    }
  } catch {
    // best-effort
  }

  const remaining: LocalResearchRecord[] = []
  const historyUpdates = new Map<string, HistorySyncUpdate>()
  let flushed = 0

  for (const record of records) {
    try {
      if (record.type === "branch_started") {
        await flushBranchStarted({ projectId: project.id, record, state, args })
      } else {
        const update = await flushExperiment({
          projectId: project.id,
          root,
          record,
          state,
          args,
        })
        historyUpdates.set(record.runRef, update)
      }
      flushed += 1
    } catch (error) {
      const isRetryableConflict =
        error instanceof ApiError && error.status === 409
      if (!isRetryableConflict && verbose) {
        console.warn(`Keeping queued record after error: ${describe(error)}`)
      }
      remaining.push(record)
    }
  }

  await rewriteOutbox(root, remaining)
  await writeState(root, state)
  // Stamp server-assigned sequence numbers onto the local history cache so
  // the TUI shows them immediately. Best-effort: hydration also covers this.
  await applyHistorySyncUpdates(root, historyUpdates).catch(() => {})
  await emitEvent(root, {
    type: "flush_finished",
    message: `synced ${flushed}, pending ${remaining.length}`,
  })

  if (verbose) {
    console.log(`Synced ${flushed} record(s); ${remaining.length} pending.`)
  }

  return { flushed, pending: remaining.length, offline: false }
}
|