opencode-goal-mode 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +31 -0
- package/CHANGELOG.md +18 -0
- package/README.md +64 -24
- package/benchmarks/build-external-corpus.mjs +177 -0
- package/benchmarks/external-corpus.json +3540 -0
- package/benchmarks/external.mjs +110 -0
- package/benchmarks/run.mjs +78 -24
- package/commands/goal.md +16 -1
- package/docs/benchmarks/detection-by-family.svg +2 -2
- package/docs/benchmarks/external-scorecard.svg +32 -0
- package/docs/benchmarks/latency.svg +3 -3
- package/docs/benchmarks/overall-scorecard.svg +2 -2
- package/docs/benchmarks/results.json +112 -71
- package/docs/benchmarks/truthfulness-score.svg +2 -2
- package/package.json +3 -1
- package/plugins/goal-guard/config.js +9 -0
- package/plugins/goal-guard/shell.js +4 -3
- package/plugins/goal-guard/sidebar-data.js +71 -0
- package/plugins/goal-guard/summary.js +34 -0
- package/plugins/goal-guard/tools.js +8 -2
- package/plugins/goal-guard.js +13 -0
- package/plugins/goal-sidebar.js +141 -0
- package/research/benchmarks.md +75 -69
package/ARCHITECTURE.md
CHANGED
|
@@ -15,6 +15,10 @@ configuration directory:
|
|
|
15
15
|
— a runtime guard that enforces review discipline, blocks destructive shell
|
|
16
16
|
commands, preserves state across compaction and restarts, and exposes
|
|
17
17
|
first-class `goal_*` tools.
|
|
18
|
+
4. **An experimental TUI companion** (`plugins/goal-sidebar.js`) — a separate
|
|
19
|
+
`{ tui }` plugin module that renders the active goal as a yellow sidebar
|
|
20
|
+
banner. It is *paired* with the server plugin purely through the on-disk state
|
|
21
|
+
snapshot (no extra IPC) and no-ops on any runtime without the slot API.
|
|
18
22
|
|
|
19
23
|
This document focuses on the plugin, where the engineering lives.
|
|
20
24
|
|
|
@@ -48,7 +52,9 @@ as plugins. Each module is independently unit-tested.
|
|
|
48
52
|
| `goal-guard/events.js` | Shared edit/verification/evidence mutators. |
|
|
49
53
|
| `goal-guard/summary.js` | State summaries, status reports, and evidence-map projections. |
|
|
50
54
|
| `goal-guard/system.js` | Live state block injected into the system prompt. |
|
|
55
|
+
| `goal-guard/summary.js` | Status/evidence projections, the short goal label, and the sidebar view. |
|
|
51
56
|
| `goal-guard/tools.js` | The `goal_status` / `goal_evidence_map` / `goal_reviewer_memory` / `goal_contract` / `goal_evidence` / `goal_reset` tools. |
|
|
57
|
+
| `goal-guard/sidebar-data.js` | Pure reader that projects the persisted snapshot into the sidebar banner model. |
|
|
52
58
|
| `goal-guard/logger.js` | Best-effort logging/toasts over the OpenCode client. |
|
|
53
59
|
|
|
54
60
|
## Hooks used
|
|
@@ -157,6 +163,25 @@ The `@opencode-ai/plugin` import they need is isolated to `tools.js` and loaded
|
|
|
157
163
|
via a guarded dynamic import, so if the host cannot resolve it the core guard
|
|
158
164
|
hooks still load.
|
|
159
165
|
|
|
166
|
+
## TUI companion (experimental)
|
|
167
|
+
|
|
168
|
+
`plugins/goal-sidebar.js` is a TUI plugin module — `export const tui = async (api)
|
|
169
|
+
=> …` — distinct from the server plugin (`@opencode-ai/plugin` types it as a
|
|
170
|
+
`{ tui }` module, mutually exclusive with `{ server }`). It registers a
|
|
171
|
+
`sidebar_content` slot via `api.slots.register({ slots: { sidebar_content } })`
|
|
172
|
+
and renders, in the configured colour (`#FFD700` by default), the short goal
|
|
173
|
+
label plus a `passing/total gates · dirty/ready` line.
|
|
174
|
+
|
|
175
|
+
It is *paired* with the server plugin only through the persisted state file:
|
|
176
|
+
`sidebar-data.js` recomputes the same `stateBaseDir`/`projectKey` path the guard
|
|
177
|
+
writes to and projects the active session via `summary.sidebarView`. That keeps
|
|
178
|
+
the pure projection logic Node-testable (`tests/sidebar.test.mjs`) even though the
|
|
179
|
+
JSX renderer itself can only run inside OpenCode's (Bun) TUI runtime. Everything
|
|
180
|
+
in the `tui` entry is wrapped so a missing slot API, missing JSX runtime, or read
|
|
181
|
+
error degrades to rendering nothing — it can never break the TUI. The server plugin
|
|
182
|
+
also emits review-verdict and completion-unlock toasts (`toastOnReview`) so review
|
|
183
|
+
progress is visible even without the banner.
|
|
184
|
+
|
|
160
185
|
## Configuration
|
|
161
186
|
|
|
162
187
|
`config.js` merges, in increasing precedence: built-in defaults, environment
|
|
@@ -182,8 +207,14 @@ manifest of the file hashes it wrote. On upgrade it distinguishes files it owns
|
|
|
182
207
|
- `tests/plugin.test.mjs` — hook behavior, gating, verdicts, completion, tools, isolation.
|
|
183
208
|
- `tests/truthfulness-benchmark.test.mjs` — false-completion corpus and truthfulness scoring.
|
|
184
209
|
- `tests/state.test.mjs` — store, seq ordering, eviction, persistence round-trips.
|
|
210
|
+
- `tests/sidebar.test.mjs` — short goal label, sidebar projection, snapshot reader, new destructive bins.
|
|
211
|
+
- `tests/toast.test.mjs` — review-verdict and completion-unlock toasts.
|
|
185
212
|
- `tests/agents.test.mjs` / `tests/commands.test.mjs` — frontmatter and contracts.
|
|
186
213
|
- `tests/install.test.mjs` — recursive copy, manifest upgrades, uninstall.
|
|
187
214
|
|
|
215
|
+
The shell guard's headline accuracy is measured on an external, third-party
|
|
216
|
+
corpus (`benchmarks/external.mjs` over `external-corpus.json`), not on the curated
|
|
217
|
+
fixtures — see [research/benchmarks.md](research/benchmarks.md).
|
|
218
|
+
|
|
188
219
|
`npm run validate` runs the tests, the structural config validator, the publish
|
|
189
220
|
readiness check, and an `npm pack --dry-run`.
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## v0.3.0
|
|
4
|
+
|
|
5
|
+
- Honest benchmarks: add an EXTERNAL corpus of 704 real third-party commands from
|
|
6
|
+
tldr-pages (`benchmarks/external.mjs`, `npm run bench:external`) as the headline
|
|
7
|
+
detection/false-positive measure (93.3% vs 53.8% legacy; ~0% real false
|
|
8
|
+
positives). Reframe the curated 71-command set and 9 completion cases as
|
|
9
|
+
regression *fixtures*, not measured accuracy, and reword the README/charts to
|
|
10
|
+
stop overclaiming.
|
|
11
|
+
- Stronger guard: block `mkfs.<fstype>` variants, `srm`, and `mkswap`
|
|
12
|
+
(genuine destructive commands the external corpus exposed as misses).
|
|
13
|
+
- Deeper TUI embedding: toast on each review verdict (PASS/FAIL) and once when the
|
|
14
|
+
last required gate clears (`toastOnReview`); `goal_status` now surfaces the goal.
|
|
15
|
+
- Experimental TUI sidebar banner (`plugins/goal-sidebar.js`): the active goal in
|
|
16
|
+
shining yellow with a live gate-status line, paired with the guard via persisted
|
|
17
|
+
state. No-ops on any runtime without the TUI slot API. New options
|
|
18
|
+
`sidebarBanner` / `sidebarColor` (`GOAL_GUARD_SIDEBAR_*`).
|
|
19
|
+
- Tighter `/goal` flow that seeds the Goal Contract via the `goal_contract` tool.
|
|
20
|
+
|
|
3
21
|
## v0.2.4
|
|
4
22
|
|
|
5
23
|
- Add Reviewer Memory for unresolved/resolved reviewer findings across cycles.
|
package/README.md
CHANGED
|
@@ -38,37 +38,50 @@ honest caveats, in [research/goal-mode-comparison.md](research/goal-mode-compari
|
|
|
38
38
|
- **Destructive commands are blocked by a real shell tokenizer**, not a regex.
|
|
39
39
|
Claude Code's own docs call Bash argument-matching *"fragile"*.
|
|
40
40
|
|
|
41
|
-
### Benchmarks
|
|
41
|
+
### Benchmarks (honest edition)
|
|
42
42
|
|
|
43
|
-
The
|
|
44
|
-
|
|
45
|
-
|
|
43
|
+
The headline number is measured on commands **the analyzer was never fitted to**:
|
|
44
|
+
704 real example commands from [tldr-pages](https://github.com/tldr-pages/tldr)
|
|
45
|
+
(common/linux/osx), authored by hundreds of contributors who have never seen
|
|
46
|
+
this guard. Ground-truth labels come from a deliberately simple, analyzer-*independent*
|
|
47
|
+
rule (see [build-external-corpus.mjs](benchmarks/build-external-corpus.mjs)).
|
|
48
|
+
Reproduce with `npm run bench` or `node benchmarks/external.mjs`.
|
|
46
49
|
|
|
47
|
-

|
|
48
51
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
| | Legacy regex guard | Goal Mode analyzer |
|
|
52
|
+
| On 704 real third-party commands | Legacy regex guard | Goal Mode analyzer |
|
|
52
53
|
| --- | --- | --- |
|
|
53
|
-
| Destructive-command detection |
|
|
54
|
-
| False positives on safe commands |
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
The
|
|
59
|
-
|
|
60
|
-
|
|
54
|
+
| Destructive-command detection | 53.8% | **93.3%** |
|
|
55
|
+
| False positives on safe commands | 0.2% | **0.2%** |
|
|
56
|
+
|
|
57
|
+
Honest caveats, because the point of this rewrite was to stop overclaiming:
|
|
58
|
+
|
|
59
|
+
- The ~7 remaining "misses" are almost all un-flagged single-target `rm <file>`,
|
|
60
|
+
which the guard **intentionally permits** (plain `rm` is common and the guard
|
|
61
|
+
blocks `rm -r`/`rm -f`, `$(rm …)`, `bash -c`, interpreters, etc.). Under a
|
|
62
|
+
strict every-`rm`-is-destructive labeling those count against it.
|
|
63
|
+
- The single counted false positive (`git filter-repo …`) actually *is* a
|
|
64
|
+
history-rewriting command, so the real-world false-positive rate is effectively
|
|
65
|
+
zero. `node benchmarks/external.mjs --json` lists every miss and false positive
|
|
66
|
+
so you can audit the disagreements yourself.
|
|
67
|
+
|
|
68
|
+
Two **curated fixture sets** also ship — and they are explicitly *fixtures*, not
|
|
69
|
+
an unbiased benchmark. They define the patterns the analyzer must catch and guard
|
|
70
|
+
against regressions, so they pass by construction; do not read the 100%/0% there
|
|
71
|
+
as measured accuracy:
|
|
72
|
+
|
|
73
|
+
- `benchmarks/corpus.mjs` — 71 destructive patterns (incl. `$(…)`, `bash -c`,
|
|
74
|
+
`sudo -u`, `/bin/rm`, `git -C … reset --hard`, `curl | sh`, interpreter
|
|
75
|
+
deletes) and their safe look-alikes (`git checkout -b`, `echo "rm -rf /"`).
|
|
76
|
+
- `benchmarks/completion-corpus.mjs` — 9 completion-claim policy cases (missing
|
|
77
|
+
review-cycle line, stale review after edit, missing contextual gate, inactive
|
|
78
|
+
session, custom marker). `npm run bench:truthfulness` prints them.
|
|
79
|
+
|
|
80
|
+
The analysis costs ~1µs per command (hundreds of thousands of classifications per
|
|
81
|
+
second) — negligible for a per-tool-call guard:
|
|
61
82
|
|
|
62
83
|

|
|
63
84
|
|
|
64
|
-
Goal Mode also ships a **False Completion Dataset** for completion-claim
|
|
65
|
-
truthfulness: `npm run bench` regenerates the scorecard and
|
|
66
|
-
`npm run bench:truthfulness` prints the labeled-case JSON for premature and valid
|
|
67
|
-
completion claims, including missing review-cycle lines, stale reviews after
|
|
68
|
-
edits, missing contextual gates, inactive sessions, and custom completion markers.
|
|
69
|
-
|
|
70
|
-

|
|
71
|
-
|
|
72
85
|
## Requirements
|
|
73
86
|
|
|
74
87
|
- Node.js 20.11 or newer.
|
|
@@ -98,9 +111,33 @@ edits, missing contextual gates, inactive sessions, and custom completion marker
|
|
|
98
111
|
`goal_reviewer_memory`, `goal_status`, `goal_reset`.
|
|
99
112
|
- **Live state injection** into the system prompt so the model always knows
|
|
100
113
|
what the guard requires.
|
|
114
|
+
- **TUI toasts**: a toast on each review verdict (PASS/FAIL) and a single
|
|
115
|
+
"completion unlocked" toast the moment the last required gate clears.
|
|
116
|
+
- An **experimental** companion TUI plugin (`plugins/goal-sidebar.js`) that shows
|
|
117
|
+
the active goal as a shining-yellow banner in the sidebar with a compact gate
|
|
118
|
+
status line. See [TUI integration](#tui-integration).
|
|
101
119
|
- A test suite validating the analyzer, plugin hooks, state store, install
|
|
102
120
|
safety, and config compatibility.
|
|
103
121
|
|
|
122
|
+
## TUI integration
|
|
123
|
+
|
|
124
|
+
Goal Mode is a **plugin pair**: the server-side `goal-guard` plugin owns
|
|
125
|
+
enforcement and writes its state to disk, and an experimental TUI plugin
|
|
126
|
+
(`plugins/goal-sidebar.js`) reads that same state to render a live banner.
|
|
127
|
+
|
|
128
|
+
- **Sidebar goal banner (experimental).** The current goal renders in shining
|
|
129
|
+
yellow in the sidebar (`sidebar_content` slot), with a `passing/total gates ·
|
|
130
|
+
dirty/ready` status line, and updates as reviews land. It requires a
|
|
131
|
+
TUI-plugin-capable OpenCode (one exposing `api.slots.register`); on any older
|
|
132
|
+
runtime it silently no-ops, so it can never break your TUI. Set
|
|
133
|
+
`sidebarBanner: false` (or `GOAL_GUARD_SIDEBAR_BANNER=0`) to disable, or
|
|
134
|
+
`sidebarColor` to recolour it. Because no local environment can run OpenCode's
|
|
135
|
+
TUI runtime, this banner is shipped best-effort and should be verified in your
|
|
136
|
+
own TUI.
|
|
137
|
+
- **Toasts.** Review verdicts and completion-unlock events surface as toasts
|
|
138
|
+
(`toastOnReview`), and blocked destructive commands / premature completions
|
|
139
|
+
toast as before (`toastOnBlock`).
|
|
140
|
+
|
|
104
141
|
## Install globally
|
|
105
142
|
|
|
106
143
|
```bash
|
|
@@ -162,6 +199,9 @@ Or via environment variables (`GOAL_GUARD_*`):
|
|
|
162
199
|
| `maxSessions` / `GOAL_GUARD_MAX_SESSIONS` | `200` | Session cache size. |
|
|
163
200
|
| `sessionTtlMs` / `GOAL_GUARD_SESSION_TTL_MS` | `86400000` | Idle session TTL. |
|
|
164
201
|
| `toastOnBlock` / `GOAL_GUARD_TOAST_ON_BLOCK` | `true` | Toast when something is blocked. |
|
|
202
|
+
| `toastOnReview` / `GOAL_GUARD_TOAST_ON_REVIEW` | `true` | Toast on each review verdict and when completion unlocks. |
|
|
203
|
+
| `sidebarBanner` / `GOAL_GUARD_SIDEBAR_BANNER` | `true` | Show the experimental yellow goal banner in the TUI sidebar. |
|
|
204
|
+
| `sidebarColor` / `GOAL_GUARD_SIDEBAR_COLOR` | `#FFD700` | Foreground colour of the sidebar goal banner. |
|
|
165
205
|
|
|
166
206
|
## Custom tools
|
|
167
207
|
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Build an EXTERNAL, third-party-authored shell-command corpus for the guard
|
|
4
|
+
* benchmark, so the reported detection / false-positive numbers measure
|
|
5
|
+
* real-world behavior instead of a self-authored set the analyzer was tuned on.
|
|
6
|
+
*
|
|
7
|
+
* Source: the tldr-pages project (https://github.com/tldr-pages/tldr, CC-BY).
|
|
8
|
+
* Every example command in the English `common`, `linux`, and `osx` pages is a
|
|
9
|
+
* real invocation documented by hundreds of contributors who have never seen
|
|
10
|
+
* this analyzer — so the analyzer cannot have been fitted to them.
|
|
11
|
+
*
|
|
12
|
+
* Ground-truth labels come from `labelDestructive()` below: a deliberately
|
|
13
|
+
* SIMPLE, transparent rule based on the primary utility and a fixed list of
|
|
14
|
+
* irreversible operations. It is intentionally independent of the analyzer's
|
|
15
|
+
* own classification logic. It is not perfect (no automatic labeler is) — the
|
|
16
|
+
* benchmark reports raw agreement and discloses the labeler so disagreements
|
|
17
|
+
* are auditable rather than hidden.
|
|
18
|
+
*
|
|
19
|
+
* Usage:
|
|
20
|
+
* node benchmarks/build-external-corpus.mjs --tldr /path/to/tldr [--limit 600]
|
|
21
|
+
* TLDR_DIR=/path/to/tldr node benchmarks/build-external-corpus.mjs
|
|
22
|
+
*
|
|
23
|
+
* Writes benchmarks/external-corpus.json (committed, so `npm run bench` is
|
|
24
|
+
* reproducible without a tldr checkout). Re-run this to regenerate it.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { readFileSync, readdirSync, writeFileSync, existsSync } from "node:fs";
|
|
28
|
+
import { join, dirname } from "node:path";
|
|
29
|
+
import { fileURLToPath } from "node:url";
|
|
30
|
+
import { parseArgs } from "node:util";
|
|
31
|
+
|
|
32
|
+
const { values } = parseArgs({
|
|
33
|
+
options: {
|
|
34
|
+
tldr: { type: "string" },
|
|
35
|
+
limit: { type: "string", default: "600" },
|
|
36
|
+
},
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
const here = dirname(fileURLToPath(import.meta.url));
|
|
40
|
+
const tldrDir = values.tldr || process.env.TLDR_DIR;
|
|
41
|
+
const safeLimit = Math.max(50, Number.parseInt(values.limit, 10) || 600);
|
|
42
|
+
|
|
43
|
+
if (!tldrDir || !existsSync(tldrDir)) {
|
|
44
|
+
console.error(
|
|
45
|
+
"Need a tldr-pages checkout. Pass --tldr <dir> or set TLDR_DIR.\n" +
|
|
46
|
+
" git clone --depth 1 https://github.com/tldr-pages/tldr.git",
|
|
47
|
+
);
|
|
48
|
+
process.exit(1);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/** Pinned provenance for reproducibility — resolves a symbolic HEAD to its SHA. */
|
|
52
|
+
function tldrCommit() {
|
|
53
|
+
try {
|
|
54
|
+
const head = readFileSync(join(tldrDir, ".git", "HEAD"), "utf8").trim();
|
|
55
|
+
const ref = head.match(/^ref:\s*(.+)$/);
|
|
56
|
+
if (!ref) return head;
|
|
57
|
+
return readFileSync(join(tldrDir, ".git", ref[1]), "utf8").trim();
|
|
58
|
+
} catch {
|
|
59
|
+
return "unknown";
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Turn a tldr example line into a real, literal shell command:
|
|
65
|
+
* - `{{placeholder}}` → its inner text (a realistic argument).
|
|
66
|
+
* - `[-f|--force]` / `[-r|--recursive]` alternative-flag notation → the first
|
|
67
|
+
* form (`-f`, `-r`), so the result is a command a shell would actually accept
|
|
68
|
+
* rather than tldr documentation syntax.
|
|
69
|
+
*/
|
|
70
|
+
function fillPlaceholders(cmd) {
|
|
71
|
+
return cmd
|
|
72
|
+
.replace(/\{\{(.*?)\}\}/g, (_, inner) => String(inner).trim() || "arg")
|
|
73
|
+
.replace(/\[([^\]|]+)\|[^\]]+\]/g, (_, first) => String(first).trim());
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/** Independent, transparent destructive-intent labeler (NOT the analyzer). */
|
|
77
|
+
function labelDestructive(cmd) {
|
|
78
|
+
const c = cmd.trim();
|
|
79
|
+
// Remote code execution: fetch piped into a shell.
|
|
80
|
+
if (/\b(curl|wget|fetch)\b[^|]*\|\s*(sudo\s+)?(sh|bash|zsh|dash|ksh)\b/.test(c)) return true;
|
|
81
|
+
// Strip a leading wrapper so `sudo rm` / `time rm` resolve to their target.
|
|
82
|
+
const stripped = c.replace(/^(sudo|time|nice|ionice|nohup|env)\s+(-\S+\s+)*/, "");
|
|
83
|
+
const m = stripped.match(/^(\/[^\s]*\/)?([a-zA-Z0-9_.-]+)\b(.*)$/);
|
|
84
|
+
if (!m) return false;
|
|
85
|
+
const bin = m[2];
|
|
86
|
+
const rest = m[3] || "";
|
|
87
|
+
const DESTRUCTIVE_BINS = new Set([
|
|
88
|
+
"rm", "rmdir", "shred", "srm", "dd", "mkfs", "fdisk", "parted",
|
|
89
|
+
"wipefs", "mkswap", "blkdiscard", "sgdisk", "unlink",
|
|
90
|
+
]);
|
|
91
|
+
if (/^mkfs\./.test(bin)) return true;
|
|
92
|
+
if (DESTRUCTIVE_BINS.has(bin)) {
|
|
93
|
+
if (bin === "dd") return /\bof=\/dev\//.test(rest);
|
|
94
|
+
if (bin === "rmdir") return false; // only removes empty dirs
|
|
95
|
+
return true;
|
|
96
|
+
}
|
|
97
|
+
if (bin === "git") {
|
|
98
|
+
if (/\breset\s+--hard\b/.test(rest)) return true;
|
|
99
|
+
if (/\bclean\b.*\s-\S*f/.test(rest)) return true;
|
|
100
|
+
if (/\bpush\b.*(--force\b|\s-f\b)/.test(rest)) return true;
|
|
101
|
+
if (/\bbranch\b.*\s-D\b/.test(rest)) return true;
|
|
102
|
+
if (/\breflog\s+expire\b/.test(rest)) return true;
|
|
103
|
+
if (/\bgc\b.*--prune/.test(rest)) return true;
|
|
104
|
+
if (/\bfilter-branch\b/.test(rest)) return true;
|
|
105
|
+
}
|
|
106
|
+
return false;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
const dirs = ["common", "linux", "osx"]
|
|
110
|
+
.map((d) => join(tldrDir, "pages", d))
|
|
111
|
+
.filter((d) => existsSync(d));
|
|
112
|
+
|
|
113
|
+
const seen = new Set();
|
|
114
|
+
const destructive = [];
|
|
115
|
+
const safe = [];
|
|
116
|
+
|
|
117
|
+
for (const dir of dirs) {
|
|
118
|
+
const family = dir.split("/").slice(-1)[0];
|
|
119
|
+
for (const file of readdirSync(dir)) {
|
|
120
|
+
if (!file.endsWith(".md")) continue;
|
|
121
|
+
const page = file.replace(/\.md$/, "");
|
|
122
|
+
const text = readFileSync(join(dir, file), "utf8");
|
|
123
|
+
for (const line of text.split("\n")) {
|
|
124
|
+
const trimmed = line.trim();
|
|
125
|
+
// tldr example commands are fenced in single backticks on their own line.
|
|
126
|
+
if (!trimmed.startsWith("`") || !trimmed.endsWith("`") || trimmed.length < 4) continue;
|
|
127
|
+
const raw = fillPlaceholders(trimmed.slice(1, -1)).trim();
|
|
128
|
+
if (!raw || raw.length > 240) continue;
|
|
129
|
+
if (!/^[a-zA-Z/.~$]/.test(raw)) continue; // must start like a command
|
|
130
|
+
if (seen.has(raw)) continue;
|
|
131
|
+
seen.add(raw);
|
|
132
|
+
const entry = { cmd: raw, page, family };
|
|
133
|
+
if (labelDestructive(raw)) destructive.push(entry);
|
|
134
|
+
else safe.push(entry);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/** Deterministic evenly-spaced stride sample (no RNG, so the build is stable). */
|
|
140
|
+
function stride(list, target) {
|
|
141
|
+
if (list.length <= target) return list.slice();
|
|
142
|
+
const step = list.length / target;
|
|
143
|
+
const out = [];
|
|
144
|
+
for (let i = 0; i < target; i += 1) out.push(list[Math.floor(i * step)]);
|
|
145
|
+
return out;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Enrich ALL destructive examples (they are rare in real docs) and stride-sample
|
|
149
|
+
// safe ones up to the limit. This is disclosed in the report so the imbalance is
|
|
150
|
+
// not mistaken for the natural base rate.
|
|
151
|
+
destructive.sort((a, b) => a.cmd.localeCompare(b.cmd));
|
|
152
|
+
safe.sort((a, b) => a.cmd.localeCompare(b.cmd));
|
|
153
|
+
const sampledSafe = stride(safe, safeLimit);
|
|
154
|
+
|
|
155
|
+
const corpus = {
|
|
156
|
+
source: "tldr-pages",
|
|
157
|
+
url: "https://github.com/tldr-pages/tldr",
|
|
158
|
+
license: "CC-BY-4.0",
|
|
159
|
+
commit: tldrCommit(),
|
|
160
|
+
pages: dirs.map((d) => d.split("/").slice(-2).join("/")),
|
|
161
|
+
labeler: "benchmarks/build-external-corpus.mjs labelDestructive() — independent of the analyzer",
|
|
162
|
+
totals: {
|
|
163
|
+
uniqueCommandsScanned: seen.size,
|
|
164
|
+
destructiveFound: destructive.length,
|
|
165
|
+
safeFound: safe.length,
|
|
166
|
+
safeSampled: sampledSafe.length,
|
|
167
|
+
},
|
|
168
|
+
entries: [...destructive, ...sampledSafe],
|
|
169
|
+
};
|
|
170
|
+
|
|
171
|
+
const outPath = join(here, "external-corpus.json");
|
|
172
|
+
writeFileSync(outPath, JSON.stringify(corpus, null, 2));
|
|
173
|
+
console.log(
|
|
174
|
+
`Wrote ${corpus.entries.length} external commands ` +
|
|
175
|
+
`(${destructive.length} destructive + ${sampledSafe.length}/${safe.length} safe sampled) ` +
|
|
176
|
+
`from ${seen.size} unique tldr examples @ ${corpus.commit.slice(0, 12)} → ${outPath}`,
|
|
177
|
+
);
|