forgecad 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/assets/{AdminPage-DramHHDf.js → AdminPage-eWGs2K6H.js} +1 -1
  2. package/dist/assets/{BenchmarkPage-Bjgkh5m9.js → BenchmarkPage-CTrLKfpo.js} +1 -1
  3. package/dist/assets/{BlogPage-n_HGP3Qm.js → BlogPage-5nPesyds.js} +1 -1
  4. package/dist/assets/{DocsPage-WCIkPmzC.js → DocsPage-C4Y3nbYc.js} +1 -1
  5. package/dist/assets/{EditorApp-CP9Za6tm.js → EditorApp-lXv53A1m.js} +9 -29
  6. package/dist/assets/{EmbedViewer-DEZKqdfW.js → EmbedViewer-C8fB4n5U.js} +2 -2
  7. package/dist/assets/{LandingPageProofDriven-CeRIctuj.js → LandingPageProofDriven-jSz0LaMM.js} +1 -1
  8. package/dist/assets/{PricingPage-rIRa8p4Y.js → PricingPage-B83B90zh.js} +1 -1
  9. package/dist/assets/{SettingsPage-BqCUvEXM.js → SettingsPage-DY889pcu.js} +1 -1
  10. package/dist/assets/{app-BUZqJvSO.js → app-bEww1ic4.js} +26 -28
  11. package/dist/assets/cli/{render-lhGxj50Y.js → render-Cho2uKG_.js} +88 -25
  12. package/dist/assets/{constructionHistoryWorker-ipD1jcIv.js → constructionHistoryWorker-HYwzJY4m.js} +1 -1
  13. package/dist/assets/{evalWorker-CHXSe_-u.js → evalWorker-CjQwJSE-.js} +3 -3
  14. package/dist/assets/{forgecad_geometry-BVnIeXMG.js → forgecad_geometry-CH2nvuLA.js} +1 -1
  15. package/dist/assets/forgecad_geometry_bg-C5_E9Oa9.wasm +0 -0
  16. package/dist/assets/{manifold-D1LZIHqn.js → manifold-CG9Fokx-.js} +1 -1
  17. package/dist/assets/{manifold-BTkzxi9V.js → manifold-rmfAcdwF.js} +1 -1
  18. package/dist/assets/{manifold-C2fwoTgd.js → manifold-uRzgk5O8.js} +2 -2
  19. package/dist/assets/{reportWorker-Cq1qGmg0.js → reportWorker-4cW_ZpoS.js} +3 -3
  20. package/dist/assets/{scalar-sampling-budget-D9Qv_UlJ.js → scalar-sampling-budget-CfDiFvh7.js} +12 -18
  21. package/dist/assets/{solver-BZ9LPTHs.js → solver-DuJAO8S6.js} +1 -1
  22. package/dist/assets/solver_bg-CWvv4lnN.wasm +0 -0
  23. package/dist/assets/{renderSceneState-Dr0xPq1A.js → targets-D6PWsv6X.js} +27 -1
  24. package/dist/cli/render.html +1 -1
  25. package/dist/docs/index.html +2 -2
  26. package/dist/docs-raw/AI/usage.md +6 -5
  27. package/dist/docs-raw/CLI.md +41 -11
  28. package/dist/docs-raw/generated/concepts.md +3 -3
  29. package/dist/docs-raw/generated/viewport.md +3 -3
  30. package/dist/docs-raw/harbor-cli.md +854 -0
  31. package/dist/docs-raw/rl-environments.md +100 -258
  32. package/dist/docs-raw/skills/forgecad-3d-reconstruction.md +2 -2
  33. package/dist/docs-raw/skills/forgecad-make-a-model.md +3 -3
  34. package/dist/docs-raw/skills/forgecad-reconstruction-benchmark.md +3 -3
  35. package/dist/index.html +1 -1
  36. package/dist/sitemap.xml +7 -7
  37. package/dist-cli/{check-compiler-LOXCPEOI.js → check-compiler-U5SOPN7X.js} +2 -2
  38. package/dist-cli/{check-query-propagation-BAKNVWXR.js → check-query-propagation-XOKNSSYU.js} +2 -2
  39. package/dist-cli/{chunk-RY43WF46.js → chunk-EXWGNL6K.js} +342 -2
  40. package/dist-cli/{chunk-RY43WF46.js.map → chunk-EXWGNL6K.js.map} +1 -1
  41. package/dist-cli/forgecad.js +733 -352
  42. package/dist-cli/forgecad.js.map +1 -1
  43. package/dist-cli/forgecad_geometry_bg.wasm +0 -0
  44. package/dist-cli/solver_bg.wasm +0 -0
  45. package/dist-skill/CONTEXT.md +3 -3
  46. package/dist-skill/docs/CLI.md +41 -11
  47. package/dist-skill/docs/generated/viewport.md +3 -3
  48. package/dist-skill/docs-dev/CLI.md +41 -11
  49. package/dist-skill/docs-dev/generated/viewport.md +3 -3
  50. package/dist-skill/library/forgecad-3d-reconstruction/SKILL.md +2 -2
  51. package/dist-skill/library/forgecad-make-a-model/SKILL.md +3 -3
  52. package/dist-skill/library/forgecad-reconstruction-benchmark/SKILL.md +3 -3
  53. package/package.json +1 -6
  54. package/dist/assets/forgecad_geometry_bg-DufhhCBV.wasm +0 -0
  55. package/dist/assets/solver_bg-DAHZJ_rw.wasm +0 -0
  56. package/dist-cli/{check-compiler-LOXCPEOI.js.map → check-compiler-U5SOPN7X.js.map} +0 -0
  57. package/dist-cli/{check-query-propagation-BAKNVWXR.js.map → check-query-propagation-XOKNSSYU.js.map} +0 -0
@@ -1,62 +1,57 @@
1
1
  # ForgeCAD RL Environments
2
2
 
3
3
  This is the permanent operating guide for ForgeCAD reinforcement-learning and
4
- AI-lab reconstruction environments. Temporary notes can record experiments, but
5
- this document is the stable contract for building, running, evaluating, and
6
- maintaining reconstruction tasks.
4
+ AI-lab reconstruction environments.
7
5
 
8
6
  ## Product Goal
9
7
 
10
8
  ForgeCAD RL environments should let an external lab train or evaluate an agent
11
9
  that turns reference physical geometry into readable ForgeCAD source code.
12
10
 
13
- The loop is intentionally simple:
11
+ Harbor is the executable contract. ForgeCAD keeps the product code, verifier
12
+ integration, benchmark import tools, and public leaderboard page. The Harbor
13
+ task corpus lives outside this repository.
14
14
 
15
- 1. ForgeCAD authors Harbor task folders directly under
16
- `ai-labs/reconstruction/tasks/`.
17
- 2. `npm run harbor:reconstruction:refresh` validates those folders and updates
18
- `ai-labs/reconstruction/dataset.toml` digests.
19
- 3. Harbor builds the sandbox and runs the agent.
20
- 4. The hidden RewardKit verifier evaluates `/app/submission/main.forge.js`
21
- after the agent exits.
22
- 5. RewardKit writes Harbor rewards and per-criterion details; ForgeCAD-specific
23
- verifier reports preserve geometry comparison evidence.
15
+ ## Repository Split
24
16
 
25
- Harbor is the executable contract for both RL rollouts and benchmark evals.
26
- There is no second task catalog, no generated sibling registry, and no local
27
- runner command surface to keep in sync.
28
-
29
- The external collaboration repository for SpaceXAI-facing handoff material is:
17
+ Use two neighboring checkouts:
30
18
 
31
19
  ```text
32
- https://github.com/KoStard/SpaceXAI_ForgeCAD
20
+ ~/Projects/CAD/
21
+   ForgeCAD/
22
+     ai-labs/reconstruction-benchmark/
23
+     src/pages/generatedBenchmarkData.ts
24
+   ForgeCAD-Harbor-Datasets/
25
+     reconstruction/
26
+       dataset.toml
27
+       agents/
28
+       tasks/
33
29
  ```
34
30
 
35
- Treat that repository as coordination and distribution context. The executable
36
- benchmark contract is the Harbor dataset in this repo.
31
+ The ForgeCAD repo discovers the external dataset from:
37
32
 
38
- ## Current Reconstruction Suite
33
+ ```bash
34
+ FORGECAD_RECONSTRUCTION_DATASET_ROOT="$HOME/Projects/CAD/ForgeCAD-Harbor-Datasets/reconstruction"
35
+ ```
39
36
 
40
- The canonical reconstruction dataset lives in:
37
+ If the variable is not set, the local tools default to:
41
38
 
42
39
  ```text
43
- ai-labs/reconstruction/
40
+ ../ForgeCAD-Harbor-Datasets/reconstruction
44
41
  ```
45
42
 
46
- The current seed tasks are:
43
+ The external collaboration repository for SpaceXAI-facing handoff material is:
47
44
 
48
45
  ```text
49
- ball-bearing
50
- sealing-frame
51
- wheel
46
+ https://github.com/KoStard/SpaceXAI_ForgeCAD
52
47
  ```
53
48
 
54
- The old source-file-style labels are not primary task IDs. Public benchmark data
55
- normalizes historical rows to the user-facing labels above.
49
+ Treat that repository as coordination and distribution context. The executable
50
+ task contract is the Harbor dataset.
56
51
 
57
52
  ## Harbor Task Contract
58
53
 
59
- Each task folder includes:
54
+ Each external task folder includes:
60
55
 
61
56
  ```text
62
57
  README.md
@@ -106,65 +101,33 @@ script. RewardKit writes `/logs/verifier/reward.json` and
106
101
  RewardKit output while ensuring `reward.json` has numeric Harbor fields:
107
102
  `reward`, `score`, `rawCompareScore`, `guard`, and `valid`.
108
103
 
109
- ## NPM Command Surface
104
+ ## Command Surface
105
+
106
+ Run agents through Harbor directly:
107
+
108
+ ```bash
109
+ uvx --from harbor harbor run \
110
+ -p "$FORGECAD_RECONSTRUCTION_DATASET_ROOT/tasks" \
111
+ --include-task-name ball-bearing \
112
+ --agent codex \
113
+ --model gpt-5.5 \
114
+ --artifact /app/submission/main.forge.js \
115
+ --jobs-dir "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/codex" \
116
+ --n-concurrent 1
117
+ ```
110
118
 
111
- Keep this surface small and boring:
119
+ Use the ForgeCAD repo only for reporting:
112
120
 
113
121
  ```bash
114
- npm run harbor:reconstruction:refresh
115
- npm run harbor:reconstruction:check
116
- npm run harbor:reconstruction:smoke
117
- npm run harbor:reconstruction:pack-local
118
- npm run harbor:reconstruction:clean-local
119
122
  npm run benchmark:reconstruction:list
120
123
  npm run benchmark:reconstruction:import-harbor -- <job-or-trial-dir> --budget standard
121
124
  npm run benchmark:reconstruction:leaderboard
122
125
  npm run benchmark:reconstruction:report
126
+ npm run build:benchmark-data
123
127
  ```
124
128
 
125
- `refresh` validates task layout and rewrites dataset digests. It does not
126
- generate task folders. If a task should change, edit the Harbor task folder
127
- itself.
128
-
129
- `check` runs the same validation and fails if `dataset.toml` is stale.
130
-
131
- `smoke` runs Harbor with the no-op agent on `ball-bearing`. Use it whenever the
132
- task image, verifier dependencies, or RewardKit contract changes.
133
-
134
- ## Local Task Workflow
135
-
136
- Use this flow when adding or changing tasks inside the ForgeCAD repository.
137
-
138
- 1. Add or edit the task under `ai-labs/reconstruction/tasks/<task-id>/`.
139
- 2. Keep the task folder self-contained and Harbor-runnable.
140
- 3. Update the task `README.md` with environment, verifier dimensions, layout,
141
- and concrete Harbor run commands.
142
- 4. Refresh the dataset manifest:
143
-
144
- ```bash
145
- npm run harbor:reconstruction:refresh
146
- npm run harbor:reconstruction:check
147
- ```
148
-
149
- 5. List tasks through the local reporting wrapper:
150
-
151
- ```bash
152
- npm run benchmark:reconstruction:list
153
- ```
154
-
155
- 6. Run Harbor directly when testing the lab-facing package:
156
-
157
- ```bash
158
- npm run harbor:reconstruction:smoke
159
- ```
160
-
161
- 7. Import completed Harbor jobs when you need local leaderboard/report output:
162
-
163
- ```bash
164
- npm run benchmark:reconstruction:import-harbor -- \
165
- "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/smoke/<timestamp>" \
166
- --budget smoke
167
- ```
129
+ There is no ForgeCAD-owned task sync command. If Harbor can run the external
130
+ dataset path, the benchmark layer can import the resulting jobs.
168
131
 
169
132
  ## Direct Harbor Runs
170
133
 
@@ -176,7 +139,7 @@ built-in Harbor agent:
176
139
  ```bash
177
140
  CODEX_AUTH_JSON_PATH="$HOME/.codex/auth.json" \
178
141
  uvx --from harbor harbor run \
179
- -p ai-labs/reconstruction/tasks \
142
+ -p "$FORGECAD_RECONSTRUCTION_DATASET_ROOT/tasks" \
180
143
  --include-task-name ball-bearing \
181
144
  --agent codex \
182
145
  --model gpt-5.5 \
@@ -188,11 +151,11 @@ uvx --from harbor harbor run \
188
151
  Use `--agent-import-path` for a custom adapter such as the bundled Grok adapter:
189
152
 
190
153
  ```bash
191
- PYTHONPATH="$PWD/ai-labs/reconstruction/agents" \
154
+ PYTHONPATH="$FORGECAD_RECONSTRUCTION_DATASET_ROOT/agents" \
192
155
  GROK_AUTH_JSON="$HOME/.grok/auth.json" \
193
156
  GROK_CONFIG_TOML="$HOME/.grok/config.toml" \
194
157
  uvx --from harbor harbor run \
195
- -p ai-labs/reconstruction/tasks \
158
+ -p "$FORGECAD_RECONSTRUCTION_DATASET_ROOT/tasks" \
196
159
  --include-task-name ball-bearing \
197
160
  --agent-import-path grok_harbor_agent:GrokCliAgent \
198
161
  --model grok-build \
@@ -201,8 +164,6 @@ uvx --from harbor harbor run \
201
164
  --n-concurrent 1
202
165
  ```
203
166
 
204
- Current task names are `ball-bearing`, `sealing-frame`, and `wheel`.
205
-
206
167
  ### Full Suite
207
168
 
208
169
  Codex:
@@ -210,7 +171,7 @@ Codex:
210
171
  ```bash
211
172
  CODEX_AUTH_JSON_PATH="$HOME/.codex/auth.json" \
212
173
  uvx --from harbor harbor run \
213
- -p ai-labs/reconstruction/tasks \
174
+ -p "$FORGECAD_RECONSTRUCTION_DATASET_ROOT/tasks" \
214
175
  --agent codex \
215
176
  --model gpt-5.5 \
216
177
  --jobs-dir "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/codex" \
@@ -221,11 +182,11 @@ uvx --from harbor harbor run \
221
182
  Grok:
222
183
 
223
184
  ```bash
224
- PYTHONPATH="$PWD/ai-labs/reconstruction/agents" \
185
+ PYTHONPATH="$FORGECAD_RECONSTRUCTION_DATASET_ROOT/agents" \
225
186
  GROK_AUTH_JSON="$HOME/.grok/auth.json" \
226
187
  GROK_CONFIG_TOML="$HOME/.grok/config.toml" \
227
188
  uvx --from harbor harbor run \
228
- -p ai-labs/reconstruction/tasks \
189
+ -p "$FORGECAD_RECONSTRUCTION_DATASET_ROOT/tasks" \
229
190
  --agent-import-path grok_harbor_agent:GrokCliAgent \
230
191
  --model grok-build \
231
192
  --jobs-dir "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/grok" \
@@ -237,150 +198,25 @@ Direct Harbor CLI-agent runs need outbound internet from the task container for
237
198
  agent install/auth/model calls, so the task definitions set
238
199
  `allow_internet = true`. The verifier remains hidden until the agent exits.
239
200
 
240
- ## Remote Harbor Runs On popos
241
-
242
- Use `popos` as a remote Harbor execution host when local machine resources are
243
- the bottleneck. Do not add a second benchmark runner for this. The remote
244
- machine should run the same Harbor commands against the same task folders.
245
-
246
- Remote prerequisites:
247
-
248
- 1. `ssh popos` reaches the server.
249
- 2. Docker, `uvx`, Node, and npm are installed on the server.
250
- 3. A dedicated checkout exists at `~/Projects/CAD/ForgeCAD`.
251
- 4. Agent auth lives outside the repo on the server.
252
- 5. Run outputs stay outside the checkout under
253
- `~/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/`.
254
-
255
- For normal runs, update the dedicated server checkout with git after pushing
256
- the branch you want to test:
257
-
258
- ```bash
259
- branch="$(git branch --show-current)"
260
- git push origin "$branch"
261
- ssh popos "cd ~/Projects/CAD/ForgeCAD && \
262
- git fetch origin && \
263
- git switch '$branch' && \
264
- git pull --ff-only origin '$branch'"
265
- ```
266
-
267
- Use `rsync` only for scratch experiments where the local work is intentionally
268
- uncommitted. It mirrors the dirty checkout onto the server, so use it only
269
- against a dedicated throwaway checkout:
270
-
271
- ```bash
272
- rsync -az --delete \
273
- --exclude .git \
274
- --exclude node_modules \
275
- --exclude dist \
276
- --exclude dist-cli \
277
- --exclude dist-bundles \
278
- --exclude .data \
279
- --exclude 'ai-labs/reconstruction/tasks/*/environment/forgecad-package/*.tgz' \
280
- ./ popos:~/Projects/CAD/ForgeCAD/
281
- ```
282
-
283
- Run preflight validation on the server:
284
-
285
- ```bash
286
- ssh popos 'cd ~/Projects/CAD/ForgeCAD && npm run harbor:reconstruction:check'
287
- ```
288
-
289
- If `popos` reports `Missing script: "harbor:reconstruction:check"`, the server
290
- checkout is stale. Update the server checkout with git first.
291
-
292
- Run the no-op Harbor smoke on the server:
293
-
294
- ```bash
295
- ssh popos 'cd ~/Projects/CAD/ForgeCAD && npm run harbor:reconstruction:smoke'
296
- ```
297
-
298
- Run one task on one Codex agent remotely:
299
-
300
- ```bash
301
- ssh popos 'cd ~/Projects/CAD/ForgeCAD && \
302
- CODEX_AUTH_JSON_PATH="$HOME/.codex/auth.json" \
303
- uvx --from harbor harbor run \
304
- -p ai-labs/reconstruction/tasks \
305
- --include-task-name ball-bearing \
306
- --agent codex \
307
- --model gpt-5.5 \
308
- --artifact /app/submission/main.forge.js \
309
- --jobs-dir "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/codex" \
310
- --n-concurrent 1'
311
- ```
201
+ ## Run Data And Leaderboard
312
202
 
313
- Run one task on the bundled Grok adapter remotely:
314
-
315
- ```bash
316
- ssh popos 'cd ~/Projects/CAD/ForgeCAD && \
317
- PYTHONPATH="$PWD/ai-labs/reconstruction/agents" \
318
- GROK_AUTH_JSON="$HOME/.grok/auth.json" \
319
- GROK_CONFIG_TOML="$HOME/.grok/config.toml" \
320
- uvx --from harbor harbor run \
321
- -p ai-labs/reconstruction/tasks \
322
- --include-task-name ball-bearing \
323
- --agent-import-path grok_harbor_agent:GrokCliAgent \
324
- --model grok-build \
325
- --artifact /app/submission/main.forge.js \
326
- --jobs-dir "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/grok" \
327
- --n-concurrent 1'
328
- ```
329
-
330
- Fetch remote Harbor jobs back before importing them into the local benchmark
331
- ledger:
332
-
333
- ```bash
334
- rsync -az \
335
- popos:~/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/ \
336
- "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/"
337
-
338
- npm run benchmark:reconstruction:import-harbor -- \
339
- "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/codex/<timestamp>" \
340
- --budget standard
341
- ```
342
-
343
- By default, task Dockerfiles install the pinned published ForgeCAD npm package.
344
- For unpublished local ForgeCAD changes, run the local package flow on the server
345
- checkout:
346
-
347
- ```bash
348
- ssh popos 'cd ~/Projects/CAD/ForgeCAD && npm run harbor:reconstruction:pack-local'
349
- ssh popos 'cd ~/Projects/CAD/ForgeCAD && npm run harbor:reconstruction:smoke'
350
- ssh popos 'cd ~/Projects/CAD/ForgeCAD && npm run harbor:reconstruction:clean-local'
351
- ```
352
-
353
- If `pack-local` fails because repo build tools are missing, run `npm install`
354
- once in the server checkout.
355
-
356
- There is intentionally no `popos` npm script. The host alias, checkout path,
357
- and auth file locations are per-developer machine configuration; the repo-owned
358
- commands remain the small Harbor command surface documented above.
359
-
360
- ## Local Reporting Wrapper
361
-
362
- The local reporting wrapper is not a runner. It exists only to list Harbor
363
- tasks, import completed Harbor jobs, and build submission ledgers, leaderboards,
364
- and visual reports.
365
-
366
- There is no local matrix command, no host-side agent adapter layer, no separate
367
- task tracker, and no host-side evaluator. If an agent should run, run it
368
- through Harbor.
369
-
370
- ## Run Data
371
-
372
- Keep run outputs outside the source checkout under:
203
+ Keep run outputs outside the source checkout:
373
204
 
374
205
  ```text
375
206
  ~/Projects/CAD/ForgeCAD-RL-Agent-Runs/
376
- benchmark/
377
207
  harbor/
378
- research/
379
- smoke/
208
+ <agent>/<job>/
209
+ benchmark/main/
210
+ submissions.jsonl
211
+ submissions/
212
+ leaderboard.json
213
+ leaderboard.md
214
+ report/
380
215
  ```
381
216
 
382
- Keep raw agent logs and Harbor job directories. They are evidence for prompt
383
- design, task packaging, tool behavior, and reward design.
217
+ Raw Harbor jobs are evidence. Keep them somewhere durable, but do not commit
218
+ them to ForgeCAD. They contain logs, generated code, renders, verifier payloads,
219
+ local paths, and provider-specific artifacts.
384
220
 
385
221
  Important Harbor files:
386
222
 
@@ -397,6 +233,22 @@ Important Harbor files:
397
233
  verifier/reference.png
398
234
  ```
399
235
 
236
+ The public leaderboard is updated by importing selected Harbor jobs into the
237
+ local benchmark ledger, regenerating reports, then regenerating the repo-safe
238
+ snapshot:
239
+
240
+ ```bash
241
+ npm run benchmark:reconstruction:import-harbor -- \
242
+ "$HOME/Projects/CAD/ForgeCAD-RL-Agent-Runs/harbor/codex/<job>" \
243
+ --budget standard
244
+ npm run benchmark:reconstruction:report
245
+ npm run build:benchmark-data
246
+ ```
247
+
248
+ Commit `src/pages/generatedBenchmarkData.ts` to publish the website update.
249
+ The committed snapshot contains only sanitized task, model, score, reward,
250
+ metric, and timing fields. It does not contain raw logs or private paths.
251
+
400
252
  ## Result Payload Contract
401
253
 
402
254
  The Harbor verifier writes numeric reward files:
@@ -430,32 +282,25 @@ For reconstruction tasks, the geometry component should include:
430
282
  - Hard caps for obvious shortcut failures.
431
283
 
432
284
  Avoid solving score problems by only changing a curve shape. If a two-ring
433
- bearing approximation scores too high, the missing signal is structural: rolling
434
- elements, feature edges, component count, section occupancy, or topology. Add
435
- the missing measurement and then calibrate the scalar projection.
285
+ bearing approximation scores too high, the missing signal is structural:
286
+ rolling elements, feature edges, component count, section occupancy, or
287
+ topology. Add the missing measurement and then calibrate the scalar projection.
436
288
 
437
289
  ## Maintaining Tasks
438
290
 
439
- When adding or changing a task:
440
-
441
- 1. Put the task under `ai-labs/reconstruction/tasks/<task-id>/`.
442
- 2. Use a user-facing slug such as `ball-bearing`, not an opaque source asset ID.
443
- 3. Include the full Harbor layout: prompt, task TOML, environment, reference,
444
- starter, tests, task metadata, and README.
445
- 4. Record the reference asset SHA-256 in `tests/task.json`.
446
- 5. Keep `/app/submission/main.forge.js` as the stable deliverable.
447
- 6. Refresh the dataset with `npm run harbor:reconstruction:refresh`.
448
- 7. Run identity scoring: reference vs reference should score near 100.
449
- 8. Run the starter score and record it.
450
- 9. Run at least one known weak shortcut candidate and one stronger candidate.
451
- 10. Confirm the stronger candidate ranks higher for the right reasons.
452
- 11. Store local experiment artifacts under `docs/temporary/projects/...`, then
453
- promote only durable conclusions here.
291
+ When adding or changing a task, edit the external Harbor dataset. Keep task IDs
292
+ user-facing, include the full Harbor layout, record the reference asset SHA-256 in
293
+ `tests/task.json`, and keep `/app/submission/main.forge.js` as the stable
294
+ deliverable.
295
+
296
+ Run enough Harbor jobs to prove the task behaves correctly: reference identity
297
+ scoring, starter scoring, at least one weak shortcut candidate, and one stronger
298
+ candidate. The stronger candidate should rank higher for the right reasons.
454
299
 
455
300
  ## Maintaining Agent Adapters
456
301
 
457
- Agent adapters belong in Harbor, not in the local benchmark reporting wrapper.
458
- For each supported Harbor agent CLI, keep a short note answering:
302
+ Agent adapters belong with the external Harbor dataset. For each supported
303
+ Harbor agent CLI, keep a short note answering:
459
304
 
460
305
  1. How to run headlessly.
461
306
  2. How to set cwd.
@@ -492,17 +337,14 @@ saved runs.
492
337
  Before merging reconstruction environment changes:
493
338
 
494
339
  1. `npm test` passes, or a narrower check is justified in the PR.
495
- 2. `npm run harbor:reconstruction:refresh` passes.
496
- 3. `npm run harbor:reconstruction:check` passes.
497
- 4. `npm run benchmark:reconstruction:list` passes.
498
- 5. `npm run harbor:reconstruction:smoke` passes when environment packaging
499
- changes.
500
- 6. Verifier reward output contains numeric `reward`, `score`,
340
+ 2. `npm run benchmark:reconstruction:list` passes against the external dataset.
341
+ 3. Harbor can run the changed task directly from the external dataset.
342
+ 4. Verifier reward output contains numeric `reward`, `score`,
501
343
  `rawCompareScore`, `guard`, and `valid`.
502
- 7. Verifier report output contains the detailed status, score vector,
344
+ 5. Verifier report output contains the detailed status, score vector,
503
345
  aggregation, guard data, and artifacts.
504
- 8. RewardKit writes `reward-details.json`.
505
- 9. Agent logs are captured in the Harbor trial folder.
506
- 10. Timeouts are enforced by Harbor and inside the verifier.
507
- 11. No global skills or parent repo instructions are required.
508
- 12. The docs index and this guide are updated when the contract changes.
346
+ 6. RewardKit writes `reward-details.json`.
347
+ 7. Agent logs are captured in the Harbor trial folder.
348
+ 8. Timeouts are enforced by Harbor and inside the verifier.
349
+ 9. No global skills or parent repo instructions are required.
350
+ 10. The docs index and this guide are updated when the contract changes.
@@ -36,13 +36,13 @@ Do not solve reconstruction by returning `importMesh("reference.stl")` or `impor
36
36
  No wrapper script is needed. Use the local checkout CLI:
37
37
 
38
38
  ```bash
39
- node dist-cli/forgecad.js run path/to/source.stl --quality live --details
39
+ node dist-cli/forgecad.js ls path/to/source.stl --quality live --long
40
40
  node dist-cli/forgecad.js render 3d path/to/source.stl /tmp/<slug>-source.png --camera iso --edges thin --size 900
41
41
  node dist-cli/forgecad.js inspect visual objects path/to/source.stl /tmp/<slug>-source-objects --camera iso --size 700 --force
42
42
  node dist-cli/forgecad.js inspect sections sample path/to/source.stl /tmp/<slug>-source-sections --count 5 --size 700 --force
43
43
  ```
44
44
 
45
- For 3MF sources, `run --details` also prints the source archive's build
45
+ For 3MF sources, `forgecad run` prints the source archive's build
46
46
  items/resource objects with stable refs such as
47
47
  `3mf:build:001:object:2`, automatic names, per-item bounding boxes, and
48
48
  triangle counts. Use that item table to avoid missing hidden multi-part
@@ -65,7 +65,7 @@ Use today's date for the directory. Use the user's current ForgeCAD project when
65
65
  - Return the final geometry (single shape, array, or named objects array)
66
66
  - Treat `fillet(shape, r)` and `chamfer(shape, r)` as experimental edge treatments: Manifold can produce incorrect results and OCCT can be very slow. Prefer simpler primitive profiles, lower segment counts, targeted edge selectors, and inspection before relying on the result.
67
67
  4. Validate — run `forgecad run <file>` to check for errors. For multi-file projects, always validate `main.forge.js`.
68
- 5. Verify geometry — render a multi-angle visual evidence set before final delivery: whole-model context plus agent-chosen orthographic, oblique, underside, or hidden-object views that expose the relevant components and interfaces. Choose camera directions from the model's shape and likely failure modes, not from a fixed recipe. Use those views to look for internals that are accidentally visible, parts that visibly do not fit, floating details, blocked access, missing seats, and unexpected interference. Run `forgecad run --connectivity` when the model has multiple returned objects or visible attachments, run `forgecad debug assembly --fail-on warning` when the script uses `assembly()`, run `forgecad inspect mechanical-integrity <project-or-file> --collisions` before sharing generated mechanical work, and run the targeted `forgecad inspect <family> <mode>` commands that match the task (see Final Acceptance Gate and Render-Verify Loop below). For multi-file projects, render and inspect `main.forge.js`. Collision findings are model work, not FYI: remove unexpected overlaps before delivery.
68
+ 5. Verify geometry — render a multi-angle visual evidence set before final delivery: whole-model context plus agent-chosen orthographic, oblique, underside, or hidden-object views that expose the relevant components and interfaces. Choose camera directions from the model's shape and likely failure modes, not from a fixed recipe. Use those views to look for internals that are accidentally visible, parts that visibly do not fit, floating details, blocked access, missing seats, and unexpected interference. Run `forgecad inspect physical components` when the model has multiple returned objects or visible attachments, run `forgecad debug assembly --fail-on warning` when the script uses `assembly()`, run `forgecad inspect mechanical-integrity <project-or-file> --collisions` before sharing generated mechanical work, and run the targeted `forgecad inspect <family> <mode>` commands that match the task (see Final Acceptance Gate and Render-Verify Loop below). For multi-file projects, render and inspect `main.forge.js`. Collision findings are model work, not FYI: remove unexpected overlaps before delivery.
69
69
  6. Iterate from visual and inspection feedback — treat every render and inspection bundle as model evidence, not a checkbox. Read the normal PNGs, manifest, and evidence PNGs; convert each unexpected collision, thin region, missing section detail, wrong component count, floating body, distance gap, confusing object-color result, accidentally exposed internal structure, bad fit, or visually unsupported interface into a concrete model edit; then rerun the same targeted evidence pass until the result matches the intended physical component graph.
70
70
 
71
71
  ### Manufacturing Process Is Not Assumed
@@ -188,7 +188,7 @@ Before telling the user the model is done, prove both technical validity and vis
188
188
  1. State the intended physical component graph. Decide whether the final artifact should be one connected component, several intentionally separate components, or a selected assembly plus named ghosts. Then run:
189
189
 
190
190
  ```bash
191
- forgecad run model.forge.js --connectivity
191
+ forgecad inspect physical components model.forge.js --camera iso
192
192
  ```
193
193
 
194
194
  The reported component count must match the design intent. Treat unexpected islands, accidental fusion, or bbox-only "touching" that does not make physical sense as model bugs.
@@ -297,7 +297,7 @@ For important components, collect both:
297
297
  - Context view — neighbors present, proving the part belongs in the final assembly.
298
298
  - Focus view — only the relevant objects visible, making small gaps, intersections, missing seats, and floating parts easy to see.
299
299
 
300
- Prefer CLI `--focus` / `--hide` filters, named views, or parameter-selected diagnostic modes over changing production geometry. Use the object names from `node dist-cli/forgecad.js run model.forge.js --quality live` when you are unsure what the filters should target.
300
+ Prefer CLI `--focus` / `--hide` filters, named views, or parameter-selected diagnostic modes over changing production geometry. Use the object paths from `node dist-cli/forgecad.js ls model.forge.js --tree` when you are unsure what the filters should target.
301
301
 
302
302
  #### Structured inspection bundles
303
303
 
@@ -77,13 +77,13 @@ ForgeCAD commands.
77
77
  inference.
78
78
 
79
79
  ```bash
80
- ./bin/forgecad run task/reference/<asset> --quality live --details
80
+ ./bin/forgecad ls task/reference/<asset> --quality live --long
81
81
  ./bin/forgecad render 3d task/reference/<asset> outputs/reference.png --camera iso --edges thin --size 900
82
82
  ./bin/forgecad inspect sections sample task/reference/<asset> outputs/reference-sections --count 5 --size 700
83
83
  ./bin/forgecad inspect section task/reference/<asset> --plane yz --offset 0 --ray width:-50,0:50,0 --size 700
84
84
  ```
85
85
 
86
- For 3MF references, the `run --details` output includes a source structure
86
+ For 3MF references, the `forgecad run` output includes a source structure
87
87
  table with stable `3mf:build:...:object:...` refs, automatic item names,
88
88
  per-item bounding boxes, and triangle counts. Treat that table as part of
89
89
  the evidence: the final model may be one part or many parts, but you should
@@ -93,7 +93,7 @@ ForgeCAD commands.
93
93
  unique probe directory with `result.json`; later you can replay that probe
94
94
  against the candidate with `./bin/forgecad inspect replay <result.json>
95
95
  --source submission/main.forge.js`.
96
- For 3MF references, the `run --details` output includes a source structure
96
+ For 3MF references, the `forgecad run` output includes a source structure
97
97
  table with stable `3mf:build:...:object:...` refs, automatic item names,
98
98
  per-item bounding boxes, and triangle counts. Treat that table as part of
99
99
  the evidence: the final model may be one part or many parts, but you should
package/dist/index.html CHANGED
@@ -55,7 +55,7 @@
55
55
  * { margin: 0; padding: 0; box-sizing: border-box; }
56
56
  html, body, #root { width: 100%; min-height: 100%; background: var(--fc-bg); color: var(--fc-text); font-family: system-ui, -apple-system, sans-serif; }
57
57
  </style>
58
- <script type="module" crossorigin src="/assets/app-BUZqJvSO.js"></script>
58
+ <script type="module" crossorigin src="/assets/app-bEww1ic4.js"></script>
59
59
  <link rel="stylesheet" crossorigin href="/assets/app-CsHnaBWt.css">
60
60
  </head>
61
61
  <body>
package/dist/sitemap.xml CHANGED
@@ -2,43 +2,43 @@
2
2
  <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
3
3
  <url>
4
4
  <loc>https://forgecad.io/</loc>
5
- <lastmod>2026-05-29</lastmod>
5
+ <lastmod>2026-05-31</lastmod>
6
6
  <changefreq>weekly</changefreq>
7
7
  <priority>1.0</priority>
8
8
  </url>
9
9
  <url>
10
10
  <loc>https://forgecad.io/docs</loc>
11
- <lastmod>2026-05-29</lastmod>
11
+ <lastmod>2026-05-31</lastmod>
12
12
  <changefreq>weekly</changefreq>
13
13
  <priority>0.8</priority>
14
14
  </url>
15
15
  <url>
16
16
  <loc>https://forgecad.io/benchmark</loc>
17
- <lastmod>2026-05-29</lastmod>
17
+ <lastmod>2026-05-31</lastmod>
18
18
  <changefreq>weekly</changefreq>
19
19
  <priority>0.8</priority>
20
20
  </url>
21
21
  <url>
22
22
  <loc>https://forgecad.io/blog</loc>
23
- <lastmod>2026-05-29</lastmod>
23
+ <lastmod>2026-05-31</lastmod>
24
24
  <changefreq>weekly</changefreq>
25
25
  <priority>0.7</priority>
26
26
  </url>
27
27
  <url>
28
28
  <loc>https://forgecad.io/pricing</loc>
29
- <lastmod>2026-05-29</lastmod>
29
+ <lastmod>2026-05-31</lastmod>
30
30
  <changefreq>monthly</changefreq>
31
31
  <priority>0.6</priority>
32
32
  </url>
33
33
  <url>
34
34
  <loc>https://forgecad.io/examples</loc>
35
- <lastmod>2026-05-29</lastmod>
35
+ <lastmod>2026-05-31</lastmod>
36
36
  <changefreq>weekly</changefreq>
37
37
  <priority>0.6</priority>
38
38
  </url>
39
39
  <url>
40
40
  <loc>https://forgecad.io/blog/hello-forgecad-io</loc>
41
- <lastmod>2026-05-29</lastmod>
41
+ <lastmod>2026-05-31</lastmod>
42
42
  <changefreq>monthly</changefreq>
43
43
  <priority>0.5</priority>
44
44
  </url>
@@ -9,7 +9,7 @@ import {
9
9
  resolvePackagePath,
10
10
  runDirectCliMain,
11
11
  setActiveBackend
12
- } from "./chunk-RY43WF46.js";
12
+ } from "./chunk-EXWGNL6K.js";
13
13
 
14
14
  // cli/check-compiler.ts
15
15
  import assert from "assert/strict";
@@ -636,4 +636,4 @@ runDirectCliMain(import.meta.url, "cli/check-compiler.ts", () => runCheckCompile
636
636
  export {
637
637
  runCheckCompilerCli
638
638
  };
639
- //# sourceMappingURL=check-compiler-LOXCPEOI.js.map
639
+ //# sourceMappingURL=check-compiler-U5SOPN7X.js.map
@@ -12,7 +12,7 @@ import {
12
12
  resolvePackagePath,
13
13
  runDirectCliMain,
14
14
  setActiveBackend
15
- } from "./chunk-RY43WF46.js";
15
+ } from "./chunk-EXWGNL6K.js";
16
16
 
17
17
  // cli/check-query-propagation.ts
18
18
  import assert from "assert/strict";
@@ -488,4 +488,4 @@ runDirectCliMain(import.meta.url, "cli/check-query-propagation.ts", () => runChe
488
488
  export {
489
489
  runCheckQueryPropagationCli
490
490
  };
491
- //# sourceMappingURL=check-query-propagation-BAKNVWXR.js.map
491
+ //# sourceMappingURL=check-query-propagation-XOKNSSYU.js.map