@ara-commons/ara-skills 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/compiler/SKILL.md +16 -31
- package/skills/compiler/references/ara-schema.md +33 -26
- package/skills/research-visualizer/SKILL.md +21 -13
- package/skills/research-visualizer/references/binding.md +10 -46
- package/skills/research-visualizer/references/parsing.md +0 -7
- package/skills/research-visualizer/references/trajectory-template.html +2 -39
- package/src/installer.js +7 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ara-commons/ara-skills",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.4.2",
|
|
4
4
|
"description": "Install Agent-Native Research Artifact (ARA) skills — compiler, research-manager, rigor-reviewer, research-visualizer — into Claude Code, Cursor, OpenCode, Gemini CLI, Codex, and more.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
package/skills/compiler/SKILL.md
CHANGED
|
@@ -16,7 +16,7 @@ allowed-tools: Read, Write, Edit, Bash(python *|git clone *|ls *|mkdir *), Glob,
|
|
|
16
16
|
metadata:
|
|
17
17
|
author: ara-commons
|
|
18
18
|
category: research-tooling
|
|
19
|
-
version: "1.2.
|
|
19
|
+
version: "1.2.1"
|
|
20
20
|
tags: [research, compilation, artifacts, knowledge-extraction]
|
|
21
21
|
---
|
|
22
22
|
|
|
@@ -170,11 +170,19 @@ whichever layer fits best, preserving the source's granularity. Never silently d
|
|
|
170
170
|
or released form, *distinct from the prose that describes it*. `src/environment.md` is always
|
|
171
171
|
required (reproducibility). Beyond it, one rule decides everything:
|
|
172
172
|
|
|
173
|
-
> **Represent every concrete artifact losslessly
|
|
174
|
-
>
|
|
175
|
-
>
|
|
176
|
-
>
|
|
177
|
-
>
|
|
173
|
+
> **Represent every concrete artifact losslessly, and split it by KIND into the layer it belongs to:**
|
|
174
|
+
> - **Codebase → `src/`.** The experiment's *code* — source files, scripts, configs — in **any
|
|
175
|
+
> language** (judged by content, **never** by a `.py` suffix: `.c`/`.cu`, `.js`/`.ts`, `.rs`, `.cpp`,
|
|
176
|
+
> `.jl`, `.go`, notebooks, shell, … all count). When the code persists in a linkable codebase (a
|
|
177
|
+
> directory of script variants, a released/versioned repo), `src/artifacts.md` is a **pointer index to
|
|
178
|
+
> that codebase** — one link per code artifact (every script/config/module), nothing aggregated or
|
|
179
|
+
> copied. Transcribe into `src/execution/` only when the code would otherwise be **lost** (lives solely
|
|
180
|
+
> inside the paper, or a source not externally persisted).
|
|
181
|
+
> - **Run records → `evidence/`.** The *outputs* of running that code — per-run logs, metrics, run
|
|
182
|
+
> tables — are empirical **evidence, not code**: they live in `evidence/results/<node>.md` (run tables)
|
|
183
|
+
> + `evidence/logs/log_pointers.md` (direct per-run log pointers), linked straight from the trace/claims.
|
|
184
|
+
> **Never index runs or logs in `src/artifacts.md`** — `artifacts.md` is the codebase, not the run store.
|
|
185
|
+
> Never re-encode a prose-only description as code.
|
|
178
186
|
|
|
179
187
|
A concrete artifact is real content the cognitive layer doesn't already hold — capture it (grounded
|
|
180
188
|
in the real repo/files when provided), in whatever directory fits. But a method conveyed only in
|
|
@@ -199,26 +207,7 @@ the source actually reveals — but the node count and types are **source-bounde
|
|
|
199
207
|
never invent a dead end, decision, or experiment to hit a number. A paper that hides its failures
|
|
200
208
|
yields a smaller, honest tree (Rule 9 wins).
|
|
201
209
|
|
|
202
|
-
|
|
203
|
-
sequence of code edits and the scripts are resolvable at compile time, you MAY attach to an experiment
|
|
204
|
-
node the **unified diff** it represents — never required, omitted when unclear:
|
|
205
|
-
1. **Resolve node → representative variant — this link does NOT already exist; construct it.** From the
|
|
206
|
-
node's `source_refs` / its claims' cited `record_configs` → the run index (`runs.csv`/`runs.jsonl`)
|
|
207
|
-
row(s) whose family+purpose+bin match → the representative submitted script. Where this is empty or
|
|
208
|
-
ambiguous (most `decision`/`dead_end` nodes, or evidence that is only journal prose), **omit
|
|
209
|
-
`code_change`** — never guess a script.
|
|
210
|
-
2. **Resolve node → diff base** from the lineage you already reconstruct for `solution/*` (wave baseline
|
|
211
|
-
or immediate-parent variant).
|
|
212
|
-
3. **Index both scripts in `src/artifacts.md` under a stable anchor** (`A01`, `A02`, …) carrying real
|
|
213
|
-
path + sha256 + original location; compute the unified diff (variant vs base) and write it to a tracked
|
|
214
|
-
**`evidence/changes/<node-id>.diff.md`** sidecar (fenced ```diff, `**Source**` header citing the two
|
|
215
|
-
anchor ids). Set the node's `code_change: {base_artifact, variant_artifact, lang, diff_file}`. The whole
|
|
216
|
-
scripts stay pointers (Rule 14) — the diff is a derived, grounded view, like a `derived_subset` table.
|
|
217
|
-
4. **Store-absent ⇒ pointers, not a diff.** If the scripts don't resolve on disk (git-ignored store),
|
|
218
|
-
still record `code_change` with the anchor ids + a `note`, omit `diff_file` — the visualizer shows a
|
|
219
|
-
pointer chip. Expected, not a failure.
|
|
220
|
-
|
|
221
|
-
You MAY also attach `node.thinking` — the agent's deliberation — but **only verbatim** grounded
|
|
210
|
+
You MAY attach `node.thinking` — the agent's deliberation — but **only verbatim** grounded
|
|
222
211
|
journal/decision text; never compose new prose. No verbatim rationale ⇒ leave it absent.
|
|
223
212
|
|
|
224
213
|
### Step 3: Generate Files
|
|
@@ -271,10 +260,6 @@ Run ARA Seal Level 1. Check:
|
|
|
271
260
|
heuristic `Code ref` → a real `src/execution/` file (when both exist); tree `evidence:` → claim IDs
|
|
272
261
|
- Evidence: **every numbered table and figure is filed with BOTH a markdown file and a screenshot
|
|
273
262
|
(.png)**; numbered objects not filed are accounted for in `evidence/README.md` with a reason
|
|
274
|
-
- **Changed-code (only if emitted):** each `evidence/changes/<node>.diff.md` cites two `src/artifacts.md`
|
|
275
|
-
anchors (`base`/`variant`) that resolve; the diff is verbatim; the node's `code_change` points at the
|
|
276
|
-
sidecar via `diff_file` (or carries a `note` with no `diff_file` when the store was absent). Optional —
|
|
277
|
-
absent is fine; never invent a diff or a node→script mapping
|
|
278
263
|
- Evidence files have **Source** fields; figures declare Figure type / Extraction method / Reading
|
|
279
264
|
confidence; estimated readings marked `≈` (not `exact_from_labels`); diagrams/qualitative samples
|
|
280
265
|
carry a visual description, not a fabricated table
|
|
@@ -322,7 +307,7 @@ key stats (claims, experiments, concepts, tree nodes, evidence tables/figures).
|
|
|
322
307
|
11. **Visual extraction is honest extraction**: read figures by looking; mark estimates `≈` with extraction method + confidence; never present a digitized estimate as exact, invent points for an unreadable figure, or turn a diagram into a fake data table
|
|
323
308
|
12. **Complete, ordered evidence**: file EVERY numbered table and figure, in order — a systematic sweep, not a lucky sample — each as a markdown transcription PLUS a saved screenshot (`.png`). No early stopping; account for any object you don't file
|
|
324
309
|
13. **Fit the file set to the paper, not the paper to a template**: only PAPER.md + the mandatory core are required. Beyond them, generate the files THIS work actually warrants and nothing it doesn't have. Never force inappropriate files (e.g. model-training configs onto an eval or theory paper)
|
|
325
|
-
14. **`src/` holds
|
|
310
|
+
14. **`src/` holds the codebase (code), not run records and not re-encoded prose**: capture every concrete code artifact the source contains, in its native form — **any language, judged by content not by a `.py` extension** (`.c`/`.cu`, `.js`/`.ts`, `.rs`, `.cpp`, `.jl`, `.go`, notebooks, shell, … all count) — grounded in real files. Four sides: (a) never fabricate a code stub from a prose-only method — it already lives in `logic/`, so a stub just duplicates it; (b) never drop a concrete artifact that does exist — a lone `environment.md` is wrong when the work has one; (c) when the work's **codebase** persists in a linkable store (a directory of script variants, a released or versioned repo), index it as a **comprehensive pointer index** in `src/artifacts.md` — one link per code artifact (every script/config/module), nothing aggregated into a vague bucket, nothing copied; a lossy subset-copy is the failure; (d) **run records are NOT code** — per-run logs, metrics, and run tables are empirical evidence and live in `evidence/` (`evidence/results/<node>.md`, `evidence/logs/log_pointers.md`), linked straight from trace/claims, **never in `src/artifacts.md`**. **Transcribe real source into `src/execution/` only when it would otherwise be lost** — code that lives solely inside the paper, or a source not externally persisted (then `# Grounding: transcribed`, cite path). No implementation in the input → none applies.
|
|
326
311
|
15. **Source-bounded minimums**: any count or required field is a target, never a license to invent. If the source supports fewer, produce what is real and note the shortfall; for an unstated field write "Not specified in paper" rather than guessing
|
|
327
312
|
16. **Cite by verification, and ask on conflict**: a source reference (evidence `Source`, trace `source_refs`, claim `Proof`, a repo `file:line`/path) promises the cited location actually contains the claim — open it and confirm. Never transcribe a *description* of an artifact as a verified fact about it. **When the code repo and the paper disagree on a fact (line count, path, value, behavior), do NOT pick one silently — surface the conflict to the user and ask which source to follow.** If unverifiable and the user is unavailable, attribute it ("per §X") or omit. Carry a statistic's scope/denominator in its `Source`. **This extends to every load-bearing number in a claim/heuristic `Statement`/`Rationale`: it carries a `**Sources**` entry whose verbatim «quote» you opened and confirmed contains that value — a memory-filled value or a bare path is fabrication; use `[pending]` when you cannot open the source**
|
|
328
313
|
|
|
@@ -19,20 +19,22 @@ logic/
|
|
|
19
19
|
# study_design / formalization / results / proofs /
|
|
20
20
|
# design / heuristics … — whatever fits THIS work
|
|
21
21
|
related_work.md # ✓ Typed dependency graph (RDO)
|
|
22
|
-
src/
|
|
22
|
+
src/ # the CODEBASE (code in ANY language — never judged by a .py suffix)
|
|
23
23
|
environment.md # ✓ Data/software/hardware/protocols/seeds
|
|
24
|
+
artifacts.md # as warranted: pointer index to the codebase (every script/config/module)
|
|
24
25
|
configs/ # as warranted: hyperparameters / inference / deployment
|
|
25
|
-
execution/{module}.
|
|
26
|
+
execution/{module}.{ext} # as warranted: transcribed/grounded code, any language (or absent — see below)
|
|
26
27
|
prompts/, ... # as warranted: prompt templates, etc.
|
|
27
28
|
data/ # as warranted: dataset.md + preprocessing.md
|
|
28
29
|
trace/
|
|
29
30
|
exploration_tree.yaml # ✓ Research DAG: nested YAML tree with typed nodes
|
|
30
|
-
evidence/
|
|
31
|
+
evidence/ # derived + observed: diffs, run records, results, logs, tables, figures
|
|
31
32
|
README.md # ✓ Index mapping every evidence file to claims
|
|
32
33
|
tables/ # ✓ every numbered Table: tableN.md + tableN.png
|
|
33
34
|
figures/ # ✓ every numbered Figure: figureN.md + figureN.png
|
|
35
|
+
results/ # as warranted: per-node run records (run tables: run_id, params, metrics, export_id)
|
|
36
|
+
logs/ # as warranted: log_pointers.md — direct per-run log pointers (by export_id)
|
|
34
37
|
proofs/ # as warranted: derivations / proofs
|
|
35
|
-
changes/ # as warranted: per-node code-change unified diffs (Research Visualizer)
|
|
36
38
|
rubric/requirements.md # (Only if a rubric is provided)
|
|
37
39
|
```
|
|
38
40
|
|
|
@@ -364,13 +366,15 @@ pseudo-code — that information already lives in `logic/solution/`, and re-enco
|
|
|
364
366
|
duplicates it.** A concrete artifact that IS raw "code" — e.g. a prompt or template — is different:
|
|
365
367
|
store it verbatim in `src/prompts/`, don't paraphrase it. A hollow invented API is a hallucination.
|
|
366
368
|
|
|
367
|
-
## src/artifacts.md (the
|
|
369
|
+
## src/artifacts.md (the CODEBASE pointer index — code only, any language)
|
|
368
370
|
|
|
369
|
-
`src
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
371
|
+
`src/artifacts.md` is the **pointer index to the experiment's codebase** — the *code*: every script,
|
|
372
|
+
config, and module, in **any language** (judged by content, never by a `.py` suffix). When the codebase
|
|
373
|
+
persists in a linkable store (a directory of script variants, a released/versioned repo), point at every
|
|
374
|
+
code artifact, grounded in the real files, nothing aggregated into a vague bucket and nothing copied.
|
|
375
|
+
**Run records do NOT belong here** — per-run logs, metrics, and run tables are evidence
|
|
376
|
+
(`evidence/results/`, `evidence/logs/log_pointers.md`), linked straight from trace/claims, not indexed in
|
|
377
|
+
`artifacts.md`. One block (or row) per **code** artifact:
|
|
374
378
|
|
|
375
379
|
**Capture is the fallback, not the default.** Transcribe a file into `src/execution/` only when it
|
|
376
380
|
would otherwise be **lost** — code that lives solely inside the paper, or a source not externally
|
|
@@ -379,8 +383,6 @@ the winner, or files collapsed into a single directory link) is the failure.
|
|
|
379
383
|
|
|
380
384
|
```markdown
|
|
381
385
|
## {Artifact name}
|
|
382
|
-
- **Anchor**: {stable short id — `A01`, `A02`, … — so a trace node's `code_change` can reference this artifact by id; optional, but required for the Research Visualizer's changed-code diffs}
|
|
383
|
-
- **sha256**: {content hash of the file, when a code-change diff cites it}
|
|
384
386
|
- **File(s) in repo**: {real path(s), verified to exist}
|
|
385
387
|
- **Nature**: {what it is — tool / library / skill spec / system / dataset}
|
|
386
388
|
- **What it does / contains**: {grounded description}
|
|
@@ -426,25 +428,31 @@ Reproducibility for any field. For purely analytical work, state so explicitly.
|
|
|
426
428
|
|
|
427
429
|
---
|
|
428
430
|
|
|
429
|
-
## evidence/
|
|
431
|
+
## evidence/results/{node-or-name}.md (run records — the outputs of running the code)
|
|
430
432
|
|
|
431
|
-
Per-experiment
|
|
432
|
-
|
|
433
|
-
has a resolvable code change:
|
|
433
|
+
Per-experiment **run records**: the run table(s) a node produced. **This is where runs live**, not in
|
|
434
|
+
`src/artifacts.md`. One file per experiment node (or per result group):
|
|
434
435
|
|
|
435
436
|
```markdown
|
|
436
|
-
#
|
|
437
|
-
- **
|
|
438
|
-
- **
|
|
439
|
-
- **Language**: python
|
|
437
|
+
# {Node/result}: {short description}
|
|
438
|
+
- **Trace node**: N22
|
|
439
|
+
- **Claim**: C04
|
|
440
440
|
|
|
441
|
-
|
|
441
|
+
| run_id | {params…} | metric | export_id |
|
|
442
|
+
|--------|-----------|--------|-----------|
|
|
443
|
+
| … | … | … | … |
|
|
442
444
|
```
|
|
443
445
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
446
|
+
## evidence/logs/log_pointers.md (direct per-run log pointers)
|
|
447
|
+
|
|
448
|
+
A single index of **direct pointers to each run's log**, grouped by node — `<store>/<export_id>/<log>`
|
|
449
|
+
(e.g. `train.log`, or the field's equivalent). Pointer-resolution only; do not transcribe logs:
|
|
450
|
+
|
|
451
|
+
```markdown
|
|
452
|
+
## N22: WD sweep (C04)
|
|
453
|
+
- `data/train/00023-…/train.log` — winning run
|
|
454
|
+
- packet: v1-008
|
|
455
|
+
```
|
|
448
456
|
|
|
449
457
|
---
|
|
450
458
|
|
|
@@ -502,7 +510,6 @@ tree:
|
|
|
502
510
|
description: "{...}"
|
|
503
511
|
# OPTIONAL enrichment (Research Visualizer; omit when absent):
|
|
504
512
|
# thinking: "{verbatim agent deliberation — why it did/branched}"
|
|
505
|
-
# code_change: { base_artifact: A01, variant_artifact: A07, lang: python, diff_file: evidence/changes/N01.diff.md }
|
|
506
513
|
```
|
|
507
514
|
|
|
508
515
|
Rules:
|
|
@@ -4,8 +4,9 @@ description: |
|
|
|
4
4
|
Research Visualizer. Renders an existing Agent-Native Research Artifact (ARA) into ONE
|
|
5
5
|
self-contained, interactive HTML file showing the AI scientist's step-by-step research process:
|
|
6
6
|
a clickable process map of the exploration tree (branches and dead ends included) on the left,
|
|
7
|
-
and a per-step drill-down on the right — what the step did
|
|
8
|
-
result (verbatim grounded numbers + inline figures
|
|
7
|
+
and a per-step drill-down on the right — what the step did (its narrative written in plain language a
|
|
8
|
+
person can follow), why (the linked claim), the real result (verbatim grounded numbers + inline figures
|
|
9
|
+
+ tables), and the code/artifact pointer.
|
|
9
10
|
Read-only consumer of the artifact — it never changes how research is done.
|
|
10
11
|
When the ARA carries them, it also surfaces (each optional, only when present) the related-work
|
|
11
12
|
dependency graph, the problem framing, a concepts glossary with in-text term popovers, and the
|
|
@@ -21,7 +22,7 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash(python3 *|base64 *|find *|ls
|
|
|
21
22
|
metadata:
|
|
22
23
|
author: ara-commons
|
|
23
24
|
category: research-tooling
|
|
24
|
-
version: "1.0.
|
|
25
|
+
version: "1.0.1"
|
|
25
26
|
tags: [research, visualization, trajectory, exploration-tree, html]
|
|
26
27
|
---
|
|
27
28
|
|
|
@@ -93,16 +94,22 @@ One self-contained file, default `<ara-dir>/trajectory.html` (override with `--o
|
|
|
93
94
|
never link off-ARA); derive each node's `built_on`/`rejected_here` (dependency→claim→node, bucketed by
|
|
94
95
|
`relation_norm`), `concepts` (whole-word name-match), and `recipe_refs` (recipe→claim→node); mark
|
|
95
96
|
cross-agent entries. All per-node enrichment fields default `[]`.
|
|
97
|
+
5c. **Write each step's narrative as plain language (same layout, human words).** The trace's notes are
|
|
98
|
+
written for an agent; rendered as-is they read like a log and a person can't follow what happened or
|
|
99
|
+
why it mattered. For each node, write its narrative — `thinking`, and `body` if used — in plain
|
|
100
|
+
language a reader who has NOT seen the ARA can follow: your own words, translating the trace's
|
|
101
|
+
agent-facing deliberation, **not** a verbatim paste; expand jargon on first use and state the point,
|
|
102
|
+
not the log line. This changes ONLY the prose that fills the existing reasoning block — keep every
|
|
103
|
+
block and the layout exactly as they are. Stay grounded: introduce no number, name, or claim that is
|
|
104
|
+
not already in that node, and keep claim `Statement`s, `Sources` quotes and table numbers **verbatim**
|
|
105
|
+
in the why/result blocks — those are the receipts.
|
|
96
106
|
6. **Inline figures.** For each referenced figure that has a real raster (`evidence/figures/*.png`),
|
|
97
107
|
base64-encode it and put the `data:` URI in `figures[].img`. Use Bash, e.g.
|
|
98
108
|
`python3 -c "import base64,sys;print('data:image/png;base64,'+base64.b64encode(open(sys.argv[1],'rb').read()).decode())" <path>`.
|
|
99
109
|
For data-only figure markdown (no raster), render its data table instead (as a `tables[]` entry).
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
`base_artifact`/`variant_artifact` ids resolve; carry each node's verbatim `thinking` straight through.
|
|
104
|
-
Sanitize all three verbatim fields per the Injection contract. The visualizer never computes a diff
|
|
105
|
-
itself and never opens the external store — it only inlines what the ARA already contains.
|
|
110
|
+
Carry each node's `thinking` (the plain-language narrative from 5c) through, and sanitize it per the
|
|
111
|
+
Injection contract. The ARA carries **no code diff** — a step's code change is conveyed by its
|
|
112
|
+
natural-language narrative (`body`/`thinking`); the code itself is pointed at via `src/artifacts.md`.
|
|
106
113
|
7. **Assemble `ARA_DATA`** (exact schema in `references/binding.md`) and **inject** it: replace ONLY
|
|
107
114
|
the JSON between `/* __ARA_DATA_BEGIN__ */` and `/* __ARA_DATA_END__ */` in the
|
|
108
115
|
`<script id="ara-data">` block of a copy of the template. Write the result to the output path.
|
|
@@ -121,16 +128,17 @@ One self-contained file, default `<ara-dir>/trajectory.html` (override with `--o
|
|
|
121
128
|
`/* __ARA_DATA_BEGIN__ */` / `/* __ARA_DATA_END__ */`. Escape any `<` in inlined markdown/text as `\u003c`
|
|
122
129
|
(or `&lt;`) — this also neutralizes `</script>`. (A bare `*/` inside a string value is harmless to
|
|
123
130
|
`JSON.parse`; only the exact marker strings would be stripped.)
|
|
124
|
-
- **The verbatim free-text fields `thinking` and `code_change.diff` are the high-risk carriers** (source
|
|
125
|
-
code routinely contains `/* … */`). If either marker token would appear in their text, break it (e.g.
|
|
126
|
-
insert a zero-width space inside `__ARA_DATA_…`) so the global marker-strip can't delete it from inside
|
|
127
|
-
a value. Re-validate: a node whose `thinking`/`diff` contains a marker token MUST round-trip intact.
|
|
128
131
|
- Do not touch anything else in the template — only the bytes between the two markers.
|
|
129
132
|
- After writing, re-validate: the file still parses (the embedded JSON loads). If a figure pushed the
|
|
130
133
|
file very large, apply the size guards in `references/binding.md` (truncate logs/tables, keep figures).
|
|
131
134
|
|
|
132
135
|
## Faithfulness (hard rules)
|
|
133
136
|
|
|
137
|
+
- **Speak human in the narrative, quote the evidence.** A node's narrative (`thinking`/`body`) is plain
|
|
138
|
+
language — your own words, a grounded translation (5c), not a verbatim paste. Everything that is
|
|
139
|
+
*evidence* — claim `Statement`s, `Sources` quotes, table cells/numbers, relations, definitions — is
|
|
140
|
+
reproduced **verbatim** in the why/result/overlay blocks. The narrative explains; the receipts prove. A
|
|
141
|
+
narrative that states a number absent from the node fails; so does an evidence block that paraphrases.
|
|
134
142
|
- Reproduce claim `Statement`s, `Sources` quotes, and table numbers **verbatim** — never paraphrase,
|
|
135
143
|
never invent. Missing data → set the field empty/omit (the viewer shows "No …"); never fabricate.
|
|
136
144
|
- Provenance, `support_level`, and `status` are shown **only if present** in the source; do not guess.
|
|
@@ -49,33 +49,12 @@ Fallback if no README row: scan the figure/table `.md` for an inline `Supports C
|
|
|
49
49
|
- A figure's caption/title comes from the first heading or the `What it shows` section of its `.md`.
|
|
50
50
|
|
|
51
51
|
### What counts as "code" now
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
### The changed-code diff (`node.code_change`) — compiler-produced, visualizer-rendered
|
|
59
|
-
A full ARA may carry, per experiment node, the **unified diff** the step represents. The addresses
|
|
60
|
-
live in ONE place and are referenced by id (weak coupling):
|
|
61
|
-
|
|
62
|
-
`node.code_change` → `evidence/changes/<node-id>.diff.md` (the diff **text**) → `artifacts[]` /
|
|
63
|
-
`src/artifacts.md` entry (path + sha256 + original location) → the original repo.
|
|
64
|
-
|
|
65
|
-
- The diff **text** is grounded by citing the two **artifact ids** (`base_artifact`, `variant_artifact`),
|
|
66
|
-
never an embedded path. Whole scripts stay pointers in `src/artifacts.md` (Rule 14) — the diff is a
|
|
67
|
-
derived, grounded view (≈ a `derived_subset` table), not a copy of the artifact.
|
|
68
|
-
- **`diff_file` → `diff` inlining** (parallel to figures' `.md`→base64 `img`): on disk the node carries
|
|
69
|
-
`code_change.diff_file: "evidence/changes/<id>.diff.md"`; the visualizer reads that **tracked** sidecar
|
|
70
|
-
and inlines its fenced diff text into `code_change.diff` in `ARA_DATA`, so the rendered HTML stays
|
|
71
|
-
self-contained (the sidecar lives inside the ARA dir).
|
|
72
|
-
- **`artifactById`**: the visualizer builds an `id → artifacts[] entry` map (parallel to `nodeByClaim`)
|
|
73
|
-
and resolves `base_artifact`/`variant_artifact` into the shown-not-resolved pointer chip under the diff.
|
|
74
|
-
- **Degrade**: when the scripts don't resolve at compile time (store absent), the compiler emits
|
|
75
|
-
`code_change` with the artifact ids + a `note` but no diff; the viewer shows a pointer chip, not a diff.
|
|
76
|
-
- **Marker safety**: `diff` and `thinking` are verbatim, so the producer MUST ensure neither the literal
|
|
77
|
-
`/* __ARA_DATA_BEGIN__ */` / `/* __ARA_DATA_END__ */` tokens nor `</script>` appears in any inlined
|
|
78
|
-
string (escape `<`→`\u003c`; break the marker tokens). See SKILL.md "Injection contract".
|
|
52
|
+
`src/artifacts.md` is the **codebase** pointer index — the code (scripts/configs/modules) in **any
|
|
53
|
+
language** (never assume `.py`). Populate `artifact[]` from it plus the relevant `logic/solution/*.md`
|
|
54
|
+
recipe section. The **run records** a node produced are NOT code — they arrive via `result` (from
|
|
55
|
+
`evidence/results/` run tables + `evidence/logs/log_pointers.md`), never via `artifact[]`. Only when a
|
|
56
|
+
real transcribed `src/execution/` file exists (legacy / paper-only code) do you point at it.
|
|
57
|
+
**Never resolve the external store in v1** — the pointer text is the value.
|
|
79
58
|
|
|
80
59
|
## The `ARA_DATA` object (exact schema the scaffold reads)
|
|
81
60
|
|
|
@@ -92,13 +71,6 @@ live in ONE place and are referenced by id (weak coupling):
|
|
|
92
71
|
// (typically a pre-order DFS of the tree). If omitted, the scaffold derives a DFS from `parent`.
|
|
93
72
|
"order": ["N01", "N02", "N03", "..."],
|
|
94
73
|
|
|
95
|
-
// OPTIONAL addressable artifact index (from src/artifacts.md). Each script the compiler points at
|
|
96
|
-
// gets a stable id so a node's code_change can reference it BY ID (no embedded path). Omit if absent.
|
|
97
|
-
"artifacts": [
|
|
98
|
-
{ "id":"A01", "name":"<artifact name>", "path":"<repo-relative path>", "sha256":"<...>",
|
|
99
|
-
"original_location":"<store/repo ref>", "pointer":"<src/artifacts.md pointer text>" }
|
|
100
|
-
],
|
|
101
|
-
|
|
102
74
|
"nodes": [
|
|
103
75
|
{
|
|
104
76
|
"id": "N02",
|
|
@@ -106,7 +78,7 @@ live in ONE place and are referenced by id (weak coupling):
|
|
|
106
78
|
"parent": "N01", // id of the nesting parent, or null for a root
|
|
107
79
|
"title": "<normalized step title>", // see parsing.md
|
|
108
80
|
"body": "<what the step did / its outcome>",
|
|
109
|
-
"thinking": "<
|
|
81
|
+
"thinking": "<the step's narrative in PLAIN, human language — a grounded translation, NOT a verbatim paste (SKILL.md 5c); OPTIONAL>", // primary block; falls back to body
|
|
110
82
|
"support_level": "explicit", // "explicit"|"inferred"|null
|
|
111
83
|
"isolated": false, // true → rendered in a separated dashed box
|
|
112
84
|
"depends_on": ["N00"], // also_depends_on cross-edges (ids); [] if none
|
|
@@ -132,15 +104,7 @@ live in ONE place and are referenced by id (weak coupling):
|
|
|
132
104
|
|
|
133
105
|
"artifact": [ // src/artifacts.md pointers + solution recipe refs (pointer text only)
|
|
134
106
|
{ "name":"<artifact / family name>", "pointer":"<src/artifacts.md pointer text>", "what":"pointer index entry" }
|
|
135
|
-
]
|
|
136
|
-
|
|
137
|
-
"code_change": { // OPTIONAL — the changed-code diff for this step (compiler-produced)
|
|
138
|
-
"base_artifact":"A01", // → artifacts[].id (holds path+sha+original_location)
|
|
139
|
-
"variant_artifact":"A07", // → artifacts[].id
|
|
140
|
-
"lang":"python",
|
|
141
|
-
"diff":"<unified-diff text, inlined by the visualizer from evidence/changes/<id>.diff.md>",
|
|
142
|
-
"note":"" // set (with diff absent) when the scripts didn't resolve → pointer-only chip
|
|
143
|
-
}
|
|
107
|
+
]
|
|
144
108
|
}
|
|
145
109
|
// ... one object per trace node
|
|
146
110
|
]
|
|
@@ -151,8 +115,8 @@ live in ONE place and are referenced by id (weak coupling):
|
|
|
151
115
|
- Every node MUST have `id`, `type`, `title`, `parent` (or null). All other arrays default to `[]`,
|
|
152
116
|
scalars to `null`/`""`. The scaffold tolerates missing optional fields.
|
|
153
117
|
- Put **only what the source contains**. Empty `why`/`result`/`verified_by`/`artifact` is fine and
|
|
154
|
-
common (e.g. a bare `decision` node) — the viewer simply omits those blocks. `thinking`
|
|
155
|
-
|
|
118
|
+
common (e.g. a bare `decision` node) — the viewer simply omits those blocks. `thinking` is likewise
|
|
119
|
+
optional; omit when absent (a payload without it is byte-compatible).
|
|
156
120
|
- `status` is lower-cased by the viewer for styling; pass it as written (`Supported`, `hypothesis`, …).
|
|
157
121
|
|
|
158
122
|
### Size guards
|
|
@@ -25,10 +25,6 @@ top-level nodes have `parent: null`.
|
|
|
25
25
|
- `also_depends_on: [ids]` → emit as `depends_on` (DAG cross-edges).
|
|
26
26
|
- `thinking` — verbatim agent deliberation, **passed straight through** (the primary reasoning block).
|
|
27
27
|
Absent ⇒ omit. Never paraphrase or synthesize it.
|
|
28
|
-
- `code_change` — when the compiler wrote one onto the node (`base_artifact` / `variant_artifact` /
|
|
29
|
-
`lang` / `diff_file`), **pass it through**. The `diff_file`→`diff` inlining and the top-level
|
|
30
|
-
`artifacts[]` index are done in the binding/inline step (binding.md); the visualizer never computes a
|
|
31
|
-
diff itself. Absent ⇒ omit.
|
|
32
28
|
|
|
33
29
|
## 3. Title + body normalization (the dialect bridge)
|
|
34
30
|
|
|
@@ -107,9 +103,6 @@ a `{thought, action, observation/result}`. Map it onto the tree:
|
|
|
107
103
|
- `source_refs` = a pointer back to the log line(s) (shown, never resolved).
|
|
108
104
|
- nesting via `children`; convergence via `also_depends_on`; a discarded branch via `isolated`.
|
|
109
105
|
|
|
110
|
-
No `logic/` or `evidence/` is required; enrich the same tree later (via the compiler) to add claims,
|
|
111
|
-
evidence, and per-node `code_change` diffs.
|
|
112
|
-
|
|
113
106
|
# 8. The four `logic/` enrichment layers (all optional)
|
|
114
107
|
|
|
115
108
|
These produce the OPTIONAL `context` / `glossary` / `dependencies` / `recipes` keys (and the per-node
|
|
@@ -151,15 +151,6 @@
|
|
|
151
151
|
table.md th{background:var(--panel2);color:var(--ink);font-weight:700;text-transform:uppercase;font-size:10.5px;letter-spacing:.04em}
|
|
152
152
|
pre.snip{background:var(--code-bg);border:1px solid var(--line);border-radius:6px;padding:10px 12px;overflow:auto;font-size:12px;color:var(--ink);white-space:pre-wrap}
|
|
153
153
|
|
|
154
|
-
/* changed-code diff block */
|
|
155
|
-
pre.diff{background:var(--code-bg);border:1px solid var(--line);border-radius:6px;padding:8px 0;overflow:auto;font-size:12px;margin:11px 0;white-space:pre}
|
|
156
|
-
pre.diff .dl{display:block;padding:0 12px;border-left:3px solid transparent}
|
|
157
|
-
pre.diff .dl.add{background:var(--add-bg);color:var(--add-ink);border-left-color:var(--ok)}
|
|
158
|
-
pre.diff .dl.del{background:var(--del-bg);color:var(--del-ink);border-left-color:var(--warn)}
|
|
159
|
-
pre.diff .dl.hunk{color:var(--hunk);background:var(--panel2)}
|
|
160
|
-
pre.diff .dl.meta{color:var(--muted)}
|
|
161
|
-
pre.diff .dl.ctx{color:#3a423c}
|
|
162
|
-
|
|
163
154
|
.ptr{font-size:13px;margin:7px 0}
|
|
164
155
|
.ptr .path{font-family:var(--mono);color:#46504a}
|
|
165
156
|
.truncated{color:var(--muted);font-size:11.5px;font-style:italic}
|
|
@@ -267,18 +258,13 @@
|
|
|
267
258
|
"title": "Research Visualizer — template demo",
|
|
268
259
|
"authors": ["(demo data — replaced by the research-visualizer skill)"],
|
|
269
260
|
"year": "", "venue": "", "ara_dir": "",
|
|
270
|
-
"abstract": "This bare template ships with demo nodes so it opens standalone and shows the reasoning-first layout,
|
|
261
|
+
"abstract": "This bare template ships with demo nodes so it opens standalone and shows the reasoning-first layout, an experiment, a dead end, and an isolated subtree. Running /research-visualizer on a real ARA overwrites this block."
|
|
271
262
|
},
|
|
272
263
|
"order": ["N01", "N02", "N03", "NV1"],
|
|
273
|
-
"artifacts": [
|
|
274
|
-
{ "id":"A01","name":"encode.py @ baseline","path":"src/encode.py","sha256":"0000…base","original_location":"repo@main:src/encode.py" },
|
|
275
|
-
{ "id":"A02","name":"encode.py @ variant","path":"src/encode.py","sha256":"1111…var","original_location":"repo@feat:src/encode.py" }
|
|
276
|
-
],
|
|
277
264
|
"nodes": [
|
|
278
|
-
{ "id":"N01","type":"question","parent":null,"title":"Is this template wired correctly end to end?","body":"","thinking":"Before trusting the renderer on real data, prove every node state (reasoning,
|
|
265
|
+
{ "id":"N01","type":"question","parent":null,"title":"Is this template wired correctly end to end?","body":"","thinking":"Before trusting the renderer on real data, prove every node state (reasoning, result, dead end, isolated) shows up from one fixed scaffold.","support_level":"explicit","isolated":false,"depends_on":[],"source_refs":["notes.md:1-4"],
|
|
279
266
|
"why":[], "result":{"sources":[],"figures":[],"tables":[],"data":[]}, "verified_by":[], "artifact":[] },
|
|
280
267
|
{ "id":"N02","type":"experiment","parent":"N01","title":"Precompute the field encoder once per type","body":"Replaced the per-call json.dumps path with a cached per-type encoder; output byte-identical.","thinking":"The profile said serialization dominates, and the dead end showed per-field reflection is the trap — so keep the hand-written encoding but pay its setup cost once, not per request.","support_level":"explicit","isolated":false,"depends_on":[],"source_refs":["notes.md:10-22"],
|
|
281
|
-
"code_change":{ "base_artifact":"A01","variant_artifact":"A02","lang":"python","diff":"@@ -1,4 +1,5 @@\n def encode(rec):\n- return json.dumps(rec)\n+ enc = encoder_for(type(rec))\n+ return enc(rec)" },
|
|
282
268
|
"why":[{"id":"C01","statement":"Paying per-type setup once instead of per call removes the hot-path cost without changing output.","status":"supported","conditions":"Holds when the type set is small and stable across calls.","falsification":"If amortized setup ever exceeds the per-call cost it replaces.","provenance":"ai-executed","dependencies":[]}],
|
|
283
269
|
"result":{
|
|
284
270
|
"sources":[{"quote":"p99 down 38%; output byte-identical to the baseline","ref":"figures/demo.md:14"}],
|
|
@@ -320,7 +306,6 @@
|
|
|
320
306
|
|
|
321
307
|
const nodes = DATA.nodes || [];
|
|
322
308
|
const byId = new Map(nodes.map(n => [n.id, n]));
|
|
323
|
-
const artifactById = new Map((DATA.artifacts||[]).map(a => [a.id, a]));
|
|
324
309
|
const kids = new Map();
|
|
325
310
|
nodes.forEach(n => kids.set(n.id, []));
|
|
326
311
|
let roots = [];
|
|
@@ -373,23 +358,6 @@
|
|
|
373
358
|
}
|
|
374
359
|
function chips(arr, klass){ return arr&&arr.length ? '<div class="chips">'+arr.map(x=>'<span class="chip '+(klass||"")+'">'+esc(x)+'</span>').join("")+'</div>' : ""; }
|
|
375
360
|
|
|
376
|
-
// resolve an artifact id (src/artifacts.md) to a label + a shown-not-resolved pointer tooltip
|
|
377
|
-
function artLabel(id){ const a=artifactById.get(id); return a ? (a.name||a.path||id) : id; }
|
|
378
|
-
function artTip(id){ const a=artifactById.get(id); return a ? [a.path,a.sha256,a.original_location||a.original_path].filter(Boolean).join(" · ") : ""; }
|
|
379
|
-
function renderDiff(cc){
|
|
380
|
-
const head=[];
|
|
381
|
-
if(cc.base_artifact) head.push('<span class="chip" title="'+esc(artTip(cc.base_artifact))+'">base: '+esc(artLabel(cc.base_artifact))+'</span>');
|
|
382
|
-
if(cc.variant_artifact) head.push('<span class="chip" title="'+esc(artTip(cc.variant_artifact))+'">variant: '+esc(artLabel(cc.variant_artifact))+'</span>');
|
|
383
|
-
if(cc.lang) head.push('<span class="chip ext">'+esc(cc.lang)+'</span>');
|
|
384
|
-
const chiprow = head.length?'<div class="chips">'+head.join("")+'</div>':"";
|
|
385
|
-
if(cc.diff){
|
|
386
|
-
const lc = L => /^(\+\+\+|---|diff |index )/.test(L) ? "meta" : (L[0]==="+" ? "add" : (L[0]==="-" ? "del" : (/^@@/.test(L) ? "hunk" : "ctx")));
|
|
387
|
-
const body = String(cc.diff).split("\n").map(L=>'<span class="dl '+lc(L)+'">'+esc(L||" ")+'</span>').join("");
|
|
388
|
-
return chiprow+'<pre class="diff">'+body+'</pre>';
|
|
389
|
-
}
|
|
390
|
-
return chiprow+'<div class="empty">'+esc(cc.note||"Diff not available in this checkout — base/variant scripts not present; pointers only.")+'</div>';
|
|
391
|
-
}
|
|
392
|
-
|
|
393
361
|
function renderDetail(n){
|
|
394
362
|
const d = document.getElementById("detail");
|
|
395
363
|
let h = '<div class="dhead"><span class="badge '+cls(n.type)+'">'+esc(n.type)+'</span>'+
|
|
@@ -413,11 +381,6 @@
|
|
|
413
381
|
if(n.recipe_refs&&n.recipe_refs.length) reason += recipeChips(n.recipe_refs);
|
|
414
382
|
h += block(n.thinking?"reasoning":"what", n.type, reason, true, "reason");
|
|
415
383
|
|
|
416
|
-
// changed code (unified diff) — open when present
|
|
417
|
-
if(n.code_change && (n.code_change.diff || n.code_change.base_artifact || n.code_change.variant_artifact || n.code_change.note)){
|
|
418
|
-
h += block("changed code", n.code_change.lang||"diff", renderDiff(n.code_change), true);
|
|
419
|
-
}
|
|
420
|
-
|
|
421
384
|
// result (the grounded payload) — open
|
|
422
385
|
const r = n.result||{};
|
|
423
386
|
if((r.sources&&r.sources.length)||(r.figures&&r.figures.length)||(r.tables&&r.tables.length)||(r.data&&r.data.length)){
|
package/src/installer.js
CHANGED
|
@@ -143,7 +143,11 @@ export function uninstall(opts) {
|
|
|
143
143
|
}
|
|
144
144
|
|
|
145
145
|
/**
|
|
146
|
-
* "Update" =
|
|
146
|
+
* "Update" = reconcile this agent against the FULL bundled skill set:
|
|
147
|
+
* re-install (overwrite) every tracked skill AND pull in any skills that
|
|
148
|
+
* were added to the package since the last install. Skips agents that have
|
|
149
|
+
* nothing installed — update should refresh an existing setup, not bootstrap
|
|
150
|
+
* a fresh one (use `install` for that).
|
|
147
151
|
*/
|
|
148
152
|
export function update(opts) {
|
|
149
153
|
const { agentId, local = false, cwd, quiet = false } = opts;
|
|
@@ -157,7 +161,8 @@ export function update(opts) {
|
|
|
157
161
|
if (!quiet) console.log(` (no skills tracked at ${targetDir})`);
|
|
158
162
|
return { agent: agent.id, targetDir, results: [] };
|
|
159
163
|
}
|
|
160
|
-
|
|
164
|
+
// skillIds: [] => all bundled skills; force => overwrite the tracked ones.
|
|
165
|
+
return install({ agentId, skillIds: [], local, cwd, force: true, quiet });
|
|
161
166
|
}
|
|
162
167
|
|
|
163
168
|
/**
|