agentera 0.0.0 → 3.0.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -45
- package/bundle/.agentera-npx-bundle.json +4 -0
- package/bundle/references/adapters/cursor.md +213 -0
- package/bundle/references/adapters/opencode.md +530 -0
- package/bundle/references/adapters/package-manifest-interface-model.yaml +337 -0
- package/bundle/references/adapters/package-registry.yaml +247 -0
- package/bundle/references/adapters/package-surface-characterization.md +48 -0
- package/bundle/references/adapters/runtime-adapter-characterization.md +79 -0
- package/bundle/references/adapters/runtime-adapter-interface-model.yaml +200 -0
- package/bundle/references/adapters/runtime-adapter-registry.yaml +548 -0
- package/bundle/references/adapters/runtime-feature-parity.md +189 -0
- package/bundle/references/analysis/benchmark.md +267 -0
- package/bundle/references/analysis/startup-measurement-contract.yaml +424 -0
- package/bundle/references/artifacts/artifact-registry-interface-model.yaml +288 -0
- package/bundle/references/cli/agent-ready-state-contract.yaml +950 -0
- package/bundle/references/cli/app-lifecycle-vocabulary.yaml +241 -0
- package/bundle/references/cli/audience-namespace-cli-migration.yaml +355 -0
- package/bundle/references/cli/bundle-skill-vocabulary.yaml +278 -0
- package/bundle/references/cli/capability-instruction-contract.yaml +123 -0
- package/bundle/references/cli/capability-tool-classification.yaml +53 -0
- package/bundle/references/cli/routing-execution-vocabulary.yaml +281 -0
- package/bundle/references/cli/update-channels.yaml +147 -0
- package/bundle/references/cli/vocabulary-index.yaml +160 -0
- package/bundle/references/cli/vocabulary.md +566 -0
- package/bundle/references/meta/documentation-inventory.md +43 -0
- package/bundle/references/v1-section-mapping.md +47 -0
- package/bundle/registry.json +39 -0
- package/bundle/skills/agentera/.claude-plugin/plugin.json +27 -0
- package/bundle/skills/agentera/SKILL.md +470 -0
- package/bundle/skills/agentera/agents/dokumentera.toml +6 -0
- package/bundle/skills/agentera/agents/hej.toml +6 -0
- package/bundle/skills/agentera/agents/inspektera.toml +6 -0
- package/bundle/skills/agentera/agents/inspirera.toml +6 -0
- package/bundle/skills/agentera/agents/optimera.toml +6 -0
- package/bundle/skills/agentera/agents/orkestrera.toml +6 -0
- package/bundle/skills/agentera/agents/planera.toml +6 -0
- package/bundle/skills/agentera/agents/profilera.toml +6 -0
- package/bundle/skills/agentera/agents/realisera.toml +6 -0
- package/bundle/skills/agentera/agents/resonera.toml +6 -0
- package/bundle/skills/agentera/agents/visionera.toml +6 -0
- package/bundle/skills/agentera/agents/visualisera.toml +6 -0
- package/bundle/skills/agentera/capabilities/dokumentera/instructions.md +428 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/artifacts.yaml +73 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/triggers.yaml +35 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/validation.yaml +139 -0
- package/bundle/skills/agentera/capabilities/hej/instructions.md +331 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/artifacts.yaml +69 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/exit.yaml +32 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/triggers.yaml +58 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/validation.yaml +55 -0
- package/bundle/skills/agentera/capabilities/inspektera/instructions.md +514 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/artifacts.yaml +76 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/exit.yaml +36 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/triggers.yaml +38 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/validation.yaml +113 -0
- package/bundle/skills/agentera/capabilities/inspirera/instructions.md +280 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/artifacts.yaml +24 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/exit.yaml +33 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/triggers.yaml +34 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/validation.yaml +58 -0
- package/bundle/skills/agentera/capabilities/optimera/instructions.md +437 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/artifacts.yaml +69 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/triggers.yaml +39 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/validation.yaml +91 -0
- package/bundle/skills/agentera/capabilities/orkestrera/instructions.md +433 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/artifacts.yaml +64 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/exit.yaml +34 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/triggers.yaml +42 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/validation.yaml +107 -0
- package/bundle/skills/agentera/capabilities/planera/instructions.md +368 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/artifacts.yaml +62 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/exit.yaml +33 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/triggers.yaml +34 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/validation.yaml +61 -0
- package/bundle/skills/agentera/capabilities/profilera/instructions.md +419 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/artifacts.yaml +18 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/exit.yaml +34 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/triggers.yaml +45 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/validation.yaml +57 -0
- package/bundle/skills/agentera/capabilities/realisera/instructions.md +403 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/artifacts.yaml +80 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/triggers.yaml +39 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/validation.yaml +110 -0
- package/bundle/skills/agentera/capabilities/resonera/instructions.md +329 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/artifacts.yaml +47 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/triggers.yaml +46 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/validation.yaml +77 -0
- package/bundle/skills/agentera/capabilities/visionera/instructions.md +309 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/artifacts.yaml +57 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/triggers.yaml +41 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/validation.yaml +74 -0
- package/bundle/skills/agentera/capabilities/visualisera/instructions.md +400 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/artifacts.yaml +44 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/exit.yaml +34 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/triggers.yaml +33 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/validation.yaml +80 -0
- package/bundle/skills/agentera/capability_schema_contract.yaml +385 -0
- package/bundle/skills/agentera/protocol.yaml +463 -0
- package/bundle/skills/agentera/references/contract.md +1039 -0
- package/bundle/skills/agentera/schemas/artifacts/changelog.yaml +60 -0
- package/bundle/skills/agentera/schemas/artifacts/decisions.yaml +461 -0
- package/bundle/skills/agentera/schemas/artifacts/design.yaml +55 -0
- package/bundle/skills/agentera/schemas/artifacts/docs.yaml +402 -0
- package/bundle/skills/agentera/schemas/artifacts/experiments.yaml +373 -0
- package/bundle/skills/agentera/schemas/artifacts/health.yaml +484 -0
- package/bundle/skills/agentera/schemas/artifacts/objective.yaml +399 -0
- package/bundle/skills/agentera/schemas/artifacts/plan.yaml +342 -0
- package/bundle/skills/agentera/schemas/artifacts/progress.yaml +325 -0
- package/bundle/skills/agentera/schemas/artifacts/todo.yaml +110 -0
- package/bundle/skills/agentera/schemas/artifacts/vision.yaml +262 -0
- package/bundle/skills/hej/.claude-plugin/plugin.json +6 -0
- package/bundle/skills/hej/SKILL.md +69 -0
- package/bundle/skills/hej/agents/hej.toml +11 -0
- package/bundle/skills/hej/agents/openai.yaml +8 -0
- package/dist/analytics/extractCorpus.js +1791 -0
- package/dist/analytics/extractCorpus.js.map +1 -0
- package/dist/analytics/usageStats.js +487 -0
- package/dist/analytics/usageStats.js.map +1 -0
- package/dist/bin/agentera.js +4 -0
- package/dist/bin/agentera.js.map +1 -0
- package/dist/cli/appContext.js +226 -0
- package/dist/cli/appContext.js.map +1 -0
- package/dist/cli/argvalidate.js +41 -0
- package/dist/cli/argvalidate.js.map +1 -0
- package/dist/cli/capabilityContext.js +2421 -0
- package/dist/cli/capabilityContext.js.map +1 -0
- package/dist/cli/commands/backfill.js +84 -0
- package/dist/cli/commands/backfill.js.map +1 -0
- package/dist/cli/commands/capability.js +44 -0
- package/dist/cli/commands/capability.js.map +1 -0
- package/dist/cli/commands/compact.js +148 -0
- package/dist/cli/commands/compact.js.map +1 -0
- package/dist/cli/commands/doctor.js +180 -0
- package/dist/cli/commands/doctor.js.map +1 -0
- package/dist/cli/commands/lint.js +179 -0
- package/dist/cli/commands/lint.js.map +1 -0
- package/dist/cli/commands/prime.js +544 -0
- package/dist/cli/commands/prime.js.map +1 -0
- package/dist/cli/commands/query.js +346 -0
- package/dist/cli/commands/query.js.map +1 -0
- package/dist/cli/commands/report.js +210 -0
- package/dist/cli/commands/report.js.map +1 -0
- package/dist/cli/commands/schema.js +306 -0
- package/dist/cli/commands/schema.js.map +1 -0
- package/dist/cli/commands/state.js +1012 -0
- package/dist/cli/commands/state.js.map +1 -0
- package/dist/cli/commands/upgrade.js +48 -0
- package/dist/cli/commands/upgrade.js.map +1 -0
- package/dist/cli/commands/validate.js +519 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/commands/verify.js +204 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/dispatch.js +958 -0
- package/dist/cli/dispatch.js.map +1 -0
- package/dist/cli/orientation.js +595 -0
- package/dist/cli/orientation.js.map +1 -0
- package/dist/cli/prime-blob.js +3 -0
- package/dist/cli/prime-blob.js.map +1 -0
- package/dist/cli/stateQuery.js +292 -0
- package/dist/cli/stateQuery.js.map +1 -0
- package/dist/cli/structured.js +18 -0
- package/dist/cli/structured.js.map +1 -0
- package/dist/core/difflib.js +274 -0
- package/dist/core/difflib.js.map +1 -0
- package/dist/core/git.js +43 -0
- package/dist/core/git.js.map +1 -0
- package/dist/core/paths.js +50 -0
- package/dist/core/paths.js.map +1 -0
- package/dist/core/pyjson.js +101 -0
- package/dist/core/pyjson.js.map +1 -0
- package/dist/core/sourceRoot.js +72 -0
- package/dist/core/sourceRoot.js.map +1 -0
- package/dist/core/toml.js +11 -0
- package/dist/core/toml.js.map +1 -0
- package/dist/core/yaml.js +25 -0
- package/dist/core/yaml.js.map +1 -0
- package/dist/eval/evalSkills.js +258 -0
- package/dist/eval/evalSkills.js.map +1 -0
- package/dist/eval/semanticEval.js +148 -0
- package/dist/eval/semanticEval.js.map +1 -0
- package/dist/eval/semanticFixtures.js +227 -0
- package/dist/eval/semanticFixtures.js.map +1 -0
- package/dist/hooks/common.js +160 -0
- package/dist/hooks/common.js.map +1 -0
- package/dist/hooks/compaction.js +935 -0
- package/dist/hooks/compaction.js.map +1 -0
- package/dist/hooks/cursorPreToolUse.js +19 -0
- package/dist/hooks/cursorPreToolUse.js.map +1 -0
- package/dist/hooks/cursorSessionStart.js +71 -0
- package/dist/hooks/cursorSessionStart.js.map +1 -0
- package/dist/hooks/sessionStart.js +209 -0
- package/dist/hooks/sessionStart.js.map +1 -0
- package/dist/hooks/sessionStop.js +212 -0
- package/dist/hooks/sessionStop.js.map +1 -0
- package/dist/hooks/validateArtifact.js +933 -0
- package/dist/hooks/validateArtifact.js.map +1 -0
- package/dist/registries/artifactRegistry.js +206 -0
- package/dist/registries/artifactRegistry.js.map +1 -0
- package/dist/registries/capabilityContract.js +310 -0
- package/dist/registries/capabilityContract.js.map +1 -0
- package/dist/registries/packageRegistry.js +641 -0
- package/dist/registries/packageRegistry.js.map +1 -0
- package/dist/registries/runtimeAdapterRegistry.js +315 -0
- package/dist/registries/runtimeAdapterRegistry.js.map +1 -0
- package/dist/setup/codex.js +1056 -0
- package/dist/setup/codex.js.map +1 -0
- package/dist/setup/copilot.js +227 -0
- package/dist/setup/copilot.js.map +1 -0
- package/dist/setup/cursor.js +127 -0
- package/dist/setup/cursor.js.map +1 -0
- package/dist/setup/doctor.js +1276 -0
- package/dist/setup/doctor.js.map +1 -0
- package/dist/state/installRoot.js +279 -0
- package/dist/state/installRoot.js.map +1 -0
- package/dist/state/progressCommit.js +289 -0
- package/dist/state/progressCommit.js.map +1 -0
- package/dist/state/startupAnalysis.js +1953 -0
- package/dist/state/startupAnalysis.js.map +1 -0
- package/dist/upgrade/appModel.js +189 -0
- package/dist/upgrade/appModel.js.map +1 -0
- package/dist/upgrade/channels.js +208 -0
- package/dist/upgrade/channels.js.map +1 -0
- package/dist/upgrade/compatibility.js +201 -0
- package/dist/upgrade/compatibility.js.map +1 -0
- package/dist/upgrade/doctor.js +373 -0
- package/dist/upgrade/doctor.js.map +1 -0
- package/dist/upgrade/migrateArtifactsV2ToV3.js +332 -0
- package/dist/upgrade/migrateArtifactsV2ToV3.js.map +1 -0
- package/dist/upgrade/runtimeMigration.js +484 -0
- package/dist/upgrade/runtimeMigration.js.map +1 -0
- package/dist/upgrade/upgradeCommands.js +36 -0
- package/dist/upgrade/upgradeCommands.js.map +1 -0
- package/dist/upgrade/upgradeOrchestrator.js +299 -0
- package/dist/upgrade/upgradeOrchestrator.js.map +1 -0
- package/dist/upgrade/versionResolution.js +179 -0
- package/dist/upgrade/versionResolution.js.map +1 -0
- package/dist/validate/appHomeContract.js +150 -0
- package/dist/validate/appHomeContract.js.map +1 -0
- package/dist/validate/capability.js +412 -0
- package/dist/validate/capability.js.map +1 -0
- package/dist/validate/crossCapability.js +145 -0
- package/dist/validate/crossCapability.js.map +1 -0
- package/dist/validate/lifecycleAdapters.js +772 -0
- package/dist/validate/lifecycleAdapters.js.map +1 -0
- package/dist/validate/selfAudit.js +107 -0
- package/dist/validate/selfAudit.js.map +1 -0
- package/package.json +28 -8
- package/LICENSE +0 -201
- package/bin/agentera.mjs +0 -50
- package/lib/exec.mjs +0 -116
- package/lib/resolve.mjs +0 -129
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
# Experiments Artifact Schema
|
|
2
|
+
#
|
|
3
|
+
# Experiment log maintained by optimera. Each experiment records hypothesis,
|
|
4
|
+
# method, metric results, kept/discarded status, and conclusions. Lives
|
|
5
|
+
# alongside OBJECTIVE.md in .agentera/optimera/<name>/.
|
|
6
|
+
#
|
|
7
|
+
# Structural pattern: UPPER_CASE groups with numbered entries and stable IDs,
|
|
8
|
+
# following capability_schema_contract.yaml and protocol.yaml conventions.
|
|
9
|
+
#
|
|
10
|
+
# ── Field-by-field mapping: v1 EXPERIMENTS.md → v2 experiments.yaml ──
|
|
11
|
+
#
|
|
12
|
+
# v1 Field v2 Field v2 ID
|
|
13
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
14
|
+
# ## Experiment N · date · label EXPERIMENT.number EX1
|
|
15
|
+
# (date from heading) EXPERIMENT.date EX2
|
|
16
|
+
# (label from heading) EXPERIMENT.label EX3
|
|
17
|
+
# **Hypothesis**: <text> EXPERIMENT.hypothesis EX4
|
|
18
|
+
# **Method**: <text> EXPERIMENT.method EX5
|
|
19
|
+
# **Change**: <text> EXPERIMENT.change EX6
|
|
20
|
+
# **Metric**: <text> + tables EXPERIMENT.metric EX7
|
|
21
|
+
# **Regression**: <text> EXPERIMENT.regression EX8
|
|
22
|
+
# **Status**: kept/discarded/baseline EXPERIMENT.status EX9
|
|
23
|
+
# **Conclusion**: <text> EXPERIMENT.conclusion EX10
|
|
24
|
+
# **Next**: <text> EXPERIMENT.next EX11
|
|
25
|
+
# **Escalation**: <text> EXPERIMENT.escalation EX12
|
|
26
|
+
# **Target progress**: <text> EXPERIMENT.target_progress EX13
|
|
27
|
+
# **Brainstorm iteration log** EXPERIMENT.iteration_log EX14
|
|
28
|
+
# ## Closure · date CLOSURE.date EC1
|
|
29
|
+
# **Final value**: <value> CLOSURE.final_value EC2
|
|
30
|
+
# **Target**: <target> CLOSURE.target EC3
|
|
31
|
+
# **Reason**: <text> CLOSURE.reason EC4
|
|
32
|
+
# ## Archived Experiments (implicit section) —
|
|
33
|
+
# - EXP-N: result summary ARCHIVE.summary EA1
|
|
34
|
+
#
|
|
35
|
+
# Protocol references:
|
|
36
|
+
# status visual tokens: VT1-VT3 (protocol.yaml VISUAL_TOKENS)
|
|
37
|
+
# trend tokens: VT12-VT13 (protocol.yaml VISUAL_TOKENS)
|
|
38
|
+
|
|
39
|
+
meta:
|
|
40
|
+
name: experiments
|
|
41
|
+
version: "1.0.0"
|
|
42
|
+
description: >-
|
|
43
|
+
Experiment log for optimera objectives. Produced and consumed by optimera.
|
|
44
|
+
Lives in .agentera/optimera/<name>/ alongside the objective.
|
|
45
|
+
artifact_type: agent_facing
|
|
46
|
+
path: ".agentera/optimera/<name>/experiments.yaml"
|
|
47
|
+
producer: optimera
|
|
48
|
+
consumers: [hej, optimera]
|
|
49
|
+
format: yaml
|
|
50
|
+
|
|
51
|
+
GROUP_PREFIXES:
|
|
52
|
+
BUDGET: EB
|
|
53
|
+
COMPACTION: EX_CMP
|
|
54
|
+
EXPERIMENT: EX
|
|
55
|
+
CLOSURE: EC
|
|
56
|
+
ARCHIVE: EA
|
|
57
|
+
VALIDATION: EV
|
|
58
|
+
|
|
59
|
+
BUDGET:
|
|
60
|
+
1:
|
|
61
|
+
id: EB1
|
|
62
|
+
scope: per_experiment_entry
|
|
63
|
+
max_words: 300
|
|
64
|
+
description: >-
|
|
65
|
+
Maximum words per experiment entry. Budgets are guidelines, not
|
|
66
|
+
hard blockers.
|
|
67
|
+
2:
|
|
68
|
+
id: EB2
|
|
69
|
+
scope: full_file
|
|
70
|
+
max_words: 2500
|
|
71
|
+
description: >-
|
|
72
|
+
Maximum words for the full file. If a write would exceed this
|
|
73
|
+
budget, compact first (see COMPACTION group).
|
|
74
|
+
|
|
75
|
+
COMPACTION:
|
|
76
|
+
1:
|
|
77
|
+
id: EX_CMP1
|
|
78
|
+
rule: uniform_10_40_50
|
|
79
|
+
description: >-
|
|
80
|
+
Growing artifacts follow a uniform 10/40/50 rule: 10 full-detail
|
|
81
|
+
entries, 40 one-line archive entries, drop beyond 50 total.
|
|
82
|
+
2:
|
|
83
|
+
id: EX_CMP2
|
|
84
|
+
full_detail:
|
|
85
|
+
max_entries: 10
|
|
86
|
+
selection: "10 most recent experiments"
|
|
87
|
+
format: "Standard experiment entry format (all fields)"
|
|
88
|
+
description: >-
|
|
89
|
+
The 10 most recent experiments are kept in full detail with
|
|
90
|
+
all fields.
|
|
91
|
+
3:
|
|
92
|
+
id: EX_CMP3
|
|
93
|
+
one_line_archive:
|
|
94
|
+
max_entries: 40
|
|
95
|
+
selection: "Experiments 11 through 50"
|
|
96
|
+
format: "EXP-N: <=15-word result summary"
|
|
97
|
+
fields: [number, summary]
|
|
98
|
+
description: >-
|
|
99
|
+
The 11th through 50th most recent experiments are collapsed to one-line format under
|
|
100
|
+
the archive section. One-line summaries preserve experiment number
|
|
101
|
+
and result.
|
|
102
|
+
4:
|
|
103
|
+
id: EX_CMP4
|
|
104
|
+
dropped:
|
|
105
|
+
threshold: 50
|
|
106
|
+
action: "Removed entirely"
|
|
107
|
+
description: >-
|
|
108
|
+
Experiments beyond the 50 most recent are dropped entirely.
|
|
109
|
+
5:
|
|
110
|
+
id: EX_CMP5
|
|
111
|
+
trigger: "When optimera writes a new experiment"
|
|
112
|
+
description: >-
|
|
113
|
+
Compaction runs when the producing skill writes a new entry.
|
|
114
|
+
If >10 full-detail entries exist, collapse the oldest to one-line.
|
|
115
|
+
If >40 one-line entries exist, drop the oldest.
|
|
116
|
+
6:
|
|
117
|
+
id: EX_CMP6
|
|
118
|
+
archive_section:
|
|
119
|
+
heading: "Archived Experiments"
|
|
120
|
+
placement: "Below recent experiments"
|
|
121
|
+
description: >-
|
|
122
|
+
Archive entries sit below the recent experiments section.
|
|
123
|
+
|
|
124
|
+
EXPERIMENT:
|
|
125
|
+
1:
|
|
126
|
+
id: EX1
|
|
127
|
+
field: number
|
|
128
|
+
type: integer
|
|
129
|
+
required: true
|
|
130
|
+
description: >-
|
|
131
|
+
Sequential experiment number starting from 0. Experiment 0 is
|
|
132
|
+
the baseline measurement. Monotonically increasing.
|
|
133
|
+
validation:
|
|
134
|
+
- "Must be a non-negative integer"
|
|
135
|
+
- "Must be unique across active and archived entries"
|
|
136
|
+
2:
|
|
137
|
+
id: EX2
|
|
138
|
+
field: date
|
|
139
|
+
type: string
|
|
140
|
+
format: "YYYY-MM-DD HH:MM"
|
|
141
|
+
required: true
|
|
142
|
+
description: "When the experiment was run."
|
|
143
|
+
validation:
|
|
144
|
+
- "Must match YYYY-MM-DD HH:MM format"
|
|
145
|
+
3:
|
|
146
|
+
id: EX3
|
|
147
|
+
field: label
|
|
148
|
+
type: string
|
|
149
|
+
required: true
|
|
150
|
+
description: >-
|
|
151
|
+
Short descriptive label for the experiment's variable or
|
|
152
|
+
approach (e.g., 'contract.md lazy-reference', 'baseline').
|
|
153
|
+
validation:
|
|
154
|
+
- "Non-empty string"
|
|
155
|
+
4:
|
|
156
|
+
id: EX4
|
|
157
|
+
field: hypothesis
|
|
158
|
+
type: string
|
|
159
|
+
required: true
|
|
160
|
+
description: >-
|
|
161
|
+
What the experiment tests. The pre-registered hypothesis that
|
|
162
|
+
the metric result will confirm or refute.
|
|
163
|
+
validation:
|
|
164
|
+
- "Non-empty string"
|
|
165
|
+
5:
|
|
166
|
+
id: EX5
|
|
167
|
+
field: method
|
|
168
|
+
type: string
|
|
169
|
+
required: true
|
|
170
|
+
description: >-
|
|
171
|
+
How the experiment was conducted. Harness configuration, model,
|
|
172
|
+
flags, substrate setup, and implementation approach.
|
|
173
|
+
validation:
|
|
174
|
+
- "Non-empty string"
|
|
175
|
+
6:
|
|
176
|
+
id: EX6
|
|
177
|
+
field: change
|
|
178
|
+
type: string
|
|
179
|
+
required: true
|
|
180
|
+
description: >-
|
|
181
|
+
What was changed. File paths, line ranges, and description of
|
|
182
|
+
the modification tested.
|
|
183
|
+
validation:
|
|
184
|
+
- "Non-empty string"
|
|
185
|
+
7:
|
|
186
|
+
id: EX7
|
|
187
|
+
field: metric
|
|
188
|
+
type: map
|
|
189
|
+
required: true
|
|
190
|
+
description: >-
|
|
191
|
+
Metric results. Includes primary value, comparison table
|
|
192
|
+
(baseline vs experiment), delta, and breakdown fields
|
|
193
|
+
(peak_context, output_total, turns, tool_uses, etc.).
|
|
194
|
+
sub_fields:
|
|
195
|
+
- field: primary_value
|
|
196
|
+
type: string
|
|
197
|
+
required: true
|
|
198
|
+
- field: delta_vs_baseline
|
|
199
|
+
type: string
|
|
200
|
+
required: true
|
|
201
|
+
- field: breakdown
|
|
202
|
+
type: map
|
|
203
|
+
required: false
|
|
204
|
+
validation:
|
|
205
|
+
- "primary_value is present"
|
|
206
|
+
- "delta_vs_baseline is present"
|
|
207
|
+
8:
|
|
208
|
+
id: EX8
|
|
209
|
+
field: regression
|
|
210
|
+
type: string
|
|
211
|
+
required: false
|
|
212
|
+
description: >-
|
|
213
|
+
Regression check results. Whether linter, tests, and eval
|
|
214
|
+
passed after the change.
|
|
215
|
+
9:
|
|
216
|
+
id: EX9
|
|
217
|
+
field: status
|
|
218
|
+
type: string
|
|
219
|
+
required: true
|
|
220
|
+
description: >-
|
|
221
|
+
Experiment outcome. 'kept' if the experiment improved the metric
|
|
222
|
+
and passed gates; 'discarded' if it did not; 'baseline' for
|
|
223
|
+
Experiment 0 reference measurements.
|
|
224
|
+
validation:
|
|
225
|
+
- "Must be one of: kept, discarded, baseline"
|
|
226
|
+
10:
|
|
227
|
+
id: EX10
|
|
228
|
+
field: conclusion
|
|
229
|
+
type: string
|
|
230
|
+
required: true
|
|
231
|
+
description: >-
|
|
232
|
+
What was learned. Interprets the metric result relative to the
|
|
233
|
+
hypothesis, explains variance, and draws actionable insight.
|
|
234
|
+
validation:
|
|
235
|
+
- "Non-empty string"
|
|
236
|
+
11:
|
|
237
|
+
id: EX11
|
|
238
|
+
field: next
|
|
239
|
+
type: string
|
|
240
|
+
required: false
|
|
241
|
+
description: >-
|
|
242
|
+
What the next experiment should target. Suggests follow-up
|
|
243
|
+
based on findings.
|
|
244
|
+
12:
|
|
245
|
+
id: EX12
|
|
246
|
+
field: escalation
|
|
247
|
+
type: string
|
|
248
|
+
required: false
|
|
249
|
+
description: >-
|
|
250
|
+
Escalation text when N consecutive experiments are discarded.
|
|
251
|
+
Explains why the optimization is stuck and recommends
|
|
252
|
+
deliberation or harness redesign.
|
|
253
|
+
13:
|
|
254
|
+
id: EX13
|
|
255
|
+
field: target_progress
|
|
256
|
+
type: string
|
|
257
|
+
required: false
|
|
258
|
+
description: >-
|
|
259
|
+
Cumulative progress toward the objective target. Shows current
|
|
260
|
+
value, baseline, percentage change, and remaining gap.
|
|
261
|
+
14:
|
|
262
|
+
id: EX14
|
|
263
|
+
field: iteration_log
|
|
264
|
+
type: list[map]
|
|
265
|
+
required: false
|
|
266
|
+
description: >-
|
|
267
|
+
Harness refinement log for baseline experiments. Each entry
|
|
268
|
+
records a run number, issue encountered, and fix applied.
|
|
269
|
+
|
|
270
|
+
CLOSURE:
|
|
271
|
+
1:
|
|
272
|
+
id: EC1
|
|
273
|
+
field: date
|
|
274
|
+
type: string
|
|
275
|
+
format: "ISO-8601 UTC"
|
|
276
|
+
required: false
|
|
277
|
+
description: >-
|
|
278
|
+
Timestamp when the objective was closed. Per the objective
|
|
279
|
+
closure contract. One closure entry per experiments file.
|
|
280
|
+
2:
|
|
281
|
+
id: EC2
|
|
282
|
+
field: final_value
|
|
283
|
+
type: string
|
|
284
|
+
required: false
|
|
285
|
+
description: >-
|
|
286
|
+
Final metric value at closure. Matches the objective's
|
|
287
|
+
closure final_value.
|
|
288
|
+
3:
|
|
289
|
+
id: EC3
|
|
290
|
+
field: target
|
|
291
|
+
type: string
|
|
292
|
+
required: false
|
|
293
|
+
description: >-
|
|
294
|
+
Target that was met or abandoned at closure.
|
|
295
|
+
4:
|
|
296
|
+
id: EC4
|
|
297
|
+
field: reason
|
|
298
|
+
type: string
|
|
299
|
+
required: false
|
|
300
|
+
description: >-
|
|
301
|
+
Why the objective was closed, per the objective closure contract.
|
|
302
|
+
|
|
303
|
+
ARCHIVE:
|
|
304
|
+
1:
|
|
305
|
+
id: EA1
|
|
306
|
+
field: summary
|
|
307
|
+
type: string
|
|
308
|
+
required: true
|
|
309
|
+
description: >-
|
|
310
|
+
One-line archive entry for a compacted experiment. Format:
|
|
311
|
+
'EXP-N: <=15-word result summary'.
|
|
312
|
+
Preserves experiment number and outcome.
|
|
313
|
+
validation:
|
|
314
|
+
- "Must include experiment number"
|
|
315
|
+
- "Summary portion must be <=15 words"
|
|
316
|
+
|
|
317
|
+
VALIDATION:
|
|
318
|
+
1:
|
|
319
|
+
id: EV1
|
|
320
|
+
rule: unique_experiment_numbers
|
|
321
|
+
severity: error
|
|
322
|
+
description: >-
|
|
323
|
+
Experiment numbers must be unique and sequential.
|
|
324
|
+
checks:
|
|
325
|
+
- "All experiment numbers are unique across active and archived entries"
|
|
326
|
+
- "Experiment numbers are sequential starting from 0"
|
|
327
|
+
2:
|
|
328
|
+
id: EV2
|
|
329
|
+
rule: required_fields
|
|
330
|
+
severity: error
|
|
331
|
+
description: >-
|
|
332
|
+
Each full-detail experiment entry must have: number, date, label,
|
|
333
|
+
hypothesis, method, change, metric, status, and conclusion.
|
|
334
|
+
checks:
|
|
335
|
+
- "number is present and is a non-negative integer"
|
|
336
|
+
- "date is present and matches YYYY-MM-DD HH:MM"
|
|
337
|
+
- "label is present and non-empty"
|
|
338
|
+
- "hypothesis is present and non-empty"
|
|
339
|
+
- "method is present and non-empty"
|
|
340
|
+
- "change is present and non-empty"
|
|
341
|
+
- "metric is present with primary_value and delta_vs_baseline"
|
|
342
|
+
- "status is present and is 'kept', 'discarded', or 'baseline'"
|
|
343
|
+
- "conclusion is present and non-empty"
|
|
344
|
+
3:
|
|
345
|
+
id: EV3
|
|
346
|
+
rule: compaction_thresholds
|
|
347
|
+
severity: warning
|
|
348
|
+
description: >-
|
|
349
|
+
Warn if the file exceeds compaction thresholds: >10 full-detail
|
|
350
|
+
entries or >40 one-line archive entries.
|
|
351
|
+
checks:
|
|
352
|
+
- "Full-detail entries <= 10"
|
|
353
|
+
- "One-line archive entries <= 40"
|
|
354
|
+
- "Total entries <= 50"
|
|
355
|
+
4:
|
|
356
|
+
id: EV4
|
|
357
|
+
rule: word_budget
|
|
358
|
+
severity: advisory
|
|
359
|
+
description: >-
|
|
360
|
+
Advisory check. Per-experiment entry should be <=300 words;
|
|
361
|
+
full file should be <=2500 words.
|
|
362
|
+
checks:
|
|
363
|
+
- "Per-experiment word count <= 300"
|
|
364
|
+
- "Total file word count <= 2500"
|
|
365
|
+
5:
|
|
366
|
+
id: EV5
|
|
367
|
+
rule: baseline_first
|
|
368
|
+
severity: warning
|
|
369
|
+
description: >-
|
|
370
|
+
The first experiment (number 0) should have status 'baseline'
|
|
371
|
+
and serve as the reference measurement.
|
|
372
|
+
checks:
|
|
373
|
+
- "Experiment 0 has status 'baseline'"
|