@trohde/earos 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +1 -1
  2. package/assets/init/.agents/skills/earos-assess/SKILL.md +2 -2
  3. package/assets/init/.agents/skills/earos-assess/references/output-templates.md +61 -67
  4. package/assets/init/.agents/skills/earos-calibrate/SKILL.md +1 -1
  5. package/assets/init/.agents/skills/earos-calibrate/references/calibration-protocol.md +1 -1
  6. package/assets/init/.agents/skills/earos-create/references/validation-checklist.md +2 -2
  7. package/assets/init/.agents/skills/earos-profile-author/SKILL.md +1 -1
  8. package/assets/init/CLAUDE.md +46 -8
  9. package/assets/init/README.md +5 -6
  10. package/assets/init/core/core-meta-rubric.yaml +1 -1
  11. package/assets/init/docs/getting-started.md +20 -42
  12. package/assets/init/docs/onboarding/agent-assisted.md +2 -2
  13. package/assets/init/docs/onboarding/first-assessment.md +10 -4
  14. package/assets/init/docs/onboarding/governed-review.md +26 -16
  15. package/assets/init/docs/onboarding/overview.md +1 -49
  16. package/assets/init/docs/onboarding/scaling-optimization.md +2 -2
  17. package/assets/init/docs/profile-authoring-guide.md +6 -10
  18. package/assets/init/docs/terminology.md +4 -4
  19. package/assets/init/profiles/reference-architecture.yaml +1 -1
  20. package/assets/init/standard/EAROS.md +6 -16
  21. package/bin.js +1 -1
  22. package/dist/assets/{_basePickBy-BVu6YmSW.js → _basePickBy-PmSUrUsK.js} +1 -1
  23. package/dist/assets/{_baseUniq-CWRzQDz_.js → _baseUniq-HuZouVIz.js} +1 -1
  24. package/dist/assets/{arc-CyDBhtDM.js → arc-CJFxtF3d.js} +1 -1
  25. package/dist/assets/{architectureDiagram-2XIMDMQ5-BH6O4dvN.js → architectureDiagram-2XIMDMQ5-XA-oU2UG.js} +1 -1
  26. package/dist/assets/{blockDiagram-WCTKOSBZ-2xmwdjpg.js → blockDiagram-WCTKOSBZ-Oxp-wAST.js} +1 -1
  27. package/dist/assets/{c4Diagram-IC4MRINW-BNmPRFJF.js → c4Diagram-IC4MRINW-D8m5hQH9.js} +1 -1
  28. package/dist/assets/channel-SoktpVBQ.js +1 -0
  29. package/dist/assets/{chunk-4BX2VUAB-DGQTvirp.js → chunk-4BX2VUAB-D2kBTn2O.js} +1 -1
  30. package/dist/assets/{chunk-55IACEB6-DNMAQAC_.js → chunk-55IACEB6-Dxqrf5oZ.js} +1 -1
  31. package/dist/assets/{chunk-FMBD7UC4-BJbVTQ5o.js → chunk-FMBD7UC4-DoOEFFQC.js} +1 -1
  32. package/dist/assets/{chunk-JSJVCQXG-BCxUL74A.js → chunk-JSJVCQXG-BerphV2K.js} +1 -1
  33. package/dist/assets/{chunk-KX2RTZJC-H7wWZOfz.js → chunk-KX2RTZJC-CxUAqT05.js} +1 -1
  34. package/dist/assets/{chunk-NQ4KR5QH-BK4RlTQF.js → chunk-NQ4KR5QH-fCqZgFkU.js} +1 -1
  35. package/dist/assets/{chunk-QZHKN3VN-0chxDV5g.js → chunk-QZHKN3VN-HlpHnJEy.js} +1 -1
  36. package/dist/assets/{chunk-WL4C6EOR-DexfQ-AV.js → chunk-WL4C6EOR-D9yxAHyd.js} +1 -1
  37. package/dist/assets/classDiagram-VBA2DB6C-BT2AdZTe.js +1 -0
  38. package/dist/assets/classDiagram-v2-RAHNMMFH-BT2AdZTe.js +1 -0
  39. package/dist/assets/clone-DOjIfi5r.js +1 -0
  40. package/dist/assets/{cose-bilkent-S5V4N54A-DS2IOCfZ.js → cose-bilkent-S5V4N54A-F5xOBvqW.js} +1 -1
  41. package/dist/assets/{dagre-KLK3FWXG-BbSoTTa3.js → dagre-KLK3FWXG-CD3BTpHv.js} +1 -1
  42. package/dist/assets/{diagram-E7M64L7V-C9TvYgv0.js → diagram-E7M64L7V-C3D9MCay.js} +1 -1
  43. package/dist/assets/{diagram-IFDJBPK2-DowUMWrg.js → diagram-IFDJBPK2-zJBVM-GK.js} +1 -1
  44. package/dist/assets/{diagram-P4PSJMXO-BL6nrnQF.js → diagram-P4PSJMXO-BrmFZOLB.js} +1 -1
  45. package/dist/assets/{erDiagram-INFDFZHY-rXPRl8VM.js → erDiagram-INFDFZHY-aSMhKiV2.js} +1 -1
  46. package/dist/assets/{flowDiagram-PKNHOUZH-DBRM99-W.js → flowDiagram-PKNHOUZH-DwgX7l8F.js} +1 -1
  47. package/dist/assets/{ganttDiagram-A5KZAMGK-INcWFsBT.js → ganttDiagram-A5KZAMGK-C57Hz6QW.js} +1 -1
  48. package/dist/assets/{gitGraphDiagram-K3NZZRJ6-DMwpfE91.js → gitGraphDiagram-K3NZZRJ6-CuchqqGh.js} +1 -1
  49. package/dist/assets/{graph-DLQn37b-.js → graph-CPFGBV5J.js} +1 -1
  50. package/dist/assets/{index-BFFITMT8.js → index-DMt1cpG6.js} +124 -124
  51. package/dist/assets/{infoDiagram-LFFYTUFH-B0f4TWRM.js → infoDiagram-LFFYTUFH-Dd_5tfX7.js} +1 -1
  52. package/dist/assets/{ishikawaDiagram-PHBUUO56-CsU6XimZ.js → ishikawaDiagram-PHBUUO56-DwosSEvT.js} +1 -1
  53. package/dist/assets/{journeyDiagram-4ABVD52K-CQ7ibNib.js → journeyDiagram-4ABVD52K-BuCxcsX0.js} +1 -1
  54. package/dist/assets/{kanban-definition-K7BYSVSG-DzEN7THt.js → kanban-definition-K7BYSVSG-DF_1UCkW.js} +1 -1
  55. package/dist/assets/{layout-C0dvb42R.js → layout-DIcS6m1g.js} +1 -1
  56. package/dist/assets/{linear-j4a8mGj7.js → linear-BXkwBhoJ.js} +1 -1
  57. package/dist/assets/{mindmap-definition-YRQLILUH-DP8iEuCf.js → mindmap-definition-YRQLILUH-DcDvYagd.js} +1 -1
  58. package/dist/assets/{pieDiagram-SKSYHLDU-BpIAXgAm.js → pieDiagram-SKSYHLDU-BmeDeWDM.js} +1 -1
  59. package/dist/assets/{quadrantDiagram-337W2JSQ-DrpXn5Eg.js → quadrantDiagram-337W2JSQ-3zfjULUM.js} +1 -1
  60. package/dist/assets/{requirementDiagram-Z7DCOOCP-Bg7EwHlG.js → requirementDiagram-Z7DCOOCP-B2wQMJpq.js} +1 -1
  61. package/dist/assets/{sankeyDiagram-WA2Y5GQK-BWagRs1F.js → sankeyDiagram-WA2Y5GQK-__kKlCTq.js} +1 -1
  62. package/dist/assets/{sequenceDiagram-2WXFIKYE-q5jwhivG.js → sequenceDiagram-2WXFIKYE-B7O81Vih.js} +1 -1
  63. package/dist/assets/{stateDiagram-RAJIS63D-B_J9pE-2.js → stateDiagram-RAJIS63D-CcJaDrAK.js} +1 -1
  64. package/dist/assets/stateDiagram-v2-FVOUBMTO-B2goOPt-.js +1 -0
  65. package/dist/assets/{timeline-definition-YZTLITO2-dv0jgQ0z.js → timeline-definition-YZTLITO2-DSaQQqIU.js} +1 -1
  66. package/dist/assets/treemap-KZPCXAKY-9Hcrd8XD.js +162 -0
  67. package/dist/assets/{vennDiagram-LZ73GAT5-BdO5RgRZ.js → vennDiagram-LZ73GAT5-BqHNyca2.js} +1 -1
  68. package/dist/assets/{xychartDiagram-JWTSCODW-CpDVe-8v.js → xychartDiagram-JWTSCODW-BqeYf6Fk.js} +1 -1
  69. package/dist/index.html +1 -1
  70. package/package.json +1 -1
  71. package/dist/assets/channel-CiySTNoJ.js +0 -1
  72. package/dist/assets/classDiagram-VBA2DB6C-D7luWJQn.js +0 -1
  73. package/dist/assets/classDiagram-v2-RAHNMMFH-D7luWJQn.js +0 -1
  74. package/dist/assets/clone-ylgRbd3D.js +0 -1
  75. package/dist/assets/stateDiagram-v2-FVOUBMTO-Q_1GcybB.js +0 -1
  76. package/dist/assets/treemap-KZPCXAKY-Dt1dkIE7.js +0 -162
package/README.md CHANGED
@@ -55,7 +55,7 @@ The workspace is **agent-agnostic** — the `.agents/skills/` directory works wi
 |---------|-------------|
 | `earos` | Start the web editor (Express server, opens browser) |
 | `earos init [dir] [--icons]` | Scaffold a complete EaROS workspace in `dir` and optionally download architecture icon packages from AWS, Azure, and GCP into `icons/`, with stable aliases in `icons/aws/`, `icons/azure/`, and `icons/gcp/` |
- | `earos validate <file>` | Validate a rubric or evaluation YAML against EaROS schemas (exit 0/1) |
+ | `earos validate <file>` | Validate any EaROS YAML (rubric, evaluation, or artifact) against schemas (exit 0/1) |
 | `earos manifest` | Regenerate `earos.manifest.yaml` by scanning the filesystem |
 | `earos manifest add <file>` | Add a single file to the manifest |
 | `earos manifest check` | Verify the manifest matches the filesystem (exits non-zero on drift) |
package/assets/init/.agents/skills/earos-assess/SKILL.md CHANGED
@@ -156,11 +156,11 @@ Quick self-checks:

 ### Step 8 — Status Determination

- **Gates first** — check gate criteria before computing any weighted average. A single critical gate failure = Reject, no matter how high the average is.
+ **Gates first** — check gate criteria before computing any weighted average. A single critical gate failure blocks a passing status, no matter how high the average is. The specific outcome (`reject` or `not_reviewable`) is determined by the criterion's `failure_effect`.

 | Gate type | Effect |
 |-----------|--------|
- | `critical` failure | Status = `reject` regardless of average |
+ | `critical` failure | Status = `reject` or `not_reviewable` (per `failure_effect`) regardless of average |
 | `major` failure | Status cannot exceed `conditional_pass` |

 Then compute the weighted overall average and apply thresholds:
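To make the revised gate rule concrete, here is a minimal hypothetical sketch of the fields involved. The field names (`overall_score`, `gate_failures`, `overall_status`, `failure_effect`) and the 3.2 threshold come from this release's templates, but the list shape, criterion ID, and values below are illustrative assumptions, not package content:

```yaml
# Hypothetical illustration — a critical gate with failure_effect: reject
# forces the status even though the weighted average clears the 3.2 pass threshold.
overall_score: 3.4
gate_failures:
  - SCP-01          # assumed shape: list of failed gate criterion IDs
overall_status: reject
```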
package/assets/init/.agents/skills/earos-assess/references/output-templates.md CHANGED
@@ -13,36 +13,41 @@ The YAML record is the machine-readable, archivable output. It is the authoritat
 ### Full Template

 ```yaml
+ kind: evaluation
+
 evaluation_id: EVAL-[TYPE]-[NNNN]
 # Format: EVAL-SOL-0001 (solution), EVAL-REF-0001 (reference arch), EVAL-ADR-0001, etc.
 # Use a sequential number within the artifact type.

- rubric_id: [rubric IDs used, comma-separated]
- # Example: EAROS-CORE-002, EAROS-REFARCH-001
- # If overlays applied: EAROS-CORE-002, EAROS-SOL-001, EAROS-OVR-SEC-001
+ rubric_id: [primary rubric ID e.g. EAROS-CORE-002 or EAROS-SOL-001]
+ rubric_version: [version of the rubric used — e.g. 2.0.0]

- rubric_version: [version of the profile used]
- # Example: 2.0.0
+ # If profile and/or overlays are applied in addition to the core:
+ profiles_applied:
+ - [profile rubric ID — e.g. EAROS-REFARCH-001]
+ overlays_applied:
+ - [overlay rubric ID — e.g. EAROS-OVR-SEC-001]

- artifact_ref:
- id: [artifact identifier if one exists, or omit]
- title: [full title of the artifact as it appears in the document]
- artifact_type: [solution_architecture | reference_architecture | adr | capability_map | roadmap]
- owner: [team or individual named as owner in the artifact]
- uri: [repo path, URL, or file path — omit if not available]
+ artifact_id: [artifact identifier — e.g. SOL-ART-042]
+ artifact_type: [solution_architecture | reference_architecture | adr | capability_map | roadmap]
+ artifact_version: [version of the artifact being evaluated omit if not available]

 evaluation_date: '[YYYY-MM-DD]'
+ evaluation_mode: [human | agent | hybrid]

- evaluators:
- - name: EAROS evaluator
- role: rubric-evaluator
- mode: agent
+ evaluated_by:
+ - role: evaluator
+ actor: agent
+ identity: EAROS evaluator
 # If human also evaluated, add:
- # - name: [name]
- # role: domain architect
- # mode: human
+ # - role: evaluator
+ # actor: human
+ # identity: [name or role]
+ # If a challenge pass was performed:
+ # - role: challenger
+ # actor: agent

- status: [pass | conditional_pass | rework_required | reject | not_reviewable]
+ overall_status: [pass | conditional_pass | rework_required | reject | not_reviewable]

 overall_score: [weighted average to 1 decimal place — e.g. 2.8]
 # Compute: sum(dimension_score × weight) / sum(weights)
@@ -58,22 +63,27 @@ criterion_results:
 - criterion_id: [ID]
 # Use IDs from the rubric YAML, e.g. STK-01, SCP-01, TRC-01
 score: [0 | 1 | 2 | 3 | 4 | "N/A"]
- judgment_type: [observed | inferred | external | mixed | none]
- # 'mixed' when evidence combines observed and inferred
+ evidence_class: [observed | inferred | external]
+ # observed: directly supported by a quote from the artifact
+ # inferred: reasonable interpretation not directly stated
+ # external: judgment based on a standard or source outside the artifact
 confidence: [high | medium | low]
- evidence_sufficiency: [sufficient | partial | absent]
+ confidence_reason: "[why confidence is below high — omit if high]"
+ evidence_sufficiency: [sufficient | partial | insufficient | none]
 # sufficient: evidence supports the score without reservation
 # partial: evidence exists but is incomplete or ambiguous
- # absent: no evidence found; score is 0 or N/A
+ # insufficient: evidence exists but is too weak to confidently score
+ # none: no evidence found; score is 0 or N/A
 evidence_refs:
- - location: "[section heading, page number, or diagram label]"
- excerpt: "[direct quote or very close paraphrase from the artifact]"
+ - section: "[section heading or number]"
+ quotation: "[direct quote or very close paraphrase from the artifact]"
 # Add more refs if multiple evidence sources support the score
+ # Can also be a simple string: "Section 3.2, paragraph 2"
 rationale: >
 [1-3 sentences explaining why the evidence maps to this score level.
 Cite the specific evidence. Explain why it is not one level higher
 if the score is below 4.]
- missing_information:
+ evidence_gaps:
 - "[specific piece of information that would improve this score]"
 # Leave empty if score is 4 or N/A
 recommended_actions:
@@ -83,48 +93,29 @@ criterion_results:

 # Repeat for every criterion in core + profile + overlays

- dimension_scores:
+ dimension_results:
 - dimension_id: [D1 | D2 | D3 | D4 | D5 | D6 | D7 | D8 | D9 | RA-D1 | etc.]
- dimension_name: [name from rubric]
- score: [weighted average of criteria in this dimension, 1 decimal place]
- weight: [weight from rubric YAML — default 1.0]
- summary: "[1 sentence summary of why this dimension scored this way]"
+ weighted_score: [weighted average of criteria in this dimension, 1 decimal place]

 # Repeat for every dimension in core + profile

- narrative_summary: |
+ decision_summary: >
 [2-3 paragraphs for a governance reviewer. Cover:
 1. What the artifact is, who it is for, and the overall verdict
 2. The most significant strengths (what was well done)
 3. The most significant weaknesses (what holds it back)
+ Address all three evaluation perspectives: artifact quality,
+ architectural fitness, and governance fit.
 Do NOT restate the criterion scores — synthesize them into a judgment.]

- summary:
- strengths:
- - "[Key strength — specific, not generic]"
- - "[Second strength]"
- weaknesses:
- - "[Key weakness — specific, not generic]"
- - "[Second weakness]"
- risks:
- - "[Risk that follows from a weakness — what could go wrong in delivery/governance]"
- next_actions:
- - "[Top-priority action]"
- - "[Second priority action]"
- decision_narrative: >
- [1-2 sentences on what happens next — should this go to governance board as-is,
- conditional on specific fixes, or returned for rework?]
-
 recommended_actions:
- - priority: 1
- criterion_id: [ID of the criterion this addresses]
- action: "[Specific, actionable step — verb-first]"
- owner_suggestion: "[Who should own this — team role, not individual]"
- - priority: 2
- criterion_id: [ID]
- action: "[Action]"
- owner_suggestion: "[Role]"
+ - "[Top-priority action — verb-first, specific]"
+ - "[Second priority action]"
 # Top 5 actions, ordered by impact on overall status
+
+ challenger_notes: >
+ [Findings from the challenge pass (Step 6). Which scores were
+ challenged, what was the outcome, and any adjustments made.]
 ```

 ---
@@ -135,7 +126,7 @@ recommended_actions:

 Use a sequential ID within the artifact type. If you don't have a numbering system, use the date: `EVAL-SOL-20260319-001`. The ID must be unique within the organization's evaluation records.

- ### status
+ ### overall_status

 The status is determined by gates first, then thresholds (Step 8 in SKILL.md). Do not set status until all gate checks and aggregation are complete. Common error: setting `conditional_pass` when a critical gate has failed — critical gate failure always = `reject`.

@@ -148,21 +139,23 @@ overall_score = sum(dimension_score × dimension_weight) / sum(dimension_weights

 Round to 1 decimal place. A score of 2.35 rounds to 2.4, which is the `conditional_pass` threshold — be precise.

- ### judgment_type
+ ### evidence_class

- This is the evidence class for the criterion as a whole. If all evidence is `observed`, use `observed`. If you used a mix of observed and inferred evidence to reach the score, use `mixed`. `none` means no evidence was found (score must be 0 or 1).
+ This is the evidence class for the criterion as a whole — `observed`, `inferred`, or `external`. Use the highest-credibility class that applies. If the primary evidence is a direct quote from the artifact, use `observed`. If you are interpreting content that is not directly stated, use `inferred`. If your judgment relies on a standard or source outside the artifact, use `external`.

 ### evidence_sufficiency

 This is your assessment of whether the evidence you found is adequate to confidently assign the score:
 - `sufficient` — the evidence clearly matches one level; you wouldn't expect a reviewer to disagree
 - `partial` — evidence exists but is ambiguous; a different reviewer might score differently
- - `absent` — no evidence was found; score is based on absence
+ - `insufficient` — evidence exists but is too weak to support the score with confidence
+ - `none` — no evidence was found; score is based on absence

- ### narrative_summary
+ ### decision_summary

 This is the most important text in the record for human reviewers. Write it for a governance board member who will skim the criterion table but read the narrative carefully. The narrative should:
 - Name what the artifact is and its governance context
+ - Address all three evaluation perspectives: artifact quality, architectural fitness, and governance fit
 - Identify the 2-3 things that most determine the outcome
 - Give a clear recommendation (proceed, fix X first, rework)

@@ -267,9 +260,9 @@ No gate failures. The artifact passes all gate checks.
 | Field | Value |
 |-------|-------|
 | Score | [0-4 or N/A] |
- | Evidence Class | [observed / inferred / external / none] |
+ | Evidence Class | [observed / inferred / external] |
 | Confidence | [high / medium / low] |
- | Evidence Sufficiency | [sufficient / partial / absent] |
+ | Evidence Sufficiency | [sufficient / partial / insufficient / none] |

 **Evidence:** [Section/location] — "[Direct quote or close paraphrase]"

@@ -288,7 +281,7 @@ No gate failures. The artifact passes all gate checks.
 ## Narrative Summary

 [2-3 paragraphs — synthesized judgment for a governance reviewer.
- Copy from the YAML narrative_summary field.]
+ Copy from the YAML decision_summary field.]

 ---

@@ -302,10 +295,11 @@ Copy from the YAML narrative_summary field.]
 Before submitting the YAML evaluation record, check:

 1. Every criterion in the loaded rubric files has a result entry
- 2. Every score has at least one `evidence_refs` entry (unless `evidence_class: none`)
+ 2. Every score has at least one `evidence_refs` entry (unless `evidence_sufficiency: none`)
 3. `gate_failures` matches the gate criteria that failed (not just any low score)
 4. `overall_score` is the weighted average, not a simple average
- 5. `status` was determined by gates first, then thresholds
- 6. The `narrative_summary` does not just list criterion scores — it synthesizes them
+ 5. `overall_status` was determined by gates first, then thresholds
+ 6. The `decision_summary` does not just list criterion scores — it synthesizes them
+ 7. All required schema fields are present: `kind`, `artifact_id`, `artifact_type`, `evaluated_by`, `evaluation_mode`, `overall_status`, `overall_score`

 The full JSON Schema for validation is at `standard/schemas/evaluation.schema.json`. If you have access to a YAML validator, validate the output before delivery.
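As a quick cross-check against checklist item 7, a minimal sketch of a record header using the renamed fields — every value below is a placeholder drawn from the examples in this template, not a real evaluation:

```yaml
kind: evaluation
evaluation_id: EVAL-SOL-0001
rubric_id: EAROS-CORE-002
rubric_version: 2.0.0
artifact_id: SOL-ART-042
artifact_type: solution_architecture
evaluation_date: '2026-03-19'
evaluation_mode: agent
evaluated_by:
  - role: evaluator
    actor: agent
    identity: EAROS evaluator
overall_status: conditional_pass
overall_score: 2.8
```

If the CLI is available, `earos validate <file>` (per the README change above) can be run against such a record to check it against the schemas.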
package/assets/init/.agents/skills/earos-calibrate/SKILL.md CHANGED
@@ -54,7 +54,7 @@ For each calibration artifact, run a full EAROS assessment using the `earos-asse

 **This step cannot be skipped or abbreviated.** Independent scoring is the entire point of calibration. If you score after seeing the benchmark, you measure nothing.

- > **For the full assessment protocol**, see `.claude/skills/earos-assess/SKILL.md`.
+ > **For the full assessment protocol**, see `.agents/skills/earos-assess/SKILL.md`.

 ---

package/assets/init/.agents/skills/earos-calibrate/references/calibration-protocol.md CHANGED
@@ -99,7 +99,7 @@ Before finalising scores, run the internal challenge:
 ### 2.4 Determine status for each artifact

 Apply the status thresholds:
- 1. Check gate failures first — any critical gate failure = reject
+ 1. Check gate failures first — any critical gate failure blocks a passing status (outcome per `failure_effect`: `reject` or `not_reviewable`)
 2. Check overall score: ≥ 3.2 = pass, 2.4–3.19 = conditional_pass, < 2.4 = rework_required
 3. Check dimension floor: no dimension < 2.0 for a pass status

package/assets/init/.agents/skills/earos-create/references/validation-checklist.md CHANGED
@@ -6,11 +6,11 @@ This checklist covers all pre-publication quality checks for a new EAROS rubric

 ## Quick Reference — What "Valid" Means

- A valid EAROS v2 rubric file:
+ A valid EAROS rubric file:
 1. Passes schema validation against `standard/schemas/rubric.schema.json`
 2. Has a unique rubric ID and unique criterion IDs (no conflicts across the entire repo)
 3. Has the correct YAML structure for its kind (profile, overlay, or core rubric)
- 4. Has all 13 v2 required fields on every criterion
+ 4. Has all 13 required fields on every criterion
 5. Has a calibrated gate model (not over- or under-gated)
 6. Does not duplicate what `EAROS-CORE-002` already covers

package/assets/init/.agents/skills/earos-profile-author/SKILL.md CHANGED
@@ -179,7 +179,7 @@ change_log:
 date: "[today]"
 author: "[author]"
 changes:
- - Initial profile for EAROS v2.0
+ - Initial profile
 ```

 ---
package/assets/init/CLAUDE.md CHANGED
@@ -1,6 +1,6 @@
 # CLAUDE.md — EAROS Project Guide

- **Enterprise Architecture Rubric Operational Standard · Version 2.0**
+ **Enterprise Architecture Rubric Operational Standard**

 This file tells Claude how to work effectively in this project.

@@ -327,10 +327,10 @@ Start from `templates/new-profile.template.yaml`. Set:
 - `design_method` from step 2
 - `rubric_id` using pattern `EAROS-<ARTIFACT>-<NNN>`

- ### Step 4 — Write 5–12 criteria
+ ### Step 4 — Write up to 12 criteria

 Rules:
- - Add **no more than 5–12 criteria** (the core already has 10)
+ - Add **no more than 12 criteria** (the core already has 10; built-in profiles use 3–9)
 - Every criterion needs: `question`, `description`, `scoring_guide` (all 5 levels 0–4), `required_evidence`, `anti_patterns`, `examples.good`, `examples.bad`, `decision_tree`, `remediation_hints`
 - Assign each criterion to a dimension with an appropriate `weight`
 - Designate gate types deliberately — not every criterion needs a gate; over-gating creates false rejects
@@ -467,7 +467,7 @@ The `rubric_locked: true` flag in `agent_evaluation` means an agent must not mod

 1. **Never collapse the three evaluation types.** Artifact quality, architectural fitness, and governance fit are distinct judgments. Never merge them into a single opaque score.

- 2. **Gates before averages.** Always check gates before computing a weighted average. A single critical gate failure = Reject, no matter how high the average.
+ 2. **Gates before averages.** Always check gates before computing a weighted average. A single critical gate failure blocks a passing status — the outcome (`Reject` or `Not Reviewable`) depends on the criterion's `failure_effect`.

 3. **Evidence first.** Every score requires a cited excerpt or reference. "Evidence: section 3 states X" is valid. "The artifact seems to address this" is not. Use RULERS anchoring.

@@ -489,7 +489,7 @@ The `rubric_locked: true` flag in `agent_evaluation` means an agent must not mod

 ## 10. The Reference Architecture Profile — Model for Other Profiles

- `profiles/reference-architecture.yaml` (`EAROS-REFARCH-001`) is the first full profile in EAROS v2 and serves as the reference implementation for how profiles should be built.
+ `profiles/reference-architecture.yaml` (`EAROS-REFARCH-001`) is the first full profile and serves as the reference implementation for how profiles should be built.

 **Why it is a good model:**
 - Uses `design_method: pattern_library` (Method E) — appropriate for recurring platform blueprints
@@ -522,7 +522,7 @@ This pattern — count observable features, branch on presence — is the right

 ## 11. Agent Skills

- The `.agents/skills/` directory contains Claude agent skills for working with EAROS. Each skill lives in its own subdirectory with a `SKILL.md` file. Skills are auto-triggered when their description matches the user's request — no slash command needed.
+ The `.agents/skills/` directory contains Claude Code skills for working with EAROS in this development repo. In scaffolded workspaces (`earos init`), skills live in `.agents/skills/` — an agent-agnostic convention readable by Cursor, Copilot, Windsurf, and other AI coding tools. Each skill lives in its own subdirectory with a `SKILL.md` file. Skills are auto-triggered when their description matches the user's request — no slash command needed.

 ```
 .agents/skills/
@@ -530,7 +530,7 @@ The `.agents/skills/` directory contains Claude agent skills for working with EA
 ├── earos-review/SKILL.md Challenger — audits an existing evaluation record for over-scoring and unsupported claims
 ├── earos-template-fill/SKILL.md Author guide — coaches artifact authors through writing assessment-ready documents
 ├── earos-create/SKILL.md Rubric creation — guided interview + YAML generation for profiles, overlays, and core rubrics
- ├── earos-profile-author/SKILL.md Profile YAML authoring — technical reference for v2 field structure and schema compliance
+ ├── earos-profile-author/SKILL.md Profile YAML authoring — technical reference for field structure and schema compliance
 ├── earos-calibrate/SKILL.md Calibration — runs calibration exercises and computes inter-rater reliability
 ├── earos-report/SKILL.md Reporting — generates executive reports from evaluation records
 ├── earos-validate/SKILL.md Health check — validates all YAML rubrics against schemas and checks consistency
@@ -592,7 +592,7 @@ The full glossary is in [`docs/terminology.md`](docs/terminology.md). It covers
 | Term | Definition |
 |------|------------|
 | **Core meta-rubric** | Universal foundation rubric (`EAROS-CORE-002`): 9 dimensions, 10 criteria, applied to every artifact |
- | **Profile** | Artifact-type extension of the core (5–12 extra criteria). Declares `inherits: [EAROS-CORE-002]` |
+ | **Profile** | Artifact-type extension of the core (additional criteria, typically 3–9). Declares `inherits: [EAROS-CORE-002]` |
 | **Overlay** | Cross-cutting concern extension (e.g. security). Applied by context, not artifact type. Uses `append_to_base_rubric` scoring |
 | **Gate** | Criterion-level control that blocks a passing status regardless of average. Types: `none`, `advisory`, `major`, `critical` |
 | **Evidence anchor** | Specific reference (section, page, diagram ID) in the artifact supporting a score. Required by RULERS protocol |
@@ -612,6 +612,43 @@ The full glossary is in [`docs/terminology.md`](docs/terminology.md). It covers

 ---

+ ## 14. Publishing the CLI to npm
+
+ The `@trohde/earos` CLI is published from `tools/editor/`. A GitHub Actions workflow (`.github/workflows/publish-npm.yml`) auto-publishes when the version in `tools/editor/package.json` changes on `master`.
+
+ ### When the user says "publish to npm"
+
+ 1. **Review all changes since the last publish** — run `git log` to see commits since the last `release:` commit
+ 2. **Choose the version bump** based on what changed:
+ - **patch** — bug fixes, documentation, typo fixes, dependency updates, minor UI tweaks
+ - **minor** — new features, new commands, new editor capabilities, new schema fields, new skills bundled in `assets/init/`
+ - **major** — breaking CLI changes (renamed commands, removed flags), breaking changes to `earos init` scaffold structure, incompatible schema changes
+ 3. **Bump, commit, and push:**
+ ```bash
+ cd tools/editor && npm run version:patch # or version:minor / version:major
+ cd ../..
+ git add tools/editor/package.json
+ git commit -m "release: v<NEW_VERSION>"
+ git push origin master
+ ```
+ 4. **Watch the workflow** — `gh run watch` on the triggered run to confirm publish succeeds
+ 5. **Report the result** — tell the user the new version and confirm it's live
+
+ ### Version scripts (in `tools/editor/`)
+
+ | Script | Effect |
+ |--------|--------|
+ | `npm run version:patch` | Bump patch (1.0.1 → 1.0.2) |
+ | `npm run version:minor` | Bump minor (1.0.2 → 1.1.0) |
+ | `npm run version:major` | Bump major (1.1.0 → 2.0.0) |
+ | `npm run release:patch` | Bump + publish locally (bypasses CI) |
+ | `npm run release:minor` | Bump + publish locally (bypasses CI) |
+ | `npm run release:major` | Bump + publish locally (bypasses CI) |
+
+ **CI token note:** The `NPM_TOKEN` GitHub secret holds a granular access token with "Bypass 2FA" enabled, scoped to `@trohde`. It expires periodically and must be rotated on npmjs.com → Access Tokens.
+
+ ---
+
 ## Quick Reference

 | Task | Where to start |
@@ -633,3 +670,4 @@ The full glossary is in [`docs/terminology.md`](docs/terminology.md). It covers
 | Regenerate the manifest | `node tools/editor/bin.js manifest` |
 | Add a new rubric to the manifest | `node tools/editor/bin.js manifest add <path>` |
 | Check manifest-filesystem consistency | `node tools/editor/bin.js manifest check` |
+ | Publish CLI to npm | Say "publish to npm" — Claude chooses version bump, commits, pushes, CI publishes |
package/assets/init/README.md CHANGED
@@ -1,10 +1,9 @@
 # EaROS — Enterprise Architecture Rubric Operational Standard

 [![License: CC BY 4.0](https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/)
- [![Version](https://img.shields.io/badge/Version-2.0.0-blue.svg)](CHANGELOG.md)
 [![GitHub](https://img.shields.io/badge/GitHub-ThomasRohde%2FEAROS-blue?logo=github)](https://github.com/ThomasRohde/EAROS)

- **Version 2.0.0 · March 2026** · [github.com/ThomasRohde/EAROS](https://github.com/ThomasRohde/EAROS)
+ **March 2026** · [github.com/ThomasRohde/EAROS](https://github.com/ThomasRohde/EAROS)

 EaROS is a structured, extensible framework for evaluating enterprise architecture artifacts. It provides a universal rubric foundation, artifact-specific profiles, and cross-cutting overlays that together enable consistent, evidence-anchored assessment — by human reviewers and AI agents alike.

@@ -367,7 +366,7 @@ flowchart LR
 style S8 fill:#4caf50,stroke:#2e7d32,color:#fff
 ```

- Calibrate your agent against `calibration/gold-set/` before production use. Target inter-rater reliability of Cohen's κ > 0.70.
+ Calibrate your agent before production use. Start with the benchmark artifact in `examples/aws-event-driven-order-processing/`, then populate `calibration/gold-set/` with your own reference artifacts. Target inter-rater reliability of Cohen's κ > 0.70.

 ---

@@ -450,9 +449,9 @@ Use `kind: overlay` and `artifact_type: any`. Overlays use `scoring.method: appe

 ### Calibrating Before Production

- 1. Score the artifacts in `calibration/gold-set/` independently
- 2. Compare against reference scores using `calibration/results/`
- 3. Resolve disagreements against the level descriptors
+ 1. Start with the benchmark at `examples/aws-event-driven-order-processing/`, then add your own artifacts to `calibration/gold-set/`
+ 2. Have 2+ reviewers score each artifact independently
+ 3. Compare scores and resolve disagreements against the level descriptors
 4. Iterate until κ > 0.70 on well-defined criteria, > 0.50 on subjective ones

 ---
package/assets/init/core/core-meta-rubric.yaml CHANGED
@@ -635,7 +635,7 @@ change_log:
 - Added reliability_targets to scoring
 - Added evidence_class and evidence_anchors to outputs
 - Added DAG evaluation steps
- - Updated from EAROS v1 based on 63-source research programme
+ - Incorporated findings from 63-source research programme
 - version: "1.0.0"
 date: "2026-03-16"
 author: "Thomas Rohde"
package/assets/init/docs/getting-started.md CHANGED
@@ -2,8 +2,6 @@

 This guide walks you through your first architecture artifact assessment using EaROS. By the end, you will have scored an artifact, produced a structured evaluation record, and know how to interpret the results.

- ---
-
 ## Before You Start

 **What you need:**
@@ -15,24 +13,22 @@ This guide walks you through your first architecture artifact assessment using E
 - A scoring sheet to record your evidence and scores
 - Clear pass/fail thresholds

- ---
-
 ## Step 1: Identify the Artifact Type

 EaROS has profiles for the most common enterprise architecture artifact types:

- | Artifact Type | Profile to Use |
- |--------------|----------------|
- | Solution architecture document | `profiles/solution-architecture.yaml` |
- | Reference architecture | `profiles/reference-architecture.yaml` |
- | Architecture Decision Record (ADR) | `profiles/adr.yaml` |
- | Capability map | `profiles/capability-map.yaml` |
- | Architecture roadmap | `profiles/roadmap.yaml` |
- | Other / unknown | Core only: `core/core-meta-rubric.yaml` |
+ | Artifact Type | Profile to Use | Status |
+ |--------------|----------------|--------|
+ | Solution architecture document | `profiles/solution-architecture.yaml` | Approved |
+ | Reference architecture | `profiles/reference-architecture.yaml` | Draft |
+ | Architecture Decision Record (ADR) | `profiles/adr.yaml` | Approved |
+ | Capability map | `profiles/capability-map.yaml` | Approved |
+ | Architecture roadmap | `profiles/roadmap.yaml` | Draft |
+ | Other / unknown | Core only: `core/core-meta-rubric.yaml` | --- |

- If your artifact does not match any profile, apply only the core rubric. The core dimensions are universal.
+ > **Status:** *Approved* profiles have completed calibration. *Draft* profiles are usable but have not yet been calibrated with inter-rater reliability measured. Check `earos.manifest.yaml` for the latest status of each rubric.

- ---
+ If your artifact does not match any profile, apply only the core rubric. The core dimensions are universal.

 ## Step 2: Select Your Rubric Set

@@ -60,14 +56,11 @@ overlays/regulatory.yaml ← if the design is subject to compliance requir

 Apply overlays selectively. Not every artifact needs every overlay.

- ---
-
 ## Step 3: Open the Scoring Sheet

- Open the appropriate Excel scoring sheet from `tools/scoring-sheets/`:
+ Open the Excel scoring sheet from `tools/scoring-sheets/`:

- - **`EAROS_Scoring_Sheet_v2.xlsx`** — use for most artifact types
- - **`EAROS_RefArch_Scoring_Sheet.xlsx`** — use specifically for reference architectures
+ - **`EAROS_Scoring_Sheet_v2.xlsx`** — general-purpose, works for all artifact types

 The scoring sheet has:
 - One tab per rubric section (core dimensions + profile dimensions)
@@ -75,8 +68,6 @@ The scoring sheet has:
 - Evidence fields for recording your cited text or reference
 - An automatic aggregation tab that calculates the weighted score and indicates the pass threshold

- ---
-
 ## Step 4: Read the Rubric, Then Read the Artifact

 Open the relevant YAML rubric files. For each criterion, familiarise yourself with:
@@ -87,8 +78,6 @@ Open the relevant YAML rubric files. For each criterion, familiarise yourself wi

 **Then read the artifact end-to-end** before scoring. Do not score as you read on the first pass. Form an overall impression first, then return to score criterion by criterion.

- ---
-
 ## Step 5: Score Each Criterion

 For each criterion:
@@ -109,35 +98,30 @@ For each criterion:

 You read the artifact and find a scope statement that defines what is in scope but does not list explicit exclusions. → **Score: 3** → Record: "Section 1.2: scope statement defines in-scope components but exclusions are not listed."

- ---
-
 ## Step 6: Check the Gates

 Before calculating the aggregate, check every criterion with a `gate` object (not `gate: false`) in the rubric files. Gate behaviour depends on severity:

- - **`critical`** — Any score below the threshold triggers an immediate **Reject**, regardless of the aggregate score.
+ - **`critical`** — Any score below the threshold blocks passing. The gate's `failure_effect` determines the outcome: **Reject** (mandatory control breach) or **Not Reviewable** (evidence too incomplete to score).
 - **`major`** — A weak score (typically < 2) caps the status at **Conditional Pass** at best; cannot achieve a Pass.
 - **`advisory`** — Triggers a recommendation but does not cap the status.

 Gate criteria represent non-negotiable minimums on their respective concern. A critical gate failure means the artifact has a fundamental deficiency that makes it unsuitable for its purpose.

- ---
-
 ## Step 7: Determine the Status

 The scoring sheet calculates the weighted dimension average automatically. Read the status from the aggregation tab:

 | Weighted Average | Status |
 |-----------------|--------|
- | ≥ 3.2 | **Pass** |
- | 2.4 – 3.19 | **Conditional Pass** |
+ | ≥ 3.2 (no critical gate failure, no dimension < 2.0) | **Pass** |
+ | 2.4 – 3.19 (no critical gate failure) | **Conditional Pass** |
 | < 2.4 | **Rework Required** |
- | Any gate at 0 | **Reject** |
+ | Critical gate failure (mandatory control breach) | **Reject** |
+ | Critical gate failure (evidence too incomplete to score) | **Not Reviewable** |

 **Conditional Pass** means the artifact is acceptable for use but has identified remediation items that must be addressed before the next formal review. Document each item with the criterion ID, the score, and the specific improvement needed.

- ---
-
 ## Step 8: Write the Evaluation Record

 Use `templates/evaluation-record.template.yaml` to produce a structured evaluation record. See `examples/example-solution-architecture.evaluation.yaml` for a completed example.
@@ -154,8 +138,6 @@ The evaluation record captures:

 Store completed evaluation records with the artifact or in your architecture governance system.

- ---
-
 ## Interpreting Results

 ### Pass
@@ -170,8 +152,6 @@ The artifact has pervasive or significant gaps. Return it to the author with the
 ### Reject
 The artifact has failed one or more gate criteria, indicating a fundamental deficiency. Reject means the artifact should not be used or progressed until the gate issue is fully resolved. A gate failure is not about quality level — it is about something that makes the artifact unsuitable for its purpose.

- ---
-
 ## Calibrating Your Assessments

 If you are introducing EaROS to a team or beginning to use it for formal governance, calibrate before going live:
@@ -184,11 +164,9 @@ If you are introducing EaROS to a team or beginning to use it for formal governa

 Target inter-rater reliability: Cohen's κ > 0.70 for well-defined criteria.

- ---
-
 ## Next Steps

 - **Create a profile** for an artifact type not yet covered → [`docs/profile-authoring-guide.md`](profile-authoring-guide.md)
- - **Set up AI-agent assessment** → [`README.md`](../README.md#ai-agent-assessment) and [`standard/EAROS.md`](../standard/EAROS.md)
- - **Review the research behind EaROS** → [`research/`](../research/)
- - **Run a team calibration session** → [`calibration/`](../calibration/)
+ - **Set up AI-agent assessment** → [`standard/EAROS.md`](../standard/EAROS.md)
+ - **Review the research behind EaROS** → `research/` directory in the repository
+ - **Run a team calibration session** → `calibration/` directory in the repository
package/assets/init/docs/onboarding/agent-assisted.md CHANGED
@@ -28,7 +28,7 @@ Agent evaluations follow an 8-step directed acyclic graph (DAG). Each step must

 **Step 7 --- Calibration.** The agent aligns its score distribution to reference human distributions using the Wasserstein-based method (`rulers_wasserstein`). This prevents systematic over-scoring or under-scoring relative to human reviewers.

- **Step 8 --- Status Determination.** Gates are checked first (critical gate failure equals Reject), then the weighted average is computed and applied against the status thresholds.
+ **Step 8 --- Status Determination.** Gates are checked first (a critical gate failure blocks a passing status --- the specific outcome, `Reject` or `Not Reviewable`, is determined by the criterion's `failure_effect`), then the weighted average is computed and applied against the status thresholds.

 > **The DAG is not optional.** Skipping steps --- particularly the challenge pass (Step 6) --- undermines evaluation quality. An agent evaluation without a challenge pass is an unchecked evaluation.

@@ -36,7 +36,7 @@ Agent evaluations follow an 8-step directed acyclic graph (DAG). Each step must

 ### With Claude Code

- The `earos init` command scaffolds agent skills into `.claude/skills/` in your workspace. These are ready to use immediately:
+ The `earos init` command scaffolds agent skills into `.agents/skills/` in your workspace. These are ready to use immediately:

 ```bash
 earos init my-workspace