@trohde/earos 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -0
- package/assets/init/.agents/skills/earos-artifact-gen/SKILL.md +106 -0
- package/assets/init/.agents/skills/earos-artifact-gen/references/interview-guide.md +313 -0
- package/assets/init/.agents/skills/earos-artifact-gen/references/output-guide.md +367 -0
- package/assets/init/.agents/skills/earos-assess/SKILL.md +212 -0
- package/assets/init/.agents/skills/earos-assess/references/calibration-benchmarks.md +160 -0
- package/assets/init/.agents/skills/earos-assess/references/output-templates.md +311 -0
- package/assets/init/.agents/skills/earos-assess/references/scoring-protocol.md +281 -0
- package/assets/init/.agents/skills/earos-calibrate/SKILL.md +153 -0
- package/assets/init/.agents/skills/earos-calibrate/references/agreement-metrics.md +188 -0
- package/assets/init/.agents/skills/earos-calibrate/references/calibration-protocol.md +263 -0
- package/assets/init/.agents/skills/earos-create/SKILL.md +257 -0
- package/assets/init/.agents/skills/earos-create/references/criterion-writing-guide.md +268 -0
- package/assets/init/.agents/skills/earos-create/references/dependency-rules.md +193 -0
- package/assets/init/.agents/skills/earos-create/references/rubric-interview-guide.md +123 -0
- package/assets/init/.agents/skills/earos-create/references/validation-checklist.md +238 -0
- package/assets/init/.agents/skills/earos-profile-author/SKILL.md +251 -0
- package/assets/init/.agents/skills/earos-profile-author/references/criterion-writing-guide.md +280 -0
- package/assets/init/.agents/skills/earos-profile-author/references/design-methods.md +158 -0
- package/assets/init/.agents/skills/earos-profile-author/references/profile-checklist.md +173 -0
- package/assets/init/.agents/skills/earos-remediate/SKILL.md +118 -0
- package/assets/init/.agents/skills/earos-remediate/references/output-template.md +199 -0
- package/assets/init/.agents/skills/earos-remediate/references/remediation-patterns.md +330 -0
- package/assets/init/.agents/skills/earos-report/SKILL.md +85 -0
- package/assets/init/.agents/skills/earos-report/references/portfolio-template.md +181 -0
- package/assets/init/.agents/skills/earos-report/references/single-artifact-template.md +168 -0
- package/assets/init/.agents/skills/earos-review/SKILL.md +130 -0
- package/assets/init/.agents/skills/earos-review/references/challenge-patterns.md +163 -0
- package/assets/init/.agents/skills/earos-review/references/output-template.md +180 -0
- package/assets/init/.agents/skills/earos-template-fill/SKILL.md +177 -0
- package/assets/init/.agents/skills/earos-template-fill/references/evidence-writing-guide.md +186 -0
- package/assets/init/.agents/skills/earos-template-fill/references/section-rubric-mapping.md +200 -0
- package/assets/init/.agents/skills/earos-validate/SKILL.md +113 -0
- package/assets/init/.agents/skills/earos-validate/references/fix-patterns.md +281 -0
- package/assets/init/.agents/skills/earos-validate/references/validation-checks.md +287 -0
- package/assets/init/.claude/CLAUDE.md +4 -0
- package/assets/init/AGENTS.md +293 -0
- package/assets/init/CLAUDE.md +635 -0
- package/assets/init/README.md +507 -0
- package/assets/init/calibration/gold-set/.gitkeep +0 -0
- package/assets/init/calibration/results/.gitkeep +0 -0
- package/assets/init/core/core-meta-rubric.yaml +643 -0
- package/assets/init/docs/consistency-report.md +325 -0
- package/assets/init/docs/getting-started.md +194 -0
- package/assets/init/docs/profile-authoring-guide.md +51 -0
- package/assets/init/docs/terminology.md +126 -0
- package/assets/init/earos.manifest.yaml +104 -0
- package/assets/init/evaluations/.gitkeep +0 -0
- package/assets/init/examples/aws-event-driven-order-processing/artifact.yaml +2056 -0
- package/assets/init/examples/aws-event-driven-order-processing/evaluation.yaml +973 -0
- package/assets/init/examples/aws-event-driven-order-processing/report.md +244 -0
- package/assets/init/examples/example-solution-architecture.evaluation.yaml +136 -0
- package/assets/init/examples/multi-cloud-data-analytics/artifact.yaml +715 -0
- package/assets/init/overlays/data-governance.yaml +94 -0
- package/assets/init/overlays/regulatory.yaml +154 -0
- package/assets/init/overlays/security.yaml +92 -0
- package/assets/init/profiles/adr.yaml +225 -0
- package/assets/init/profiles/capability-map.yaml +223 -0
- package/assets/init/profiles/reference-architecture.yaml +426 -0
- package/assets/init/profiles/roadmap.yaml +205 -0
- package/assets/init/profiles/solution-architecture.yaml +227 -0
- package/assets/init/research/architecture-assessment-rubrics-research.docx +0 -0
- package/assets/init/research/architecture-assessment-rubrics-research.md +566 -0
- package/assets/init/research/reference-architecture-research.md +751 -0
- package/assets/init/standard/EAROS.md +1426 -0
- package/assets/init/standard/schemas/artifact.schema.json +1295 -0
- package/assets/init/standard/schemas/artifact.uischema.json +65 -0
- package/assets/init/standard/schemas/evaluation.schema.json +284 -0
- package/assets/init/standard/schemas/rubric.schema.json +383 -0
- package/assets/init/templates/evaluation-record.template.yaml +58 -0
- package/assets/init/templates/new-profile.template.yaml +65 -0
- package/bin.js +188 -0
- package/dist/assets/_basePickBy-BVu6YmSW.js +1 -0
- package/dist/assets/_baseUniq-CWRzQDz_.js +1 -0
- package/dist/assets/arc-CyDBhtDM.js +1 -0
- package/dist/assets/architectureDiagram-2XIMDMQ5-BH6O4dvN.js +36 -0
- package/dist/assets/blockDiagram-WCTKOSBZ-2xmwdjpg.js +132 -0
- package/dist/assets/c4Diagram-IC4MRINW-BNmPRFJF.js +10 -0
- package/dist/assets/channel-CiySTNoJ.js +1 -0
- package/dist/assets/chunk-4BX2VUAB-DGQTvirp.js +1 -0
- package/dist/assets/chunk-55IACEB6-DNMAQAC_.js +1 -0
- package/dist/assets/chunk-FMBD7UC4-BJbVTQ5o.js +15 -0
- package/dist/assets/chunk-JSJVCQXG-BCxUL74A.js +1 -0
- package/dist/assets/chunk-KX2RTZJC-H7wWZOfz.js +1 -0
- package/dist/assets/chunk-NQ4KR5QH-BK4RlTQF.js +220 -0
- package/dist/assets/chunk-QZHKN3VN-0chxDV5g.js +1 -0
- package/dist/assets/chunk-WL4C6EOR-DexfQ-AV.js +189 -0
- package/dist/assets/classDiagram-VBA2DB6C-D7luWJQn.js +1 -0
- package/dist/assets/classDiagram-v2-RAHNMMFH-D7luWJQn.js +1 -0
- package/dist/assets/clone-ylgRbd3D.js +1 -0
- package/dist/assets/cose-bilkent-S5V4N54A-DS2IOCfZ.js +1 -0
- package/dist/assets/cytoscape.esm-CyJtwmzi.js +331 -0
- package/dist/assets/dagre-KLK3FWXG-BbSoTTa3.js +4 -0
- package/dist/assets/defaultLocale-DX6XiGOO.js +1 -0
- package/dist/assets/diagram-E7M64L7V-C9TvYgv0.js +24 -0
- package/dist/assets/diagram-IFDJBPK2-DowUMWrg.js +43 -0
- package/dist/assets/diagram-P4PSJMXO-BL6nrnQF.js +24 -0
- package/dist/assets/erDiagram-INFDFZHY-rXPRl8VM.js +70 -0
- package/dist/assets/flowDiagram-PKNHOUZH-DBRM99-W.js +162 -0
- package/dist/assets/ganttDiagram-A5KZAMGK-INcWFsBT.js +292 -0
- package/dist/assets/gitGraphDiagram-K3NZZRJ6-DMwpfE91.js +65 -0
- package/dist/assets/graph-DLQn37b-.js +1 -0
- package/dist/assets/index-BFFITMT8.js +650 -0
- package/dist/assets/index-H7f6VTz1.css +1 -0
- package/dist/assets/infoDiagram-LFFYTUFH-B0f4TWRM.js +2 -0
- package/dist/assets/init-Gi6I4Gst.js +1 -0
- package/dist/assets/ishikawaDiagram-PHBUUO56-CsU6XimZ.js +70 -0
- package/dist/assets/journeyDiagram-4ABVD52K-CQ7ibNib.js +139 -0
- package/dist/assets/kanban-definition-K7BYSVSG-DzEN7THt.js +89 -0
- package/dist/assets/katex-B1X10hvy.js +261 -0
- package/dist/assets/layout-C0dvb42R.js +1 -0
- package/dist/assets/linear-j4a8mGj7.js +1 -0
- package/dist/assets/mindmap-definition-YRQLILUH-DP8iEuCf.js +68 -0
- package/dist/assets/ordinal-Cboi1Yqb.js +1 -0
- package/dist/assets/pieDiagram-SKSYHLDU-BpIAXgAm.js +30 -0
- package/dist/assets/quadrantDiagram-337W2JSQ-DrpXn5Eg.js +7 -0
- package/dist/assets/requirementDiagram-Z7DCOOCP-Bg7EwHlG.js +73 -0
- package/dist/assets/sankeyDiagram-WA2Y5GQK-BWagRs1F.js +10 -0
- package/dist/assets/sequenceDiagram-2WXFIKYE-q5jwhivG.js +145 -0
- package/dist/assets/stateDiagram-RAJIS63D-B_J9pE-2.js +1 -0
- package/dist/assets/stateDiagram-v2-FVOUBMTO-Q_1GcybB.js +1 -0
- package/dist/assets/timeline-definition-YZTLITO2-dv0jgQ0z.js +61 -0
- package/dist/assets/treemap-KZPCXAKY-Dt1dkIE7.js +162 -0
- package/dist/assets/vennDiagram-LZ73GAT5-BdO5RgRZ.js +34 -0
- package/dist/assets/xychartDiagram-JWTSCODW-CpDVe-8v.js +7 -0
- package/dist/index.html +23 -0
- package/export-docx.js +1583 -0
- package/init.js +353 -0
- package/manifest-cli.mjs +207 -0
- package/package.json +83 -0
- package/schemas/artifact.schema.json +1295 -0
- package/schemas/artifact.uischema.json +65 -0
- package/schemas/evaluation.schema.json +284 -0
- package/schemas/rubric.schema.json +383 -0
- package/serve.js +238 -0
|
@@ -0,0 +1,643 @@
|
|
|
1
|
+
rubric_id: EAROS-CORE-002
|
|
2
|
+
version: 2.0.0
|
|
3
|
+
kind: core_rubric
|
|
4
|
+
title: EAROS Core Meta-Rubric v2.0
|
|
5
|
+
status: draft
|
|
6
|
+
effective_date: "2026-03-18"
|
|
7
|
+
next_review_date: "2026-09-18"
|
|
8
|
+
owner: enterprise-architecture
|
|
9
|
+
artifact_type: architecture_artifact
|
|
10
|
+
purpose:
|
|
11
|
+
- design_review
|
|
12
|
+
- governance_review
|
|
13
|
+
- assurance_review
|
|
14
|
+
- agent_evaluation
|
|
15
|
+
stakeholders:
|
|
16
|
+
- architecture_board
|
|
17
|
+
- domain_architect
|
|
18
|
+
- delivery_lead
|
|
19
|
+
- risk
|
|
20
|
+
- security
|
|
21
|
+
- operations
|
|
22
|
+
viewpoints:
|
|
23
|
+
- context
|
|
24
|
+
- functional
|
|
25
|
+
- information
|
|
26
|
+
- integration
|
|
27
|
+
- deployment
|
|
28
|
+
- roadmap
|
|
29
|
+
- decision
|
|
30
|
+
|
|
31
|
+
dimensions:
|
|
32
|
+
- id: D1
|
|
33
|
+
name: Stakeholder and purpose fit
|
|
34
|
+
description: Does the artifact identify stakeholders, decision intent, and the concerns it must address?
|
|
35
|
+
weight: 1.0
|
|
36
|
+
criteria:
|
|
37
|
+
- id: STK-01
|
|
38
|
+
question: Does the artifact explicitly identify intended stakeholders, decision purpose, and review context?
|
|
39
|
+
description: The artifact should make clear who it is for, why it exists, and what decision it supports.
|
|
40
|
+
metric_type: ordinal
|
|
41
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
42
|
+
gate:
|
|
43
|
+
enabled: true
|
|
44
|
+
severity: major
|
|
45
|
+
failure_effect: Cannot pass above conditional_pass
|
|
46
|
+
required_evidence:
|
|
47
|
+
- purpose statement
|
|
48
|
+
- stakeholder list
|
|
49
|
+
- decision or usage context
|
|
50
|
+
scoring_guide:
|
|
51
|
+
"0": Absent or contradicted
|
|
52
|
+
"1": Implied only
|
|
53
|
+
"2": Explicit but incomplete
|
|
54
|
+
"3": Explicit and mostly complete
|
|
55
|
+
"4": Explicit, complete, and used consistently
|
|
56
|
+
anti_patterns:
|
|
57
|
+
- Generic audience only
|
|
58
|
+
- No stated decision intent
|
|
59
|
+
examples:
|
|
60
|
+
good:
|
|
61
|
+
- "This document supports the Architecture Board review of the Payments platform migration (Q3 2026). Primary stakeholders: CTO, Head of Payments, Security Architecture."
|
|
62
|
+
bad:
|
|
63
|
+
- "Audience: technical stakeholders"
|
|
64
|
+
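decision_tree: "IF no purpose section exists THEN score 0. IF purpose exists but no named stakeholders THEN score 1. IF stakeholders named but no decision context THEN score 2. IF purpose, stakeholders, and decision context are explicit and mostly complete THEN score 3. IF they are explicit, complete, and used consistently throughout the artifact THEN score 4."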
|
|
65
|
+
remediation_hints:
|
|
66
|
+
- Add a purpose section
|
|
67
|
+
- Add a stakeholder-concern table
|
|
68
|
+
|
|
69
|
+
- id: STK-02
|
|
70
|
+
question: Are concerns mapped to the views or sections used in the artifact?
|
|
71
|
+
description: >
|
|
72
|
+
Views that do not address specific stakeholder concerns waste reviewer time and may
|
|
73
|
+
miss critical perspectives. This criterion checks whether the artifact's structure
|
|
74
|
+
is deliberately aligned to the concerns that motivated it. An artifact with many
|
|
75
|
+
diagrams and no mapping between diagrams and concerns forces reviewers to guess
|
|
76
|
+
which content is relevant to which decision.
|
|
77
|
+
metric_type: ordinal
|
|
78
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
79
|
+
gate: false
|
|
80
|
+
required_evidence:
|
|
81
|
+
- concern-to-view mapping
|
|
82
|
+
- section structure
|
|
83
|
+
- legend or reading guide
|
|
84
|
+
scoring_guide:
|
|
85
|
+
"0": No mapping — views exist with no indication of which concern they address
|
|
86
|
+
"1": Weak implied mapping — structure implies alignment but nothing is explicit
|
|
87
|
+
"2": Partial mapping — some views mapped to concerns, significant gaps
|
|
88
|
+
"3": Mostly complete mapping — most views have an explicit concern or audience
|
|
89
|
+
"4": Clear and consistent mapping — every view mapped to a concern, reading guide present
|
|
90
|
+
anti_patterns:
|
|
91
|
+
- Many diagrams with no explanation of purpose or intended audience
|
|
92
|
+
- Views do not align to the stated stakeholder concerns
|
|
93
|
+
- Reading guide absent despite complex multi-view artifact
|
|
94
|
+
examples:
|
|
95
|
+
good:
|
|
96
|
+
- >
|
|
97
|
+
"Reading guide: Section 3 (Context diagram) → CTO, Head of Product (strategic
|
|
98
|
+
positioning). Section 5 (Deployment view) → Security, Operations (topology and
|
|
99
|
+
resilience). Section 6 (Data flow) → Data Governance, Privacy (information
|
|
100
|
+
lifecycle). Section 7 (API contracts) → Engineering Lead (integration constraints)."
|
|
101
|
+
bad:
|
|
102
|
+
- >
|
|
103
|
+
"Contains 8 diagrams with no explanation of which concerns they address or
|
|
104
|
+
which audience each diagram is for."
|
|
105
|
+
decision_tree: >
|
|
106
|
+
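IF no views present, or views exist with no indication of which concern or audience they address, THEN score 0.
|
|
107
|
+
IF the structure or section headings imply a concern mapping but nothing is explicit THEN score 1.
|
|
108
|
+
IF some views are explicitly mapped to concerns but significant gaps remain THEN score 2.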
|
|
109
|
+
IF concerns mapped to most views explicitly THEN score 3.
|
|
110
|
+
IF every view is explicitly mapped to one or more concerns AND a reading guide is present THEN score 4.
|
|
111
|
+
remediation_hints:
|
|
112
|
+
- Add a view inventory table (view, audience, concern addressed)
|
|
113
|
+
- Explain why each view exists and what decision it supports
|
|
114
|
+
- Remove views that serve no identifiable concern
|
|
115
|
+
|
|
116
|
+
- id: D2
|
|
117
|
+
name: Scope and boundary clarity
|
|
118
|
+
description: Does the artifact define what is in scope, out of scope, and the boundaries of the architecture?
|
|
119
|
+
weight: 1.0
|
|
120
|
+
criteria:
|
|
121
|
+
- id: SCP-01
|
|
122
|
+
question: Does the artifact define scope, boundaries, assumptions, and exclusions?
|
|
123
|
+
description: >
|
|
124
|
+
Without a clear scope, reviewers cannot determine what is being assessed, what is
|
|
125
|
+
out of bounds, or what assumptions underpin the design. Scope ambiguity is the
|
|
126
|
+
single most common cause of architecture review failure — reviewers challenge the
|
|
127
|
+
artifact on points that are explicitly out of scope, or miss gaps because the
|
|
128
|
+
scope was never stated.
|
|
129
|
+
metric_type: ordinal
|
|
130
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
131
|
+
gate:
|
|
132
|
+
enabled: true
|
|
133
|
+
severity: critical
|
|
134
|
+
failure_effect: not_reviewable when score < 2
|
|
135
|
+
required_evidence:
|
|
136
|
+
- scope statement
|
|
137
|
+
- in/out list
|
|
138
|
+
- boundary definition
|
|
139
|
+
- assumptions
|
|
140
|
+
scoring_guide:
|
|
141
|
+
"0": No scope or boundary
|
|
142
|
+
"1": Scope is ambiguous
|
|
143
|
+
"2": Basic scope exists but is incomplete
|
|
144
|
+
"3": Scope and boundaries are clear
|
|
145
|
+
"4": Scope and boundaries are clear, tested, and internally consistent
|
|
146
|
+
anti_patterns:
|
|
147
|
+
- Everything is in scope — no explicit exclusions or boundaries
|
|
148
|
+
- System boundary changes silently between diagrams
|
|
149
|
+
- Scope statement so broad it provides no constraint
|
|
150
|
+
examples:
|
|
151
|
+
good:
|
|
152
|
+
- >
|
|
153
|
+
"In scope: Payments service, Notification service, upstream Banking Core API.
|
|
154
|
+
Out of scope: Authentication (handled by IAM platform — see IAM-2024-001),
|
|
155
|
+
analytics pipeline, reporting layer. Assumptions: Banking Core API versioned
|
|
156
|
+
contract stable for 12 months. Boundary: Does not cover the customer mobile app
|
|
157
|
+
or internal staff portal."
|
|
158
|
+
bad:
|
|
159
|
+
- >
|
|
160
|
+
"This document covers the relevant parts of the payments system.
|
|
161
|
+
[No explicit boundary, no exclusions, no assumptions]"
|
|
162
|
+
decision_tree: "IF no scope section THEN score 0. IF scope exists but no exclusions listed THEN max score 2. IF assumptions not stated THEN max score 3."
|
|
163
|
+
remediation_hints:
|
|
164
|
+
- Add an explicit in-scope / out-of-scope list with rationale for exclusions
|
|
165
|
+
- Define the system context boundary using a C4 context diagram or equivalent
|
|
166
|
+
- List all assumptions with the consequence if each is violated
|
|
167
|
+
|
|
168
|
+
- id: D3
|
|
169
|
+
name: Concern coverage and viewpoint appropriateness
|
|
170
|
+
description: >
|
|
171
|
+
The choice of representational style — deployment diagram, sequence diagram, capability
|
|
172
|
+
map — must be fit for the decision purpose. An artifact that uses deployment diagrams
|
|
173
|
+
to answer business strategy questions, or sequence diagrams when the audience needs
|
|
174
|
+
topology, fails to communicate effectively regardless of technical accuracy.
|
|
175
|
+
weight: 1.0
|
|
176
|
+
criteria:
|
|
177
|
+
- id: CVP-01
|
|
178
|
+
question: Do the selected views and representations fit the stakeholder concerns and review purpose?
|
|
179
|
+
description: >
|
|
180
|
+
Views must be selected for their audience, not for their familiarity to the author.
|
|
181
|
+
A solution architecture that shows only UML class diagrams fails an operations
|
|
182
|
+
reviewer who needs a deployment view. A reference architecture that only shows
|
|
183
|
+
deployment topology fails a business reviewer who needs a capability view. This
|
|
184
|
+
criterion checks fitness-for-purpose, not technical accuracy.
|
|
185
|
+
metric_type: ordinal
|
|
186
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
187
|
+
gate: false
|
|
188
|
+
required_evidence:
|
|
189
|
+
- view inventory
|
|
190
|
+
- stakeholder concerns
|
|
191
|
+
- rationale for representations chosen
|
|
192
|
+
scoring_guide:
|
|
193
|
+
"0": Views are unfit for the stated purpose or missing entirely
|
|
194
|
+
"1": Views weakly fit — partially relevant but major concerns have no corresponding view
|
|
195
|
+
"2": Views cover some concerns — key concerns have views, others do not
|
|
196
|
+
"3": Views cover most concerns appropriately — level of abstraction matches audience
|
|
197
|
+
"4": Views are well chosen, explicitly justified, and materially support the decision at hand
|
|
198
|
+
anti_patterns:
|
|
199
|
+
- Wrong level of abstraction for the audience (e.g. class diagrams for executive review)
|
|
200
|
+
- Diagram-first approach with no stated decision intent
|
|
201
|
+
- Views repeated across multiple formats without purpose differentiation
|
|
202
|
+
examples:
|
|
203
|
+
good:
|
|
204
|
+
- >
|
|
205
|
+
"Business stakeholders: capability map and context diagram (Sections 2–3).
|
|
206
|
+
Delivery team: container diagram and sequence diagram for the three critical
|
|
207
|
+
flows (Sections 4–5). Operations: deployment diagram and SLO definition
|
|
208
|
+
(Sections 6–7). Security: threat model and control mapping (Section 8)."
|
|
209
|
+
bad:
|
|
210
|
+
- >
|
|
211
|
+
"Twenty UML diagrams with no explanation of which stakeholder need each one
|
|
212
|
+
addresses, at varying abstraction levels. [No fitness-for-purpose assessment]"
|
|
213
|
+
decision_tree: >
|
|
214
|
+
IF views have no evident relation to stated stakeholder concerns THEN score 0.
|
|
215
|
+
IF views are only partially relevant and major concerns have no corresponding view THEN score 1. IF key concerns have views but other concerns do not THEN score 2.
|
|
216
|
+
IF views cover most concerns at appropriate abstraction levels THEN score 3.
|
|
217
|
+
IF view selection is explicitly justified for each concern AND views cross-reference each other THEN score 4.
|
|
218
|
+
remediation_hints:
|
|
219
|
+
- Remove decorative views that address no specific stakeholder concern
|
|
220
|
+
- Add missing stakeholder-focused views (e.g. operational view for operations audience)
|
|
221
|
+
- State the intended audience and concern for each view
|
|
222
|
+
|
|
223
|
+
- id: D4
|
|
224
|
+
name: Traceability to drivers, requirements, and principles
|
|
225
|
+
description: >
|
|
226
|
+
An architecture that cannot be traced to the business drivers that motivated it cannot
|
|
227
|
+
be objectively assessed, challenged, or evolved. Without traceability, reviewers must
|
|
228
|
+
accept the architecture on faith rather than evidence — and delivery teams cannot
|
|
229
|
+
know which design choices are constraints versus preferences.
|
|
230
|
+
weight: 1.0
|
|
231
|
+
criteria:
|
|
232
|
+
- id: TRC-01
|
|
233
|
+
question: Are business drivers, objectives, principles, or requirements traceably connected to the architecture content?
|
|
234
|
+
description: >
|
|
235
|
+
Listing drivers in an introduction section without using them is not traceability.
|
|
236
|
+
Traceability means each significant architectural decision or structural choice can
|
|
237
|
+
be linked to the driver, requirement, or principle that necessitated it. This enables
|
|
238
|
+
reviewers to challenge whether a decision was truly required, and allows future
|
|
239
|
+
maintainers to understand which constraints are still live.
|
|
240
|
+
metric_type: ordinal
|
|
241
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
242
|
+
gate:
|
|
243
|
+
enabled: true
|
|
244
|
+
severity: major
|
|
245
|
+
failure_effect: Cannot pass if score < 2
|
|
246
|
+
required_evidence:
|
|
247
|
+
- driver list or requirements list
|
|
248
|
+
- requirements traceability (explicit links from driver to design element)
|
|
249
|
+
- principle references in design decisions
|
|
250
|
+
scoring_guide:
|
|
251
|
+
"0": No traceability — no drivers, requirements, or principles referenced
|
|
252
|
+
"1": Loose narrative only — drivers mentioned in introduction but not connected to design
|
|
253
|
+
"2": Partial traceability — some decisions linked to drivers, material gaps remain
|
|
254
|
+
"3": Clear traceability for most important items — key decisions have explicit driver links
|
|
255
|
+
"4": Consistent traceability for all decision-relevant items — matrix or explicit markup throughout
|
|
256
|
+
anti_patterns:
|
|
257
|
+
- Drivers listed in Section 1 but never referenced in the architecture
|
|
258
|
+
- Principles cited in a sidebar without effect on design choices
|
|
259
|
+
- Requirements traceability matrix present but disconnected from actual design decisions
|
|
260
|
+
examples:
|
|
261
|
+
good:
|
|
262
|
+
- >
|
|
263
|
+
"Business driver: Reduce fraud rate by 30%. → Architecture response: Real-time
|
|
264
|
+
fraud scoring service added (Section 4.2, Decision D-07). Principle: API-first.
|
|
265
|
+
→ All integrations use REST/OpenAPI (API inventory, Appendix B). Requirement:
|
|
266
|
+
Data residency EU. → All data stores in eu-west-1, enforced by policy (Section 5.1)."
|
|
267
|
+
bad:
|
|
268
|
+
- >
|
|
269
|
+
"The architecture supports the company strategy and aligns with our principles.
|
|
270
|
+
[No specific links from drivers to design elements]"
|
|
271
|
+
decision_tree: >
|
|
272
|
+
IF no drivers or requirements listed THEN score 0.
|
|
273
|
+
IF drivers listed but not connected to any design element THEN score 1.
|
|
274
|
+
IF some design elements trace to drivers but material gaps remain THEN score 2.
|
|
275
|
+
IF most important drivers trace to architecture decisions THEN score 3.
|
|
276
|
+
IF all decision-relevant drivers traceable AND traceability consistent throughout THEN score 4.
|
|
277
|
+
remediation_hints:
|
|
278
|
+
- Add a traceability matrix linking requirements to design sections
|
|
279
|
+
- Annotate each key architectural decision with the driver that required it
|
|
280
|
+
- Remove drivers that have no effect on the architecture (or explain why)
|
|
281
|
+
|
|
282
|
+
- id: D5
|
|
283
|
+
name: Internal consistency and integrity
|
|
284
|
+
description: >
|
|
285
|
+
Inconsistencies between views or sections undermine trust and create ambiguity for
|
|
286
|
+
implementers. A service called 'PaymentProcessor' in one diagram and 'PaymentService'
|
|
287
|
+
in another suggests the author conflated two different things — or worse, did not
|
|
288
|
+
notice the divergence. Internal consistency is a necessary condition for a reviewable
|
|
289
|
+
artifact.
|
|
290
|
+
weight: 1.0
|
|
291
|
+
criteria:
|
|
292
|
+
- id: CON-01
|
|
293
|
+
question: Are terms, structures, interfaces, and facts consistent across sections and views?
|
|
294
|
+
description: >
|
|
295
|
+
Inconsistencies between views or sections undermine trust and create implementation
|
|
296
|
+
ambiguity. When the same entity has different names, different interfaces, or
|
|
297
|
+
different responsibilities across diagrams, implementers cannot determine the
|
|
298
|
+
authoritative definition. This is particularly critical for API contracts, component
|
|
299
|
+
names, and data entity definitions which will be implemented as code.
|
|
300
|
+
metric_type: ordinal
|
|
301
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
302
|
+
gate: false
|
|
303
|
+
required_evidence:
|
|
304
|
+
- consistent terminology across sections
|
|
305
|
+
- cross-view agreement on component names, interfaces, and responsibilities
|
|
306
|
+
- legend or glossary for key terms
|
|
307
|
+
scoring_guide:
|
|
308
|
+
"0": Contradictory — direct conflicts between sections that cannot both be correct
|
|
309
|
+
"1": Frequent inconsistencies — same entity named or specified differently in multiple places
|
|
310
|
+
"2": Some inconsistencies remain — minor naming or interface mismatches
|
|
311
|
+
"3": Mostly consistent — terminology and interfaces agree across main views
|
|
312
|
+
"4": Consistent and actively normalised — glossary present, entities reconciled, cross-references verified
|
|
313
|
+
anti_patterns:
|
|
314
|
+
- Different names for the same component or entity in different diagrams
|
|
315
|
+
- Interface signatures differ between the API contract and the sequence diagram
|
|
316
|
+
- Technology stack differs between deployment view and implementation notes
|
|
317
|
+
examples:
|
|
318
|
+
good:
|
|
319
|
+
- >
|
|
320
|
+
"Glossary on page 2 defines 'Payment Service' — used consistently throughout
|
|
321
|
+
all 12 diagrams. Entity names, API contracts, and deployment configurations
|
|
322
|
+
all refer to the same 15 named components with matching names."
|
|
323
|
+
bad:
|
|
324
|
+
- >
|
|
325
|
+
"The context diagram shows 'Auth Service' but the deployment diagram references
|
|
326
|
+
'IAM Component' and the sequence diagram shows 'LoginService'. The API contract
|
|
327
|
+
shows a different endpoint signature than the sequence diagram."
|
|
328
|
+
decision_tree: >
|
|
329
|
+
IF direct contradictions between sections THEN score 0.
|
|
330
|
+
IF same entity has different names or interfaces in multiple views THEN score 1.
|
|
331
|
+
IF minor naming inconsistencies only THEN score 2.
|
|
332
|
+
IF terminology and interfaces consistent across main views THEN score 3.
|
|
333
|
+
IF glossary present AND entities reconciled AND cross-references verified THEN score 4.
|
|
334
|
+
remediation_hints:
|
|
335
|
+
- Create a component/entity glossary and enforce consistent naming throughout
|
|
336
|
+
- Reconcile interface definitions across all views (API contract, sequence diagram, deployment view)
|
|
337
|
+
- Assign a single authoritative source for each entity definition
|
|
338
|
+
|
|
339
|
+
- id: D6
|
|
340
|
+
name: Risks, assumptions, constraints, and tradeoffs
|
|
341
|
+
description: >
|
|
342
|
+
Architecture is always a set of trade-offs, not a set of optimal choices. An artifact
|
|
343
|
+
that only presents upsides is describing a fantasy, not a real architecture. Explicitly
|
|
344
|
+
documenting risks, assumptions, constraints, and trade-offs is what distinguishes a
|
|
345
|
+
decision-ready architecture from a marketing presentation.
|
|
346
|
+
weight: 1.0
|
|
347
|
+
criteria:
|
|
348
|
+
- id: RAT-01
|
|
349
|
+
question: Does the artifact identify key risks, assumptions, constraints, and tradeoffs relevant to the decision?
|
|
350
|
+
description: >
|
|
351
|
+
Every significant architectural decision involves trade-offs that are accepted, not
|
|
352
|
+
solved. These trade-offs — complexity vs. simplicity, consistency vs. availability,
|
|
353
|
+
cost vs. capability — must be made visible so that reviewers can assess whether
|
|
354
|
+
the trade-offs are acceptable and delivery teams understand the design intent.
|
|
355
|
+
Assumptions that are buried or unstated become implicit risks.
|
|
356
|
+
metric_type: ordinal
|
|
357
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
358
|
+
gate: false
|
|
359
|
+
required_evidence:
|
|
360
|
+
- risk list with mitigations and owners
|
|
361
|
+
- stated assumptions
|
|
362
|
+
- constraints (technical, legal, organisational)
|
|
363
|
+
- trade-off discussion
|
|
364
|
+
scoring_guide:
|
|
365
|
+
"0": Not addressed — risks, assumptions, and trade-offs entirely absent
|
|
366
|
+
"1": Mentioned superficially — 'there are some risks that will need to be addressed'
|
|
367
|
+
"2": Partial treatment — risks listed without mitigations, or trade-offs mentioned without analysis
|
|
368
|
+
"3": Material issues treated — key risks, assumptions, and trade-offs documented with owners
|
|
369
|
+
"4": Trade-offs and consequences are explicit, evidence-based, and decision-useful — all material risks mitigated or accepted with rationale
|
|
370
|
+
anti_patterns:
|
|
371
|
+
- Only upsides presented — risks section omitted or deferred
|
|
372
|
+
- Risks section says 'TBD' or 'to be assessed'
|
|
373
|
+
- Trade-offs acknowledged but no design response or acceptance decision
|
|
374
|
+
examples:
|
|
375
|
+
good:
|
|
376
|
+
- >
|
|
377
|
+
"Risk: Event sourcing adds operational complexity for the team. Mitigation: Team
|
|
378
|
+
training in Q2. Residual risk: Medium. Owner: Platform Lead. Assumption: Banking
|
|
379
|
+
Core API remains stable for 12 months — if violated, migration scope increases
|
|
380
|
+
significantly. Trade-off: Accepted higher operational complexity in exchange for
|
|
381
|
+
auditability and decoupling (see Decision D-04)."
|
|
382
|
+
bad:
|
|
383
|
+
- "Risks: TBD. Assumptions: TBD. [No content]"
|
|
384
|
+
decision_tree: >
|
|
385
|
+
IF risks section absent or entirely TBD THEN score 0.
|
|
386
|
+
IF risks mentioned in passing with no specifics THEN score 1.
|
|
387
|
+
IF risks listed but no mitigations or owners THEN score 2.
|
|
388
|
+
IF material risks, assumptions, and trade-offs documented with owners THEN score 3.
|
|
389
|
+
IF all material risks mitigated or accepted with rationale AND trade-offs explicitly balanced THEN score 4.
|
|
390
|
+
remediation_hints:
|
|
391
|
+
- Add a RAID log or risk table with mitigation and owner columns
|
|
392
|
+
- Record assumptions with the consequence if each assumption is violated
|
|
393
|
+
- Explicitly state the trade-offs accepted in each key design decision
|
|
394
|
+
|
|
395
|
+
- id: D7
|
|
396
|
+
name: Standards and policy compliance
|
|
397
|
+
description: >
|
|
398
|
+
Architecture artifacts in enterprise contexts must demonstrate alignment with applicable
|
|
399
|
+
standards, policies, and mandatory controls — especially security, data protection,
|
|
400
|
+
and regulatory requirements. Compliance by assertion (saying 'we comply') is
|
|
401
|
+
insufficient; the artifact must show how the design meets each relevant control,
|
|
402
|
+
and must document exceptions with owners and remediation timelines.
|
|
403
|
+
weight: 1.0
|
|
404
|
+
criteria:
|
|
405
|
+
- id: CMP-01
|
|
406
|
+
question: Does the artifact show alignment to applicable architecture standards, policies, and mandatory controls?
|
|
407
|
+
description: >
|
|
408
|
+
Architecture artifacts operate within a governed context. Standards references that
|
|
409
|
+
appear in a sidebar without effect on design choices do not constitute compliance.
|
|
410
|
+
Compliance must be demonstrated through explicit control-to-design mappings: which
|
|
411
|
+
design element satisfies which control, who owns the evidence, and what exceptions
|
|
412
|
+
have been approved. Without this, governance boards cannot discharge their oversight
|
|
413
|
+
responsibility.
|
|
414
|
+
metric_type: ordinal
|
|
415
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
416
|
+
gate:
|
|
417
|
+
enabled: true
|
|
418
|
+
severity: critical
|
|
419
|
+
failure_effect: reject when mandatory control compliance cannot be determined
|
|
420
|
+
required_evidence:
|
|
421
|
+
- references to applicable standards or policies
|
|
422
|
+
- control-to-design mapping
|
|
423
|
+
- exceptions list with approval and owner
|
|
424
|
+
scoring_guide:
|
|
425
|
+
"0": No compliance treatment — standards and controls not referenced
|
|
426
|
+
"1": Assertions without evidence — 'solution will comply with all standards'
|
|
427
|
+
"2": Partial compliance view — some controls mapped, material gaps remain
|
|
428
|
+
"3": Compliance treated for most material controls — mappings present with owners
|
|
429
|
+
"4": Compliance and exceptions are explicit, evidence-backed, and owned — full control mapping with approved exception log
|
|
430
|
+
anti_patterns:
|
|
431
|
+
- Compliant by assumption — no specific standards or controls referenced
|
|
432
|
+
- Exceptions exist but are not documented or approved
|
|
433
|
+
- Standards listed without any mapping to design elements
|
|
434
|
+
examples:
|
|
435
|
+
good:
|
|
436
|
+
- >
|
|
437
|
+
"Enterprise Security Standard ESS-01: satisfied by mutual TLS on all
|
|
438
|
+
service-to-service calls (Section 5.1). GDPR Article 17 (Right to Erasure):
|
|
439
|
+
implemented via async delete event to all downstream stores (Section 4.3).
|
|
440
|
+
Exception: ESS-03 (password complexity) — not applicable for service accounts;
|
|
441
|
+
exception approved by CISO 2026-02-01, expires 2026-08-01."
|
|
442
|
+
bad:
|
|
443
|
+
- >
|
|
444
|
+
"The solution will comply with all applicable security standards and regulations.
|
|
445
|
+
[No specific standards named, no control mappings, no exceptions]"
|
|
446
|
+
decision_tree: >
|
|
447
|
+
IF no standards or controls referenced THEN score 0.
|
|
448
|
+
IF compliance is asserted or standards are cited but nothing is mapped to design elements THEN score 1.
|
|
449
|
+
IF some controls mapped but material compliance gaps THEN score 2.
|
|
450
|
+
IF most material controls addressed with evidence and owners THEN score 3.
|
|
451
|
+
IF all controls addressed, exceptions documented with approvals and owners THEN score 4.
|
|
452
|
+
remediation_hints:
|
|
453
|
+
- Map each applicable control to the design element that satisfies it
|
|
454
|
+
- Log exceptions with approver, approval date, and expiry or remediation plan
|
|
455
|
+
- Remove generic compliance assertions and replace with specific evidence
|
|
456
|
+
|
|
457
|
+
- id: D8
|
|
458
|
+
name: Actionability and implementation relevance
|
|
459
|
+
description: >
|
|
460
|
+
Architecture artifacts exist to enable decisions and actions. An artifact that presents
|
|
461
|
+
information without actionable next steps, named owners, or clear decision outcomes
|
|
462
|
+
fails its purpose — regardless of how technically sophisticated the content is.
|
|
463
|
+
Interesting but unusable is a common failure mode for architecture documents.
|
|
464
|
+
weight: 1.0
|
|
465
|
+
criteria:
|
|
466
|
+
- id: ACT-01
|
|
467
|
+
question: Can delivery and governance teams act on the artifact without major reinterpretation?
|
|
468
|
+
description: >
|
|
469
|
+
Architecture artifacts must be decision-ready. Delivery teams should be able to
|
|
470
|
+
read the document and know what to build. Governance teams should be able to
|
|
471
|
+
read it and know what they are approving. If either group must significantly
|
|
472
|
+
reinterpret the document to act on it, the artifact has failed its purpose.
|
|
473
|
+
Actionability requires explicit decisions, concrete next steps, and named owners.
|
|
474
|
+
metric_type: ordinal
|
|
475
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
476
|
+
gate: false
|
|
477
|
+
required_evidence:
|
|
478
|
+
- decision statement (what is approved or proposed)
|
|
479
|
+
- next actions with owners
|
|
480
|
+
- roadmap or governance references
|
|
481
|
+
scoring_guide:
|
|
482
|
+
"0": Not actionable — purely descriptive with no decisions or actions
|
|
483
|
+
"1": Heavily ambiguous — 'next steps to be determined' or 'team will decide'
|
|
484
|
+
"2": Partly actionable — some decisions made, some actions listed, significant gaps
|
|
485
|
+
"3": Mostly actionable — key decisions made, most actions owned and sequenced
|
|
486
|
+
"4": Actionable with clear ownership — all decisions explicit, all actions owned and dated, governance path clear
|
|
487
|
+
anti_patterns:
|
|
488
|
+
- Analytically interesting but operationally unusable
|
|
489
|
+
- No named owner for any action or decision
|
|
490
|
+
- All decisions deferred to future sessions
|
|
491
|
+
examples:
|
|
492
|
+
good:
|
|
493
|
+
- >
|
|
494
|
+
"Decision: Approve migration to event-driven architecture. Next actions:
|
|
495
|
+
(1) Platform team to provision Kafka cluster by 2026-04-01 [Owner: Platform Lead];
|
|
496
|
+
(2) Payments team to refactor Order Service by 2026-05-15 [Owner: Payments Lead].
|
|
497
|
+
Review trigger: Architecture Board checkpoint 2026-04-10."
|
|
498
|
+
bad:
|
|
499
|
+
- >
|
|
500
|
+
"Various options are discussed in this document. Further work will be required
|
|
501
|
+
to determine the path forward. [No decisions, no owners, no concrete next steps]"
|
|
502
|
+
decision_tree: >
|
|
503
|
+
IF no decision statements or next steps exist THEN score 0.
|
|
504
|
+
IF decisions or next steps are mentioned but heavily ambiguous or deferred (e.g. 'to be determined') THEN score 1.
|
|
505
|
+
IF some decisions stated but delivery path unclear or ownership absent THEN score 2.
|
|
506
|
+
IF most key decisions made with next steps and owners assigned THEN score 3.
|
|
507
|
+
IF all decisions explicit, all actions owned and dated, governance path clear THEN score 4.
|
|
508
|
+
remediation_hints:
|
|
509
|
+
- Add a decisions and actions section at the end of the document
|
|
510
|
+
- Assign a named owner to every action
|
|
511
|
+
- State the governance outcome explicitly (approved, conditional, rejected)
|
|
512
|
+
|
|
513
|
+
- id: D9
|
|
514
|
+
name: Artifact maintainability and stewardship
|
|
515
|
+
description: >
|
|
516
|
+
Architecture artifacts must be treated as living documents with explicit ownership,
|
|
517
|
+
not one-time deliverables. An artifact with no owner, no last-updated date, and no
|
|
518
|
+
change history is an orphan — it may be read but cannot be trusted, evolved, or
|
|
519
|
+
challenged. Stewardship is the difference between architecture as a governance asset
|
|
520
|
+
and architecture as a historical curiosity.
|
|
521
|
+
weight: 1.0
|
|
522
|
+
criteria:
|
|
523
|
+
- id: MNT-01
|
|
524
|
+
question: Does the artifact identify ownership, update expectations, and change history or provenance?
|
|
525
|
+
description: >
|
|
526
|
+
Architecture decisions made today will be challenged or superseded by teams years
|
|
527
|
+
from now. Without ownership and change history, a future maintainer cannot know
|
|
528
|
+
whether the document is current, who to contact with questions, or what has
|
|
529
|
+
changed since the last review. A well-stewarded artifact actively supports
|
|
530
|
+
governance by making its lifecycle state explicit.
|
|
531
|
+
metric_type: ordinal
|
|
532
|
+
scale: [0, 1, 2, 3, 4, "N/A"]
|
|
533
|
+
gate: false
|
|
534
|
+
required_evidence:
|
|
535
|
+
- named owner (individual or team)
|
|
536
|
+
- last updated date or version
|
|
537
|
+
- change log or provenance
|
|
538
|
+
scoring_guide:
|
|
539
|
+
"0": No stewardship information — no owner, no date, no change history
|
|
540
|
+
"1": Ownership unclear — author named but no ownership, no update date
|
|
541
|
+
"2": Basic stewardship exists — owner and date present, no change history or review cadence
|
|
542
|
+
"3": Stewardship and update expectations are clear — owner, version, cadence, and change history present
|
|
543
|
+
"4": Stewardship, provenance, and lifecycle handling are strong — full change history, review triggers, successor/deprecation plan
|
|
544
|
+
anti_patterns:
|
|
545
|
+
- Orphaned artifact — no owner and unknown last update
|
|
546
|
+
- Author present but no ongoing ownership responsibility defined
|
|
547
|
+
- No review cadence or supersession plan
|
|
548
|
+
examples:
|
|
549
|
+
good:
|
|
550
|
+
- >
|
|
551
|
+
"Owner: Enterprise Architecture, Payments Domain. Version: 2.1. Last reviewed:
|
|
552
|
+
2026-03-01. Next review: 2026-09-01 (trigger: post-migration assessment). Change
|
|
553
|
+
log: v2.1 updated Kafka cluster spec (2026-02); v2.0 added DR runbook (2026-01);
|
|
554
|
+
v1.0 initial draft (2025-09)."
|
|
555
|
+
bad:
|
|
556
|
+
- >
|
|
557
|
+
"Created by J. Smith. [No version, no date, no owner, no review information,
|
|
558
|
+
no change history]"
|
|
559
|
+
decision_tree: >
|
|
560
|
+
IF none of owner, date, version, or change history is present THEN score 0.
|
|
561
|
+
IF author named but no ongoing ownership or date THEN score 1.
|
|
562
|
+
IF owner and date present but no change history or review cadence THEN score 2.
|
|
563
|
+
IF owner, version, cadence, and change history all present THEN score 3.
|
|
564
|
+
IF full change history, review triggers, and lifecycle management including successor/deprecation plan are all present THEN score 4.
|
|
565
|
+
remediation_hints:
|
|
566
|
+
- Assign a named owner with an ongoing stewardship responsibility
|
|
567
|
+
- Add version number, last-updated date, and next-review date
|
|
568
|
+
- Create a change log — even one line per version is sufficient to start
|
|
569
|
+
|
|
570
|
+
scoring:
|
|
571
|
+
scale: 0-4 ordinal plus N/A
|
|
572
|
+
agent_scale: 0-3 ordinal plus N/A (optional collapse; must map in metadata)
|
|
573
|
+
method: gates_first_then_weighted_average
|
|
574
|
+
thresholds:
|
|
575
|
+
pass: No critical gate failure, overall >= 3.2, and no dimension < 2.0
|
|
576
|
+
conditional_pass: No critical gate failure, and either 2.4 <= overall < 3.2 or one weak dimension
|
|
577
|
+
rework_required: Overall < 2.4 or repeated weak dimensions
|
|
578
|
+
reject: Critical gate failure or mandatory control breach
|
|
579
|
+
not_reviewable: Evidence insufficient for core gate criteria
|
|
580
|
+
na_policy: Exclude N/A criteria from denominator; evaluator must justify N/A in narrative
|
|
581
|
+
confidence_policy: Confidence is reported separately and must not mathematically modify the score
|
|
582
|
+
reliability_targets:
|
|
583
|
+
binary_agreement: "> 95%"
|
|
584
|
+
ordinal_kappa: "> 0.70 (substantial) for well-defined criteria; > 0.50 (moderate) for subjective"
|
|
585
|
+
overall_correlation: "Spearman rho > 0.80"
|
|
586
|
+
|
|
587
|
+
outputs:
|
|
588
|
+
require_evidence_refs: true
|
|
589
|
+
require_confidence: true
|
|
590
|
+
require_actions: true
|
|
591
|
+
require_evidence_class: true
|
|
592
|
+
require_evidence_anchors: true
|
|
593
|
+
formats:
|
|
594
|
+
- yaml
|
|
595
|
+
- json
|
|
596
|
+
- markdown-report
|
|
597
|
+
- xlsx
|
|
598
|
+
|
|
599
|
+
agent_evaluation:
|
|
600
|
+
dag_steps:
|
|
601
|
+
- structural_validation
|
|
602
|
+
- content_extraction
|
|
603
|
+
- criterion_scoring
|
|
604
|
+
- cross_reference_validation
|
|
605
|
+
- dimension_aggregation
|
|
606
|
+
- challenge_pass
|
|
607
|
+
- calibration
|
|
608
|
+
- status_determination
|
|
609
|
+
rubric_locked: true
|
|
610
|
+
calibration_method: rulers_wasserstein
|
|
611
|
+
|
|
612
|
+
calibration:
|
|
613
|
+
required_before_production: true
|
|
614
|
+
minimum_examples: 3
|
|
615
|
+
recommended_reviewers:
|
|
616
|
+
- 2 human reviewers
|
|
617
|
+
- 1 evaluator agent
|
|
618
|
+
- 1 challenger agent
|
|
619
|
+
recalibration_triggers:
|
|
620
|
+
- profile changes materially
|
|
621
|
+
- new overlay introduced
|
|
622
|
+
- agreement drops below targets
|
|
623
|
+
- new artifact formats appear
|
|
624
|
+
- agent behaviour changes materially
|
|
625
|
+
- governance expectations change
|
|
626
|
+
|
|
627
|
+
change_log:
|
|
628
|
+
- version: "2.0.0"
|
|
629
|
+
date: "2026-03-18"
|
|
630
|
+
author: "Thomas Rohde"
|
|
631
|
+
changes:
|
|
632
|
+
- Added Principle 8 (machine-readable artifacts)
|
|
633
|
+
- Added examples and decision_tree fields to criteria
|
|
634
|
+
- Added agent_evaluation configuration
|
|
635
|
+
- Added reliability_targets to scoring
|
|
636
|
+
- Added evidence_class and evidence_anchors to outputs
|
|
637
|
+
- Added DAG evaluation steps
|
|
638
|
+
- Updated from EAROS v1 based on 63-source research programme
|
|
639
|
+
- version: "1.0.0"
|
|
640
|
+
date: "2026-03-16"
|
|
641
|
+
author: "Thomas Rohde"
|
|
642
|
+
changes:
|
|
643
|
+
- Initial release
|