@principles/core 1.150.0 → 1.152.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/dist/prompt-builder/__tests__/prompt-builder-core.test.js.map +1 -1
  2. package/dist/quality-scorecard/__tests__/quality-scorecard.test.js.map +1 -1
  3. package/dist/runtime-v2/__tests__/adversarial-loop.test.d.ts +2 -0
  4. package/dist/runtime-v2/__tests__/adversarial-loop.test.d.ts.map +1 -0
  5. package/dist/runtime-v2/__tests__/adversarial-loop.test.js +435 -0
  6. package/dist/runtime-v2/__tests__/adversarial-loop.test.js.map +1 -0
  7. package/dist/runtime-v2/__tests__/architecture-regression.test.js +5 -0
  8. package/dist/runtime-v2/__tests__/architecture-regression.test.js.map +1 -1
  9. package/dist/runtime-v2/__tests__/artificer-runner-vslice.test.js.map +1 -1
  10. package/dist/runtime-v2/__tests__/build-golden-trace-from-artificer.test.d.ts +2 -0
  11. package/dist/runtime-v2/__tests__/build-golden-trace-from-artificer.test.d.ts.map +1 -0
  12. package/dist/runtime-v2/__tests__/build-golden-trace-from-artificer.test.js +169 -0
  13. package/dist/runtime-v2/__tests__/build-golden-trace-from-artificer.test.js.map +1 -0
  14. package/dist/runtime-v2/__tests__/dreamer-output-validator.test.js.map +1 -1
  15. package/dist/runtime-v2/__tests__/dreamer-runner-vslice.test.js.map +1 -1
  16. package/dist/runtime-v2/__tests__/evaluator-runner-vslice-v2.test.d.ts +2 -0
  17. package/dist/runtime-v2/__tests__/evaluator-runner-vslice-v2.test.d.ts.map +1 -0
  18. package/dist/runtime-v2/__tests__/evaluator-runner-vslice-v2.test.js +815 -0
  19. package/dist/runtime-v2/__tests__/evaluator-runner-vslice-v2.test.js.map +1 -0
  20. package/dist/runtime-v2/__tests__/evaluator-runner-vslice.test.js.map +1 -1
  21. package/dist/runtime-v2/__tests__/evidence-sanitizer.test.js +4 -3
  22. package/dist/runtime-v2/__tests__/evidence-sanitizer.test.js.map +1 -1
  23. package/dist/runtime-v2/__tests__/feedback/privacy-preview.test.js.map +1 -1
  24. package/dist/runtime-v2/__tests__/feedback/render-github-url.test.js.map +1 -1
  25. package/dist/runtime-v2/__tests__/feedback/render-markdown.test.js.map +1 -1
  26. package/dist/runtime-v2/__tests__/internalization-consumer-product-path.test.js.map +1 -1
  27. package/dist/runtime-v2/__tests__/internalization-orchestrator.test.js +1 -3
  28. package/dist/runtime-v2/__tests__/internalization-orchestrator.test.js.map +1 -1
  29. package/dist/runtime-v2/__tests__/internalization-peer-runner-contracts.test.js +6 -0
  30. package/dist/runtime-v2/__tests__/internalization-peer-runner-contracts.test.js.map +1 -1
  31. package/dist/runtime-v2/__tests__/internalization-queue-read-model.test.js +0 -1
  32. package/dist/runtime-v2/__tests__/internalization-queue-read-model.test.js.map +1 -1
  33. package/dist/runtime-v2/__tests__/internalization-state-machine.test.js.map +1 -1
  34. package/dist/runtime-v2/__tests__/mainline-product-path.test.js.map +1 -1
  35. package/dist/runtime-v2/__tests__/pain-chain-read-model.test.js +0 -1
  36. package/dist/runtime-v2/__tests__/pain-chain-read-model.test.js.map +1 -1
  37. package/dist/runtime-v2/__tests__/pain-evidence-contract.test.js.map +1 -1
  38. package/dist/runtime-v2/__tests__/pain-signal-observability.test.js +1 -1
  39. package/dist/runtime-v2/__tests__/pain-signal-observability.test.js.map +1 -1
  40. package/dist/runtime-v2/__tests__/philosopher-runner-vslice.test.js.map +1 -1
  41. package/dist/runtime-v2/__tests__/pitask-metadata.test.js +5 -1
  42. package/dist/runtime-v2/__tests__/pitask-metadata.test.js.map +1 -1
  43. package/dist/runtime-v2/__tests__/proven-channel-baseline.test.js +1 -1
  44. package/dist/runtime-v2/__tests__/proven-channel-baseline.test.js.map +1 -1
  45. package/dist/runtime-v2/__tests__/pruning-read-model.test.js.map +1 -1
  46. package/dist/runtime-v2/__tests__/rulehost-oob-defense-simulation.test.js.map +1 -1
  47. package/dist/runtime-v2/__tests__/scribe-runner-vslice.test.js.map +1 -1
  48. package/dist/runtime-v2/__tests__/task-three-strikes.test.js.map +1 -1
  49. package/dist/runtime-v2/__tests__/trace-refiner-agent.test.js.map +1 -1
  50. package/dist/runtime-v2/__tests__/trainer-output-validator.test.js.map +1 -1
  51. package/dist/runtime-v2/activation/__tests__/activation-dispatcher.test.js.map +1 -1
  52. package/dist/runtime-v2/activation/__tests__/approval-store-extended.test.js +0 -1
  53. package/dist/runtime-v2/activation/__tests__/approval-store-extended.test.js.map +1 -1
  54. package/dist/runtime-v2/activation/__tests__/sqlite-activation-state-store.test.js +33 -0
  55. package/dist/runtime-v2/activation/__tests__/sqlite-activation-state-store.test.js.map +1 -1
  56. package/dist/runtime-v2/activation/activation-types.d.ts +1 -0
  57. package/dist/runtime-v2/activation/activation-types.d.ts.map +1 -1
  58. package/dist/runtime-v2/activation/activation-types.js.map +1 -1
  59. package/dist/runtime-v2/activation/memory-activation-state-store.d.ts +1 -0
  60. package/dist/runtime-v2/activation/memory-activation-state-store.d.ts.map +1 -1
  61. package/dist/runtime-v2/activation/memory-activation-state-store.js +9 -0
  62. package/dist/runtime-v2/activation/memory-activation-state-store.js.map +1 -1
  63. package/dist/runtime-v2/activation/sqlite-activation-state-store.d.ts +1 -0
  64. package/dist/runtime-v2/activation/sqlite-activation-state-store.d.ts.map +1 -1
  65. package/dist/runtime-v2/activation/sqlite-activation-state-store.js +18 -0
  66. package/dist/runtime-v2/activation/sqlite-activation-state-store.js.map +1 -1
  67. package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.d.ts +2 -0
  68. package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.d.ts.map +1 -0
  69. package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js +510 -0
  70. package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js.map +1 -0
  71. package/dist/runtime-v2/adapter/__tests__/schema-prompt-adapter.test.js +4 -4
  72. package/dist/runtime-v2/adapter/__tests__/schema-prompt-adapter.test.js.map +1 -1
  73. package/dist/runtime-v2/adapter/__tests__/structured-output-repair.test.js.map +1 -1
  74. package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts +47 -0
  75. package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts.map +1 -0
  76. package/dist/runtime-v2/adapter/artificer-l2-adapter.js +307 -0
  77. package/dist/runtime-v2/adapter/artificer-l2-adapter.js.map +1 -0
  78. package/dist/runtime-v2/adversarial-loop.d.ts +65 -0
  79. package/dist/runtime-v2/adversarial-loop.d.ts.map +1 -0
  80. package/dist/runtime-v2/adversarial-loop.js +203 -0
  81. package/dist/runtime-v2/adversarial-loop.js.map +1 -0
  82. package/dist/runtime-v2/config/__tests__/pd-config-redaction-deep.test.js +2 -2
  83. package/dist/runtime-v2/config/__tests__/pd-config-redaction-deep.test.js.map +1 -1
  84. package/dist/runtime-v2/config/__tests__/pd-config-validation-edge-cases.test.js +1 -1
  85. package/dist/runtime-v2/config/__tests__/pd-config-validation-edge-cases.test.js.map +1 -1
  86. package/dist/runtime-v2/golden-trace.d.ts +30 -0
  87. package/dist/runtime-v2/golden-trace.d.ts.map +1 -1
  88. package/dist/runtime-v2/golden-trace.js +57 -0
  89. package/dist/runtime-v2/golden-trace.js.map +1 -1
  90. package/dist/runtime-v2/index.d.ts +10 -6
  91. package/dist/runtime-v2/index.d.ts.map +1 -1
  92. package/dist/runtime-v2/index.js +7 -3
  93. package/dist/runtime-v2/index.js.map +1 -1
  94. package/dist/runtime-v2/internalization/__tests__/adversarial-case.test.d.ts +2 -0
  95. package/dist/runtime-v2/internalization/__tests__/adversarial-case.test.d.ts.map +1 -0
  96. package/dist/runtime-v2/internalization/__tests__/adversarial-case.test.js +139 -0
  97. package/dist/runtime-v2/internalization/__tests__/adversarial-case.test.js.map +1 -0
  98. package/dist/runtime-v2/internalization/__tests__/adversarial-feedback.test.d.ts +2 -0
  99. package/dist/runtime-v2/internalization/__tests__/adversarial-feedback.test.d.ts.map +1 -0
  100. package/dist/runtime-v2/internalization/__tests__/adversarial-feedback.test.js +62 -0
  101. package/dist/runtime-v2/internalization/__tests__/adversarial-feedback.test.js.map +1 -0
  102. package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts +2 -0
  103. package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts.map +1 -0
  104. package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.js +249 -0
  105. package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.js.map +1 -0
  106. package/dist/runtime-v2/internalization/__tests__/artificer-prompt-builder-v2.test.d.ts +2 -0
  107. package/dist/runtime-v2/internalization/__tests__/artificer-prompt-builder-v2.test.d.ts.map +1 -0
  108. package/dist/runtime-v2/internalization/__tests__/artificer-prompt-builder-v2.test.js +24 -0
  109. package/dist/runtime-v2/internalization/__tests__/artificer-prompt-builder-v2.test.js.map +1 -0
  110. package/dist/runtime-v2/internalization/__tests__/artificer-prompt-builder.test.js +2 -2
  111. package/dist/runtime-v2/internalization/__tests__/artificer-prompt-builder.test.js.map +1 -1
  112. package/dist/runtime-v2/internalization/__tests__/diag-distiller-runner.test.js.map +1 -1
  113. package/dist/runtime-v2/internalization/__tests__/diag-rootcause-runner.test.js.map +1 -1
  114. package/dist/runtime-v2/internalization/__tests__/diag-router-runner.test.js.map +1 -1
  115. package/dist/runtime-v2/internalization/__tests__/evaluator-output-v2.test.d.ts +2 -0
  116. package/dist/runtime-v2/internalization/__tests__/evaluator-output-v2.test.d.ts.map +1 -0
  117. package/dist/runtime-v2/internalization/__tests__/evaluator-output-v2.test.js +209 -0
  118. package/dist/runtime-v2/internalization/__tests__/evaluator-output-v2.test.js.map +1 -0
  119. package/dist/runtime-v2/internalization/__tests__/evaluator-prompt-builder-v2.test.d.ts +2 -0
  120. package/dist/runtime-v2/internalization/__tests__/evaluator-prompt-builder-v2.test.d.ts.map +1 -0
  121. package/dist/runtime-v2/internalization/__tests__/evaluator-prompt-builder-v2.test.js +125 -0
  122. package/dist/runtime-v2/internalization/__tests__/evaluator-prompt-builder-v2.test.js.map +1 -0
  123. package/dist/runtime-v2/internalization/__tests__/refiner-sandbox-wrapper.test.js.map +1 -1
  124. package/dist/runtime-v2/internalization/adversarial-case.d.ts +32 -0
  125. package/dist/runtime-v2/internalization/adversarial-case.d.ts.map +1 -0
  126. package/dist/runtime-v2/internalization/adversarial-case.js +68 -0
  127. package/dist/runtime-v2/internalization/adversarial-case.js.map +1 -0
  128. package/dist/runtime-v2/internalization/adversarial-feedback.d.ts +23 -0
  129. package/dist/runtime-v2/internalization/adversarial-feedback.d.ts.map +1 -0
  130. package/dist/runtime-v2/internalization/adversarial-feedback.js +20 -0
  131. package/dist/runtime-v2/internalization/adversarial-feedback.js.map +1 -0
  132. package/dist/runtime-v2/internalization/artificer-output.d.ts +42 -0
  133. package/dist/runtime-v2/internalization/artificer-output.d.ts.map +1 -1
  134. package/dist/runtime-v2/internalization/artificer-output.js +147 -0
  135. package/dist/runtime-v2/internalization/artificer-output.js.map +1 -1
  136. package/dist/runtime-v2/internalization/artificer-prompt-builder.d.ts +10 -2
  137. package/dist/runtime-v2/internalization/artificer-prompt-builder.d.ts.map +1 -1
  138. package/dist/runtime-v2/internalization/artificer-prompt-builder.js +26 -3
  139. package/dist/runtime-v2/internalization/artificer-prompt-builder.js.map +1 -1
  140. package/dist/runtime-v2/internalization/artificer-runner.d.ts +7 -0
  141. package/dist/runtime-v2/internalization/artificer-runner.d.ts.map +1 -1
  142. package/dist/runtime-v2/internalization/artificer-runner.js +11 -2
  143. package/dist/runtime-v2/internalization/artificer-runner.js.map +1 -1
  144. package/dist/runtime-v2/internalization/evaluator-output.d.ts +63 -0
  145. package/dist/runtime-v2/internalization/evaluator-output.d.ts.map +1 -1
  146. package/dist/runtime-v2/internalization/evaluator-output.js +158 -0
  147. package/dist/runtime-v2/internalization/evaluator-output.js.map +1 -1
  148. package/dist/runtime-v2/internalization/evaluator-prompt-builder.d.ts +8 -1
  149. package/dist/runtime-v2/internalization/evaluator-prompt-builder.d.ts.map +1 -1
  150. package/dist/runtime-v2/internalization/evaluator-prompt-builder.js +16 -3
  151. package/dist/runtime-v2/internalization/evaluator-prompt-builder.js.map +1 -1
  152. package/dist/runtime-v2/internalization/evaluator-runner.d.ts +82 -2
  153. package/dist/runtime-v2/internalization/evaluator-runner.d.ts.map +1 -1
  154. package/dist/runtime-v2/internalization/evaluator-runner.js +496 -4
  155. package/dist/runtime-v2/internalization/evaluator-runner.js.map +1 -1
  156. package/dist/runtime-v2/internalization/index.d.ts +9 -4
  157. package/dist/runtime-v2/internalization/index.d.ts.map +1 -1
  158. package/dist/runtime-v2/internalization/index.js +6 -2
  159. package/dist/runtime-v2/internalization/index.js.map +1 -1
  160. package/dist/runtime-v2/internalization/peer-runner-contracts.d.ts +2 -0
  161. package/dist/runtime-v2/internalization/peer-runner-contracts.d.ts.map +1 -1
  162. package/dist/runtime-v2/internalization/peer-runner-contracts.js +19 -9
  163. package/dist/runtime-v2/internalization/peer-runner-contracts.js.map +1 -1
  164. package/dist/runtime-v2/internalization/pitask-metadata.d.ts +7 -0
  165. package/dist/runtime-v2/internalization/pitask-metadata.d.ts.map +1 -1
  166. package/dist/runtime-v2/internalization/pitask-metadata.js +15 -5
  167. package/dist/runtime-v2/internalization/pitask-metadata.js.map +1 -1
  168. package/dist/runtime-v2/internalization/prompt-serializer.d.ts +3 -0
  169. package/dist/runtime-v2/internalization/prompt-serializer.d.ts.map +1 -0
  170. package/dist/runtime-v2/internalization/prompt-serializer.js +22 -0
  171. package/dist/runtime-v2/internalization/prompt-serializer.js.map +1 -0
  172. package/dist/runtime-v2/proven-channel-baseline.d.ts.map +1 -1
  173. package/dist/runtime-v2/proven-channel-baseline.js +1 -0
  174. package/dist/runtime-v2/proven-channel-baseline.js.map +1 -1
  175. package/dist/runtime-v2/runner/__tests__/base-peer-runner-trust-boundary.test.js.map +1 -1
  176. package/dist/runtime-v2/runner/__tests__/pain-signal-bridge-admission.test.js.map +1 -1
  177. package/dist/runtime-v2/runner/__tests__/pain-signal-bridge.test.js.map +1 -1
  178. package/dist/runtime-v2/store/history/sqlite-history-query.test.js +1 -1
  179. package/dist/runtime-v2/store/history/sqlite-history-query.test.js.map +1 -1
  180. package/dist/runtime-v2/store/idempotent-transitions.test.js +0 -5
  181. package/dist/runtime-v2/store/idempotent-transitions.test.js.map +1 -1
  182. package/dist/runtime-v2/store/lifecycle/lease-manager.test.js +0 -2
  183. package/dist/runtime-v2/store/lifecycle/lease-manager.test.js.map +1 -1
  184. package/dist/runtime-v2/store/lifecycle/recovery-sweep.test.js +0 -2
  185. package/dist/runtime-v2/store/lifecycle/recovery-sweep.test.js.map +1 -1
  186. package/dist/runtime-v2/store/schema-conformance.test.js +0 -10
  187. package/dist/runtime-v2/store/schema-conformance.test.js.map +1 -1
  188. package/dist/runtime-v2/store/sqlite-connection-pragma.test.js +2 -2
  189. package/dist/runtime-v2/store/sqlite-connection-pragma.test.js.map +1 -1
  190. package/dist/runtime-v2/store/sqlite-run-store.test.js.map +1 -1
  191. package/dist/runtime-v2/store/sqlite-task-store.test.js +0 -2
  192. package/dist/runtime-v2/store/sqlite-task-store.test.js.map +1 -1
  193. package/dist/runtime-v2/store/trajectory/source-trace-locator.test.js +0 -1
  194. package/dist/runtime-v2/store/trajectory/source-trace-locator.test.js.map +1 -1
  195. package/dist/runtime-v2/store/trajectory/sqlite-trajectory-locator.test.js +1 -1
  196. package/dist/runtime-v2/store/trajectory/sqlite-trajectory-locator.test.js.map +1 -1
  197. package/dist/runtime-v2/store/workspace-isolation.test.js +0 -2
  198. package/dist/runtime-v2/store/workspace-isolation.test.js.map +1 -1
  199. package/dist/telemetry-event.d.ts +2 -2
  200. package/dist/telemetry-event.d.ts.map +1 -1
  201. package/dist/telemetry-event.js +21 -0
  202. package/dist/telemetry-event.js.map +1 -1
  203. package/package.json +1 -1
@@ -0,0 +1,815 @@
1
+ /**
2
+ * EvaluatorRunner V2 vertical-slice tests — adversarial sandbox replay
3
+ * (RuleHost MVP Activation, PRI-426, PRD Decision 11d).
4
+ *
5
+ * These tests pin the single-round adversarial replay contract that runs
6
+ * inside EvaluatorRunner.succeedTask after the principle artifact is written.
7
+ * They DO NOT exercise the multi-round orchestrator loop (Phase 7 / PRI-428).
8
+ *
9
+ * Scope of PRI-426:
10
+ * - V2 output (codeReview + adversarialCases) flows through succeedTask.
11
+ * - Passive review failing (any of 3 dimensions) → no adversarial replay.
12
+ * - Passive review passing + adversarialCases present → single
13
+ * evaluateRefinerRuleHostGate replay via injected gateDeps.
14
+ * - PRI-423 contract: the merged trace sent to the gate MUST contain ≥1
15
+ * positive case drawn from the Artificer golden trace. adversarialCases
16
+ * alone are all negative and would fail replay validation.
17
+ * - adversarialResult is populated; decision already reflects needs_revision
18
+ * when the LLM followed the passive-review short-circuit instruction.
19
+ *
20
+ * What is NOT in scope here (covered elsewhere):
21
+ * - rule artifact assembly (Phase 6 / PRI-427)
22
+ * - multi-round Artificer retry loop (Phase 7 / PRI-428)
23
+ *
24
+ * ERR considerations:
25
+ * - ERR-001 / ERR-005: V2 fields are detected via isEvaluatorOutputV2 after
26
+ * validate(); never `as`-cast.
27
+ * - ERR-069: every output-emitting path must emit a validated object.
28
+ * adversarialResult is built only from sandbox-returned failedCases with
29
+ * known fields; degraded paths must populate a reason, not silently skip.
30
+ * - ERR-018: gateDeps injection is the trust boundary — a throwing sandbox
31
+ * must degrade, not crash the runner.
32
+ */
33
+ import { describe, it, expect, vi } from 'vitest';
34
+ import { EvaluatorRunner } from '../internalization/evaluator-runner.js';
35
+ import { MemoryPIArtifactStore } from '../internalization/pi-artifact-store.js';
36
+ import { DefaultEvaluatorValidator } from '../internalization/evaluator-output.js';
37
+ import { createPITaskDiagnosticJson } from '../internalization/pitask-metadata.js';
38
+ // PRI-427: rule artifact assembly — verify the produced artifact satisfies
39
+ // the downstream RuleHostWriter.canActivate field contract.
40
+ import { RuleHostWriter } from '../activation/writers/rule-host-writer.js';
41
// Task identifiers shared by the task, artifact and evaluator-output fixtures
// in this test file.
/** Task id used by the Artificer task and artifact fixtures. */
const ARTIFICER_TASK_ID = 'artificer-001';
/** Task id used by the Scribe task and artifact fixtures. */
const SCRIBE_TASK_ID = 'scribe-001';
/** Task id of the Evaluator task that the tests hand to `runner.run`. */
const EVALUATOR_TASK_ID = 'evaluator-001';
44
+ // ── Task / artifact fixtures ──────────────────────────────────────────────────
45
/**
 * Builds the Artificer task record fixture.
 *
 * By default the task has already succeeded on its first attempt and lists the
 * principle artifact `pi-art-artificer-001-run-001` as its only output ref.
 *
 * @param {object} [overrides] - Fields spread last over the defaults (shallow
 *   merge: an overridden key replaces the default value wholesale).
 * @returns {object} Task record with `diagnosticJson` produced by
 *   `createPITaskDiagnosticJson`.
 */
function makeArtificerTask(overrides = {}) {
    // A single timestamp keeps createdAt and updatedAt identical; two separate
    // Date reads could land on different milliseconds.
    const now = new Date().toISOString();
    const diagnosticJson = createPITaskDiagnosticJson({
        dependencyTaskIds: [],
        channel: 'prompt',
        timeoutMs: 300_000,
        inputArtifactRefs: [],
        outputArtifactRefs: [{ artifactType: 'principle', ref: 'pi-art-artificer-001-run-001' }],
    });
    return {
        taskId: ARTIFICER_TASK_ID,
        taskKind: 'artificer',
        status: 'succeeded',
        attemptCount: 1,
        maxAttempts: 3,
        resultRef: 'artificer://run-001',
        createdAt: now,
        updatedAt: now,
        diagnosticJson,
        ...overrides,
    };
}
65
/**
 * Builds the Evaluator task record fixture.
 *
 * By default the task is still `pending` with no attempts, depends on the
 * Artificer task, and takes the Artificer principle artifact
 * `pi-art-artificer-001-run-001` as its only input ref.
 *
 * @param {object} [overrides] - Fields spread last over the defaults (shallow
 *   merge: an overridden key replaces the default value wholesale).
 * @returns {object} Task record with `diagnosticJson` produced by
 *   `createPITaskDiagnosticJson`.
 */
function makeEvaluatorTask(overrides = {}) {
    // A single timestamp keeps createdAt and updatedAt identical; two separate
    // Date reads could land on different milliseconds.
    const now = new Date().toISOString();
    const diagnosticJson = createPITaskDiagnosticJson({
        dependencyTaskIds: [ARTIFICER_TASK_ID],
        channel: 'prompt',
        timeoutMs: 300_000,
        inputArtifactRefs: [{ artifactType: 'principle', ref: 'pi-art-artificer-001-run-001' }],
        outputArtifactRefs: [],
    });
    return {
        taskId: EVALUATOR_TASK_ID,
        taskKind: 'evaluator',
        status: 'pending',
        attemptCount: 0,
        maxAttempts: 3,
        createdAt: now,
        updatedAt: now,
        diagnosticJson,
        ...overrides,
    };
}
84
/**
 * Builds the Scribe task record fixture.
 *
 * By default the task has already succeeded on its first attempt and lists the
 * principle artifact `pi-art-scribe-001` as its only output ref.
 *
 * @param {object} [overrides] - Fields spread last over the defaults (shallow
 *   merge: an overridden key replaces the default value wholesale).
 * @returns {object} Task record with `diagnosticJson` produced by
 *   `createPITaskDiagnosticJson`.
 */
function makeScribeTask(overrides = {}) {
    // A single timestamp keeps createdAt and updatedAt identical; two separate
    // Date reads could land on different milliseconds.
    const now = new Date().toISOString();
    const diagnosticJson = createPITaskDiagnosticJson({
        dependencyTaskIds: [],
        channel: 'prompt',
        timeoutMs: 300_000,
        inputArtifactRefs: [],
        outputArtifactRefs: [{ artifactType: 'principle', ref: 'pi-art-scribe-001' }],
    });
    return {
        taskId: SCRIBE_TASK_ID,
        taskKind: 'scribe',
        status: 'succeeded',
        attemptCount: 1,
        maxAttempts: 3,
        resultRef: 'scribe://run-001',
        createdAt: now,
        updatedAt: now,
        diagnosticJson,
        ...overrides,
    };
}
104
/**
 * Builds the Scribe principle artifact fixture (`pi-art-scribe-001`).
 *
 * `contentJson` is a JSON string holding the principle draft (title and
 * statement) plus a `generatedAt` timestamp.
 *
 * @returns {object} Artifact record with `validationStatus: 'pending'` and no
 *   lineage.
 */
function makeScribeArtifact() {
    // One timestamp for generatedAt / createdAt / updatedAt so the three
    // fields of a freshly built fixture always agree.
    const now = new Date().toISOString();
    const content = {
        principleDraft: {
            title: 'Always validate async input',
            statement: 'Every async function must validate its input before processing.',
        },
        generatedAt: now,
    };
    return {
        artifactId: 'pi-art-scribe-001',
        artifactKind: 'principle',
        sourceTaskId: SCRIBE_TASK_ID,
        lineageArtifactIds: [],
        validationStatus: 'pending',
        contentJson: JSON.stringify(content),
        createdAt: now,
        updatedAt: now,
    };
}
122
/**
 * V1 artificer artifact (no implementationCode). Used as the "no golden trace
 * cases" degradation fixture.
 *
 * `contentJson` carries only the implementation plan, the source trace and an
 * empty risk list; it has none of the V2 fields (`implementationCode`,
 * `goldenTraceCases`, `affectedTools`).
 *
 * @returns {object} Artifact record with id `pi-art-artificer-001-run-001`.
 */
function makeV1ArtificerArtifact() {
    // One timestamp for generatedAt / createdAt / updatedAt so the three
    // fields of a freshly built fixture always agree.
    const now = new Date().toISOString();
    const content = {
        taskId: ARTIFICER_TASK_ID,
        sourceScribeArtifactId: 'pi-art-scribe-001',
        implementationPlan: {
            summary: 'Add input validation to all async operations',
            targetSurface: 'src/async-ops/*.ts',
            changes: ['Add try-catch to asyncOp1'],
            tests: ['Unit test for asyncOp1 error handling'],
            rolloutNotes: ['Deploy behind feature flag'],
            confidence: 0.85,
        },
        sourceTrace: { scribeArtifactId: 'pi-art-scribe-001' },
        risks: [],
        generatedAt: now,
    };
    return {
        artifactId: 'pi-art-artificer-001-run-001',
        artifactKind: 'principle',
        sourceTaskId: ARTIFICER_TASK_ID,
        lineageArtifactIds: [],
        validationStatus: 'pending',
        contentJson: JSON.stringify(content),
        createdAt: now,
        updatedAt: now,
    };
}
152
/**
 * V2 artificer artifact: implementationCode + goldenTraceCases (1 pos + 1 neg).
 * The positive case is what PRI-426 merges into the adversarial trace.
 *
 * Shares the artifact id and implementation plan of the V1 fixture and adds
 * the V2 fields `implementationCode`, `goldenTraceCases` and `affectedTools`.
 *
 * @returns {object} Artifact record with id `pi-art-artificer-001-run-001`.
 */
function makeV2ArtificerArtifact() {
    // One timestamp for generatedAt / createdAt / updatedAt so the three
    // fields of a freshly built fixture always agree.
    const now = new Date().toISOString();
    const goldenTraceCases = [
        {
            caseId: 'artificer-positive-1',
            kind: 'positive',
            toolName: 'read_file',
            params: { path: '/safe/path.txt' },
            expectedDecision: 'allow',
        },
        {
            caseId: 'artificer-negative-1',
            kind: 'negative',
            toolName: 'read_file',
            params: { path: '/etc/passwd' },
            expectedDecision: 'block',
        },
    ];
    const content = {
        taskId: ARTIFICER_TASK_ID,
        sourceScribeArtifactId: 'pi-art-scribe-001',
        implementationPlan: {
            summary: 'Add input validation to all async operations',
            targetSurface: 'src/async-ops/*.ts',
            changes: ['Add try-catch to asyncOp1'],
            tests: ['Unit test for asyncOp1 error handling'],
            rolloutNotes: ['Deploy behind feature flag'],
            confidence: 0.85,
        },
        // V2 fields:
        implementationCode: 'function evaluate(input, helpers) { return { decision: "allow", matched: true, reason: "ok" }; }',
        goldenTraceCases,
        affectedTools: ['read_file'],
        sourceTrace: { scribeArtifactId: 'pi-art-scribe-001' },
        risks: [],
        generatedAt: now,
    };
    return {
        artifactId: 'pi-art-artificer-001-run-001',
        artifactKind: 'principle',
        sourceTaskId: ARTIFICER_TASK_ID,
        lineageArtifactIds: [],
        validationStatus: 'pending',
        contentJson: JSON.stringify(content),
        createdAt: now,
        updatedAt: now,
    };
}
201
+ // ── Evaluator V2 output fixtures ──────────────────────────────────────────────
202
/**
 * Builds a passive code review in which all three dimensions pass:
 * intent is aligned, scope is precise, and trace coverage is sufficient
 * with no gaps.
 *
 * @returns {object} Review with `intentConsistency`, `scopePrecision` and
 *   `traceCoverage` entries.
 */
function makePassingCodeReview() {
    const intentConsistency = { aligned: true, explanation: 'Code matches principle intent.' };
    const scopePrecision = { verdict: 'precise', explanation: 'Matcher is exact.' };
    const traceCoverage = { sufficient: true, gaps: [], explanation: 'Covers both cases.' };
    return { intentConsistency, scopePrecision, traceCoverage };
}
209
/**
 * Builds the evaluator's adversarial cases: one per attack type
 * (boundary / omission / inversion), all targeting `read_file` and all
 * expecting a `block` decision.
 *
 * @returns {object[]} Three adversarial case records.
 */
function makeAdversarialCases() {
    // Every case shares the tool and the negative expectation; only the id,
    // attack type, path and rationale vary.
    const buildCase = (attackType, path, rationale) => ({
        caseId: `adv-${attackType}-1`,
        attackType,
        toolName: 'read_file',
        params: { path },
        expectedDecision: 'block',
        rationale,
    });
    return [
        buildCase('boundary', '/safe/../etc/passwd', 'Path traversal at the boundary of the matcher.'),
        buildCase('omission', '', 'Empty path the matcher may have skipped.'),
        buildCase('inversion', '/safe/path.txt', 'Inverted positive case to check false-negative.'),
    ];
}
238
/**
 * Builds an Evaluator V2 output payload: an approved evaluation plus the V2
 * fields `codeReview` (all dimensions passing) and `adversarialCases`.
 *
 * @param {object} [overrides] - Fields spread last over the defaults. The
 *   merge is shallow, so overriding `evaluation` or `codeReview` replaces the
 *   whole nested object rather than patching individual fields.
 * @returns {object} Evaluator output payload.
 */
function makeEvaluatorV2Output(overrides = {}) {
    const artificerArtifactId = 'pi-art-artificer-001-run-001';
    const defaults = {
        taskId: EVALUATOR_TASK_ID,
        sourceArtificerArtifactId: artificerArtifactId,
        evaluation: {
            decision: 'approved',
            summary: 'Code review passed and adversarial replay passed.',
            score: 0.9,
            strengths: ['Clear matcher', 'Good coverage'],
            concerns: [],
            requiredChanges: [],
        },
        sourceTrace: {
            artificerArtifactId,
            scribeArtifactId: 'pi-art-scribe-001',
        },
        risks: [],
        generatedAt: new Date().toISOString(),
        codeReview: makePassingCodeReview(),
        adversarialCases: makeAdversarialCases(),
    };
    return { ...defaults, ...overrides };
}
261
/**
 * Assembles the dependency bundle handed to `EvaluatorRunner` in these tests.
 *
 * - `stateManager`: vitest mocks. `getTask` resolves the evaluator, artificer
 *   or scribe task fixture by id and `null` for any other id; the retry policy
 *   never retries.
 * - `runtimeAdapter`: vitest mocks for a run that starts, polls as
 *   `succeeded`, and whose `fetchOutput` resolves `{ payload }`.
 * - `eventEmitter`: a bare `emitTelemetry` mock the tests inspect.
 * - `validator`: a real `DefaultEvaluatorValidator`.
 * - `artifactStore`: the supplied store or a fresh `MemoryPIArtifactStore`.
 *
 * @param {object} [options]
 * @param {object} [options.artifactStore] - Store to use instead of a new
 *   in-memory one.
 * @param {object} [options.output] - Payload returned by `fetchOutput`;
 *   defaults to `makeEvaluatorV2Output()`.
 * @returns {object} `{ stateManager, runtimeAdapter, eventEmitter, validator, artifactStore }`.
 */
function createMockDeps(options = {}) {
    const artifactStore = options.artifactStore ?? new MemoryPIArtifactStore();
    const runId = 'run-evaluator-001';
    // One start time shared by every view of the same run (both run queries
    // and the run handle), so they cannot disagree by a millisecond.
    const startedAt = new Date().toISOString();
    // Fresh object per call so the two run-query mocks never share a record.
    const makeRunRecord = () => ({
        runId,
        taskId: EVALUATOR_TASK_ID,
        runtimeKind: 'evaluator',
        startedAt,
    });
    const evaluatorTask = makeEvaluatorTask();
    const tasksById = new Map([
        [EVALUATOR_TASK_ID, evaluatorTask],
        [ARTIFICER_TASK_ID, makeArtificerTask()],
        [SCRIBE_TASK_ID, makeScribeTask()],
    ]);
    const stateManager = {
        acquireLease: vi.fn().mockResolvedValue(evaluatorTask),
        // Unknown ids resolve to null rather than rejecting.
        getTask: vi.fn().mockImplementation((id) => Promise.resolve(tasksById.get(id) ?? null)),
        getRunsByTask: vi.fn().mockResolvedValue([makeRunRecord()]),
        getValidRunsByTaskTolerant: vi.fn().mockResolvedValue({
            runs: [makeRunRecord()],
            degradedRuns: [],
        }),
        updateRunOutput: vi.fn().mockResolvedValue(undefined),
        markTaskSucceeded: vi.fn().mockResolvedValue(undefined),
        markTaskFailed: vi.fn().mockResolvedValue(undefined),
        markTaskRetryWait: vi.fn().mockResolvedValue(undefined),
        getRetryPolicy: vi.fn().mockReturnValue({ shouldRetry: () => false }),
    };
    const runtimeAdapter = {
        startRun: vi.fn().mockResolvedValue({ runId, runtimeKind: 'test-double', startedAt }),
        pollRun: vi.fn().mockResolvedValue({ status: 'succeeded', runId }),
        fetchOutput: vi.fn().mockResolvedValue({
            payload: options.output ?? makeEvaluatorV2Output(),
        }),
        cancelRun: vi.fn().mockResolvedValue(undefined),
    };
    const eventEmitter = {
        emitTelemetry: vi.fn(),
    };
    return {
        stateManager,
        runtimeAdapter,
        eventEmitter,
        validator: new DefaultEvaluatorValidator(),
        artifactStore,
    };
}
316
/**
 * Build a runner, wiring gateDeps through the constructor options (PRI-426).
 *
 * @param {object} deps - Dependency bundle (see `createMockDeps`).
 * @param {object} [gateDeps] - Sandbox gate dependencies; forwarded as-is,
 *   including when `undefined`.
 * @returns {EvaluatorRunner} Runner configured with a 10 ms poll interval and
 *   a 1000 ms timeout.
 */
function makeRunner(deps, gateDeps) {
    const runnerOptions = {
        owner: 'test',
        runtimeKind: 'evaluator',
        pollIntervalMs: 10,
        timeoutMs: 1000,
        gateDeps,
    };
    return new EvaluatorRunner(deps, runnerOptions);
}
326
+ // ── Helpers for inspecting the trace the gate received ────────────────────────
327
/**
 * Builds gate dependencies whose sandbox records what it was called with.
 *
 * Each `evaluateInSandbox(code, goldenTrace)` call overwrites `capture.code`
 * and `capture.trace` and returns `result.sandboxResult`. No other field of
 * `result` is read by this helper.
 *
 * @param {object} capture - Mutable object that receives `code` and `trace`.
 * @param {object} result - Object whose `sandboxResult` the sandbox returns.
 * @returns {{ evaluateInSandbox: Function }} Gate dependencies.
 */
function makeRecordingGate(capture, result) {
    const evaluateInSandbox = (code, goldenTrace) => {
        Object.assign(capture, { code, trace: goldenTrace });
        // Read lazily so the caller's `result` is consulted at call time.
        return result.sandboxResult;
    };
    return { evaluateInSandbox };
}
336
/**
 * Builds a sandbox result for a clean replay: success, no failed cases and no
 * forbidden-pattern violations.
 *
 * @returns {object} Sandbox result with `executionTimeMs: 5`.
 */
function sandboxResultSuccess() {
    return {
        success: true,
        failedCases: [],
        executionTimeMs: 5,
        forbiddenPatternViolations: [],
    };
}
339
/**
 * Builds a failed sandbox result in which every given case is reported as a
 * `validation_failed` entry.
 *
 * @param {string[]} caseIds - Ids of the cases to report as failed, in order.
 * @returns {object} Sandbox result with `success: false` and one
 *   `failedCases` entry per id.
 */
function sandboxResultValidationFailed(caseIds) {
    const toFailedCase = (caseId) => ({
        caseId,
        errorType: 'validation_failed',
        message: `case ${caseId} produced the wrong decision`,
    });
    // Arrow wrapper so map's index/array arguments never reach toFailedCase.
    const failedCases = caseIds.map((caseId) => toFailedCase(caseId));
    return {
        success: false,
        failedCases,
        executionTimeMs: 5,
        forbiddenPatternViolations: [],
    };
}
351
+ // ── Tests ─────────────────────────────────────────────────────────────────────
352
+ describe('EvaluatorRunner V2 — adversarial sandbox replay (PRI-426)', () => {
353
+ it('V1 output (no codeReview) does not invoke adversarial replay', async () => {
354
+ const store = new MemoryPIArtifactStore();
355
+ await store.upsertArtifact(makeV1ArtificerArtifact());
356
+ await store.upsertArtifact(makeScribeArtifact());
357
+ const gateSpy = vi.fn(() => sandboxResultSuccess());
358
+ const gateDeps = { evaluateInSandbox: gateSpy };
359
+ const deps = createMockDeps({ artifactStore: store });
360
+ // V1 output: no codeReview, no adversarialCases
361
+ const v1Output = {
362
+ taskId: EVALUATOR_TASK_ID,
363
+ sourceArtificerArtifactId: 'pi-art-artificer-001-run-001',
364
+ evaluation: {
365
+ decision: 'approved',
366
+ summary: 'V1 plan approved.',
367
+ score: 0.8,
368
+ strengths: [],
369
+ concerns: [],
370
+ requiredChanges: [],
371
+ },
372
+ sourceTrace: {
373
+ artificerArtifactId: 'pi-art-artificer-001-run-001',
374
+ scribeArtifactId: 'pi-art-scribe-001',
375
+ },
376
+ risks: [],
377
+ generatedAt: new Date().toISOString(),
378
+ };
379
+ deps.runtimeAdapter.fetchOutput = vi.fn().mockResolvedValue({ payload: v1Output });
380
+ const runner = makeRunner(deps, gateDeps);
381
+ const result = await runner.run(EVALUATOR_TASK_ID);
382
+ expect(result.status).toBe('succeeded');
383
+ expect(gateSpy).not.toHaveBeenCalled();
384
+ });
385
it('V2 output with failing passive review (intentConsistency) skips adversarial replay', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const evaluateInSandbox = vi.fn(() => sandboxResultSuccess());

  // The passive review fails on intentConsistency, so the evaluator output
  // carries decision=needs_revision (per the prompt instruction).
  const evaluation = {
    decision: 'needs_revision',
    summary: 'intent mismatch',
    score: 0.4,
    strengths: [],
    concerns: ['code does not match principle'],
    requiredChanges: ['Rewrite matcher'],
  };
  const codeReview = {
    intentConsistency: { aligned: false, explanation: 'Matcher allows unsafe paths.' },
    scopePrecision: { verdict: 'precise', explanation: 'ok' },
    traceCoverage: { sufficient: true, gaps: [], explanation: 'ok' },
  };
  const output = makeEvaluatorV2Output({ evaluation, codeReview });
  const deps = createMockDeps({ artifactStore, output });

  const outcome = await makeRunner(deps, { evaluateInSandbox }).run(EVALUATOR_TASK_ID);

  expect(outcome.status).toBe('succeeded');
  expect(evaluateInSandbox).not.toHaveBeenCalled();

  // A skipped replay must not emit the replay telemetry event.
  const emitted = deps.eventEmitter.emitTelemetry.mock.calls.map(([event]) => event);
  const replayEvent = emitted.find(
    (event) => event.eventType === 'evaluator_adversarial_replay',
  );
  expect(replayEvent).toBeUndefined();
});
418
it('V2 output: passive review passes + adversarial sandbox PASSES → adversarialResult.passed=true', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  // Gate outcome handed to makeRecordingGate: accepted in shadow mode.
  const recorded = {};
  const acceptedInShadow = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate(recorded, acceptedInShadow);
  const deps = createMockDeps({ artifactStore });

  const outcome = await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  expect(outcome.status).toBe('succeeded');
  expect(outcome.output).toBeDefined();

  const { adversarialResult } = outcome.output;
  expect(adversarialResult).toBeDefined();
  expect(adversarialResult?.passed).toBe(true);
  expect(adversarialResult?.failedCases).toHaveLength(0);

  // updateRunOutput must have been called exactly twice; the second call's
  // second argument is JSON that must include the passing adversarialResult.
  const { calls: outputWrites } = deps.stateManager.updateRunOutput.mock;
  expect(outputWrites).toHaveLength(2);
  const secondWriteJson = outputWrites[1]?.[1];
  expect(JSON.parse(secondWriteJson)).toMatchObject({
    adversarialResult: { passed: true, failedCases: [] },
  });
});
444
it('adversarial replay telemetry uses the actual runtime runId', async () => {
  // The V1 Artificer artifact is seeded here; the assertion below looks for
  // the "replay skipped" telemetry event and checks the runId it carries.
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV1ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());
  const deps = createMockDeps({ artifactStore });

  const gateDeps = makeRecordingGate({}, {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  });
  const runner = makeRunner(deps, gateDeps);
  await runner.run(EVALUATOR_TASK_ID);

  const emitted = deps.eventEmitter.emitTelemetry.mock.calls.map(([event]) => event);
  const skipped = emitted.find(
    ({ eventType }) => eventType === 'evaluator_adversarial_replay_skipped',
  );
  expect(skipped?.payload.runId).toBe('run-evaluator-001');
});
460
it('PRI-423 contract: merged trace sent to gate contains ≥1 positive case from Artificer', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  // `recorded` is the capture object given to makeRecordingGate; the test
  // reads the trace back from it after the run.
  const recorded = {};
  const gateOutcome = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate(recorded, gateOutcome);
  const deps = createMockDeps({ artifactStore });

  await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  expect(recorded.trace).toBeDefined();
  const traceCases = recorded.trace.cases;
  const ofKind = (kind) => traceCases.filter((traceCase) => traceCase.kind === kind);

  // PRI-423: adversarial cases are all negative. Unless the runner merges in
  // the Artificer positive case, the gate would receive zero positives and
  // the trace would fail validateGoldenTrace().
  expect(ofKind('positive').length).toBeGreaterThanOrEqual(1);

  // Every adversarial case is preserved as a negative case.
  expect(ofKind('negative').length).toBe(3);
});
487
it('V2 output: adversarial sandbox FAILS (validation_failed) → adversarialResult.passed=false + failedCases', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  // Gate outcome: validation rejected, naming two failing adversarial cases.
  const expectedFailures = ['adv-boundary-1', 'adv-inversion-1'];
  const rejectedOutcome = {
    decision: 'rejected_validation_failed',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultValidationFailed(expectedFailures),
    reasons: ['2 cases failed validation'],
  };
  const gateDeps = makeRecordingGate({}, rejectedOutcome);
  const deps = createMockDeps({ artifactStore });

  const outcome = await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  // The run itself still succeeds; the failure is reported in the output.
  expect(outcome.status).toBe('succeeded');
  const { adversarialResult } = outcome.output;
  expect(adversarialResult).toBeDefined();
  expect(adversarialResult?.passed).toBe(false);
  expect(adversarialResult?.failedCases.length).toBe(2);

  const reportedIds = adversarialResult?.failedCases.map(({ caseId }) => caseId);
  expect(reportedIds).toEqual(expect.arrayContaining(expectedFailures));
});
509
it('V2 output: sandbox adapter THROWS → degrade with passed=false, runner does not crash (ERR-018)', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  // Sandbox adapter that always throws when invoked.
  const throwingGate = {
    evaluateInSandbox() {
      throw new Error('sandbox VM crashed');
    },
  };
  const deps = createMockDeps({ artifactStore });

  const outcome = await makeRunner(deps, throwingGate).run(EVALUATOR_TASK_ID);

  // The run must still report success, with a failed adversarialResult.
  expect(outcome.status).toBe('succeeded');
  const { adversarialResult } = outcome.output;
  expect(adversarialResult).toBeDefined();
  expect(adversarialResult?.passed).toBe(false);
});
526
it('V2 output but Artificer artifact has no goldenTraceCases → degrade: skip replay with telemetry', async () => {
  // Deliberate mismatch: the evaluator emits V2 output (codeReview +
  // adversarialCases) while the stored Artificer artifact is V1 and so has
  // no goldenTraceCases. With no positive case to merge, the runner is
  // expected to skip the replay and emit telemetry with a reason rather
  // than crash.
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV1ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const evaluateInSandbox = vi.fn(() => sandboxResultSuccess());
  const deps = createMockDeps({ artifactStore });

  const outcome = await makeRunner(deps, { evaluateInSandbox }).run(EVALUATOR_TASK_ID);

  expect(outcome.status).toBe('succeeded');
  expect(evaluateInSandbox).not.toHaveBeenCalled();

  const emitted = deps.eventEmitter.emitTelemetry.mock.calls.map(([event]) => event);
  const skippedEvent = emitted.find(
    ({ eventType }) => eventType === 'evaluator_adversarial_replay_skipped',
  );
  expect(skippedEvent).toBeDefined();
  expect(typeof skippedEvent?.payload?.reason).toBe('string');
});
546
it('emits evaluator_adversarial_replay telemetry with gate decision on each replay', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const gateOutcome = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate({}, gateOutcome);
  const deps = createMockDeps({ artifactStore });

  await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  // The replay event must exist and report the gate's decision.
  const emitted = deps.eventEmitter.emitTelemetry.mock.calls.map(([event]) => event);
  const replayEvent = emitted.find(
    ({ eventType }) => eventType === 'evaluator_adversarial_replay',
  );
  expect(replayEvent).toBeDefined();
  expect(replayEvent?.payload?.gateDecision).toBe('accepted_shadow');
});
564
+ });
565
+ // ── PRI-427: rule artifact assembly tests ────────────────────────────────────
566
/**
 * Project an artifact record onto the nine-field snapshot object that the
 * tests hand to RuleHostWriter.canActivate.
 *
 * Only the listed fields are copied; any other property on the record is
 * dropped. Values are copied by reference (no cloning). No activation is
 * performed here — that would need the full ActivationContext.
 *
 * @param {object} record - Artifact record to project.
 * @returns {object} A new object with exactly the snapshot fields.
 */
function toSnapshot(record) {
  const {
    artifactId,
    artifactKind,
    sourceTaskId,
    sourceRuleId,
    lineageArtifactIds,
    validationStatus,
    contentJson,
    createdAt,
    updatedAt,
  } = record;
  return {
    artifactId,
    artifactKind,
    sourceTaskId,
    sourceRuleId,
    lineageArtifactIds,
    validationStatus,
    contentJson,
    createdAt,
    updatedAt,
  };
}
584
+ describe('EvaluatorRunner V2 — rule artifact assembly (PRI-427)', () => {
585
it('adversarialResult.passed=true → writes rule artifact with artifactKind=rule + validated', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const gateOutcome = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate({}, gateOutcome);
  const deps = createMockDeps({ artifactStore });

  const outcome = await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);
  expect(outcome.status).toBe('succeeded');

  // Among the artifacts written for the evaluator task there must be exactly
  // one of kind 'rule', and it must be marked validated.
  const written = await artifactStore.listBySourceTaskId(EVALUATOR_TASK_ID);
  const ruleArtifacts = written.filter(({ artifactKind }) => artifactKind === 'rule');
  expect(ruleArtifacts.length).toBe(1);

  const [ruleArtifact] = ruleArtifacts;
  expect(ruleArtifact.validationStatus).toBe('validated');
});
608
it('rule artifact contentJson carries implementationCode + goldenTrace + ruleHostGateDecision + affectedTools', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const gateOutcome = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate({}, gateOutcome);
  const deps = createMockDeps({ artifactStore });

  await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  const written = await artifactStore.listBySourceTaskId(EVALUATOR_TASK_ID);
  const ruleArtifact = written.find(({ artifactKind }) => artifactKind === 'rule');
  expect(ruleArtifact).toBeDefined();

  // Decode the stored JSON and check each field the test title names.
  const {
    implementationCode,
    goldenTrace,
    ruleHostGateDecision,
    affectedTools,
  } = JSON.parse(ruleArtifact.contentJson);

  expect(typeof implementationCode).toBe('string');
  expect(implementationCode.length).toBeGreaterThan(0);

  expect(goldenTrace).toBeDefined();
  expect(typeof goldenTrace.traceId).toBe('string');
  expect(Array.isArray(goldenTrace.cases)).toBe(true);
  expect(goldenTrace.cases.length).toBeGreaterThan(0);

  expect(ruleHostGateDecision).toBe('accepted_shadow');
  expect(Array.isArray(affectedTools)).toBe(true);
});
636
it('rule artifact goldenTrace is the Artificer full trace (pos+neg), NOT the adversarial-only trace', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const gateOutcome = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate({}, gateOutcome);
  const deps = createMockDeps({ artifactStore });

  await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  const written = await artifactStore.listBySourceTaskId(EVALUATOR_TASK_ID);
  const ruleArtifact = written.find(({ artifactKind }) => artifactKind === 'rule');
  expect(ruleArtifact).toBeDefined();

  const storedCases = JSON.parse(ruleArtifact.contentJson).goldenTrace.cases;
  const ofKind = (kind) => storedCases.filter((traceCase) => traceCase.kind === kind);

  // The Artificer fixture has exactly one positive and one negative case.
  // The rule artifact must carry both (the production trace used for
  // enforcement), not the adversarial replay trace with its 3 negatives.
  expect(ofKind('positive').length).toBe(1);
  expect(ofKind('negative').length).toBe(1);

  // No adversarial caseId may appear in the stored trace.
  const storedCaseIds = storedCases.map(({ caseId }) => caseId);
  expect(storedCaseIds).not.toContain('adv-boundary-1');
});
665
it('adversarialResult.passed=false → does NOT write rule artifact (principle artifact only)', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  // Gate outcome: validation rejected for one adversarial case.
  const expectedFailures = ['adv-boundary-1'];
  const rejectedOutcome = {
    decision: 'rejected_validation_failed',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultValidationFailed(expectedFailures),
    reasons: ['validation failed'],
  };
  const gateDeps = makeRecordingGate({}, rejectedOutcome);
  const deps = createMockDeps({ artifactStore });

  const outcome = await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);
  expect(outcome.status).toBe('succeeded');

  const written = await artifactStore.listBySourceTaskId(EVALUATOR_TASK_ID);
  const countOfKind = (kind) =>
    written.filter(({ artifactKind }) => artifactKind === kind).length;

  expect(countOfKind('rule')).toBe(0);
  // The principle artifact is still present (prompt-channel fallback).
  expect(countOfKind('principle')).toBe(1);
});
687
it('V1 output (no codeReview) → no rule artifact written', async () => {
  const store = new MemoryPIArtifactStore();
  await store.upsertArtifact(makeV1ArtificerArtifact());
  await store.upsertArtifact(makeScribeArtifact());
  const gateDeps = { evaluateInSandbox: vi.fn(() => sandboxResultSuccess()) };
  const deps = createMockDeps({ artifactStore: store });

  // V1-shaped evaluator payload: it has neither codeReview nor adversarialCases.
  const v1Output = {
    taskId: EVALUATOR_TASK_ID,
    sourceArtificerArtifactId: 'pi-art-artificer-001-run-001',
    evaluation: {
      decision: 'approved',
      summary: 'V1 plan approved.',
      score: 0.8,
      strengths: [],
      concerns: [],
      requiredChanges: [],
    },
    sourceTrace: {
      artificerArtifactId: 'pi-art-artificer-001-run-001',
      scribeArtifactId: 'pi-art-scribe-001',
    },
    risks: [],
    generatedAt: new Date().toISOString(),
  };
  deps.runtimeAdapter.fetchOutput = vi.fn().mockResolvedValue({ payload: v1Output });

  const runner = makeRunner(deps, gateDeps);
  const result = await runner.run(EVALUATOR_TASK_ID);

  // Guard against a vacuous pass: a run that failed outright would also leave
  // zero rule artifacts behind, so confirm the run succeeded first.
  expect(result.status).toBe('succeeded');

  const artifacts = await store.listBySourceTaskId(EVALUATOR_TASK_ID);
  const ruleArtifacts = artifacts.filter((a) => a.artifactKind === 'rule');
  expect(ruleArtifacts.length).toBe(0);
});
718
it('rule artifact write failure does NOT crash the runner; principle artifact remains (degradation)', async () => {
  const store = new MemoryPIArtifactStore();
  await store.upsertArtifact(makeV2ArtificerArtifact());
  await store.upsertArtifact(makeScribeArtifact());

  // Wrap the store so that EVERY upsert of an artifactKind='rule' record
  // throws, while all other upserts are delegated to the real in-memory
  // store. The remaining methods are re-bound explicitly so they keep
  // operating on the real store.
  const failingStore = {
    ...store,
    upsertArtifact: vi.fn(async (record) => {
      if (record.artifactKind === 'rule') {
        throw new Error('simulated rule artifact write failure');
      }
      return store.upsertArtifact(record);
    }),
    getArtifactById: store.getArtifactById.bind(store),
    listBySourceTaskId: store.listBySourceTaskId.bind(store),
    updateValidationStatus: store.updateValidationStatus.bind(store),
    createArtifact: store.createArtifact.bind(store),
    listLineage: store.listLineage.bind(store),
  };

  const gateDeps = makeRecordingGate({}, {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  });
  const deps = createMockDeps({ artifactStore: failingStore });
  const runner = makeRunner(deps, gateDeps);
  const result = await runner.run(EVALUATOR_TASK_ID);

  // PRD Decision 5 degradation: when assembly fails the principle artifact is
  // still written and the prompt channel stays usable. The runner must not crash.
  expect(result.status).toBe('succeeded');

  // Inspect the real store: one principle artifact, no rule artifact.
  const artifacts = await store.listBySourceTaskId(EVALUATOR_TASK_ID);
  const principleArtifacts = artifacts.filter((a) => a.artifactKind === 'principle');
  expect(principleArtifacts.length).toBe(1);
  const ruleArtifacts = artifacts.filter((a) => a.artifactKind === 'rule');
  expect(ruleArtifacts.length).toBe(0);

  // Telemetry must record the assembly failure with a reason.
  const events = deps.eventEmitter.emitTelemetry.mock.calls.map((call) => call[0]);
  const asmFail = events.find((e) => e.eventType === 'evaluator_rule_assembly_failed');
  expect(asmFail).toBeDefined();
  expect(typeof asmFail?.payload?.reason).toBe('string');
});
766
it('produced rule artifact passes RuleHostWriter.canActivate field checks', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const gateOutcome = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate({}, gateOutcome);
  const deps = createMockDeps({ artifactStore });

  await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  const written = await artifactStore.listBySourceTaskId(EVALUATOR_TASK_ID);
  const ruleArtifact = written.find(({ artifactKind }) => artifactKind === 'rule');
  expect(ruleArtifact).toBeDefined();

  // RuleHostWriter.canActivate re-runs the gate, so the writer gets the same
  // gateDeps to keep the gate decision consistent. The artifact has to pass
  // the kind / validationStatus / implementationCode / goldenTrace /
  // ruleHostGateDecision checks as well as the gate's own replay.
  const writer = new RuleHostWriter({ gateDeps });
  const verdict = await writer.canActivate(toSnapshot(ruleArtifact));

  // canActivate returns { ok: true, riskLevel } once every field check passes
  // and the gate re-replay accepts.
  expect(verdict.ok).toBe(true);
});
796
it('emits evaluator_rule_assembled telemetry on successful rule artifact write', async () => {
  const artifactStore = new MemoryPIArtifactStore();
  await artifactStore.upsertArtifact(makeV2ArtificerArtifact());
  await artifactStore.upsertArtifact(makeScribeArtifact());

  const gateOutcome = {
    decision: 'accepted_shadow',
    applicationMode: 'shadow',
    sandboxResult: sandboxResultSuccess(),
    reasons: [],
  };
  const gateDeps = makeRecordingGate({}, gateOutcome);
  const deps = createMockDeps({ artifactStore });

  await makeRunner(deps, gateDeps).run(EVALUATOR_TASK_ID);

  // The "assembled" event must exist and carry a string artifactId.
  const emitted = deps.eventEmitter.emitTelemetry.mock.calls.map(([event]) => event);
  const assembledEvent = emitted.find(
    ({ eventType }) => eventType === 'evaluator_rule_assembled',
  );
  expect(assembledEvent).toBeDefined();
  expect(typeof assembledEvent?.payload?.artifactId).toBe('string');
});
814
+ });
815
+ //# sourceMappingURL=evaluator-runner-vslice-v2.test.js.map