@exaudeus/workrail 3.10.0 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59):
  1. package/dist/application/services/validation-engine.js +134 -0
  2. package/dist/application/services/workflow-compiler.js +54 -0
  3. package/dist/manifest.json +138 -74
  4. package/dist/mcp/handlers/v2-advance-core/assessment-consequences.d.ts +14 -0
  5. package/dist/mcp/handlers/v2-advance-core/assessment-consequences.js +27 -0
  6. package/dist/mcp/handlers/v2-advance-core/assessment-validation.d.ts +16 -0
  7. package/dist/mcp/handlers/v2-advance-core/assessment-validation.js +213 -0
  8. package/dist/mcp/handlers/v2-advance-core/event-builders.d.ts +1 -0
  9. package/dist/mcp/handlers/v2-advance-core/event-builders.js +3 -2
  10. package/dist/mcp/handlers/v2-advance-core/index.js +23 -8
  11. package/dist/mcp/handlers/v2-advance-core/input-validation.d.ts +9 -1
  12. package/dist/mcp/handlers/v2-advance-core/input-validation.js +22 -2
  13. package/dist/mcp/handlers/v2-advance-core/outcome-blocked.d.ts +2 -0
  14. package/dist/mcp/handlers/v2-advance-core/outcome-blocked.js +69 -19
  15. package/dist/mcp/handlers/v2-advance-core/outcome-success.js +22 -0
  16. package/dist/mcp/handlers/v2-execution/replay.js +7 -0
  17. package/dist/mcp/output-schemas.d.ts +156 -42
  18. package/dist/mcp/output-schemas.js +15 -0
  19. package/dist/mcp/v2-response-formatter.js +7 -1
  20. package/dist/types/workflow-definition.d.ts +26 -0
  21. package/dist/types/workflow-definition.js +16 -1
  22. package/dist/v2/durable-core/constants.d.ts +2 -0
  23. package/dist/v2/durable-core/constants.js +2 -0
  24. package/dist/v2/durable-core/domain/assessment-consequence-event-builder.d.ts +23 -0
  25. package/dist/v2/durable-core/domain/assessment-consequence-event-builder.js +36 -0
  26. package/dist/v2/durable-core/domain/assessment-record.d.ts +12 -0
  27. package/dist/v2/durable-core/domain/assessment-record.js +2 -0
  28. package/dist/v2/durable-core/domain/assessment-recorded-event-builder.d.ts +22 -0
  29. package/dist/v2/durable-core/domain/assessment-recorded-event-builder.js +38 -0
  30. package/dist/v2/durable-core/domain/blocked-node-builder.d.ts +1 -1
  31. package/dist/v2/durable-core/domain/blocked-node-builder.js +8 -0
  32. package/dist/v2/durable-core/domain/blocking-decision.d.ts +6 -0
  33. package/dist/v2/durable-core/domain/blocking-decision.js +15 -0
  34. package/dist/v2/durable-core/domain/prompt-renderer.js +25 -1
  35. package/dist/v2/durable-core/domain/reason-model.d.ts +12 -2
  36. package/dist/v2/durable-core/domain/reason-model.js +27 -2
  37. package/dist/v2/durable-core/domain/risk-policy-guardrails.js +1 -0
  38. package/dist/v2/durable-core/domain/validation-criteria-validator.d.ts +1 -0
  39. package/dist/v2/durable-core/domain/validation-criteria-validator.js +11 -0
  40. package/dist/v2/durable-core/schemas/artifacts/assessment.d.ts +55 -0
  41. package/dist/v2/durable-core/schemas/artifacts/assessment.js +29 -0
  42. package/dist/v2/durable-core/schemas/artifacts/index.d.ts +2 -1
  43. package/dist/v2/durable-core/schemas/artifacts/index.js +8 -1
  44. package/dist/v2/durable-core/schemas/compiled-workflow/index.d.ts +24 -24
  45. package/dist/v2/durable-core/schemas/execution-snapshot/blocked-snapshot.d.ts +141 -21
  46. package/dist/v2/durable-core/schemas/execution-snapshot/blocked-snapshot.js +10 -1
  47. package/dist/v2/durable-core/schemas/execution-snapshot/execution-snapshot.v1.d.ts +729 -171
  48. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +1442 -202
  49. package/dist/v2/durable-core/schemas/session/events.d.ts +231 -8
  50. package/dist/v2/durable-core/schemas/session/events.js +36 -0
  51. package/dist/v2/durable-core/schemas/session/gaps.d.ts +2 -2
  52. package/dist/v2/projections/assessment-consequences.d.ts +19 -0
  53. package/dist/v2/projections/assessment-consequences.js +33 -0
  54. package/dist/v2/projections/assessments.d.ts +21 -0
  55. package/dist/v2/projections/assessments.js +35 -0
  56. package/package.json +1 -1
  57. package/spec/workflow.schema.json +110 -0
  58. package/workflows/bug-investigation.agentic.v2.json +28 -2
  59. package/workflows/test-artifact-loop-control.json +28 -2
@@ -31,6 +31,19 @@
31
31
  "TRIGGERS: WorkRail can only react to explicit outputs. Use structural fields like `contextUnknownCount`, `hypothesesConsideredCount`, `hasStrongAlternative`, `contradictionCount`, `unresolvedEvidenceGapCount`, and `diagnosisConfidenceBand`.",
32
32
  "BOUNDARY: this workflow investigates and proves root cause. It may describe high-level fix direction and likely files, but must not create implementation plans, patch sequencing, PR plans, or code-writing momentum."
33
33
  ],
34
+ "assessments": [
35
+ {
36
+ "id": "diagnosis_readiness_gate",
37
+ "purpose": "Assess whether the diagnosis is ready to hand off after validation.",
38
+ "dimensions": [
39
+ {
40
+ "id": "confidence",
41
+ "purpose": "How confident the agent is that the diagnosis is ready for final handoff.",
42
+ "levels": ["low", "high"]
43
+ }
44
+ ]
45
+ }
46
+ ],
34
47
  "steps": [
35
48
  {
36
49
  "id": "phase-0-triage-and-intake",
@@ -116,7 +129,20 @@
116
129
  {
117
130
  "id": "phase-5-diagnosis-validation",
118
131
  "title": "Phase 5: Diagnosis Validation Bundle",
119
- "prompt": "Stress-test the current diagnosis before handoff.\n\nSet `diagnosisConfidenceBand` using these rules:\n- High = all symptoms explained, no material contradictions, no unresolved evidence gaps\n- Medium = likely diagnosis, but one bounded uncertainty remains\n- Low = multiple viable explanations remain or contradictions are unresolved\n\nMode-adaptive validation:\n- QUICK: self-challenge; if `diagnosisConfidenceBand != High` or contradictions remain, optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge`\n- STANDARD: if delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge` and `routine-execution-simulation`\n- THOROUGH: if delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge`, `routine-execution-simulation`, and an additional `routine-hypothesis-challenge` pass focused on breaking the current diagnosis from a different angle\n\nParallel-output synthesis rules:\n- if 2+ validators raise serious concerns, reopen evidence or shortlist work\n- if exactly one validator raises a concern, investigate it before escalating\n- if no validator can materially break the diagnosis and `contradictionCount = 0`, proceed to handoff\n\nSet context variables:\n- `diagnosisConfidenceBand`\n- `validationFindingsCountBySeverity`\n- `validationSummary`\n\nBoundary rule:\n- allowed: high-level fix direction, likely files involved, verification recommendations\n- not allowed: implementation plan, patch sequencing, PR plan, or code-writing momentum",
132
+ "prompt": "Stress-test the current diagnosis before handoff.\n\nSet `diagnosisConfidenceBand` using these rules:\n- High = all symptoms explained, no material contradictions, no unresolved evidence gaps\n- Medium = likely diagnosis, but one bounded uncertainty remains\n- Low = multiple viable explanations remain or contradictions are unresolved\n\nMode-adaptive validation:\n- QUICK: self-challenge; if `diagnosisConfidenceBand != High` or contradictions remain, optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge`\n- STANDARD: if delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge` and `routine-execution-simulation`\n- THOROUGH: if delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge`, `routine-execution-simulation`, and an additional `routine-hypothesis-challenge` pass focused on breaking the current diagnosis from a different angle\n\nParallel-output synthesis rules:\n- if 2+ validators raise serious concerns, reopen evidence or shortlist work\n- if exactly one validator raises a concern, investigate it before escalating\n- if no validator can materially break the diagnosis and `contradictionCount = 0`, proceed to handoff\n\nAfter synthesizing the validation result, assess whether the diagnosis is ready for final handoff.\n\nSet context variables:\n- `diagnosisConfidenceBand`\n- `validationFindingsCountBySeverity`\n- `validationSummary`\n\nBoundary rule:\n- allowed: high-level fix direction, likely files involved, verification recommendations\n- not allowed: implementation plan, patch sequencing, PR plan, or code-writing momentum",
133
+ "assessmentRefs": ["diagnosis_readiness_gate"],
134
+ "assessmentConsequences": [
135
+ {
136
+ "when": {
137
+ "dimensionId": "confidence",
138
+ "equalsLevel": "low"
139
+ },
140
+ "effect": {
141
+ "kind": "require_followup",
142
+ "guidance": "Resolve the remaining diagnosis uncertainty, tighten the evidence summary, and retry this validation step before handing off."
143
+ }
144
+ }
145
+ ],
120
146
  "requireConfirmation": {
121
147
  "or": [
122
148
  { "var": "diagnosisConfidenceBand", "equals": "Low" },
@@ -131,4 +157,4 @@
131
157
  "requireConfirmation": true
132
158
  }
133
159
  ]
134
- }
160
+ }
@@ -53,8 +53,34 @@
53
53
  {
54
54
  "id": "complete",
55
55
  "title": "Complete",
56
- "prompt": "The iteration loop has completed. Summarize what was accomplished.",
57
- "requireConfirmation": false
56
+ "prompt": "The iteration loop has completed. Assess whether the loop result is ready to publish.\n\nProvide an assessment artifact for readiness.",
57
+ "requireConfirmation": false,
58
+ "assessmentRefs": ["readiness_gate"],
59
+ "assessmentConsequences": [
60
+ {
61
+ "when": {
62
+ "dimensionId": "confidence",
63
+ "equalsLevel": "low"
64
+ },
65
+ "effect": {
66
+ "kind": "require_followup",
67
+ "guidance": "Review the loop result one more time and confirm the outcome before completing this step."
68
+ }
69
+ }
70
+ ]
71
+ }
72
+ ],
73
+ "assessments": [
74
+ {
75
+ "id": "readiness_gate",
76
+ "purpose": "Assess whether the loop result is ready to publish.",
77
+ "dimensions": [
78
+ {
79
+ "id": "confidence",
80
+ "purpose": "How confident the agent is that the loop result is complete and correct.",
81
+ "levels": ["low", "high"]
82
+ }
83
+ ]
58
84
  }
59
85
  ]
60
86
  }