kairos-chain 3.12.0 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -0
- data/lib/kairos_mcp/version.rb +1 -1
- data/templates/skillsets/agent/config/agent.yml +14 -0
- data/templates/skillsets/agent/lib/agent/session.rb +31 -0
- data/templates/skillsets/agent/test/test_agent_complexity_review.rb +585 -0
- data/templates/skillsets/agent/tools/agent_step.rb +429 -1
- data/templates/skillsets/llm_client/lib/llm_client/claude_code_adapter.rb +24 -3
- data/templates/skillsets/llm_client/tools/llm_call.rb +6 -3
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9677cd39be103647a6f733640b46d4785fb7898bff420596ac9bf34880b5d9f7
|
|
4
|
+
data.tar.gz: daaff4ac3d451e272429787ccd1ffd0c22c3522c355a382f0129d89d806b39c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 73aa8e44ecb933877178888fe64c494a5a85544051aa68f58d7823e541322a53a9a39d73d907bd6c30974bb79a7b4ada52c31c77e0165d5a17ba71df7981e663
|
|
7
|
+
data.tar.gz: de7dc8b133bdc276781da26aa6fea468b26e7190af199cfaa4c0de2e966622cd171c040431c4baa7c0712e4728b4ffbe4fe8d661e594cf07c54c26b0c28b0b1d
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,51 @@ All notable changes to the `kairos-chain` gem will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
This project follows [Semantic Versioning](https://semver.org/).
|
|
6
6
|
|
|
7
|
+
## [3.13.0] - 2026-04-02
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **Complexity-driven review for Agent auto mode** — New Gate 5.5a/b (pre-ACT) and
|
|
12
|
+
Gate 6.5 (post-ACT) in the Agent autonomous OODA loop.
|
|
13
|
+
- **Structural complexity assessment** with 7 signals: `high_risk`, `many_steps`,
|
|
14
|
+
`design_scope`, `l0_change`, `core_files`, `multi_file`, `state_mutation`
|
|
15
|
+
- **LLM self-assessment merge**: DECIDE prompt requests `complexity_hint`; merge rule
|
|
16
|
+
caps LLM at structural + 1 level (prevents over-reporting)
|
|
17
|
+
- **Gate 5.5a**: L0 changes always checkpoint with multi-LLM review prompt generation
|
|
18
|
+
- **Gate 5.5b**: High complexity triggers Persona Assembly review (inner retry loop
|
|
19
|
+
with max re-DECIDE attempts, risk/loop/complexity re-checks per revision)
|
|
20
|
+
- **Gate 6.5**: Medium complexity runs post-ACT lightweight advisory review
|
|
21
|
+
- Low complexity: no overhead (unchanged flow)
|
|
22
|
+
- Parse failures default to REVISE (never silent APPROVE)
|
|
23
|
+
- Persona definitions loaded from L1 knowledge with hardcoded fallback
|
|
24
|
+
- Configuration: `complexity_review` section in `agent.yml` (personas, retries,
|
|
25
|
+
L0 checkpoint policy, post-ACT toggle)
|
|
26
|
+
- New `review` phase config in `agent.yml` (max_llm_calls, max_tool_calls)
|
|
27
|
+
- Session: `save_review_result`, `load_review_result`, `save_progress_amendment`
|
|
28
|
+
- Tests: 37 new (complexity assessment, persona review parsing, config, session, prompts)
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
|
|
32
|
+
- **`llm_call.rb` eager adapter loading** — All provider adapters were unconditionally
|
|
33
|
+
required at startup, crashing with `LoadError` when optional gems (`faraday`,
|
|
34
|
+
`aws-sdk`) were not installed. Now lazy-loads adapters in `build_adapter()`;
|
|
35
|
+
only `claude_code_adapter` and base modules loaded at startup.
|
|
36
|
+
- **`claude_code_adapter` recursive MCP server loading** — `claude -p` subprocess
|
|
37
|
+
loaded `.mcp.json` and spawned additional MCP server instances, causing deadlocks
|
|
38
|
+
(stdio) or port conflicts (HTTP). Fixed with `--mcp-config '{"mcpServers":{}}'`
|
|
39
|
+
and `--no-session-persistence`.
|
|
40
|
+
- **`claude_code_adapter` missing timeout** — `Open3.capture3` had no timeout,
|
|
41
|
+
risking indefinite hangs. Wrapped with `Timeout.timeout` (default 120s,
|
|
42
|
+
configurable via `timeout_seconds` in `llm_client.yml`).
|
|
43
|
+
|
|
44
|
+
### Design Process
|
|
45
|
+
|
|
46
|
+
- Complexity review design: 2R x 2 LLMs (Claude Team, Cursor Composer) → APPROVED
|
|
47
|
+
- Complexity review impl: R1 x 3 LLMs (Claude Team, Cursor, Codex) → fixes applied
|
|
48
|
+
- Codex found off-by-one in retry counter and cycle number (both fixed)
|
|
49
|
+
- R2 (Claude Team) → APPROVED
|
|
50
|
+
- llm_client fixes: reported by SUSHI self-maintenance MCP project, verified in upstream
|
|
51
|
+
|
|
7
52
|
## [3.12.0] - 2026-04-02
|
|
8
53
|
|
|
9
54
|
### Added
|
data/lib/kairos_mcp/version.rb
CHANGED
|
data/templates/skillsets/agent/config/agent.yml
CHANGED
|
@@ -17,6 +17,10 @@ phases:
|
|
|
17
17
|
reflect:
|
|
18
18
|
max_llm_calls: 3
|
|
19
19
|
max_tool_calls: 0
|
|
20
|
+
review:
|
|
21
|
+
max_llm_calls: 5
|
|
22
|
+
max_tool_calls: 0
|
|
23
|
+
max_repair_attempts: 1
|
|
20
24
|
|
|
21
25
|
# Default policy
|
|
22
26
|
tool_blacklist:
|
|
@@ -62,5 +66,15 @@ agent_execute:
|
|
|
62
66
|
default_budget_usd: 0.50
|
|
63
67
|
max_budget_usd: 2.00
|
|
64
68
|
|
|
69
|
+
# Complexity-driven review (Gate 5.5 / Gate 6.5)
|
|
70
|
+
complexity_review:
|
|
71
|
+
enabled: true
|
|
72
|
+
personas: [pragmatic, skeptic] # default personas for review
|
|
73
|
+
high_personas: [kairos, pragmatic, skeptic] # personas for high complexity
|
|
74
|
+
max_review_retries: 2 # max re-DECIDE from review feedback
|
|
75
|
+
# review_budget_llm is controlled by phases.review.max_llm_calls (above)
|
|
76
|
+
l0_always_checkpoint: true # L0 changes always pause for human
|
|
77
|
+
post_act_review: true # enable medium-complexity post-ACT review
|
|
78
|
+
|
|
65
79
|
# Audit
|
|
66
80
|
audit_level: summary
|
data/templates/skillsets/agent/lib/agent/session.rb
CHANGED
|
@@ -161,8 +161,39 @@ module KairosMcp
|
|
|
161
161
|
end
|
|
162
162
|
end
|
|
163
163
|
|
|
164
|
+
# Save persona review result for audit trail.
|
|
165
|
+
def save_review_result(review)
|
|
166
|
+
File.write(review_path, JSON.pretty_generate(review))
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# Load the last persona review result.
|
|
170
|
+
def load_review_result
|
|
171
|
+
return nil unless File.exist?(review_path)
|
|
172
|
+
JSON.parse(File.read(review_path), symbolize_names: true)
|
|
173
|
+
rescue JSON::ParserError
|
|
174
|
+
nil
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Append review concerns as a progress amendment entry.
|
|
178
|
+
# The entry is written as a single JSON line appended to the progress file.
|
|
179
|
+
# Called after run_act_reflect_internal which already incremented cycle_number,
|
|
180
|
+
# so @cycle_number is the current (post-increment) cycle.
|
|
181
|
+
def save_progress_amendment(concerns)
|
|
182
|
+
entry = {
|
|
183
|
+
'cycle' => @cycle_number,
|
|
184
|
+
'timestamp' => Time.now.utc.iso8601,
|
|
185
|
+
'type' => 'review_amendment',
|
|
186
|
+
'concerns' => concerns
|
|
187
|
+
}
|
|
188
|
+
File.open(progress_path, 'a') { |f| f.puts(JSON.generate(entry)) }
|
|
189
|
+
end
|
|
190
|
+
|
|
164
191
|
private
|
|
165
192
|
|
|
193
|
+
def review_path
|
|
194
|
+
File.join(session_dir, 'last_review.json')
|
|
195
|
+
end
|
|
196
|
+
|
|
166
197
|
def session_dir
|
|
167
198
|
dir = self.class.storage_path("agent_sessions/#{@session_id}")
|
|
168
199
|
FileUtils.mkdir_p(dir)
|
data/templates/skillsets/agent/test/test_agent_complexity_review.rb
ADDED
|
@@ -0,0 +1,585 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Test suite for Complexity-Driven Review Integration
|
|
5
|
+
# Tests: M1 (complexity assessment), M2 (persona review), M3 (autonomous loop), M4 (config)
|
|
6
|
+
# Usage: ruby test_agent_complexity_review.rb
|
|
7
|
+
|
|
8
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __dir__)
|
|
9
|
+
$LOAD_PATH.unshift File.expand_path('../../../../lib', __dir__)
|
|
10
|
+
|
|
11
|
+
require 'json'
|
|
12
|
+
require 'yaml'
|
|
13
|
+
require 'fileutils'
|
|
14
|
+
require 'tmpdir'
|
|
15
|
+
require 'digest'
|
|
16
|
+
require 'time'
|
|
17
|
+
require 'kairos_mcp/invocation_context'
|
|
18
|
+
require 'kairos_mcp/tools/base_tool'
|
|
19
|
+
require 'kairos_mcp/tool_registry'
|
|
20
|
+
require_relative '../lib/agent'
|
|
21
|
+
require_relative '../tools/agent_start'
|
|
22
|
+
require_relative '../tools/agent_step'
|
|
23
|
+
require_relative '../tools/agent_status'
|
|
24
|
+
require_relative '../tools/agent_stop'
|
|
25
|
+
|
|
26
|
+
$pass = 0
|
|
27
|
+
$fail = 0
|
|
28
|
+
|
|
29
|
+
def assert(description, &block)
|
|
30
|
+
result = block.call
|
|
31
|
+
if result
|
|
32
|
+
$pass += 1
|
|
33
|
+
puts " PASS: #{description}"
|
|
34
|
+
else
|
|
35
|
+
$fail += 1
|
|
36
|
+
puts " FAIL: #{description}"
|
|
37
|
+
end
|
|
38
|
+
rescue StandardError => e
|
|
39
|
+
$fail += 1
|
|
40
|
+
puts " FAIL: #{description} (#{e.class}: #{e.message})"
|
|
41
|
+
puts " #{e.backtrace.first(3).join("\n ")}"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def section(title)
|
|
45
|
+
puts "\n#{'=' * 60}"
|
|
46
|
+
puts "TEST: #{title}"
|
|
47
|
+
puts '=' * 60
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# ---- Test infrastructure ----
|
|
51
|
+
|
|
52
|
+
TMPDIR = Dir.mktmpdir('agent_complexity_test')
|
|
53
|
+
|
|
54
|
+
module Autonomos
|
|
55
|
+
@storage_base = TMPDIR
|
|
56
|
+
|
|
57
|
+
def self.storage_path(subpath)
|
|
58
|
+
path = File.join(@storage_base, subpath)
|
|
59
|
+
FileUtils.mkdir_p(path)
|
|
60
|
+
path
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def self.config
|
|
64
|
+
{}
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
module Ooda
|
|
68
|
+
COMPLEX_KEYWORDS = /\b(architect|design|refactor|migrat|restructur|integrat|security|auth)/i
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
require File.expand_path('../../../../.kairos/skillsets/autonomos/lib/autonomos/mandate',
|
|
73
|
+
File.dirname(__dir__))
|
|
74
|
+
|
|
75
|
+
Session = KairosMcp::SkillSets::Agent::Session
|
|
76
|
+
AgentStep = KairosMcp::SkillSets::Agent::Tools::AgentStep
|
|
77
|
+
MandateAdapter = KairosMcp::SkillSets::Agent::MandateAdapter
|
|
78
|
+
|
|
79
|
+
module Autoexec
|
|
80
|
+
class TaskDsl
|
|
81
|
+
def self.from_json(json_str)
|
|
82
|
+
parsed = JSON.parse(json_str)
|
|
83
|
+
raise ArgumentError, "Missing task_id" unless parsed['task_id']
|
|
84
|
+
raise ArgumentError, "Missing steps" unless parsed['steps'].is_a?(Array)
|
|
85
|
+
parsed
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# ---- Mock tools ----
|
|
91
|
+
|
|
92
|
+
class MockLlmCall < KairosMcp::Tools::BaseTool
|
|
93
|
+
@@responses = []
|
|
94
|
+
def self.queue_response(r); @@responses << r; end
|
|
95
|
+
def self.clear!; @@responses.clear; end
|
|
96
|
+
def name; 'llm_call'; end
|
|
97
|
+
def description; 'mock'; end
|
|
98
|
+
def input_schema; { type: 'object', properties: {} }; end
|
|
99
|
+
def call(arguments)
|
|
100
|
+
resp = @@responses.shift || { 'content' => 'default', 'tool_use' => nil, 'stop_reason' => 'end_turn' }
|
|
101
|
+
text_content(JSON.generate({
|
|
102
|
+
'status' => 'ok', 'provider' => 'mock', 'model' => 'mock-1',
|
|
103
|
+
'response' => resp, 'usage' => { 'input_tokens' => 10, 'output_tokens' => 20 },
|
|
104
|
+
'snapshot' => { 'model' => 'mock-1', 'timestamp' => Time.now.iso8601 }
|
|
105
|
+
}))
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
class MockKnowledgeGet < KairosMcp::Tools::BaseTool
|
|
110
|
+
def name; 'knowledge_get'; end
|
|
111
|
+
def description; 'mock'; end
|
|
112
|
+
def input_schema; { type: 'object', properties: {} }; end
|
|
113
|
+
def call(arguments)
|
|
114
|
+
text_content(JSON.generate({ 'name' => arguments['name'], 'content' => 'mock persona content' }))
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
class MockAutoexecPlan < KairosMcp::Tools::BaseTool
|
|
119
|
+
def name; 'autoexec_plan'; end
|
|
120
|
+
def description; 'mock'; end
|
|
121
|
+
def input_schema; { type: 'object', properties: {} }; end
|
|
122
|
+
def call(arguments)
|
|
123
|
+
task_json = JSON.parse(arguments['task_json'])
|
|
124
|
+
text_content(JSON.generate({
|
|
125
|
+
'status' => 'ok', 'task_id' => task_json['task_id'] || 'mock_001',
|
|
126
|
+
'plan_hash' => Digest::SHA256.hexdigest(arguments['task_json'])[0..15]
|
|
127
|
+
}))
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
class MockAutoexecRun < KairosMcp::Tools::BaseTool
|
|
132
|
+
def name; 'autoexec_run'; end
|
|
133
|
+
def description; 'mock'; end
|
|
134
|
+
def input_schema; { type: 'object', properties: {} }; end
|
|
135
|
+
def call(arguments)
|
|
136
|
+
text_content(JSON.generate({ 'status' => 'ok', 'outcome' => 'step_complete' }))
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def build_registry
|
|
141
|
+
registry = KairosMcp::ToolRegistry.allocate
|
|
142
|
+
registry.instance_variable_set(:@safety, KairosMcp::Safety.new)
|
|
143
|
+
registry.instance_variable_set(:@tools, {})
|
|
144
|
+
KairosMcp::ToolRegistry.clear_gates!
|
|
145
|
+
tools = {
|
|
146
|
+
'llm_call' => MockLlmCall.new(nil, registry: registry),
|
|
147
|
+
'knowledge_get' => MockKnowledgeGet.new(nil, registry: registry),
|
|
148
|
+
'autoexec_plan' => MockAutoexecPlan.new(nil, registry: registry),
|
|
149
|
+
'autoexec_run' => MockAutoexecRun.new(nil, registry: registry),
|
|
150
|
+
'agent_start' => KairosMcp::SkillSets::Agent::Tools::AgentStart.new(nil, registry: registry),
|
|
151
|
+
'agent_step' => AgentStep.new(nil, registry: registry),
|
|
152
|
+
'agent_status' => KairosMcp::SkillSets::Agent::Tools::AgentStatus.new(nil, registry: registry),
|
|
153
|
+
'agent_stop' => KairosMcp::SkillSets::Agent::Tools::AgentStop.new(nil, registry: registry)
|
|
154
|
+
}
|
|
155
|
+
registry.instance_variable_set(:@tools, tools)
|
|
156
|
+
registry
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# Helper to get AgentStep instance for testing private methods
|
|
160
|
+
def build_step_tool
|
|
161
|
+
registry = build_registry
|
|
162
|
+
registry.instance_variable_get(:@tools)['agent_step']
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# ---- Decision payload factories ----
|
|
166
|
+
|
|
167
|
+
def low_complexity_payload
|
|
168
|
+
{
|
|
169
|
+
'summary' => 'Update readme file',
|
|
170
|
+
'task_json' => {
|
|
171
|
+
'task_id' => 'test_001', 'meta' => { 'description' => 'test', 'risk_default' => 'low' },
|
|
172
|
+
'steps' => [
|
|
173
|
+
{ 'step_id' => 's1', 'action' => 'edit file', 'tool_name' => 'Edit',
|
|
174
|
+
'tool_arguments' => { 'file_path' => '/tmp/readme.md' }, 'risk' => 'low',
|
|
175
|
+
'depends_on' => [], 'requires_human_cognition' => false }
|
|
176
|
+
]
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
def medium_complexity_payload
|
|
182
|
+
{
|
|
183
|
+
'summary' => 'Add logging to API handler',
|
|
184
|
+
'task_json' => {
|
|
185
|
+
'task_id' => 'test_002', 'meta' => { 'description' => 'test', 'risk_default' => 'high' },
|
|
186
|
+
'steps' => [
|
|
187
|
+
{ 'step_id' => 's1', 'action' => 'modify handler', 'tool_name' => 'Edit',
|
|
188
|
+
'tool_arguments' => { 'file_path' => '/tmp/handler.rb' }, 'risk' => 'high',
|
|
189
|
+
'depends_on' => [], 'requires_human_cognition' => false }
|
|
190
|
+
]
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
def high_complexity_payload
|
|
196
|
+
{
|
|
197
|
+
'summary' => 'Refactor authentication architecture',
|
|
198
|
+
'task_json' => {
|
|
199
|
+
'task_id' => 'test_003', 'meta' => { 'description' => 'test', 'risk_default' => 'high' },
|
|
200
|
+
'steps' => [
|
|
201
|
+
{ 'step_id' => 's1', 'action' => 'modify auth', 'tool_name' => 'Edit',
|
|
202
|
+
'tool_arguments' => { 'file_path' => '/tmp/auth.rb' }, 'risk' => 'high',
|
|
203
|
+
'depends_on' => [], 'requires_human_cognition' => false },
|
|
204
|
+
{ 'step_id' => 's2', 'action' => 'update config', 'tool_name' => 'Write',
|
|
205
|
+
'tool_arguments' => { 'file_path' => '/tmp/config.yml' }, 'risk' => 'medium',
|
|
206
|
+
'depends_on' => ['s1'], 'requires_human_cognition' => false }
|
|
207
|
+
]
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def l0_change_payload
|
|
213
|
+
{
|
|
214
|
+
'summary' => 'Update skill definitions',
|
|
215
|
+
'task_json' => {
|
|
216
|
+
'task_id' => 'test_004', 'meta' => { 'description' => 'test', 'risk_default' => 'low' },
|
|
217
|
+
'steps' => [
|
|
218
|
+
{ 'step_id' => 's1', 'action' => 'evolve skill', 'tool_name' => 'skills_evolve',
|
|
219
|
+
'tool_arguments' => { 'name' => 'test_skill' }, 'risk' => 'low',
|
|
220
|
+
'depends_on' => [], 'requires_human_cognition' => false }
|
|
221
|
+
]
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
def multi_file_payload
|
|
227
|
+
{
|
|
228
|
+
'summary' => 'Update multiple modules',
|
|
229
|
+
'task_json' => {
|
|
230
|
+
'task_id' => 'test_005', 'meta' => { 'description' => 'test', 'risk_default' => 'low' },
|
|
231
|
+
'steps' => (1..5).map { |i|
|
|
232
|
+
{ 'step_id' => "s#{i}", 'action' => 'edit', 'tool_name' => 'Edit',
|
|
233
|
+
'tool_arguments' => { 'file_path' => "/tmp/file#{i}.rb" }, 'risk' => 'low',
|
|
234
|
+
'depends_on' => [], 'requires_human_cognition' => false }
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
def state_mutation_payload
|
|
241
|
+
{
|
|
242
|
+
'summary' => 'Record knowledge update',
|
|
243
|
+
'task_json' => {
|
|
244
|
+
'task_id' => 'test_006', 'meta' => { 'description' => 'test', 'risk_default' => 'low' },
|
|
245
|
+
'steps' => [
|
|
246
|
+
{ 'step_id' => 's1', 'action' => 'update knowledge', 'tool_name' => 'knowledge_update',
|
|
247
|
+
'tool_arguments' => { 'name' => 'test' }, 'risk' => 'low',
|
|
248
|
+
'depends_on' => [], 'requires_human_cognition' => false }
|
|
249
|
+
]
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# ============================================================
|
|
255
|
+
# M1: Complexity Assessment
|
|
256
|
+
# ============================================================
|
|
257
|
+
|
|
258
|
+
section "M1: Complexity Assessment"
|
|
259
|
+
|
|
260
|
+
step = build_step_tool
|
|
261
|
+
|
|
262
|
+
assert "test_low_complexity: single low-risk step → level 'low'" do
|
|
263
|
+
result = step.send(:assess_decision_complexity, low_complexity_payload)
|
|
264
|
+
result[:level] == 'low' && result[:signals].empty?
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
assert "test_medium_complexity_risk: one high-risk step → level 'medium'" do
|
|
268
|
+
result = step.send(:assess_decision_complexity, medium_complexity_payload)
|
|
269
|
+
result[:level] == 'medium' && result[:signals] == ['high_risk']
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
assert "test_high_complexity: high_risk + design_scope → level 'high'" do
|
|
273
|
+
result = step.send(:assess_decision_complexity, high_complexity_payload)
|
|
274
|
+
result[:level] == 'high' &&
|
|
275
|
+
result[:signals].include?('high_risk') &&
|
|
276
|
+
result[:signals].include?('design_scope')
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
assert "test_l0_forces_high: single l0_change → level 'high' (not medium)" do
|
|
280
|
+
result = step.send(:assess_decision_complexity, l0_change_payload)
|
|
281
|
+
result[:level] == 'high' && result[:signals] == ['l0_change']
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
assert "test_multi_file_signal: 4+ distinct file paths → 'multi_file' signal" do
|
|
285
|
+
result = step.send(:assess_decision_complexity, multi_file_payload)
|
|
286
|
+
result[:signals].include?('multi_file')
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
assert "test_state_mutation_signal: knowledge_update tool → 'state_mutation' signal" do
|
|
290
|
+
result = step.send(:assess_decision_complexity, state_mutation_payload)
|
|
291
|
+
result[:signals].include?('state_mutation')
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
assert "test_many_steps_signal: >5 steps → 'many_steps' signal" do
|
|
295
|
+
payload = {
|
|
296
|
+
'summary' => 'Big task',
|
|
297
|
+
'task_json' => {
|
|
298
|
+
'task_id' => 'test_007', 'meta' => {},
|
|
299
|
+
'steps' => (1..7).map { |i|
|
|
300
|
+
{ 'step_id' => "s#{i}", 'action' => 'do', 'tool_name' => 'Read',
|
|
301
|
+
'tool_arguments' => {}, 'risk' => 'low', 'depends_on' => [],
|
|
302
|
+
'requires_human_cognition' => false }
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
result = step.send(:assess_decision_complexity, payload)
|
|
307
|
+
result[:signals].include?('many_steps')
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
assert "test_core_files_signal: kairos lib path → 'core_files' signal" do
|
|
311
|
+
payload = {
|
|
312
|
+
'summary' => 'Fix bug',
|
|
313
|
+
'task_json' => {
|
|
314
|
+
'task_id' => 'test_008', 'meta' => {},
|
|
315
|
+
'steps' => [
|
|
316
|
+
{ 'step_id' => 's1', 'action' => 'fix', 'tool_name' => 'Edit',
|
|
317
|
+
'tool_arguments' => { 'file_path' => '/project/kairos_mcp/lib/kairos_mcp/chain.rb' },
|
|
318
|
+
'risk' => 'low', 'depends_on' => [], 'requires_human_cognition' => false }
|
|
319
|
+
]
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
result = step.send(:assess_decision_complexity, payload)
|
|
323
|
+
result[:signals].include?('core_files')
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
assert "test_nil_task_json: missing task_json → low complexity, no crash" do
|
|
327
|
+
payload = { 'summary' => 'nothing' }
|
|
328
|
+
result = step.send(:assess_decision_complexity, payload)
|
|
329
|
+
result[:level] == 'low' && result[:signals].empty?
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
# ---- Merge complexity ----
|
|
333
|
+
|
|
334
|
+
assert "test_merge_llm_structural: LLM high + structural low → final medium (capped +1)" do
|
|
335
|
+
structural = { level: 'low', signals: [] }
|
|
336
|
+
llm_hint = { 'level' => 'high', 'signals' => ['semantic_complexity'] }
|
|
337
|
+
result = step.send(:merge_complexity, structural, llm_hint)
|
|
338
|
+
result[:level] == 'medium'
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
assert "test_merge_llm_cannot_lower: LLM low + structural high → final high" do
|
|
342
|
+
structural = { level: 'high', signals: ['high_risk', 'design_scope'] }
|
|
343
|
+
llm_hint = { 'level' => 'low', 'signals' => [] }
|
|
344
|
+
result = step.send(:merge_complexity, structural, llm_hint)
|
|
345
|
+
result[:level] == 'high'
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
assert "test_merge_llm_same_level: LLM medium + structural medium → final medium" do
|
|
349
|
+
structural = { level: 'medium', signals: ['high_risk'] }
|
|
350
|
+
llm_hint = { 'level' => 'medium', 'signals' => ['moderate_scope'] }
|
|
351
|
+
result = step.send(:merge_complexity, structural, llm_hint)
|
|
352
|
+
result[:level] == 'medium' && result[:signals].include?('moderate_scope')
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
assert "test_merge_nil_hint: nil LLM hint → structural unchanged" do
|
|
356
|
+
structural = { level: 'medium', signals: ['high_risk'] }
|
|
357
|
+
result = step.send(:merge_complexity, structural, nil)
|
|
358
|
+
result[:level] == 'medium'
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
assert "test_merge_symbol_keys: symbol-key llm_hint works" do
|
|
362
|
+
structural = { level: 'low', signals: [] }
|
|
363
|
+
llm_hint = { level: 'high', signals: ['deep'] }
|
|
364
|
+
result = step.send(:merge_complexity, structural, llm_hint)
|
|
365
|
+
result[:level] == 'medium' && result[:signals].include?('deep')
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
# ============================================================
|
|
369
|
+
# M2: Persona Review Parsing
|
|
370
|
+
# ============================================================
|
|
371
|
+
|
|
372
|
+
section "M2: Persona Review Parsing"
|
|
373
|
+
|
|
374
|
+
assert "test_parse_approve: valid JSON with APPROVE → overall_verdict APPROVE" do
|
|
375
|
+
content = JSON.generate({
|
|
376
|
+
'personas' => { 'pragmatic' => { 'verdict' => 'APPROVE' } },
|
|
377
|
+
'overall_verdict' => 'APPROVE',
|
|
378
|
+
'key_findings' => []
|
|
379
|
+
})
|
|
380
|
+
result = step.send(:parse_persona_review, content)
|
|
381
|
+
result[:overall_verdict] == 'APPROVE'
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
assert "test_parse_revise: REVISE verdict → correctly parsed" do
|
|
385
|
+
content = JSON.generate({
|
|
386
|
+
'personas' => { 'skeptic' => { 'verdict' => 'REVISE', 'concerns' => ['No rollback'] } },
|
|
387
|
+
'overall_verdict' => 'revise',
|
|
388
|
+
'key_findings' => ['No rollback plan']
|
|
389
|
+
})
|
|
390
|
+
result = step.send(:parse_persona_review, content)
|
|
391
|
+
result[:overall_verdict] == 'REVISE' && result[:key_findings] == ['No rollback plan']
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
assert "test_parse_reject: REJECT verdict → correctly parsed" do
|
|
395
|
+
content = JSON.generate({
|
|
396
|
+
'personas' => {},
|
|
397
|
+
'overall_verdict' => 'REJECT',
|
|
398
|
+
'key_findings' => ['Violates layer boundaries']
|
|
399
|
+
})
|
|
400
|
+
result = step.send(:parse_persona_review, content)
|
|
401
|
+
result[:overall_verdict] == 'REJECT'
|
|
402
|
+
end
|
|
403
|
+
|
|
404
|
+
assert "test_parse_json_error: malformed content → fallback REVISE with parse_error" do
|
|
405
|
+
result = step.send(:parse_persona_review, 'not json at all')
|
|
406
|
+
result[:overall_verdict] == 'REVISE' && result[:parse_error] == true
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
assert "test_parse_nil_content: nil → fallback REVISE" do
|
|
410
|
+
result = step.send(:parse_persona_review, nil)
|
|
411
|
+
result[:overall_verdict] == 'REVISE' && result[:parse_error] == true
|
|
412
|
+
end
|
|
413
|
+
|
|
414
|
+
assert "test_parse_missing_verdict: valid JSON but no overall_verdict → fallback REVISE" do
|
|
415
|
+
content = JSON.generate({ 'personas' => {}, 'key_findings' => [] })
|
|
416
|
+
result = step.send(:parse_persona_review, content)
|
|
417
|
+
result[:overall_verdict] == 'REVISE' && result[:parse_error] == true
|
|
418
|
+
end
|
|
419
|
+
|
|
420
|
+
assert "test_parse_non_string_verdict: numeric verdict → fallback REVISE" do
|
|
421
|
+
content = JSON.generate({ 'overall_verdict' => 42, 'key_findings' => [], 'personas' => {} })
|
|
422
|
+
result = step.send(:parse_persona_review, content)
|
|
423
|
+
result[:overall_verdict] == 'REVISE' && result[:parse_error] == true
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
assert "test_parse_code_fenced_json: JSON in code fences → parsed correctly" do
|
|
427
|
+
content = "Here is my review:\n```json\n{\"overall_verdict\": \"APPROVE\", \"key_findings\": [], \"personas\": {}}\n```"
|
|
428
|
+
result = step.send(:parse_persona_review, content)
|
|
429
|
+
result[:overall_verdict] == 'APPROVE'
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
assert "test_parse_bare_json_after_prose: JSON after text (no fences) → parsed" do
|
|
433
|
+
content = "Here is my analysis:\n{\"overall_verdict\": \"REVISE\", \"key_findings\": [\"issue found\"], \"personas\": {}}"
|
|
434
|
+
result = step.send(:parse_persona_review, content)
|
|
435
|
+
result[:overall_verdict] == 'REVISE' && result[:key_findings] == ['issue found']
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
# ---- Lightweight review parsing ----
|
|
439
|
+
|
|
440
|
+
assert "test_parse_lightweight_concerns: valid JSON → concerns extracted" do
|
|
441
|
+
content = JSON.generate({ 'concerns' => ['edge case missed'], 'suggestions' => ['add test'] })
|
|
442
|
+
result = step.send(:parse_lightweight_review, content)
|
|
443
|
+
result[:concerns] == ['edge case missed']
|
|
444
|
+
end
|
|
445
|
+
|
|
446
|
+
assert "test_parse_lightweight_nil: nil content → empty concerns" do
|
|
447
|
+
result = step.send(:parse_lightweight_review, nil)
|
|
448
|
+
result[:concerns] == []
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
assert "test_parse_lightweight_malformed: bad JSON → empty concerns" do
|
|
452
|
+
result = step.send(:parse_lightweight_review, 'garbage')
|
|
453
|
+
result[:concerns] == []
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
# ============================================================
|
|
457
|
+
# M3: review_enabled? and configuration
|
|
458
|
+
# ============================================================
|
|
459
|
+
|
|
460
|
+
section "M3: Configuration"
|
|
461
|
+
|
|
462
|
+
assert "test_review_enabled_default: no config → true" do
|
|
463
|
+
session = Session.new(
|
|
464
|
+
session_id: 'test_cfg_1', mandate_id: 'test_m', goal_name: 'test',
|
|
465
|
+
invocation_context: KairosMcp::InvocationContext.new, config: {}
|
|
466
|
+
)
|
|
467
|
+
step.send(:review_enabled?, session) == true
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
assert "test_review_disabled: enabled=false → false" do
|
|
471
|
+
session = Session.new(
|
|
472
|
+
session_id: 'test_cfg_2', mandate_id: 'test_m', goal_name: 'test',
|
|
473
|
+
invocation_context: KairosMcp::InvocationContext.new,
|
|
474
|
+
config: { 'complexity_review' => { 'enabled' => false } }
|
|
475
|
+
)
|
|
476
|
+
step.send(:review_enabled?, session) == false
|
|
477
|
+
end
|
|
478
|
+
|
|
479
|
+
assert "test_review_enabled_explicit: enabled=true → true" do
|
|
480
|
+
session = Session.new(
|
|
481
|
+
session_id: 'test_cfg_3', mandate_id: 'test_m', goal_name: 'test',
|
|
482
|
+
invocation_context: KairosMcp::InvocationContext.new,
|
|
483
|
+
config: { 'complexity_review' => { 'enabled' => true } }
|
|
484
|
+
)
|
|
485
|
+
step.send(:review_enabled?, session) == true
|
|
486
|
+
end
|
|
487
|
+
|
|
488
|
+
# ============================================================
|
|
489
|
+
# M4: Session review methods
|
|
490
|
+
# ============================================================
|
|
491
|
+
|
|
492
|
+
section "M4: Session review persistence"
|
|
493
|
+
|
|
494
|
+
assert "test_save_load_review_result: round-trip review result (symbol keys)" do
|
|
495
|
+
session = Session.new(
|
|
496
|
+
session_id: 'test_review_1', mandate_id: 'test_m', goal_name: 'test',
|
|
497
|
+
invocation_context: KairosMcp::InvocationContext.new, config: {}
|
|
498
|
+
)
|
|
499
|
+
review = { overall_verdict: 'APPROVE', key_findings: [], personas: {} }
|
|
500
|
+
session.save_review_result(review)
|
|
501
|
+
|
|
502
|
+
loaded = session.load_review_result
|
|
503
|
+
loaded && loaded[:overall_verdict] == 'APPROVE'
|
|
504
|
+
end
|
|
505
|
+
|
|
506
|
+
assert "test_load_review_result_missing: no file → nil" do
|
|
507
|
+
session = Session.new(
|
|
508
|
+
session_id: 'test_review_nonexist', mandate_id: 'test_m', goal_name: 'test',
|
|
509
|
+
invocation_context: KairosMcp::InvocationContext.new, config: {}
|
|
510
|
+
)
|
|
511
|
+
session.load_review_result.nil?
|
|
512
|
+
end
|
|
513
|
+
|
|
514
|
+
assert "test_load_review_result_symbol_keys: round-trip preserves symbol keys" do
|
|
515
|
+
session = Session.new(
|
|
516
|
+
session_id: 'test_review_sym', mandate_id: 'test_m', goal_name: 'test',
|
|
517
|
+
invocation_context: KairosMcp::InvocationContext.new, config: {}
|
|
518
|
+
)
|
|
519
|
+
review = { overall_verdict: 'APPROVE', key_findings: [], personas: {} }
|
|
520
|
+
session.save_review_result(review)
|
|
521
|
+
loaded = session.load_review_result
|
|
522
|
+
loaded && loaded[:overall_verdict] == 'APPROVE'
|
|
523
|
+
end
|
|
524
|
+
|
|
525
|
+
assert "test_save_progress_amendment: appends review concerns to progress" do
|
|
526
|
+
session = Session.new(
|
|
527
|
+
session_id: 'test_amend_1', mandate_id: 'test_m', goal_name: 'test',
|
|
528
|
+
invocation_context: KairosMcp::InvocationContext.new, config: {}
|
|
529
|
+
)
|
|
530
|
+
session.save_progress_amendment(['concern 1', 'concern 2'])
|
|
531
|
+
progress = session.load_progress
|
|
532
|
+
progress.any? { |e| e['type'] == 'review_amendment' && e['concerns'].include?('concern 1') }
|
|
533
|
+
end
|
|
534
|
+
|
|
535
|
+
assert "test_save_progress_amendment_cycle_number: uses current cycle_number" do
|
|
536
|
+
session = Session.new(
|
|
537
|
+
session_id: 'test_amend_cycle', mandate_id: 'test_m', goal_name: 'test',
|
|
538
|
+
invocation_context: KairosMcp::InvocationContext.new, config: {}
|
|
539
|
+
)
|
|
540
|
+
session.increment_cycle # simulate post-ACT increment → cycle_number = 1
|
|
541
|
+
session.save_progress_amendment(['test concern'])
|
|
542
|
+
progress = session.load_progress
|
|
543
|
+
progress.any? { |e| e['type'] == 'review_amendment' && e['cycle'] == 1 }
|
|
544
|
+
end
|
|
545
|
+
|
|
546
|
+
# ============================================================
|
|
547
|
+
# M5: Persona review prompt building
|
|
548
|
+
# ============================================================
|
|
549
|
+
|
|
550
|
+
section "M5: Prompt building"
|
|
551
|
+
|
|
552
|
+
assert "test_persona_review_prompt_contains_summary: summary in prompt" do
|
|
553
|
+
payload = high_complexity_payload
|
|
554
|
+
complexity = { level: 'high', signals: ['high_risk', 'design_scope'] }
|
|
555
|
+
persona_defs = { 'skeptic' => 'Be critical.' }
|
|
556
|
+
prompt = step.send(:build_persona_review_prompt, payload, complexity, persona_defs)
|
|
557
|
+
prompt.include?('Refactor authentication architecture') && prompt.include?('skeptic')
|
|
558
|
+
end
|
|
559
|
+
|
|
560
|
+
assert "test_lightweight_review_prompt: contains plan summary" do
|
|
561
|
+
payload = medium_complexity_payload
|
|
562
|
+
ar_result = { act: { 'summary' => 'completed' }, reflect: { 'confidence' => 0.8, 'achieved' => ['done'] } }
|
|
563
|
+
prompt = step.send(:build_lightweight_review_prompt, payload, ar_result)
|
|
564
|
+
prompt.include?('Add logging to API handler') && prompt.include?('skeptical')
|
|
565
|
+
end
|
|
566
|
+
|
|
567
|
+
assert "test_multi_llm_review_prompt: L0 review prompt generated" do
|
|
568
|
+
session = Session.new(
|
|
569
|
+
session_id: 'test_mlp_1', mandate_id: 'test_m', goal_name: 'test_goal',
|
|
570
|
+
invocation_context: KairosMcp::InvocationContext.new, config: {}
|
|
571
|
+
)
|
|
572
|
+
prompt = step.send(:generate_multi_llm_review_prompt, session, l0_change_payload)
|
|
573
|
+
prompt.include?('L0 Change Review') && prompt.include?('evolve skill') && prompt.include?('test_goal')
|
|
574
|
+
end
|
|
575
|
+
|
|
576
|
+
# ============================================================
|
|
577
|
+
# Summary
|
|
578
|
+
# ============================================================
|
|
579
|
+
|
|
580
|
+
puts "\n#{'=' * 60}"
|
|
581
|
+
puts "RESULTS: #{$pass} passed, #{$fail} failed (total: #{$pass + $fail})"
|
|
582
|
+
puts '=' * 60
|
|
583
|
+
|
|
584
|
+
FileUtils.rm_rf(TMPDIR)
|
|
585
|
+
exit($fail > 0 ? 1 : 0)
|
data/templates/skillsets/agent/tools/agent_step.rb
CHANGED
|
@@ -372,6 +372,105 @@ module KairosMcp
|
|
|
372
372
|
return finalize_autonomous(session, results, paused: 'risk_exceeded')
|
|
373
373
|
end
|
|
374
374
|
|
|
375
|
+
# Gate 5.5: Complexity-driven review
|
|
376
|
+
review_cfg = session.config['complexity_review'] || {}
|
|
377
|
+
complexity = assess_decision_complexity(decision_payload)
|
|
378
|
+
llm_hint = decision_payload['complexity_hint']
|
|
379
|
+
complexity = merge_complexity(complexity, llm_hint) if llm_hint
|
|
380
|
+
|
|
381
|
+
if review_enabled?(session)
|
|
382
|
+
# Gate 5.5a: L0 escalation (before persona review — save LLM cost)
|
|
383
|
+
if complexity[:signals].include?('l0_change') &&
|
|
384
|
+
review_cfg.fetch('l0_always_checkpoint', true)
|
|
385
|
+
multi_llm_prompt = generate_multi_llm_review_prompt(session, decision_payload)
|
|
386
|
+
session.update_state('checkpoint')
|
|
387
|
+
session.save
|
|
388
|
+
return finalize_autonomous(session, results, checkpoint: true,
|
|
389
|
+
warning: 'l0_requires_external_review',
|
|
390
|
+
multi_llm_prompt: multi_llm_prompt)
|
|
391
|
+
end
|
|
392
|
+
|
|
393
|
+
# Gate 5.5b: High-complexity persona review (inner retry loop)
|
|
394
|
+
if complexity[:level] == 'high'
|
|
395
|
+
review_retries = 0
|
|
396
|
+
max_retries = review_cfg['max_review_retries'] || 2
|
|
397
|
+
|
|
398
|
+
loop do
|
|
399
|
+
# Budget guard inside inner loop (P1-2 fix)
|
|
400
|
+
if total_llm_calls >= max_total_llm
|
|
401
|
+
session.update_state('checkpoint')
|
|
402
|
+
session.save
|
|
403
|
+
return finalize_autonomous(session, results, checkpoint: true,
|
|
404
|
+
paused: 'llm_budget_exceeded')
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
review = run_persona_review(session, decision_payload, complexity)
|
|
408
|
+
total_llm_calls += review[:llm_calls] || 0
|
|
409
|
+
session.save_review_result(review)
|
|
410
|
+
|
|
411
|
+
case review[:overall_verdict]
|
|
412
|
+
when 'APPROVE'
|
|
413
|
+
break
|
|
414
|
+
when 'REJECT'
|
|
415
|
+
session.update_state('checkpoint')
|
|
416
|
+
session.save
|
|
417
|
+
return finalize_autonomous(session, results, checkpoint: true,
|
|
418
|
+
warning: 'review_rejected', review: review)
|
|
419
|
+
else # REVISE or parse fallback
|
|
420
|
+
review_retries += 1
|
|
421
|
+
if review_retries > max_retries
|
|
422
|
+
session.update_state('checkpoint')
|
|
423
|
+
session.save
|
|
424
|
+
return finalize_autonomous(session, results, checkpoint: true,
|
|
425
|
+
warning: 'review_max_retries', review: review)
|
|
426
|
+
end
|
|
427
|
+
|
|
428
|
+
findings = Array(review[:key_findings]).join("\n- ")
|
|
429
|
+
feedback = "Persona review (attempt #{review_retries}/#{max_retries}) found issues:\n- #{findings}\n\nRevise the plan to address these concerns."
|
|
430
|
+
decide_result = run_decide_with_review_feedback_internal(session, feedback)
|
|
431
|
+
total_llm_calls += decide_result[:llm_calls] || 0
|
|
432
|
+
|
|
433
|
+
if decide_result[:error]
|
|
434
|
+
session.update_state('paused_error')
|
|
435
|
+
session.save
|
|
436
|
+
return finalize_autonomous(session, results, error: decide_result[:error])
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
decision_payload = session.load_decision
|
|
440
|
+
|
|
441
|
+
# Re-check loop detection with review-tagged summary
|
|
442
|
+
tagged_summary = "#{decision_payload['summary']}_review_rev#{review_retries}"
|
|
443
|
+
loop_term = check_loop_detection(
|
|
444
|
+
session, nil,
|
|
445
|
+
decision_payload.merge('summary' => tagged_summary),
|
|
446
|
+
mandate_override: mandate
|
|
447
|
+
)
|
|
448
|
+
if loop_term
|
|
449
|
+
session.update_state('terminated')
|
|
450
|
+
return finalize_autonomous(session, results, terminated: 'loop_detected')
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
# Re-check risk budget on revised plan
|
|
454
|
+
proposal = MandateAdapter.to_mandate_proposal(decision_payload)
|
|
455
|
+
if ::Autonomos::Mandate.risk_exceeds_budget?(proposal, mandate[:risk_budget])
|
|
456
|
+
mandate[:status] = 'paused_risk_exceeded'
|
|
457
|
+
::Autonomos::Mandate.save(session.mandate_id, mandate)
|
|
458
|
+
session.update_state('paused_risk')
|
|
459
|
+
session.save
|
|
460
|
+
return finalize_autonomous(session, results, paused: 'risk_exceeded')
|
|
461
|
+
end
|
|
462
|
+
|
|
463
|
+
# Re-assess complexity for revised plan
|
|
464
|
+
complexity = assess_decision_complexity(decision_payload)
|
|
465
|
+
llm_hint = decision_payload['complexity_hint']
|
|
466
|
+
complexity = merge_complexity(complexity, llm_hint) if llm_hint
|
|
467
|
+
|
|
468
|
+
break unless complexity[:level] == 'high'
|
|
469
|
+
end
|
|
470
|
+
end
|
|
471
|
+
end
|
|
472
|
+
end
|
|
473
|
+
|
|
375
474
|
# ACT + REFLECT
|
|
376
475
|
ar_result = run_act_reflect_internal(session)
|
|
377
476
|
total_llm_calls += ar_result[:llm_calls] || 0
|
|
@@ -391,6 +490,19 @@ module KairosMcp
|
|
|
391
490
|
return finalize_autonomous(session, results, terminated: term_reason)
|
|
392
491
|
end
|
|
393
492
|
|
|
493
|
+
# Gate 6.5: Post-ACT advisory review for medium complexity
|
|
494
|
+
if review_enabled?(session) &&
|
|
495
|
+
review_cfg.fetch('post_act_review', true) &&
|
|
496
|
+
complexity[:level] == 'medium' &&
|
|
497
|
+
ar_result[:act_succeeded]
|
|
498
|
+
post_review = run_lightweight_review(session, decision_payload, ar_result)
|
|
499
|
+
total_llm_calls += post_review[:llm_calls] || 0
|
|
500
|
+
if Array(post_review[:concerns]).any?
|
|
501
|
+
ar_result[:reflect]['review_concerns'] = post_review[:concerns]
|
|
502
|
+
session.save_progress_amendment(post_review[:concerns])
|
|
503
|
+
end
|
|
504
|
+
end
|
|
505
|
+
|
|
394
506
|
# Gate 7: Confidence-based early exit
|
|
395
507
|
if session.cycle_number >= min_exit_cycles
|
|
396
508
|
confidence = clamp_confidence(ar_result.dig(:reflect, 'confidence'))
|
|
@@ -425,7 +537,8 @@ module KairosMcp
|
|
|
425
537
|
end
|
|
426
538
|
|
|
427
539
|
def finalize_autonomous(session, cycle_results, terminated: nil, paused: nil,
|
|
428
|
-
checkpoint: nil, error: nil, warning: nil
|
|
540
|
+
checkpoint: nil, error: nil, warning: nil,
|
|
541
|
+
review: nil, multi_llm_prompt: nil)
|
|
429
542
|
session.save
|
|
430
543
|
|
|
431
544
|
status = if checkpoint then 'checkpoint'
|
|
@@ -451,6 +564,8 @@ module KairosMcp
|
|
|
451
564
|
}
|
|
452
565
|
}
|
|
453
566
|
response['permission_advisory'] = session.permission_advisory if session.permission_advisory
|
|
567
|
+
response['review'] = review if review
|
|
568
|
+
response['multi_llm_prompt'] = multi_llm_prompt if multi_llm_prompt
|
|
454
569
|
text_content(JSON.generate(response))
|
|
455
570
|
end
|
|
456
571
|
|
|
@@ -806,6 +921,216 @@ module KairosMcp
|
|
|
806
921
|
warn "[agent] Failed to record cycle: #{e.message}"
|
|
807
922
|
end
|
|
808
923
|
|
|
924
|
+
# ---- Complexity Assessment ----
|
|
925
|
+
|
|
926
|
+
L0_TOOLS = %w[skills_evolve skills_rollback instructions_update system_upgrade].freeze
|
|
927
|
+
STATE_MUTATION_TOOLS = %w[state_commit chain_record knowledge_update formalization_record].freeze
|
|
928
|
+
|
|
929
|
+
# Derive a structural complexity rating for a proposed decision.
#
# Collects up to seven signals from the plan steps (`task_json.steps`) and the
# summary: high_risk, many_steps, design_scope, l0_change, core_files,
# multi_file and state_mutation. Zero signals rate 'low', one rates 'medium',
# two or more rate 'high'; an l0_change signal always forces 'high'.
#
# The payload is produced by an LLM, so malformed parts are tolerated rather
# than raising: a non-Hash `task_json`, non-Array `steps`, non-Hash step or
# `tool_arguments`, and a non-String `summary` simply contribute no signal.
# Malformed steps still count towards the `many_steps` threshold.
#
# @param decision_payload [Hash] decision payload with string keys
# @return [Hash] `{ level: 'low' | 'medium' | 'high', signals: Array<String> }`
def assess_decision_complexity(decision_payload)
  task_json = decision_payload['task_json']
  raw_steps = task_json.is_a?(Hash) ? task_json['steps'] : nil
  all_steps = raw_steps.is_a?(Array) ? raw_steps : []
  # Only Hash-shaped steps can be inspected for risk, tool and file path.
  steps = all_steps.grep(Hash)

  tool_names = steps.map { |step| step['tool_name'] }
  file_paths = steps.filter_map do |step|
    arguments = step['tool_arguments']
    arguments['file_path'] if arguments.is_a?(Hash)
  end
  summary = decision_payload['summary']

  signals = []
  signals << 'high_risk' if steps.any? { |step| step['risk'] == 'high' }
  signals << 'many_steps' if all_steps.size > 5
  if summary.is_a?(String) && summary.match?(::Autonomos::Ooda::COMPLEX_KEYWORDS)
    signals << 'design_scope'
  end
  signals << 'l0_change' if tool_names.any? { |tool| L0_TOOLS.include?(tool) }
  if file_paths.any? { |path| path.to_s.include?('/lib/') && path.to_s.include?('kairos') }
    signals << 'core_files'
  end
  signals << 'multi_file' if file_paths.uniq.size > 3
  signals << 'state_mutation' if tool_names.any? { |tool| STATE_MUTATION_TOOLS.include?(tool) }

  level = case signals.size
          when 0 then 'low'
          when 1 then 'medium'
          else 'high'
          end

  # L0 override: always high
  level = 'high' if signals.include?('l0_change')

  { level: level, signals: signals }
end
|
|
958
|
+
|
|
959
|
+
# Combine the structural assessment with the LLM's self-reported hint.
#
# The hint may raise the structural level by at most one step and can never
# lower it. Signals from both sources are concatenated and de-duplicated.
#
# The hint comes from LLM output, so it is handled defensively: anything that
# is not a Hash (nil, a bare String such as "high", an Array) is ignored, and
# the hinted level is matched case-insensitively with surrounding whitespace
# removed. An unrecognised level counts as 'low'.
#
# @param structural [Hash] `{ level: String, signals: Array<String> }`
# @param llm_hint [Hash, nil] hint with 'level'/'signals' under string or symbol keys
# @return [Hash] `{ level: String, signals: Array }`
def merge_complexity(structural, llm_hint)
  ranks = { 'low' => 0, 'medium' => 1, 'high' => 2 }
  hint = llm_hint.is_a?(Hash) ? llm_hint : {}

  hinted_level = (hint['level'] || hint[:level]).to_s.strip.downcase
  structural_rank = ranks[structural[:level]] || 0
  hinted_rank = ranks[hinted_level] || 0

  # LLM can raise by at most 1 level
  capped_hint_rank = [hinted_rank, structural_rank + 1].min
  final_rank = [structural_rank, capped_hint_rank].max

  {
    level: ranks.key(final_rank) || 'low',
    signals: (Array(structural[:signals]) + Array(hint['signals'] || hint[:signals])).uniq
  }
end
|
|
972
|
+
|
|
973
|
+
# Report whether complexity-driven review is switched on for this session.
#
# Reads `complexity_review.enabled` from the session config. A missing (or
# falsy) `complexity_review` section, or a section without an `enabled` key,
# counts as enabled.
#
# @param session [#config] object exposing a Hash-like `config`
# @return [Object] the configured `enabled` value, `true` by default
def review_enabled?(session)
  section = session.config['complexity_review']
  return true unless section

  section.fetch('enabled', true)
end
|
|
977
|
+
|
|
978
|
+
# ---- Persona Review ----
|
|
979
|
+
|
|
980
|
+
# Run the multi-persona review of a proposed plan through the LLM.
#
# Persona names come from the `complexity_review` config: `high_personas`
# (default kairos/pragmatic/skeptic) when the signals include 'l0_change',
# otherwise `personas` (default pragmatic/skeptic). The prompt is sent as a
# single user message in a 'review' phase, and the reply content is parsed by
# `parse_persona_review`.
#
# @param session [Object] agent session (provides `config`)
# @param decision_payload [Hash] plan under review
# @param complexity [Hash] `{ level:, signals: }` for the plan
# @return [Hash] parsed review with `:llm_calls` set to the loop's call count
def run_persona_review(session, decision_payload, complexity)
  settings = session.config['complexity_review'] || {}
  config_key, default_personas =
    if complexity[:signals].include?('l0_change')
      ['high_personas', %w[kairos pragmatic skeptic]]
    else
      ['personas', %w[pragmatic skeptic]]
    end
  persona_names = settings[config_key] || default_personas

  definitions = load_persona_definitions(persona_names, session)
  user_message = {
    'role' => 'user',
    'content' => build_persona_review_prompt(decision_payload, complexity, definitions)
  }

  cognitive_loop = CognitiveLoop.new(self, session)
  reply = cognitive_loop.run_phase('review', persona_review_system_prompt, [user_message], [])

  parse_persona_review(reply['content']).tap do |review|
    review[:llm_calls] = cognitive_loop.total_calls
  end
end
|
|
998
|
+
|
|
999
|
+
# Run the single-reviewer, post-execution review through the LLM.
#
# Builds the lightweight review prompt from the plan and the ACT/REFLECT
# result, sends it as one user message in a 'review' phase, and parses the
# reply content with `parse_lightweight_review`.
#
# @param session [Object] agent session handed to the cognitive loop
# @param decision_payload [Hash] plan that was executed
# @param ar_result [Hash] ACT/REFLECT result
# @return [Hash] parsed review with `:llm_calls` set to the loop's call count
def run_lightweight_review(session, decision_payload, ar_result)
  user_message = {
    'role' => 'user',
    'content' => build_lightweight_review_prompt(decision_payload, ar_result)
  }

  cognitive_loop = CognitiveLoop.new(self, session)
  reply = cognitive_loop.run_phase('review', lightweight_review_system_prompt, [user_message], [])

  parse_lightweight_review(reply['content']).tap do |review|
    review[:llm_calls] = cognitive_loop.total_calls
  end
end
|
|
1009
|
+
|
|
1010
|
+
# Parse the persona-review reply into a symbol-keyed Hash.
#
# Every failure mode degrades to `review_parse_fallback` (a REVISE verdict)
# instead of raising: missing content, no extractable JSON, JSON that is not
# an object (array or scalar), a non-String `overall_verdict`, or a JSON
# syntax error. On success the verdict is stripped and upper-cased so that
# " approve\n" is treated like "APPROVE".
#
# @param content [String, nil] raw LLM reply
# @return [Hash] top-level keys symbolized; always contains `:overall_verdict`
def parse_persona_review(content)
  return review_parse_fallback('no content') unless content

  json_str = extract_json_from_content(content)
  return review_parse_fallback('no JSON found') unless json_str

  parsed = JSON.parse(json_str)
  # Valid JSON is not necessarily an object; indexing an Array or Integer
  # with a String key would raise TypeError.
  unless parsed.is_a?(Hash)
    return review_parse_fallback("expected JSON object, got #{parsed.class}")
  end

  verdict = parsed['overall_verdict']
  unless verdict.is_a?(String)
    return review_parse_fallback("invalid overall_verdict type: #{verdict.class}")
  end

  parsed['overall_verdict'] = verdict.strip.upcase
  parsed.transform_keys(&:to_sym)
rescue JSON::ParserError => e
  review_parse_fallback("JSON parse error: #{e.message}")
end
|
|
1026
|
+
|
|
1027
|
+
# Build the conservative result used when a persona review cannot be parsed.
#
# The verdict defaults to REVISE, the reason is recorded as the only key
# finding, and `parse_error` is set to true.
#
# @param reason [String] short description of what went wrong
# @return [Hash] `{ overall_verdict:, key_findings:, parse_error:, personas: }`
def review_parse_fallback(reason)
  finding = "Review parse failed (#{reason}) — defaulting to REVISE"

  { overall_verdict: 'REVISE', key_findings: [finding], parse_error: true, personas: {} }
end
|
|
1035
|
+
|
|
1036
|
+
# Parse the lightweight (post-ACT) review reply.
#
# Never raises on bad LLM output: missing JSON, a JSON syntax error, or JSON
# that is not an object all produce empty lists with `parse_error: true`.
# Every return value carries both `:concerns` and `:suggestions`.
#
# @param content [String, nil] raw LLM reply
# @return [Hash] `{ concerns: Array, suggestions: Array }`, plus
#   `llm_calls: 0` when there was no content or `parse_error: true` on failure
def parse_lightweight_review(content)
  return { concerns: [], suggestions: [], llm_calls: 0 } unless content

  json_str = extract_json_from_content(content)
  parsed = json_str && JSON.parse(json_str)
  # An array or scalar is valid JSON but has no 'concerns' key to read.
  return { concerns: [], suggestions: [], parse_error: true } unless parsed.is_a?(Hash)

  { concerns: Array(parsed['concerns']), suggestions: Array(parsed['suggestions']) }
rescue JSON::ParserError
  { concerns: [], suggestions: [], parse_error: true }
end
|
|
1049
|
+
|
|
1050
|
+
# Resolve a short description for each requested persona.
#
# Descriptions are read from the 'persona_definitions' knowledge entry (via
# the `knowledge_get` tool). Any requested persona that the entry does not
# define, or defines with blank text, falls back to the built-in description
# when one exists, so a partially filled knowledge entry no longer drops
# personas from the review. If the lookup itself fails, a warning is written
# and only the built-in descriptions are returned.
#
# @param persona_names [Array<String>] personas to resolve
# @param session [#invocation_context] session whose context is passed to the tool
# @return [Hash{String => String}] descriptions keyed by persona, in request order
def load_persona_definitions(persona_names, session)
  result = invoke_tool('knowledge_get', { 'name' => 'persona_definitions' },
                       context: session.invocation_context)
  parsed = JSON.parse(result.map { |block| block[:text] || block['text'] }.compact.join)
  content = parsed['content'] || ''

  from_knowledge = extract_persona_sections(content, persona_names)
  builtin = fallback_persona_definitions

  persona_names.each_with_object({}) do |name, definitions|
    text = from_knowledge[name]
    text = builtin[name] if text.to_s.strip.empty?
    definitions[name] = text if text
  end
rescue StandardError => e
  # Best effort: the review must still run when the knowledge lookup fails.
  warn "[agent] Failed to load persona definitions: #{e.message}"
  fallback_persona_definitions.slice(*persona_names)
end

# Built-in persona descriptions used when the knowledge entry is unavailable
# or does not cover a requested persona.
def fallback_persona_definitions
  {
    'pragmatic' => 'Evaluate for real-world utility, implementation complexity, and maintenance burden.',
    'skeptic' => 'Challenge assumptions, identify edge cases, failure modes, and unintended consequences.',
    'kairos' => 'Evaluate alignment with KairosChain philosophy: self-referentiality, structural integrity, and layer boundaries.'
  }
end
|
|
1064
|
+
|
|
1065
|
+
# Pull per-persona text out of a markdown document.
#
# A persona's section starts at a heading line of level two or deeper whose
# title is exactly the persona name (case-insensitive, e.g. "## skeptic" or
# "### Skeptic") and runs until the next such heading line or the end of the
# document. Headings are matched at the start of a line, so a section with no
# body yields an empty string instead of absorbing the section that follows.
# Each section is stripped and cut to its first 301 characters (`[0..300]`).
#
# @param content [String, nil] markdown text
# @param persona_names [Array<String>] persona names to look for
# @return [Hash{String => String}] section text for each persona found
def extract_persona_sections(content, persona_names)
  text = content.to_s

  persona_names.each_with_object({}) do |name, defs|
    section = /^[ \t]*##+[ \t]*#{Regexp.escape(name)}[ \t]*\r?\n(.*?)(?=^[ \t]*##|\z)/mi
    match = section.match(text)
    defs[name] = match[1].strip[0..300] if match
  end
end
|
|
1075
|
+
|
|
1076
|
+
# ---- Internal DECIDE with Review Feedback ----
|
|
1077
|
+
|
|
1078
|
+
# Re-run DECIDE with the persona-review feedback and persist the revised plan.
#
# The prompt contains the tool catalog, the previously saved decision as JSON
# (or "(none)"), and the review feedback. When DECIDE reports an error, or
# returns no Hash under 'decision_payload', nothing is saved and an error
# result is returned, so the previously saved decision is never replaced by
# nil.
#
# @param session [Object] agent session (provides load_decision / save_decision)
# @param feedback [String] review feedback to present to the planner
# @return [Hash] `{ decision_payload:, llm_calls:, error: nil }` on success,
#   `{ error:, llm_calls: }` on failure
def run_decide_with_review_feedback_internal(session, feedback)
  loop_inst = CognitiveLoop.new(self, session)

  prior_decision = session.load_decision
  prior_json = prior_decision ? JSON.generate(prior_decision) : '(none)'
  catalog = build_tool_catalog(session)

  messages = [
    { 'role' => 'user', 'content' =>
      "## Available Tools\n#{catalog}\n\n" \
      "Previous plan:\n#{prior_json}\n\n" \
      "This plan was flagged by persona review. Feedback:\n#{feedback}\n\n" \
      "Revise the plan and output a new decision_payload as JSON. " \
      "Include a complexity_hint key in your output. Use ONLY tools listed above." }
  ]

  decide_result = loop_inst.run_decide(decide_system_prompt, messages)
  if decide_result['error']
    return { error: decide_result['error'], llm_calls: loop_inst.total_calls }
  end

  revised_payload = decide_result['decision_payload']
  # NOTE(review): assumes run_decide can return without 'error' and without a
  # usable payload - confirm against CognitiveLoop#run_decide.
  unless revised_payload.is_a?(Hash)
    return { error: 'DECIDE returned no decision_payload', llm_calls: loop_inst.total_calls }
  end

  session.save_decision(revised_payload)
  { decision_payload: revised_payload,
    llm_calls: loop_inst.total_calls, error: nil }
end
|
|
1103
|
+
|
|
1104
|
+
# ---- Multi-LLM Review Prompt Generation ----
|
|
1105
|
+
|
|
1106
|
+
# Compose the markdown prompt handed to external reviewers for an L0 change.
#
# Lists the session goal, the plan summary ("unknown" when absent) and one
# numbered line per step showing its action (or tool name) and risk, followed
# by four fixed review criteria and a request for an APPROVE / REVISE / REJECT
# verdict.
#
# @param session [#goal_name] session the plan belongs to
# @param decision_payload [Hash] plan with 'summary' and 'task_json' => 'steps'
# @return [String] newline-terminated prompt text
def generate_multi_llm_review_prompt(session, decision_payload)
  plan_summary = decision_payload['summary'] || 'unknown'
  plan_steps = decision_payload.dig('task_json', 'steps') || []

  numbered_steps = plan_steps.each_with_index.map do |step, index|
    label = step['action'] || step['tool_name']
    " #{index + 1}. #{label} (risk: #{step['risk']})"
  end

  prompt_lines = [
    '# L0 Change Review Required',
    '',
    'An autonomous agent proposed the following L0-level change.',
    'L0 changes modify the KairosChain framework itself and require external review.',
    '',
    "## Goal: #{session.goal_name}",
    "## Summary: #{plan_summary}",
    '## Steps:',
    numbered_steps.join("\n"),
    '',
    '## Review Criteria',
    '1. Does this change preserve structural self-referentiality?',
    '2. Is the change recorded on the blockchain?',
    '3. Could this be a SkillSet instead of core infrastructure?',
    '4. Are layer boundaries (L0/L1/L2) respected?',
    '',
    'Please evaluate with APPROVE / REVISE / REJECT and explain your reasoning.'
  ]

  "#{prompt_lines.join("\n")}\n"
end
|
|
1133
|
+
|
|
809
1134
|
# ---- Prompts ----
|
|
810
1135
|
|
|
811
1136
|
def orient_system_prompt
|
|
@@ -830,6 +1155,19 @@ module KairosMcp
|
|
|
830
1155
|
"remaining: [...], learnings: [...], open_questions: [...]}."
|
|
831
1156
|
end
|
|
832
1157
|
|
|
1158
|
+
# System prompt for the multi-persona plan review.
#
# Tells the model to act as a review panel, evaluate each persona
# independently, synthesize, and reply with JSON only.
#
# @return [String]
def persona_review_system_prompt
  [
    "You are a multi-perspective review panel evaluating an autonomous agent's proposed action plan.",
    'Each persona has a distinct viewpoint.',
    'Evaluate independently, then synthesize.',
    'Output ONLY a JSON object with the structure specified in the prompt.'
  ].join(' ')
end
|
|
1164
|
+
|
|
1165
|
+
# System prompt for the post-execution lightweight review.
#
# Casts the model as a skeptical reviewer and asks for a JSON object holding
# `concerns` and `suggestions`.
#
# @return [String]
def lightweight_review_system_prompt
  [
    "You are a skeptical reviewer evaluating the results of an autonomous agent's execution.",
    'Identify any concerns, edge cases, or quality issues.',
    'Output a JSON object: {concerns: [...], suggestions: [...]}.'
  ].join(' ')
end
|
|
1170
|
+
|
|
833
1171
|
def build_orient_prompt(session, observation_text = nil)
|
|
834
1172
|
parts = ["Goal: #{session.goal_name}", "Cycle: #{session.cycle_number + 1}"]
|
|
835
1173
|
# M5: Prepend progress summary for cross-cycle continuity
|
|
@@ -858,9 +1196,71 @@ module KairosMcp
|
|
|
858
1196
|
"Based on this analysis:\n#{analysis}\n\n" \
|
|
859
1197
|
"## Available Tools\n#{catalog}\n\n" \
|
|
860
1198
|
"Create a task execution plan as JSON (decision_payload format). " \
|
|
1199
|
+
"Include a 'complexity_hint' key: {\"level\": \"low\"|\"medium\"|\"high\", " \
|
|
1200
|
+
"\"signals\": [\"reason1\"]}. Assess complexity based on risk, step count, " \
|
|
1201
|
+
"architectural scope, and L0 framework changes. " \
|
|
861
1202
|
"Use ONLY tools listed above."
|
|
862
1203
|
end
|
|
863
1204
|
|
|
1205
|
+
# Compose the user prompt for the multi-persona plan review.
#
# The prompt shows the plan summary ("unknown" when absent), the complexity
# level with its signals, one numbered line per step (action or tool name,
# risk, tool), a section per persona, and the required answer structure.
#
# @param decision_payload [Hash] plan with 'summary' and 'task_json' => 'steps'
# @param complexity [Hash] `{ level:, signals: }`
# @param persona_defs [Hash{String => String}] persona name => description
# @return [String] newline-terminated prompt text
def build_persona_review_prompt(decision_payload, complexity, persona_defs)
  plan_summary = decision_payload['summary'] || 'unknown'
  plan_steps = decision_payload.dig('task_json', 'steps') || []

  numbered_steps = plan_steps.each_with_index.map do |step, index|
    label = step['action'] || step['tool_name']
    " #{index + 1}. #{label} (risk: #{step['risk']}, tool: #{step['tool_name']})"
  end

  persona_blocks = persona_defs.map do |persona, description|
    ["### #{persona}", description, 'Evaluate from this perspective.'].join("\n")
  end

  prompt_lines = [
    'Evaluate the following proposed action plan from multiple perspectives.',
    '',
    '## Proposal',
    "Summary: #{plan_summary}",
    "Complexity: #{complexity[:level]} (#{complexity[:signals].join(', ')})",
    'Steps:',
    numbered_steps.join("\n"),
    '',
    '## Personas',
    persona_blocks.join("\n\n"),
    '',
    'For EACH persona, provide:',
    '- VERDICT: APPROVE | REVISE | REJECT',
    '- CONCERNS: [list of specific concerns]',
    '- SUGGESTIONS: [list of improvements]',
    '',
    'Then provide:',
    '- OVERALL_VERDICT: APPROVE (all approve) | REVISE (any revise) | REJECT (any reject)',
    '- KEY_FINDINGS: [consolidated list of all concerns]',
    '',
    'Output as a single JSON object.'
  ]

  "#{prompt_lines.join("\n")}\n"
end
|
|
1240
|
+
|
|
1241
|
+
# Compose the user prompt for the post-execution lightweight review.
#
# Shows the plan summary, the ACT summary, the REFLECT confidence and the
# achieved items, each with a default when absent ("unknown", 0.0, empty).
# `achieved` comes from LLM output, so it is coerced with `Array()`: a single
# String is shown as-is instead of raising NoMethodError on `join`.
#
# @param decision_payload [Hash] plan with a 'summary'
# @param ar_result [Hash] ACT/REFLECT result with `:act` and `:reflect` hashes
# @return [String] newline-terminated prompt text
def build_lightweight_review_prompt(decision_payload, ar_result)
  summary = decision_payload['summary'] || 'unknown'
  act_summary = ar_result.dig(:act, 'summary') || 'unknown'
  confidence = ar_result.dig(:reflect, 'confidence') || 0.0
  achieved = Array(ar_result.dig(:reflect, 'achieved'))

  <<~PROMPT
    Review the execution results of an autonomous agent cycle.

    Plan summary: #{summary}
    Execution result: #{act_summary}
    Confidence: #{confidence}
    Achieved: #{achieved.join(', ')}

    As a skeptical reviewer, identify any concerns about:
    1. Whether the execution actually achieved what was planned
    2. Edge cases or error handling that may have been missed
    3. Quality issues in the approach taken

    Output a JSON object: {"concerns": [...], "suggestions": [...]}
  PROMPT
end
|
|
1263
|
+
|
|
864
1264
|
def build_reflect_prompt(session, act_result)
|
|
865
1265
|
"Goal: #{session.goal_name}\n" \
|
|
866
1266
|
"Execution result:\n#{JSON.generate(act_result)}\n\n" \
|
|
@@ -928,6 +1328,34 @@ module KairosMcp
|
|
|
928
1328
|
required.map(&:to_s)
|
|
929
1329
|
end
|
|
930
1330
|
|
|
1331
|
+
# ---- JSON Extraction ----
|
|
1332
|
+
|
|
1333
|
+
# Extract a valid JSON document from content that may include code fences or
# prose, and return it as a String (the caller parses it).
#
# Candidates are tried in this order and the first one that parses wins:
#   1. the whole content,
#   2. the body of each ``` / ```json fenced block, in order of appearance
#      (a leading non-JSON fence no longer hides a later JSON fence),
#   3. the span from the first "{" to the last "}".
#
# @param content [String, nil] raw LLM reply; non-String input yields nil
# @return [String, nil] the JSON text, or nil when none of the candidates parse
def extract_json_from_content(content)
  return nil unless content.is_a?(String)
  return content if parseable_json?(content)

  fenced = content.scan(/```(?:json)?\s*\n?(.*?)\n?```/m).flatten
  fenced_json = fenced.find { |candidate| parseable_json?(candidate) }
  return fenced_json if fenced_json

  # Bare JSON after prose: first { to last }
  braced = content[/\{.*\}/m]
  braced if braced && parseable_json?(braced)
end

# True when `text` parses as JSON; a syntax error is reported as false.
def parseable_json?(text)
  JSON.parse(text)
  true
rescue JSON::ParserError
  false
end
|
|
1358
|
+
|
|
931
1359
|
# ---- Helpers ----
|
|
932
1360
|
|
|
933
1361
|
def load_last_decision(session)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'json'
|
|
4
4
|
require 'open3'
|
|
5
|
+
require 'timeout'
|
|
5
6
|
require_relative 'adapter'
|
|
6
7
|
|
|
7
8
|
module KairosMcp
|
|
@@ -10,15 +11,30 @@ module KairosMcp
|
|
|
10
11
|
# Adapter that uses Claude Code CLI as the LLM backend.
|
|
11
12
|
# No API costs — uses the Claude Code subscription.
|
|
12
13
|
# Invokes `claude -p --output-format json` as a subprocess.
|
|
14
|
+
#
|
|
15
|
+
# Key safety measures:
|
|
16
|
+
# - --mcp-config '{"mcpServers":{}}' prevents recursive MCP server loading
|
|
17
|
+
# - --no-session-persistence avoids polluting session state
|
|
18
|
+
# - Timeout.timeout prevents indefinite hangs
|
|
13
19
|
class ClaudeCodeAdapter < Adapter
|
|
20
|
+
DEFAULT_TIMEOUT = 120
|
|
21
|
+
|
|
14
22
|
def call(messages:, system: nil, tools: nil, model: nil,
|
|
15
23
|
max_tokens: nil, temperature: nil, output_schema: nil)
|
|
16
24
|
prompt = build_prompt(messages, system, tools, output_schema)
|
|
17
|
-
|
|
18
|
-
|
|
25
|
+
timeout_seconds = @config&.dig('timeout_seconds') || DEFAULT_TIMEOUT
|
|
26
|
+
|
|
27
|
+
args = [
|
|
28
|
+
'claude', '-p',
|
|
29
|
+
'--output-format', 'json',
|
|
30
|
+
'--no-session-persistence',
|
|
31
|
+
'--mcp-config', '{"mcpServers":{}}'
|
|
32
|
+
]
|
|
19
33
|
args += ['--model', model] if model
|
|
20
34
|
|
|
21
|
-
stdout, stderr, status =
|
|
35
|
+
stdout, stderr, status = Timeout.timeout(timeout_seconds) do
|
|
36
|
+
Open3.capture3(*args, stdin_data: prompt)
|
|
37
|
+
end
|
|
22
38
|
|
|
23
39
|
unless status.success?
|
|
24
40
|
raise ApiError.new(
|
|
@@ -28,6 +44,11 @@ module KairosMcp
|
|
|
28
44
|
end
|
|
29
45
|
|
|
30
46
|
parse_response(stdout)
|
|
47
|
+
rescue Timeout::Error
|
|
48
|
+
raise ApiError.new(
|
|
49
|
+
"Claude Code timed out after #{timeout_seconds}s",
|
|
50
|
+
provider: 'claude_code', retryable: true
|
|
51
|
+
)
|
|
31
52
|
rescue Errno::ENOENT
|
|
32
53
|
raise ApiError.new(
|
|
33
54
|
"Claude Code CLI not found. Install: https://docs.anthropic.com/en/docs/claude-code",
|
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
require 'json'
|
|
4
4
|
require 'digest'
|
|
5
5
|
require 'time'
|
|
6
|
+
# Only load always-needed modules at startup.
|
|
7
|
+
# Provider adapters are lazy-loaded in build_adapter() to avoid
|
|
8
|
+
# crashing when optional gems (faraday, aws-sdk) are not installed.
|
|
6
9
|
require_relative '../lib/llm_client/adapter'
|
|
7
|
-
require_relative '../lib/llm_client/anthropic_adapter'
|
|
8
|
-
require_relative '../lib/llm_client/openai_adapter'
|
|
9
10
|
require_relative '../lib/llm_client/claude_code_adapter'
|
|
10
|
-
require_relative '../lib/llm_client/bedrock_adapter'
|
|
11
11
|
require_relative '../lib/llm_client/schema_converter'
|
|
12
12
|
|
|
13
13
|
module KairosMcp
|
|
@@ -172,12 +172,15 @@ module KairosMcp
|
|
|
172
172
|
# Instantiate the LLM adapter selected by `config['provider']`.
#
# 'openai', 'local' and 'openrouter' use OpenaiAdapter, 'claude_code' uses
# ClaudeCodeAdapter, 'bedrock' uses BedrockAdapter, and any other value
# (including none) uses AnthropicAdapter. Provider files other than the Claude
# Code adapter are required here, on first use, rather than at load time.
#
# @param config [Hash] client configuration with a 'provider' key
# @return [Adapter] adapter instance built from `config`
def build_adapter(config)
  provider = config['provider']

  if %w[openai local openrouter].include?(provider)
    require_relative '../lib/llm_client/openai_adapter'
    return OpenaiAdapter.new(config)
  end

  return ClaudeCodeAdapter.new(config) if provider == 'claude_code'

  if provider == 'bedrock'
    require_relative '../lib/llm_client/bedrock_adapter'
    return BedrockAdapter.new(config)
  end

  require_relative '../lib/llm_client/anthropic_adapter'
  AnthropicAdapter.new(config)
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kairos-chain
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.
|
|
4
|
+
version: 3.13.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Masaomi Hatakeyama
|
|
@@ -214,6 +214,7 @@ files:
|
|
|
214
214
|
- templates/skillsets/agent/lib/agent/session.rb
|
|
215
215
|
- templates/skillsets/agent/skillset.json
|
|
216
216
|
- templates/skillsets/agent/test/test_agent_capability_discovery.rb
|
|
217
|
+
- templates/skillsets/agent/test/test_agent_complexity_review.rb
|
|
217
218
|
- templates/skillsets/agent/test/test_agent_m1.rb
|
|
218
219
|
- templates/skillsets/agent/test/test_agent_m2.rb
|
|
219
220
|
- templates/skillsets/agent/test/test_agent_m3.rb
|