llm_gateway 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/README.md +255 -1
- data/docs/migration_guide_0.7.0.md +193 -0
- data/lib/llm_gateway/adapters/adapter.rb +30 -12
- data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
- data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +31 -8
- data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
- data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +48 -16
- data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
- data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +131 -3
- data/lib/llm_gateway/adapters/structs.rb +45 -10
- data/lib/llm_gateway/agents/event.rb +105 -0
- data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
- data/lib/llm_gateway/agents/harness.rb +176 -0
- data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
- data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
- data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
- data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
- data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
- data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
- data/lib/llm_gateway/base_client.rb +3 -3
- data/lib/llm_gateway/clients/anthropic.rb +5 -5
- data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
- data/lib/llm_gateway/clients/openai.rb +2 -2
- data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
- data/lib/llm_gateway/prompt.rb +105 -68
- data/lib/llm_gateway/proxy/adapter.rb +48 -0
- data/lib/llm_gateway/proxy/client.rb +85 -0
- data/lib/llm_gateway/proxy/server.rb +65 -0
- data/lib/llm_gateway/utils.rb +116 -13
- data/lib/llm_gateway/version.rb +1 -1
- data/lib/llm_gateway.rb +12 -1
- metadata +15 -2
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
3
5
|
require_relative "../../stream_mapper"
|
|
4
6
|
|
|
5
7
|
module LlmGateway
|
|
@@ -23,10 +25,16 @@ module LlmGateway
|
|
|
23
25
|
response_created_patches(data[:response])
|
|
24
26
|
when "response.output_item.added"
|
|
25
27
|
output_item_added_patches(data)
|
|
28
|
+
when "response.output_item.done"
|
|
29
|
+
output_item_done_patches(data)
|
|
26
30
|
when "response.content_part.added"
|
|
27
31
|
content_part_added_patches(data)
|
|
28
|
-
when "response.content_part.done"
|
|
32
|
+
when "response.content_part.done", "response.output_text.done"
|
|
29
33
|
content_part_done_patches(data)
|
|
34
|
+
when "response.code_interpreter_call_code.delta"
|
|
35
|
+
code_interpreter_code_delta_patches(data)
|
|
36
|
+
when "response.code_interpreter_call.in_progress", "response.code_interpreter_call.interpreting", "response.code_interpreter_call.completed", "response.code_interpreter_call_code.done"
|
|
37
|
+
[]
|
|
30
38
|
when "response.output_text.delta"
|
|
31
39
|
[ { type: :text_delta, delta: data[:delta] || "" } ]
|
|
32
40
|
when "response.function_call_arguments.delta"
|
|
@@ -84,6 +92,38 @@ module LlmGateway
|
|
|
84
92
|
name: item[:name]
|
|
85
93
|
}
|
|
86
94
|
]
|
|
95
|
+
when "code_interpreter_call"
|
|
96
|
+
state = code_interpreter_state[data[:output_index] || 0] = {
|
|
97
|
+
id: item[:id],
|
|
98
|
+
container_id: item[:container_id],
|
|
99
|
+
outputs: item[:outputs],
|
|
100
|
+
input_opened: false,
|
|
101
|
+
input_closed: false
|
|
102
|
+
}
|
|
103
|
+
container_id_to_tool_id[state[:container_id]] = state[:id] if state[:container_id]
|
|
104
|
+
|
|
105
|
+
[
|
|
106
|
+
{
|
|
107
|
+
type: :tool_start,
|
|
108
|
+
delta: "",
|
|
109
|
+
id: item[:id],
|
|
110
|
+
name: "code_interpreter_call",
|
|
111
|
+
tool_type: "server_tool_use"
|
|
112
|
+
}
|
|
113
|
+
]
|
|
114
|
+
else
|
|
115
|
+
[]
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def output_item_done_patches(data)
|
|
120
|
+
item = data[:item] || {}
|
|
121
|
+
|
|
122
|
+
case item[:type]
|
|
123
|
+
when "code_interpreter_call"
|
|
124
|
+
code_interpreter_done_patches(data[:output_index] || 0, item)
|
|
125
|
+
when "message"
|
|
126
|
+
container_file_citation_patches(item)
|
|
87
127
|
else
|
|
88
128
|
[]
|
|
89
129
|
end
|
|
@@ -100,7 +140,83 @@ module LlmGateway
|
|
|
100
140
|
part = data[:part] || {}
|
|
101
141
|
return [] unless part.empty? || part[:type] == "output_text"
|
|
102
142
|
|
|
103
|
-
|
|
143
|
+
citations = container_file_citation_patches(data)
|
|
144
|
+
return citations unless accumulator.active_block_type == :text
|
|
145
|
+
|
|
146
|
+
[ { type: :text_end, delta: "" } ] + citations
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def code_interpreter_code_delta_patches(data)
|
|
150
|
+
output_index = data[:output_index] || 0
|
|
151
|
+
state = code_interpreter_state[output_index] ||= {
|
|
152
|
+
id: nil,
|
|
153
|
+
container_id: nil,
|
|
154
|
+
outputs: nil,
|
|
155
|
+
input_opened: false,
|
|
156
|
+
input_closed: false
|
|
157
|
+
}
|
|
158
|
+
delta = escape_json_string_fragment(data[:delta] || "")
|
|
159
|
+
delta = "{\"code\":\"#{delta}" unless state[:input_opened]
|
|
160
|
+
state[:input_opened] = true
|
|
161
|
+
|
|
162
|
+
[ { type: :tool_delta, delta: } ]
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def code_interpreter_done_patches(output_index, item)
|
|
166
|
+
state = code_interpreter_state[output_index] ||= {}
|
|
167
|
+
state[:id] ||= item[:id]
|
|
168
|
+
state[:container_id] = item[:container_id] if item.key?(:container_id)
|
|
169
|
+
state[:outputs] = item[:outputs] if item.key?(:outputs)
|
|
170
|
+
container_id_to_tool_id[state[:container_id]] = state[:id] if state[:container_id] && state[:id]
|
|
171
|
+
return [] if state[:input_closed]
|
|
172
|
+
|
|
173
|
+
opening = state[:input_opened] ? "" : "{\"code\":\""
|
|
174
|
+
state[:input_opened] = true
|
|
175
|
+
closing = "\"," + JSON.generate(container_id: state[:container_id], outputs: state[:outputs])[1..]
|
|
176
|
+
state[:input_closed] = true
|
|
177
|
+
|
|
178
|
+
[
|
|
179
|
+
{ type: :tool_delta, delta: opening + closing },
|
|
180
|
+
{ type: :tool_end, delta: "" }
|
|
181
|
+
]
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def container_file_citation_patches(data)
|
|
185
|
+
extract_annotations(data).filter_map do |annotation|
|
|
186
|
+
next unless annotation[:type] == "container_file_citation"
|
|
187
|
+
|
|
188
|
+
container_id = annotation[:container_id]
|
|
189
|
+
file_id = annotation[:file_id]
|
|
190
|
+
filename = annotation[:filename]
|
|
191
|
+
tool_id = container_id_to_tool_id[container_id]
|
|
192
|
+
next unless tool_id
|
|
193
|
+
|
|
194
|
+
key = [ tool_id, container_id, file_id, filename ]
|
|
195
|
+
next if emitted_citation_keys[key]
|
|
196
|
+
|
|
197
|
+
emitted_citation_keys[key] = true
|
|
198
|
+
{
|
|
199
|
+
type: :tool_result_start,
|
|
200
|
+
delta: JSON.generate(container_id:, file_id:, filename:),
|
|
201
|
+
tool_use_id: tool_id,
|
|
202
|
+
name: "container_file_citation_tool_result"
|
|
203
|
+
}
|
|
204
|
+
end.flat_map { |start| [ start, { type: :tool_result_end, delta: "" } ] }
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def extract_annotations(data)
|
|
208
|
+
annotations = []
|
|
209
|
+
annotations.concat(Array(data[:annotations]))
|
|
210
|
+
annotations.concat(Array(data.dig(:part, :annotations)))
|
|
211
|
+
annotations.concat(Array(data.dig(:item, :annotations)))
|
|
212
|
+
Array(data.dig(:item, :content)).each do |content_part|
|
|
213
|
+
annotations.concat(Array(content_part[:annotations])) if content_part.is_a?(Hash)
|
|
214
|
+
end
|
|
215
|
+
annotations
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def escape_json_string_fragment(value)
|
|
219
|
+
JSON.generate(value)[1...-1]
|
|
104
220
|
end
|
|
105
221
|
|
|
106
222
|
def response_completed_patches(response)
|
|
@@ -162,7 +278,19 @@ module LlmGateway
|
|
|
162
278
|
end
|
|
163
279
|
|
|
164
280
|
def tool_seen?
|
|
165
|
-
accumulator.blocks.any? { |content_block| content_block && content_block[:type]
|
|
281
|
+
accumulator.blocks.any? { |content_block| content_block && [ "tool_use", "server_tool_use" ].include?(content_block[:type]) }
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
def code_interpreter_state
|
|
285
|
+
@code_interpreter_state ||= {}
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
def container_id_to_tool_id
|
|
289
|
+
@container_id_to_tool_id ||= {}
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
def emitted_citation_keys
|
|
293
|
+
@emitted_citation_keys ||= {}
|
|
166
294
|
end
|
|
167
295
|
end
|
|
168
296
|
end
|
|
@@ -52,17 +52,43 @@ class ToolCall < BaseStruct
|
|
|
52
52
|
end
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
+
class ServerToolCall < ToolCall
|
|
56
|
+
attribute :type, Types::String.enum("server_tool_use")
|
|
57
|
+
end
|
|
58
|
+
|
|
55
59
|
class ToolResult < BaseStruct
|
|
56
|
-
attribute :type, Types::String
|
|
60
|
+
attribute :type, Types::String
|
|
57
61
|
attribute :tool_use_id, Types::String
|
|
58
|
-
attribute :content, Types::
|
|
62
|
+
attribute :content, Types::Any
|
|
63
|
+
|
|
64
|
+
def to_h
|
|
65
|
+
{
|
|
66
|
+
type: type,
|
|
67
|
+
tool_use_id: tool_use_id,
|
|
68
|
+
content: content
|
|
69
|
+
}
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
class ServerToolResult < ToolResult
|
|
74
|
+
attribute :type, Types::String.enum("server_tool_result")
|
|
75
|
+
attribute? :name, Types::String.optional
|
|
76
|
+
|
|
77
|
+
def to_h
|
|
78
|
+
result = super
|
|
79
|
+
result[:name] = name unless name.nil?
|
|
80
|
+
result
|
|
81
|
+
end
|
|
59
82
|
end
|
|
60
83
|
|
|
61
84
|
class PartialAssistantMessage < BaseStruct
|
|
62
85
|
ContentBlock =
|
|
63
86
|
Types.Instance(TextContent) |
|
|
64
87
|
Types.Instance(ReasoningContent) |
|
|
65
|
-
Types.Instance(ToolCall)
|
|
88
|
+
Types.Instance(ToolCall) |
|
|
89
|
+
Types.Instance(ServerToolCall) |
|
|
90
|
+
Types.Instance(ToolResult) |
|
|
91
|
+
Types.Instance(ServerToolResult)
|
|
66
92
|
|
|
67
93
|
attribute? :id, Types::String.optional
|
|
68
94
|
attribute? :model, Types::String.optional
|
|
@@ -78,7 +104,7 @@ class PartialAssistantMessage < BaseStruct
|
|
|
78
104
|
end
|
|
79
105
|
|
|
80
106
|
def self.build_content_block(block)
|
|
81
|
-
return block if block.is_a?(TextContent) || block.is_a?(ReasoningContent) || block.is_a?(ToolCall)
|
|
107
|
+
return block if block.is_a?(TextContent) || block.is_a?(ReasoningContent) || block.is_a?(ToolCall) || block.is_a?(ServerToolCall) || block.is_a?(ToolResult) || block.is_a?(ServerToolResult)
|
|
82
108
|
|
|
83
109
|
case block[:type] || block["type"]
|
|
84
110
|
when "text"
|
|
@@ -86,14 +112,16 @@ class PartialAssistantMessage < BaseStruct
|
|
|
86
112
|
when "reasoning"
|
|
87
113
|
ReasoningContent.new(block)
|
|
88
114
|
when "thinking"
|
|
89
|
-
ReasoningContent.new(
|
|
90
|
-
type: "reasoning",
|
|
91
|
-
reasoning: block[:thinking] || block["thinking"] || block[:reasoning] || block["reasoning"],
|
|
92
|
-
signature: block[:signature] || block["signature"]
|
|
93
|
-
)
|
|
115
|
+
ReasoningContent.new(type: "reasoning", reasoning: block[:thinking] || block["thinking"] || block[:reasoning] || block["reasoning"], signature: block[:signature] || block["signature"])
|
|
94
116
|
when "tool_use"
|
|
95
117
|
ToolCall.new(block)
|
|
118
|
+
when "server_tool_use"
|
|
119
|
+
ServerToolCall.new(block)
|
|
96
120
|
else
|
|
121
|
+
type = block[:type] || block["type"]
|
|
122
|
+
return ServerToolResult.new(block) if type == "server_tool_result"
|
|
123
|
+
return ToolResult.new(block) if type&.end_with?("_tool_result")
|
|
124
|
+
|
|
97
125
|
raise ArgumentError, "Unsupported content block type: #{block[:type] || block['type']}"
|
|
98
126
|
end
|
|
99
127
|
end
|
|
@@ -102,7 +130,7 @@ class PartialAssistantMessage < BaseStruct
|
|
|
102
130
|
end
|
|
103
131
|
|
|
104
132
|
class AssistantStreamEvent < BaseStruct
|
|
105
|
-
EventType = Types::Coercible::Symbol.enum(:text_start, :text_delta, :text_end, :tool_start, :tool_delta, :tool_end, :reasoning_start, :reasoning_delta, :reasoning_end)
|
|
133
|
+
EventType = Types::Coercible::Symbol.enum(:text_start, :text_delta, :text_end, :tool_start, :tool_delta, :tool_end, :tool_result_start, :tool_result_delta, :tool_result_end, :reasoning_start, :reasoning_delta, :reasoning_end)
|
|
106
134
|
|
|
107
135
|
attribute :type, EventType
|
|
108
136
|
attribute :delta, Types::Coercible::String.default { "" }
|
|
@@ -144,6 +172,13 @@ end
|
|
|
144
172
|
class AssistantToolStartEvent < AssistantStreamEvent
|
|
145
173
|
attribute :id, Types::String
|
|
146
174
|
attribute :name, Types::String
|
|
175
|
+
attribute :tool_type, Types::String.default("tool_use".freeze).enum("tool_use", "server_tool_use")
|
|
176
|
+
attribute :content_index, Types::Integer
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
class AssistantToolResultStartEvent < AssistantStreamEvent
|
|
180
|
+
attribute :tool_use_id, Types::String
|
|
181
|
+
attribute :name, Types::String
|
|
147
182
|
attribute :content_index, Types::Integer
|
|
148
183
|
end
|
|
149
184
|
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../adapters/structs"
|
|
4
|
+
|
|
5
|
+
module LlmGateway
|
|
6
|
+
module Agents
|
|
7
|
+
module Event
|
|
8
|
+
AgentEventType = Types::Coercible::Symbol.enum(
|
|
9
|
+
:agent_start,
|
|
10
|
+
:turn_start,
|
|
11
|
+
:message_start,
|
|
12
|
+
:message_update,
|
|
13
|
+
:message_end,
|
|
14
|
+
:tool_execution_start,
|
|
15
|
+
:tool_execution_end,
|
|
16
|
+
:turn_end,
|
|
17
|
+
:agent_end
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
StreamEvent =
|
|
21
|
+
Types.Instance(AssistantStreamEvent) |
|
|
22
|
+
Types.Instance(AssistantStreamMessageEvent) |
|
|
23
|
+
Types.Instance(AssistantStreamMessageEndEvent)
|
|
24
|
+
|
|
25
|
+
ToolParameters = Types::Hash.schema(
|
|
26
|
+
id: Types::String,
|
|
27
|
+
type: Types::String.enum("tool_use"),
|
|
28
|
+
name: Types::String,
|
|
29
|
+
input: Types::Hash
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
class ToolCallResult < ::BaseStruct
|
|
33
|
+
attribute :type, Types::Coercible::Symbol.enum(:tool_result)
|
|
34
|
+
attribute :tool_use_id, Types::String
|
|
35
|
+
attribute :content, Types::Any
|
|
36
|
+
|
|
37
|
+
def to_h
|
|
38
|
+
{
|
|
39
|
+
type: type.to_s,
|
|
40
|
+
tool_use_id: tool_use_id,
|
|
41
|
+
content: content
|
|
42
|
+
}
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def dig(*keys)
|
|
46
|
+
to_h.dig(*keys)
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
class Base < ::BaseStruct
|
|
51
|
+
attribute :type, AgentEventType
|
|
52
|
+
|
|
53
|
+
def to_h
|
|
54
|
+
{
|
|
55
|
+
type: type
|
|
56
|
+
}
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
class AgentStart < Base
|
|
61
|
+
attribute :type, Types::Coercible::Symbol.default(:agent_start).enum(:agent_start)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
class TurnStart < Base
|
|
65
|
+
attribute :type, Types::Coercible::Symbol.default(:turn_start).enum(:turn_start)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
class MessageStart < Base
|
|
69
|
+
attribute :type, Types::Coercible::Symbol.default(:message_start).enum(:message_start)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
class MessageUpdate < Base
|
|
73
|
+
attribute :type, Types::Coercible::Symbol.default(:message_update).enum(:message_update)
|
|
74
|
+
attribute :stream_event, StreamEvent
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
class MessageEnd < Base
|
|
78
|
+
attribute :type, Types::Coercible::Symbol.default(:message_end).enum(:message_end)
|
|
79
|
+
attribute :message, Types.Instance(AssistantMessage)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
class ToolExecutionStart < Base
|
|
83
|
+
attribute :type, Types::Coercible::Symbol.default(:tool_execution_start).enum(:tool_execution_start)
|
|
84
|
+
attribute :parameters, ToolParameters
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
class ToolExecutionEnd < Base
|
|
88
|
+
attribute :type, Types::Coercible::Symbol.default(:tool_execution_end).enum(:tool_execution_end)
|
|
89
|
+
attribute :parameters, ToolParameters
|
|
90
|
+
attribute :result, ToolCallResult
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
class TurnEnd < Base
|
|
94
|
+
attribute :type, Types::Coercible::Symbol.default(:turn_end).enum(:turn_end)
|
|
95
|
+
attribute :message, Types.Instance(AssistantMessage)
|
|
96
|
+
attribute :tool_results, Types::Array.of(Types.Instance(::ToolResult))
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
class AgentEnd < Base
|
|
100
|
+
attribute :type, Types::Coercible::Symbol.default(:agent_end).enum(:agent_end)
|
|
101
|
+
attribute :messages, Types::Array.of(Types::Hash)
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "json"
|
|
5
|
+
require "securerandom"
|
|
6
|
+
require "time"
|
|
7
|
+
require_relative "in_memory_session_manager"
|
|
8
|
+
|
|
9
|
+
module LlmGateway
|
|
10
|
+
module Agents
|
|
11
|
+
class FileSessionManager < InMemorySessionManager
|
|
12
|
+
attr_reader :file_name, :session_path
|
|
13
|
+
|
|
14
|
+
def initialize(file_name = nil, session_id: nil, session_start: nil, session_dir: nil)
|
|
15
|
+
super(session_id)
|
|
16
|
+
@file_name = file_name
|
|
17
|
+
@preset_session_start = session_start
|
|
18
|
+
@session_dir = session_dir
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def session_id
|
|
22
|
+
events
|
|
23
|
+
@session_id
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def session_start
|
|
27
|
+
events
|
|
28
|
+
@session_start
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def normalize_path(file_name)
|
|
32
|
+
File.expand_path(file_name)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def events
|
|
36
|
+
@events ||= begin
|
|
37
|
+
@session_path = normalize_path(file_name) if file_name
|
|
38
|
+
if @session_path && File.exist?(@session_path)
|
|
39
|
+
load_session(@session_path)
|
|
40
|
+
else
|
|
41
|
+
create_new_session
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
def create_new_session
|
|
49
|
+
@session_id ||= SecureRandom.uuid
|
|
50
|
+
@session_start = @preset_session_start || Time.now.strftime("%Y%m%d_%H%M%S")
|
|
51
|
+
|
|
52
|
+
session_event = {
|
|
53
|
+
type: "session",
|
|
54
|
+
id: @session_id,
|
|
55
|
+
timestamp: @session_start
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
@session_path ||= File.join(session_dir, "#{@session_start}_#{@session_id}.jsonl")
|
|
59
|
+
FileUtils.mkdir_p(File.dirname(@session_path))
|
|
60
|
+
File.open(@session_path, "a") do |file|
|
|
61
|
+
file.puts(JSON.generate(session_event))
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
[ session_event ]
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def load_session(path)
|
|
68
|
+
loaded_events = []
|
|
69
|
+
File.foreach(path).with_index(1) do |line, line_number|
|
|
70
|
+
next if line.strip.empty?
|
|
71
|
+
|
|
72
|
+
loaded_events << JSON.parse(line, symbolize_names: true)
|
|
73
|
+
rescue JSON::ParserError => e
|
|
74
|
+
raise ArgumentError, "Invalid JSONL in #{path} at line #{line_number}: #{e.message}"
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
session_event = loaded_events.find { |event| event[:type] == "session" }
|
|
78
|
+
@session_id = session_event[:id] if session_event&.dig(:id)
|
|
79
|
+
@session_start = session_event[:timestamp] if session_event&.dig(:timestamp)
|
|
80
|
+
|
|
81
|
+
loaded_events
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def persist_entry(entry)
|
|
85
|
+
attributes = super
|
|
86
|
+
|
|
87
|
+
FileUtils.mkdir_p(File.dirname(session_path))
|
|
88
|
+
File.open(session_path, "a") do |file|
|
|
89
|
+
file.puts(JSON.generate(entry))
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
attributes
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def session_dir
|
|
96
|
+
File.expand_path(@session_dir || ENV.fetch("LLM_GATEWAY_SESSION_DIR", "~/.llm_gateway/sessions"))
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "event"
|
|
4
|
+
require_relative "../utils"
|
|
5
|
+
|
|
6
|
+
module LlmGateway
|
|
7
|
+
module Agents
|
|
8
|
+
class Harness < LlmGateway::Prompt
|
|
9
|
+
COMPACTION_TOKEN_THRESHOLD = 180_000
|
|
10
|
+
COMPACTION_IDLE_THRESHOLD_SECONDS = 60 * 60
|
|
11
|
+
attr_accessor :provider
|
|
12
|
+
attr_reader :session_manager, :default_queue_mode, :queue_drain_mode,
|
|
13
|
+
:model, :reasoning
|
|
14
|
+
|
|
15
|
+
def initialize(session_manager, provider:, model: nil, reasoning: "high")
|
|
16
|
+
@provider = provider
|
|
17
|
+
super(provider: provider, model: model, reasoning: reasoning)
|
|
18
|
+
@session_manager = session_manager
|
|
19
|
+
sync_initial_configuration_events
|
|
20
|
+
self.default_queue_mode = :next_turn
|
|
21
|
+
self.queue_drain_mode = :all
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def transcript
|
|
25
|
+
session_manager.build_model_input_messages
|
|
26
|
+
end
|
|
27
|
+
alias :prompt :transcript
|
|
28
|
+
|
|
29
|
+
def prompt_message(message, &block)
|
|
30
|
+
enqueue_or_run_message(message, default_queue_mode, &block)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def steer_message(message, &block)
|
|
34
|
+
enqueue_or_run_message(message, :steer, &block)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def follow_up_message(message, &block)
|
|
38
|
+
enqueue_or_run_message(message, :follow_up, &block)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def next_turn_message(message, &block)
|
|
42
|
+
enqueue_or_run_message(message, :next_turn, &block)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def default_queue_mode=(mode)
|
|
46
|
+
@default_queue_mode = session_manager.validate_queue!(mode)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def queue_drain_mode=(mode)
|
|
50
|
+
@queue_drain_mode = session_manager.validate_drain_mode!(mode)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def model=(model_id)
|
|
54
|
+
return @model if @model == model_id
|
|
55
|
+
|
|
56
|
+
@model = model_id
|
|
57
|
+
publish_session_event(type: "model_change", model_id: model_id)
|
|
58
|
+
@model
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def reasoning=(level)
|
|
62
|
+
return @reasoning if @reasoning == level
|
|
63
|
+
|
|
64
|
+
@reasoning = level
|
|
65
|
+
publish_session_event(type: "reasoning_change", reasoning: level)
|
|
66
|
+
@reasoning
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def compact
|
|
70
|
+
session_manager.compaction(provider)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def run(&block)
|
|
74
|
+
emit(Event::AgentStart.new, &block)
|
|
75
|
+
drain_queue(:steer)
|
|
76
|
+
emit(Event::TurnStart.new, &block)
|
|
77
|
+
emit(Event::MessageStart.new, &block)
|
|
78
|
+
|
|
79
|
+
assistant_message = stream do |event|
|
|
80
|
+
emit(Event::MessageUpdate.new(stream_event: event), &block)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
session_manager.push_message(assistant_message.to_h)
|
|
84
|
+
emit(Event::MessageEnd.new(message: assistant_message), &block)
|
|
85
|
+
|
|
86
|
+
tool_results = tool_requests(assistant_message).map do |message|
|
|
87
|
+
parameters = message.to_h
|
|
88
|
+
emit(Event::ToolExecutionStart.new(parameters: parameters), &block)
|
|
89
|
+
tool_result = find_and_execute_tool(message)
|
|
90
|
+
emit(Event::ToolExecutionEnd.new(parameters: parameters, result: tool_result), &block)
|
|
91
|
+
tool_result
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
tool_result_content = tool_results.map(&:to_h)
|
|
95
|
+
session_manager.push_message(
|
|
96
|
+
role: "user",
|
|
97
|
+
content: tool_result_content,
|
|
98
|
+
) unless tool_result_content.empty?
|
|
99
|
+
|
|
100
|
+
turn_end_event = Event::TurnEnd.new(message: assistant_message, tool_results: tool_results)
|
|
101
|
+
emit(turn_end_event, &block)
|
|
102
|
+
|
|
103
|
+
if tool_results.length.positive?
|
|
104
|
+
return run(&block)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
if session_manager.queued_messages?(:follow_up)
|
|
108
|
+
compact_if_needed
|
|
109
|
+
return run(&block) if drain_queue(:follow_up).any?
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
emit(Event::AgentEnd.new(messages: []), &block)
|
|
113
|
+
assistant_message
|
|
114
|
+
end
|
|
115
|
+
alias :continue :run
|
|
116
|
+
|
|
117
|
+
private
|
|
118
|
+
|
|
119
|
+
def publish_session_event(type:, **attributes)
|
|
120
|
+
session_manager.push_entry(type: type, **attributes)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def sync_initial_configuration_events
|
|
124
|
+
publish_session_event(type: "model_change", model_id: model) if model && !session_manager.last_model_used
|
|
125
|
+
if reasoning && !session_manager.last_reasoning_level_used
|
|
126
|
+
publish_session_event(type: "reasoning_change", reasoning: reasoning)
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def enqueue_or_run_message(message, queue, &block)
|
|
131
|
+
prepared_input = LlmGateway::Utils.deep_symbolize_keys(message)
|
|
132
|
+
result = session_manager.start_or_enqueue_user_message(prepared_input, queue: queue) do
|
|
133
|
+
compact_if_needed
|
|
134
|
+
end
|
|
135
|
+
return if result == session_manager.class::MESSAGE_QUEUED
|
|
136
|
+
|
|
137
|
+
begin
|
|
138
|
+
continue(&block)
|
|
139
|
+
|
|
140
|
+
loop do
|
|
141
|
+
break unless session_manager.queued_messages?(:next_turn)
|
|
142
|
+
|
|
143
|
+
compact_if_needed
|
|
144
|
+
drain_queue(:next_turn)
|
|
145
|
+
continue(&block)
|
|
146
|
+
end
|
|
147
|
+
ensure
|
|
148
|
+
session_manager.idle!
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def compact_if_needed
|
|
153
|
+
compact if compaction_needed?
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def compaction_needed?
|
|
157
|
+
session_manager.total_tokens > COMPACTION_TOKEN_THRESHOLD || last_assistant_message_stale?
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def last_assistant_message_stale?
|
|
161
|
+
last_assistant_message_at = session_manager.last_assistant_message_at
|
|
162
|
+
last_assistant_message_at && Time.now - last_assistant_message_at > COMPACTION_IDLE_THRESHOLD_SECONDS
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def drain_queue(queue)
|
|
166
|
+
session_manager.drain_message_queue(queue, mode: queue_drain_mode)
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
def emit(event, &block)
|
|
170
|
+
return unless block
|
|
171
|
+
|
|
172
|
+
block.call(event)
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
end
|