mistri 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +177 -0
- data/NOTICE +9 -0
- data/README.md +314 -3
- data/lib/generators/mistri/install/install_generator.rb +54 -0
- data/lib/generators/mistri/install/templates/migration.rb.tt +14 -0
- data/lib/generators/mistri/install/templates/model.rb.tt +4 -0
- data/lib/mistri/abort_signal.rb +63 -0
- data/lib/mistri/agent.rb +340 -0
- data/lib/mistri/budget.rb +29 -0
- data/lib/mistri/compaction.rb +78 -0
- data/lib/mistri/compactor.rb +182 -0
- data/lib/mistri/content.rb +89 -0
- data/lib/mistri/edit.rb +238 -0
- data/lib/mistri/errors.rb +94 -0
- data/lib/mistri/event.rb +50 -0
- data/lib/mistri/memory.rb +26 -0
- data/lib/mistri/message.rb +90 -0
- data/lib/mistri/models.rb +43 -0
- data/lib/mistri/partial_json.rb +210 -0
- data/lib/mistri/providers/anthropic/assembler.rb +205 -0
- data/lib/mistri/providers/anthropic/serializer.rb +106 -0
- data/lib/mistri/providers/anthropic.rb +106 -0
- data/lib/mistri/providers/fake.rb +109 -0
- data/lib/mistri/providers/gemini/assembler.rb +163 -0
- data/lib/mistri/providers/gemini/serializer.rb +109 -0
- data/lib/mistri/providers/gemini.rb +73 -0
- data/lib/mistri/providers/openai/assembler.rb +205 -0
- data/lib/mistri/providers/openai/serializer.rb +104 -0
- data/lib/mistri/providers/openai.rb +72 -0
- data/lib/mistri/result.rb +30 -0
- data/lib/mistri/retry_policy.rb +47 -0
- data/lib/mistri/schema.rb +162 -0
- data/lib/mistri/session.rb +124 -0
- data/lib/mistri/sinks/action_cable.rb +30 -0
- data/lib/mistri/sinks/coalesced.rb +61 -0
- data/lib/mistri/sinks/sse.rb +26 -0
- data/lib/mistri/skill.rb +15 -0
- data/lib/mistri/skills.rb +81 -0
- data/lib/mistri/sse.rb +50 -0
- data/lib/mistri/stop_reason.rb +25 -0
- data/lib/mistri/stores/active_record.rb +47 -0
- data/lib/mistri/stores/jsonl.rb +37 -0
- data/lib/mistri/stores/memory.rb +22 -0
- data/lib/mistri/sub_agent.rb +211 -0
- data/lib/mistri/tool.rb +94 -0
- data/lib/mistri/tool_call.rb +18 -0
- data/lib/mistri/tool_context.rb +15 -0
- data/lib/mistri/tool_executor.rb +66 -0
- data/lib/mistri/tool_result.rb +23 -0
- data/lib/mistri/tools/edit_file.rb +37 -0
- data/lib/mistri/tools/find_in_file.rb +36 -0
- data/lib/mistri/tools/list_files.rb +16 -0
- data/lib/mistri/tools/read_file.rb +38 -0
- data/lib/mistri/tools/read_memory.rb +16 -0
- data/lib/mistri/tools/update_memory.rb +22 -0
- data/lib/mistri/tools/write_file.rb +20 -0
- data/lib/mistri/tools.rb +50 -0
- data/lib/mistri/transport.rb +187 -0
- data/lib/mistri/usage.rb +79 -0
- data/lib/mistri/version.rb +3 -1
- data/lib/mistri/workspace/active_record.rb +47 -0
- data/lib/mistri/workspace/directory.rb +52 -0
- data/lib/mistri/workspace/memory.rb +40 -0
- data/lib/mistri/workspace/single.rb +48 -0
- data/lib/mistri.rb +91 -2
- metadata +73 -7
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
|
|
6
|
+
module Providers
|
|
7
|
+
# A scriptable provider: it streams each scripted turn as a well-formed
|
|
8
|
+
# event sequence and returns the assembled assistant message, so hosts test
|
|
9
|
+
# agent behavior hermetically while exercising real streaming semantics.
|
|
10
|
+
#
|
|
11
|
+
# provider = Mistri::Providers::Fake.new(turns: [
|
|
12
|
+
# { text: "Hello!" },
|
|
13
|
+
# { tool_calls: [{ name: "search", arguments: { "q" => "ruby" } }] },
|
|
14
|
+
# ])
|
|
15
|
+
#
|
|
16
|
+
# A turn may combine :thinking, :text, and :tool_calls, or carry :error to
|
|
17
|
+
# stream a failed turn. :stop_reason overrides the inferred reason.
|
|
18
|
+
class Fake
|
|
19
|
+
MODEL = "fake-1"
|
|
20
|
+
|
|
21
|
+
# Every #stream call is recorded here, so a test can assert what the
|
|
22
|
+
# agent actually sent.
|
|
23
|
+
attr_reader :requests
|
|
24
|
+
|
|
25
|
+
def model = MODEL
|
|
26
|
+
|
|
27
|
+
def initialize(turns: [], chunk_size: 12)
|
|
28
|
+
@turns = turns.map { |turn| turn.transform_keys(&:to_sym) }
|
|
29
|
+
@chunk_size = [chunk_size, 1].max
|
|
30
|
+
@requests = []
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def stream(messages: [], **options, &emit)
|
|
34
|
+
# Snapshot the array: the loop appends replies to it in place.
|
|
35
|
+
@requests << { messages: messages.dup, options: }
|
|
36
|
+
turn = @turns.shift
|
|
37
|
+
raise ConfigurationError, "fake provider has no scripted turns left" unless turn
|
|
38
|
+
|
|
39
|
+
blocks = []
|
|
40
|
+
emit_event(emit, :start, blocks)
|
|
41
|
+
return finish_error(turn, blocks, emit) if turn[:error]
|
|
42
|
+
|
|
43
|
+
stream_block(:thinking, turn[:thinking], blocks, emit) if turn[:thinking]
|
|
44
|
+
stream_block(:text, turn[:text], blocks, emit) if turn[:text]
|
|
45
|
+
Array(turn[:tool_calls]).each_with_index do |call, position|
|
|
46
|
+
stream_tool_call(call, position, blocks, emit)
|
|
47
|
+
end
|
|
48
|
+
finish(turn, blocks, emit)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
# Streams one text or thinking block: a start event, one delta event per
# chunk of at most @chunk_size characters, then an end event. The finished
# block is appended to +blocks+ before the end event is emitted.
#
# kind      - :text or :thinking; selects the event names and block class.
# full_text - the complete String to stream.
# blocks    - the turn's content blocks so far; mutated in place.
# emit      - the caller's event callback, or nil.
def stream_block(kind, full_text, blocks, emit)
  index = blocks.size
  emit_event(emit, :"#{kind}_start", blocks, content_index: index)
  built = +""
  full_text.scan(/.{1,#{@chunk_size}}/m) do |chunk|
    built << chunk
    # Snapshot the buffer: it keeps growing on the next chunk, and a partial
    # block that shared it would change after its event had been emitted.
    partial = build_block(kind, built.dup)
    emit_event(emit, :"#{kind}_delta", blocks + [partial],
               content_index: index, delta: chunk)
  end
  blocks << build_block(kind, full_text)
  emit_event(emit, :"#{kind}_end", blocks, content_index: index, content: full_text)
end
|
|
65
|
+
|
|
66
|
+
# Wraps +text+ in the content class matching +kind+: a text block for
# :text, a thinking block for any other kind.
def build_block(kind, text)
  return Content::Text.new(text:) if kind == :text

  Content::Thinking.new(thinking: text)
end
|
|
69
|
+
|
|
70
|
+
def stream_tool_call(spec, position, blocks, emit)
|
|
71
|
+
spec = spec.transform_keys(&:to_sym)
|
|
72
|
+
call = ToolCall.new(id: spec[:id] || "call_#{position + 1}", name: spec[:name],
|
|
73
|
+
arguments: (spec[:arguments] || {}).transform_keys(&:to_s))
|
|
74
|
+
index = blocks.size
|
|
75
|
+
emit_event(emit, :toolcall_start, blocks, content_index: index)
|
|
76
|
+
emit_event(emit, :toolcall_delta, blocks, content_index: index,
|
|
77
|
+
delta: JSON.generate(call.arguments))
|
|
78
|
+
blocks << call
|
|
79
|
+
emit_event(emit, :toolcall_end, blocks, content_index: index, tool_call: call)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def finish(turn, blocks, emit)
|
|
83
|
+
reason = turn[:stop_reason] ||
|
|
84
|
+
(blocks.any?(ToolCall) ? StopReason::TOOL_USE : StopReason::STOP)
|
|
85
|
+
message = assemble(blocks, usage: turn[:usage] || Usage.zero, stop_reason: reason)
|
|
86
|
+
emit&.call(Event.new(type: :done, reason:, message:))
|
|
87
|
+
message
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def finish_error(turn, blocks, emit)
|
|
91
|
+
error = { "type" => turn.fetch(:error_type, "Error") }
|
|
92
|
+
error["status"] = turn[:status] if turn[:status]
|
|
93
|
+
message = assemble(blocks, usage: Usage.zero, stop_reason: StopReason::ERROR,
|
|
94
|
+
error_message: turn[:error], error: error)
|
|
95
|
+
emit&.call(Event.new(type: :error, reason: StopReason::ERROR, message:,
|
|
96
|
+
error_message: turn[:error]))
|
|
97
|
+
message
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def assemble(blocks, **meta)
|
|
101
|
+
Message.assistant(content: blocks, model: MODEL, provider: :fake, **meta)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def emit_event(emit, type, blocks, **fields)
|
|
105
|
+
emit&.call(Event.new(type:, partial: assemble(blocks), **fields))
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
|
|
6
|
+
module Providers
|
|
7
|
+
class Gemini
|
|
8
|
+
# Folds streamGenerateContent records into the event union. Each record
|
|
9
|
+
# carries delta parts: plain text extends a text block, thought parts a
|
|
10
|
+
# thinking block, and a functionCall arrives whole, so its three events
|
|
11
|
+
# emit back to back. A kind switch closes the open block.
|
|
12
|
+
#
|
|
13
|
+
# Thought signatures ride on individual parts and are captured onto the
|
|
14
|
+
# block they arrived with, verbatim, for replay.
|
|
15
|
+
class Assembler
|
|
16
|
+
def initialize(model:)
|
|
17
|
+
@model = model
|
|
18
|
+
@blocks = []
|
|
19
|
+
@current = nil
|
|
20
|
+
@usage = Usage.zero
|
|
21
|
+
@finish_reason = nil
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def feed(record, &)
|
|
25
|
+
if (error = record["error"])
|
|
26
|
+
@error = ProviderError.new(error["message"] || "provider error",
|
|
27
|
+
status: error["code"])
|
|
28
|
+
return
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
candidate = record.dig("candidates", 0) || {}
|
|
32
|
+
Array(candidate.dig("content", "parts")).each { |part| fold_part(part, &) }
|
|
33
|
+
@finish_reason = candidate["finishReason"] if candidate["finishReason"]
|
|
34
|
+
@usage = parse_usage(record["usageMetadata"]) if record["usageMetadata"]
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# A stream that ended without a finishReason was truncated, not
|
|
38
|
+
# cancelled: fail it so the loop can treat it as retryable.
|
|
39
|
+
def finish(&emit)
|
|
40
|
+
return fail_stream(@error, &emit) if @error
|
|
41
|
+
return fail_stream("stream ended without a finish reason", &emit) unless @finish_reason
|
|
42
|
+
|
|
43
|
+
close_current(&emit)
|
|
44
|
+
@message = assemble(stop_reason: stop_reason)
|
|
45
|
+
emit&.call(Event.new(type: :done, reason: @message.stop_reason, message: @message))
|
|
46
|
+
@message
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def abort(&)
|
|
50
|
+
close_current
|
|
51
|
+
terminal(StopReason::ABORTED, "aborted", &)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def fail_stream(reason, &)
|
|
55
|
+
close_current
|
|
56
|
+
text = case reason
|
|
57
|
+
when ProviderError then "#{reason.class}: #{reason.describe}"
|
|
58
|
+
when Exception then "#{reason.class}: #{reason.message}"
|
|
59
|
+
else reason.to_s
|
|
60
|
+
end
|
|
61
|
+
terminal(StopReason::ERROR, text, error: ErrorData.for(reason), &)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def message = @message ||= finish
|
|
65
|
+
|
|
66
|
+
Builder = Struct.new(:kind, :index, :text, :signature)
|
|
67
|
+
|
|
68
|
+
private
|
|
69
|
+
|
|
70
|
+
# Routes one content part to its folder. A part carrying "functionCall"
# wins over one carrying "text"; a text part flagged "thought" folds as
# thinking. Parts with neither key are ignored.
def fold_part(part, &)
  return fold_function_call(part, &) if part.key?("functionCall")
  return unless part.key?("text")

  kind = part["thought"] ? :thinking : :text
  fold_text(part, kind, &)
end
|
|
77
|
+
|
|
78
|
+
def fold_text(part, kind, &)
|
|
79
|
+
close_current(&) if @current && @current.kind != kind
|
|
80
|
+
unless @current
|
|
81
|
+
@current = Builder.new(kind, @blocks.size, +"", nil)
|
|
82
|
+
emit_event(:"#{kind}_start", content_index: @current.index, &)
|
|
83
|
+
end
|
|
84
|
+
@current.text << part["text"].to_s
|
|
85
|
+
@current.signature = part["thoughtSignature"] if part["thoughtSignature"]
|
|
86
|
+
delta_type = kind == :text ? :text_delta : :thinking_delta
|
|
87
|
+
emit_event(delta_type, content_index: @current.index, delta: part["text"], &)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# A function call arrives complete in one part: start, one delta with
|
|
91
|
+
# the full arguments, end.
|
|
92
|
+
def fold_function_call(part, &)
|
|
93
|
+
close_current(&)
|
|
94
|
+
call_spec = part["functionCall"] || {}
|
|
95
|
+
arguments = call_spec["args"].is_a?(Hash) ? call_spec["args"] : {}
|
|
96
|
+
call = ToolCall.new(id: call_spec["id"] || "call_#{@blocks.size + 1}",
|
|
97
|
+
name: call_spec["name"], arguments: arguments,
|
|
98
|
+
signature: part["thoughtSignature"])
|
|
99
|
+
index = @blocks.size
|
|
100
|
+
emit_event(:toolcall_start, content_index: index, &)
|
|
101
|
+
emit_event(:toolcall_delta, content_index: index,
|
|
102
|
+
delta: JSON.generate(arguments), &)
|
|
103
|
+
@blocks << call
|
|
104
|
+
emit_event(:toolcall_end, content_index: index, tool_call: call, &)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def close_current(&)
|
|
108
|
+
return unless @current
|
|
109
|
+
|
|
110
|
+
block = build_current
|
|
111
|
+
@blocks << block
|
|
112
|
+
kind = @current.kind
|
|
113
|
+
index = @current.index
|
|
114
|
+
@current = nil
|
|
115
|
+
emit_event(:"#{kind}_end", content_index: index,
|
|
116
|
+
content: kind == :text ? block.text : block.thinking, &)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Builds a content block from the in-progress builder in @current: a text
# block when its kind is :text, a thinking block otherwise. The builder's
# signature is carried onto the block either way.
def build_current
  # Copy the buffer: fold_text keeps appending to @current.text, and this
  # block is also handed out inside partial snapshots, which must not
  # change after their event has been emitted.
  text = @current.text.dup
  if @current.kind == :text
    Content::Text.new(text: text, signature: @current.signature)
  else
    Content::Thinking.new(thinking: text, signature: @current.signature)
  end
end
|
|
126
|
+
|
|
127
|
+
# Infers the stop reason for a finished stream. Any tool call among the
# blocks takes precedence; otherwise a "MAX_TOKENS" finish reason maps to
# LENGTH and everything else to STOP.
def stop_reason
  if @blocks.any? { |block| ToolCall === block }
    StopReason::TOOL_USE
  elsif @finish_reason == "MAX_TOKENS"
    StopReason::LENGTH
  else
    StopReason::STOP
  end
end
|
|
133
|
+
|
|
134
|
+
def terminal(reason, text, error: nil, &emit)
|
|
135
|
+
@message = assemble(stop_reason: reason, error_message: text, error: error)
|
|
136
|
+
emit&.call(Event.new(type: :error, reason: reason, message: @message,
|
|
137
|
+
error_message: text))
|
|
138
|
+
@message
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def emit_event(type, **fields, &emit)
|
|
142
|
+
emit&.call(Event.new(type:, partial: assemble, **fields))
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def assemble(**meta)
|
|
146
|
+
blocks = @blocks.dup
|
|
147
|
+
blocks << build_current if @current
|
|
148
|
+
Message.assistant(content: blocks, model: @model, provider: :gemini,
|
|
149
|
+
usage: @usage, **meta)
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
# Converts a usageMetadata hash into a Usage. Cached prompt tokens are
# subtracted from the prompt count (never below zero) and reported as
# cache_read; thought tokens are added to the candidate count for output
# and also reported as reasoning. Missing counts read as zero.
def parse_usage(raw)
  cached = raw["cachedContentTokenCount"].to_i
  thoughts = raw["thoughtsTokenCount"].to_i
  uncached_prompt = raw["promptTokenCount"].to_i - cached
  uncached_prompt = 0 if uncached_prompt.negative?
  produced = raw["candidatesTokenCount"].to_i + thoughts

  Usage.new(input: uncached_prompt, output: produced,
            cache_read: cached, reasoning: thoughts)
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
|
|
4
|
+
module Providers
|
|
5
|
+
class Gemini
|
|
6
|
+
# Serializes protocol messages into generateContent contents.
|
|
7
|
+
#
|
|
8
|
+
# Wire rules that matter: roles are user and model, consecutive tool
|
|
9
|
+
# results merge into one user turn of functionResponse parts, and
|
|
10
|
+
# thought signatures echo back verbatim on the exact part they arrived
|
|
11
|
+
# with, but only for messages this provider produced; a foreign
|
|
12
|
+
# signature would be rejected. Thinking summaries are output-only and
|
|
13
|
+
# never replay.
|
|
14
|
+
module Serializer
|
|
15
|
+
module_function
|
|
16
|
+
|
|
17
|
+
# Serializes a message history into the request's contents array. System
# messages are dropped, each run of adjacent tool messages becomes one
# tool turn, every other message becomes its own turn (turns that
# serialize to nothing are skipped), and adjacent user turns are merged.
def contents(history)
  conversation = history.reject { |msg| msg.system? }
  runs = conversation.chunk_while { |left, right| left.tool? && right.tool? }
  serialized = runs.filter_map do |run|
    lead = run.first
    if lead.tool?
      tool_turn(run)
    else
      turn(lead)
    end
  end
  merge_user_runs(serialized)
end
|
|
24
|
+
|
|
25
|
+
# A steered run puts a user message right behind tool results, and both
# serialize as user turns. Gemini expects turns to alternate, so every run
# of adjacent user turns collapses into one user turn holding all of
# their parts in order. Turns that are not part of such a run pass
# through untouched.
def merge_user_runs(turns)
  runs = turns.slice_when do |left, right|
    left[:role] != "user" || right[:role] != "user"
  end
  runs.map do |run|
    next run.first if run.length == 1

    { role: "user", parts: run.flat_map { |entry| entry[:parts] } }
  end
end
|
|
34
|
+
|
|
35
|
+
# Wraps a system prompt as a single-text-part instruction hash. Returns
# nil when the prompt is nil or empty.
def system_instruction(system)
  blank = system.nil? || system.empty?
  blank ? nil : { parts: [{ text: system }] }
end
|
|
40
|
+
|
|
41
|
+
# Maps tool definitions (string- or symbol-keyed hashes) to a one-element
# tools array holding every function declaration. Each declaration takes
# its name and description as given and its parameters from the
# definition's input_schema.
def tools(definitions)
  declarations = definitions.map do |definition|
    name, description, schema =
      definition.transform_keys(&:to_sym).values_at(:name, :description, :input_schema)
    { name: name, description: description, parameters: schema }
  end
  [{ functionDeclarations: declarations }]
end
|
|
49
|
+
|
|
50
|
+
# Serializes one non-tool message as a turn: role "model" with assistant
# parts for an assistant message, role "user" with user parts otherwise.
# Returns nil when the message yields no parts.
def turn(msg)
  role, parts =
    if msg.assistant?
      ["model", assistant_parts(msg)]
    else
      ["user", user_parts(msg)]
    end
  { role: role, parts: parts } unless parts.empty?
end
|
|
56
|
+
|
|
57
|
+
# Gemini pairs a functionResponse to its call by NAME; a wrong name
# silently mismatches, so a missing one fails loudly instead.
#
# Serializes a run of tool-result messages as one user turn with one
# functionResponse part per message. Raises SchemaError for any message
# without a tool_name.
def tool_turn(group)
  responses = group.map do |msg|
    name = msg.tool_name
    raise SchemaError, "Gemini tool results need tool_name to pair with their call" unless name

    { functionResponse: { name: name, response: { "result" => result_text(msg) } } }
  end
  { role: "user", parts: responses }
end
|
|
69
|
+
|
|
70
|
+
# Non-text blocks in a tool result have no functionResponse encoding;
# note the omission rather than dropping it silently.
#
# Returns the message's text, followed by a bracketed count of omitted
# non-text blocks when there are any (surrounding whitespace stripped in
# that case only).
def result_text(msg)
  body = msg.text.to_s
  skipped = msg.content.reject { |block| block.is_a?(Content::Text) }.size
  return body if skipped.zero?

  "#{body}\n[#{skipped} non-text block(s) omitted]".strip
end
|
|
77
|
+
|
|
78
|
+
# Serializes a user message's content blocks: text blocks become text
# parts and image blocks become inlineData parts. Any other block class
# raises SchemaError.
def user_parts(msg)
  msg.content.map do |block|
    if block.is_a?(Content::Text)
      { text: block.text }
    elsif block.is_a?(Content::Image)
      { inlineData: { mimeType: block.mime_type, data: block.data } }
    else
      raise SchemaError, "cannot serialize #{block.class} for Gemini user input"
    end
  end
end
|
|
89
|
+
|
|
90
|
+
# Serializes an assistant message's content blocks: text blocks become
# text parts and tool calls become functionCall parts; every other block
# is skipped. Each part goes through #signed, which is told whether the
# message's provider is :gemini.
def assistant_parts(msg)
  replay_signatures = msg.provider == :gemini
  msg.content.each_with_object([]) do |block, parts|
    payload =
      if block.is_a?(Content::Text)
        { text: block.text }
      elsif block.is_a?(ToolCall)
        { functionCall: { name: block.name, args: block.arguments } }
      end
    next unless payload

    parts << signed(payload, block.signature, replay_signatures)
  end
end
|
|
101
|
+
|
|
102
|
+
# Attaches +signature+ to +part+ as :thoughtSignature, in place, only when
# +own+ is truthy and a signature is present. Always returns +part+.
def signed(part, signature, own)
  return part unless own && signature

  part.store(:thoughtSignature, signature)
  part
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
|
|
4
|
+
module Providers
|
|
5
|
+
# The Gemini API (v1beta generateContent), streamed over SSE and
|
|
6
|
+
# stateless: the full history replays every turn.
|
|
7
|
+
#
|
|
8
|
+
# Thinking is deliberately unconstrained: no budget, no level, only
|
|
9
|
+
# includeThoughts so summaries stream for the UI. The model's own defaults
|
|
10
|
+
# decide how much to think, and a host override passes through verbatim.
|
|
11
|
+
# maxOutputTokens is omitted for the same reason: the API defaults to the
|
|
12
|
+
# model's ceiling.
|
|
13
|
+
class Gemini
|
|
14
|
+
DEFAULT_THINKING = { includeThoughts: true }.freeze
|
|
15
|
+
|
|
16
|
+
def initialize(api_key:, model: "gemini-2.5-flash",
|
|
17
|
+
origin: "https://generativelanguage.googleapis.com",
|
|
18
|
+
thinking: DEFAULT_THINKING, **transport_options)
|
|
19
|
+
@api_key = api_key
|
|
20
|
+
@model = model
|
|
21
|
+
@thinking = thinking
|
|
22
|
+
@transport = Transport.new(origin: origin, **transport_options)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
attr_reader :model
|
|
26
|
+
|
|
27
|
+
def stream(messages:, system: nil, tools: [], signal: nil, **overrides, &emit)
|
|
28
|
+
model = overrides.fetch(:model, @model)
|
|
29
|
+
assembler = Gemini::Assembler.new(model: model)
|
|
30
|
+
body = build_body(messages, system, tools, overrides)
|
|
31
|
+
path = "/v1beta/models/#{model}:streamGenerateContent?alt=sse"
|
|
32
|
+
outcome = @transport.stream_post(path, body: body, headers: headers,
|
|
33
|
+
signal: signal) do |record|
|
|
34
|
+
assembler.feed(record,
|
|
35
|
+
&emit)
|
|
36
|
+
end
|
|
37
|
+
outcome == :aborted ? assembler.abort(&emit) : assembler.finish(&emit)
|
|
38
|
+
rescue Error => e
|
|
39
|
+
assembler.fail_stream(e, &emit)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def close = @transport.close
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
# Assembles the request body. contents is always present; the system
# instruction and tools are added only when there is something to send.
# generationConfig is added only when non-empty: it carries the thinking
# config (per-call override, else the instance default) and, when an
# output schema is requested and no tools are in play, the JSON response
# settings.
def build_body(messages, system, tools, overrides)
  payload = { contents: Serializer.contents(messages) }
  if (instruction = Serializer.system_instruction(system))
    payload[:systemInstruction] = instruction
  end
  payload[:tools] = Serializer.tools(tools) if tools.any?

  generation = {}
  thinking = overrides.fetch(:thinking, @thinking)
  generation[:thinkingConfig] = thinking if thinking
  # Constrained decoding combines with tools only on 3-series models
  # (preview); with tools present the task loop's validate-and-fix
  # pass carries the guarantee instead.
  schema = overrides[:output_schema]
  if schema && tools.empty?
    generation.merge!(responseMimeType: "application/json",
                      responseJsonSchema: Schema.strict(schema))
  end

  payload[:generationConfig] = generation unless generation.empty?
  payload
end
|
|
64
|
+
|
|
65
|
+
# Request headers: the API key under the "x-goog-api-key" header.
def headers
  key_header = "x-goog-api-key"
  { key_header => @api_key }
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
require_relative "gemini/serializer"
|
|
73
|
+
require_relative "gemini/assembler"
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
|
|
6
|
+
module Providers
|
|
7
|
+
class OpenAI
|
|
8
|
+
# Folds the Responses API stream into the event union. Items arrive
|
|
9
|
+
# sequentially: output_item.added opens a block, typed deltas fill it,
|
|
10
|
+
# output_item.done closes it with the complete item, whose ids and
|
|
11
|
+
# encrypted reasoning land in the signature slots for replay.
|
|
12
|
+
#
|
|
13
|
+
# Unknown event and item types are skipped by contract.
|
|
14
|
+
class Assembler
|
|
15
|
+
def initialize(model:)
|
|
16
|
+
@model = model
|
|
17
|
+
@blocks = []
|
|
18
|
+
@current = nil
|
|
19
|
+
@usage = Usage.zero
|
|
20
|
+
@status = nil
|
|
21
|
+
@incomplete_reason = nil
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def feed(record, &)
|
|
25
|
+
case record["type"]
|
|
26
|
+
when "response.output_item.added" then start_item(record["item"], &)
|
|
27
|
+
when "response.output_text.delta" then text_delta(record["delta"], &)
|
|
28
|
+
when "response.reasoning_summary_text.delta" then thinking_delta(record["delta"], &)
|
|
29
|
+
when "response.function_call_arguments.delta" then arguments_delta(record["delta"], &)
|
|
30
|
+
when "response.output_item.done" then finish_item(record["item"], &)
|
|
31
|
+
when "response.completed", "response.incomplete", "response.failed"
|
|
32
|
+
finish_response(record["response"] || {})
|
|
33
|
+
when "error" then @error = wire_error(record)
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# A stream that ended without a terminal response event was truncated,
|
|
38
|
+
# not cancelled: fail it so the loop can treat it as retryable.
|
|
39
|
+
def finish(&emit)
|
|
40
|
+
return fail_stream(@error, &emit) if @error
|
|
41
|
+
return fail_stream("stream ended without a terminal event", &emit) unless @status
|
|
42
|
+
|
|
43
|
+
@message = assemble(stop_reason: stop_reason)
|
|
44
|
+
emit&.call(Event.new(type: :done, reason: @message.stop_reason, message: @message))
|
|
45
|
+
@message
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def abort(&)
|
|
49
|
+
terminal(StopReason::ABORTED, "aborted", &)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# In-stream failures carry a code; rate limits and server errors must
# classify as retryable, not fold into prose.
#
# Builds the error for a wire record: RateLimitError when the code
# contains "rate_limit", ServerError when it contains "server", otherwise
# ProviderError. The record's message is used, or "provider error" when
# it has none.
def wire_error(record)
  error_class =
    case record["code"].to_s
    when /rate_limit/ then RateLimitError
    when /server/ then ServerError
    else ProviderError
    end
  error_class.new(record["message"] || "provider error")
end
|
|
63
|
+
|
|
64
|
+
def fail_stream(reason, &)
|
|
65
|
+
text = case reason
|
|
66
|
+
when ProviderError then "#{reason.class}: #{reason.describe}"
|
|
67
|
+
when Exception then "#{reason.class}: #{reason.message}"
|
|
68
|
+
else reason.to_s
|
|
69
|
+
end
|
|
70
|
+
terminal(StopReason::ERROR, text, error: ErrorData.for(reason), &)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def message = @message ||= finish
|
|
74
|
+
|
|
75
|
+
Builder = Struct.new(:kind, :index, :text, :json)
|
|
76
|
+
KINDS = { "message" => :text, "reasoning" => :thinking,
|
|
77
|
+
"function_call" => :toolcall }.freeze
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
def start_item(item, &)
|
|
82
|
+
kind = KINDS[item&.fetch("type", nil)]
|
|
83
|
+
return unless kind
|
|
84
|
+
|
|
85
|
+
@current = Builder.new(kind, @blocks.size, +"", +"")
|
|
86
|
+
emit_event(:"#{kind}_start", content_index: @current.index, &)
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def text_delta(delta, &)
|
|
90
|
+
return unless @current
|
|
91
|
+
|
|
92
|
+
@current.text << delta.to_s
|
|
93
|
+
emit_event(:text_delta, content_index: @current.index, delta: delta, &)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def thinking_delta(delta, &)
|
|
97
|
+
return unless @current
|
|
98
|
+
|
|
99
|
+
@current.text << delta.to_s
|
|
100
|
+
emit_event(:thinking_delta, content_index: @current.index, delta: delta, &)
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def arguments_delta(delta, &)
|
|
104
|
+
return unless @current
|
|
105
|
+
|
|
106
|
+
@current.json << delta.to_s
|
|
107
|
+
emit_event(:toolcall_delta, content_index: @current.index, delta: delta, &)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# The done item is authoritative: its text, arguments, ids, and
|
|
111
|
+
# encrypted content replace whatever the deltas accumulated.
|
|
112
|
+
def finish_item(item, &)
|
|
113
|
+
kind = KINDS[item&.fetch("type", nil)]
|
|
114
|
+
return unless kind
|
|
115
|
+
|
|
116
|
+
index = @current&.index || @blocks.size
|
|
117
|
+
block = build_block(kind, item)
|
|
118
|
+
@blocks << block
|
|
119
|
+
@current = nil
|
|
120
|
+
fields = { content_index: index }
|
|
121
|
+
fields[:tool_call] = block if block.is_a?(ToolCall)
|
|
122
|
+
unless block.is_a?(ToolCall)
|
|
123
|
+
fields[:content] = block.respond_to?(:text) ? block.text : block.thinking
|
|
124
|
+
end
|
|
125
|
+
emit_event(:"#{kind}_end", **fields.compact, &)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def build_block(kind, item)
|
|
129
|
+
case kind
|
|
130
|
+
when :text
|
|
131
|
+
text = Array(item["content"]).filter_map { |part| part["text"] }.join
|
|
132
|
+
Content::Text.new(text: text, signature: item["id"])
|
|
133
|
+
when :thinking
|
|
134
|
+
summary = Array(item["summary"]).filter_map { |part| part["text"] }.join
|
|
135
|
+
Content::Thinking.new(thinking: summary, signature: JSON.generate(item))
|
|
136
|
+
when :toolcall
|
|
137
|
+
ToolCall.new(id: item["call_id"], name: item["name"],
|
|
138
|
+
arguments: parse_arguments(item["arguments"]), signature: item["id"])
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Decodes a tool call's JSON arguments into a Hash. Blank input and any
# JSON value that is not an object yield {}. Malformed JSON falls back to
# PartialJson, again yielding {} unless that produces a Hash.
def parse_arguments(raw)
  return {} if raw.to_s.strip.empty?

  decoded = JSON.parse(raw)
  decoded.is_a?(Hash) ? decoded : {}
rescue JSON::ParserError
  salvaged = PartialJson.parse(raw)
  salvaged.is_a?(Hash) ? salvaged : {}
end
|
|
149
|
+
|
|
150
|
+
# Records a terminal response event: its status (defaulting to
# "completed"), the reason it was incomplete if any, the failure when the
# status is "failed", and the usage when reported.
def finish_response(response)
  @status = response["status"] || "completed"
  @incomplete_reason = response.dig("incomplete_details", "reason")
  if @status == "failed"
    # Build the failure through wire_error, as the "error" event path does,
    # so its code selects the error class instead of being reduced to a
    # bare message string.
    failure = response["error"]
    @error = wire_error(failure) if failure.is_a?(Hash)
    # A failed response without error details must still end as an error:
    # keep one recorded earlier, else fall back to a generic one. Leaving
    # @error nil would let #finish report the turn as done.
    @error ||= wire_error({})
  end
  usage = response["usage"]
  @usage = parse_usage(usage) if usage
end
|
|
157
|
+
|
|
158
|
+
# Infers the stop reason for a finished response. An incomplete reason of
# "max_output_tokens" takes precedence and maps to LENGTH; otherwise any
# tool call among the blocks maps to TOOL_USE, and everything else to STOP.
def stop_reason
  if @incomplete_reason == "max_output_tokens"
    StopReason::LENGTH
  elsif @blocks.any? { |block| ToolCall === block }
    StopReason::TOOL_USE
  else
    StopReason::STOP
  end
end
|
|
164
|
+
|
|
165
|
+
def terminal(reason, text, error: nil, &emit)
|
|
166
|
+
@message = assemble(stop_reason: reason, error_message: text, error: error)
|
|
167
|
+
emit&.call(Event.new(type: :error, reason: reason, message: @message,
|
|
168
|
+
error_message: text))
|
|
169
|
+
@message
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def emit_event(type, **fields, &emit)
|
|
173
|
+
emit&.call(Event.new(type:, partial: assemble, **fields))
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
def assemble(**meta)
|
|
177
|
+
blocks = @blocks.dup
|
|
178
|
+
blocks << partial_block(@current) if @current
|
|
179
|
+
Message.assistant(content: blocks, model: @model, provider: :openai,
|
|
180
|
+
usage: @usage, **meta)
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Builds a provisional content block from an in-progress builder, for use
# in partial snapshots. Text and thinking builders yield a block holding
# the text accumulated so far. A tool-call builder yields a ToolCall with
# placeholder id and name ("pending") and whatever arguments PartialJson
# can recover from the JSON received so far ({} unless that is a Hash).
def partial_block(builder)
  case builder.kind
  # Copy the text: the delta handlers keep appending to builder.text, and
  # a snapshot that shared it would change after its event was emitted.
  when :text then Content::Text.new(text: builder.text.dup)
  when :thinking then Content::Thinking.new(thinking: builder.text.dup)
  when :toolcall
    args = PartialJson.parse(builder.json)
    ToolCall.new(id: "pending", name: "pending",
                 arguments: args.is_a?(Hash) ? args : {})
  end
end
|
|
193
|
+
|
|
194
|
+
# Converts a response usage hash into a Usage. Cached input tokens are
# subtracted from the input count (never below zero) and reported as
# cache_read; output tokens are taken as given and reasoning tokens come
# from the output details. Missing counts and detail hashes read as zero.
def parse_usage(raw)
  input_details = raw["input_tokens_details"] || {}
  output_details = raw["output_tokens_details"] || {}
  cached = input_details["cached_tokens"].to_i
  uncached = (raw["input_tokens"].to_i - cached).clamp(0..)

  Usage.new(input: uncached,
            output: raw["output_tokens"].to_i,
            cache_read: cached,
            reasoning: output_details["reasoning_tokens"].to_i)
end
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
end
|