mistri 0.0.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +215 -0
- data/README.md +367 -3
- data/lib/generators/mistri/install/install_generator.rb +54 -0
- data/lib/generators/mistri/install/templates/migration.rb.tt +14 -0
- data/lib/generators/mistri/install/templates/model.rb.tt +4 -0
- data/lib/generators/mistri/mcp/mcp_generator.rb +57 -0
- data/lib/generators/mistri/mcp/templates/migration.rb.tt +27 -0
- data/lib/generators/mistri/mcp/templates/model.rb.tt +63 -0
- data/lib/mistri/abort_signal.rb +63 -0
- data/lib/mistri/agent.rb +389 -0
- data/lib/mistri/budget.rb +29 -0
- data/lib/mistri/compaction.rb +78 -0
- data/lib/mistri/compactor.rb +182 -0
- data/lib/mistri/content.rb +89 -0
- data/lib/mistri/edit.rb +238 -0
- data/lib/mistri/errors.rb +94 -0
- data/lib/mistri/event.rb +54 -0
- data/lib/mistri/mcp/client.rb +156 -0
- data/lib/mistri/mcp/oauth.rb +286 -0
- data/lib/mistri/mcp/wires.rb +164 -0
- data/lib/mistri/mcp.rb +96 -0
- data/lib/mistri/memory.rb +26 -0
- data/lib/mistri/message.rb +90 -0
- data/lib/mistri/models.rb +43 -0
- data/lib/mistri/partial_json.rb +210 -0
- data/lib/mistri/providers/anthropic/assembler.rb +205 -0
- data/lib/mistri/providers/anthropic/serializer.rb +106 -0
- data/lib/mistri/providers/anthropic.rb +106 -0
- data/lib/mistri/providers/fake.rb +109 -0
- data/lib/mistri/providers/gemini/assembler.rb +163 -0
- data/lib/mistri/providers/gemini/serializer.rb +109 -0
- data/lib/mistri/providers/gemini.rb +73 -0
- data/lib/mistri/providers/openai/assembler.rb +205 -0
- data/lib/mistri/providers/openai/serializer.rb +104 -0
- data/lib/mistri/providers/openai.rb +72 -0
- data/lib/mistri/reminder.rb +36 -0
- data/lib/mistri/result.rb +32 -0
- data/lib/mistri/retry_policy.rb +47 -0
- data/lib/mistri/schema.rb +162 -0
- data/lib/mistri/session.rb +124 -0
- data/lib/mistri/sinks/action_cable.rb +30 -0
- data/lib/mistri/sinks/coalesced.rb +61 -0
- data/lib/mistri/sinks/sse.rb +26 -0
- data/lib/mistri/skill.rb +15 -0
- data/lib/mistri/skills.rb +81 -0
- data/lib/mistri/sse.rb +50 -0
- data/lib/mistri/stop_reason.rb +25 -0
- data/lib/mistri/stores/active_record.rb +47 -0
- data/lib/mistri/stores/jsonl.rb +37 -0
- data/lib/mistri/stores/memory.rb +22 -0
- data/lib/mistri/sub_agent.rb +211 -0
- data/lib/mistri/tool.rb +95 -0
- data/lib/mistri/tool_call.rb +18 -0
- data/lib/mistri/tool_context.rb +15 -0
- data/lib/mistri/tool_executor.rb +87 -0
- data/lib/mistri/tool_result.rb +23 -0
- data/lib/mistri/tools/edit_file.rb +37 -0
- data/lib/mistri/tools/find_in_file.rb +36 -0
- data/lib/mistri/tools/list_files.rb +16 -0
- data/lib/mistri/tools/read_file.rb +38 -0
- data/lib/mistri/tools/read_memory.rb +16 -0
- data/lib/mistri/tools/update_memory.rb +22 -0
- data/lib/mistri/tools/write_file.rb +20 -0
- data/lib/mistri/tools.rb +50 -0
- data/lib/mistri/transport.rb +228 -0
- data/lib/mistri/usage.rb +79 -0
- data/lib/mistri/version.rb +1 -1
- data/lib/mistri/workspace/active_record.rb +47 -0
- data/lib/mistri/workspace/directory.rb +52 -0
- data/lib/mistri/workspace/memory.rb +40 -0
- data/lib/mistri/workspace/single.rb +48 -0
- data/lib/mistri.rb +89 -0
- metadata +79 -10
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
  module Providers
    class Anthropic
      # Serializes protocol messages into Anthropic Messages API wire shapes.
      #
      # Wire rules that matter: consecutive tool results merge into one user
      # turn (parallel tool calls demand their results together), thinking
      # blocks replay with their signature and must never be altered, redacted
      # thinking replays as its opaque payload, and cache_control marks the
      # last system block and the last user message so the stable prefix and
      # the growing history both cache. A message whose blocks all filter away
      # is left out of the request instead of being sent with empty content.
      module Serializer
        module_function

        # Wraps the system prompt in one text block, tagged with cache_control
        # when +cache+ is truthy. Returns nil for a nil or empty prompt so the
        # caller can omit the field.
        def system_blocks(system, cache:)
          return nil if system.nil? || system.empty?

          blocks = [{ type: "text", text: system }]
          blocks.last[:cache_control] = { type: "ephemeral" } if cache
          blocks
        end

        # Converts the history into wire turns. System messages are skipped,
        # each run of consecutive tool messages becomes one user turn of
        # tool_result blocks, and every other message maps to one turn. A
        # non-tool message that serializes to no blocks at all (for example an
        # aborted assistant turn holding only empty text) is dropped. With
        # +cache+ truthy the last user turn is tagged with cache_control.
        def messages(history, cache: false)
          turns = history.reject(&:system?).chunk_while { |a, b| a.tool? && b.tool? }
          wire = turns.filter_map do |group|
            next tool_results(group) if group.first.tool?

            turn = message(group.first)
            turn unless turn[:content].empty?
          end
          mark_last_user_turn(wire) if cache
          wire
        end

        # Maps tool definitions (hashes keyed by symbols or strings) to the
        # name/description/input_schema shape; eager_input_streaming is copied
        # only when the definition sets it.
        def tools(definitions)
          definitions.map do |tool|
            spec = tool.transform_keys(&:to_sym)
            wire = { name: spec[:name], description: spec[:description],
                     input_schema: spec[:input_schema] }
            wire[:eager_input_streaming] = true if spec[:eager_input_streaming]
            wire
          end
        end

        # One message as a wire turn. The content array may come back empty
        # when every block is filtered; #messages skips such turns.
        def message(msg)
          { role: msg.role.to_s, content: msg.content.filter_map { |part| block(part) } }
        end

        # A run of tool messages as a single user turn, one tool_result block
        # per message, paired with its call through tool_call_id.
        def tool_results(group)
          { role: "user", content: group.map do |msg|
            blocks = msg.content.filter_map { |part| block(part) }
            # The API rejects an empty tool_result; a space stands in for a
            # tool that returned nothing.
            blocks = [{ type: "text", text: " " }] if blocks.empty?
            { type: "tool_result", tool_use_id: msg.tool_call_id, content: blocks }
          end }
        end

        # Returns nil for a block the API would reject (empty text, unusable
        # thinking), so callers filter_map it out. Raises SchemaError for a
        # block class this serializer does not know.
        def block(block)
          case block
          when Content::Text then text_block(block)
          when Content::Thinking then thinking_block(block)
          when Content::Image
            { type: "image",
              source: { type: "base64", media_type: block.mime_type, data: block.data } }
          when ToolCall
            { type: "tool_use", id: block.id, name: block.name, input: block.arguments }
          else
            raise SchemaError, "cannot serialize #{block.class} for Anthropic"
          end
        end

        # The API rejects empty text content blocks.
        def text_block(block)
          block.text.empty? ? nil : { type: "text", text: block.text }
        end

        # Thinking replays only with its signature. Redacted thinking carries
        # its opaque payload; a normal thinking block missing its signature
        # (an aborted turn cut before signature_delta) cannot replay, so it
        # degrades to its text, or drops when even that is empty. An empty
        # signature string is treated the same as a missing one.
        def thinking_block(block)
          signature = block.signature
          return { type: "redacted_thinking", data: signature } if block.redacted?

          unless signature.nil? || signature.empty?
            return { type: "thinking", thinking: block.thinking,
                     signature: signature }
          end

          block.thinking.empty? ? nil : { type: "text", text: block.thinking }
        end

        # Tags the final block of the last user turn with cache_control,
        # mutating +wire+ in place. Does nothing when there is no user turn
        # or its content is not a non-empty array.
        def mark_last_user_turn(wire)
          last_user = wire.rindex { |turn| turn[:role] == "user" }
          return unless last_user

          content = wire[last_user][:content]
          return unless content.is_a?(Array) && content.any?

          content.last[:cache_control] = { type: "ephemeral" }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
  module Providers
    # The Anthropic Messages API, streamed. Defaults target the current model
    # generation: adaptive thinking with summarized display (so thinking
    # streams for the UI), prompt caching on, and max_tokens set to the
    # model's catalogued output ceiling (UNKNOWN_MODEL_MAX_TOKENS when the
    # model is not catalogued).
    #
    # Provider failures fold into the stream as an error turn rather than
    # raising: the loop decides whether to retry, and the host always gets a
    # message back.
    class Anthropic
      VERSION_HEADER = "2023-06-01"
      DEFAULT_THINKING = { type: "adaptive", display: "summarized" }.freeze

      # Messages API parameters passed through verbatim from a stream override.
      PASSTHROUGH = %i[temperature top_p top_k stop_sequences metadata
                       tool_choice service_tier].freeze
      # The ceiling for an uncatalogued model: high enough for headroom, low
      # enough that every current model accepts it. Catalog a model to unlock
      # its real output limit.
      UNKNOWN_MODEL_MAX_TOKENS = 64_000

      # Stores the request defaults and builds the transport. Any keyword not
      # named here is handed to Transport.new together with +origin+.
      def initialize(api_key:, model: "claude-opus-4-8", origin: "https://api.anthropic.com",
                     max_tokens: nil, thinking: DEFAULT_THINKING, cache: true,
                     **transport_options)
        @api_key = api_key
        @model = model
        @max_tokens = max_tokens
        @thinking = thinking
        @cache = cache
        @transport = Transport.new(origin: origin, **transport_options)
      end

      attr_reader :model

      # Posts one streamed request to /v1/messages and feeds every record to
      # a fresh assembler, forwarding events to the given block. Returns what
      # the assembler returns from abort (transport reported :aborted),
      # finish, or fail_stream (an Error was raised along the way).
      def stream(messages:, system: nil, tools: [], signal: nil, **overrides, &emit)
        model = overrides.fetch(:model, @model)
        assembler = Anthropic::Assembler.new(model: model)
        body = build_body(model, messages, system, tools, overrides)
        outcome = @transport.stream_post("/v1/messages", body: body, headers: headers,
                                                         signal: signal) do |record|
          assembler.feed(record, &emit)
        end
        outcome == :aborted ? assembler.abort(&emit) : assembler.finish(&emit)
      rescue Error => e
        assembler.fail_stream(e, &emit)
      end

      def close = @transport.close

      private

      # Assembles the request body. Optional sections (system, tools,
      # thinking, output_config) appear only when they have a value, and the
      # PASSTHROUGH overrides are merged in last.
      def build_body(model, messages, system, tools, overrides)
        body = {
          model: model,
          max_tokens: max_tokens_for(model, overrides),
          stream: true,
          messages: Serializer.messages(messages, cache: @cache)
        }
        system_blocks = Serializer.system_blocks(system, cache: @cache)
        body[:system] = system_blocks if system_blocks
        body[:tools] = Serializer.tools(tools) if tools.any?
        thinking = thinking_for(model, overrides)
        body[:thinking] = thinking if thinking
        if (schema = overrides[:output_schema])
          body[:output_config] = { format: { type: "json_schema",
                                             schema: Schema.strict(schema) } }
        end
        body.merge(overrides.slice(*PASSTHROUGH))
      end

      # Adaptive thinking 400s on budget-only models like Haiku 4.5, so the
      # adaptive default is dropped for a model the catalog marks :budget; a
      # host that wants thinking there passes an explicit budget config. An
      # unknown model keeps the default, since new models are adaptive.
      def thinking_for(model, overrides)
        thinking = overrides.fetch(:thinking, @thinking)
        return thinking unless thinking && thinking[:type] == "adaptive"
        return nil if Models.thinking(model) == :budget

        thinking
      end

      # The API requires max_tokens and bills only actual output, so the
      # default is the model's own catalogued ceiling: full headroom, no
      # silent truncation. An uncatalogued model falls back safely. An
      # override that is present but nil counts as absent, so the request
      # never carries a null max_tokens.
      def max_tokens_for(model, overrides)
        overrides[:max_tokens] || @max_tokens || Models.max_output(model) ||
          UNKNOWN_MODEL_MAX_TOKENS
      end

      # Authentication and API-version headers sent with every request.
      def headers
        { "x-api-key" => @api_key, "anthropic-version" => VERSION_HEADER }
      end
    end
  end
end
|
|
104
|
+
|
|
105
|
+
require_relative "anthropic/serializer"
|
|
106
|
+
require_relative "anthropic/assembler"
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
  module Providers
    # A scriptable stand-in for a real provider. Each call to #stream plays
    # the next scripted turn as a well-formed event sequence and returns the
    # assembled assistant message, so a host can test agent behavior without
    # a network while still going through the streaming path.
    #
    #   provider = Mistri::Providers::Fake.new(turns: [
    #     { text: "Hello!" },
    #     { tool_calls: [{ name: "search", arguments: { "q" => "ruby" } }] },
    #   ])
    #
    # A turn may combine :thinking, :text, and :tool_calls, or carry :error to
    # stream a failed turn. :stop_reason overrides the inferred reason.
    class Fake
      MODEL = "fake-1"

      # The log of every #stream call (messages snapshot plus the remaining
      # keyword options), for asserting what the agent sent.
      attr_reader :requests

      def model
        MODEL
      end

      # +turns+ is the script, consumed front to back; keys may be strings or
      # symbols. +chunk_size+ is the maximum characters per delta, floored
      # at 1.
      def initialize(turns: [], chunk_size: 12)
        @turns = turns.map { |scripted| scripted.transform_keys(&:to_sym) }
        @chunk_size = chunk_size < 1 ? 1 : chunk_size
        @requests = []
      end

      # Records the request, then plays the next scripted turn through the
      # given block. Raises ConfigurationError once the script is exhausted.
      def stream(messages: [], **options, &emit)
        # Copy the array now: the loop keeps appending replies to it.
        @requests << { messages: messages.dup, options: options }
        scripted = @turns.shift
        raise ConfigurationError, "fake provider has no scripted turns left" if scripted.nil?

        blocks = []
        emit_event(emit, :start, blocks)
        return finish_error(scripted, blocks, emit) if scripted[:error]

        %i[thinking text].each do |kind|
          stream_block(kind, scripted[kind], blocks, emit) if scripted[kind]
        end
        Array(scripted[:tool_calls]).each.with_index do |spec, position|
          stream_tool_call(spec, position, blocks, emit)
        end
        finish(scripted, blocks, emit)
      end

      private

      # Streams one text or thinking block: a start event, one delta per
      # chunk (each carrying the text built so far), then the end event.
      def stream_block(kind, full_text, blocks, emit)
        index = blocks.size
        emit_event(emit, :"#{kind}_start", blocks, content_index: index)
        built = +""
        full_text.scan(/.{1,#{@chunk_size}}/m).each do |chunk|
          built << chunk
          in_progress = blocks + [build_block(kind, built)]
          emit_event(emit, :"#{kind}_delta", in_progress, content_index: index, delta: chunk)
        end
        blocks << build_block(kind, full_text)
        emit_event(emit, :"#{kind}_end", blocks, content_index: index, content: full_text)
      end

      # A Text block for :text, a Thinking block for anything else.
      def build_block(kind, text)
        return Content::Text.new(text: text) if kind == :text

        Content::Thinking.new(thinking: text)
      end

      # Streams one tool call as start, a single delta holding the whole
      # argument JSON, and end. A call without an :id gets "call_N" from its
      # one-based position within the turn.
      def stream_tool_call(spec, position, blocks, emit)
        spec = spec.transform_keys(&:to_sym)
        arguments = (spec[:arguments] || {}).transform_keys(&:to_s)
        call = ToolCall.new(id: spec[:id] || "call_#{position + 1}", name: spec[:name],
                            arguments: arguments)
        index = blocks.size
        emit_event(emit, :toolcall_start, blocks, content_index: index)
        emit_event(emit, :toolcall_delta, blocks, content_index: index,
                                                  delta: JSON.generate(call.arguments))
        blocks << call
        emit_event(emit, :toolcall_end, blocks, content_index: index, tool_call: call)
      end

      # Ends a successful turn with a :done event. The stop reason is the
      # scripted one, else TOOL_USE when a tool call was streamed, else STOP.
      def finish(turn, blocks, emit)
        reason = turn[:stop_reason]
        reason ||= blocks.any?(ToolCall) ? StopReason::TOOL_USE : StopReason::STOP
        message = assemble(blocks, usage: turn[:usage] || Usage.zero, stop_reason: reason)
        emit.call(Event.new(type: :done, reason: reason, message: message)) if emit
        message
      end

      # Ends a failed turn with an :error event. The error payload carries
      # :error_type (default "Error") and, when scripted, :status.
      def finish_error(turn, blocks, emit)
        error = { "type" => turn.fetch(:error_type, "Error") }
        error["status"] = turn[:status] if turn[:status]
        message = assemble(blocks, usage: Usage.zero, stop_reason: StopReason::ERROR,
                                   error_message: turn[:error], error: error)
        if emit
          emit.call(Event.new(type: :error, reason: StopReason::ERROR, message: message,
                              error_message: turn[:error]))
        end
        message
      end

      # Builds the assistant message around +blocks+, stamped with the fake
      # model and provider.
      def assemble(blocks, **meta)
        Message.assistant(content: blocks, model: MODEL, provider: :fake, **meta)
      end

      # Sends one in-flight event with the current partial message; a no-op
      # when the caller gave no block.
      def emit_event(emit, type, blocks, **fields)
        return unless emit

        emit.call(Event.new(type: type, partial: assemble(blocks), **fields))
      end
    end
  end
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
  module Providers
    class Gemini
      # Folds streamGenerateContent records into the event union. Each record
      # carries delta parts: plain text extends a text block, thought parts a
      # thinking block, and a functionCall arrives whole, so its three events
      # emit back to back. A kind switch closes the open block.
      #
      # Thought signatures ride on individual parts and are captured onto the
      # block they arrived with, verbatim, for replay.
      #
      # Block parameters are named rather than anonymous (&): feed forwards
      # the block from inside an inner block, which Ruby 3.3.0 refuses to
      # parse in the anonymous form.
      class Assembler
        # The text or thinking block currently being extended.
        Builder = Struct.new(:kind, :index, :text, :signature)

        def initialize(model:)
          @model = model
          @blocks = []
          @current = nil
          @usage = Usage.zero
          @finish_reason = nil
          @error = nil
          @message = nil
        end

        # Consumes one parsed stream record. An "error" record is remembered
        # for #finish and nothing else in it is read. Otherwise the first
        # candidate's parts are folded in order, then finishReason and
        # usageMetadata are captured when present.
        def feed(record, &emit)
          if (error = record["error"])
            @error = ProviderError.new(error["message"] || "provider error",
                                       status: error["code"])
            return
          end

          candidate = record.dig("candidates", 0) || {}
          Array(candidate.dig("content", "parts")).each { |part| fold_part(part, &emit) }
          @finish_reason = candidate["finishReason"] if candidate["finishReason"]
          @usage = parse_usage(record["usageMetadata"]) if record["usageMetadata"]
        end

        # A stream that ended without a finishReason was truncated, not
        # cancelled: fail it so the loop can treat it as retryable.
        def finish(&emit)
          return fail_stream(@error, &emit) if @error
          return fail_stream("stream ended without a finish reason", &emit) unless @finish_reason

          close_current(&emit)
          @message = assemble(stop_reason: stop_reason)
          emit&.call(Event.new(type: :done, reason: @message.stop_reason, message: @message))
          @message
        end

        # Ends the turn as aborted. The open block is folded into the message
        # without an end event; only the terminal error event is emitted.
        def abort(&emit)
          close_current
          terminal(StopReason::ABORTED, "aborted", &emit)
        end

        # Ends the turn as failed. +reason+ may be a ProviderError (rendered
        # through #describe), any other exception, or a plain description.
        # As in #abort, the open block closes without an end event.
        def fail_stream(reason, &emit)
          close_current
          text = case reason
                 when ProviderError then "#{reason.class}: #{reason.describe}"
                 when Exception then "#{reason.class}: #{reason.message}"
                 else reason.to_s
                 end
          terminal(StopReason::ERROR, text, error: ErrorData.for(reason), &emit)
        end

        # The final message, finishing the stream (without events) if no
        # terminal method has produced one yet.
        def message = @message ||= finish

        private

        # Routes a part by shape: functionCall, then text (thinking when the
        # part is flagged "thought"). Parts of any other shape are ignored.
        def fold_part(part, &emit)
          if part.key?("functionCall")
            fold_function_call(part, &emit)
          elsif part.key?("text")
            fold_text(part, part["thought"] ? :thinking : :text, &emit)
          end
        end

        # Extends the open block of +kind+, closing a block of the other kind
        # and opening a new one first when needed. The emitted delta is the
        # same string that was appended.
        def fold_text(part, kind, &emit)
          close_current(&emit) if @current && @current.kind != kind
          unless @current
            @current = Builder.new(kind, @blocks.size, +"", nil)
            emit_event(:"#{kind}_start", content_index: @current.index, &emit)
          end
          chunk = part["text"].to_s
          @current.text << chunk
          @current.signature = part["thoughtSignature"] if part["thoughtSignature"]
          emit_event(:"#{kind}_delta", content_index: @current.index, delta: chunk, &emit)
        end

        # A function call arrives complete in one part: start, one delta with
        # the full arguments, end. A call without an id is named after its
        # one-based block position; non-Hash args become {}.
        def fold_function_call(part, &emit)
          close_current(&emit)
          call_spec = part["functionCall"] || {}
          arguments = call_spec["args"].is_a?(Hash) ? call_spec["args"] : {}
          call = ToolCall.new(id: call_spec["id"] || "call_#{@blocks.size + 1}",
                              name: call_spec["name"], arguments: arguments,
                              signature: part["thoughtSignature"])
          index = @blocks.size
          emit_event(:toolcall_start, content_index: index, &emit)
          emit_event(:toolcall_delta, content_index: index,
                                      delta: JSON.generate(arguments), &emit)
          @blocks << call
          emit_event(:toolcall_end, content_index: index, tool_call: call, &emit)
        end

        # Moves the open block into the finished list and emits its end
        # event. @current is cleared before emitting so the partial message
        # does not count the block twice.
        def close_current(&emit)
          return unless @current

          block = build_current
          @blocks << block
          kind = @current.kind
          index = @current.index
          @current = nil
          emit_event(:"#{kind}_end", content_index: index,
                                     content: kind == :text ? block.text : block.thinking, &emit)
        end

        # The open builder as a content block.
        def build_current
          if @current.kind == :text
            Content::Text.new(text: @current.text, signature: @current.signature)
          else
            Content::Thinking.new(thinking: @current.text, signature: @current.signature)
          end
        end

        # Any tool call means TOOL_USE; otherwise MAX_TOKENS maps to LENGTH
        # and every other finish reason to STOP.
        def stop_reason
          return StopReason::TOOL_USE if @blocks.any?(ToolCall)
          return StopReason::LENGTH if @finish_reason == "MAX_TOKENS"

          StopReason::STOP
        end

        # Builds the terminal message and emits the single :error event used
        # for both aborted and failed turns.
        def terminal(reason, text, error: nil, &emit)
          @message = assemble(stop_reason: reason, error_message: text, error: error)
          emit&.call(Event.new(type: :error, reason: reason, message: @message,
                               error_message: text))
          @message
        end

        # Emits one in-flight event carrying the current partial message.
        def emit_event(type, **fields, &emit)
          emit&.call(Event.new(type:, partial: assemble, **fields))
        end

        # The assistant message as it stands: finished blocks plus the open
        # one, if any.
        def assemble(**meta)
          blocks = @blocks.dup
          blocks << build_current if @current
          Message.assistant(content: blocks, model: @model, provider: :gemini,
                            usage: @usage, **meta)
        end

        # Maps usageMetadata to Usage: cachedContentTokenCount is subtracted
        # from promptTokenCount (floored at zero) to get input, and
        # thoughtsTokenCount is added to candidatesTokenCount to get output.
        def parse_usage(raw)
          prompt = raw["promptTokenCount"].to_i
          cache_read = raw["cachedContentTokenCount"].to_i
          reasoning = raw["thoughtsTokenCount"].to_i
          Usage.new(input: [prompt - cache_read, 0].max,
                    output: raw["candidatesTokenCount"].to_i + reasoning,
                    cache_read: cache_read, reasoning: reasoning)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
  module Providers
    class Gemini
      # Serializes protocol messages into generateContent contents.
      #
      # Wire rules that matter: roles are user and model, consecutive tool
      # results merge into one user turn of functionResponse parts, and
      # thought signatures echo back verbatim on the exact part they arrived
      # with, but only for messages this provider produced; a foreign
      # signature would be rejected. Thinking summaries are output-only and
      # never replay.
      module Serializer
        module_function

        # Converts the history into alternating user/model turns. System
        # messages are skipped, runs of tool messages become one user turn,
        # turns that serialize to no parts are dropped, and adjacent user
        # turns are merged.
        def contents(history)
          groups = history.reject(&:system?).chunk_while { |a, b| a.tool? && b.tool? }
          turns = groups.filter_map do |group|
            group.first.tool? ? tool_turn(group) : turn(group.first)
          end
          merge_user_runs(turns)
        end

        # A steered run puts a user message right behind tool results, and
        # both serialize as user turns. Gemini expects turns to alternate, so
        # consecutive user turns merge into one.
        def merge_user_runs(turns)
          turns.chunk_while { |a, b| a[:role] == "user" && b[:role] == "user" }
               .map do |run|
            run.length == 1 ? run.first : { role: "user", parts: run.flat_map { |t| t[:parts] } }
          end
        end

        # The system prompt as a systemInstruction payload, or nil when the
        # prompt is nil or empty.
        def system_instruction(system)
          return nil if system.nil? || system.empty?

          { parts: [{ text: system }] }
        end

        # Tool definitions (hashes keyed by symbols or strings) wrapped in a
        # single functionDeclarations entry.
        def tools(definitions)
          declarations = definitions.map do |tool|
            spec = tool.transform_keys(&:to_sym)
            { name: spec[:name], description: spec[:description],
              parameters: spec[:input_schema] }
          end
          [{ functionDeclarations: declarations }]
        end

        # One non-tool message as a turn, or nil when it has no parts to
        # send (for example an assistant message holding only thinking).
        def turn(msg)
          parts = msg.assistant? ? assistant_parts(msg) : user_parts(msg)
          return nil if parts.empty?

          { role: msg.assistant? ? "model" : "user", parts: parts }
        end

        # Gemini pairs a functionResponse to its call by NAME; a wrong name
        # silently mismatches, so a missing one fails loudly instead.
        def tool_turn(group)
          { role: "user", parts: group.map do |msg|
            unless msg.tool_name
              raise SchemaError, "Gemini tool results need tool_name to pair with their call"
            end

            { functionResponse: { name: msg.tool_name,
                                  response: { "result" => result_text(msg) } } }
          end }
        end

        # Non-text blocks in a tool result have no functionResponse encoding;
        # note the omission rather than dropping it silently.
        def result_text(msg)
          omitted = msg.content.count { |block| !block.is_a?(Content::Text) }
          text = msg.text.to_s
          omitted.positive? ? "#{text}\n[#{omitted} non-text block(s) omitted]".strip : text
        end

        # User content as parts: text and inline image data. Any other block
        # class raises SchemaError.
        def user_parts(msg)
          msg.content.map do |block|
            case block
            when Content::Text then { text: block.text }
            when Content::Image
              { inlineData: { mimeType: block.mime_type, data: block.data } }
            else
              raise SchemaError, "cannot serialize #{block.class} for Gemini user input"
            end
          end
        end

        # Assistant content as parts: text and function calls; every other
        # block class is left out. Signatures are echoed only for messages
        # tagged as produced by Gemini. The tag is compared as a string so
        # both :gemini and "gemini" count (presumably a history reloaded from
        # a JSON-backed store can carry the String form; verify against
        # Message deserialization).
        def assistant_parts(msg)
          own = msg.provider.to_s == "gemini"
          msg.content.filter_map do |block|
            case block
            when Content::Text then signed({ text: block.text }, block.signature, own)
            when ToolCall
              signed({ functionCall: { name: block.name, args: block.arguments } },
                     block.signature, own)
            end
          end
        end

        # Attaches the thought signature to +part+ when the message is our
        # own and a signature exists; returns the part either way.
        def signed(part, signature, own)
          part[:thoughtSignature] = signature if own && signature
          part
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
  module Providers
    # Streaming client for the Gemini API (v1beta generateContent over SSE).
    # The API is stateless, so the whole history is sent on every turn.
    #
    # Thinking is left unconstrained on purpose: the default config only sets
    # includeThoughts so summaries stream for the UI, with no budget or level.
    # A host-supplied thinking config is passed through unchanged.
    # maxOutputTokens is never set either, leaving the limit to the API.
    class Gemini
      DEFAULT_THINKING = { includeThoughts: true }.freeze

      # Keeps the request defaults and builds the transport; keywords not
      # named here go to Transport.new along with +origin+.
      def initialize(api_key:, model: "gemini-2.5-flash",
                     origin: "https://generativelanguage.googleapis.com",
                     thinking: DEFAULT_THINKING, **transport_options)
        @api_key = api_key
        @model = model
        @thinking = thinking
        @transport = Transport.new(origin: origin, **transport_options)
      end

      attr_reader :model

      # Streams one turn: posts to the model's streamGenerateContent
      # endpoint, feeds each record to a fresh assembler, and returns the
      # assembler's result from abort, finish, or (when an Error is raised)
      # fail_stream.
      def stream(messages:, system: nil, tools: [], signal: nil, **overrides, &emit)
        model = overrides.fetch(:model, @model)
        assembler = Gemini::Assembler.new(model: model)
        payload = build_body(messages, system, tools, overrides)
        endpoint = "/v1beta/models/#{model}:streamGenerateContent?alt=sse"
        outcome = @transport.stream_post(endpoint, body: payload, headers: headers,
                                                   signal: signal) do |record|
          assembler.feed(record, &emit)
        end
        return assembler.abort(&emit) if outcome == :aborted

        assembler.finish(&emit)
      rescue Error => e
        assembler.fail_stream(e, &emit)
      end

      def close
        @transport.close
      end

      private

      # The request body: contents always, then systemInstruction, tools and
      # generationConfig only when each has something to carry.
      def build_body(messages, system, tools, overrides)
        body = { contents: Serializer.contents(messages) }
        if (instruction = Serializer.system_instruction(system))
          body[:systemInstruction] = instruction
        end
        body[:tools] = Serializer.tools(tools) if tools.any?

        config = {}
        if (thinking = overrides.fetch(:thinking, @thinking))
          config[:thinkingConfig] = thinking
        end
        # Constrained decoding combines with tools only on 3-series models
        # (preview); with tools present the task loop's validate-and-fix
        # pass carries the guarantee instead.
        schema = overrides[:output_schema]
        if schema && tools.empty?
          config[:responseMimeType] = "application/json"
          config[:responseJsonSchema] = Schema.strict(schema)
        end
        body[:generationConfig] = config if config.any?
        body
      end

      # The API-key header sent with every request.
      def headers
        { "x-goog-api-key" => @api_key }
      end
    end
  end
end
|
|
71
|
+
|
|
72
|
+
require_relative "gemini/serializer"
|
|
73
|
+
require_relative "gemini/assembler"
|