mistri 0.0.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +215 -0
  3. data/README.md +367 -3
  4. data/lib/generators/mistri/install/install_generator.rb +54 -0
  5. data/lib/generators/mistri/install/templates/migration.rb.tt +14 -0
  6. data/lib/generators/mistri/install/templates/model.rb.tt +4 -0
  7. data/lib/generators/mistri/mcp/mcp_generator.rb +57 -0
  8. data/lib/generators/mistri/mcp/templates/migration.rb.tt +27 -0
  9. data/lib/generators/mistri/mcp/templates/model.rb.tt +63 -0
  10. data/lib/mistri/abort_signal.rb +63 -0
  11. data/lib/mistri/agent.rb +389 -0
  12. data/lib/mistri/budget.rb +29 -0
  13. data/lib/mistri/compaction.rb +78 -0
  14. data/lib/mistri/compactor.rb +182 -0
  15. data/lib/mistri/content.rb +89 -0
  16. data/lib/mistri/edit.rb +238 -0
  17. data/lib/mistri/errors.rb +94 -0
  18. data/lib/mistri/event.rb +54 -0
  19. data/lib/mistri/mcp/client.rb +156 -0
  20. data/lib/mistri/mcp/oauth.rb +286 -0
  21. data/lib/mistri/mcp/wires.rb +164 -0
  22. data/lib/mistri/mcp.rb +96 -0
  23. data/lib/mistri/memory.rb +26 -0
  24. data/lib/mistri/message.rb +90 -0
  25. data/lib/mistri/models.rb +43 -0
  26. data/lib/mistri/partial_json.rb +210 -0
  27. data/lib/mistri/providers/anthropic/assembler.rb +205 -0
  28. data/lib/mistri/providers/anthropic/serializer.rb +106 -0
  29. data/lib/mistri/providers/anthropic.rb +106 -0
  30. data/lib/mistri/providers/fake.rb +109 -0
  31. data/lib/mistri/providers/gemini/assembler.rb +163 -0
  32. data/lib/mistri/providers/gemini/serializer.rb +109 -0
  33. data/lib/mistri/providers/gemini.rb +73 -0
  34. data/lib/mistri/providers/openai/assembler.rb +205 -0
  35. data/lib/mistri/providers/openai/serializer.rb +104 -0
  36. data/lib/mistri/providers/openai.rb +72 -0
  37. data/lib/mistri/reminder.rb +36 -0
  38. data/lib/mistri/result.rb +32 -0
  39. data/lib/mistri/retry_policy.rb +47 -0
  40. data/lib/mistri/schema.rb +162 -0
  41. data/lib/mistri/session.rb +124 -0
  42. data/lib/mistri/sinks/action_cable.rb +30 -0
  43. data/lib/mistri/sinks/coalesced.rb +61 -0
  44. data/lib/mistri/sinks/sse.rb +26 -0
  45. data/lib/mistri/skill.rb +15 -0
  46. data/lib/mistri/skills.rb +81 -0
  47. data/lib/mistri/sse.rb +50 -0
  48. data/lib/mistri/stop_reason.rb +25 -0
  49. data/lib/mistri/stores/active_record.rb +47 -0
  50. data/lib/mistri/stores/jsonl.rb +37 -0
  51. data/lib/mistri/stores/memory.rb +22 -0
  52. data/lib/mistri/sub_agent.rb +211 -0
  53. data/lib/mistri/tool.rb +95 -0
  54. data/lib/mistri/tool_call.rb +18 -0
  55. data/lib/mistri/tool_context.rb +15 -0
  56. data/lib/mistri/tool_executor.rb +87 -0
  57. data/lib/mistri/tool_result.rb +23 -0
  58. data/lib/mistri/tools/edit_file.rb +37 -0
  59. data/lib/mistri/tools/find_in_file.rb +36 -0
  60. data/lib/mistri/tools/list_files.rb +16 -0
  61. data/lib/mistri/tools/read_file.rb +38 -0
  62. data/lib/mistri/tools/read_memory.rb +16 -0
  63. data/lib/mistri/tools/update_memory.rb +22 -0
  64. data/lib/mistri/tools/write_file.rb +20 -0
  65. data/lib/mistri/tools.rb +50 -0
  66. data/lib/mistri/transport.rb +228 -0
  67. data/lib/mistri/usage.rb +79 -0
  68. data/lib/mistri/version.rb +1 -1
  69. data/lib/mistri/workspace/active_record.rb +47 -0
  70. data/lib/mistri/workspace/directory.rb +52 -0
  71. data/lib/mistri/workspace/memory.rb +40 -0
  72. data/lib/mistri/workspace/single.rb +48 -0
  73. data/lib/mistri.rb +89 -0
  74. metadata +79 -10
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module Mistri
  module Providers
    class Anthropic
      # Turns protocol messages into the wire shapes of the Anthropic
      # Messages API.
      #
      # The rules worth remembering: a run of consecutive tool results folds
      # into a single user turn (parallel tool calls need their results
      # delivered together); a thinking block is replayed with its signature
      # and is never modified; redacted thinking is replayed as its opaque
      # payload; and cache_control goes on the last system block and on the
      # last user message, so the stable prefix and the growing history are
      # both cacheable.
      module Serializer
        module_function

        # Wraps the system prompt in a single text block, or returns nil when
        # there is no prompt. With +cache+ set the block gets a cache marker.
        def system_blocks(system, cache:)
          return nil if system.nil? || system.empty?

          prompt = { type: "text", text: system }
          prompt[:cache_control] = { type: "ephemeral" } if cache
          [prompt]
        end

        # Serializes every non-system message. Adjacent tool messages become
        # one user turn; every other message becomes a turn of its own.
        def messages(history, cache: false)
          runs = history.reject(&:system?)
                        .slice_when { |left, right| !(left.tool? && right.tool?) }
          wire = runs.map do |run|
            head = run.first
            head.tool? ? tool_results(run) : message(head)
          end
          mark_last_user_turn(wire) if cache
          wire
        end

        # Maps tool definitions (string or symbol keys) to the API's tool
        # shape; eager_input_streaming is only sent when the definition sets it.
        def tools(definitions)
          definitions.map do |definition|
            spec = definition.transform_keys(&:to_sym)
            name, description, schema = spec.values_at(:name, :description, :input_schema)
            entry = { name: name, description: description, input_schema: schema }
            entry[:eager_input_streaming] = true if spec[:eager_input_streaming]
            entry
          end
        end

        # One message becomes one turn; blocks that serialize to nil are dropped.
        def message(msg)
          role = msg.role.to_s
          content = msg.content.filter_map { |part| block(part) }
          { role: role, content: content }
        end

        # A run of tool messages becomes a single user turn holding one
        # tool_result per message.
        def tool_results(group)
          results = group.map do |msg|
            parts = msg.content.filter_map { |part| block(part) }
            # An empty tool_result is rejected by the API, so a tool that
            # returned nothing is represented by a single space.
            parts = [{ type: "text", text: " " }] if parts.empty?
            { type: "tool_result", tool_use_id: msg.tool_call_id, content: parts }
          end
          { role: "user", content: results }
        end

        # Serializes one content block. The result is nil for a block the API
        # would reject (empty text, unusable thinking), which is why callers
        # run it through filter_map. Unknown block types raise SchemaError.
        def block(block)
          case block
          in Content::Text
            text_block(block)
          in Content::Thinking
            thinking_block(block)
          in Content::Image
            source = { type: "base64", media_type: block.mime_type, data: block.data }
            { type: "image", source: source }
          in ToolCall
            { type: "tool_use", id: block.id, name: block.name, input: block.arguments }
          else
            raise SchemaError, "cannot serialize #{block.class} for Anthropic"
          end
        end

        # Empty text content blocks are rejected by the API, so they map to nil.
        def text_block(block)
          return nil if block.text.empty?

          { type: "text", text: block.text }
        end

        # A thinking block can only be replayed together with its signature.
        # Redacted thinking sends its opaque payload. An unsigned normal
        # block (an aborted turn cut off before signature_delta) cannot be
        # replayed as thinking: it falls back to plain text, or to nil when
        # there is no text either.
        def thinking_block(block)
          if block.redacted?
            { type: "redacted_thinking", data: block.signature }
          elsif block.signature
            { type: "thinking", thinking: block.thinking, signature: block.signature }
          elsif block.thinking.empty?
            nil
          else
            { type: "text", text: block.thinking }
          end
        end

        # Puts the cache marker on the final content block of the last user
        # turn, when there is such a turn and it has array content.
        def mark_last_user_turn(wire)
          turn = wire.reverse_each.find { |candidate| candidate[:role] == "user" }
          return unless turn

          content = turn[:content]
          return unless content.is_a?(Array) && content.any?

          content.last[:cache_control] = { type: "ephemeral" }
        end
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module Mistri
  module Providers
    # The Anthropic Messages API, streamed. Defaults target the current model
    # generation: adaptive thinking with summarized display (so thinking
    # streams for the UI), prompt caching on, and max_tokens defaulting to the
    # model's catalogued output ceiling.
    #
    # Provider failures fold into the stream as an error turn rather than
    # raising: the loop decides whether to retry, and the host always gets a
    # message back.
    class Anthropic
      VERSION_HEADER = "2023-06-01"
      DEFAULT_THINKING = { type: "adaptive", display: "summarized" }.freeze

      # Messages API parameters passed through verbatim from a stream override.
      PASSTHROUGH = %i[temperature top_p top_k stop_sequences metadata
                       tool_choice service_tier].freeze
      # The ceiling for an uncatalogued model: high enough for headroom, low
      # enough that every current model accepts it. Catalog a model to unlock
      # its real output limit.
      UNKNOWN_MODEL_MAX_TOKENS = 64_000

      # api_key:: sent as the x-api-key header on every request.
      # model:: default model; a per-call :model override wins.
      # origin:: base URL handed to the transport.
      # max_tokens:: fixed output ceiling; nil defers to the model catalog.
      # thinking:: thinking config hash, or nil to send none.
      # cache:: whether the serializer places cache_control markers.
      # Any other keyword is forwarded to Transport.new.
      def initialize(api_key:, model: "claude-opus-4-8", origin: "https://api.anthropic.com",
                     max_tokens: nil, thinking: DEFAULT_THINKING, cache: true,
                     **transport_options)
        @api_key = api_key
        @model = model
        @max_tokens = max_tokens
        @thinking = thinking
        @cache = cache
        @transport = Transport.new(origin: origin, **transport_options)
      end

      attr_reader :model

      # Streams one turn. Every record from the transport is fed to the
      # assembler, which emits events through the given block. Returns what
      # the assembler returns from abort, finish, or fail_stream; an Error
      # raised along the way becomes a failed turn instead of propagating.
      def stream(messages:, system: nil, tools: [], signal: nil, **overrides, &emit)
        model = overrides.fetch(:model, @model)
        assembler = Anthropic::Assembler.new(model: model)
        body = build_body(model, messages, system, tools, overrides)
        outcome = @transport.stream_post("/v1/messages", body: body, headers: headers,
                                                          signal: signal) do |record|
          assembler.feed(record, &emit)
        end
        outcome == :aborted ? assembler.abort(&emit) : assembler.finish(&emit)
      rescue Error => e
        assembler.fail_stream(e, &emit)
      end

      def close
        @transport.close
      end

      private

      # Builds the request body: the required fields first, then the
      # optional system prompt, tools, thinking config and structured-output
      # schema, and finally any PASSTHROUGH overrides the caller supplied.
      def build_body(model, messages, system, tools, overrides)
        body = {
          model: model,
          max_tokens: max_tokens_for(model, overrides),
          stream: true,
          messages: Serializer.messages(messages, cache: @cache)
        }
        system_blocks = Serializer.system_blocks(system, cache: @cache)
        body[:system] = system_blocks if system_blocks
        body[:tools] = Serializer.tools(tools) if tools.any?
        thinking = thinking_for(model, overrides)
        body[:thinking] = thinking if thinking
        if (schema = overrides[:output_schema])
          body[:output_config] = { format: { type: "json_schema",
                                             schema: Schema.strict(schema) } }
        end
        passthrough = PASSTHROUGH.each_with_object({}) do |key, params|
          params[key] = overrides[key] if overrides.key?(key)
        end
        body.merge(passthrough)
      end

      # Adaptive thinking 400s on budget-only models like Haiku 4.5, so the
      # adaptive default is dropped for a model the catalog marks :budget; a
      # host that wants thinking there passes an explicit budget config. An
      # unknown model keeps the default, since new models are adaptive.
      def thinking_for(model, overrides)
        thinking = overrides.fetch(:thinking, @thinking)
        return thinking unless adaptive?(thinking)
        return nil if Models.thinking(model) == :budget

        thinking
      end

      # True for a thinking config whose type is adaptive. String keys and a
      # symbol type are recognised too: they serialize to the same JSON, so
      # they must hit the budget-model guard as well.
      def adaptive?(thinking)
        return false unless thinking.is_a?(Hash)

        (thinking[:type] || thinking["type"]).to_s == "adaptive"
      end

      # The API requires max_tokens and bills only actual output, so the
      # default is the model's own catalogued ceiling: full headroom, no
      # silent truncation. An uncatalogued model falls back safely. A nil
      # override is treated as absent, because a null max_tokens would be
      # sent for a required field.
      def max_tokens_for(model, overrides)
        overrides[:max_tokens] || @max_tokens || Models.max_output(model) ||
          UNKNOWN_MODEL_MAX_TOKENS
      end

      def headers
        { "x-api-key" => @api_key, "anthropic-version" => VERSION_HEADER }
      end
    end
  end
end
104
+
105
+ require_relative "anthropic/serializer"
106
+ require_relative "anthropic/assembler"
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Mistri
  module Providers
    # A scriptable provider: it streams each scripted turn as a well-formed
    # event sequence and returns the assembled assistant message, so hosts test
    # agent behavior hermetically while exercising real streaming semantics.
    #
    #   provider = Mistri::Providers::Fake.new(turns: [
    #     { text: "Hello!" },
    #     { tool_calls: [{ name: "search", arguments: { "q" => "ruby" } }] },
    #   ])
    #
    # A turn may combine :thinking, :text, and :tool_calls, or carry :error to
    # stream a failed turn. :stop_reason overrides the inferred reason.
    #
    # Every partial message handed to the emit block is a snapshot: it gets
    # its own copy of the block list and of the text built so far, so an
    # event a test keeps does not change as later chunks stream.
    class Fake
      MODEL = "fake-1"

      # Every #stream call is recorded here, so a test can assert what the
      # agent actually sent.
      attr_reader :requests

      def model
        MODEL
      end

      # turns:: scripted turns, consumed one per #stream call; keys may be
      #         strings or symbols.
      # chunk_size:: characters per text/thinking delta, at least 1.
      def initialize(turns: [], chunk_size: 12)
        @turns = turns.map { |turn| turn.transform_keys(&:to_sym) }
        @chunk_size = [chunk_size, 1].max
        @requests = []
      end

      # Streams the next scripted turn through the emit block and returns the
      # assembled message. Raises ConfigurationError once the script is
      # exhausted.
      def stream(messages: [], **options, &emit)
        # Snapshot the array: the loop appends replies to it in place.
        @requests << { messages: messages.dup, options: options }
        turn = @turns.shift
        raise ConfigurationError, "fake provider has no scripted turns left" unless turn

        blocks = []
        emit_event(emit, :start, blocks)
        return finish_error(turn, blocks, emit) if turn[:error]

        stream_block(:thinking, turn[:thinking], blocks, emit) if turn[:thinking]
        stream_block(:text, turn[:text], blocks, emit) if turn[:text]
        Array(turn[:tool_calls]).each_with_index do |call, position|
          stream_tool_call(call, position, blocks, emit)
        end
        finish(turn, blocks, emit)
      end

      private

      # Emits start, one delta per chunk, and end for a text or thinking block.
      def stream_block(kind, full_text, blocks, emit)
        index = blocks.size
        emit_event(emit, :"#{kind}_start", blocks, content_index: index)
        built = +""
        full_text.scan(/.{1,#{@chunk_size}}/m) do |chunk|
          built << chunk
          # The buffer keeps growing, so each delta's partial gets a copy.
          emit_event(emit, :"#{kind}_delta", blocks + [build_block(kind, built.dup)],
                     content_index: index, delta: chunk)
        end
        blocks << build_block(kind, full_text)
        emit_event(emit, :"#{kind}_end", blocks, content_index: index, content: full_text)
      end

      def build_block(kind, text)
        kind == :text ? Content::Text.new(text: text) : Content::Thinking.new(thinking: text)
      end

      # A scripted tool call streams whole: start, one delta carrying the
      # JSON-encoded arguments, end. A missing id becomes "call_<position + 1>".
      def stream_tool_call(spec, position, blocks, emit)
        spec = spec.transform_keys(&:to_sym)
        call = ToolCall.new(id: spec[:id] || "call_#{position + 1}", name: spec[:name],
                            arguments: (spec[:arguments] || {}).transform_keys(&:to_s))
        index = blocks.size
        emit_event(emit, :toolcall_start, blocks, content_index: index)
        emit_event(emit, :toolcall_delta, blocks, content_index: index,
                                                  delta: JSON.generate(call.arguments))
        blocks << call
        emit_event(emit, :toolcall_end, blocks, content_index: index, tool_call: call)
      end

      # Closes a successful turn. Without a scripted :stop_reason, the reason
      # is TOOL_USE when any tool call streamed and STOP otherwise.
      def finish(turn, blocks, emit)
        reason = turn[:stop_reason] ||
                 (blocks.any?(ToolCall) ? StopReason::TOOL_USE : StopReason::STOP)
        message = assemble(blocks, usage: turn[:usage] || Usage.zero, stop_reason: reason)
        emit&.call(Event.new(type: :done, reason: reason, message: message))
        message
      end

      # Closes a scripted failure: the message carries the error text plus an
      # error hash built from :error_type (default "Error") and :status.
      def finish_error(turn, blocks, emit)
        error = { "type" => turn.fetch(:error_type, "Error") }
        error["status"] = turn[:status] if turn[:status]
        message = assemble(blocks, usage: Usage.zero, stop_reason: StopReason::ERROR,
                                   error_message: turn[:error], error: error)
        emit&.call(Event.new(type: :error, reason: StopReason::ERROR, message: message,
                             error_message: turn[:error]))
        message
      end

      # Builds an assistant message over a copy of the block list, so the
      # message never aliases the array the stream keeps appending to.
      def assemble(blocks, **meta)
        Message.assistant(content: blocks.dup, model: MODEL, provider: :fake, **meta)
      end

      def emit_event(emit, type, blocks, **fields)
        emit&.call(Event.new(type: type, partial: assemble(blocks), **fields))
      end
    end
  end
end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Mistri
  module Providers
    class Gemini
      # Folds streamGenerateContent records into the event union. Each record
      # carries delta parts: plain text extends a text block, thought parts a
      # thinking block, and a functionCall arrives whole, so its three events
      # emit back to back. A kind switch closes the open block.
      #
      # Thought signatures ride on individual parts and are captured onto the
      # block they arrived with, verbatim, for replay.
      #
      # Every partial message handed to the emit block is a snapshot: the
      # open block's text is copied, so an event that is kept around does not
      # change as later deltas arrive.
      class Assembler
        # The block currently being streamed: its kind (:text or :thinking),
        # its index in the content list, the text received so far, and the
        # last thought signature seen for it.
        Builder = Struct.new(:kind, :index, :text, :signature)

        def initialize(model:)
          @model = model
          @blocks = []
          @current = nil
          @usage = Usage.zero
          @finish_reason = nil
          @error = nil
          @message = nil
        end

        # Consumes one stream record. An "error" record is remembered and
        # reported by #finish; otherwise the first candidate's parts are
        # folded and the finish reason and usage metadata are recorded when
        # present.
        def feed(record, &emit)
          if (error = record["error"])
            @error = ProviderError.new(error["message"] || "provider error",
                                       status: error["code"])
            return
          end

          candidate = record.dig("candidates", 0) || {}
          Array(candidate.dig("content", "parts")).each { |part| fold_part(part, &emit) }
          @finish_reason = candidate["finishReason"] if candidate["finishReason"]
          @usage = parse_usage(record["usageMetadata"]) if record["usageMetadata"]
        end

        # A stream that ended without a finishReason was truncated, not
        # cancelled: fail it so the loop can treat it as retryable.
        def finish(&emit)
          return fail_stream(@error, &emit) if @error
          return fail_stream("stream ended without a finish reason", &emit) unless @finish_reason

          close_current(&emit)
          @message = assemble(stop_reason: stop_reason)
          emit&.call(Event.new(type: :done, reason: @message.stop_reason, message: @message))
          @message
        end

        # Ends the turn as aborted. The open block is kept in the message,
        # but its end event is not emitted.
        def abort(&emit)
          close_current
          terminal(StopReason::ABORTED, "aborted", &emit)
        end

        # Ends the turn as failed. +reason+ may be a ProviderError, any other
        # exception, or a plain description. The open block is kept in the
        # message, but its end event is not emitted.
        def fail_stream(reason, &emit)
          close_current
          text = case reason
                 when ProviderError then "#{reason.class}: #{reason.describe}"
                 when Exception then "#{reason.class}: #{reason.message}"
                 else reason.to_s
                 end
          terminal(StopReason::ERROR, text, error: ErrorData.for(reason), &emit)
        end

        # The final message, finishing the stream first if nothing has yet.
        def message
          @message ||= finish
        end

        private

        # Parts without a functionCall or text key are ignored.
        def fold_part(part, &emit)
          if part.key?("functionCall")
            fold_function_call(part, &emit)
          elsif part.key?("text")
            fold_text(part, part["thought"] ? :thinking : :text, &emit)
          end
        end

        def fold_text(part, kind, &emit)
          close_current(&emit) if @current && @current.kind != kind
          unless @current
            @current = Builder.new(kind, @blocks.size, +"", nil)
            emit_event(:"#{kind}_start", content_index: @current.index, &emit)
          end
          # Coerce once so the emitted delta is exactly what was appended; a
          # part whose text is null yields an empty delta rather than nil.
          text = part["text"].to_s
          @current.text << text
          @current.signature = part["thoughtSignature"] if part["thoughtSignature"]
          emit_event(:"#{kind}_delta", content_index: @current.index, delta: text, &emit)
        end

        # A function call arrives complete in one part: start, one delta with
        # the full arguments, end. A call without an id is numbered by its
        # position in the block list.
        def fold_function_call(part, &emit)
          close_current(&emit)
          call_spec = part["functionCall"] || {}
          arguments = call_spec["args"].is_a?(Hash) ? call_spec["args"] : {}
          call = ToolCall.new(id: call_spec["id"] || "call_#{@blocks.size + 1}",
                              name: call_spec["name"], arguments: arguments,
                              signature: part["thoughtSignature"])
          index = @blocks.size
          emit_event(:toolcall_start, content_index: index, &emit)
          emit_event(:toolcall_delta, content_index: index,
                                      delta: JSON.generate(arguments), &emit)
          @blocks << call
          emit_event(:toolcall_end, content_index: index, tool_call: call, &emit)
        end

        # Moves the open block into the finished list and emits its end event.
        def close_current(&emit)
          return unless @current

          block = build_current
          @blocks << block
          kind = @current.kind
          index = @current.index
          @current = nil
          emit_event(:"#{kind}_end", content_index: index,
                                     content: kind == :text ? block.text : block.thinking, &emit)
        end

        # Builds a content block from the open builder. The text is copied
        # because the builder's buffer keeps being appended to in place.
        def build_current
          text = @current.text.dup
          if @current.kind == :text
            Content::Text.new(text: text, signature: @current.signature)
          else
            Content::Thinking.new(thinking: text, signature: @current.signature)
          end
        end

        # Any tool call wins; otherwise MAX_TOKENS maps to LENGTH and every
        # other finish reason to STOP.
        def stop_reason
          return StopReason::TOOL_USE if @blocks.any?(ToolCall)
          return StopReason::LENGTH if @finish_reason == "MAX_TOKENS"

          StopReason::STOP
        end

        def terminal(reason, text, error: nil, &emit)
          @message = assemble(stop_reason: reason, error_message: text, error: error)
          emit&.call(Event.new(type: :error, reason: reason, message: @message,
                               error_message: text))
          @message
        end

        def emit_event(type, **fields, &emit)
          emit&.call(Event.new(type: type, partial: assemble, **fields))
        end

        # The message so far: the finished blocks plus a snapshot of the open one.
        def assemble(**meta)
          blocks = @blocks.dup
          blocks << build_current if @current
          Message.assistant(content: blocks, model: @model, provider: :gemini,
                            usage: @usage, **meta)
        end

        # Input is the prompt count minus cached tokens (never below zero);
        # output is the candidate count plus thought tokens.
        def parse_usage(raw)
          prompt = raw["promptTokenCount"].to_i
          cache_read = raw["cachedContentTokenCount"].to_i
          reasoning = raw["thoughtsTokenCount"].to_i
          Usage.new(input: [prompt - cache_read, 0].max,
                    output: raw["candidatesTokenCount"].to_i + reasoning,
                    cache_read: cache_read, reasoning: reasoning)
        end
      end
    end
  end
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
module Mistri
  module Providers
    class Gemini
      # Turns protocol messages into generateContent contents.
      #
      # The rules worth remembering: the only roles are user and model; a run
      # of consecutive tool results folds into one user turn made of
      # functionResponse parts; and a thought signature is echoed back
      # verbatim on the very part it arrived with, but only for messages this
      # provider produced, because a foreign signature would be rejected.
      # Thinking summaries are output-only and are never replayed.
      module Serializer
        module_function

        # Serializes every non-system message into alternating turns.
        def contents(history)
          runs = history.reject(&:system?)
                        .slice_when { |left, right| !(left.tool? && right.tool?) }
          turns = runs.filter_map do |run|
            head = run.first
            head.tool? ? tool_turn(run) : turn(head)
          end
          merge_user_runs(turns)
        end

        # In a steered run a user message follows tool results directly, and
        # both serialize as user turns. Gemini expects alternating turns, so
        # adjacent user turns are folded into a single one.
        def merge_user_runs(turns)
          turns.each_with_object([]) do |current, merged|
            previous = merged.last
            if previous && previous[:role] == "user" && current[:role] == "user"
              merged[-1] = { role: "user", parts: previous[:parts] + current[:parts] }
            else
              merged << current
            end
          end
        end

        # The system prompt as a systemInstruction, or nil when there is none.
        def system_instruction(system)
          return if system.nil? || system.empty?

          { parts: [{ text: system }] }
        end

        # All tool definitions (string or symbol keys) go into a single
        # functionDeclarations entry.
        def tools(definitions)
          declarations = definitions.map do |definition|
            spec = definition.transform_keys(&:to_sym)
            name, description, schema = spec.values_at(:name, :description, :input_schema)
            { name: name, description: description, parameters: schema }
          end
          [{ functionDeclarations: declarations }]
        end

        # One message becomes one turn, or nil when it has nothing to send.
        def turn(msg)
          from_model = msg.assistant?
          parts = from_model ? assistant_parts(msg) : user_parts(msg)
          return nil if parts.empty?

          role = from_model ? "model" : "user"
          { role: role, parts: parts }
        end

        # Gemini matches a functionResponse to its call by NAME. A wrong name
        # mismatches silently, so a missing name is raised as a SchemaError.
        def tool_turn(group)
          responses = group.map do |msg|
            name = msg.tool_name
            unless name
              raise SchemaError, "Gemini tool results need tool_name to pair with their call"
            end

            { functionResponse: { name: name, response: { "result" => result_text(msg) } } }
          end
          { role: "user", parts: responses }
        end

        # A functionResponse cannot carry non-text blocks, so their number is
        # appended as a note instead of dropping them without a trace.
        def result_text(msg)
          omitted = msg.content.reject { |block| block.is_a?(Content::Text) }.size
          text = msg.text.to_s
          return text unless omitted.positive?

          "#{text}\n[#{omitted} non-text block(s) omitted]".strip
        end

        # User input may hold text and images only; anything else raises.
        def user_parts(msg)
          msg.content.map do |block|
            if block.is_a?(Content::Text)
              { text: block.text }
            elsif block.is_a?(Content::Image)
              { inlineData: { mimeType: block.mime_type, data: block.data } }
            else
              raise SchemaError, "cannot serialize #{block.class} for Gemini user input"
            end
          end
        end

        # Replays text and tool calls; every other block type is skipped.
        # Signatures are attached only when the message came from Gemini.
        def assistant_parts(msg)
          own = msg.provider == :gemini
          msg.content.each_with_object([]) do |block, parts|
            if block.is_a?(Content::Text)
              parts << signed({ text: block.text }, block.signature, own)
            elsif block.is_a?(ToolCall)
              call = { name: block.name, args: block.arguments }
              parts << signed({ functionCall: call }, block.signature, own)
            end
          end
        end

        # Adds the thought signature to the part when it is ours and present.
        def signed(part, signature, own)
          return part unless own && signature

          part[:thoughtSignature] = signature
          part
        end
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module Mistri
  module Providers
    # The Gemini API (v1beta generateContent), streamed over SSE. It is
    # stateless, so the complete history is replayed on every turn.
    #
    # Thinking is left unconstrained on purpose: no budget and no level, just
    # includeThoughts so that summaries stream for the UI. How much to think
    # is left to the model's own defaults, and a host override is passed
    # through verbatim. maxOutputTokens is left out for the same reason: the
    # API defaults to the model's ceiling.
    class Gemini
      DEFAULT_THINKING = { includeThoughts: true }.freeze

      attr_reader :model

      # Keywords other than api_key, model, origin and thinking are handed
      # to Transport.new.
      def initialize(api_key:, model: "gemini-2.5-flash",
                     origin: "https://generativelanguage.googleapis.com",
                     thinking: DEFAULT_THINKING, **transport_options)
        @api_key = api_key
        @model = model
        @thinking = thinking
        @transport = Transport.new(origin: origin, **transport_options)
      end

      # Streams one turn. Each record from the transport goes to the
      # assembler, which emits events through the given block. The return
      # value is whatever the assembler returns from abort, finish, or
      # fail_stream; an Error raised on the way becomes a failed turn.
      def stream(messages:, system: nil, tools: [], signal: nil, **overrides, &emit)
        model = overrides.fetch(:model, @model)
        assembler = Gemini::Assembler.new(model: model)
        body = build_body(messages, system, tools, overrides)
        outcome = @transport.stream_post(
          "/v1beta/models/#{model}:streamGenerateContent?alt=sse",
          body: body, headers: headers, signal: signal
        ) { |record| assembler.feed(record, &emit) }

        if outcome == :aborted
          assembler.abort(&emit)
        else
          assembler.finish(&emit)
        end
      rescue Error => e
        assembler.fail_stream(e, &emit)
      end

      def close
        @transport.close
      end

      private

      # The request body: contents first, then the optional system
      # instruction, tools and generation config, each added only when set.
      def build_body(messages, system, tools, overrides)
        body = { contents: Serializer.contents(messages) }
        instruction = Serializer.system_instruction(system)
        body[:systemInstruction] = instruction if instruction
        body[:tools] = Serializer.tools(tools) if tools.any?
        config = generation_config(tools, overrides)
        body[:generationConfig] = config unless config.empty?
        body
      end

      # The thinking config plus, when asked for, a JSON response schema.
      # Constrained decoding combines with tools only on 3-series models
      # (preview), so with tools present the schema is not sent and the task
      # loop's validate-and-fix pass carries the guarantee instead.
      def generation_config(tools, overrides)
        config = {}
        thinking = overrides.fetch(:thinking, @thinking)
        config[:thinkingConfig] = thinking if thinking
        schema = overrides[:output_schema]
        return config unless schema && tools.empty?

        config[:responseMimeType] = "application/json"
        config[:responseJsonSchema] = Schema.strict(schema)
        config
      end

      def headers
        { "x-goog-api-key" => @api_key }
      end
    end
  end
end
71
+
72
+ require_relative "gemini/serializer"
73
+ require_relative "gemini/assembler"