mistri 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +162 -0
  3. data/README.md +314 -3
  4. data/lib/generators/mistri/install/install_generator.rb +54 -0
  5. data/lib/generators/mistri/install/templates/migration.rb.tt +14 -0
  6. data/lib/generators/mistri/install/templates/model.rb.tt +4 -0
  7. data/lib/mistri/abort_signal.rb +63 -0
  8. data/lib/mistri/agent.rb +340 -0
  9. data/lib/mistri/budget.rb +29 -0
  10. data/lib/mistri/compaction.rb +78 -0
  11. data/lib/mistri/compactor.rb +182 -0
  12. data/lib/mistri/content.rb +89 -0
  13. data/lib/mistri/edit.rb +238 -0
  14. data/lib/mistri/errors.rb +94 -0
  15. data/lib/mistri/event.rb +50 -0
  16. data/lib/mistri/memory.rb +26 -0
  17. data/lib/mistri/message.rb +90 -0
  18. data/lib/mistri/models.rb +43 -0
  19. data/lib/mistri/partial_json.rb +210 -0
  20. data/lib/mistri/providers/anthropic/assembler.rb +205 -0
  21. data/lib/mistri/providers/anthropic/serializer.rb +106 -0
  22. data/lib/mistri/providers/anthropic.rb +106 -0
  23. data/lib/mistri/providers/fake.rb +109 -0
  24. data/lib/mistri/providers/gemini/assembler.rb +163 -0
  25. data/lib/mistri/providers/gemini/serializer.rb +109 -0
  26. data/lib/mistri/providers/gemini.rb +73 -0
  27. data/lib/mistri/providers/openai/assembler.rb +205 -0
  28. data/lib/mistri/providers/openai/serializer.rb +104 -0
  29. data/lib/mistri/providers/openai.rb +72 -0
  30. data/lib/mistri/result.rb +30 -0
  31. data/lib/mistri/retry_policy.rb +47 -0
  32. data/lib/mistri/schema.rb +162 -0
  33. data/lib/mistri/session.rb +124 -0
  34. data/lib/mistri/sinks/action_cable.rb +30 -0
  35. data/lib/mistri/sinks/coalesced.rb +61 -0
  36. data/lib/mistri/sinks/sse.rb +26 -0
  37. data/lib/mistri/skill.rb +15 -0
  38. data/lib/mistri/skills.rb +81 -0
  39. data/lib/mistri/sse.rb +50 -0
  40. data/lib/mistri/stop_reason.rb +25 -0
  41. data/lib/mistri/stores/active_record.rb +47 -0
  42. data/lib/mistri/stores/jsonl.rb +37 -0
  43. data/lib/mistri/stores/memory.rb +22 -0
  44. data/lib/mistri/sub_agent.rb +211 -0
  45. data/lib/mistri/tool.rb +94 -0
  46. data/lib/mistri/tool_call.rb +18 -0
  47. data/lib/mistri/tool_context.rb +15 -0
  48. data/lib/mistri/tool_executor.rb +66 -0
  49. data/lib/mistri/tool_result.rb +23 -0
  50. data/lib/mistri/tools/edit_file.rb +37 -0
  51. data/lib/mistri/tools/find_in_file.rb +36 -0
  52. data/lib/mistri/tools/list_files.rb +16 -0
  53. data/lib/mistri/tools/read_file.rb +38 -0
  54. data/lib/mistri/tools/read_memory.rb +16 -0
  55. data/lib/mistri/tools/update_memory.rb +22 -0
  56. data/lib/mistri/tools/write_file.rb +20 -0
  57. data/lib/mistri/tools.rb +50 -0
  58. data/lib/mistri/transport.rb +187 -0
  59. data/lib/mistri/usage.rb +79 -0
  60. data/lib/mistri/version.rb +1 -1
  61. data/lib/mistri/workspace/active_record.rb +47 -0
  62. data/lib/mistri/workspace/directory.rb +52 -0
  63. data/lib/mistri/workspace/memory.rb +40 -0
  64. data/lib/mistri/workspace/single.rb +48 -0
  65. data/lib/mistri.rb +87 -0
  66. metadata +68 -5
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Mistri
  # Parses the JSON prefix a model has emitted so far, so in-flight tool-call
  # arguments are readable before the closing brace arrives. Best effort by
  # contract: never raises, drops a dangling key or half-written token, and
  # returns {} for hopeless input.
  module PartialJson
    # Decode as much of +text+ as can be read.
    #
    # @param text [#to_s] JSON received so far; may stop at any character
    # @return [Object] the decoded value with every complete part kept, or {}
    #   when the input is blank, unusable, or parsing failed in any way
    def self.parse(text)
      s = text.to_s.strip
      return {} if s.empty?

      value = Parser.new(s).parse
      value.equal?(Parser::NOTHING) ? {} : value
    rescue StandardError, SystemStackError
      {}
    end

    # Recursive descent over the prefix. Truncation trips @partial, and every
    # frame unwinds keeping the structure built so far.
    class Parser
      # Sentinel for "no value here": distinct from nil, which is a real JSON
      # value (null). Compared with equal? everywhere.
      NOTHING = Object.new
      LITERALS = { "true" => true, "false" => false, "null" => nil }.freeze

      # Nesting past this is treated as truncated: a model's real tool
      # arguments never nest this deep, and the cap keeps a pathological input
      # from overflowing the stack.
      MAX_DEPTH = 256

      # The JSON number grammar. Integer() and Float() are more liberal than
      # JSON (octal "010", hex "0x10", underscores "1_000", a leading "+"), so
      # tokens are checked against the grammar before being converted.
      JSON_INTEGER = /\A-?(?:0|[1-9][0-9]*)\z/
      JSON_NUMBER = /\A-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?\z/

      # A half-written escape at the very end of a string fragment: a lone
      # backslash or a \u with fewer than four hex digits. The backslash only
      # opens an escape when an even run of backslashes precedes it; that run
      # is captured so it can be kept.
      DANGLING_ESCAPE = /(?<!\\)((?:\\\\)*)\\(?:u[0-9a-fA-F]{0,3})?\z/

      # A complete \uD800-\uDBFF escape at the very end: the first half of a
      # surrogate pair whose second half has not arrived yet.
      LONE_HIGH_SURROGATE = /(?<!\\)((?:\\\\)*)\\u[dD][89abAB][0-9a-fA-F]{2}\z/

      # @param source [String] the stripped, non-empty JSON prefix
      def initialize(source)
        @s = source
        @n = source.length
        @i = 0
        @partial = false
        @depth = 0
      end

      # @return [Object] the decoded value, or NOTHING when none could be read
      def parse = value

      private

      # Dispatch on the next non-blank character.
      def value
        skip_ws
        return truncated if eof?

        case @s[@i]
        when '"' then string
        when "{" then nested { object }
        when "[" then nested { array }
        else scalar
        end
      end

      # Run the block one nesting level deeper, refusing to go past MAX_DEPTH.
      def nested
        return truncated if @depth >= MAX_DEPTH

        @depth += 1
        begin
          yield
        ensure
          @depth -= 1
        end
      end

      # Read an object starting at "{". Pairs whose key or value could not be
      # read are skipped; whatever was collected is returned even on
      # truncation or on an unexpected character in key position.
      def object
        @i += 1
        obj = {}
        until @partial
          skip_ws
          break truncated if eof?
          break @i += 1 if @s[@i] == "}"
          break unless @s[@i] == '"'

          key, val = pair
          obj[key] = val unless key.equal?(NOTHING) || val.equal?(NOTHING)
          skip_ws
          @i += 1 if !eof? && @s[@i] == ","
        end
        obj
      end

      # One key/value. A key cut off mid-string, input ending before the
      # colon, or a missing colon all drop the pair; otherwise the value is
      # read, and may itself be a salvaged partial.
      def pair
        key = string
        return [NOTHING, NOTHING] if @partial

        skip_ws
        return [NOTHING, truncated] if eof?
        return [NOTHING, NOTHING] unless @s[@i] == ":"

        @i += 1
        [key, value]
      end

      # Read an array starting at "[", keeping every element that decoded.
      def array
        @i += 1
        arr = []
        until @partial
          skip_ws
          break truncated if eof?
          break @i += 1 if @s[@i] == "]"

          element = value
          arr << element unless element.equal?(NOTHING)
          skip_ws
          @i += 1 if !eof? && @s[@i] == ","
        end
        arr
      end

      # Read a string starting at its opening quote. A closed string is
      # decoded as is; an unterminated one marks the parse truncated and is
      # salvaged from what arrived.
      def string
        start = @i
        @i += 1
        escaped = false
        while @i < @n
          case
          when escaped then escaped = false
          when @s[@i] == "\\" then escaped = true
          when @s[@i] == '"'
            @i += 1
            return decode(@s[start...@i])
          end
          @i += 1
        end
        truncated
        salvage_string(@s[start..])
      end

      # Close an unterminated string, first shedding whatever escape is still
      # being written at its tail: a lone backslash, a \u with fewer than four
      # digits, or a high surrogate still waiting for its low half. Backslash
      # parity decides what counts as an escape, so text that merely follows
      # an escaped backslash (the "u12" in \\u12) is kept.
      def salvage_string(fragment)
        candidate = fragment.sub(DANGLING_ESCAPE) { Regexp.last_match(1) }
        # Runs second on purpose: stripping a half-written low surrogate above
        # can expose the high surrogate that preceded it.
        candidate = candidate.sub(LONE_HIGH_SURROGATE) { Regexp.last_match(1) }
        decode(%(#{candidate}"))
      end

      # Read a bare token (literal or number) up to the next delimiter.
      def scalar
        start = @i
        @i += 1 while @i < @n && !"},] \n\r\t".include?(@s[@i])
        token = @s[start...@i]
        # A structural character in value position: consume it so the caller's
        # loop always makes progress.
        return (@i += 1) && NOTHING if token.empty?

        truncated if eof?
        literal(token) { number(token) }
      end

      # Resolve true/false/null. When the input was cut off, a prefix of a
      # literal ("tr", "nu") resolves to that literal. Anything else yields to
      # the block.
      def literal(token)
        return LITERALS[token] if LITERALS.key?(token)

        if @partial
          match = LITERALS.keys.find { |word| word.start_with?(token) }
          return LITERALS[match] if match
        end
        yield
      end

      # Convert a numeric token. Only tokens matching the JSON grammar are
      # converted directly; everything else gets one attempt at trimming.
      def number(token)
        return Integer(token, 10) if JSON_INTEGER.match?(token)
        return finite(Float(token)) if JSON_NUMBER.match?(token)

        trimmed_number(token)
      end

      # A number cut mid-token: shed the dangling exponent or decimal point
      # and retry. What remains must itself be a JSON number, so a bare minus
      # or a non-JSON numeral is dropped.
      def trimmed_number(token)
        trimmed = token.sub(/[eE][+-]?\z/, "").sub(/\.\z/, "")
        return NOTHING unless JSON_NUMBER.match?(trimmed)

        finite(Float(trimmed))
      end

      # A model that emits 1e999 yields Float::INFINITY, which JSON cannot
      # generate, so it would crash replay and persistence. Drop it.
      def finite(number)
        number.finite? ? number : NOTHING
      end

      # Decode one quoted JSON string, or NOTHING when it is malformed.
      def decode(json_string)
        JSON.parse(json_string)
      rescue JSON::ParserError
        NOTHING
      end

      def skip_ws
        @i += 1 while @i < @n && " \n\r\t".include?(@s[@i])
      end

      def eof? = @i >= @n

      # Record that the input ended early and hand back NOTHING.
      def truncated
        @partial = true
        NOTHING
      end
    end
  end
end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
module Mistri
  module Providers
    class Anthropic
      # Turns the records of a Messages API stream into events while building
      # the assistant message one content block at a time. Each event carries
      # a snapshot of the message as assembled so far, and tool-call arguments
      # still arriving are parsed with PartialJson so they can be read before
      # the block closes.
      #
      # Record types and content-block types this class does not know are
      # ignored, so a stream containing them keeps flowing.
      class Assembler
        # Wire content-block type => the kind used for builders and event names.
        BLOCK_KINDS = {
          "text" => :text,
          "thinking" => :thinking,
          "redacted_thinking" => :thinking,
          "tool_use" => :toolcall
        }.freeze

        # @param model [Object] recorded on every message this assembler builds
        def initialize(model:)
          @model = model
          @blocks = []
          @current = nil
          @usage = Usage.zero
          @stop_reason = nil
          @done = false
        end

        # Consume one decoded stream record, emitting events to the block.
        def feed(record, &emit)
          case record["type"]
          when "message_start"
            @usage = parse_usage(record.dig("message", "usage"))
          when "content_block_start"
            start_block(record, &emit)
          when "content_block_delta"
            delta_block(record, &emit)
          when "content_block_stop"
            stop_block(record, &emit)
          when "message_delta"
            message_delta(record)
          when "message_stop"
            @done = true
          when "error"
            @error = wire_error(record["error"])
          end
        end

        # End of stream. An in-stream error, or a stream that never delivered
        # message_stop, finishes as a failure (not as an abort); otherwise a
        # :done event is emitted with the finished message.
        #
        # @return the final message
        def finish(&emit)
          if @error
            fail_stream(@error, &emit)
          elsif !@done
            fail_stream("stream ended without message_stop", &emit)
          else
            @message = assemble(stop_reason: @stop_reason || StopReason::STOP)
            emit&.call(Event.new(type: :done, reason: @message.stop_reason, message: @message))
            @message
          end
        end

        # Close out as aborted: keeps the block in flight, emits an :error
        # event with the ABORTED reason, and returns the message.
        def abort(&emit)
          finalize_current
          note = "aborted"
          @message = assemble(stop_reason: StopReason::ABORTED, error_message: note)
          emit&.call(Event.new(type: :error, reason: StopReason::ABORTED, message: @message,
                               error_message: note))
          @message
        end

        # Build the error object for an in-stream "error" record. A wire type
        # containing "overloaded" becomes an OverloadedError; everything else
        # is a ProviderError. A missing message falls back to "provider error".
        def wire_error(payload)
          overloaded = payload&.dig("type").to_s.include?("overloaded")
          error_class = overloaded ? OverloadedError : ProviderError
          error_class.new(payload&.dig("message") || "provider error")
        end

        # Close out as failed: keeps the block in flight, emits an :error
        # event with the ERROR reason, and returns the message.
        #
        # @param reason [Exception, #to_s] what went wrong
        def fail_stream(reason, &emit)
          finalize_current
          text = failure_text(reason)
          @message = assemble(stop_reason: StopReason::ERROR, error_message: text,
                              error: ErrorData.for(reason))
          emit&.call(Event.new(type: :error, reason: StopReason::ERROR, message: @message,
                               error_message: text))
          @message
        end

        # The final message, finishing the stream on first access if nothing
        # has produced one yet.
        def message
          @message ||= finish
        end

        # Mutable scratch state for the content block currently streaming.
        Builder = Struct.new(:kind, :index, :text, :json, :signature, :id, :name, :redacted)

        private

        # Human-readable text for a failure: provider errors use #describe,
        # other exceptions use #message, anything else is stringified.
        def failure_text(reason)
          if reason.is_a?(ProviderError)
            "#{reason.class}: #{reason.describe}"
          elsif reason.is_a?(Exception)
            "#{reason.class}: #{reason.message}"
          else
            reason.to_s
          end
        end

        # Open a builder for a recognised block type and announce it.
        def start_block(record, &emit)
          spec = record["content_block"] || {}
          type = spec["type"]
          kind = BLOCK_KINDS[type]
          return unless kind

          redacted = type == "redacted_thinking"
          @current = Builder.new(kind, @blocks.size, +"", +"", nil,
                                 spec["id"], spec["name"], redacted)
          # Redacted thinking arrives whole: its payload rides in the
          # signature slot.
          @current.signature = spec["data"] if redacted
          emit_event(:"#{kind}_start", content_index: @current.index, &emit)
        end

        # Apply one delta to the open block; deltas with no open block, or of
        # an unrecognised type, are ignored.
        def delta_block(record, &emit)
          return unless @current

          delta = record["delta"] || {}
          case delta["type"]
          when "text_delta"
            text_delta(delta["text"], &emit)
          when "thinking_delta"
            thinking_delta(delta["thinking"], &emit)
          when "signature_delta"
            @current.signature = "#{@current.signature}#{delta["signature"]}"
          when "input_json_delta"
            input_delta(delta["partial_json"], &emit)
          end
        end

        def text_delta(text, &emit)
          @current.text << text.to_s
          emit_event(:text_delta, content_index: @current.index, delta: text, &emit)
        end

        def thinking_delta(text, &emit)
          @current.text << text.to_s
          emit_event(:thinking_delta, content_index: @current.index, delta: text, &emit)
        end

        def input_delta(fragment, &emit)
          @current.json << fragment.to_s
          emit_event(:toolcall_delta, content_index: @current.index, delta: fragment, &emit)
        end

        # Close the open block and emit its *_end event: tool calls carry the
        # finished ToolCall, other blocks carry their text when present.
        def stop_block(_record, &emit)
          return unless @current

          finished = finalize_current
          index = @blocks.size - 1
          if finished.is_a?(ToolCall)
            emit_event(:toolcall_end, content_index: index, tool_call: finished, &emit)
          else
            fields = { content_index: index, content: builder_text(finished) }.compact
            emit_event(:"#{finished.type}_end", **fields, &emit)
          end
        end

        # Record the stop reason and the latest output token count.
        def message_delta(record)
          reason = record.dig("delta", "stop_reason")
          @stop_reason = map_stop_reason(reason) if reason
          # The output count here replaces the one from the opening snapshot;
          # it is not added to it.
          output = record.dig("usage", "output_tokens")
          @usage = @usage.with(output: output.to_i) if output
        end

        # Move the open block, if any, onto the finished list.
        #
        # @return the block just built, or nil when none was open
        def finalize_current
          return unless @current

          built = build_block(@current)
          @blocks << built
          @current = nil
          built
        end

        # Turn a builder into its content object.
        def build_block(builder)
          case builder.kind
          when :text
            Content::Text.new(text: builder.text)
          when :thinking
            Content::Thinking.new(thinking: builder.text, signature: builder.signature,
                                  redacted: builder.redacted)
          when :toolcall
            ToolCall.new(id: builder.id, name: builder.name,
                         arguments: parsed_arguments(builder.json), signature: nil)
          end
        end

        # Tool arguments are always a Hash: blank or non-object JSON yields {}.
        def parsed_arguments(json)
          return {} if json.strip.empty?

          parsed = PartialJson.parse(json)
          parsed.is_a?(Hash) ? parsed : {}
        end

        def builder_text(block)
          block.respond_to?(:text) ? block.text : block.thinking
        end

        # Emit an event carrying a fresh snapshot. Nothing is built when no
        # listener was given.
        def emit_event(type, **fields, &emit)
          return unless emit

          emit.call(Event.new(type:, partial: assemble, **fields))
        end

        # Snapshot the message: finished blocks plus the one in flight.
        def assemble(**meta)
          blocks = @blocks.dup
          blocks << build_block(@current) if @current
          Message.assistant(content: blocks, model: @model, provider: :anthropic,
                            usage: @usage, **meta)
        end

        # Map a wire stop reason onto StopReason; unknown reasons read as STOP.
        # pause_turn maps to TOOL_USE, the same as tool_use.
        def map_stop_reason(reason)
          table = {
            "end_turn" => StopReason::STOP,
            "stop_sequence" => StopReason::STOP,
            "max_tokens" => StopReason::LENGTH,
            "tool_use" => StopReason::TOOL_USE,
            "pause_turn" => StopReason::TOOL_USE
          }
          table.fetch(reason, StopReason::STOP)
        end

        # Build a Usage from the wire usage hash; a missing hash is zero usage.
        def parse_usage(raw)
          return Usage.zero unless raw

          cache_creation = raw["cache_creation"] || {}
          Usage.new(input: raw["input_tokens"].to_i, output: raw["output_tokens"].to_i,
                    cache_read: raw["cache_read_input_tokens"].to_i,
                    cache_write: raw["cache_creation_input_tokens"].to_i,
                    cache_write_1h: cache_creation["ephemeral_1h_input_tokens"].to_i)
        end
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module Mistri
  module Providers
    class Anthropic
      # Converts protocol messages into the hashes the Anthropic Messages API
      # expects.
      #
      # What the wire demands: a run of consecutive tool results becomes one
      # user turn, a thinking block is replayed together with its signature,
      # redacted thinking is replayed as its opaque payload, and cache_control
      # goes on the last system block and on the last block of the last user
      # turn.
      module Serializer
        module_function

        # @param system [String, nil] the system prompt
        # @param cache [Boolean] whether to mark the block for caching
        # @return [Array<Hash>, nil] nil when there is no system prompt
        def system_blocks(system, cache:)
          return nil if system.nil? || system.empty?

          entry = { type: "text", text: system }
          entry[:cache_control] = { type: "ephemeral" } if cache
          [entry]
        end

        # Serialize the history, skipping system messages and folding each run
        # of tool messages into a single user turn.
        def messages(history, cache: false)
          wire = history.reject(&:system?)
                        .chunk_while { |left, right| left.tool? && right.tool? }
                        .map { |run| run.first.tool? ? tool_results(run) : message(run.first) }
          mark_last_user_turn(wire) if cache
          wire
        end

        # Serialize tool definitions; keys may be strings or symbols.
        def tools(definitions)
          definitions.map do |definition|
            spec = definition.transform_keys(&:to_sym)
            entry = { name: spec[:name], description: spec[:description],
                      input_schema: spec[:input_schema] }
            spec[:eager_input_streaming] ? entry.merge(eager_input_streaming: true) : entry
          end
        end

        # One non-tool message as a wire turn.
        def message(msg)
          parts = msg.content.filter_map { |part| block(part) }
          { role: msg.role.to_s, content: parts }
        end

        # A run of tool messages as one user turn of tool_result blocks.
        def tool_results(group)
          results = group.map do |msg|
            parts = msg.content.filter_map { |part| block(part) }
            # An empty tool_result is rejected by the API, so a tool that
            # returned nothing is represented by a single space.
            parts = [{ type: "text", text: " " }] if parts.empty?
            { type: "tool_result", tool_use_id: msg.tool_call_id, content: parts }
          end
          { role: "user", content: results }
        end

        # Serialize one content block. Yields nil for a block the API would
        # reject (empty text, thinking that cannot replay) so callers can drop
        # it with filter_map.
        #
        # @raise [SchemaError] for a block class with no wire form
        def block(block)
          case block
          when Content::Text
            text_block(block)
          when Content::Thinking
            thinking_block(block)
          when Content::Image
            source = { type: "base64", media_type: block.mime_type, data: block.data }
            { type: "image", source: source }
          when ToolCall
            { type: "tool_use", id: block.id, name: block.name, input: block.arguments }
          else
            raise SchemaError, "cannot serialize #{block.class} for Anthropic"
          end
        end

        # Empty text blocks are rejected by the API, so they serialize to nil.
        def text_block(block)
          return if block.text.empty?

          { type: "text", text: block.text }
        end

        # Redacted thinking replays its payload. Signed thinking replays with
        # its signature. Unsigned thinking cannot replay as thinking, so it is
        # sent as plain text, or dropped when it has no text either.
        def thinking_block(block)
          if block.redacted?
            { type: "redacted_thinking", data: block.signature }
          elsif block.signature
            { type: "thinking", thinking: block.thinking, signature: block.signature }
          elsif !block.thinking.empty?
            { type: "text", text: block.thinking }
          end
        end

        # Put cache_control on the final block of the last user turn, when
        # there is one and it has array content.
        def mark_last_user_turn(wire)
          turn = wire.reverse_each.find { |candidate| candidate[:role] == "user" }
          return unless turn

          content = turn[:content]
          return unless content.is_a?(Array) && content.any?

          content.last[:cache_control] = { type: "ephemeral" }
        end
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module Mistri
  module Providers
    # The Anthropic Messages API, streamed. Out of the box it asks for
    # adaptive thinking with summarized display, turns prompt caching on, and
    # sets max_tokens to the model's catalogued output ceiling (or
    # UNKNOWN_MODEL_MAX_TOKENS when the model is not catalogued).
    #
    # A failure raised as Error while streaming is folded into the stream as
    # an error turn instead of propagating, so the caller gets a message back
    # either way.
    class Anthropic
      VERSION_HEADER = "2023-06-01"
      DEFAULT_THINKING = { type: "adaptive", display: "summarized" }.freeze

      # Messages API parameters copied verbatim from a stream override.
      PASSTHROUGH = %i[temperature top_p top_k stop_sequences metadata
                       tool_choice service_tier].freeze
      # max_tokens for a model missing from the catalog.
      UNKNOWN_MODEL_MAX_TOKENS = 64_000

      attr_reader :model

      # @param api_key [String] sent as the x-api-key header
      # @param model [String] default model for #stream
      # @param origin [String] API origin handed to the transport
      # @param max_tokens [Integer, nil] fixed output cap; nil defers to the catalog
      # @param thinking [Hash, nil] default thinking config
      # @param cache [Boolean] whether to add cache_control markers
      # @param transport_options [Hash] forwarded to Transport.new
      def initialize(api_key:, model: "claude-opus-4-8", origin: "https://api.anthropic.com",
                     max_tokens: nil, thinking: DEFAULT_THINKING, cache: true,
                     **transport_options)
        @api_key = api_key
        @model = model
        @max_tokens = max_tokens
        @thinking = thinking
        @cache = cache
        @transport = Transport.new(origin: origin, **transport_options)
      end

      # Stream one turn, feeding every record through an assembler that emits
      # events to the block.
      #
      # @return the assembled message: finished, aborted, or failed
      def stream(messages:, system: nil, tools: [], signal: nil, **overrides, &emit)
        model = overrides.fetch(:model, @model)
        assembler = Anthropic::Assembler.new(model: model)
        body = build_body(model, messages, system, tools, overrides)
        outcome = @transport.stream_post("/v1/messages", body: body, headers: headers,
                                                         signal: signal) do |record|
          assembler.feed(record, &emit)
        end
        if outcome == :aborted
          assembler.abort(&emit)
        else
          assembler.finish(&emit)
        end
      rescue Error => e
        assembler.fail_stream(e, &emit)
      end

      def close
        @transport.close
      end

      private

      # Assemble the request body: required fields first, then the optional
      # sections that apply, then any passthrough overrides.
      def build_body(model, messages, system, tools, overrides)
        request = {
          model: model,
          max_tokens: max_tokens_for(model, overrides),
          stream: true,
          messages: Serializer.messages(messages, cache: @cache)
        }
        system_blocks = Serializer.system_blocks(system, cache: @cache)
        request[:system] = system_blocks if system_blocks
        request[:tools] = Serializer.tools(tools) if tools.any?
        thinking = thinking_for(model, overrides)
        request[:thinking] = thinking if thinking
        schema = overrides[:output_schema]
        if schema
          request[:output_config] = { format: { type: "json_schema",
                                                schema: Schema.strict(schema) } }
        end
        request.merge(overrides.slice(*PASSTHROUGH))
      end

      # Pick the thinking config for this request. An adaptive config is
      # dropped when the catalog marks the model :budget; every other config,
      # and an adaptive one on any other model, passes through unchanged.
      def thinking_for(model, overrides)
        thinking = overrides.fetch(:thinking, @thinking)
        adaptive = thinking && thinking[:type] == "adaptive"
        return nil if adaptive && Models.thinking(model) == :budget

        thinking
      end

      # Pick max_tokens: an explicit override wins, then the configured value,
      # then the model's catalogued output limit, then the fallback constant.
      def max_tokens_for(model, overrides)
        return overrides[:max_tokens] if overrides.key?(:max_tokens)

        @max_tokens || Models.max_output(model) || UNKNOWN_MODEL_MAX_TOKENS
      end

      def headers
        { "x-api-key" => @api_key, "anthropic-version" => VERSION_HEADER }
      end
    end
  end
end
104
+
105
+ require_relative "anthropic/serializer"
106
+ require_relative "anthropic/assembler"