mistri 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +162 -0
- data/README.md +314 -3
- data/lib/generators/mistri/install/install_generator.rb +54 -0
- data/lib/generators/mistri/install/templates/migration.rb.tt +14 -0
- data/lib/generators/mistri/install/templates/model.rb.tt +4 -0
- data/lib/mistri/abort_signal.rb +63 -0
- data/lib/mistri/agent.rb +340 -0
- data/lib/mistri/budget.rb +29 -0
- data/lib/mistri/compaction.rb +78 -0
- data/lib/mistri/compactor.rb +182 -0
- data/lib/mistri/content.rb +89 -0
- data/lib/mistri/edit.rb +238 -0
- data/lib/mistri/errors.rb +94 -0
- data/lib/mistri/event.rb +50 -0
- data/lib/mistri/memory.rb +26 -0
- data/lib/mistri/message.rb +90 -0
- data/lib/mistri/models.rb +43 -0
- data/lib/mistri/partial_json.rb +210 -0
- data/lib/mistri/providers/anthropic/assembler.rb +205 -0
- data/lib/mistri/providers/anthropic/serializer.rb +106 -0
- data/lib/mistri/providers/anthropic.rb +106 -0
- data/lib/mistri/providers/fake.rb +109 -0
- data/lib/mistri/providers/gemini/assembler.rb +163 -0
- data/lib/mistri/providers/gemini/serializer.rb +109 -0
- data/lib/mistri/providers/gemini.rb +73 -0
- data/lib/mistri/providers/openai/assembler.rb +205 -0
- data/lib/mistri/providers/openai/serializer.rb +104 -0
- data/lib/mistri/providers/openai.rb +72 -0
- data/lib/mistri/result.rb +30 -0
- data/lib/mistri/retry_policy.rb +47 -0
- data/lib/mistri/schema.rb +162 -0
- data/lib/mistri/session.rb +124 -0
- data/lib/mistri/sinks/action_cable.rb +30 -0
- data/lib/mistri/sinks/coalesced.rb +61 -0
- data/lib/mistri/sinks/sse.rb +26 -0
- data/lib/mistri/skill.rb +15 -0
- data/lib/mistri/skills.rb +81 -0
- data/lib/mistri/sse.rb +50 -0
- data/lib/mistri/stop_reason.rb +25 -0
- data/lib/mistri/stores/active_record.rb +47 -0
- data/lib/mistri/stores/jsonl.rb +37 -0
- data/lib/mistri/stores/memory.rb +22 -0
- data/lib/mistri/sub_agent.rb +211 -0
- data/lib/mistri/tool.rb +94 -0
- data/lib/mistri/tool_call.rb +18 -0
- data/lib/mistri/tool_context.rb +15 -0
- data/lib/mistri/tool_executor.rb +66 -0
- data/lib/mistri/tool_result.rb +23 -0
- data/lib/mistri/tools/edit_file.rb +37 -0
- data/lib/mistri/tools/find_in_file.rb +36 -0
- data/lib/mistri/tools/list_files.rb +16 -0
- data/lib/mistri/tools/read_file.rb +38 -0
- data/lib/mistri/tools/read_memory.rb +16 -0
- data/lib/mistri/tools/update_memory.rb +22 -0
- data/lib/mistri/tools/write_file.rb +20 -0
- data/lib/mistri/tools.rb +50 -0
- data/lib/mistri/transport.rb +187 -0
- data/lib/mistri/usage.rb +79 -0
- data/lib/mistri/version.rb +1 -1
- data/lib/mistri/workspace/active_record.rb +47 -0
- data/lib/mistri/workspace/directory.rb +52 -0
- data/lib/mistri/workspace/memory.rb +40 -0
- data/lib/mistri/workspace/single.rb +48 -0
- data/lib/mistri.rb +87 -0
- metadata +68 -5
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
  # Parses the JSON prefix a model has emitted so far, so in-flight tool-call
  # arguments are readable before the closing brace arrives. Best effort by
  # contract: never raises, drops a dangling key or half-written token, and
  # returns {} for hopeless input.
  module PartialJson
    # Parses a possibly truncated JSON document.
    #
    # @param text [#to_s] the JSON received so far; surrounding whitespace is
    #   stripped before parsing
    # @return [Object] whatever could be recovered from the prefix; {} when the
    #   input is blank, nothing could be recovered, or parsing raised
    def self.parse(text)
      s = text.to_s.strip
      return {} if s.empty?

      value = Parser.new(s).parse
      value.equal?(Parser::NOTHING) ? {} : value
    rescue StandardError, SystemStackError
      # SystemStackError is not a StandardError, so it is listed explicitly to
      # keep the never-raises contract.
      {}
    end

    # Recursive descent over the prefix. Truncation trips @partial, and every
    # frame unwinds keeping the structure built so far.
    class Parser
      # Sentinel for "no usable value here"; compared by identity so it can
      # never be confused with a parsed nil/false.
      NOTHING = Object.new
      LITERALS = { "true" => true, "false" => false, "null" => nil }.freeze

      # Nesting past this is treated as truncated: a model's real tool
      # arguments never nest this deep, and the cap keeps a pathological input
      # from overflowing the stack.
      MAX_DEPTH = 256

      # An unfinished \uXXXX escape at the very end of a fragment. The
      # backslash only opens an escape when it follows an even run of
      # backslashes (captured in group 1 and kept); after an odd run it is
      # itself the escaped character and the "u" is literal text.
      PARTIAL_UNICODE_ESCAPE = /(?<!\\)((?:\\\\)*)\\u[0-9a-fA-F]{0,3}\z/
      private_constant :PARTIAL_UNICODE_ESCAPE

      # @param source [String] the already-stripped JSON prefix
      def initialize(source)
        @s = source
        @n = source.length
        @i = 0
        @partial = false
        @depth = 0
      end

      # @return [Object] the parsed value, or NOTHING when none was recovered
      def parse = value

      private

      # Parses whatever value starts at the cursor.
      def value
        skip_ws
        return truncated if eof?

        case @s[@i]
        when '"' then string
        when "{" then nested { object }
        when "[" then nested { array }
        else scalar
        end
      end

      # Runs the block one nesting level deeper, or reports truncation once
      # MAX_DEPTH is reached.
      def nested
        return truncated if @depth >= MAX_DEPTH

        @depth += 1
        begin
          yield
        ensure
          @depth -= 1
        end
      end

      # Parses an object body; the cursor sits on the opening brace.
      def object
        @i += 1
        obj = {}
        until @partial
          skip_ws
          break truncated if eof?
          break @i += 1 if @s[@i] == "}"
          # Anything but a key here is malformed; keep what was built so far.
          break unless @s[@i] == '"'

          key, val = pair
          obj[key] = val unless key.equal?(NOTHING) || val.equal?(NOTHING)
          skip_ws
          @i += 1 if !eof? && @s[@i] == ","
        end
        obj
      end

      # One key/value. A pair is only usable once its key and the ":" are
      # complete: truncation mid-key or before the separator drops the pair.
      #
      # @return [Array(Object, Object)] key and value, either may be NOTHING
      def pair
        key = string
        return [NOTHING, NOTHING] if @partial

        skip_ws
        return [NOTHING, truncated] if eof?
        return [NOTHING, NOTHING] unless @s[@i] == ":"

        @i += 1
        [key, value]
      end

      # Parses an array body; the cursor sits on the opening bracket.
      def array
        @i += 1
        arr = []
        until @partial
          skip_ws
          break truncated if eof?
          break @i += 1 if @s[@i] == "]"

          element = value
          arr << element unless element.equal?(NOTHING)
          skip_ws
          @i += 1 if !eof? && @s[@i] == ","
        end
        arr
      end

      # Parses a string; the cursor sits on the opening quote. An unterminated
      # string marks the parse truncated and is salvaged.
      def string
        start = @i
        @i += 1
        escaped = false
        while @i < @n
          case
          when escaped then escaped = false
          when @s[@i] == "\\" then escaped = true
          when @s[@i] == '"'
            @i += 1
            return decode(@s[start...@i])
          end
          @i += 1
        end
        truncated
        salvage_string(@s[start..])
      end

      # Close an unterminated string, first shedding a half-written escape: a
      # partial \uXXXX, or a lone trailing backslash. In both cases the
      # backslash is only dangling when the run of backslashes it ends is odd;
      # an even run is complete escaped backslashes and must be kept, so
      # `\\u` (escaped backslash, then a literal "u") survives intact.
      def salvage_string(fragment)
        candidate = fragment.sub(PARTIAL_UNICODE_ESCAPE) { Regexp.last_match(1) }
        trailing = candidate[/\\+\z/]
        candidate = candidate[0..-2] if trailing&.length&.odd?
        decode(%(#{candidate}"))
      end

      # Parses a bare token: a literal or a number.
      def scalar
        start = @i
        @i += 1 while @i < @n && !"},] \n\r\t".include?(@s[@i])
        token = @s[start...@i]
        # A structural character in value position: consume it so the caller's
        # loop always makes progress.
        return (@i += 1) && NOTHING if token.empty?

        # A token that runs to the end of input may itself be cut short.
        truncated if eof?
        literal(token) { number(token) }
      end

      # Resolves true/false/null; when the input was truncated, a prefix of
      # one of them counts as that literal. Otherwise yields for the fallback.
      def literal(token)
        return LITERALS[token] if LITERALS.key?(token)

        if @partial
          match = LITERALS.keys.find { |word| word.start_with?(token) }
          return LITERALS[match] if match
        end
        yield
      end

      # Integer first, then Float, then a trimmed retry for a cut-off number.
      def number(token)
        Integer(token)
      rescue ArgumentError
        begin
          finite(Float(token))
        rescue ArgumentError
          trimmed_number(token)
        end
      end

      # A number cut mid-token: shed the dangling exponent, decimal point, or
      # bare minus and retry.
      def trimmed_number(token)
        trimmed = token.sub(/[eE][+-]?\z/, "").sub(/\.\z/, "")
        return NOTHING if trimmed.empty? || trimmed == "-"

        finite(Float(trimmed))
      rescue ArgumentError
        NOTHING
      end

      # A model that emits 1e999 yields Float::INFINITY, which JSON cannot
      # generate, so it would crash replay and persistence. Drop it.
      def finite(number)
        number.finite? ? number : NOTHING
      end

      # Decodes one complete JSON string literal, escapes included.
      def decode(json_string)
        JSON.parse(json_string)
      rescue JSON::ParserError
        NOTHING
      end

      def skip_ws
        @i += 1 while @i < @n && " \n\r\t".include?(@s[@i])
      end

      def eof? = @i >= @n

      # Flags the parse as truncated, which stops every enclosing loop.
      def truncated
        @partial = true
        NOTHING
      end
    end
  end
end
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
  module Providers
    class Anthropic
      # Consumes Messages API stream records one at a time and grows the
      # assistant message block by block. Each event handed to the caller's
      # block carries a freshly assembled snapshot of the message so far, and
      # tool-call arguments still in flight are read through PartialJson.
      #
      # Record types and content-block types this class does not list are
      # ignored rather than rejected, so additions to the API do not break a
      # live stream.
      class Assembler
        def initialize(model:)
          @model = model
          @blocks = []
          @current = nil
          @usage = Usage.zero
          @stop_reason = nil
          @done = false
        end

        # Applies one decoded stream record, emitting events to the given
        # block where the record produces any.
        def feed(record, &)
          case record["type"]
          when "message_start"
            @usage = parse_usage(record.dig("message", "usage"))
          when "content_block_start"
            start_block(record, &)
          when "content_block_delta"
            delta_block(record, &)
          when "content_block_stop"
            stop_block(record, &)
          when "message_delta"
            message_delta(record)
          when "message_stop"
            @done = true
          when "error"
            @error = wire_error(record["error"])
          end
        end

        # Ends the stream and returns the final message. An in-stream error,
        # or a stream that never delivered message_stop, finishes as a failed
        # turn (not an abort) so the caller can decide to retry; otherwise a
        # :done event is emitted.
        def finish(&emit)
          if @error
            fail_stream(@error, &emit)
          elsif !@done
            fail_stream("stream ended without message_stop", &emit)
          else
            @message = assemble(stop_reason: @stop_reason || StopReason::STOP)
            emit&.call(Event.new(type: :done, reason: @message.stop_reason, message: @message))
            @message
          end
        end

        # Ends the stream as a cancellation, keeping whatever block was open.
        def abort(&emit)
          finalize_current
          aborted = StopReason::ABORTED
          @message = assemble(stop_reason: aborted, error_message: "aborted")
          emit&.call(Event.new(type: :error, reason: aborted, message: @message,
                               error_message: "aborted"))
          @message
        end

        # Builds the exception for an in-stream error payload. A wire type
        # containing "overloaded" becomes OverloadedError so it stays
        # distinguishable from a generic ProviderError.
        def wire_error(payload)
          wire_type = payload&.dig("type").to_s
          error_class = wire_type.include?("overloaded") ? OverloadedError : ProviderError
          error_class.new(payload&.dig("message") || "provider error")
        end

        # Ends the stream as a failure. +reason+ may be an exception or a
        # plain description; either way it is rendered into the error text.
        def fail_stream(reason, &emit)
          finalize_current
          text =
            if reason.is_a?(ProviderError)
              "#{reason.class}: #{reason.describe}"
            elsif reason.is_a?(Exception)
              "#{reason.class}: #{reason.message}"
            else
              reason.to_s
            end
          @message = assemble(stop_reason: StopReason::ERROR, error_message: text,
                              error: ErrorData.for(reason))
          emit&.call(Event.new(type: :error, reason: StopReason::ERROR, message: @message,
                               error_message: text))
          @message
        end

        # The final message; finishes the stream on first use if nothing has
        # produced one yet.
        def message
          @message ||= finish
        end

        # Accumulator for the content block currently being streamed.
        Builder = Struct.new(:kind, :index, :text, :json, :signature, :id, :name, :redacted)

        private

        # Opens a new block for a recognised content-block type.
        def start_block(record, &)
          block = record["content_block"] || {}
          wire_type = block["type"]
          kind =
            case wire_type
            when "text" then :text
            when "thinking", "redacted_thinking" then :thinking
            when "tool_use" then :toolcall
            end
          return unless kind

          redacted = wire_type == "redacted_thinking"
          # Redacted thinking carries its payload in "data" on the start
          # record; it is stored in the signature slot.
          signature = redacted ? block["data"] : nil
          @current = Builder.new(kind, @blocks.size, +"", +"", signature,
                                 block["id"], block["name"], redacted)
          emit_event(:"#{kind}_start", content_index: @current.index, &)
        end

        # Routes a delta to the open block; ignored when no block is open.
        def delta_block(record, &)
          return unless @current

          delta = record["delta"] || {}
          case delta["type"]
          when "text_delta"
            text_delta(delta["text"], &)
          when "thinking_delta"
            thinking_delta(delta["thinking"], &)
          when "signature_delta"
            existing = @current.signature
            @current.signature = "#{existing}#{delta["signature"]}"
          when "input_json_delta"
            input_delta(delta["partial_json"], &)
          end
        end

        def text_delta(text, &)
          append_text(:text_delta, text, &)
        end

        def thinking_delta(text, &)
          append_text(:thinking_delta, text, &)
        end

        # Shared body of the two prose deltas: extend the open block's text
        # and emit the matching event with the raw delta.
        def append_text(type, text, &)
          @current.text << text.to_s
          emit_event(type, content_index: @current.index, delta: text, &)
        end

        def input_delta(fragment, &)
          @current.json << fragment.to_s
          emit_event(:toolcall_delta, content_index: @current.index, delta: fragment, &)
        end

        # Closes the open block and emits its *_end event: a tool call
        # reports the call itself, other blocks report their text.
        def stop_block(_record, &)
          return unless @current

          finished = finalize_current
          index = @blocks.size - 1
          if finished.is_a?(ToolCall)
            emit_event(:toolcall_end, content_index: index, tool_call: finished, &)
          else
            fields = { content_index: index, content: builder_text(finished) }.compact
            emit_event(:"#{finished.type}_end", **fields, &)
          end
        end

        def message_delta(record)
          reason = record.dig("delta", "stop_reason")
          @stop_reason = map_stop_reason(reason) if reason
          # The output count on message_delta is a running total, so it
          # replaces the opening snapshot's value instead of adding to it.
          output = record.dig("usage", "output_tokens")
          @usage = @usage.with(output: output.to_i) if output
        end

        # Moves the open block, if any, onto the finished list and returns it.
        def finalize_current
          pending = @current
          return unless pending

          built = build_block(pending)
          @blocks.push(built)
          @current = nil
          built
        end

        # Converts a Builder into its message content object.
        def build_block(builder)
          case builder.kind
          when :text
            Content::Text.new(text: builder.text)
          when :thinking
            Content::Thinking.new(thinking: builder.text, signature: builder.signature,
                                  redacted: builder.redacted)
          when :toolcall
            ToolCall.new(id: builder.id, name: builder.name,
                         arguments: parsed_arguments(builder.json), signature: nil)
          end
        end

        # Tool arguments are always a Hash: blank or non-object JSON gives {}.
        def parsed_arguments(json)
          return {} if json.strip.empty?

          parsed = PartialJson.parse(json)
          parsed.is_a?(Hash) ? parsed : {}
        end

        def builder_text(block)
          if block.respond_to?(:text)
            block.text
          else
            block.thinking
          end
        end

        # Emits an event carrying a snapshot of the message so far. Nothing is
        # assembled when no block was given.
        def emit_event(type, **fields, &emit)
          return unless emit

          emit.call(Event.new(type: type, partial: assemble, **fields))
        end

        # Builds an assistant message from the finished blocks plus the open
        # one, without mutating either.
        def assemble(**meta)
          snapshot = @blocks.dup
          snapshot << build_block(@current) if @current
          Message.assistant(content: snapshot, model: @model, provider: :anthropic,
                            usage: @usage, **meta)
        end

        # Maps a wire stop reason to the library's. pause_turn (a server tool
        # paused a long turn) maps to tool_use so the loop continues the turn
        # rather than ending it; anything unrecognised reads as a plain stop.
        def map_stop_reason(reason)
          case reason
          when "end_turn", "stop_sequence" then StopReason::STOP
          when "max_tokens" then StopReason::LENGTH
          when "tool_use", "pause_turn" then StopReason::TOOL_USE
          else StopReason::STOP
          end
        end

        # Reads the usage object of message_start; missing counts become 0.
        def parse_usage(raw)
          return Usage.zero unless raw

          one_hour = (raw["cache_creation"] || {})["ephemeral_1h_input_tokens"]
          Usage.new(input: raw["input_tokens"].to_i,
                    output: raw["output_tokens"].to_i,
                    cache_read: raw["cache_read_input_tokens"].to_i,
                    cache_write: raw["cache_creation_input_tokens"].to_i,
                    cache_write_1h: one_hour.to_i)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
  module Providers
    class Anthropic
      # Serializes protocol messages into Anthropic Messages API wire shapes.
      #
      # Wire rules that matter: consecutive tool results merge into one user
      # turn (parallel tool calls demand their results together), thinking
      # blocks replay with their signature and must never be altered, redacted
      # thinking replays as its opaque payload, a turn left with no content
      # after filtering is omitted, and cache_control marks the last system
      # block and the last user message so the stable prefix and the growing
      # history both cache.
      module Serializer
        module_function

        # @param system [String, nil] the system prompt
        # @param cache [Boolean] whether to mark the block for prompt caching
        # @return [Array<Hash>, nil] one text block, or nil for a blank prompt
        def system_blocks(system, cache:)
          return nil if system.nil? || system.empty?

          blocks = [{ type: "text", text: system }]
          blocks.last[:cache_control] = { type: "ephemeral" } if cache
          blocks
        end

        # Serializes the conversation. System messages are skipped here and
        # each run of consecutive tool messages becomes a single user turn.
        #
        # A turn whose blocks were all filtered out (say, an aborted assistant
        # turn holding only empty text or unsigned empty thinking) is dropped:
        # sending it as `content: []` would fail the whole request.
        #
        # @param history [Enumerable] messages responding to system?, tool?,
        #   role and content
        # @param cache [Boolean] whether to mark the last user turn for caching
        # @return [Array<Hash>] wire turns, in order
        def messages(history, cache: false)
          turns = history.reject(&:system?).chunk_while { |a, b| a.tool? && b.tool? }
          wire = turns.filter_map do |group|
            turn = group.first.tool? ? tool_results(group) : message(group.first)
            turn unless turn[:content].empty?
          end
          mark_last_user_turn(wire) if cache
          wire
        end

        # @param definitions [Array<Hash>] tool specs keyed by string or symbol
        # @return [Array<Hash>] wire tool definitions
        def tools(definitions)
          definitions.map do |tool|
            spec = tool.transform_keys(&:to_sym)
            wire = { name: spec[:name], description: spec[:description],
                     input_schema: spec[:input_schema] }
            wire[:eager_input_streaming] = true if spec[:eager_input_streaming]
            wire
          end
        end

        # One non-tool message as a wire turn. Content may come out empty
        # when every block was filtered.
        def message(msg)
          { role: msg.role.to_s, content: msg.content.filter_map { |part| block(part) } }
        end

        # A run of tool messages as one user turn of tool_result blocks.
        def tool_results(group)
          results = group.map do |msg|
            blocks = msg.content.filter_map { |part| block(part) }
            # The API rejects an empty tool_result; a space stands in for a
            # tool that returned nothing.
            blocks = [{ type: "text", text: " " }] if blocks.empty?
            { type: "tool_result", tool_use_id: msg.tool_call_id, content: blocks }
          end
          { role: "user", content: results }
        end

        # Returns nil for a block the API would reject (empty text, unusable
        # thinking), so callers filter_map it out.
        #
        # @raise [SchemaError] for a content class with no wire shape
        def block(block)
          case block
          when Content::Text then text_block(block)
          when Content::Thinking then thinking_block(block)
          when Content::Image
            { type: "image",
              source: { type: "base64", media_type: block.mime_type, data: block.data } }
          when ToolCall
            { type: "tool_use", id: block.id, name: block.name, input: block.arguments }
          else
            raise SchemaError, "cannot serialize #{block.class} for Anthropic"
          end
        end

        # The API rejects empty text content blocks.
        def text_block(block)
          block.text.empty? ? nil : { type: "text", text: block.text }
        end

        # Thinking replays only with its signature. Redacted thinking carries
        # its opaque payload; a normal thinking block missing its signature
        # (an aborted turn cut before signature_delta) cannot replay, so it
        # degrades to its text, or drops when even that is empty.
        def thinking_block(block)
          return { type: "redacted_thinking", data: block.signature } if block.redacted?

          if block.signature
            return { type: "thinking", thinking: block.thinking,
                     signature: block.signature }
          end

          block.thinking.empty? ? nil : { type: "text", text: block.thinking }
        end

        # Puts the cache marker on the final content block of the last user
        # turn, mutating +wire+ in place.
        def mark_last_user_turn(wire)
          last_user = wire.rindex { |turn| turn[:role] == "user" }
          return unless last_user

          content = wire[last_user][:content]
          return unless content.is_a?(Array) && content.any?

          content.last[:cache_control] = { type: "ephemeral" }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
  module Providers
    # Streaming client for the Anthropic Messages API. Out of the box it asks
    # for adaptive thinking with summarized display, turns prompt caching on,
    # and sets max_tokens from the model catalog (falling back to
    # UNKNOWN_MODEL_MAX_TOKENS for a model the catalog does not know).
    #
    # A provider failure does not escape #stream: it is folded into the
    # returned message as an error turn, so the caller always receives a
    # message and decides for itself whether to retry.
    class Anthropic
      VERSION_HEADER = "2023-06-01"
      DEFAULT_THINKING = { type: "adaptive", display: "summarized" }.freeze

      # Per-call overrides that are copied into the request body untouched.
      PASSTHROUGH = %i[temperature top_p top_k stop_sequences metadata
                       tool_choice service_tier].freeze

      # max_tokens used when neither the caller, the instance, nor the model
      # catalog supplies one. Chosen to leave headroom while staying within
      # what current models accept; cataloguing a model unlocks its real limit.
      UNKNOWN_MODEL_MAX_TOKENS = 64_000

      attr_reader :model

      # @param api_key [String] sent as the x-api-key header
      # @param model [String] default model for #stream
      # @param origin [String] API base URL handed to Transport
      # @param max_tokens [Integer, nil] instance-wide output cap; nil defers
      #   to the catalog
      # @param thinking [Hash, nil] default thinking config
      # @param cache [Boolean] whether to add cache_control markers
      # @param transport_options [Hash] forwarded to Transport.new
      def initialize(api_key:, model: "claude-opus-4-8", origin: "https://api.anthropic.com",
                     max_tokens: nil, thinking: DEFAULT_THINKING, cache: true,
                     **transport_options)
        @api_key = api_key
        @model = model
        @max_tokens = max_tokens
        @thinking = thinking
        @cache = cache
        @transport = Transport.new(origin: origin, **transport_options)
      end

      # Runs one streamed request and returns the assembled assistant message.
      # Events go to the given block as they arrive. An Error raised along the
      # way is turned into an error turn instead of propagating.
      def stream(messages:, system: nil, tools: [], signal: nil, **overrides, &emit)
        model = overrides.fetch(:model, @model)
        assembler = Anthropic::Assembler.new(model: model)
        body = build_body(model, messages, system, tools, overrides)
        outcome = @transport.stream_post("/v1/messages", body: body, headers: headers,
                                                         signal: signal) do |record|
          assembler.feed(record, &emit)
        end

        if outcome == :aborted
          assembler.abort(&emit)
        else
          assembler.finish(&emit)
        end
      rescue Error => e
        assembler.fail_stream(e, &emit)
      end

      def close
        @transport.close
      end

      private

      # Assembles the request body. Optional sections are added only when
      # they have a value; PASSTHROUGH overrides go last.
      def build_body(model, messages, system, tools, overrides)
        body = {
          model: model,
          max_tokens: max_tokens_for(model, overrides),
          stream: true,
          messages: Serializer.messages(messages, cache: @cache)
        }

        system_blocks = Serializer.system_blocks(system, cache: @cache)
        body[:system] = system_blocks if system_blocks
        body[:tools] = Serializer.tools(tools) if tools.any?

        thinking = thinking_for(model, overrides)
        body[:thinking] = thinking if thinking

        schema = overrides[:output_schema]
        if schema
          body[:output_config] = { format: { type: "json_schema",
                                             schema: Schema.strict(schema) } }
        end

        PASSTHROUGH.each do |key|
          body[key] = overrides[key] if overrides.key?(key)
        end
        body
      end

      # Picks the thinking config for this call. Adaptive thinking 400s on
      # budget-only models like Haiku 4.5, so an adaptive config is dropped
      # when the catalog marks the model :budget; a host that wants thinking
      # there passes an explicit budget config. A model the catalog does not
      # mark :budget keeps the adaptive config.
      def thinking_for(model, overrides)
        config = overrides.fetch(:thinking, @thinking)
        adaptive = config && config[:type] == "adaptive"
        return config unless adaptive

        Models.thinking(model) == :budget ? nil : config
      end

      # Picks max_tokens: an explicit override wins, then the instance
      # setting, then the model's catalogued ceiling, then the fallback for
      # uncatalogued models. The API requires the field and bills only actual
      # output, so the default aims for full headroom.
      def max_tokens_for(model, overrides)
        return overrides[:max_tokens] if overrides.key?(:max_tokens)

        @max_tokens || Models.max_output(model) || UNKNOWN_MODEL_MAX_TOKENS
      end

      def headers
        {
          "x-api-key" => @api_key,
          "anthropic-version" => VERSION_HEADER
        }
      end
    end
  end
end
|
|
104
|
+
|
|
105
|
+
require_relative "anthropic/serializer"
|
|
106
|
+
require_relative "anthropic/assembler"
|