llms 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +160 -0
  4. data/bin/llms-chat +6 -0
  5. data/bin/llms-test-model-access +4 -0
  6. data/bin/llms-test-model-image-support +4 -0
  7. data/bin/llms-test-model-prompt-caching +4 -0
  8. data/bin/llms-test-model-tool-use +5 -0
  9. data/lib/llms/adapters/anthropic_message_adapter.rb +73 -0
  10. data/lib/llms/adapters/anthropic_tool_call_adapter.rb +20 -0
  11. data/lib/llms/adapters/base_message_adapter.rb +60 -0
  12. data/lib/llms/adapters/google_gemini_message_adapter.rb +72 -0
  13. data/lib/llms/adapters/google_gemini_tool_call_adapter.rb +20 -0
  14. data/lib/llms/adapters/open_ai_compatible_message_adapter.rb +88 -0
  15. data/lib/llms/adapters/open_ai_compatible_tool_call_adapter.rb +67 -0
  16. data/lib/llms/adapters.rb +12 -0
  17. data/lib/llms/apis/google_gemini_api.rb +45 -0
  18. data/lib/llms/apis/open_ai_compatible_api.rb +54 -0
  19. data/lib/llms/cli/base.rb +186 -0
  20. data/lib/llms/cli/chat.rb +92 -0
  21. data/lib/llms/cli/test_access.rb +79 -0
  22. data/lib/llms/cli/test_image_support.rb +92 -0
  23. data/lib/llms/cli/test_prompt_caching.rb +275 -0
  24. data/lib/llms/cli/test_tool_use.rb +108 -0
  25. data/lib/llms/cli.rb +12 -0
  26. data/lib/llms/conversation.rb +100 -0
  27. data/lib/llms/conversation_message.rb +60 -0
  28. data/lib/llms/conversation_tool_call.rb +14 -0
  29. data/lib/llms/conversation_tool_result.rb +15 -0
  30. data/lib/llms/exceptions.rb +33 -0
  31. data/lib/llms/executors/anthropic_executor.rb +247 -0
  32. data/lib/llms/executors/base_executor.rb +144 -0
  33. data/lib/llms/executors/google_gemini_executor.rb +212 -0
  34. data/lib/llms/executors/hugging_face_executor.rb +17 -0
  35. data/lib/llms/executors/open_ai_compatible_executor.rb +209 -0
  36. data/lib/llms/executors.rb +52 -0
  37. data/lib/llms/models/model.rb +86 -0
  38. data/lib/llms/models/provider.rb +48 -0
  39. data/lib/llms/models.rb +187 -0
  40. data/lib/llms/parsers/anthropic_chat_response_stream_parser.rb +184 -0
  41. data/lib/llms/parsers/google_gemini_chat_response_stream_parser.rb +128 -0
  42. data/lib/llms/parsers/open_ai_compatible_chat_response_stream_parser.rb +170 -0
  43. data/lib/llms/parsers/partial_json_parser.rb +77 -0
  44. data/lib/llms/parsers/sse_chat_response_stream_parser.rb +72 -0
  45. data/lib/llms/public_models.json +607 -0
  46. data/lib/llms/stream/event_emitter.rb +48 -0
  47. data/lib/llms/stream/events.rb +104 -0
  48. data/lib/llms/usage/cost_calculator.rb +75 -0
  49. data/lib/llms/usage/usage_data.rb +46 -0
  50. data/lib/llms.rb +16 -0
  51. metadata +243 -0
@@ -0,0 +1,184 @@
1
+ require_relative '../stream/events'
2
+ require_relative './partial_json_parser'
3
+
4
+ module LLMs
5
+ module Parsers
6
+ class AnthropicChatResponseStreamParser
7
+ include PartialJsonParser
8
+
9
+ def initialize(emitter)
10
+ @emitter = emitter
11
+ @received_jsons = []
12
+
13
+ # Message metadata
14
+ @id = nil
15
+ @type = nil
16
+ @role = nil
17
+ @model = nil
18
+ @content = []
19
+ @stop_reason = nil
20
+ @stop_sequence = nil
21
+ @usage = nil
22
+ end
23
+
24
+ def full_response
25
+ {
26
+ 'id' => @id,
27
+ 'type' => @type,
28
+ 'role' => @role,
29
+ 'model' => @model,
30
+ 'content' => @content,
31
+ 'stop_reason' => @stop_reason,
32
+ 'stop_sequence' => @stop_sequence,
33
+ 'usage' => @usage
34
+ }
35
+ end
36
+
37
+ def handle_json(json)
38
+ @received_jsons << json
39
+
40
+ case json['type']
41
+ when 'message_start'
42
+ handle_message_start(json['message'])
43
+ when 'content_block_start'
44
+ handle_content_block_start(json)
45
+ when 'content_block_delta'
46
+ handle_content_block_delta(json)
47
+ when 'content_block_stop'
48
+ handle_content_block_stop(json)
49
+ when 'message_delta'
50
+ handle_message_delta(json)
51
+ when 'message_stop'
52
+ handle_message_stop
53
+ end
54
+ end
55
+
56
+ private
57
+
58
+ def handle_message_start(message)
59
+ @id = message['id']
60
+ @type = message['type']
61
+ @role = message['role']
62
+ @model = message['model']
63
+ @content = message['content'].dup
64
+ @usage = message['usage'].dup
65
+ @emitter.emit(:message_started, Stream::Events::MessageStarted.new(@id))
66
+ @emitter.emit(:usage_updated, Stream::Events::UsageUpdated.new(@id, @usage))
67
+ end
68
+
69
+ def handle_content_block_start(json)
70
+ index = json['index']
71
+ block = json['content_block'].dup
72
+ @content[index] = block
73
+
74
+ if block['type'] == 'text' && block['text'] && !block['text'].empty?
75
+ ## May never happen, but just in case
76
+ @emitter.emit(:text_delta, Stream::Events::TextDelta.new(block['text']))
77
+
78
+ elsif block['type'] == 'thinking' && block['thinking'] && !block['thinking'].empty?
79
+ ## Maybe never happens, but just in case
80
+ @emitter.emit(:thinking_delta, Stream::Events::ThinkingDelta.new(@id, block['thinking']))
81
+
82
+ elsif block['type'] == 'tool_use'
83
+ @emitter.emit(:tool_call_started, Stream::Events::ToolCallStarted.new(
84
+ @id,
85
+ block['id'],
86
+ index,
87
+ block['name'],
88
+ block['input'].dup
89
+ ))
90
+ end
91
+ end
92
+
93
+ def handle_content_block_delta(json)
94
+ index = json['index']
95
+ current_block = @content[index]
96
+
97
+ case json['delta']['type']
98
+ when 'text_delta'
99
+ text = json['delta']['text']
100
+ current_block['text'] ||= ''
101
+ current_block['text'] << text
102
+ @emitter.emit(:text_delta, Stream::Events::TextDelta.new(@id, text))
103
+
104
+ when 'thinking_delta'
105
+ thinking = json['delta']['thinking']
106
+ current_block['thinking'] ||= ''
107
+ current_block['thinking'] << thinking
108
+ @emitter.emit(:thinking_delta, Stream::Events::ThinkingDelta.new(@id, thinking))
109
+
110
+ when 'input_json_delta'
111
+ if current_block['type'] == 'tool_use'
112
+ handle_tool_use_delta(index, json['delta']['partial_json'])
113
+ end
114
+ end
115
+ end
116
+
117
+ def handle_tool_use_delta(index, partial_json)
118
+ current_block = @content[index]
119
+
120
+ if current_block['input'] == {}
121
+ current_block['input'] = ''
122
+ end
123
+ current_block['input'] << partial_json # This is an empty string first time
124
+
125
+ @emitter.emit(:tool_call_arguments_json_delta, Stream::Events::ToolCallArgumentsJsonDelta.new(
126
+ @id,
127
+ current_block['id'],
128
+ index,
129
+ partial_json
130
+ ))
131
+
132
+ parsed, _ = attempt_parse_json(current_block['input'])
133
+ if parsed
134
+ @emitter.emit(:tool_call_arguments_updated, Stream::Events::ToolCallArgumentsUpdated.new(
135
+ @id,
136
+ current_block['id'],
137
+ index,
138
+ parsed
139
+ ))
140
+ end
141
+ end
142
+
143
+ def handle_content_block_stop(json)
144
+ index = json['index']
145
+ current_block = @content[index]
146
+
147
+ if current_block['type'] == 'tool_use'
148
+ parse_tool_use_input(index)
149
+
150
+ @emitter.emit(:tool_call_completed, Stream::Events::ToolCallCompleted.new(
151
+ @id,
152
+ current_block['id'],
153
+ index,
154
+ current_block['name'],
155
+ current_block['input']
156
+ ))
157
+ end
158
+ end
159
+
160
+ def handle_message_stop
161
+ @emitter.emit(:message_completed, Stream::Events::MessageCompleted.new(@id, full_response))
162
+ end
163
+
164
+ def parse_tool_use_input(index)
165
+ input = @content[index]['input'].to_s.strip
166
+ @content[index]['input'] = input.empty? ? {} : JSON.parse(input)
167
+ end
168
+
169
+ def handle_message_delta(json)
170
+ @stop_reason = json['delta']['stop_reason']
171
+ @stop_sequence = json['delta']['stop_sequence']
172
+ update_usage(json['usage']) if json['usage']
173
+ @emitter.emit(:usage_updated, Stream::Events::UsageUpdated.new(@id, @usage))
174
+ end
175
+
176
+ def update_usage(usage)
177
+ usage.each do |key, value|
178
+ @usage[key] = value
179
+ end
180
+ end
181
+
182
+ end
183
+ end
184
+ end
@@ -0,0 +1,128 @@
1
+ require_relative './sse_chat_response_stream_parser'
2
+ require_relative '../stream/events'
3
+
4
module LLMs
  module Parsers
    # Incremental parser for Google Gemini streaming chat responses.
    # Accumulates candidates/model/usage from each SSE JSON chunk and emits
    # lifecycle events on the emitter provided by the superclass.
    class GoogleGeminiChatResponseStreamParser < SSEChatResponseStreamParser

      attr_reader :current_message_id

      # The accumulated response, shaped like a non-streamed Gemini
      # response body.
      # FIX: removed leftover `pp` debug statement that printed the full
      # response to stdout on every call.
      def full_response
        {
          'candidates' => @candidates,
          'modelVersion' => @model_version,
          'usageMetadata' => @usage_metadata
        }
      end

      protected

      def initialize_state
        @candidates = []
        @model_version = nil
        @usage_metadata = nil
        @current_message_id = nil
        @tool_call_count = 0
      end

      # Dispatch the top-level keys of one decoded stream chunk.
      def handle_json(json)
        update_candidates(json['candidates']) if json['candidates']
        update_model_version(json['modelVersion']) if json['modelVersion']
        update_usage_metadata(json['usageMetadata']) if json['usageMetadata']
      end

      private

      def update_candidates(candidates)
        candidates.each_with_index do |candidate, index|
          @candidates[index] ||= {}
          current_candidate = @candidates[index]

          # Gemini does not supply a message id; synthesize one on first sight.
          if @current_message_id.nil?
            @current_message_id = "gemini-#{Time.now.to_i}"
            @emitter.emit(:message_started, Stream::Events::MessageStarted.new(@current_message_id))
          end

          if content = candidate['content']
            update_candidate_content(current_candidate, content)
          end

          if finish_reason = candidate['finishReason']
            current_candidate['finishReason'] = finish_reason
            @emitter.emit(:message_completed, Stream::Events::MessageCompleted.new(@current_message_id, full_response))
          end
        end
      end

      # Merge a content delta into the candidate and emit text / tool-call
      # events for each new part. Gemini sends tool calls whole (not as
      # argument deltas), so started/delta/updated/completed are emitted
      # back-to-back for each functionCall part.
      def update_candidate_content(current_candidate, content)
        current_candidate['content'] ||= {}

        if parts = content['parts']
          current_candidate['content']['parts'] ||= []
          current_candidate['content']['parts'] += parts

          parts.each do |part|
            if part['text']
              @emitter.emit(:text_delta, Stream::Events::TextDelta.new(@current_message_id, part['text']))
            end

            if part['functionCall']
              # Gemini also has no tool-call ids; synthesize a sequential one.
              tool_call_id = "tool_call#{@tool_call_count}"

              @emitter.emit(:tool_call_started, Stream::Events::ToolCallStarted.new(
                @current_message_id,
                tool_call_id,
                @tool_call_count,
                part['functionCall']['name'],
                {}
              ))

              args = part['functionCall']['args']

              @emitter.emit(:tool_call_arguments_json_delta, Stream::Events::ToolCallArgumentsJsonDelta.new(
                @current_message_id,
                tool_call_id,
                @tool_call_count,
                JSON.dump(args)
              ))

              @emitter.emit(:tool_call_arguments_updated, Stream::Events::ToolCallArgumentsUpdated.new(
                @current_message_id,
                tool_call_id,
                @tool_call_count,
                args
              ))

              @emitter.emit(:tool_call_completed, Stream::Events::ToolCallCompleted.new(
                @current_message_id,
                tool_call_id,
                @tool_call_count,
                part['functionCall']['name'],
                args
              ))

              @tool_call_count += 1
            end
          end
        end

        if role = content['role']
          current_candidate['role'] = role
        end
      end

      def update_model_version(version)
        @model_version = version
      end

      def update_usage_metadata(metadata)
        @usage_metadata = metadata
        @emitter.emit(:usage_updated, Stream::Events::UsageUpdated.new(@current_message_id, @usage_metadata))
      end
    end
  end
end
@@ -0,0 +1,170 @@
1
+ require_relative './sse_chat_response_stream_parser'
2
+ require_relative '../stream/events'
3
+
4
module LLMs
  module Parsers
    # Incremental parser for OpenAI-compatible streaming chat completions.
    # Accumulates choice deltas (content, roles, tool calls) and emits
    # lifecycle events on the emitter provided by the superclass.
    class OpenAICompatibleChatResponseStreamParser < SSEChatResponseStreamParser

      # The accumulated response, shaped like a non-streamed chat
      # completion body.
      def full_response
        # to match the format for non-streamed responses, all tool call arguments
        # must be serialized back to a JSON string
        converted_choices = @choices.map do |c|
          dup_c = c.dup
          dup_c['tool_calls']&.each do |tc|
            tc['function']['arguments'] = JSON.dump(tc['function']['arguments'])
          end
          dup_c
        end
        {
          'id' => @id,
          'model' => @model,
          # TODO I think this should be converted_choices, but providers are inconsistent in their response formats - switch to this after more testing
          'choices' => @choices, #converted_choices,
          'usage' => @usage,
          'created' => @created
        }
      end

      protected

      def initialize_state
        @id = nil
        @model = nil
        @choices = []
        @usage = nil
        @created = nil
      end

      # Dispatch the top-level keys of one decoded stream chunk.
      def handle_json(json)
        update_id(json['id']) if json['id']
        update_choices(json['choices']) if json['choices']
        update_model(json['model']) if json['model']
        update_usage(json['usage']) if json['usage']
        update_created(json['created']) if json['created']
      end

      private

      # The first id seen starts the message; later mismatches are only warned
      # about, never adopted.
      def update_id(id)
        if @id.nil?
          @id = id
          @emitter.emit(:message_started, Stream::Events::MessageStarted.new(id))
        elsif @id != id
          puts "WARNING: id mismatch: #{@id} != #{id}"
        end
      end

      def update_choices(choices)
        choices.each_with_index do |choice, index|
          @choices[index] ||= { 'message' => {} }
          current_choice = @choices[index]['message']

          if delta = choice['delta']
            update_choice_delta(current_choice, delta)
          end

          if finish_reason = choice['finish_reason']
            current_choice['finish_reason'] = finish_reason
            @emitter.emit(:message_completed, Stream::Events::MessageCompleted.new(@id, full_response))
          end
        end
      end

      def update_choice_delta(current_choice, delta)
        if role = delta['role']
          current_choice['role'] = role
        end

        if content = delta['content']
          current_choice['content'] ||= ''
          current_choice['content'] += content
          @emitter.emit(:text_delta, Stream::Events::TextDelta.new(@id, content))
        end

        if tool_calls = delta['tool_calls']
          # Some providers send a bare hash instead of an array here
          tool_calls = [tool_calls] unless tool_calls.is_a?(Array)
          update_tool_calls(current_choice, tool_calls)
        end
      end

      # Accumulate tool-call deltas per index. Arguments arrive as JSON
      # fragments; each time the accumulated string (repair-)parses we emit
      # either an in-progress update (repaired/partial JSON) or a completion
      # (parsed cleanly without repair).
      def update_tool_calls(current_choice, tool_calls)
        current_choice['tool_calls'] ||= []

        tool_calls.each do |tool_call|
          tool_index = tool_call['index']
          new_call = current_choice['tool_calls'][tool_index].nil?

          if new_call
            current_choice['tool_calls'][tool_index] = {
              'id' => tool_call['id'],
              'type' => tool_call['type'],
              'function' => {
                'name' => tool_call['function']['name'],
                'arguments' => '' # Not this: ( tool_call['function']['arguments'].dup ) - since some providers append anyway
              }
            }

            @emitter.emit(:tool_call_started, Stream::Events::ToolCallStarted.new(
              @id,
              tool_call['id'],
              tool_index,
              tool_call['function']['name'],
              {}
            ))
          end

          if arguments = tool_call['function']['arguments']
            current_tool_call = current_choice['tool_calls'][tool_index]
            current_tool_call['function']['arguments'] += arguments

            @emitter.emit(:tool_call_arguments_json_delta, Stream::Events::ToolCallArgumentsJsonDelta.new(
              @id,
              current_tool_call['id'],
              tool_index,
              arguments
            ))

            ## finish_reason"=>"tool_calls" <--- this is the finish reason when all tool calls completed
            ## TODO use that instead?

            parsed, corrected = attempt_parse_json(current_tool_call['function']['arguments'])

            if parsed
              if corrected
                @emitter.emit(:tool_call_arguments_updated, Stream::Events::ToolCallArgumentsUpdated.new(
                  @id,
                  current_tool_call['id'],
                  tool_index,
                  parsed
                ))
              else
                # FIX: use the accumulated name here — later argument chunks
                # usually omit 'name', so reading it from this delta yielded nil
                @emitter.emit(:tool_call_completed, Stream::Events::ToolCallCompleted.new(
                  @id,
                  current_tool_call['id'],
                  tool_index,
                  current_tool_call['function']['name'],
                  parsed
                ))
              end
            end

          end
        end
      end

      def update_model(model)
        @model = model
      end

      def update_usage(usage)
        @usage = usage
        @emitter.emit(:usage_updated, Stream::Events::UsageUpdated.new(@id, @usage))
      end

      def update_created(created)
        @created = created
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'json'
2
+
3
module LLMs
  module Parsers
    # Best-effort parsing of possibly-truncated JSON, as produced by
    # streaming tool-call argument deltas.
    module PartialJsonParser
      # Attempt to parse +json+; if it fails, close any unclosed strings,
      # objects, and arrays (in reverse nesting order) and try once more.
      #
      # @param json [String] complete or truncated JSON text
      # @return [Array(Object, Boolean)] [parsed value or nil, corrected flag].
      #   +corrected+ is false only when the input parsed as-is; it is true
      #   whenever a repair was attempted, even if that repair also failed
      #   (in which case parsed is nil).
      def attempt_parse_json(json)
        parsed = nil
        corrected = false

        begin
          parsed = JSON.parse(json)
        rescue JSON::ParserError
          # Scan the text tracking unclosed delimiters; braces/brackets inside
          # string literals are ignored, and backslash escapes are skipped.
          unclosed = []
          in_string = false
          escape_next = false

          json.each_char.with_index do |char, i|
            if escape_next
              escape_next = false
              next
            end

            case char
            when '\\'
              escape_next = true
            when '"'
              # escape_next is always false here (escaped chars were consumed
              # by the `next` above), so this quote toggles string state
              if in_string
                unclosed.pop if unclosed.last == :quote
                in_string = false
              else
                unclosed.push(:quote)
                in_string = true
              end
            when '{'
              unclosed.push(:brace) unless in_string
            when '['
              unclosed.push(:bracket) unless in_string
            when '}'
              unclosed.pop if !in_string && unclosed.last == :brace
            when ']'
              unclosed.pop if !in_string && unclosed.last == :bracket
            end
          end

          # Build correction by closing delimiters in reverse order
          correction = unclosed.reverse.map do |type|
            case type
            when :quote then '"'
            when :brace then '}'
            when :bracket then ']'
            end
          end.join

          # Try parsing with correction
          begin
            corrected = true
            corrected_json = json + correction
            parsed = JSON.parse(corrected_json)
          rescue JSON::ParserError
            parsed = nil
          end
        end

        [parsed, corrected]
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require 'json'
2
+ require_relative './partial_json_parser'
3
+
4
module LLMs
  module Parsers
    # Base class for Server-Sent-Events chat stream parsers. Buffers raw
    # socket data, splits it into lines, and forwards each `data: ` payload
    # (decoded as JSON) to subclass hooks.
    class SSEChatResponseStreamParser
      include PartialJsonParser

      # @param emitter [#emit] passed through to subclasses for event emission
      def initialize(emitter)
        @emitter = emitter
        @buffer = ''
        initialize_state
      end

      # Feed a raw chunk of stream data; any complete lines in the buffer
      # are processed immediately.
      def add_data(data)
        # FIX: append in place (<<) instead of `+=`, which reallocated the
        # whole buffer on every chunk
        @buffer << data
        process_buffer
      end

      def full_response
        raise NotImplementedError, "Subclasses must implement full_response"
      end

      protected

      def initialize_state
        # Override in subclasses to initialize parser state
      end

      def process_buffer
        while line = get_next_line
          process_line(line)
        end
      end

      # Only `data: ` lines carry payloads; other SSE fields and blank
      # keep-alive lines are ignored.
      def process_line(line)
        if line.start_with?('data: ')
          data = line[6..-1]
          if data == '[DONE]'
            handle_done
          else
            json = parse_line_data(data)
            handle_json(json)
          end
        end
      end

      # Override in subclasses to rescue JSON parse errors if needed for the provider (shouldn't actually be needed for any?)
      def parse_line_data(data)
        JSON.parse(data)
      end

      def handle_json(json)
        # Override in subclasses to handle JSON data
      end

      def handle_done
        # Override in subclasses if needed
      end

      private

      # Pop the next newline-terminated line off the buffer (stripped), or
      # return nil if no complete line is buffered yet.
      def get_next_line
        if i = @buffer.index("\n")
          line = @buffer[0...i].strip
          @buffer = @buffer[(i + 1)..-1]
          line
        end
      end
    end
  end
end