smart_prompt 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 396c6097973289a34143e86b65f428d55919d0e755916992a5c714e289ebf5a2
4
- data.tar.gz: 5d2c2d81b486e1fb05b53f116047ab1f90be77c9536fd6440a96b573bdad00c3
3
+ metadata.gz: a1b5288acfef3c366b16a0e08dc0b4f43a7f8613f73879f92420a3cf60ce9332
4
+ data.tar.gz: c2fd82bf35e96c6784492dfe2dd9ee30dbf02e56b5cce41ee8d9895f4b050b13
5
5
  SHA512:
6
- metadata.gz: c6880395149678a195ea6efc46a81623d61c14c56534936041a625616a9e6b716597c078ccee0ac0d47c3b2a8e67d272742ee36940fa64321426799db0b26e4d
7
- data.tar.gz: 63f0b8ae0f6f62363443731cae6fed3eafe746c463d259c9b88eee8563d6ef0cdcd84e684d6daeedd7329c3dcab84748ab62358fa2935b9d0278ec347df45ffb
6
+ metadata.gz: ccd75b4af683bb4585ca46a24d60833fac4344e214f5fef6730e9b55e86e80a35951d39e6cbff8ad1f364623e62aade8bce065ade1cd8c6ddea7e1b1faea2126
7
+ data.tar.gz: 68cba98948160fc872d2b5661661fe16ae76f5b43025fa96ea0c546563e01be5a0930d76fd13ec327017e07b23c7b739953794a3b385185c448888862374190c
data/README.cn.md CHANGED
@@ -75,6 +75,14 @@ llms:
75
75
  adapter: openai
76
76
  url: http://localhost:11434/
77
77
  default_model: deepseek-r1
78
+ gemma4_local:
79
+ adapter: openai
80
+ url: http://localhost:8000/v1
81
+ api_key: dummy
82
+ default_model: gemma-4-12B-it
83
+ temperature: 1.0
84
+ top_p: 0.95
85
+ top_k: 64
78
86
  deepseek:
79
87
  adapter: openai
80
88
  url: https://api.deepseek.com
@@ -89,6 +97,10 @@ models:
89
97
  deepseekv3.2:
90
98
  use: SiliconFlow
91
99
  model: Pro/deepseek-ai/DeepSeek-V3.2
100
+ gemma4/12b:
101
+ use: gemma4_local
102
+ model: gemma-4-12B-it
103
+ max_tokens: 1024
92
104
 
93
105
  # 默认设置
94
106
  default_llm: SiliconFlow
@@ -170,6 +182,26 @@ engine.call_worker_by_stream(:streaming_chat, {
170
182
  end
171
183
  ```
172
184
 
185
+ ### Gemma 4 12B 多模态
186
+
187
+ Gemma 4 12B 可以通过 LiteRT-LM、LM Studio、Ollama、llama.cpp 等 OpenAI 兼容本地服务接入。SmartPrompt 会把图片放在文本前、音频放在文本后,以匹配 Gemma 4 的多模态最佳实践。
188
+
189
+ ```ruby
190
+ SmartPrompt.define_worker :gemma_multimodal_assistant do
191
+ use_model "gemma4/12b"
192
+ thinking params.fetch(:thinking, true)
193
+ sys_msg("你是一个严谨的本地多模态助手。", params)
194
+
195
+ image(params[:image], token_budget: params[:token_budget] || 280) if params[:image]
196
+ video(params[:video], fps: 1, max_seconds: 60) if params[:video]
197
+ audio(params[:audio]) if params[:audio]
198
+ prompt(params[:message])
199
+
200
+ request_options(response_format: { type: "json_object" }) if params[:json]
201
+ send_msg
202
+ end
203
+ ```
204
+
173
205
  ### 工具集成
174
206
 
175
207
  ```ruby
data/README.md CHANGED
@@ -75,6 +75,14 @@ llms:
75
75
  adapter: openai
76
76
  url: http://localhost:11434/
77
77
  default_model: deepseek-r1
78
+ gemma4_local:
79
+ adapter: openai
80
+ url: http://localhost:8000/v1
81
+ api_key: dummy
82
+ default_model: gemma-4-12B-it
83
+ temperature: 1.0
84
+ top_p: 0.95
85
+ top_k: 64
78
86
  deepseek:
79
87
  adapter: openai
80
88
  url: https://api.deepseek.com
@@ -89,6 +97,10 @@ models:
89
97
  deepseekv3.2:
90
98
  use: SiliconFlow
91
99
  model: Pro/deepseek-ai/DeepSeek-V3.2
100
+ gemma4/12b:
101
+ use: gemma4_local
102
+ model: gemma-4-12B-it
103
+ max_tokens: 1024
92
104
 
93
105
  # Default settings
94
106
  default_llm: SiliconFlow
@@ -170,6 +182,26 @@ engine.call_worker_by_stream(:streaming_chat, {
170
182
  end
171
183
  ```
172
184
 
185
+ ### Gemma 4 12B Multimodal
186
+
187
+ Gemma 4 12B can be connected through OpenAI-compatible local servers such as LiteRT-LM, LM Studio, Ollama, or llama.cpp. SmartPrompt places images before text and audio after text to match Gemma 4 multimodal best practices.
188
+
189
+ ```ruby
190
+ SmartPrompt.define_worker :gemma_multimodal_assistant do
191
+ use_model "gemma4/12b"
192
+ thinking params.fetch(:thinking, true)
193
+ sys_msg("You are a precise local multimodal assistant.", params)
194
+
195
+ image(params[:image], token_budget: params[:token_budget] || 280) if params[:image]
196
+ video(params[:video], fps: 1, max_seconds: 60) if params[:video]
197
+ audio(params[:audio]) if params[:audio]
198
+ prompt(params[:message])
199
+
200
+ request_options(response_format: { type: "json_object" }) if params[:json]
201
+ send_msg
202
+ end
203
+ ```
204
+
173
205
  ### Tool Integration
174
206
 
175
207
  ```ruby
@@ -1,10 +1,23 @@
1
1
  require "yaml"
2
2
  require "retriable"
3
3
  require "numo/narray"
4
+ require "base64"
4
5
 
5
6
  module SmartPrompt
6
7
  class Conversation
7
8
  include APIHandler
9
# Model-config keys that merge_model_request_options copies into the
# conversation's request options (looked up as String or Symbol keys).
MODEL_REQUEST_OPTION_KEYS = [
  "max_tokens",
  "max_completion_tokens",
  "top_p",
  "top_k",
  "response_format",
  "tool_choice",
  "parallel_tool_calls",
  "seed",
  "stop",
].freeze
20
+
8
21
  attr_reader :messages, :last_response, :config_file
9
22
  attr_reader :last_call_id
10
23
 
@@ -21,6 +34,9 @@ module SmartPrompt
21
34
  @current_adapter = engine.current_adapter
22
35
  @last_response = nil
23
36
  @tools = tools
37
+ @request_options = {}
38
+ @pending_content_parts = []
39
+ @thinking_enabled = nil
24
40
  end
25
41
 
26
42
  def use(llm_name)
@@ -43,6 +59,7 @@ module SmartPrompt
43
59
 
44
60
  use(llm_name)
45
61
  model(configured_model_name)
62
+ merge_model_request_options(model_config)
46
63
  self
47
64
  end
48
65
 
@@ -54,6 +71,20 @@ module SmartPrompt
54
71
  @temperature = temperature
55
72
  end
56
73
 
74
# Merge extra per-request options into this conversation's option hash.
#
# options - Hash of options to merge; nil (or false) is treated as empty.
#
# Returns self so calls can be chained.
def request_options(options = {})
  extra_options = options || {}
  @request_options.update(extra_options)
  self
end
78
+
79
# Record the thinking flag and, when a system message has already been set,
# recompute it through thinking_system_message and push the new text into
# the stored system message.
#
# enabled - value stored in @thinking_enabled (defaults to true).
#
# Returns self so calls can be chained.
def thinking(enabled = true)
  @thinking_enabled = enabled
  return self unless @sys_msg

  @sys_msg = thinking_system_message(@sys_msg)
  refresh_system_message(@sys_msg)
  self
end
87
+
57
88
  def history_messages
58
89
  @engine.history_messages
59
90
  end
@@ -71,23 +102,43 @@ module SmartPrompt
71
102
  SmartPrompt.logger.info "Use template #{template_name}"
72
103
  raise "Template #{template_name} not found" unless @templates.key?(template_name)
73
104
  content = @templates[template_name].render(params)
74
- add_message({ role: "user", content: content }, with_history)
105
+ add_user_content(content, with_history)
75
106
  self
76
107
  else
77
- add_message({ role: "user", content: template_name }, with_history)
108
+ add_user_content(template_name, with_history)
78
109
  self
79
110
  end
80
111
  end
81
112
 
82
113
  def sys_msg(message, params)
83
- @sys_msg = message
84
- add_message({ role: "system", content: message }, params[:with_history])
114
+ @sys_msg = thinking_system_message(message)
115
+ add_message({ role: "system", content: @sys_msg }, params[:with_history])
116
+ self
117
+ end
118
+
119
# Add a user message whose content is an explicit list of content parts.
#
# parts        - collection of part hashes, passed through
#                normalize_content_parts before being stored.
# with_history - forwarded unchanged as add_message's second argument.
#
# Returns self so calls can be chained.
def multimodal_prompt(parts, with_history: false)
  user_message = { role: "user", content: normalize_content_parts(parts) }
  add_message(user_message, with_history)
  self
end
123
+
124
# Queue an image part to be attached to the next user prompt.
#
# source       - handed to media_part as the image source.
# token_budget - optional value forwarded to media_part as metadata.
# metadata     - any further keyword metadata, forwarded as-is.
#
# Returns self so calls can be chained.
def image(source, token_budget: nil, **metadata)
  part_options = { token_budget: token_budget, **metadata }
  @pending_content_parts.push(media_part("image", source, **part_options))
  self
end
128
+
129
# Queue an audio part to be attached to the next user prompt.
#
# source   - handed to media_part as the audio source.
# metadata - keyword metadata forwarded to media_part as-is.
#
# Returns self so calls can be chained.
def audio(source, **metadata)
  audio_part = media_part("audio", source, **metadata)
  @pending_content_parts.push(audio_part)
  self
end
133
+
134
# Queue a video part to be attached to the next user prompt.
#
# source      - handed to media_part as the video source.
# fps         - optional value forwarded to media_part as metadata.
# max_seconds - optional value forwarded to media_part as metadata.
# metadata    - any further keyword metadata, forwarded as-is.
#
# Returns self so calls can be chained.
def video(source, fps: nil, max_seconds: nil, **metadata)
  part_options = { fps: fps, max_seconds: max_seconds, **metadata }
  @pending_content_parts.push(media_part("video", source, **part_options))
  self
end
87
138
 
88
139
  def send_msg_once
89
140
  raise "No LLM selected" if @current_llm.nil?
90
- @last_response = @current_llm.send_request(@messages, @model_name, @temperature)
141
+ @last_response = send_llm_request(@messages, nil)
91
142
  @messages = []
92
143
  @messages << { role: "system", content: @sys_msg }
93
144
  @last_response
@@ -97,9 +148,9 @@ module SmartPrompt
97
148
  Retriable.retriable(RETRY_OPTIONS) do
98
149
  raise ConfigurationError, "No LLM selected" if @current_llm.nil?
99
150
  if params[:with_history]
100
- @last_response = @current_llm.send_request(history_messages, @model_name, @temperature, @tools, nil)
151
+ @last_response = send_llm_request(history_messages, nil)
101
152
  else
102
- @last_response = @current_llm.send_request(@messages, @model_name, @temperature, @tools, nil)
153
+ @last_response = send_llm_request(@messages, nil)
103
154
  end
104
155
  if @last_response == ""
105
156
  @last_response = @current_llm.last_response
@@ -116,9 +167,9 @@ module SmartPrompt
116
167
  Retriable.retriable(RETRY_OPTIONS) do
117
168
  raise ConfigurationError, "No LLM selected" if @current_llm.nil?
118
169
  if params[:with_history]
119
- @current_llm.send_request(history_messages, @model_name, @temperature, @tools, proc)
170
+ send_llm_request(history_messages, proc)
120
171
  else
121
- @current_llm.send_request(@messages, @model_name, @temperature, @tools, proc)
172
+ send_llm_request(@messages, proc)
122
173
  end
123
174
  @messages = []
124
175
  @messages << { role: "system", content: @sys_msg }
@@ -152,5 +203,120 @@ module SmartPrompt
152
203
  normalize(@last_response, length)
153
204
  end
154
205
  end
206
+
207
+ private
208
+
209
# Dispatch a request to the current LLM adapter.
#
# The request options are appended only when the adapter's send_request
# declares at least six parameters; adapters with a shorter signature are
# called with the five-argument form.
#
# messages - message list to send.
# proc     - streaming callback, or nil.
#
# Returns whatever the adapter's send_request returns.
def send_llm_request(messages, proc)
  request_args = [messages, @model_name, @temperature, @tools, proc]
  declared = @current_llm.method(:send_request).parameters
  request_args << @request_options if declared.length >= 6
  @current_llm.send_request(*request_args)
end
217
+
218
# Copy request options declared on a model config into @request_options.
#
# Two sources are read, in this order:
#   1. an explicit "request_options" / :request_options sub-hash;
#   2. each key listed in MODEL_REQUEST_OPTION_KEYS, looked up as a String
#      and then as a Symbol (these override the sub-hash on conflict).
#
# All keys are stored as Symbols so that "top_p" and :top_p can never both
# end up in the outgoing request. Only nil means "not configured": an
# explicit false (e.g. parallel_tool_calls: false) is kept.
#
# model_config - Hash describing the model (String or Symbol keys).
def merge_model_request_options(model_config)
  explicit_options = model_config["request_options"] || model_config[:request_options] || {}
  @request_options.merge!(explicit_options.transform_keys(&:to_sym))

  MODEL_REQUEST_OPTION_KEYS.each do |key|
    value = model_config[key]
    # Fall back on nil only; `||` here would discard a configured `false`.
    value = model_config[key.to_sym] if value.nil?
    @request_options[key.to_sym] = value unless value.nil?
  end
end
226
+
227
# Add a user message, folding in any media parts queued by image/audio/video.
#
# With no pending parts the content is stored as given. Otherwise it is
# combined with the pending parts via multimodal_content and the queue is
# reset afterwards.
#
# content      - text (or other content) for the user message.
# with_history - forwarded unchanged as add_message's second argument.
def add_user_content(content, with_history)
  return add_message({ role: "user", content: content }, with_history) if @pending_content_parts.empty?

  combined = multimodal_content(content)
  add_message({ role: "user", content: combined }, with_history)
  @pending_content_parts = []
end
235
+
236
# Combine the pending media parts with a text part into one content list.
#
# Resulting order: image/video parts, then parts of any other type, then the
# text part, then audio parts. Relative order inside each group is kept. The
# list is passed through normalize_content_parts before being returned.
#
# text - prompt text; converted with to_s.
def multimodal_content(text)
  visual_types = ["image_url", "image", "video_url", "video"]
  audio_types = ["input_audio", "audio"]
  visual = []
  audio = []
  other = []

  @pending_content_parts.each do |part|
    part_type = part[:type] || part["type"]
    if visual_types.include?(part_type)
      visual << part
    elsif audio_types.include?(part_type)
      audio << part
    else
      other << part
    end
  end

  text_part = { type: "text", text: text.to_s }
  normalize_content_parts(visual + other + [text_part] + audio)
end
243
+
244
# Return a copy of each part with its top-level keys converted to Strings.
# For parts whose "type" is "text", a "content" key is renamed to "text".
# Nested hashes are left untouched.
#
# parts - collection of part hashes.
def normalize_content_parts(parts)
  parts.map do |part|
    stringified = part.each_with_object({}) { |(key, value), copy| copy[key.to_s] = value }
    rename_content = stringified["type"] == "text" && stringified.key?("content")
    stringified["text"] = stringified.delete("content") if rename_content
    stringified
  end
end
251
+
252
# Build one content part for a media source.
#
# type     - "image", "audio" or "video". Any other value yields a bare
#            { type: type } part and the source is not read.
# source   - path of a local file, which is read and Base64-encoded. For
#            images and videos an http(s):// URL or data: URI String is
#            forwarded unchanged as the part's url instead of being treated
#            as a file path (previously this raised Errno::ENOENT). Audio is
#            always read from disk because its part carries raw Base64 data.
# metadata - extra keys copied onto the top level of the part; entries whose
#            value is nil are skipped.
#
# Returns a Hash with Symbol keys. Errors raised by File.binread propagate.
def media_part(type, source, **metadata)
  remote = source.is_a?(String) && source.match?(%r{\A(?:https?://|data:)}i)
  encode_file = -> { Base64.strict_encode64(File.binread(source)) }

  part =
    case type
    when "image"
      url = remote ? source : "data:#{detect_image_mime(source)};base64,#{encode_file.call}"
      { type: "image_url", image_url: { url: url } }
    when "audio"
      format = detect_audio_format(source)
      { type: "input_audio", input_audio: { data: encode_file.call, format: format } }
    when "video"
      url = remote ? source : "data:#{detect_video_mime(source)};base64,#{encode_file.call}"
      { type: "video_url", video_url: { url: url } }
    else
      { type: type }
    end

  metadata.each do |key, value|
    part[key] = value unless value.nil?
  end
  part
end
279
+
280
# Map a file path's extension (case-insensitive) to an image MIME type.
# Unrecognised or missing extensions yield "application/octet-stream".
def detect_image_mime(path)
  mime_by_extension = {
    ".png" => "image/png",
    ".jpg" => "image/jpeg",
    ".jpeg" => "image/jpeg",
    ".gif" => "image/gif",
    ".webp" => "image/webp",
    ".bmp" => "image/bmp",
    ".svg" => "image/svg+xml",
  }
  mime_by_extension.fetch(File.extname(path).downcase, "application/octet-stream")
end
292
+
293
# Derive an audio format name from a file path's extension
# (case-insensitive, without the dot). Extensions outside the known list,
# including a missing extension, fall back to "wav".
def detect_audio_format(path)
  known_formats = %w[wav mp3 ogg flac aac m4a]
  extension = File.extname(path).downcase.sub(/\A\./, "")
  return extension if known_formats.include?(extension)

  "wav"
end
297
+
298
# Map a file path's extension (case-insensitive) to a video MIME type.
# Unrecognised or missing extensions yield "application/octet-stream".
def detect_video_mime(path)
  mime_by_extension = {
    ".mp4" => "video/mp4",
    ".webm" => "video/webm",
    ".mov" => "video/quicktime",
    ".avi" => "video/x-msvideo",
  }
  mime_by_extension.fetch(File.extname(path).downcase, "application/octet-stream")
end
308
+
309
# Return the system message text with the "<|think|>" marker applied
# according to @thinking_enabled.
#
# A marker already at the very start of the message (with its optional
# trailing newline) is stripped first, so repeated calls never stack markers.
# The marker is prepended only when @thinking_enabled is exactly true; for
# false, nil or any other value the stripped text is returned.
#
# message - system message; converted with to_s.
def thinking_system_message(message)
  untagged = message.to_s.sub(/\A<\|think\|>\n?/, "")
  @thinking_enabled == true ? "<|think|>\n#{untagged}" : untagged
end
316
+
317
# Replace the content of the first system message in @messages.
#
# The message is located by a :role or "role" key equal to "system". The new
# content is written under the same key style the message uses, so a
# String-keyed message gets "content" updated instead of gaining a second,
# Symbol-keyed :content entry next to a stale "content".
#
# message - new system message text.
#
# Returns the assigned message, or nil when @messages has no system message.
def refresh_system_message(message)
  system_message = @messages.find { |item| (item[:role] || item["role"]) == "system" }
  return unless system_message

  content_key = system_message[:role] == "system" ? :content : "content"
  system_message[content_key] = message
end
155
321
  end
156
322
  end
@@ -123,15 +123,12 @@ module SmartPrompt
123
123
  if result.class == String
124
124
  recive_message = {
125
125
  "role": "assistant",
126
- "content": result,
126
+ "content": sanitize_history_content(result),
127
127
  }
128
128
  elsif result.class == Array
129
129
  recive_message = nil
130
130
  else
131
- recive_message = {
132
- "role": result.dig("choices", 0, "message", "role"),
133
- "content": result.dig("choices", 0, "message", "content").to_s + result.dig("choices", 0, "message", "tool_calls").to_s,
134
- }
131
+ recive_message = assistant_history_message(result)
135
132
  end
136
133
  worker.conversation.add_message(recive_message) if recive_message
137
134
  SmartPrompt.logger.info "Worker result is: #{result}"
@@ -175,5 +172,22 @@ module SmartPrompt
175
172
  def clear_history_messages
176
173
  @history_messages = []
177
174
  end
175
+
176
+ private
177
+
178
# Build the history entry for an assistant response hash.
#
# Reads result["choices"][0]["message"]; a missing message is treated as
# empty. The role defaults to "assistant" and the content is converted with
# to_s and passed through sanitize_history_content. Non-empty tool calls are
# attached as well.
#
# Note: the returned Hash has Symbol keys :role and :content, while tool
# calls are stored under the String key "tool_calls".
def assistant_history_message(result)
  message = result.dig("choices", 0, "message") || {}
  tool_calls = message["tool_calls"]

  history_message = {
    role: message["role"] || "assistant",
    content: sanitize_history_content(message["content"].to_s),
  }
  history_message["tool_calls"] = tool_calls unless !tool_calls || tool_calls.empty?
  history_message
end
188
+
189
# Strip every "<|channel>thought\n ... <channel|>" span from the content
# before it is stored in history. Matching is non-greedy and spans newlines.
#
# content - response text; converted with to_s (nil becomes "").
def sanitize_history_content(content)
  thought_block = /<\|channel\>thought\n.*?<channel\|>/m
  content.to_s.gsub(thought_block, "")
end
178
192
  end
179
193
  end
@@ -31,7 +31,19 @@ module SmartPrompt
31
31
  end
32
32
  end
33
33
 
34
- def send_request(messages, model = nil, temperature = 0.7, tools = nil, proc = nil)
34
# Adapter-config keys that configured_request_parameters copies into the
# request parameters when present in @config.
REQUEST_PARAMETER_KEYS = [
  "max_tokens",
  "max_completion_tokens",
  "top_p",
  "top_k",
  "response_format",
  "tool_choice",
  "parallel_tool_calls",
  "seed",
  "stop",
].freeze
45
+
46
+ def send_request(messages, model = nil, temperature = 0.7, tools = nil, proc = nil, request_options = {})
35
47
  SmartPrompt.logger.info "OpenAIAdapter: Sending request to OpenAI"
36
48
  temperature = 0.7 if temperature == nil
37
49
  if model
@@ -46,6 +58,8 @@ module SmartPrompt
46
58
  messages: messages,
47
59
  temperature: @config["temperature"] || temperature,
48
60
  }
61
+ parameters.merge!(configured_request_parameters)
62
+ parameters.merge!(request_options || {})
49
63
  if proc
50
64
  parameters[:stream] = proc
51
65
  end
@@ -99,5 +113,15 @@ module SmartPrompt
99
113
  end
100
114
  return response.dig("data", 0, "embedding")
101
115
  end
116
+
117
+ private
118
+
119
# Collect the request parameters present in the adapter config.
#
# Only keys listed in REQUEST_PARAMETER_KEYS are considered, and only when
# @config has the String key (a present key with a nil value is still
# included).
#
# Returns a new Hash keyed by Symbol.
def configured_request_parameters
  REQUEST_PARAMETER_KEYS
    .select { |key| @config.key?(key) }
    .to_h { |key| [key.to_sym, @config[key]] }
end
102
126
  end
103
127
  end
@@ -1,3 +1,3 @@
1
1
  module SmartPrompt
2
- VERSION = "0.4.2"
2
+ VERSION = "0.4.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smart_prompt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - zhuang biaowei
@@ -93,6 +93,20 @@ dependencies:
93
93
  - - "~>"
94
94
  - !ruby/object:Gem::Version
95
95
  version: 0.9.2.1
96
+ - !ruby/object:Gem::Dependency
97
+ name: base64
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: 0.3.0
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 0.3.0
96
110
  description: SmartPrompt provides a flexible DSL for managing prompts, interacting
97
111
  with multiple LLMs, and creating composable task workers.
98
112
  email:
@@ -138,7 +152,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
138
152
  - !ruby/object:Gem::Version
139
153
  version: '0'
140
154
  requirements: []
141
- rubygems_version: 4.0.10
155
+ rubygems_version: 4.0.13
142
156
  specification_version: 4
143
157
  summary: A smart prompt management and LLM interaction gem
144
158
  test_files: []