smart_prompt 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -10
- data/README.cn.md +307 -64
- data/README.md +311 -64
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +363 -281
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +81 -191
- data/lib/smart_prompt/engine.rb +36 -19
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/openai_adapter.rb +1 -25
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +25 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +22 -2
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +84 -8
|
@@ -0,0 +1,616 @@
|
|
|
1
|
+
require "base64"
|
|
2
|
+
require "json"
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "fileutils"
|
|
6
|
+
|
|
7
|
+
module SmartPrompt
|
|
8
|
+
# Adapter for 智谱 AI (BigModel / GLM) — covering all REST model categories behind one
|
|
9
|
+
# provider domain. One adapter owns the whole provider: every category shares the same
|
|
10
|
+
# base URL `https://open.bigmodel.cn/api/paas/v4` and Bearer-token auth, so a single config
|
|
11
|
+
# block serves them just by changing `model`.
|
|
12
|
+
#
|
|
13
|
+
# 1. 文本对话 (chat) — POST {base}/chat/completions (OpenAI-compatible; reasoning
|
|
14
|
+
# models return message.reasoning_content, the exact field the engine
|
|
15
|
+
# already reads — no remap needed)
|
|
16
|
+
# 2. 图文多模态 (vision) — same endpoint, OpenAI Vision content array
|
|
17
|
+
# 3. 向量 (embeddings) — POST {base}/embeddings (embedding-3, custom dimensions)
|
|
18
|
+
# 4. 文生图 (image) — POST {base}/images/generations (response may be FLAT data[].url or NESTED data.images[].url)
|
|
19
|
+
# 5. 文生视频 (video) — POST {base}/videos/generations -> task_id; GET {base}/async-result/{task_id}
|
|
20
|
+
# poll until SUCCESS -> video_result.url (async)
|
|
21
|
+
# 6. 语音合成 (TTS) — POST {base}/audio/speech (glm-tts)
|
|
22
|
+
# 7. 语音识别 (ASR) — POST {base}/audio/transcriptions (glm-asr-2512, multipart)
|
|
23
|
+
# 8. 重排 (rerank) — POST {base}/rerank
|
|
24
|
+
#
|
|
25
|
+
# We talk to the endpoints with Net::HTTP directly (like the SenseNova / image / tts / stt /
|
|
26
|
+
# video adapters) so we can control SSE streaming, the nested image shape, and the async
|
|
27
|
+
# video flow. No new gem deps.
|
|
28
|
+
class ZhipuAIAdapter < LLMAdapter
|
|
29
|
+
DEFAULT_BASE_URL = "https://open.bigmodel.cn/api/paas/v4".freeze
|
|
30
|
+
# CodeGeeX-4 / coding models use a separate base.
|
|
31
|
+
DEFAULT_CODING_BASE_URL = "https://open.bigmodel.cn/api/coding/paas/v4".freeze
|
|
32
|
+
SUPPORTED_IMAGE_FORMATS = %w[jpg jpeg png gif bmp webp].freeze
|
|
33
|
+
|
|
34
|
+
# Zhipu chat sampling parameters forwarded from config when present.
|
|
35
|
+
CHAT_OPTIONAL_KEYS = %w[
|
|
36
|
+
top_p max_tokens do_sample stop presence_penalty frequency_penalty thinking
|
|
37
|
+
].freeze
|
|
38
|
+
|
|
39
|
+
# Build the adapter from a provider config hash.
#
# Config keys read here: "api_key" (a literal key, or an ENV["NAME"] reference), "url",
# "coding_url", and the optional per-endpoint overrides "image_url", "video_url", "query_url".
# A missing key is tolerated (only a warning is logged) so configs can load without a live key.
#
# @param config [Hash] provider configuration; handed to the parent initializer, which is
#   presumably what populates @config — confirm against LLMAdapter
def initialize(config)
  super
  SmartPrompt.logger.info "Start create the SmartPrompt ZhipuAIAdapter."

  api_key = @config["api_key"]
  if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
    # The reference comes from a config file, so it is parsed rather than eval'ed: eval would
    # execute arbitrary Ruby embedded in the config. Supported forms are ENV["NAME"] /
    # ENV['NAME'], optionally chained with `||` (first variable that is set wins).
    env_names = api_key.split("||").map { |part| part.strip[/\AENV\[\s*(["'])([^"']+)\1\s*\]\z/, 2] }
    if env_names.all?
      api_key = env_names.map { |name| ENV[name] }.compact.first
    else
      SmartPrompt.logger.warn "Zhipu api_key is not a plain ENV[\"NAME\"] reference — ignoring it."
      api_key = nil
    end
  end
  # Match the other adapters: tolerate a missing key at construction so examples/config
  # can load without a live key; the first request fails with a clear auth error.
  SmartPrompt.logger.warn "Zhipu api_key is empty — API calls will fail until it is set." if api_key.nil? || api_key.to_s.strip.empty?

  @api_key = api_key
  @base_url = (@config["url"] || DEFAULT_BASE_URL).to_s.chomp("/")
  @coding_base = (@config["coding_url"] || DEFAULT_CODING_BASE_URL).to_s.chomp("/")
  # Optional per-method URL overrides (default to the standard paths off @base_url).
  @image_url = (@config["image_url"] || "#{@base_url}/images/generations").to_s
  @video_url = (@config["video_url"] || "#{@base_url}/videos/generations").to_s
  @query_url = (@config["query_url"] || "#{@base_url}/async-result").to_s
  SmartPrompt.logger.info "Zhipu base_url=#{@base_url}"
end
|
|
60
|
+
|
|
61
|
+
public
|
|
62
|
+
|
|
63
|
+
# ---- chat + vision -------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
# Chat / multimodal. Non-streaming returns a full OpenAI-format hash (so last_response
|
|
66
|
+
# carries usage + reasoning_content); streaming calls +proc+ with each OpenAI-shaped chunk.
|
|
67
|
+
# @param messages [Array<Hash>] chat messages (symbol or string keys)
# @param model [String, nil] model id; falls back to config "model"
# @param temperature [Numeric, nil] per-call temperature (see build_chat_body for precedence)
# @param tools [Array, nil] tool definitions forwarded when non-empty
# @param proc [#call, nil] when given, the request is streamed and each chunk is passed as
#   proc.call(chunk, 0)
# @return [Hash, nil] the completion hash (also stored in @last_response), or nil when streaming
# @raise [LLMAPIError] for any failure that is not already a SmartPrompt error
def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
  model_name = model || @config["model"]
  payload = build_chat_body(messages, model_name, temperature, tools)
  SmartPrompt.logger.info "ZhipuAIAdapter: chat request model=#{model_name} stream=#{!proc.nil?}"

  endpoint = chat_url_for(model_name)

  unless proc
    completion = build_completion_response(http_post_json(endpoint, payload))
    @last_response = completion
    SmartPrompt.logger.info "ZhipuAIAdapter: received chat response"
    return completion
  end

  payload["stream"] = true
  stream_chat(endpoint, payload) do |event|
    proc.call(build_stream_chunk(event), 0)
  end
  SmartPrompt.logger.info "ZhipuAIAdapter: streaming request finished"
  nil
rescue LLMAPIError, Error
  # Library errors already carry a useful message; pass them through untouched.
  raise
rescue => e
  SmartPrompt.logger.error "Zhipu chat error: #{e.message}"
  raise LLMAPIError, "Failed to call Zhipu chat: #{e.message}"
end
|
|
91
|
+
|
|
92
|
+
# ---- embeddings ----------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
# embedding-3 (default 2048 dims); supports a custom `dimensions` (256/512/1024/2048)
|
|
95
|
+
# via config. Returns the first embedding vector.
|
|
96
|
+
# @param text [String, Array] input to embed; an Array is sent as-is, anything else is
#   stringified and wrapped in a one-element list
# @param model [String, nil] model id; falls back to config "embedding_model", then "model"
# @return [Object] the "embedding" value of the first item in the response "data" list
# @raise [LLMAPIError] when the request fails or the response carries no embedding
def embeddings(text, model)
  model_name = model || @config["embedding_model"] || @config["model"]
  SmartPrompt.logger.info "ZhipuAIAdapter: embeddings model=#{model_name}"

  inputs = text.is_a?(Array) ? text : [text.to_s]
  payload = { "model" => model_name, "input" => inputs }
  # Optional tuning knobs are only sent when configured.
  %w[dimensions encoding_format].each do |key|
    payload[key] = @config[key] if @config[key]
  end

  response =
    begin
      http_post_json("#{@base_url}/embeddings", payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise LLMAPIError, "Failed to call Zhipu embeddings: #{e.message}"
    end

  entries = response["data"]
  vector = entries.is_a?(Array) && entries.any? && entries[0]["embedding"]
  raise LLMAPIError, "No embedding vector in Zhipu response: #{response.inspect}" unless vector

  vector
end
|
|
119
|
+
|
|
120
|
+
# ---- image (CogView / GLM-Image) -----------------------------------------
|
|
121
|
+
|
|
122
|
+
# Text-to-image. The Zhipu response may be FLAT (data[].url) or NESTED (data.images[].url),
|
|
123
|
+
# so we parse defensively. Returns an Array of {url:, b64_json:}.
|
|
124
|
+
# @param prompt [String] image description; must not be blank
# @param params [Hash] optional :model, :size, :user, :response_format (symbol keys)
# @return [Array<Hash>] one {url:, b64_json:} hash per generated image
# @raise [Error] on a blank prompt, a missing model, or an unexpected transport failure
# @raise [LLMAPIError] on an API error or a response without image data
def generate_image(prompt, params = {})
  SmartPrompt.logger.info "ZhipuAIAdapter: generating image"
  # nil.to_s is "", so this one check covers both nil and blank prompts.
  raise Error, "Prompt cannot be empty" if prompt.to_s.strip.empty?

  model_name = params[:model] || @config["image_model"] || @config["model"]
  raise Error, "No model configured for image generation" if model_name.to_s.strip.empty?

  payload = { "model" => model_name, "prompt" => prompt.to_s }
  %i[size user response_format].each do |option|
    payload[option.to_s] = params[option] if params[option]
  end

  # Log everything except the (potentially long) prompt.
  loggable = payload.reject { |key, _| key == "prompt" }
  SmartPrompt.logger.info "Zhipu image params: #{loggable.inspect}"

  response =
    begin
      http_post_json(@image_url, payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise Error, "Failed to call Zhipu image generation: #{e.message}"
    end

  parse_image_response(response).tap do |images|
    SmartPrompt.logger.info "ZhipuAIAdapter: generated #{images.size} image(s)"
  end
end
|
|
150
|
+
|
|
151
|
+
# Save one or many generated images to disk (Array from #generate_image or a single hash).
|
|
152
|
+
# @param image_data [Array<Hash>, Hash] result of #generate_image, or a single image hash
# @param output_dir [String] directory to write into (created if missing)
# @param filename_prefix [String] files are named "<prefix>_<1-based index>.<ext>"
# @return [Array<String>] paths of the written files
def save_image(image_data, output_dir = "./output", filename_prefix = "zhipu_image")
  FileUtils.mkdir_p(output_dir)
  # Not Array(): that would explode a single Hash into key/value pairs.
  batch = image_data.is_a?(Array) ? image_data : [image_data]
  paths = batch.map.with_index(1) do |image, number|
    save_single_image(image, output_dir, "#{filename_prefix}_#{number}")
  end
  SmartPrompt.logger.info "Saved #{paths.size} Zhipu image(s) to #{output_dir}"
  paths
end
|
|
161
|
+
|
|
162
|
+
# ---- video (CogVideoX, async) --------------------------------------------
|
|
163
|
+
|
|
164
|
+
# Submit a text-to-video (or image-to-video) job. Returns a Hash {task_id:, model:, raw:}.
|
|
165
|
+
# @param prompt [String, nil] video description (stringified before sending)
# @param params [Hash] optional :model, :quality, :fps, :duration, :with_audio, :resolution,
#   :request_id, :seed, and :image_url (remote URL, data URL, or local image path)
# @return [Hash] {task_id:, model:, raw:} where raw is the parsed submit response
# @raise [Error] when no model is configured or the transport fails unexpectedly
# @raise [LLMAPIError] on an API error or a response without a task id
def generate_video(prompt, params = {})
  SmartPrompt.logger.info "ZhipuAIAdapter: submitting video job"
  model_name = params[:model] || @config["video_model"] || @config["model"]
  raise Error, "No model configured for video generation" if model_name.to_s.strip.empty?

  payload = { "model" => model_name, "prompt" => prompt.to_s }
  # nil means "not supplied"; false (e.g. with_audio: false) is still forwarded.
  %i[quality fps duration with_audio resolution request_id seed].each do |option|
    value = params[option]
    payload[option.to_s] = value unless value.nil?
  end
  payload["image_url"] = normalize_image_url(params[:image_url]) if params[:image_url]

  loggable = payload.reject { |key, _| key == "prompt" }
  SmartPrompt.logger.info "Zhipu video params: #{loggable.inspect}"

  response =
    begin
      http_post_json(@video_url, payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise Error, "Failed to submit Zhipu video job: #{e.message}"
    end

  task_id = response["id"] || response["task_id"]
  raise LLMAPIError, "No task id in Zhipu video response: #{response.inspect}" unless task_id

  SmartPrompt.logger.info "ZhipuAIAdapter: video task #{task_id} submitted"
  { task_id: task_id, model: model_name, raw: response }
end
|
|
191
|
+
|
|
192
|
+
# Poll an async task. Returns the raw status hash (task_status etc.).
|
|
193
|
+
# @param task_id [String] id returned by the submit call; appended (form-encoded) as a path
#   segment to the query URL
# @return [Hash] parsed status response
# @raise [LLMAPIError] when the query fails
def check_video_status(task_id)
  SmartPrompt.logger.info "ZhipuAIAdapter: polling video task #{task_id}"
  encoded_id = URI.encode_www_form_component(task_id)
  http_get_json("#{@query_url}/#{encoded_id}")
rescue LLMAPIError, Error
  raise
rescue => e
  raise LLMAPIError, "Failed to query Zhipu video task: #{e.message}"
end
|
|
201
|
+
|
|
202
|
+
# Block until the task finishes (or times out), then return the video URL.
|
|
203
|
+
# Poll the task until it reaches a terminal state.
#
# @param task_id [String] id of the submitted video task
# @param check_interval [Numeric] seconds to sleep between polls
# @param timeout [Numeric] maximum seconds to keep polling; checked after each non-terminal
#   poll, so the call can overrun by up to one poll plus one interval
# @return [Hash] {task_id:, status: "SUCCESS", video_url:, cover_image_url:, raw:}
# @raise [LLMAPIError] when the task fails, succeeds without a URL, or the timeout elapses
def wait_for_video_completion(task_id, check_interval: 10, timeout: 600)
  # Monotonic clock: elapsed time must not be distorted by wall-clock adjustments (NTP, DST).
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  loop do
    status = check_video_status(task_id)
    case task_status_of(status)
    when "SUCCESS"
      url = video_url_of(status)
      raise LLMAPIError, "Video succeeded but no url in: #{status.inspect}" unless url
      SmartPrompt.logger.info "ZhipuAIAdapter: video ready #{url}"
      return { task_id: task_id, status: "SUCCESS", video_url: url, cover_image_url: cover_url_of(status), raw: status }
    when "FAIL", "FAILED"
      raise LLMAPIError, "Zhipu video generation failed: #{status.inspect}"
    else
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      raise LLMAPIError, "Zhipu video generation timeout after #{timeout}s" if elapsed > timeout

      SmartPrompt.logger.info "Zhipu video task #{task_id} still processing..."
      sleep(check_interval)
    end
  end
end
|
|
224
|
+
|
|
225
|
+
# Fetch a finished video with a plain GET and write it to disk.
#
# @param video_url [String] URL of the video to download
# @param output_path [String] destination file; parent directories are created
# @return [String] output_path
# @raise [Error] on any failure; SmartPrompt errors keep their class, and every message is
#   prefixed with "Error downloading Zhipu video: "
def download_video(video_url, output_path)
  target = URI.parse(video_url)
  client = Net::HTTP.new(target.host, target.port)
  client.use_ssl = (target.scheme == "https")

  reply = client.request(Net::HTTP::Get.new(target.request_uri))
  raise Error, "Failed to download video: #{reply.code}" unless reply.is_a?(Net::HTTPSuccess)

  FileUtils.mkdir_p(File.dirname(output_path))
  File.binwrite(output_path, reply.body)
  SmartPrompt.logger.info "Zhipu video saved to #{output_path}"
  output_path
rescue => e
  # Re-raise a SmartPrompt error as itself (with the prefixed message); wrap anything else.
  carrier = e.is_a?(SmartPrompt::Error) ? e : Error
  raise carrier, "Error downloading Zhipu video: #{e.message}"
end
|
|
237
|
+
|
|
238
|
+
# ---- TTS (GLM-TTS) -------------------------------------------------------
|
|
239
|
+
|
|
240
|
+
# Returns a base64 data URL for the synthesized audio. GLM-TTS accepts wav/pcm only
|
|
241
|
+
# (mp3/flac are rejected), so default to wav.
|
|
242
|
+
# @param text [String] text to synthesize; must not be blank
# @param voice [String, nil] voice id, sent only when given
# @param model [String, nil] model id; falls back to config "tts_model", then "glm-tts"
# @param response_format [String] audio format, also used as the data-URL subtype
# @param opts [Hash] optional :speed and :emotion
# @return [String] "data:audio/<format>;base64,<payload>"
# @raise [Error] on blank text or an unexpected failure
# @raise [LLMAPIError] on an API error
def synthesize_speech(text, voice: nil, model: nil, response_format: "wav", **opts)
  SmartPrompt.logger.info "ZhipuAIAdapter: TTS"
  raise Error, "Text cannot be empty" if text.to_s.strip.empty?

  payload = {
    "model" => model || @config["tts_model"] || "glm-tts",
    "input" => text.to_s,
  }
  payload["voice"] = voice if voice
  payload["response_format"] = response_format
  %i[speed emotion].each do |option|
    payload[option.to_s] = opts[option] if opts[option]
  end

  audio_bytes = http_post_binary("#{@base_url}/audio/speech", payload)
  encoded = Base64.strict_encode64(audio_bytes)
  "data:audio/#{response_format};base64,#{encoded}"
rescue LLMAPIError, Error
  raise
rescue => e
  raise Error, "Failed to call Zhipu TTS: #{e.message}"
end
|
|
260
|
+
|
|
261
|
+
# Synthesize speech and write the decoded audio to a file.
#
# @param text [String] text to synthesize
# @param output_path [String] destination file; parent directories are created
# @param voice [String, nil] forwarded to #synthesize_speech
# @param model [String, nil] forwarded to #synthesize_speech
# @param response_format [String] forwarded to #synthesize_speech
# @param opts [Hash] forwarded to #synthesize_speech
# @return [Hash] {file_path:, format:}
def synthesize_to_file(text, output_path, voice: nil, model: nil, response_format: "wav", **opts)
  data_url = synthesize_speech(text, voice: voice, model: model, response_format: response_format, **opts)
  FileUtils.mkdir_p(File.dirname(output_path))

  # Strip the "data:audio/<fmt>;base64," header to get back to the raw base64 payload.
  encoded = data_url.sub(%r{\Adata:audio/\w+;base64,}, "")
  File.binwrite(output_path, Base64.decode64(encoded))

  SmartPrompt.logger.info "Zhipu audio saved to #{output_path}"
  { file_path: output_path, format: response_format }
end
|
|
269
|
+
|
|
270
|
+
# ---- ASR (GLM-ASR-2512) --------------------------------------------------
|
|
271
|
+
|
|
272
|
+
# Transcribe an audio file (local path). Returns {text:}.
|
|
273
|
+
# @param audio_file [String] path to a local audio file
# @param model [String, nil] model id; falls back to config "asr_model", then "glm-asr-2512"
# @param language [String, nil] sent as the "language" form field when given
# @param opts [Hash] optional :prompt and :response_format
# @return [Hash] {text:} taken from the response "text" field
# @raise [Error] when the file is missing or an unexpected failure occurs
# @raise [LLMAPIError] on an API error
def transcribe_audio(audio_file, model: nil, language: nil, **opts)
  SmartPrompt.logger.info "ZhipuAIAdapter: ASR #{File.basename(audio_file)}"
  raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)

  fields = { "model" => model || @config["asr_model"] || "glm-asr-2512" }
  fields["language"] = language if language
  %i[prompt response_format].each do |option|
    fields[option.to_s] = opts[option] if opts[option]
  end

  result = http_post_multipart("#{@base_url}/audio/transcriptions", fields, audio_file)
  { text: result["text"] }
rescue LLMAPIError, Error
  raise
rescue => e
  # Every SmartPrompt error was re-raised by the clause above, so e is always a foreign error.
  raise Error, "Failed to call Zhipu ASR: #{e.message}"
end
|
|
290
|
+
|
|
291
|
+
# ---- rerank (bonus) ------------------------------------------------------
|
|
292
|
+
|
|
293
|
+
# Score documents against a query via the rerank endpoint.
#
# @param query [String] the query, sent as-is
# @param documents [Array] candidate documents, sent as-is
# @param model [String, nil] model id; falls back to config "rerank_model", then "model"
# @return [Array<Hash>] one {index:, relevance_score:} per entry of the response "results"
#   list (empty when that list is absent)
# @raise [LLMAPIError] when the request fails
def rerank(query, documents, model: nil)
  payload = {
    "model" => model || @config["rerank_model"] || @config["model"],
    "query" => query,
    "documents" => documents,
  }
  ranked = http_post_json("#{@base_url}/rerank", payload)["results"] || []
  ranked.map do |entry|
    # Accept either field name for the score.
    { index: entry["index"], relevance_score: entry["relevance_score"] || entry["score"] }
  end
rescue LLMAPIError, Error
  raise
rescue => e
  raise LLMAPIError, "Failed to call Zhipu rerank: #{e.message}"
end
|
|
303
|
+
|
|
304
|
+
private
|
|
305
|
+
|
|
306
|
+
# ---- chat building -------------------------------------------------------
|
|
307
|
+
|
|
308
|
+
# Pick the chat-completions endpoint for a model.
#
# @param model_name [String, nil] model id
# @return [String] the coding-base URL when the id contains "codegeex" or config "coding" is
#   truthy, otherwise the regular base URL
def chat_url_for(model_name)
  use_coding_base = model_name.to_s.include?("codegeex") || @config["coding"]
  root = use_coding_base ? @coding_base : @base_url
  "#{root}/chat/completions"
end
|
|
312
|
+
|
|
313
|
+
# Assemble the JSON body for a chat-completions request.
#
# @param messages [Array<Hash>] chat messages, normalized via process_multimodal_messages
# @param model_name [String] model id
# @param temperature [Numeric, nil] used only when config "temperature" is unset; 0.7 is the
#   final fallback
# @param tools [Array, nil] included only when non-empty
# @return [Hash] request body with string keys
def build_chat_body(messages, model_name, temperature, tools)
  payload = {
    "model" => model_name,
    "messages" => process_multimodal_messages(messages),
    "temperature" => @config["temperature"] || temperature || 0.7,
  }
  # Forward every configured sampling option, including ones explicitly set to nil/false.
  CHAT_OPTIONAL_KEYS.each do |option|
    next unless @config.key?(option)

    payload[option] = @config[option]
  end
  payload["tools"] = tools unless !tools || tools.empty?
  payload
end
|
|
323
|
+
|
|
324
|
+
# Pass messages through, normalizing multimodal content (local image paths -> data URLs).
|
|
325
|
+
# Normalize messages for the wire: string keys, multimodal content arrays rewritten item by
# item, and the tool-calling fields carried over.
#
# Each message used to be rebuilt as only {role, content}, which dropped the assistant's
# "tool_calls" and the tool reply's "tool_call_id" and so broke tool-calling round trips.
#
# @param messages [Array<Hash>] chat messages with symbol or string keys
# @return [Array<Hash>] messages with "role", "content" and, when present, "name",
#   "tool_calls" and "tool_call_id"
def process_multimodal_messages(messages)
  passthrough_keys = %w[name tool_calls tool_call_id]

  messages.map do |msg|
    role = msg[:role] || msg["role"]
    content = msg[:content] || msg["content"]
    content = content.map { |item| normalize_content_item(item) } if content.is_a?(Array)

    normalized = { "role" => role, "content" => content }
    passthrough_keys.each do |key|
      value = msg[key.to_sym] || msg[key]
      normalized[key] = value unless value.nil?
    end
    normalized
  end
end
|
|
333
|
+
|
|
334
|
+
# Normalize one entry of a multimodal content array.
#
# @param item [Hash, Object] a content part; non-Hash values become a text part
# @return [Hash] a string-keyed content part; "image_url" parts have their URL normalized
def normalize_content_item(item)
  return { "type" => "text", "text" => item.to_s } unless item.is_a?(Hash)

  kind = item[:type] || item["type"]
  return stringify_hash(item) unless kind == "image_url"

  # image_url may be given as {url: ...} or as a bare string.
  reference = item[:image_url] || item["image_url"]
  location = reference.is_a?(Hash) ? (reference[:url] || reference["url"]) : reference
  { "type" => "image_url", "image_url" => { "url" => normalize_image_url(location) } }
end
|
|
346
|
+
|
|
347
|
+
# Turn an image reference into something the API accepts.
#
# @param url [String, nil] remote URL, data URL, or local file path
# @return [String, nil] nil and http(s)/data URLs are returned unchanged; a local path is read
#   and returned as a base64 data URL
# @raise [Error] when the local file is missing or its extension is not supported
def normalize_image_url(url)
  return url if url.nil? || url.start_with?("http://", "https://", "data:")

  raise Error, "Image file not found: #{url}" unless File.exist?(url)

  extension = File.extname(url).downcase.delete(".")
  raise Error, "Unsupported image format: #{extension}" unless SUPPORTED_IMAGE_FORMATS.include?(extension)

  # "jpg" is an extension only; the registered MIME subtype is "jpeg".
  subtype = extension == "jpg" ? "jpeg" : extension
  encoded = Base64.strict_encode64(File.binread(url))
  "data:image/#{subtype};base64,#{encoded}"
end
|
|
357
|
+
|
|
358
|
+
# ---- response shaping ----------------------------------------------------
|
|
359
|
+
|
|
360
|
+
# Zhipu's non-streaming chat response is already OpenAI-shaped; normalize to a consistent
|
|
361
|
+
# hash and preserve reasoning_content where present.
|
|
362
|
+
# Reduce a raw chat response to a single-choice OpenAI-style hash.
#
# @param raw [Hash] parsed API response
# @return [Hash] "id", "object" (default "chat.completion"), "created", "model", one entry in
#   "choices" built from the first raw choice, and "usage" when present
def build_completion_response(raw)
  source = raw.dig("choices", 0, "message") || {}

  message = {
    "role" => source["role"] || "assistant",
    "content" => source["content"],
  }
  # Optional fields are copied only when the provider actually returned them.
  %w[reasoning_content tool_calls].each do |field|
    message[field] = source[field] if source[field]
  end

  envelope = {
    "id" => raw["id"],
    "object" => raw["object"] || "chat.completion",
    "created" => raw["created"],
    "model" => raw["model"],
    "choices" => [
      {
        "index" => 0,
        "message" => message,
        "finish_reason" => raw.dig("choices", 0, "finish_reason"),
      },
    ],
  }
  usage = raw["usage"]
  envelope["usage"] = usage if usage
  envelope
end
|
|
383
|
+
|
|
384
|
+
# Build an OpenAI-style streaming chunk from a Zhipu SSE event. reasoning_content and
|
|
385
|
+
# content pass through unchanged (Zhipu already uses these names).
|
|
386
|
+
# Convert one parsed SSE event into an OpenAI-style streaming chunk.
#
# @param data [Hash] parsed event payload
# @return [Hash] "id", "object", "created", "model", optional "usage", and "choices" holding
#   at most the first choice, with its delta reduced to role / content / reasoning_content /
#   tool_calls
def build_stream_chunk(data)
  chunk = %w[id object created model].each_with_object({}) do |field, acc|
    acc[field] = data[field]
  end
  chunk["usage"] = data["usage"] if data["usage"]

  choices = data["choices"] || []
  unless choices.any?
    # Events without choices (e.g. usage-only) still get an explicit empty list.
    chunk["choices"] = []
    return chunk
  end

  lead = choices[0]
  delta = lead["delta"] || {}
  slim_delta = %w[role content reasoning_content tool_calls].each_with_object({}) do |field, acc|
    acc[field] = delta[field] if delta[field]
  end

  chunk["choices"] = [
    {
      "index" => lead["index"] || 0,
      "delta" => slim_delta,
      "finish_reason" => lead["finish_reason"],
    },
  ]
  chunk
end
|
|
413
|
+
|
|
414
|
+
# Zhipu image response: cogview-3-flash returns the FLAT OpenAI shape data[].url;
|
|
415
|
+
# older docs mention a NESTED data.images[].url. Handle both plus a bare-url array.
|
|
416
|
+
# Extract generated images from any of the response shapes seen so far:
#   data: [ {url:/b64_json:}, ... ]          (flat list)
#   data: { images: [...] } / { data: [...] } (nested list)
#   data: { url: "..." } / { b64_json: ".." } (single image object)
#   images: [...] / urls: [...]               (top-level fallbacks)
# List entries may be hashes or bare URL strings.
#
# @param response [Hash] parsed API response
# @return [Array<Hash>] one {url:, b64_json:} per image
# @raise [LLMAPIError] when no usable image entry is found
def parse_image_response(response)
  container = response["data"]
  items =
    if container.is_a?(Hash)
      container["images"] || container["data"] || container["url"] || (container["b64_json"] && [container])
    elsif container.is_a?(Array)
      container
    end
  items ||= response["images"] || response["urls"]

  # A single image may arrive as a bare String/Hash instead of a one-element list; it used to
  # be rejected as "no image data".
  items = [items] if items.is_a?(String) || items.is_a?(Hash)

  if items.is_a?(Array)
    # Bare URL strings become {"url" => ...}; anything that is not a Hash afterwards is dropped
    # so a stray nil cannot crash the final mapping.
    items = items.map { |entry| entry.is_a?(String) ? { "url" => entry } : entry }
                 .select { |entry| entry.is_a?(Hash) }
  end

  unless items.is_a?(Array) && items.any?
    SmartPrompt.logger.error "Zhipu image response had no images: #{response.inspect}"
    raise LLMAPIError, "No image data in Zhipu response"
  end
  items.map { |entry| { url: entry["url"], b64_json: entry["b64_json"] } }
end
|
|
435
|
+
|
|
436
|
+
# Zhipu async task status is under task_status; accept a few aliases.
|
|
437
|
+
# @param status [Hash] parsed async-task response
# @return [Object] the first truthy value among "task_status" and "status", or "PROCESSING"
def task_status_of(status)
  %w[task_status status].each do |field|
    value = status[field]
    return value if value
  end
  "PROCESSING"
end
|
|
440
|
+
|
|
441
|
+
# video_result is an Array: [{cover_image_url:, url:}]. Pull the first video url.
|
|
442
|
+
# Find the video URL in an async-task response.
#
# Looks at the first "video_result" entry ("url", then "video_url"), then falls back to a
# top-level "video_url" and finally "data" -> "video_url". A result entry without a URL used
# to short-circuit to nil and skip those fallbacks.
#
# @param status [Hash] parsed async-task response
# @return [String, nil] the video URL, or nil when none of the known locations has one
def video_url_of(status)
  results = status["video_result"]
  entry = results.is_a?(Array) ? results.first : results
  from_result = entry.is_a?(Hash) ? (entry["url"] || entry["video_url"]) : nil
  from_result || status["video_url"] || status.dig("data", "video_url")
end
|
|
448
|
+
|
|
449
|
+
# @param status [Hash] parsed async-task response
# @return [String, nil] "cover_image_url" (or "cover_url") of the first "video_result" entry,
#   nil when that entry is not a Hash
def cover_url_of(status)
  results = status["video_result"]
  entry = results.is_a?(Array) ? results.first : results
  return nil unless entry.is_a?(Hash)

  entry["cover_image_url"] || entry["cover_url"]
end
|
|
454
|
+
|
|
455
|
+
# ---- HTTP ----------------------------------------------------------------
|
|
456
|
+
|
|
457
|
+
# POST a JSON body with Bearer auth and parse the JSON reply.
#
# @param url [String] absolute endpoint URL
# @param body [Hash] request payload, serialized with #to_json
# @return [Hash, Array] parsed response; {} when the response body is empty
# @raise [LLMAPIError] on any non-2xx response (status and body are logged and included)
def http_post_json(url, body)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 240

  serialized = body.to_json
  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Authorization"] = "Bearer #{@api_key}"
  request.body = serialized
  SmartPrompt.logger.debug "Zhipu POST #{uri} body=#{serialized}"

  reply = client.request(request)
  unless reply.is_a?(Net::HTTPSuccess)
    SmartPrompt.logger.error "Zhipu API error: #{reply.code} - #{reply.body}"
    raise LLMAPIError, "Zhipu API error: #{reply.code} - #{reply.body}"
  end

  text = reply.body.to_s
  text.empty? ? {} : JSON.parse(text)
end
|
|
474
|
+
|
|
475
|
+
# GET a URL with Bearer auth and parse the JSON reply.
#
# @param url [String] absolute endpoint URL
# @return [Hash, Array] parsed response; {} when the response body is empty
# @raise [LLMAPIError] on any non-2xx response
def http_get_json(url)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 60

  request = Net::HTTP::Get.new(uri.request_uri)
  request["Authorization"] = "Bearer #{@api_key}"
  SmartPrompt.logger.debug "Zhipu GET #{uri}"

  reply = client.request(request)
  raise LLMAPIError, "Zhipu API error: #{reply.code} - #{reply.body}" unless reply.is_a?(Net::HTTPSuccess)

  text = reply.body.to_s
  text.empty? ? {} : JSON.parse(text)
end
|
|
489
|
+
|
|
490
|
+
# Returns the raw response body bytes (for TTS audio).
|
|
491
|
+
# POST a JSON body with Bearer auth and return the reply body unparsed.
#
# @param url [String] absolute endpoint URL
# @param body [Hash] request payload, serialized with #to_json
# @return [String] raw response body
# @raise [LLMAPIError] on any non-2xx response
def http_post_binary(url, body)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 120

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Authorization"] = "Bearer #{@api_key}"
  request.body = body.to_json

  reply = client.request(request)
  raise LLMAPIError, "Zhipu TTS API error: #{reply.code} - #{reply.body}" unless reply.is_a?(Net::HTTPSuccess)

  reply.body
end
|
|
506
|
+
|
|
507
|
+
# multipart/form-data POST with a file upload (for ASR). Returns parsed JSON.
|
|
508
|
+
# POST a multipart/form-data request with Bearer auth: the given form fields plus one file
# part named "file". Returns the parsed JSON reply.
#
# @param url [String] absolute endpoint URL
# @param form [Hash] text fields (name => value); values are interpolated into the body
# @param file_path [String] local file to upload; its extension selects the audio/* MIME type
#   ("wav" when the file has no extension)
# @return [Hash, Array] parsed response; {} when the response body is empty
# @raise [LLMAPIError] on any non-2xx response
def http_post_multipart(url, form, file_path)
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 30
  http.read_timeout = 180

  boundary = "----SmartPrompt#{object_id}"
  # String#delete never returns nil, so the fallback has to test for an empty extension.
  extension = File.extname(file_path).downcase.delete(".")
  mime = "audio/#{extension.empty? ? 'wav' : extension}"

  # Assemble the body as raw bytes. A UTF-8 buffer holding non-ASCII text (e.g. a Chinese
  # prompt or file name) cannot have binary audio appended — Ruby raises
  # Encoding::CompatibilityError — so every piece is appended in binary form.
  body = String.new(encoding: Encoding::BINARY)
  form.each do |k, v|
    body << "--#{boundary}\r\n".b
    body << "Content-Disposition: form-data; name=\"#{k}\"\r\n\r\n".b
    body << "#{v}\r\n".b
  end
  body << "--#{boundary}\r\n".b
  body << "Content-Disposition: form-data; name=\"file\"; filename=\"#{File.basename(file_path)}\"\r\n".b
  body << "Content-Type: #{mime}\r\n\r\n".b
  body << File.binread(file_path)
  body << "\r\n".b
  body << "--#{boundary}--\r\n".b

  req = Net::HTTP::Post.new(uri.request_uri)
  req["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
  req["Authorization"] = "Bearer #{@api_key}"
  req.body = body
  resp = http.request(req)
  if resp.is_a?(Net::HTTPSuccess)
    resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
  else
    raise LLMAPIError, "Zhipu ASR API error: #{resp.code} - #{resp.body}"
  end
end
|
|
542
|
+
|
|
543
|
+
# POST with stream:true and yield each parsed SSE `data:` payload to the block.
|
|
544
|
+
# POST a chat request and yield each parsed SSE `data:` payload as it arrives.
#
# @param url [String] absolute endpoint URL
# @param body [Hash] request payload (the caller sets "stream" => true)
# @yieldparam event [Hash] one parsed JSON payload per `data:` line
# @raise [LLMAPIError] on a non-2xx response
def stream_chat(url, body)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 300

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Authorization"] = "Bearer #{@api_key}"
  request["Accept"] = "text/event-stream"
  request.body = body.to_json

  pending = ""
  finished = false
  client.request(request) do |response|
    unless response.is_a?(Net::HTTPSuccess)
      raise LLMAPIError, "Zhipu stream error: #{response.code} - #{response.body}"
    end

    response.read_body do |segment|
      # Once the [DONE] sentinel was seen, stop consuming further segments.
      break if finished

      pending << segment
      # Network segments do not align with lines: consume only complete lines and leave any
      # partial tail in the buffer for the next segment.
      while (newline_at = pending.index("\n"))
        line = pending.slice!(0, newline_at + 1).strip
        # Blank separators and non-data SSE fields are skipped.
        next unless line.start_with?("data:")

        payload = line.sub(/\Adata:\s*/, "")
        if payload == "[DONE]"
          finished = true
          break
        end

        begin
          yield JSON.parse(payload)
        rescue JSON::ParserError
          # Unparseable payloads are ignored rather than aborting the stream.
          next
        end
      end
    end
  end
end
|
|
581
|
+
|
|
582
|
+
# Write one generated image to disk.
#
# Inline base64 data (:b64_json) is decoded and saved as .png. Otherwise the image is
# downloaded from :url and the extension is chosen from the response Content-Type.
#
# @param image_data [Hash] {url:, b64_json:} as produced by parse_image_response
# @param output_dir [String] existing directory to write into
# @param filename [String] file name without extension
# @return [String] path of the written file
# @raise [Error] when the download fails or the hash carries neither field
def save_single_image(image_data, output_dir, filename)
  if image_data[:b64_json]
    file_path = File.join(output_dir, "#{filename}.png")
    File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
  elsif image_data[:url]
    uri = URI.parse(image_data[:url])
    response = Net::HTTP.get_response(uri)
    raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    # Compare only the media type: the header may carry parameters ("image/jpeg; charset=...")
    # or different casing, both of which used to fall through to ".png".
    media_type = response["content-type"].to_s.split(";").first.to_s.strip.downcase
    ext = case media_type
          when "image/jpeg", "image/jpg" then "jpg"
          when "image/png" then "png"
          when "image/gif" then "gif"
          when "image/webp" then "webp"
          else "png"
          end
    file_path = File.join(output_dir, "#{filename}.#{ext}")
    File.binwrite(file_path, response.body)
  else
    raise Error, "No image data available to save"
  end
  file_path
end
|
|
604
|
+
|
|
605
|
+
# Recursively convert Hash keys to strings.
#
# @param hash [Hash, Array, Object] value to convert; Arrays are walked element by element,
#   any other value is returned unchanged
# @return [Object] a structure of the same shape with every Hash key stringified
def stringify_hash(hash)
  return hash.map { |element| stringify_hash(element) } if hash.is_a?(Array)
  return hash unless hash.is_a?(Hash)

  hash.each_with_object({}) do |(key, value), converted|
    converted[key.to_s] = stringify_hash(value)
  end
end
|
|
615
|
+
end
|
|
616
|
+
end
|
data/lib/smart_prompt.rb
CHANGED
|
@@ -1,11 +1,30 @@
|
|
|
1
1
|
require File.expand_path('../smart_prompt/version', __FILE__)
|
|
2
|
+
require File.expand_path('../smart_prompt/token_counter', __FILE__)
|
|
3
|
+
require File.expand_path('../smart_prompt/message', __FILE__)
|
|
4
|
+
require File.expand_path('../smart_prompt/session', __FILE__)
|
|
5
|
+
require File.expand_path('../smart_prompt/context_strategy', __FILE__)
|
|
6
|
+
require File.expand_path('../smart_prompt/sliding_window_strategy', __FILE__)
|
|
7
|
+
require File.expand_path('../smart_prompt/relevance_based_strategy', __FILE__)
|
|
8
|
+
require File.expand_path('../smart_prompt/compression_engine', __FILE__)
|
|
9
|
+
require File.expand_path('../smart_prompt/summary_based_strategy', __FILE__)
|
|
10
|
+
require File.expand_path('../smart_prompt/hybrid_strategy', __FILE__)
|
|
11
|
+
require File.expand_path('../smart_prompt/persistence_layer', __FILE__)
|
|
12
|
+
require File.expand_path('../smart_prompt/lru_cache', __FILE__)
|
|
13
|
+
require File.expand_path('../smart_prompt/history_manager', __FILE__)
|
|
2
14
|
require File.expand_path('../smart_prompt/engine', __FILE__)
|
|
3
15
|
require File.expand_path('../smart_prompt/api_handler', __FILE__)
|
|
4
16
|
require File.expand_path('../smart_prompt/conversation', __FILE__)
|
|
5
17
|
require File.expand_path('../smart_prompt/llm_adapter', __FILE__)
|
|
6
18
|
require File.expand_path('../smart_prompt/openai_adapter', __FILE__)
|
|
7
|
-
require File.expand_path('../smart_prompt/anthropic_adapter', __FILE__)
|
|
8
19
|
require File.expand_path('../smart_prompt/llamacpp_adapter', __FILE__)
|
|
20
|
+
require File.expand_path('../smart_prompt/anthropic_adapter', __FILE__)
|
|
21
|
+
require File.expand_path('../smart_prompt/sensenova_adapter', __FILE__)
|
|
22
|
+
require File.expand_path('../smart_prompt/zhipu_adapter', __FILE__)
|
|
23
|
+
require File.expand_path('../smart_prompt/multimodal_adapter', __FILE__)
|
|
24
|
+
require File.expand_path('../smart_prompt/image_generation_adapter', __FILE__)
|
|
25
|
+
require File.expand_path('../smart_prompt/video_generation_adapter', __FILE__)
|
|
26
|
+
require File.expand_path('../smart_prompt/tts_adapter', __FILE__)
|
|
27
|
+
require File.expand_path('../smart_prompt/stt_adapter', __FILE__)
|
|
9
28
|
require File.expand_path('../smart_prompt/prompt_template', __FILE__)
|
|
10
29
|
require File.expand_path('../smart_prompt/worker', __FILE__)
|
|
11
30
|
|
|
@@ -14,6 +33,7 @@ module SmartPrompt
|
|
|
14
33
|
class ConfigurationError < Error; end
|
|
15
34
|
class LLMAPIError < Error; end
|
|
16
35
|
class CallWorkerError < Error; end
|
|
36
|
+
class HistoryManagerError < Error; end
|
|
17
37
|
|
|
18
38
|
attr_writer :logger
|
|
19
39
|
|
|
@@ -35,4 +55,4 @@ module SmartPrompt
|
|
|
35
55
|
log.progname = self.name
|
|
36
56
|
end
|
|
37
57
|
end
|
|
38
|
-
end
|
|
58
|
+
end
|