smart_prompt 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.cn.md +1 -0
- data/README.md +1 -0
- data/config/siliconflow_config.yml +95 -0
- data/examples/siliconflow_example.rb +175 -0
- data/lib/smart_prompt/adapters/siliconflow/embed.rb +33 -0
- data/lib/smart_prompt/adapters/siliconflow/image.rb +103 -0
- data/lib/smart_prompt/adapters/siliconflow/rerank.rb +41 -0
- data/lib/smart_prompt/adapters/siliconflow/text.rb +54 -0
- data/lib/smart_prompt/adapters/siliconflow/video.rb +111 -0
- data/lib/smart_prompt/adapters/siliconflow/voice.rb +102 -0
- data/lib/smart_prompt/adapters/zhipu/embed.rb +32 -0
- data/lib/smart_prompt/adapters/zhipu/image.rb +59 -0
- data/lib/smart_prompt/adapters/zhipu/rerank.rb +17 -0
- data/lib/smart_prompt/adapters/zhipu/text.rb +57 -0
- data/lib/smart_prompt/adapters/zhipu/video.rb +101 -0
- data/lib/smart_prompt/adapters/zhipu/voice.rb +55 -0
- data/lib/smart_prompt/concerns/http_client.rb +147 -0
- data/lib/smart_prompt/concerns/image_persistence.rb +62 -0
- data/lib/smart_prompt/concerns/multimodal_messages.rb +108 -0
- data/lib/smart_prompt/concerns/openai_chat_shaping.rb +87 -0
- data/lib/smart_prompt/sensenova_adapter.rb +34 -211
- data/lib/smart_prompt/siliconflow_adapter.rb +91 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/zhipu_adapter.rb +51 -575
- data/lib/smart_prompt.rb +1 -0
- data/workers/siliconflow_workers.rb +167 -0
- metadata +21 -1
|
@@ -3,38 +3,60 @@ require "json"
|
|
|
3
3
|
require "net/http"
|
|
4
4
|
require "uri"
|
|
5
5
|
require "fileutils"
|
|
6
|
+
require_relative "concerns/image_persistence"
|
|
7
|
+
require_relative "concerns/openai_chat_shaping"
|
|
8
|
+
require_relative "concerns/multimodal_messages"
|
|
9
|
+
require_relative "concerns/http_client"
|
|
10
|
+
require_relative "adapters/zhipu/text"
|
|
11
|
+
require_relative "adapters/zhipu/embed"
|
|
12
|
+
require_relative "adapters/zhipu/image"
|
|
13
|
+
require_relative "adapters/zhipu/video"
|
|
14
|
+
require_relative "adapters/zhipu/voice"
|
|
15
|
+
require_relative "adapters/zhipu/rerank"
|
|
6
16
|
|
|
7
17
|
module SmartPrompt
|
|
8
|
-
# Adapter for 智谱 AI (BigModel / GLM) —
|
|
9
|
-
#
|
|
10
|
-
# base URL `https://open.bigmodel.cn/api/paas/v4` and Bearer-token auth, so a single config
|
|
11
|
-
# block serves them just by changing `model`.
|
|
18
|
+
# Adapter for 智谱 AI (BigModel / GLM) — one adapter owns the whole provider: every
|
|
19
|
+
# category shares the base URL https://open.bigmodel.cn/api/paas/v4 and Bearer auth.
|
|
12
20
|
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
# 3. 向量 (embeddings) — POST {base}/embeddings (embedding-3, custom dimensions)
|
|
18
|
-
# 4. 文生图 (image) — POST {base}/images/generations (response is NESTED: data.images[].url)
|
|
19
|
-
# 5. 文生视频 (video) — POST {base}/videos/generations -> task_id; GET {base}/async-result?task_id=
|
|
20
|
-
# poll until SUCCESS -> video_result.url (async)
|
|
21
|
-
# 6. 语音合成 (TTS) — POST {base}/audio/speech (glm-tts)
|
|
22
|
-
# 7. 语音识别 (ASR) — POST {base}/audio/transcriptions (glm-asr-2512, multipart)
|
|
23
|
-
# 8. 重排 (rerank) — POST {base}/rerank
|
|
21
|
+
# Per-modality behavior lives in capability modules under adapters/zhipu/
|
|
22
|
+
# (Text / Embed / Image / Video / Voice / Rerank); cross-provider plumbing (HTTP,
|
|
23
|
+
# multimodal normalization, chat shaping, image saving) comes from the shared
|
|
24
|
+
# concerns. This class wires them together + holds config/credentials.
|
|
24
25
|
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#
|
|
26
|
+
# chat/vision — POST {base}/chat/completions (OpenAI-compatible; reasoning_content)
|
|
27
|
+
# embeddings — POST {base}/embeddings (embedding-3, custom dimensions)
|
|
28
|
+
# image — POST {base}/images/generations (nested data.images[].url)
|
|
29
|
+
# video — POST {base}/videos/generations -> GET {base}/async-result (async)
|
|
30
|
+
# tts — POST {base}/audio/speech (glm-tts)
|
|
31
|
+
# asr — POST {base}/audio/transcriptions (multipart)
|
|
32
|
+
# rerank — POST {base}/rerank
|
|
28
33
|
class ZhipuAIAdapter < LLMAdapter
|
|
29
34
|
DEFAULT_BASE_URL = "https://open.bigmodel.cn/api/paas/v4".freeze
|
|
30
35
|
# CodeGeeX-4 / coding models use a separate base.
|
|
31
36
|
DEFAULT_CODING_BASE_URL = "https://open.bigmodel.cn/api/coding/paas/v4".freeze
|
|
32
|
-
SUPPORTED_IMAGE_FORMATS = %w[jpg jpeg png gif bmp webp].freeze
|
|
33
37
|
|
|
34
|
-
#
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
+
# Cross-provider shared concerns
|
|
39
|
+
include ImagePersistence
|
|
40
|
+
include OpenAIChatShaping
|
|
41
|
+
include MultimodalMessages
|
|
42
|
+
include HTTPClient
|
|
43
|
+
|
|
44
|
+
# Per-capability modules
|
|
45
|
+
include ZhipuAI::Text
|
|
46
|
+
include ZhipuAI::Embed
|
|
47
|
+
include ZhipuAI::Image
|
|
48
|
+
include ZhipuAI::Video
|
|
49
|
+
include ZhipuAI::Voice
|
|
50
|
+
include ZhipuAI::Rerank
|
|
51
|
+
|
|
52
|
+
# ---- hooks for shared concerns -------------------------------------------
|
|
53
|
+
def provider_label
|
|
54
|
+
"Zhipu"
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def default_image_prefix
|
|
58
|
+
"zhipu_image"
|
|
59
|
+
end
|
|
38
60
|
|
|
39
61
|
def initialize(config)
|
|
40
62
|
super
|
|
@@ -44,8 +66,8 @@ module SmartPrompt
|
|
|
44
66
|
if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
|
|
45
67
|
api_key = eval(api_key)
|
|
46
68
|
end
|
|
47
|
-
#
|
|
48
|
-
#
|
|
69
|
+
# Tolerate a missing key at construction so examples/config can load without a
|
|
70
|
+
# live key; the first request fails with a clear auth error.
|
|
49
71
|
SmartPrompt.logger.warn "Zhipu api_key is empty — API calls will fail until it is set." if api_key.nil? || api_key.to_s.strip.empty?
|
|
50
72
|
|
|
51
73
|
@api_key = api_key
|
|
@@ -58,559 +80,13 @@ module SmartPrompt
|
|
|
58
80
|
SmartPrompt.logger.info "Zhipu base_url=#{@base_url}"
|
|
59
81
|
end
|
|
60
82
|
|
|
61
|
-
public
|
|
62
|
-
|
|
63
|
-
# ---- chat + vision -------------------------------------------------------
|
|
64
|
-
|
|
65
|
-
# Chat / multimodal. Non-streaming returns a full OpenAI-format hash (so last_response
|
|
66
|
-
# carries usage + reasoning_content); streaming calls +proc+ with each OpenAI-shaped chunk.
|
|
67
|
-
def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
|
|
68
|
-
model_name = model || @config["model"]
|
|
69
|
-
body = build_chat_body(messages, model_name, temperature, tools)
|
|
70
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: chat request model=#{model_name} stream=#{!proc.nil?}"
|
|
71
|
-
|
|
72
|
-
url = chat_url_for(model_name)
|
|
73
|
-
if proc
|
|
74
|
-
body["stream"] = true
|
|
75
|
-
stream_chat(url, body) { |data| proc.call(build_stream_chunk(data), 0) }
|
|
76
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: streaming request finished"
|
|
77
|
-
nil
|
|
78
|
-
else
|
|
79
|
-
raw = http_post_json(url, body)
|
|
80
|
-
response = build_completion_response(raw)
|
|
81
|
-
@last_response = response
|
|
82
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: received chat response"
|
|
83
|
-
response
|
|
84
|
-
end
|
|
85
|
-
rescue LLMAPIError, Error
|
|
86
|
-
raise
|
|
87
|
-
rescue => e
|
|
88
|
-
SmartPrompt.logger.error "Zhipu chat error: #{e.message}"
|
|
89
|
-
raise LLMAPIError, "Failed to call Zhipu chat: #{e.message}"
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
# ---- embeddings ----------------------------------------------------------
|
|
93
|
-
|
|
94
|
-
# embedding-3 (default 2048 dims); supports a custom `dimensions` (256/512/1024/2048)
|
|
95
|
-
# via config. Returns the first embedding vector.
|
|
96
|
-
def embeddings(text, model)
|
|
97
|
-
model_name = model || @config["embedding_model"] || @config["model"]
|
|
98
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: embeddings model=#{model_name}"
|
|
99
|
-
|
|
100
|
-
body = { "model" => model_name, "input" => text.is_a?(Array) ? text : [text.to_s] }
|
|
101
|
-
body["dimensions"] = @config["dimensions"] if @config["dimensions"]
|
|
102
|
-
body["encoding_format"] = @config["encoding_format"] if @config["encoding_format"]
|
|
103
|
-
|
|
104
|
-
response =
|
|
105
|
-
begin
|
|
106
|
-
http_post_json("#{@base_url}/embeddings", body)
|
|
107
|
-
rescue LLMAPIError, Error
|
|
108
|
-
raise
|
|
109
|
-
rescue => e
|
|
110
|
-
raise LLMAPIError, "Failed to call Zhipu embeddings: #{e.message}"
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
items = response["data"]
|
|
114
|
-
unless items.is_a?(Array) && items.any? && items[0]["embedding"]
|
|
115
|
-
raise LLMAPIError, "No embedding vector in Zhipu response: #{response.inspect}"
|
|
116
|
-
end
|
|
117
|
-
items[0]["embedding"]
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
# ---- image (CogView / GLM-Image) -----------------------------------------
|
|
121
|
-
|
|
122
|
-
# Text-to-image. The Zhipu response is NESTED: data.images[].url (not OpenAI's data[]),
|
|
123
|
-
# so we parse defensively. Returns an Array of {url:, b64_json:}.
|
|
124
|
-
def generate_image(prompt, params = {})
|
|
125
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: generating image"
|
|
126
|
-
raise Error, "Prompt cannot be empty" if prompt.nil? || prompt.to_s.strip.empty?
|
|
127
|
-
|
|
128
|
-
model_name = params[:model] || @config["image_model"] || @config["model"]
|
|
129
|
-
raise Error, "No model configured for image generation" if model_name.nil? || model_name.to_s.strip.empty?
|
|
130
|
-
|
|
131
|
-
body = { "model" => model_name, "prompt" => prompt.to_s }
|
|
132
|
-
body["size"] = params[:size] if params[:size]
|
|
133
|
-
body["user"] = params[:user] if params[:user]
|
|
134
|
-
body["response_format"] = params[:response_format] if params[:response_format]
|
|
135
|
-
|
|
136
|
-
SmartPrompt.logger.info "Zhipu image params: #{body.except('prompt').inspect}"
|
|
137
|
-
response =
|
|
138
|
-
begin
|
|
139
|
-
http_post_json(@image_url, body)
|
|
140
|
-
rescue LLMAPIError, Error
|
|
141
|
-
raise
|
|
142
|
-
rescue => e
|
|
143
|
-
raise Error, "Failed to call Zhipu image generation: #{e.message}"
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
images = parse_image_response(response)
|
|
147
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: generated #{images.size} image(s)"
|
|
148
|
-
images
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
# Save one or many generated images to disk (Array from #generate_image or a single hash).
|
|
152
|
-
def save_image(image_data, output_dir = "./output", filename_prefix = "zhipu_image")
|
|
153
|
-
FileUtils.mkdir_p(output_dir)
|
|
154
|
-
images = image_data.is_a?(Array) ? image_data : [image_data]
|
|
155
|
-
saved = images.each_with_index.map do |img, index|
|
|
156
|
-
save_single_image(img, output_dir, "#{filename_prefix}_#{index + 1}")
|
|
157
|
-
end
|
|
158
|
-
SmartPrompt.logger.info "Saved #{saved.size} Zhipu image(s) to #{output_dir}"
|
|
159
|
-
saved
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
# ---- video (CogVideoX, async) --------------------------------------------
|
|
163
|
-
|
|
164
|
-
# Submit a text-to-video (or image-to-video) job. Returns the task id.
|
|
165
|
-
def generate_video(prompt, params = {})
|
|
166
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: submitting video job"
|
|
167
|
-
model_name = params[:model] || @config["video_model"] || @config["model"]
|
|
168
|
-
raise Error, "No model configured for video generation" if model_name.nil? || model_name.to_s.strip.empty?
|
|
169
|
-
|
|
170
|
-
body = { "model" => model_name, "prompt" => prompt.to_s }
|
|
171
|
-
%i[quality fps duration with_audio resolution request_id seed].each do |k|
|
|
172
|
-
body[k.to_s] = params[k] unless params[k].nil?
|
|
173
|
-
end
|
|
174
|
-
body["image_url"] = normalize_image_url(params[:image_url]) if params[:image_url]
|
|
175
|
-
|
|
176
|
-
SmartPrompt.logger.info "Zhipu video params: #{body.except('prompt').inspect}"
|
|
177
|
-
response =
|
|
178
|
-
begin
|
|
179
|
-
http_post_json(@video_url, body)
|
|
180
|
-
rescue LLMAPIError, Error
|
|
181
|
-
raise
|
|
182
|
-
rescue => e
|
|
183
|
-
raise Error, "Failed to submit Zhipu video job: #{e.message}"
|
|
184
|
-
end
|
|
185
|
-
|
|
186
|
-
task_id = response["id"] || response["task_id"]
|
|
187
|
-
raise LLMAPIError, "No task id in Zhipu video response: #{response.inspect}" unless task_id
|
|
188
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: video task #{task_id} submitted"
|
|
189
|
-
{ task_id: task_id, model: model_name, raw: response }
|
|
190
|
-
end
|
|
191
|
-
|
|
192
|
-
# Poll an async task. Returns the raw status hash (task_status etc.).
|
|
193
|
-
def check_video_status(task_id)
|
|
194
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: polling video task #{task_id}"
|
|
195
|
-
http_get_json("#{@query_url}/#{URI.encode_www_form_component(task_id)}")
|
|
196
|
-
rescue LLMAPIError, Error
|
|
197
|
-
raise
|
|
198
|
-
rescue => e
|
|
199
|
-
raise LLMAPIError, "Failed to query Zhipu video task: #{e.message}"
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
# Block until the task finishes (or times out), then return the video URL.
|
|
203
|
-
def wait_for_video_completion(task_id, check_interval: 10, timeout: 600)
|
|
204
|
-
start = Time.now
|
|
205
|
-
loop do
|
|
206
|
-
status = check_video_status(task_id)
|
|
207
|
-
case task_status_of(status)
|
|
208
|
-
when "SUCCESS"
|
|
209
|
-
url = video_url_of(status)
|
|
210
|
-
raise LLMAPIError, "Video succeeded but no url in: #{status.inspect}" unless url
|
|
211
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: video ready #{url}"
|
|
212
|
-
return { task_id: task_id, status: "SUCCESS", video_url: url, cover_image_url: cover_url_of(status), raw: status }
|
|
213
|
-
when "FAIL", "FAILED"
|
|
214
|
-
raise LLMAPIError, "Zhipu video generation failed: #{status.inspect}"
|
|
215
|
-
else
|
|
216
|
-
if Time.now - start > timeout
|
|
217
|
-
raise LLMAPIError, "Zhipu video generation timeout after #{timeout}s"
|
|
218
|
-
end
|
|
219
|
-
SmartPrompt.logger.info "Zhipu video task #{task_id} still processing..."
|
|
220
|
-
sleep(check_interval)
|
|
221
|
-
end
|
|
222
|
-
end
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
def download_video(video_url, output_path)
|
|
226
|
-
uri = URI.parse(video_url)
|
|
227
|
-
http = Net::HTTP.new(uri.host, uri.port); http.use_ssl = (uri.scheme == "https")
|
|
228
|
-
response = http.request(Net::HTTP::Get.new(uri.request_uri))
|
|
229
|
-
raise Error, "Failed to download video: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
|
|
230
|
-
FileUtils.mkdir_p(File.dirname(output_path))
|
|
231
|
-
File.binwrite(output_path, response.body)
|
|
232
|
-
SmartPrompt.logger.info "Zhipu video saved to #{output_path}"
|
|
233
|
-
output_path
|
|
234
|
-
rescue => e
|
|
235
|
-
raise e.is_a?(SmartPrompt::Error) ? e : Error, "Error downloading Zhipu video: #{e.message}"
|
|
236
|
-
end
|
|
237
|
-
|
|
238
|
-
# ---- TTS (GLM-TTS) -------------------------------------------------------
|
|
239
|
-
|
|
240
|
-
# Returns a base64 data URL for the synthesized audio. GLM-TTS accepts wav/pcm only
|
|
241
|
-
# (mp3/flac are rejected), so default to wav.
|
|
242
|
-
def synthesize_speech(text, voice: nil, model: nil, response_format: "wav", **opts)
|
|
243
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: TTS"
|
|
244
|
-
raise Error, "Text cannot be empty" if text.nil? || text.to_s.strip.empty?
|
|
245
|
-
|
|
246
|
-
model_name = model || @config["tts_model"] || "glm-tts"
|
|
247
|
-
body = { "model" => model_name, "input" => text.to_s }
|
|
248
|
-
body["voice"] = voice if voice
|
|
249
|
-
body["response_format"] = response_format
|
|
250
|
-
body["speed"] = opts[:speed] if opts[:speed]
|
|
251
|
-
body["emotion"] = opts[:emotion] if opts[:emotion]
|
|
252
|
-
|
|
253
|
-
audio = http_post_binary("#{@base_url}/audio/speech", body)
|
|
254
|
-
"data:audio/#{response_format};base64,#{Base64.strict_encode64(audio)}"
|
|
255
|
-
rescue LLMAPIError, Error
|
|
256
|
-
raise
|
|
257
|
-
rescue => e
|
|
258
|
-
raise Error, "Failed to call Zhipu TTS: #{e.message}"
|
|
259
|
-
end
|
|
260
|
-
|
|
261
|
-
def synthesize_to_file(text, output_path, voice: nil, model: nil, response_format: "wav", **opts)
|
|
262
|
-
data_url = synthesize_speech(text, voice: voice, model: model, response_format: response_format, **opts)
|
|
263
|
-
FileUtils.mkdir_p(File.dirname(output_path))
|
|
264
|
-
audio_bytes = Base64.decode64(data_url.sub(/\Adata:audio\/\w+;base64,/, ""))
|
|
265
|
-
File.binwrite(output_path, audio_bytes)
|
|
266
|
-
SmartPrompt.logger.info "Zhipu audio saved to #{output_path}"
|
|
267
|
-
{ file_path: output_path, format: response_format }
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
# ---- ASR (GLM-ASR-2512) --------------------------------------------------
|
|
271
|
-
|
|
272
|
-
# Transcribe an audio file (local path). Returns {text:}.
|
|
273
|
-
def transcribe_audio(audio_file, model: nil, language: nil, **opts)
|
|
274
|
-
SmartPrompt.logger.info "ZhipuAIAdapter: ASR #{File.basename(audio_file)}"
|
|
275
|
-
raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)
|
|
276
|
-
|
|
277
|
-
model_name = model || @config["asr_model"] || "glm-asr-2512"
|
|
278
|
-
form = { "model" => model_name }
|
|
279
|
-
form["language"] = language if language
|
|
280
|
-
form["prompt"] = opts[:prompt] if opts[:prompt]
|
|
281
|
-
form["response_format"] = opts[:response_format] if opts[:response_format]
|
|
282
|
-
|
|
283
|
-
response = http_post_multipart("#{@base_url}/audio/transcriptions", form, audio_file)
|
|
284
|
-
{ text: response["text"] }
|
|
285
|
-
rescue LLMAPIError, Error
|
|
286
|
-
raise
|
|
287
|
-
rescue => e
|
|
288
|
-
raise e.is_a?(SmartPrompt::Error) ? e : Error, "Failed to call Zhipu ASR: #{e.message}"
|
|
289
|
-
end
|
|
290
|
-
|
|
291
|
-
# ---- rerank (bonus) ------------------------------------------------------
|
|
292
|
-
|
|
293
|
-
def rerank(query, documents, model: nil)
|
|
294
|
-
model_name = model || @config["rerank_model"] || @config["model"]
|
|
295
|
-
body = { "model" => model_name, "query" => query, "documents" => documents }
|
|
296
|
-
response = http_post_json("#{@base_url}/rerank", body)
|
|
297
|
-
(response["results"] || []).map { |r| { index: r["index"], relevance_score: r["relevance_score"] || r["score"] } }
|
|
298
|
-
rescue LLMAPIError, Error
|
|
299
|
-
raise
|
|
300
|
-
rescue => e
|
|
301
|
-
raise LLMAPIError, "Failed to call Zhipu rerank: #{e.message}"
|
|
302
|
-
end
|
|
303
|
-
|
|
304
83
|
private
|
|
305
84
|
|
|
306
|
-
#
|
|
307
|
-
|
|
308
|
-
def chat_url_for(model_name)
|
|
309
|
-
# CodeGeeX-4 and coding models are served from the coding base.
|
|
310
|
-
(model_name.to_s.include?("codegeex") || @config["coding"]) ? "#{@coding_base}/chat/completions" : "#{@base_url}/chat/completions"
|
|
311
|
-
end
|
|
312
|
-
|
|
313
|
-
def build_chat_body(messages, model_name, temperature, tools)
|
|
314
|
-
body = {
|
|
315
|
-
"model" => model_name,
|
|
316
|
-
"messages" => process_multimodal_messages(messages),
|
|
317
|
-
"temperature" => @config["temperature"] || temperature || 0.7,
|
|
318
|
-
}
|
|
319
|
-
CHAT_OPTIONAL_KEYS.each { |k| body[k] = @config[k] if @config.key?(k) }
|
|
320
|
-
body["tools"] = tools if tools && !tools.empty?
|
|
321
|
-
body
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
# Pass messages through, normalizing multimodal content (local image paths -> data URLs).
|
|
325
|
-
def process_multimodal_messages(messages)
|
|
326
|
-
messages.map do |msg|
|
|
327
|
-
role = msg[:role] || msg["role"]
|
|
328
|
-
content = msg[:content] || msg["content"]
|
|
329
|
-
content = content.map { |item| normalize_content_item(item) } if content.is_a?(Array)
|
|
330
|
-
{ "role" => role, "content" => content }
|
|
331
|
-
end
|
|
332
|
-
end
|
|
333
|
-
|
|
334
|
-
def normalize_content_item(item)
|
|
335
|
-
return { "type" => "text", "text" => item.to_s } unless item.is_a?(Hash)
|
|
336
|
-
|
|
337
|
-
type = item[:type] || item["type"]
|
|
338
|
-
if type == "image_url"
|
|
339
|
-
iu = item[:image_url] || item["image_url"]
|
|
340
|
-
url = iu.is_a?(Hash) ? (iu[:url] || iu["url"]) : iu
|
|
341
|
-
{ "type" => "image_url", "image_url" => { "url" => normalize_image_url(url) } }
|
|
342
|
-
else
|
|
343
|
-
stringify_hash(item)
|
|
344
|
-
end
|
|
345
|
-
end
|
|
346
|
-
|
|
347
|
-
def normalize_image_url(url)
|
|
348
|
-
return url if url.nil?
|
|
349
|
-
return url if url.start_with?("http://", "https://", "data:")
|
|
350
|
-
|
|
351
|
-
raise Error, "Image file not found: #{url}" unless File.exist?(url)
|
|
352
|
-
ext = File.extname(url).downcase.delete(".")
|
|
353
|
-
raise Error, "Unsupported image format: #{ext}" unless SUPPORTED_IMAGE_FORMATS.include?(ext)
|
|
354
|
-
mime = ext == "jpg" ? "jpeg" : ext
|
|
355
|
-
"data:image/#{mime};base64,#{Base64.strict_encode64(File.binread(url))}"
|
|
356
|
-
end
|
|
357
|
-
|
|
358
|
-
# ---- response shaping ----------------------------------------------------
|
|
359
|
-
|
|
360
|
-
# Zhipu's non-streaming chat response is already OpenAI-shaped; normalize to a consistent
|
|
361
|
-
# hash and preserve reasoning_content where present.
|
|
362
|
-
def build_completion_response(raw)
|
|
363
|
-
msg = raw.dig("choices", 0, "message") || {}
|
|
364
|
-
message = { "role" => msg["role"] || "assistant" }
|
|
365
|
-
message["content"] = msg["content"]
|
|
366
|
-
message["reasoning_content"] = msg["reasoning_content"] if msg["reasoning_content"]
|
|
367
|
-
message["tool_calls"] = msg["tool_calls"] if msg["tool_calls"]
|
|
368
|
-
|
|
369
|
-
response = {
|
|
370
|
-
"id" => raw["id"],
|
|
371
|
-
"object" => raw["object"] || "chat.completion",
|
|
372
|
-
"created" => raw["created"],
|
|
373
|
-
"model" => raw["model"],
|
|
374
|
-
"choices" => [{
|
|
375
|
-
"index" => 0,
|
|
376
|
-
"message" => message,
|
|
377
|
-
"finish_reason" => raw.dig("choices", 0, "finish_reason"),
|
|
378
|
-
}],
|
|
379
|
-
}
|
|
380
|
-
response["usage"] = raw["usage"] if raw["usage"]
|
|
381
|
-
response
|
|
382
|
-
end
|
|
383
|
-
|
|
384
|
-
# Build an OpenAI-style streaming chunk from a Zhipu SSE event. reasoning_content and
|
|
385
|
-
# content pass through unchanged (Zhipu already uses these names).
|
|
386
|
-
def build_stream_chunk(data)
|
|
387
|
-
chunk = {
|
|
388
|
-
"id" => data["id"],
|
|
389
|
-
"object" => data["object"],
|
|
390
|
-
"created" => data["created"],
|
|
391
|
-
"model" => data["model"],
|
|
392
|
-
}
|
|
393
|
-
chunk["usage"] = data["usage"] if data["usage"]
|
|
394
|
-
|
|
395
|
-
choices = data["choices"] || []
|
|
396
|
-
if choices.any?
|
|
397
|
-
delta = choices[0]["delta"] || {}
|
|
398
|
-
new_delta = {}
|
|
399
|
-
new_delta["role"] = delta["role"] if delta["role"]
|
|
400
|
-
new_delta["content"] = delta["content"] if delta["content"]
|
|
401
|
-
new_delta["reasoning_content"] = delta["reasoning_content"] if delta["reasoning_content"]
|
|
402
|
-
new_delta["tool_calls"] = delta["tool_calls"] if delta["tool_calls"]
|
|
403
|
-
chunk["choices"] = [{
|
|
404
|
-
"index" => choices[0]["index"] || 0,
|
|
405
|
-
"delta" => new_delta,
|
|
406
|
-
"finish_reason" => choices[0]["finish_reason"],
|
|
407
|
-
}]
|
|
408
|
-
else
|
|
409
|
-
chunk["choices"] = []
|
|
410
|
-
end
|
|
411
|
-
chunk
|
|
412
|
-
end
|
|
413
|
-
|
|
414
|
-
# Zhipu image response: cogview-3-flash returns the FLAT OpenAI shape data[].url;
|
|
415
|
-
# older docs mention a NESTED data.images[].url. Handle both plus a bare-url array.
|
|
416
|
-
def parse_image_response(response)
|
|
417
|
-
container = response["data"]
|
|
418
|
-
items =
|
|
419
|
-
if container.is_a?(Hash)
|
|
420
|
-
container["images"] || container["data"] || container["url"]
|
|
421
|
-
elsif container.is_a?(Array)
|
|
422
|
-
container
|
|
423
|
-
end
|
|
424
|
-
items ||= response["images"] || response["urls"]
|
|
425
|
-
|
|
426
|
-
# Some responses return images as a bare array of URLs (strings).
|
|
427
|
-
items = items.map { |x| x.is_a?(String) ? { "url" => x } : x } if items.is_a?(Array)
|
|
428
|
-
|
|
429
|
-
unless items.is_a?(Array) && items.any?
|
|
430
|
-
SmartPrompt.logger.error "Zhipu image response had no images: #{response.inspect}"
|
|
431
|
-
raise LLMAPIError, "No image data in Zhipu response"
|
|
432
|
-
end
|
|
433
|
-
items.map { |d| { url: d["url"], b64_json: d["b64_json"] } }
|
|
434
|
-
end
|
|
435
|
-
|
|
436
|
-
# Zhipu async task status is under task_status; accept a few aliases.
|
|
437
|
-
def task_status_of(status)
|
|
438
|
-
status["task_status"] || status["status"] || "PROCESSING"
|
|
439
|
-
end
|
|
440
|
-
|
|
441
|
-
# video_result is an Array: [{cover_image_url:, url:}]. Pull the first video url.
|
|
442
|
-
def video_url_of(status)
|
|
443
|
-
vr = status["video_result"]
|
|
444
|
-
item = vr.is_a?(Array) ? vr[0] : vr
|
|
445
|
-
return item["url"] || item["video_url"] if item.is_a?(Hash)
|
|
446
|
-
status["video_url"] || status.dig("data", "video_url")
|
|
447
|
-
end
|
|
448
|
-
|
|
449
|
-
def cover_url_of(status)
|
|
450
|
-
vr = status["video_result"]
|
|
451
|
-
item = vr.is_a?(Array) ? vr[0] : vr
|
|
452
|
-
item.is_a?(Hash) ? (item["cover_image_url"] || item["cover_url"]) : nil
|
|
453
|
-
end
|
|
454
|
-
|
|
455
|
-
# ---- HTTP ----------------------------------------------------------------
|
|
456
|
-
|
|
457
|
-
def http_post_json(url, body)
|
|
458
|
-
uri = URI.parse(url)
|
|
459
|
-
http = Net::HTTP.new(uri.host, uri.port); http.use_ssl = (uri.scheme == "https")
|
|
460
|
-
http.open_timeout = 30; http.read_timeout = 240
|
|
461
|
-
req = Net::HTTP::Post.new(uri.request_uri)
|
|
462
|
-
req["Content-Type"] = "application/json"
|
|
463
|
-
req["Authorization"] = "Bearer #{@api_key}"
|
|
464
|
-
req.body = body.to_json
|
|
465
|
-
SmartPrompt.logger.debug "Zhipu POST #{uri} body=#{body.to_json}"
|
|
466
|
-
resp = http.request(req)
|
|
467
|
-
if resp.is_a?(Net::HTTPSuccess)
|
|
468
|
-
resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
|
|
469
|
-
else
|
|
470
|
-
SmartPrompt.logger.error "Zhipu API error: #{resp.code} - #{resp.body}"
|
|
471
|
-
raise LLMAPIError, "Zhipu API error: #{resp.code} - #{resp.body}"
|
|
472
|
-
end
|
|
473
|
-
end
|
|
474
|
-
|
|
475
|
-
def http_get_json(url)
|
|
476
|
-
uri = URI.parse(url)
|
|
477
|
-
http = Net::HTTP.new(uri.host, uri.port); http.use_ssl = (uri.scheme == "https")
|
|
478
|
-
http.open_timeout = 30; http.read_timeout = 60
|
|
479
|
-
req = Net::HTTP::Get.new(uri.request_uri)
|
|
480
|
-
req["Authorization"] = "Bearer #{@api_key}"
|
|
481
|
-
SmartPrompt.logger.debug "Zhipu GET #{uri}"
|
|
482
|
-
resp = http.request(req)
|
|
483
|
-
if resp.is_a?(Net::HTTPSuccess)
|
|
484
|
-
resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
|
|
485
|
-
else
|
|
486
|
-
raise LLMAPIError, "Zhipu API error: #{resp.code} - #{resp.body}"
|
|
487
|
-
end
|
|
488
|
-
end
|
|
489
|
-
|
|
490
|
-
# Returns the raw response body bytes (for TTS audio).
|
|
491
|
-
def http_post_binary(url, body)
|
|
492
|
-
uri = URI.parse(url)
|
|
493
|
-
http = Net::HTTP.new(uri.host, uri.port); http.use_ssl = (uri.scheme == "https")
|
|
494
|
-
http.open_timeout = 30; http.read_timeout = 120
|
|
495
|
-
req = Net::HTTP::Post.new(uri.request_uri)
|
|
496
|
-
req["Content-Type"] = "application/json"
|
|
497
|
-
req["Authorization"] = "Bearer #{@api_key}"
|
|
498
|
-
req.body = body.to_json
|
|
499
|
-
resp = http.request(req)
|
|
500
|
-
if resp.is_a?(Net::HTTPSuccess)
|
|
501
|
-
resp.body
|
|
502
|
-
else
|
|
503
|
-
raise LLMAPIError, "Zhipu TTS API error: #{resp.code} - #{resp.body}"
|
|
504
|
-
end
|
|
505
|
-
end
|
|
506
|
-
|
|
507
|
-
# multipart/form-data POST with a file upload (for ASR). Returns parsed JSON.
|
|
85
|
+
# Zhipu's ASR call site uses the legacy 3-arg multipart shape (url, form, file_path).
|
|
86
|
+
# Adapt it to HTTPClient's 5-arg shape with a sensible audio mime.
|
|
508
87
|
def http_post_multipart(url, form, file_path)
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
http.open_timeout = 30; http.read_timeout = 180
|
|
512
|
-
|
|
513
|
-
boundary = "----SmartPrompt#{object_id}"
|
|
514
|
-
mime = "audio/#{(File.extname(file_path).downcase.delete(".") || "wav")}"
|
|
515
|
-
|
|
516
|
-
body = ""
|
|
517
|
-
form.each do |k, v|
|
|
518
|
-
body << "--#{boundary}\r\n"
|
|
519
|
-
body << "Content-Disposition: form-data; name=\"#{k}\"\r\n\r\n"
|
|
520
|
-
body << "#{v}\r\n"
|
|
521
|
-
end
|
|
522
|
-
File.open(file_path, "rb") do |f|
|
|
523
|
-
body << "--#{boundary}\r\n"
|
|
524
|
-
body << "Content-Disposition: form-data; name=\"file\"; filename=\"#{File.basename(file_path)}\"\r\n"
|
|
525
|
-
body << "Content-Type: #{mime}\r\n\r\n"
|
|
526
|
-
body << f.read
|
|
527
|
-
body << "\r\n"
|
|
528
|
-
end
|
|
529
|
-
body << "--#{boundary}--\r\n"
|
|
530
|
-
|
|
531
|
-
req = Net::HTTP::Post.new(uri.request_uri)
|
|
532
|
-
req["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
|
|
533
|
-
req["Authorization"] = "Bearer #{@api_key}"
|
|
534
|
-
req.body = body
|
|
535
|
-
resp = http.request(req)
|
|
536
|
-
if resp.is_a?(Net::HTTPSuccess)
|
|
537
|
-
resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
|
|
538
|
-
else
|
|
539
|
-
raise LLMAPIError, "Zhipu ASR API error: #{resp.code} - #{resp.body}"
|
|
540
|
-
end
|
|
541
|
-
end
|
|
542
|
-
|
|
543
|
-
# POST with stream:true and yield each parsed SSE `data:` payload to the block.
|
|
544
|
-
def stream_chat(url, body)
|
|
545
|
-
uri = URI.parse(url)
|
|
546
|
-
http = Net::HTTP.new(uri.host, uri.port); http.use_ssl = (uri.scheme == "https")
|
|
547
|
-
http.open_timeout = 30; http.read_timeout = 300
|
|
548
|
-
|
|
549
|
-
req = Net::HTTP::Post.new(uri.request_uri)
|
|
550
|
-
req["Content-Type"] = "application/json"
|
|
551
|
-
req["Authorization"] = "Bearer #{@api_key}"
|
|
552
|
-
req["Accept"] = "text/event-stream"
|
|
553
|
-
req.body = body.to_json
|
|
554
|
-
|
|
555
|
-
buffer = ""
|
|
556
|
-
done = false
|
|
557
|
-
http.request(req) do |response|
|
|
558
|
-
unless response.is_a?(Net::HTTPSuccess)
|
|
559
|
-
raise LLMAPIError, "Zhipu stream error: #{response.code} - #{response.body}"
|
|
560
|
-
end
|
|
561
|
-
response.read_body do |segment|
|
|
562
|
-
break if done
|
|
563
|
-
buffer << segment
|
|
564
|
-
while (idx = buffer.index("\n"))
|
|
565
|
-
line = buffer.slice!(0, idx + 1).strip
|
|
566
|
-
next if line.empty? || !line.start_with?("data:")
|
|
567
|
-
payload = line.sub(/\Adata:\s*/, "")
|
|
568
|
-
if payload == "[DONE]"
|
|
569
|
-
done = true
|
|
570
|
-
break
|
|
571
|
-
end
|
|
572
|
-
begin
|
|
573
|
-
yield JSON.parse(payload)
|
|
574
|
-
rescue JSON::ParserError
|
|
575
|
-
next
|
|
576
|
-
end
|
|
577
|
-
end
|
|
578
|
-
end
|
|
579
|
-
end
|
|
580
|
-
end
|
|
581
|
-
|
|
582
|
-
def save_single_image(image_data, output_dir, filename)
|
|
583
|
-
if image_data[:b64_json]
|
|
584
|
-
file_path = File.join(output_dir, "#{filename}.png")
|
|
585
|
-
File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
|
|
586
|
-
elsif image_data[:url]
|
|
587
|
-
uri = URI.parse(image_data[:url])
|
|
588
|
-
response = Net::HTTP.get_response(uri)
|
|
589
|
-
raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
|
|
590
|
-
ext = case response["content-type"]
|
|
591
|
-
when "image/jpeg", "image/jpg" then "jpg"
|
|
592
|
-
when "image/png" then "png"
|
|
593
|
-
when "image/gif" then "gif"
|
|
594
|
-
when "image/webp" then "webp"
|
|
595
|
-
else "png"
|
|
596
|
-
end
|
|
597
|
-
file_path = File.join(output_dir, "#{filename}.#{ext}")
|
|
598
|
-
File.binwrite(file_path, response.body)
|
|
599
|
-
else
|
|
600
|
-
raise Error, "No image data available to save"
|
|
601
|
-
end
|
|
602
|
-
file_path
|
|
603
|
-
end
|
|
604
|
-
|
|
605
|
-
def stringify_hash(hash)
|
|
606
|
-
case hash
|
|
607
|
-
when Hash
|
|
608
|
-
hash.each_with_object({}) { |(k, v), memo| memo[k.to_s] = stringify_hash(v) }
|
|
609
|
-
when Array
|
|
610
|
-
hash.map { |v| stringify_hash(v) }
|
|
611
|
-
else
|
|
612
|
-
hash
|
|
613
|
-
end
|
|
88
|
+
ext = File.extname(file_path).downcase.delete(".")
|
|
89
|
+
super(url, form, "file", file_path, "audio/#{ext.empty? ? 'wav' : ext}")
|
|
614
90
|
end
|
|
615
91
|
end
|
|
616
92
|
end
|
data/lib/smart_prompt.rb
CHANGED
|
@@ -21,6 +21,7 @@ require File.expand_path('../smart_prompt/llamacpp_adapter', __FILE__)
|
|
|
21
21
|
require File.expand_path('../smart_prompt/anthropic_adapter', __FILE__)
|
|
22
22
|
require File.expand_path('../smart_prompt/sensenova_adapter', __FILE__)
|
|
23
23
|
require File.expand_path('../smart_prompt/zhipu_adapter', __FILE__)
|
|
24
|
+
require File.expand_path('../smart_prompt/siliconflow_adapter', __FILE__)
|
|
24
25
|
require File.expand_path('../smart_prompt/multimodal_adapter', __FILE__)
|
|
25
26
|
require File.expand_path('../smart_prompt/image_generation_adapter', __FILE__)
|
|
26
27
|
require File.expand_path('../smart_prompt/video_generation_adapter', __FILE__)
|