smart_prompt 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -10
  3. data/README.cn.md +307 -64
  4. data/README.md +311 -64
  5. data/Rakefile +10 -1
  6. data/config/anthropic_config.yml +151 -0
  7. data/config/image_generation_config.yml +22 -0
  8. data/config/multimodal_config.yml +85 -0
  9. data/config/sensenova_config.yml +63 -0
  10. data/config/zhipu_config.yml +73 -0
  11. data/examples/anthropic_basic_chat.rb +143 -0
  12. data/examples/anthropic_example.rb +232 -0
  13. data/examples/anthropic_multimodal.rb +212 -0
  14. data/examples/anthropic_streaming.rb +312 -0
  15. data/examples/anthropic_tool_calling.rb +393 -0
  16. data/examples/automatic_cleanup_example.rb +109 -0
  17. data/examples/history_management_examples.rb +522 -0
  18. data/examples/image_generation_example.rb +130 -0
  19. data/examples/monitoring_example.rb +121 -0
  20. data/examples/multimodal_example.rb +63 -0
  21. data/examples/relevance_based_strategy_example.rb +87 -0
  22. data/examples/sensenova_example.rb +129 -0
  23. data/examples/stt_example.rb +287 -0
  24. data/examples/tts_example.rb +244 -0
  25. data/examples/video_generation_example.rb +189 -0
  26. data/examples/zhipu_example.rb +151 -0
  27. data/lib/smart_prompt/anthropic_adapter.rb +363 -281
  28. data/lib/smart_prompt/compression_engine.rb +201 -0
  29. data/lib/smart_prompt/context_strategy.rb +22 -0
  30. data/lib/smart_prompt/conversation.rb +81 -191
  31. data/lib/smart_prompt/engine.rb +36 -19
  32. data/lib/smart_prompt/history_manager.rb +596 -0
  33. data/lib/smart_prompt/hybrid_strategy.rb +222 -0
  34. data/lib/smart_prompt/image_generation_adapter.rb +297 -0
  35. data/lib/smart_prompt/lru_cache.rb +133 -0
  36. data/lib/smart_prompt/message.rb +57 -0
  37. data/lib/smart_prompt/multimodal_adapter.rb +277 -0
  38. data/lib/smart_prompt/openai_adapter.rb +1 -25
  39. data/lib/smart_prompt/persistence_layer.rb +197 -0
  40. data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
  41. data/lib/smart_prompt/sensenova_adapter.rb +410 -0
  42. data/lib/smart_prompt/session.rb +140 -0
  43. data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
  44. data/lib/smart_prompt/stt_adapter.rb +381 -0
  45. data/lib/smart_prompt/summary_based_strategy.rb +152 -0
  46. data/lib/smart_prompt/token_counter.rb +74 -0
  47. data/lib/smart_prompt/tts_adapter.rb +403 -0
  48. data/lib/smart_prompt/version.rb +1 -1
  49. data/lib/smart_prompt/video_generation_adapter.rb +330 -0
  50. data/lib/smart_prompt/worker.rb +25 -3
  51. data/lib/smart_prompt/zhipu_adapter.rb +616 -0
  52. data/lib/smart_prompt.rb +22 -2
  53. data/workers/history_management_examples.rb +407 -0
  54. data/workers/image_generation_workers.rb +119 -0
  55. data/workers/multimodal_workers.rb +110 -0
  56. data/workers/sensenova_workers.rb +62 -0
  57. data/workers/stt_workers.rb +195 -0
  58. data/workers/tts_workers.rb +388 -0
  59. data/workers/video_generation_workers.rb +264 -0
  60. data/workers/zhipu_workers.rb +113 -0
  61. metadata +84 -8
@@ -0,0 +1,616 @@
1
+ require "base64"
2
+ require "json"
3
+ require "net/http"
4
+ require "uri"
5
+ require "fileutils"
6
+
7
+ module SmartPrompt
8
+ # Adapter for 智谱 AI (BigModel / GLM) — covering all REST model categories behind one
9
+ # provider domain. One adapter owns the whole provider: every category shares the same
10
+ # base URL `https://open.bigmodel.cn/api/paas/v4` and Bearer-token auth, so a single config
11
+ # block serves them just by changing `model`.
12
+ #
13
+ # 1. 文本对话 (chat) — POST {base}/chat/completions (OpenAI-compatible; reasoning
14
+ # models return message.reasoning_content, the exact field the engine
15
+ # already reads — no remap needed)
16
+ # 2. 图文多模态 (vision) — same endpoint, OpenAI Vision content array
17
+ # 3. 向量 (embeddings) — POST {base}/embeddings (embedding-3, custom dimensions)
18
+ # 4. 文生图 (image) — POST {base}/images/generations (response is either flat data[].url or nested data.images[].url)
19
+ # 5. 文生视频 (video) — POST {base}/videos/generations -> task_id; GET {base}/async-result/{task_id}
20
+ # poll until SUCCESS -> video_result.url (async)
21
+ # 6. 语音合成 (TTS) — POST {base}/audio/speech (glm-tts)
22
+ # 7. 语音识别 (ASR) — POST {base}/audio/transcriptions (glm-asr-2512, multipart)
23
+ # 8. 重排 (rerank) — POST {base}/rerank
24
+ #
25
+ # We talk to the endpoints with Net::HTTP directly (like the SenseNova / image / tts / stt /
26
+ # video adapters) so we can control SSE streaming, the nested image shape, and the async
27
+ # video flow. No new gem deps.
28
+ class ZhipuAIAdapter < LLMAdapter
29
+ DEFAULT_BASE_URL = "https://open.bigmodel.cn/api/paas/v4".freeze
30
+ # CodeGeeX-4 / coding models use a separate base.
31
+ DEFAULT_CODING_BASE_URL = "https://open.bigmodel.cn/api/coding/paas/v4".freeze
32
+ SUPPORTED_IMAGE_FORMATS = %w[jpg jpeg png gif bmp webp].freeze
33
+
34
+ # Zhipu chat sampling parameters forwarded from config when present.
35
+ CHAT_OPTIONAL_KEYS = %w[
36
+ top_p max_tokens do_sample stop presence_penalty frequency_penalty thinking
37
+ ].freeze
38
+
39
# Builds the adapter from a provider config hash.
#
# Config keys read here (String keys): "api_key" (a literal key, or an
# ENV["NAME"] / ENV['NAME'] reference), "url", "coding_url", and the optional
# per-endpoint overrides "image_url", "video_url" and "query_url".
#
# A missing key only logs a warning, so a config can be loaded without live
# credentials; requests are then sent with an empty bearer token.
#
# @param config [Hash] provider configuration (passed to the superclass)
def initialize(config)
  super
  SmartPrompt.logger.info "Start create the SmartPrompt ZhipuAIAdapter."

  api_key = @config["api_key"]
  if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
    # Resolve the reference with a plain ENV lookup instead of eval, so a config
    # value can never execute arbitrary Ruby. Only the exact ENV["NAME"] form is
    # accepted; anything else is dropped (its text is not logged, as it may
    # embed a literal secret).
    reference = api_key.match(/\AENV\[\s*(["'])([^"']+)\1\s*\]\z/)
    if reference
      api_key = ENV[reference[2]]
    else
      SmartPrompt.logger.warn "Zhipu api_key looks like an ENV[...] reference but is not a plain ENV[\"NAME\"] lookup — ignoring it."
      api_key = nil
    end
  end
  SmartPrompt.logger.warn "Zhipu api_key is empty — API calls will fail until it is set." if api_key.nil? || api_key.to_s.strip.empty?

  @api_key = api_key
  @base_url = (@config["url"] || DEFAULT_BASE_URL).to_s.chomp("/")
  @coding_base = (@config["coding_url"] || DEFAULT_CODING_BASE_URL).to_s.chomp("/")
  # Optional per-method URL overrides (default to the standard paths off @base_url).
  @image_url = (@config["image_url"] || "#{@base_url}/images/generations").to_s
  @video_url = (@config["video_url"] || "#{@base_url}/videos/generations").to_s
  @query_url = (@config["query_url"] || "#{@base_url}/async-result").to_s
  SmartPrompt.logger.info "Zhipu base_url=#{@base_url}"
end
60
+
61
+ public
62
+
63
+ # ---- chat + vision -------------------------------------------------------
64
+
65
# Chat / multimodal completion.
#
# Without +proc+ the call blocks, stores the normalized OpenAI-format hash in
# @last_response and returns it. With +proc+ the request is streamed: each SSE
# event is reshaped by #build_stream_chunk and handed to +proc+ as
# (chunk, 0), and the method returns nil.
#
# @raise [LLMAPIError] for any failure that is not already a SmartPrompt error
def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
  model_name = model || @config["model"]
  payload = build_chat_body(messages, model_name, temperature, tools)
  SmartPrompt.logger.info "ZhipuAIAdapter: chat request model=#{model_name} stream=#{!proc.nil?}"

  endpoint = chat_url_for(model_name)
  unless proc
    response = build_completion_response(http_post_json(endpoint, payload))
    @last_response = response
    SmartPrompt.logger.info "ZhipuAIAdapter: received chat response"
    return response
  end

  payload["stream"] = true
  stream_chat(endpoint, payload) do |event|
    proc.call(build_stream_chunk(event), 0)
  end
  SmartPrompt.logger.info "ZhipuAIAdapter: streaming request finished"
  nil
rescue LLMAPIError, Error
  # Library errors already carry a useful message; pass them through untouched.
  raise
rescue => e
  SmartPrompt.logger.error "Zhipu chat error: #{e.message}"
  raise LLMAPIError, "Failed to call Zhipu chat: #{e.message}"
end
91
+
92
+ # ---- embeddings ----------------------------------------------------------
93
+
94
# Requests embeddings for +text+ (a String, or an Array sent as-is) and returns
# the FIRST embedding vector of the response.
#
# The model falls back to config "embedding_model", then "model". Config
# "dimensions" and "encoding_format" are forwarded when set.
#
# @raise [LLMAPIError] when the call fails or the response carries no vector
def embeddings(text, model)
  model_name = model || @config["embedding_model"] || @config["model"]
  SmartPrompt.logger.info "ZhipuAIAdapter: embeddings model=#{model_name}"

  inputs = text.is_a?(Array) ? text : [text.to_s]
  payload = { "model" => model_name, "input" => inputs }
  %w[dimensions encoding_format].each do |key|
    payload[key] = @config[key] if @config[key]
  end

  result =
    begin
      http_post_json("#{@base_url}/embeddings", payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise LLMAPIError, "Failed to call Zhipu embeddings: #{e.message}"
    end

  vectors = result["data"]
  has_vector = vectors.is_a?(Array) && vectors.any? && vectors[0]["embedding"]
  raise LLMAPIError, "No embedding vector in Zhipu response: #{result.inspect}" unless has_vector

  vectors[0]["embedding"]
end
119
+
120
+ # ---- image (CogView / GLM-Image) -----------------------------------------
121
+
122
# Text-to-image generation.
#
# @param prompt [String] image description; must not be blank
# @param params [Hash] optional :model, :size, :user, :response_format
# @return [Array<Hash>] the entries extracted by #parse_image_response
# @raise [Error] on a blank prompt, a missing model, or an unexpected failure
def generate_image(prompt, params = {})
  SmartPrompt.logger.info "ZhipuAIAdapter: generating image"
  raise Error, "Prompt cannot be empty" if prompt.to_s.strip.empty?

  model_name = params[:model] || @config["image_model"] || @config["model"]
  raise Error, "No model configured for image generation" if model_name.to_s.strip.empty?

  payload = { "model" => model_name, "prompt" => prompt.to_s }
  %i[size user response_format].each do |option|
    payload[option.to_s] = params[option] if params[option]
  end

  # The prompt is left out of the log line.
  SmartPrompt.logger.info "Zhipu image params: #{payload.except('prompt').inspect}"
  result =
    begin
      http_post_json(@image_url, payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise Error, "Failed to call Zhipu image generation: #{e.message}"
    end

  parse_image_response(result).tap do |images|
    SmartPrompt.logger.info "ZhipuAIAdapter: generated #{images.size} image(s)"
  end
end
150
+
151
# Writes generated images to +output_dir+ (created if missing).
#
# @param image_data [Array<Hash>, Hash] result of #generate_image, or one entry
# @param output_dir [String] target directory
# @param filename_prefix [String] files are named "<prefix>_<1-based index>"
# @return [Array<String>] the paths returned by #save_single_image
def save_image(image_data, output_dir = "./output", filename_prefix = "zhipu_image")
  FileUtils.mkdir_p(output_dir)
  batch = image_data.is_a?(Array) ? image_data : [image_data]
  paths = batch.map.with_index(1) do |image, number|
    save_single_image(image, output_dir, "#{filename_prefix}_#{number}")
  end
  SmartPrompt.logger.info "Saved #{paths.size} Zhipu image(s) to #{output_dir}"
  paths
end
161
+
162
+ # ---- video (CogVideoX, async) --------------------------------------------
163
+
164
# Submits a text-to-video (or, with :image_url, image-to-video) job.
#
# @param prompt [String] sent as "prompt" after #to_s
# @param params [Hash] optional :model, :quality, :fps, :duration, :with_audio,
#   :resolution, :request_id, :seed, :image_url
# @return [Hash] { task_id:, model:, raw: } — the id comes from the response's
#   "id" or "task_id"
# @raise [Error] when no model is configured or the submit call fails unexpectedly
# @raise [LLMAPIError] when the response carries no task id
def generate_video(prompt, params = {})
  SmartPrompt.logger.info "ZhipuAIAdapter: submitting video job"
  model_name = params[:model] || @config["video_model"] || @config["model"]
  raise Error, "No model configured for video generation" if model_name.to_s.strip.empty?

  payload = { "model" => model_name, "prompt" => prompt.to_s }
  %i[quality fps duration with_audio resolution request_id seed].each do |option|
    value = params[option]
    # nil means "not given"; false is a legitimate value (e.g. with_audio).
    payload[option.to_s] = value unless value.nil?
  end
  payload["image_url"] = normalize_image_url(params[:image_url]) if params[:image_url]

  SmartPrompt.logger.info "Zhipu video params: #{payload.except('prompt').inspect}"
  result =
    begin
      http_post_json(@video_url, payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise Error, "Failed to submit Zhipu video job: #{e.message}"
    end

  task_id = result["id"] || result["task_id"]
  raise LLMAPIError, "No task id in Zhipu video response: #{result.inspect}" unless task_id

  SmartPrompt.logger.info "ZhipuAIAdapter: video task #{task_id} submitted"
  { task_id: task_id, model: model_name, raw: result }
end
191
+
192
# Fetches the current state of an async task with a GET on
# "<query_url>/<url-encoded task_id>".
#
# @return [Hash] the parsed status document, unmodified
# @raise [LLMAPIError] for any failure that is not already a SmartPrompt error
def check_video_status(task_id)
  SmartPrompt.logger.info "ZhipuAIAdapter: polling video task #{task_id}"
  encoded_id = URI.encode_www_form_component(task_id)
  http_get_json("#{@query_url}/#{encoded_id}")
rescue LLMAPIError, Error
  raise
rescue => e
  raise LLMAPIError, "Failed to query Zhipu video task: #{e.message}"
end
201
+
202
# Polls an async video task until it finishes, fails, or the timeout elapses.
#
# Elapsed time is measured on the monotonic clock, so wall-clock adjustments
# cannot shorten or extend the wait. The pause between polls never exceeds the
# time remaining, so the last poll lands at the deadline, not past it. Status
# strings are compared case-insensitively.
#
# @param task_id [String] id returned by #generate_video
# @param check_interval [Numeric] seconds between polls
# @param timeout [Numeric] maximum total seconds to wait
# @return [Hash] { task_id:, status: "SUCCESS", video_url:, cover_image_url:, raw: }
# @raise [LLMAPIError] on failure status, missing URL, or timeout
def wait_for_video_completion(task_id, check_interval: 10, timeout: 600)
  clock = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  deadline = clock.call + timeout

  loop do
    status = check_video_status(task_id)
    case task_status_of(status).to_s.upcase
    when "SUCCESS"
      url = video_url_of(status)
      raise LLMAPIError, "Video succeeded but no url in: #{status.inspect}" unless url

      SmartPrompt.logger.info "ZhipuAIAdapter: video ready #{url}"
      return { task_id: task_id, status: "SUCCESS", video_url: url, cover_image_url: cover_url_of(status), raw: status }
    when "FAIL", "FAILED"
      raise LLMAPIError, "Zhipu video generation failed: #{status.inspect}"
    else
      remaining = deadline - clock.call
      raise LLMAPIError, "Zhipu video generation timeout after #{timeout}s" if remaining <= 0

      SmartPrompt.logger.info "Zhipu video task #{task_id} still processing..."
      sleep([check_interval, remaining].min)
    end
  end
end
224
+
225
# Downloads +video_url+ to +output_path+.
#
# The body is streamed to disk in chunks rather than held in memory, and is
# written to "<output_path>.part" first, so an interrupted transfer never leaves
# a truncated file at the final path.
#
# @param video_url [String] http(s) URL of the video
# @param output_path [String] destination file; parent directories are created
# @return [String] +output_path+
# @raise [Error] on a non-2xx response or any transport / filesystem failure
def download_video(video_url, output_path)
  uri = URI.parse(video_url)
  partial_path = "#{output_path}.part"

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https", open_timeout: 30, read_timeout: 300) do |http|
    http.request(Net::HTTP::Get.new(uri.request_uri)) do |response|
      raise Error, "Failed to download video: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

      FileUtils.mkdir_p(File.dirname(output_path))
      File.open(partial_path, "wb") do |file|
        response.read_body { |chunk| file.write(chunk) }
      end
    end
  end

  File.rename(partial_path, output_path)
  SmartPrompt.logger.info "Zhipu video saved to #{output_path}"
  output_path
rescue => e
  FileUtils.rm_f(partial_path) if partial_path
  # Keep the class of library errors; wrap everything else in Error.
  error_class = e.is_a?(SmartPrompt::Error) ? e.class : Error
  raise error_class, "Error downloading Zhipu video: #{e.message}"
end
237
+
238
+ # ---- TTS (GLM-TTS) -------------------------------------------------------
239
+
240
# Synthesizes speech and returns it as a "data:audio/<format>;base64,..." URL.
#
# @param text [String] text to speak; must not be blank
# @param voice [String, nil] forwarded as "voice" when given
# @param model [String, nil] falls back to config "tts_model", then "glm-tts"
# @param response_format [String] sent to the API and used in the data URL
# @param opts [Hash] optional :speed and :emotion
# @raise [Error] on blank text or an unexpected failure
def synthesize_speech(text, voice: nil, model: nil, response_format: "wav", **opts)
  SmartPrompt.logger.info "ZhipuAIAdapter: TTS"
  raise Error, "Text cannot be empty" if text.to_s.strip.empty?

  payload = { "model" => model || @config["tts_model"] || "glm-tts", "input" => text.to_s }
  payload["voice"] = voice if voice
  payload["response_format"] = response_format
  %i[speed emotion].each do |option|
    payload[option.to_s] = opts[option] if opts[option]
  end

  encoded = Base64.strict_encode64(http_post_binary("#{@base_url}/audio/speech", payload))
  "data:audio/#{response_format};base64,#{encoded}"
rescue LLMAPIError, Error
  raise
rescue => e
  raise Error, "Failed to call Zhipu TTS: #{e.message}"
end
260
+
261
# Synthesizes speech via #synthesize_speech and writes the decoded audio bytes
# to +output_path+ (parent directories are created).
#
# @return [Hash] { file_path:, format: }
def synthesize_to_file(text, output_path, voice: nil, model: nil, response_format: "wav", **opts)
  data_url = synthesize_speech(text, voice: voice, model: model, response_format: response_format, **opts)
  FileUtils.mkdir_p(File.dirname(output_path))

  # Strip the "data:audio/<fmt>;base64," prefix to get the raw payload.
  encoded_audio = data_url.sub(/\Adata:audio\/\w+;base64,/, "")
  File.binwrite(output_path, Base64.decode64(encoded_audio))

  SmartPrompt.logger.info "Zhipu audio saved to #{output_path}"
  { file_path: output_path, format: response_format }
end
269
+
270
+ # ---- ASR (GLM-ASR-2512) --------------------------------------------------
271
+
272
# Transcribes a local audio file.
#
# The path is validated before anything else touches it, so nil, a missing
# path, or a directory all give the same "Audio file not found" error.
#
# @param audio_file [String] path to a regular file
# @param model [String, nil] falls back to config "asr_model", then "glm-asr-2512"
# @param language [String, nil] forwarded as "language" when given
# @param opts [Hash] optional :prompt and :response_format
# @return [Hash] { text: } taken from the response's "text" field
# @raise [Error] when the file is missing or the call fails
def transcribe_audio(audio_file, model: nil, language: nil, **opts)
  unless audio_file && File.file?(audio_file.to_s)
    raise Error, "Audio file not found: #{audio_file}"
  end
  SmartPrompt.logger.info "ZhipuAIAdapter: ASR #{File.basename(audio_file)}"

  form = { "model" => model || @config["asr_model"] || "glm-asr-2512" }
  form["language"] = language if language
  form["prompt"] = opts[:prompt] if opts[:prompt]
  form["response_format"] = opts[:response_format] if opts[:response_format]

  response = http_post_multipart("#{@base_url}/audio/transcriptions", form, audio_file)
  { text: response["text"] }
rescue Error
  # Covers LLMAPIError too (it subclasses Error); pass library errors through.
  raise
rescue => e
  raise Error, "Failed to call Zhipu ASR: #{e.message}"
end
290
+
291
+ # ---- rerank (bonus) ------------------------------------------------------
292
+
293
# Scores +documents+ against +query+ using the rerank endpoint.
#
# @param model [String, nil] falls back to config "rerank_model", then "model"
# @return [Array<Hash>] { index:, relevance_score: } per entry of the response's
#   "results" (empty when absent); the score falls back to the "score" key
# @raise [LLMAPIError] for any failure that is not already a SmartPrompt error
def rerank(query, documents, model: nil)
  model_name = model || @config["rerank_model"] || @config["model"]
  payload = { "model" => model_name, "query" => query, "documents" => documents }
  ranked = http_post_json("#{@base_url}/rerank", payload)["results"] || []
  ranked.map do |entry|
    { index: entry["index"], relevance_score: entry["relevance_score"] || entry["score"] }
  end
rescue LLMAPIError, Error
  raise
rescue => e
  raise LLMAPIError, "Failed to call Zhipu rerank: #{e.message}"
end
303
+
304
+ private
305
+
306
+ # ---- chat building -------------------------------------------------------
307
+
308
# Picks the chat-completions endpoint for +model_name+: the coding base when
# the name contains "codegeex" or config "coding" is truthy, otherwise the
# regular base.
def chat_url_for(model_name)
  use_coding_base = model_name.to_s.include?("codegeex") || @config["coding"]
  base = use_coding_base ? @coding_base : @base_url
  "#{base}/chat/completions"
end
312
+
313
# Assembles the JSON body for a chat-completions request.
#
# Temperature precedence: config "temperature", then the +temperature+
# argument, then 0.7. Every key in CHAT_OPTIONAL_KEYS present in the config is
# copied verbatim. "tools" is added only when non-empty.
def build_chat_body(messages, model_name, temperature, tools)
  payload = {
    "model" => model_name,
    "messages" => process_multimodal_messages(messages),
    "temperature" => @config["temperature"] || temperature || 0.7,
  }
  CHAT_OPTIONAL_KEYS.each do |key|
    next unless @config.key?(key)

    payload[key] = @config[key]
  end
  payload["tools"] = tools if tools && !tools.empty?
  payload
end
323
+
324
# OpenAI-style message fields, besides role/content, that must survive
# normalization for tool calling to work.
PASSTHROUGH_MESSAGE_KEYS = %w[name tool_calls tool_call_id].freeze

# Normalizes chat messages into String-keyed hashes for the API.
#
# Array content is normalized item by item via #normalize_content_item; other
# content is passed through. "name", "tool_calls" and "tool_call_id" are kept
# when present, so assistant tool-call turns and tool-result turns are not
# stripped. Any other keys on the message are dropped.
#
# @param messages [Array<Hash>] messages with Symbol or String keys
# @return [Array<Hash>] String-keyed messages
def process_multimodal_messages(messages)
  messages.map do |msg|
    content = msg[:content] || msg["content"]
    content = content.map { |item| normalize_content_item(item) } if content.is_a?(Array)

    normalized = { "role" => msg[:role] || msg["role"], "content" => content }
    PASSTHROUGH_MESSAGE_KEYS.each do |key|
      value = msg[key.to_sym] || msg[key]
      normalized[key] = value unless value.nil?
    end
    normalized
  end
end
333
+
334
# Normalizes one entry of a multimodal content array.
#
# Non-Hash values become a text part. "image_url" parts get their URL passed
# through #normalize_image_url. Any other Hash is returned with String keys.
def normalize_content_item(item)
  return { "type" => "text", "text" => item.to_s } unless item.is_a?(Hash)
  return stringify_hash(item) unless (item[:type] || item["type"]) == "image_url"

  image_ref = item[:image_url] || item["image_url"]
  # The reference is either { url: ... } or the bare URL itself.
  raw_url = image_ref.is_a?(Hash) ? (image_ref[:url] || image_ref["url"]) : image_ref
  { "type" => "image_url", "image_url" => { "url" => normalize_image_url(raw_url) } }
end
346
+
347
# Turns an image reference into something the API can consume.
#
# nil, http(s) URLs and data URLs are returned unchanged. Anything else is
# treated as a local file path and inlined as a base64 data URL.
#
# @raise [Error] when the file is missing or its extension is not in
#   SUPPORTED_IMAGE_FORMATS
def normalize_image_url(url)
  return url if url.nil? || url.start_with?("http://", "https://", "data:")
  raise Error, "Image file not found: #{url}" unless File.exist?(url)

  ext = File.extname(url).downcase.delete(".")
  raise Error, "Unsupported image format: #{ext}" unless SUPPORTED_IMAGE_FORMATS.include?(ext)

  # "jpg" is the only extension whose MIME subtype differs from its name.
  subtype = { "jpg" => "jpeg" }.fetch(ext, ext)
  "data:image/#{subtype};base64,#{Base64.strict_encode64(File.binread(url))}"
end
357
+
358
+ # ---- response shaping ----------------------------------------------------
359
+
360
# Reshapes a non-streaming chat response into a fixed single-choice,
# OpenAI-style hash.
#
# Only the first choice is kept. "reasoning_content" and "tool_calls" are
# copied when present, as is the top-level "usage". Missing "role" defaults to
# "assistant" and missing "object" to "chat.completion".
def build_completion_response(raw)
  source = raw.dig("choices", 0, "message") || {}
  message = { "role" => source["role"] || "assistant", "content" => source["content"] }
  %w[reasoning_content tool_calls].each do |key|
    message[key] = source[key] if source[key]
  end

  choice = {
    "index" => 0,
    "message" => message,
    "finish_reason" => raw.dig("choices", 0, "finish_reason"),
  }
  {
    "id" => raw["id"],
    "object" => raw["object"] || "chat.completion",
    "created" => raw["created"],
    "model" => raw["model"],
    "choices" => [choice],
  }.tap do |response|
    response["usage"] = raw["usage"] if raw["usage"]
  end
end
383
+
384
# Reshapes one parsed SSE event into an OpenAI-style streaming chunk.
#
# Only the first choice is forwarded, and its delta is reduced to the truthy
# "role", "content", "reasoning_content" and "tool_calls" fields. An event with
# no usable choices yields "choices" => [].
def build_stream_chunk(data)
  chunk = %w[id object created model].to_h { |key| [key, data[key]] }
  chunk["usage"] = data["usage"] if data["usage"]

  choices = data["choices"] || []
  unless choices.any?
    chunk["choices"] = []
    return chunk
  end

  head = choices[0]
  delta = head["delta"] || {}
  slim_delta = %w[role content reasoning_content tool_calls].each_with_object({}) do |key, kept|
    kept[key] = delta[key] if delta[key]
  end
  chunk["choices"] = [{
    "index" => head["index"] || 0,
    "delta" => slim_delta,
    "finish_reason" => head["finish_reason"],
  }]
  chunk
end
413
+
414
# Extracts generated images from an image-generation response.
#
# Accepted shapes, in lookup order:
#   * "data" is an Array of image hashes (flat, OpenAI style)
#   * "data" is a Hash holding "images", "data" or "url"
#   * top-level "images" or "urls"
# A lone URL string or a single image hash counts as a one-element list, bare
# URL strings are wrapped as { "url" => ... }, and entries that are neither a
# String nor a Hash are skipped.
#
# @param response [Hash] parsed JSON response
# @return [Array<Hash>] one { url:, b64_json: } per image
# @raise [LLMAPIError] when no image entry is found
def parse_image_response(response)
  container = response["data"]
  items =
    case container
    when Hash then container["images"] || container["data"] || container["url"]
    when Array then container
    end
  items ||= response["images"] || response["urls"]

  # Array() would split a Hash into key/value pairs, so wrap explicitly.
  items = [items] if items.is_a?(String) || items.is_a?(Hash)
  if items.is_a?(Array)
    items = items
      .map { |entry| entry.is_a?(String) ? { "url" => entry } : entry }
      .select { |entry| entry.is_a?(Hash) }
  end

  unless items.is_a?(Array) && items.any?
    SmartPrompt.logger.error "Zhipu image response had no images: #{response.inspect}"
    raise LLMAPIError, "No image data in Zhipu response"
  end
  items.map { |image| { url: image["url"], b64_json: image["b64_json"] } }
end
435
+
436
# Reads the task state from a status document: "task_status" first, then
# "status", defaulting to "PROCESSING" when neither is set.
def task_status_of(status)
  %w[task_status status].each do |key|
    value = status[key]
    return value if value
  end
  "PROCESSING"
end
440
+
441
# Finds the video URL in a finished task's status document.
#
# "video_result" may be an Array (first entry is used) or a single Hash; its
# "url" wins over "video_url". Without a usable entry, falls back to the
# top-level "video_url", then "data" -> "video_url".
def video_url_of(status)
  result = status["video_result"]
  first = result.is_a?(Array) ? result[0] : result
  if first.is_a?(Hash)
    first["url"] || first["video_url"]
  else
    status["video_url"] || status.dig("data", "video_url")
  end
end
448
+
449
# Finds the cover image URL in a task's status document: the first
# "video_result" entry's "cover_image_url", then "cover_url". Returns nil when
# there is no such entry.
def cover_url_of(status)
  candidate = status["video_result"]
  candidate = candidate[0] if candidate.is_a?(Array)
  return nil unless candidate.is_a?(Hash)

  candidate["cover_image_url"] || candidate["cover_url"]
end
454
+
455
+ # ---- HTTP ----------------------------------------------------------------
456
+
457
# POSTs +body+ as JSON with the Bearer token and returns the parsed JSON reply.
#
# @return [Hash] parsed response, or {} when the response body is empty
# @raise [LLMAPIError] on any non-2xx response (status and body are included)
def http_post_json(url, body)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 240

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Authorization"] = "Bearer #{@api_key}"
  json = body.to_json
  request.body = json
  SmartPrompt.logger.debug "Zhipu POST #{uri} body=#{json}"

  reply = client.request(request)
  unless reply.is_a?(Net::HTTPSuccess)
    SmartPrompt.logger.error "Zhipu API error: #{reply.code} - #{reply.body}"
    raise LLMAPIError, "Zhipu API error: #{reply.code} - #{reply.body}"
  end
  return {} if reply.body.to_s.empty?

  JSON.parse(reply.body)
end
474
+
475
# GETs +url+ with the Bearer token and returns the parsed JSON reply.
#
# @return [Hash] parsed response, or {} when the response body is empty
# @raise [LLMAPIError] on any non-2xx response
def http_get_json(url)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 60

  request = Net::HTTP::Get.new(uri.request_uri)
  request["Authorization"] = "Bearer #{@api_key}"
  SmartPrompt.logger.debug "Zhipu GET #{uri}"

  reply = client.request(request)
  raise LLMAPIError, "Zhipu API error: #{reply.code} - #{reply.body}" unless reply.is_a?(Net::HTTPSuccess)
  return {} if reply.body.to_s.empty?

  JSON.parse(reply.body)
end
489
+
490
# POSTs +body+ as JSON and returns the response body untouched (used for TTS
# audio bytes).
#
# @return [String] raw response body
# @raise [LLMAPIError] on any non-2xx response
def http_post_binary(url, body)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 120

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Authorization"] = "Bearer #{@api_key}"
  request.body = body.to_json

  reply = client.request(request)
  raise LLMAPIError, "Zhipu TTS API error: #{reply.code} - #{reply.body}" unless reply.is_a?(Net::HTTPSuccess)

  reply.body
end
506
+
507
# multipart/form-data POST with one file part named "file" (used for ASR).
#
# The body is built as a BINARY string. Mixing UTF-8 text that contains
# non-ASCII characters (a Chinese prompt or filename) with raw audio bytes in a
# UTF-8 buffer raises Encoding::CompatibilityError, so each text fragment is
# appended as bytes.
#
# @param url [String] endpoint
# @param form [Hash] plain text fields (name => value)
# @param file_path [String] file to upload; its extension picks the
#   "audio/<ext>" content type, defaulting to "audio/wav" when there is none
# @return [Hash] parsed JSON response, or {} when the response body is empty
# @raise [LLMAPIError] on any non-2xx response
def http_post_multipart(url, form, file_path)
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 30
  http.read_timeout = 180

  boundary = "----SmartPrompt#{object_id}"
  ext = File.extname(file_path).downcase.delete(".")
  mime = "audio/#{ext.empty? ? "wav" : ext}"
  # Quotes and line breaks would corrupt the Content-Disposition header.
  filename = File.basename(file_path).gsub(/["\r\n]/, "_")

  body = String.new(encoding: Encoding::BINARY)
  form.each do |k, v|
    body << "--#{boundary}\r\n".b
    body << "Content-Disposition: form-data; name=\"#{k}\"\r\n\r\n".b
    body << "#{v}\r\n".b
  end
  body << "--#{boundary}\r\n".b
  body << "Content-Disposition: form-data; name=\"file\"; filename=\"#{filename}\"\r\n".b
  body << "Content-Type: #{mime}\r\n\r\n".b
  body << File.binread(file_path)
  body << "\r\n".b
  body << "--#{boundary}--\r\n".b

  req = Net::HTTP::Post.new(uri.request_uri)
  req["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
  req["Authorization"] = "Bearer #{@api_key}"
  req.body = body
  resp = http.request(req)
  raise LLMAPIError, "Zhipu ASR API error: #{resp.code} - #{resp.body}" unless resp.is_a?(Net::HTTPSuccess)

  resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
end
542
+
543
# POSTs +body+ and yields each parsed SSE "data:" payload to the block.
#
# Response segments are collected in a buffer and consumed one complete line
# at a time. Lines that do not start with "data:" are ignored, payloads that
# fail to parse as JSON are skipped, and a "[DONE]" payload ends processing.
#
# @yieldparam event [Hash] one parsed SSE payload
# @raise [LLMAPIError] on a non-2xx response
def stream_chat(url, body)
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 30
  http.read_timeout = 300

  req = Net::HTTP::Post.new(uri.request_uri)
  req["Content-Type"] = "application/json"
  req["Authorization"] = "Bearer #{@api_key}"
  req["Accept"] = "text/event-stream"
  req.body = body.to_json

  pending = ""
  finished = false
  http.request(req) do |response|
    unless response.is_a?(Net::HTTPSuccess)
      raise LLMAPIError, "Zhipu stream error: #{response.code} - #{response.body}"
    end

    response.read_body do |segment|
      break if finished

      pending << segment
      # Consume every complete line; a trailing partial line stays buffered.
      until finished || (newline_at = pending.index("\n")).nil?
        line = pending.slice!(0, newline_at + 1).strip
        next unless line.start_with?("data:")

        payload = line.sub(/\Adata:\s*/, "")
        if payload == "[DONE]"
          finished = true
        else
          begin
            yield JSON.parse(payload)
          rescue JSON::ParserError
            next
          end
        end
      end
    end
  end
end
581
+
582
# Writes one generated image to +output_dir+ and returns the file path.
#
# Inline base64 data (:b64_json) is preferred and saved as PNG. Otherwise the
# image is downloaded from :url and the extension is taken from the response's
# media type. Content-Type parameters and letter case are ignored, so
# "image/JPEG; charset=binary" still maps to "jpg". Unknown types default to
# "png".
#
# @param image_data [Hash] { url:, b64_json: } entry
# @param output_dir [String] existing directory
# @param filename [String] file name without extension
# @return [String] path of the written file
# @raise [Error] when the download fails or the entry has neither field
def save_single_image(image_data, output_dir, filename)
  if image_data[:b64_json]
    file_path = File.join(output_dir, "#{filename}.png")
    File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
  elsif image_data[:url]
    response = Net::HTTP.get_response(URI.parse(image_data[:url]))
    raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    media_type = response["content-type"].to_s.split(";").first.to_s.strip.downcase
    ext =
      case media_type
      when "image/jpeg", "image/jpg" then "jpg"
      when "image/png" then "png"
      when "image/gif" then "gif"
      when "image/webp" then "webp"
      else "png"
      end
    file_path = File.join(output_dir, "#{filename}.#{ext}")
    File.binwrite(file_path, response.body)
  else
    raise Error, "No image data available to save"
  end
  file_path
end
604
+
605
# Recursively converts Hash keys to Strings. Arrays are walked element by
# element; any other value is returned unchanged.
def stringify_hash(hash)
  if hash.is_a?(Hash)
    hash.to_h { |key, value| [key.to_s, stringify_hash(value)] }
  elsif hash.is_a?(Array)
    hash.map { |element| stringify_hash(element) }
  else
    hash
  end
end
615
+ end
616
+ end
data/lib/smart_prompt.rb CHANGED
@@ -1,11 +1,30 @@
1
1
  require File.expand_path('../smart_prompt/version', __FILE__)
2
+ require File.expand_path('../smart_prompt/token_counter', __FILE__)
3
+ require File.expand_path('../smart_prompt/message', __FILE__)
4
+ require File.expand_path('../smart_prompt/session', __FILE__)
5
+ require File.expand_path('../smart_prompt/context_strategy', __FILE__)
6
+ require File.expand_path('../smart_prompt/sliding_window_strategy', __FILE__)
7
+ require File.expand_path('../smart_prompt/relevance_based_strategy', __FILE__)
8
+ require File.expand_path('../smart_prompt/compression_engine', __FILE__)
9
+ require File.expand_path('../smart_prompt/summary_based_strategy', __FILE__)
10
+ require File.expand_path('../smart_prompt/hybrid_strategy', __FILE__)
11
+ require File.expand_path('../smart_prompt/persistence_layer', __FILE__)
12
+ require File.expand_path('../smart_prompt/lru_cache', __FILE__)
13
+ require File.expand_path('../smart_prompt/history_manager', __FILE__)
2
14
  require File.expand_path('../smart_prompt/engine', __FILE__)
3
15
  require File.expand_path('../smart_prompt/api_handler', __FILE__)
4
16
  require File.expand_path('../smart_prompt/conversation', __FILE__)
5
17
  require File.expand_path('../smart_prompt/llm_adapter', __FILE__)
6
18
  require File.expand_path('../smart_prompt/openai_adapter', __FILE__)
7
- require File.expand_path('../smart_prompt/anthropic_adapter', __FILE__)
8
19
  require File.expand_path('../smart_prompt/llamacpp_adapter', __FILE__)
20
+ require File.expand_path('../smart_prompt/anthropic_adapter', __FILE__)
21
+ require File.expand_path('../smart_prompt/sensenova_adapter', __FILE__)
22
+ require File.expand_path('../smart_prompt/zhipu_adapter', __FILE__)
23
+ require File.expand_path('../smart_prompt/multimodal_adapter', __FILE__)
24
+ require File.expand_path('../smart_prompt/image_generation_adapter', __FILE__)
25
+ require File.expand_path('../smart_prompt/video_generation_adapter', __FILE__)
26
+ require File.expand_path('../smart_prompt/tts_adapter', __FILE__)
27
+ require File.expand_path('../smart_prompt/stt_adapter', __FILE__)
9
28
  require File.expand_path('../smart_prompt/prompt_template', __FILE__)
10
29
  require File.expand_path('../smart_prompt/worker', __FILE__)
11
30
 
@@ -14,6 +33,7 @@ module SmartPrompt
14
33
  class ConfigurationError < Error; end
15
34
  class LLMAPIError < Error; end
16
35
  class CallWorkerError < Error; end
36
+ class HistoryManagerError < Error; end
17
37
 
18
38
  attr_writer :logger
19
39
 
@@ -35,4 +55,4 @@ module SmartPrompt
35
55
  log.progname = self.name
36
56
  end
37
57
  end
38
- end
58
+ end