smart_prompt 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
module SmartPrompt
  module SiliconFlow
    # Speech synthesis (CosyVoice2 / MOSS-TTSD), speech recognition (SenseVoiceSmall),
    # and custom-voice cloning management.
    #
    # Mixin: the methods below read @config and the endpoint instance variables
    # (@speech_url, @transcription_url, @voice_upload_url, @voice_list_url,
    # @voice_delete_url) and call http_post_binary, http_post_multipart,
    # http_get_json and http_post_json, all of which the including object supplies.
    module Voice
      DEFAULT_TTS_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
      DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"

      # Synthesize +text+ and return it as a base64 data URL.
      #
      # SiliconFlow's /audio/speech returns the raw binary audio stream (NOT base64 /
      # NOT JSON), so the bytes are base64-encoded here.
      #
      # @param text [#to_s] text to speak; must not be nil or blank
      # @param voice [String, nil] sent as "voice" when given
      # @param model [String, nil] falls back to @config["tts_model"], then DEFAULT_TTS_MODEL
      # @param response_format [String] sent as "response_format" and used in the data URL
      # @param opts [Hash] :speed, :sample_rate, :gain, :language are forwarded when truthy
      # @return [String] "data:audio/<response_format>;base64,<payload>"
      # @raise [Error] on blank text or any unexpected failure
      # @raise [LLMAPIError] re-raised unchanged from the HTTP helper
      def synthesize_speech(text, voice: nil, model: nil, response_format: "mp3", **opts)
        SmartPrompt.logger.info "SiliconFlowAdapter: TTS"
        raise Error, "Text cannot be empty" if text.nil? || text.to_s.strip.empty?

        body = { "model" => model || @config["tts_model"] || DEFAULT_TTS_MODEL, "input" => text.to_s }
        body["voice"] = voice if voice
        body["response_format"] = response_format
        %i[speed sample_rate gain language].each { |key| body[key.to_s] = opts[key] if opts[key] }

        audio = http_post_binary(@speech_url, body)
        "data:audio/#{response_format};base64,#{Base64.strict_encode64(audio)}"
      rescue LLMAPIError, Error
        raise
      rescue => e
        raise Error, "Failed to call SiliconFlow TTS: #{e.message}"
      end

      # Synthesize +text+ via synthesize_speech and write the decoded audio to
      # +output_path+, creating the parent directory if needed.
      #
      # @return [Hash] { file_path:, format: }
      def synthesize_to_file(text, output_path, voice: nil, model: nil, response_format: "mp3", **opts)
        data_url = synthesize_speech(text, voice: voice, model: model, response_format: response_format, **opts)
        FileUtils.mkdir_p(File.dirname(output_path))
        audio_bytes = Base64.decode64(data_url.sub(/\Adata:audio\/\w+;base64,/, ""))
        File.binwrite(output_path, audio_bytes)
        SmartPrompt.logger.info "SiliconFlow audio saved to #{output_path}"
        { file_path: output_path, format: response_format }
      end

      # Transcribe an audio file (local path). The transcription endpoint takes
      # multipart/form-data with a `file` field.
      #
      # @param audio_file [String] path to an existing local file
      # @param model [String, nil] falls back to @config["asr_model"], then DEFAULT_ASR_MODEL
      # @param language [String, nil] forwarded when given
      # @param opts [Hash] :prompt and :response_format are forwarded when truthy
      # @return [Hash] { text: }
      # @raise [Error] when the file is missing or on any unexpected failure
      def transcribe_audio(audio_file, model: nil, language: nil, **opts)
        SmartPrompt.logger.info "SiliconFlowAdapter: ASR #{File.basename(audio_file)}"
        raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)

        form = { "model" => model || @config["asr_model"] || DEFAULT_ASR_MODEL }
        form["language"] = language if language
        %i[prompt response_format].each { |key| form[key.to_s] = opts[key] if opts[key] }

        response = http_post_multipart(@transcription_url, form, "file", audio_file, audio_mime_for(audio_file))
        { text: response["text"] }
      rescue LLMAPIError, Error
        raise
      rescue => e
        # SmartPrompt errors are re-raised by the clause above, so everything
        # reaching this point is wrapped.
        raise Error, "Failed to call SiliconFlow ASR: #{e.message}"
      end

      # Upload a reference audio to clone a custom voice. SiliconFlow returns
      # {"uri": "speech:..."}. `customName` (camelCase) is the display name.
      #
      # @param name [#to_s] display name, sent as "customName"
      # @param audio_file [String] path to an existing local file
      # @param text [#to_s, nil] sent as "text" when given
      # @param model [String, nil] falls back to @config["tts_model"], then DEFAULT_TTS_MODEL
      # @return [Hash] { uri:, name:, raw: }
      # @raise [Error] when the file is missing or on any unexpected failure
      # @raise [LLMAPIError] when the response carries no "uri"
      def upload_voice(name, audio_file, text: nil, model: nil)
        SmartPrompt.logger.info "SiliconFlowAdapter: upload voice #{name}"
        raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)

        form = { "model" => model || @config["tts_model"] || DEFAULT_TTS_MODEL, "customName" => name.to_s }
        form["text"] = text.to_s if text

        response = http_post_multipart(@voice_upload_url, form, "file", audio_file, audio_mime_for(audio_file))
        raise LLMAPIError, "No uri in SiliconFlow voice upload response: #{response.inspect}" unless response["uri"]

        { uri: response["uri"], name: name.to_s, raw: response }
      rescue LLMAPIError, Error
        raise
      rescue => e
        raise Error, "Failed to upload SiliconFlow voice: #{e.message}"
      end

      # List custom voices. The list is read from "result", then "voices", then the
      # response itself; when none of those is an Array the raw response is returned.
      #
      # @return [Array<Hash>, Object] [{ uri:, name: }, ...] or the raw response
      def list_voices
        SmartPrompt.logger.info "SiliconFlowAdapter: list voices"
        response = http_get_json(@voice_list_url)
        items = response["result"] || response["voices"] || response
        return response unless items.is_a?(Array)

        items.map { |voice| { uri: voice["uri"], name: voice["customName"] || voice["name"] } }
      rescue LLMAPIError, Error
        raise
      rescue => e
        raise LLMAPIError, "Failed to list SiliconFlow voices: #{e.message}"
      end

      # Delete a custom voice by its uri.
      #
      # @return [Hash] { deleted:, uri:, raw: }; deleted is true when the response
      #   has no (or a nil) "deleted" value
      def delete_voice(uri)
        SmartPrompt.logger.info "SiliconFlowAdapter: delete voice #{uri}"
        response = http_post_json(@voice_delete_url, { "uri" => uri })
        deleted = response["deleted"]
        deleted = true if deleted.nil?
        { deleted: deleted, uri: uri, raw: response }
      rescue LLMAPIError, Error
        raise
      rescue => e
        raise LLMAPIError, "Failed to delete SiliconFlow voice: #{e.message}"
      end

      private

      # "audio/<ext>" derived from the file name, or "audio/wav" when the name has
      # no extension. (String#delete never returns nil, so an `|| "wav"` fallback
      # would never fire; the empty case is checked explicitly.)
      def audio_mime_for(path)
        ext = File.extname(path).downcase.delete(".")
        "audio/#{ext.empty? ? "wav" : ext}"
      end
    end
  end
end
@@ -0,0 +1,32 @@
module SmartPrompt
  module ZhipuAI
    # Embeddings (embedding-3, custom dimensions).
    #
    # Mixin: reads @config and @base_url and calls http_post_json, which the
    # including object supplies.
    module Embed
      # Request embeddings and return the first embedding vector.
      #
      # embedding-3 (default 2048 dims); supports a custom `dimensions`
      # (256/512/1024/2048) via config.
      #
      # @param text [String, Array, #to_s] an Array is sent as-is; anything else is
      #   sent as a one-element array of its string form
      # @param model [String, nil] optional; falls back to @config["embedding_model"],
      #   then @config["model"]
      # @return [Object] the "embedding" value of the first item in the response's "data"
      # @raise [LLMAPIError] when the request fails or the response has no usable
      #   first embedding
      def embeddings(text, model = nil)
        model_name = model || @config["embedding_model"] || @config["model"]
        SmartPrompt.logger.info "ZhipuAIAdapter: embeddings model=#{model_name}"

        body = { "model" => model_name, "input" => text.is_a?(Array) ? text : [text.to_s] }
        %w[dimensions encoding_format].each { |key| body[key] = @config[key] if @config[key] }

        response =
          begin
            http_post_json("#{@base_url}/embeddings", body)
          rescue LLMAPIError, Error
            raise
          rescue => e
            raise LLMAPIError, "Failed to call Zhipu embeddings: #{e.message}"
          end

        items = response["data"]
        first = items.is_a?(Array) ? items.first : nil
        # Only index into the first item when it is a Hash, so a malformed payload
        # surfaces as LLMAPIError rather than NoMethodError/TypeError.
        vector = first.is_a?(Hash) ? first["embedding"] : nil
        raise LLMAPIError, "No embedding vector in Zhipu response: #{response.inspect}" unless vector

        vector
      end
    end
  end
end
@@ -0,0 +1,59 @@
module SmartPrompt
  module ZhipuAI
    # Text-to-image (CogView / GLM-Image). save_image comes from the ImagePersistence concern.
    #
    # Mixin: reads @config and @image_url and calls http_post_json, which the
    # including object supplies.
    module Image
      # Text-to-image. Returns an Array of {url:, b64_json:}.
      #
      # The response shape is not uniform (flat data[] as well as nested
      # data.images[]), so it is parsed defensively by parse_image_response.
      #
      # @param prompt [#to_s] must not be nil or blank
      # @param params [Hash] :model overrides the configured model; :size, :user and
      #   :response_format are forwarded when truthy
      # @return [Array<Hash>] one { url:, b64_json: } per image
      # @raise [Error] on a blank prompt, a missing model, or an unexpected failure
      # @raise [LLMAPIError] when the response contains no images
      def generate_image(prompt, params = {})
        SmartPrompt.logger.info "ZhipuAIAdapter: generating image"
        raise Error, "Prompt cannot be empty" if prompt.nil? || prompt.to_s.strip.empty?

        model_name = params[:model] || @config["image_model"] || @config["model"]
        raise Error, "No model configured for image generation" if model_name.nil? || model_name.to_s.strip.empty?

        body = { "model" => model_name, "prompt" => prompt.to_s }
        %i[size user response_format].each { |key| body[key.to_s] = params[key] if params[key] }

        SmartPrompt.logger.info "Zhipu image params: #{body.except('prompt').inspect}"
        response =
          begin
            http_post_json(@image_url, body)
          rescue LLMAPIError, Error
            raise
          rescue => e
            raise Error, "Failed to call Zhipu image generation: #{e.message}"
          end

        images = parse_image_response(response)
        SmartPrompt.logger.info "ZhipuAIAdapter: generated #{images.size} image(s)"
        images
      end

      private

      # Zhipu image response: cogview-3-flash returns the FLAT OpenAI shape data[].url;
      # older docs mention a NESTED data.images[].url. Handle both, plus a bare-url
      # array and a single bare url string.
      #
      # @return [Array<Hash>] one { url:, b64_json: } per image
      # @raise [LLMAPIError] when no image list can be found
      def parse_image_response(response)
        container = response["data"]
        items =
          case container
          when Hash  then container["images"] || container["data"] || container["url"]
          when Array then container
          end
        items ||= response["images"] || response["urls"]

        # A lone url (e.g. data.url) arrives as a String; treat it as a one-image list
        # instead of rejecting it for not being an Array.
        items = [items] if items.is_a?(String)

        # Some responses return images as a bare array of URLs (strings).
        items = items.map { |entry| entry.is_a?(String) ? { "url" => entry } : entry } if items.is_a?(Array)

        unless items.is_a?(Array) && items.any?
          SmartPrompt.logger.error "Zhipu image response had no images: #{response.inspect}"
          raise LLMAPIError, "No image data in Zhipu response"
        end
        items.map { |image| { url: image["url"], b64_json: image["b64_json"] } }
      end
    end
  end
end
@@ -0,0 +1,17 @@
module SmartPrompt
  module ZhipuAI
    # Rerank (reorder documents by relevance to a query).
    module Rerank
      # POST query + documents to "<@base_url>/rerank" and normalise the result list.
      #
      # @param query [Object] sent as "query"
      # @param documents [Object] sent as "documents"
      # @param model [String, nil] falls back to @config["rerank_model"], then @config["model"]
      # @return [Array<Hash>] one { index:, relevance_score: } per entry of the
      #   response's "results" (empty when that key is absent); the score is read
      #   from "relevance_score", else "score"
      # @raise [LLMAPIError] wrapping any error that is not already a SmartPrompt error
      def rerank(query, documents, model: nil)
        payload = {
          "model" => model || @config["rerank_model"] || @config["model"],
          "query" => query,
          "documents" => documents,
        }
        ranked = http_post_json("#{@base_url}/rerank", payload)["results"] || []
        ranked.map do |entry|
          score = entry["relevance_score"] || entry["score"]
          { index: entry["index"], relevance_score: score }
        end
      rescue LLMAPIError, Error
        raise
      rescue => e
        raise LLMAPIError, "Failed to call Zhipu rerank: #{e.message}"
      end
    end
  end
end
@@ -0,0 +1,57 @@
module SmartPrompt
  module ZhipuAI
    # Text chat + vision (OpenAI-compatible /chat/completions, SSE streaming,
    # reasoning_content passthrough). CodeGeeX/coding models use a separate base.
    #
    # Mixin: relies on @config, @base_url, @coding_base and on http_post_json,
    # stream_chat, process_multimodal_messages, build_completion_response and
    # build_stream_chunk from the including object.
    module Text
      # Config keys copied into the chat body whenever @config has them.
      CHAT_OPTIONAL_KEYS = %w[
        top_p max_tokens do_sample stop presence_penalty frequency_penalty thinking
      ].freeze

      # Chat / multimodal request.
      #
      # Without +proc+ the completion is fetched in one call, passed through
      # build_completion_response, stored in @last_response and returned. With
      # +proc+ the request is streamed: each payload is passed through
      # build_stream_chunk and handed to +proc+ as (chunk, 0); the method then
      # returns nil.
      #
      # @raise [LLMAPIError] wrapping any error that is not already a SmartPrompt error
      def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
        model_name = model || @config["model"]
        body = build_chat_body(messages, model_name, temperature, tools)
        SmartPrompt.logger.info "ZhipuAIAdapter: chat request model=#{model_name} stream=#{!proc.nil?}"

        url = chat_url_for(model_name)
        unless proc
          @last_response = build_completion_response(http_post_json(url, body))
          SmartPrompt.logger.info "ZhipuAIAdapter: received chat response"
          return @last_response
        end

        body["stream"] = true
        stream_chat(url, body) { |data| proc.call(build_stream_chunk(data), 0) }
        SmartPrompt.logger.info "ZhipuAIAdapter: streaming request finished"
        nil
      rescue LLMAPIError, Error
        raise
      rescue => e
        SmartPrompt.logger.error "Zhipu chat error: #{e.message}"
        raise LLMAPIError, "Failed to call Zhipu chat: #{e.message}"
      end

      private

      # Chat endpoint for +model_name+: the coding base when the name contains
      # "codegeex" or @config["coding"] is truthy, otherwise the regular base.
      def chat_url_for(model_name)
        coding = model_name.to_s.include?("codegeex") || @config["coding"]
        base = coding ? @coding_base : @base_url
        "#{base}/chat/completions"
      end

      # Assemble the request body. A configured temperature takes precedence over
      # the +temperature+ argument; 0.7 is used when neither is set.
      def build_chat_body(messages, model_name, temperature, tools)
        body = {
          "model" => model_name,
          "messages" => process_multimodal_messages(messages),
          "temperature" => @config["temperature"] || temperature || 0.7,
        }
        CHAT_OPTIONAL_KEYS.each do |key|
          next unless @config.key?(key)

          body[key] = @config[key]
        end
        return body if tools.nil? || tools.empty?

        body["tools"] = tools
        body
      end
    end
  end
end
@@ -0,0 +1,101 @@
module SmartPrompt
  module ZhipuAI
    # Text-to-video / image-to-video (CogVideoX, async submit -> poll -> download).
    #
    # Mixin: reads @config, @video_url and @query_url and calls http_post_json,
    # http_get_json and normalize_image_url from the including object.
    module Video
      # Submit a text-to-video (or image-to-video) job.
      #
      # @param prompt [#to_s] sent as "prompt"
      # @param params [Hash] :model overrides the configured model; :quality, :fps,
      #   :duration, :with_audio, :resolution, :request_id and :seed are forwarded
      #   when not nil; :image_url is passed through normalize_image_url when truthy
      # @return [Hash] { task_id:, model:, raw: }
      # @raise [Error] when no model is configured or on an unexpected failure
      # @raise [LLMAPIError] when the response carries neither "id" nor "task_id"
      def generate_video(prompt, params = {})
        SmartPrompt.logger.info "ZhipuAIAdapter: submitting video job"
        model_name = params[:model] || @config["video_model"] || @config["model"]
        raise Error, "No model configured for video generation" if model_name.nil? || model_name.to_s.strip.empty?

        body = { "model" => model_name, "prompt" => prompt.to_s }
        %i[quality fps duration with_audio resolution request_id seed].each do |key|
          body[key.to_s] = params[key] unless params[key].nil?
        end
        body["image_url"] = normalize_image_url(params[:image_url]) if params[:image_url]

        SmartPrompt.logger.info "Zhipu video params: #{body.except('prompt').inspect}"
        response =
          begin
            http_post_json(@video_url, body)
          rescue LLMAPIError, Error
            raise
          rescue => e
            raise Error, "Failed to submit Zhipu video job: #{e.message}"
          end

        task_id = response["id"] || response["task_id"]
        raise LLMAPIError, "No task id in Zhipu video response: #{response.inspect}" unless task_id

        SmartPrompt.logger.info "ZhipuAIAdapter: video task #{task_id} submitted"
        { task_id: task_id, model: model_name, raw: response }
      end

      # Poll an async task. Returns the raw status hash (task_status etc.).
      #
      # @raise [LLMAPIError] wrapping any error that is not already a SmartPrompt error
      def check_video_status(task_id)
        SmartPrompt.logger.info "ZhipuAIAdapter: polling video task #{task_id}"
        http_get_json("#{@query_url}/#{URI.encode_www_form_component(task_id)}")
      rescue LLMAPIError, Error
        raise
      rescue => e
        raise LLMAPIError, "Failed to query Zhipu video task: #{e.message}"
      end

      # Block until the task finishes (or times out), then return the video URL.
      #
      # The task is always polled at least once; the timeout is only evaluated
      # after a poll that reported neither success nor failure.
      #
      # @param task_id [Object] id returned by generate_video
      # @param check_interval [Numeric] seconds to sleep between polls
      # @param timeout [Numeric] seconds after which polling gives up
      # @return [Hash] { task_id:, status: "SUCCESS", video_url:, cover_image_url:, raw: }
      # @raise [LLMAPIError] on failure status, timeout, or success without a url
      def wait_for_video_completion(task_id, check_interval: 10, timeout: 600)
        # Monotonic clock: elapsed time must not jump when the wall clock is adjusted.
        started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        loop do
          status = check_video_status(task_id)
          case task_status_of(status)
          when "SUCCESS"
            url = video_url_of(status)
            raise LLMAPIError, "Video succeeded but no url in: #{status.inspect}" unless url

            SmartPrompt.logger.info "ZhipuAIAdapter: video ready #{url}"
            return { task_id: task_id, status: "SUCCESS", video_url: url, cover_image_url: cover_url_of(status), raw: status }
          when "FAIL", "FAILED"
            raise LLMAPIError, "Zhipu video generation failed: #{status.inspect}"
          end

          elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
          raise LLMAPIError, "Zhipu video generation timeout after #{timeout}s" if elapsed > timeout

          SmartPrompt.logger.info "Zhipu video task #{task_id} still processing..."
          sleep(check_interval)
        end
      end

      # Download +video_url+ with a plain GET and write the body to +output_path+,
      # creating the parent directory if needed.
      #
      # @return [String] output_path
      # @raise [Error] on a non-2xx response or any other failure; a SmartPrompt
      #   error raised here keeps its class and gets the "Error downloading" prefix
      def download_video(video_url, output_path)
        uri = URI.parse(video_url)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = (uri.scheme == "https")
        response = http.request(Net::HTTP::Get.new(uri.request_uri))
        raise Error, "Failed to download video: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

        FileUtils.mkdir_p(File.dirname(output_path))
        File.binwrite(output_path, response.body)
        SmartPrompt.logger.info "Zhipu video saved to #{output_path}"
        output_path
      rescue => e
        raise e.is_a?(SmartPrompt::Error) ? e : Error, "Error downloading Zhipu video: #{e.message}"
      end

      private

      # Zhipu async task status is under task_status; accept a few aliases.
      # A status hash with neither key counts as "PROCESSING".
      def task_status_of(status)
        status["task_status"] || status["status"] || "PROCESSING"
      end

      # First entry of video_result, which is an Array: [{cover_image_url:, url:}].
      # A non-Array value is returned as-is.
      def first_video_result(status)
        result = status["video_result"]
        result.is_a?(Array) ? result.first : result
      end

      # Pull the first video url. Looks in the first video_result entry ("url",
      # then "video_url"), then falls back to top-level "video_url" and finally
      # data.video_url, also when the entry exists but carries no url.
      def video_url_of(status)
        item = first_video_result(status)
        from_result = item.is_a?(Hash) ? (item["url"] || item["video_url"]) : nil
        return from_result if from_result

        data = status["data"]
        status["video_url"] || (data.is_a?(Hash) ? data["video_url"] : nil)
      end

      # Cover image url of the first video_result entry, or nil.
      def cover_url_of(status)
        item = first_video_result(status)
        item.is_a?(Hash) ? (item["cover_image_url"] || item["cover_url"]) : nil
      end
    end
  end
end
@@ -0,0 +1,55 @@
module SmartPrompt
  module ZhipuAI
    # Speech synthesis (GLM-TTS) + speech recognition (GLM-ASR-2512).
    #
    # Mixin: reads @config and @base_url and calls http_post_binary and a
    # three-argument http_post_multipart(url, form, file) from the including object.
    module Voice
      # Returns a base64 data URL for the synthesized audio. GLM-TTS accepts wav/pcm
      # only (mp3/flac are rejected), hence the wav default.
      #
      # @param text [#to_s] must not be nil or blank
      # @param voice [String, nil] sent as "voice" when given
      # @param model [String, nil] falls back to @config["tts_model"], then "glm-tts"
      # @param response_format [String] sent as "response_format" and used in the data URL
      # @param opts [Hash] :speed and :emotion are forwarded when truthy
      # @return [String] "data:audio/<response_format>;base64,<payload>"
      # @raise [Error] on blank text or any unexpected failure
      def synthesize_speech(text, voice: nil, model: nil, response_format: "wav", **opts)
        SmartPrompt.logger.info "ZhipuAIAdapter: TTS"
        raise Error, "Text cannot be empty" if text.to_s.strip.empty?

        payload = { "model" => model || @config["tts_model"] || "glm-tts", "input" => text.to_s }
        payload["voice"] = voice if voice
        payload["response_format"] = response_format
        %i[speed emotion].each do |option|
          payload[option.to_s] = opts[option] if opts[option]
        end

        raw_audio = http_post_binary("#{@base_url}/audio/speech", payload)
        "data:audio/#{response_format};base64,#{Base64.strict_encode64(raw_audio)}"
      rescue LLMAPIError, Error
        raise
      rescue => e
        raise Error, "Failed to call Zhipu TTS: #{e.message}"
      end

      # Synthesize +text+ and write the decoded audio to +output_path+ (parent
      # directory is created when missing).
      #
      # @return [Hash] { file_path:, format: }
      def synthesize_to_file(text, output_path, voice: nil, model: nil, response_format: "wav", **opts)
        data_url = synthesize_speech(text, voice: voice, model: model, response_format: response_format, **opts)
        FileUtils.mkdir_p(File.dirname(output_path))
        encoded = data_url.sub(/\Adata:audio\/\w+;base64,/, "")
        File.binwrite(output_path, Base64.decode64(encoded))
        SmartPrompt.logger.info "Zhipu audio saved to #{output_path}"
        { file_path: output_path, format: response_format }
      end

      # Transcribe an audio file (local path). Returns {text:}.
      #
      # @param audio_file [String] path to an existing local file
      # @param model [String, nil] falls back to @config["asr_model"], then "glm-asr-2512"
      # @param language [String, nil] forwarded when given
      # @param opts [Hash] :prompt and :response_format are forwarded when truthy
      # @raise [Error] when the file is missing or on any unexpected failure
      def transcribe_audio(audio_file, model: nil, language: nil, **opts)
        SmartPrompt.logger.info "ZhipuAIAdapter: ASR #{File.basename(audio_file)}"
        raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)

        fields = { "model" => model || @config["asr_model"] || "glm-asr-2512" }
        fields["language"] = language if language
        %i[prompt response_format].each do |option|
          fields[option.to_s] = opts[option] if opts[option]
        end

        result = http_post_multipart("#{@base_url}/audio/transcriptions", fields, audio_file)
        { text: result["text"] }
      rescue LLMAPIError, Error
        raise
      rescue => e
        target = e.is_a?(SmartPrompt::Error) ? e : Error
        raise target, "Failed to call Zhipu ASR: #{e.message}"
      end
    end
  end
end
@@ -0,0 +1,147 @@
1
+ require "json"
2
+ require "net/http"
3
+ require "uri"
4
+
module SmartPrompt
  # Shared Net::HTTP plumbing for Net::HTTP-style adapters (ZhipuAI, SenseNova,
  # SiliconFlow). Each previously carried its own copy of post/get/binary/multipart
  # + SSE stream helpers, differing only in the provider label sprinkled through
  # log/exception messages — which the `provider_label` hook now supplies.
  #
  # http_post_multipart takes the general 5-arg shape (file_field + mime); Zhipu's
  # ASR call site uses a 3-arg shim defined on the adapter itself.
  #
  # Every request sends "Authorization: Bearer <@api_key>" and uses a 30 s open
  # timeout; the read timeout differs per helper.
  module HTTPClient
    # POST +body+ as JSON (read timeout 240 s).
    #
    # @return [Hash, Array] parsed JSON, or {} when the response body is empty
    # @raise [LLMAPIError] on a non-2xx response (also logged at error level)
    def http_post_json(url, body)
      uri = URI.parse(url)
      payload = body.to_json
      req = http_client_json_post(uri, payload)
      SmartPrompt.logger.debug "#{provider_label} POST #{uri} body=#{payload}"
      resp = http_client_connection(uri, 240).request(req)
      return http_client_parse_json(resp) if resp.is_a?(Net::HTTPSuccess)

      SmartPrompt.logger.error "#{provider_label} API error: #{resp.code} - #{resp.body}"
      raise LLMAPIError, "#{provider_label} API error: #{resp.code} - #{resp.body}"
    end

    # GET +url+ (read timeout 60 s).
    #
    # @return [Hash, Array] parsed JSON, or {} when the response body is empty
    # @raise [LLMAPIError] on a non-2xx response
    def http_get_json(url)
      uri = URI.parse(url)
      req = Net::HTTP::Get.new(uri.request_uri)
      req["Authorization"] = "Bearer #{@api_key}"
      SmartPrompt.logger.debug "#{provider_label} GET #{uri}"
      resp = http_client_connection(uri, 60).request(req)
      return http_client_parse_json(resp) if resp.is_a?(Net::HTTPSuccess)

      raise LLMAPIError, "#{provider_label} API error: #{resp.code} - #{resp.body}"
    end

    # POST +body+ as JSON and return the raw response body bytes (for binary
    # payloads like TTS audio). Read timeout 120 s.
    #
    # @raise [LLMAPIError] on a non-2xx response
    def http_post_binary(url, body)
      uri = URI.parse(url)
      resp = http_client_connection(uri, 120).request(http_client_json_post(uri, body.to_json))
      return resp.body if resp.is_a?(Net::HTTPSuccess)

      raise LLMAPIError, "#{provider_label} TTS API error: #{resp.code} - #{resp.body}"
    end

    # multipart/form-data POST with a file upload (ASR, voice upload). Read
    # timeout 180 s.
    #
    # @param form [Hash] plain fields, each sent as its own part
    # @param file_field [String] name of the file part
    # @param file_path [String] local file whose bytes are uploaded
    # @param mime [String] Content-Type of the file part
    # @return [Hash, Array] parsed JSON, or {} when the response body is empty
    # @raise [LLMAPIError] on a non-2xx response
    def http_post_multipart(url, form, file_field, file_path, mime)
      uri = URI.parse(url)
      boundary = "----SmartPrompt#{object_id}"

      req = Net::HTTP::Post.new(uri.request_uri)
      req["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
      req["Authorization"] = "Bearer #{@api_key}"
      req.body = http_client_multipart_body(form, file_field, file_path, mime, boundary)

      resp = http_client_connection(uri, 180).request(req)
      return http_client_parse_json(resp) if resp.is_a?(Net::HTTPSuccess)

      raise LLMAPIError, "#{provider_label} multipart API error: #{resp.code} - #{resp.body}"
    end

    # POST +body+ as JSON with Accept: text/event-stream (read timeout 300 s) and
    # yield each parsed SSE `data:` payload to the block. Lines that are not
    # `data:` lines, or whose payload is not valid JSON, are skipped; nothing is
    # yielded after a `[DONE]` payload.
    #
    # @raise [LLMAPIError] on a non-2xx response
    def stream_chat(url, body)
      uri = URI.parse(url)
      req = http_client_json_post(uri, body.to_json)
      req["Accept"] = "text/event-stream"

      buffer = +""
      done = false
      http_client_connection(uri, 300).request(req) do |response|
        unless response.is_a?(Net::HTTPSuccess)
          raise LLMAPIError, "#{provider_label} stream error: #{response.code} - #{response.body}"
        end

        response.read_body do |segment|
          # Keep draining after [DONE] instead of breaking out: leaving read_body
          # early would abandon the response half-read.
          next if done

          buffer << segment
          while !done && (idx = buffer.index("\n"))
            done = http_client_dispatch_sse(buffer.slice!(0, idx + 1)) { |event| yield event }
          end
        end

        # A final line that was not newline-terminated is still sitting in the buffer.
        http_client_dispatch_sse(buffer) { |event| yield event } unless done || buffer.empty?
      end
    end

    private

    # Net::HTTP instance for +uri+ with TLS enabled for https, a 30 s open
    # timeout and the given read timeout.
    def http_client_connection(uri, read_timeout)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == "https")
      http.open_timeout = 30
      http.read_timeout = read_timeout
      http
    end

    # Authorized POST request carrying an already-serialized JSON payload.
    def http_client_json_post(uri, payload)
      req = Net::HTTP::Post.new(uri.request_uri)
      req["Content-Type"] = "application/json"
      req["Authorization"] = "Bearer #{@api_key}"
      req.body = payload
      req
    end

    # Parsed JSON of a successful response; {} when the body is nil or empty.
    def http_client_parse_json(resp)
      resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
    end

    # Assemble the multipart payload as a binary string. Every piece is converted
    # with String#b before appending: appending raw file bytes to a UTF-8 string
    # that already holds non-ASCII text (e.g. a Chinese form value) raises
    # Encoding::CompatibilityError.
    def http_client_multipart_body(form, file_field, file_path, mime, boundary)
      body = String.new(encoding: Encoding::BINARY)
      form.each do |key, value|
        body << "--#{boundary}\r\n".b
        body << "Content-Disposition: form-data; name=\"#{http_client_escape_param(key)}\"\r\n\r\n".b
        body << "#{value}\r\n".b
      end

      disposition = "form-data; name=\"#{http_client_escape_param(file_field)}\"; " \
                    "filename=\"#{http_client_escape_param(File.basename(file_path))}\""
      body << "--#{boundary}\r\n".b
      body << "Content-Disposition: #{disposition}\r\n".b
      body << "Content-Type: #{mime}\r\n\r\n".b
      body << File.binread(file_path)
      body << "\r\n--#{boundary}--\r\n".b
      body
    end

    # Percent-escape the characters that would terminate a quoted header
    # parameter or start a new header line: `"` -> %22, CR -> %0D, LF -> %0A.
    def http_client_escape_param(value)
      value.to_s.b.gsub(/["\r\n]/) { |char| format("%%%02X", char.ord) }
    end

    # Handle one raw SSE line: yields the parsed payload of a `data:` line.
    #
    # @return [Boolean] true when the line carried the `[DONE]` payload
    def http_client_dispatch_sse(raw_line)
      line = raw_line.strip
      return false unless line.start_with?("data:")

      payload = line.sub(/\Adata:\s*/, "")
      return true if payload == "[DONE]"

      begin
        event = JSON.parse(payload)
      rescue JSON::ParserError
        return false
      end
      yield event
      false
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require "base64"
2
+ require "net/http"
3
+ require "uri"
4
+ require "fileutils"
5
+
module SmartPrompt
  # Shared image-saving logic for adapters that produce generated images (ZhipuAI,
  # SenseNova, SiliconFlow). Each previously carried a byte-identical copy of
  # save_image / save_single_image; this concern is the single source.
  #
  # Adapters override two hooks:
  #   * default_image_prefix — filename prefix when the caller passes none
  #                            (e.g. "zhipu_image", "sensenova_image", "siliconflow_image")
  #   * provider_label       — human label for the "Saved N <label> image(s)" log line
  module ImagePersistence
    # File extension per downloaded image media type; anything else is saved as png.
    IMAGE_EXTENSIONS = {
      "image/jpeg" => "jpg",
      "image/jpg" => "jpg",
      "image/png" => "png",
      "image/gif" => "gif",
      "image/webp" => "webp",
    }.freeze

    # Save one or many generated images to disk. Accepts the Array returned by
    # generate_image/edit_image or a single image hash. Returns the written paths.
    #
    # Files are named "<prefix>_<n>.<ext>" with n starting at 1.
    #
    # @param image_data [Array<Hash>, Hash] image hashes with :b64_json or :url
    # @param output_dir [String] created when missing
    # @param filename_prefix [String, nil] defaults to default_image_prefix
    # @return [Array<String>] written file paths
    def save_image(image_data, output_dir = "./output", filename_prefix = nil)
      FileUtils.mkdir_p(output_dir)
      images = image_data.is_a?(Array) ? image_data : [image_data]
      prefix = filename_prefix || default_image_prefix
      saved = images.each_with_index.map do |img, index|
        save_single_image(img, output_dir, "#{prefix}_#{index + 1}")
      end
      SmartPrompt.logger.info "Saved #{saved.size} #{provider_label} image(s) to #{output_dir}"
      saved
    end

    # Write a single image to "<output_dir>/<filename>.<ext>".
    #
    # :b64_json is preferred and always written as .png; otherwise :url is fetched
    # with a plain GET and the extension is derived from the response Content-Type.
    #
    # @return [String] the written file path
    # @raise [Error] when the download is not a 2xx response, or when the hash has
    #   neither :b64_json nor :url
    def save_single_image(image_data, output_dir, filename)
      if image_data[:b64_json]
        file_path = File.join(output_dir, "#{filename}.png")
        File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
      elsif image_data[:url]
        response = Net::HTTP.get_response(URI.parse(image_data[:url]))
        raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

        ext = image_extension_for(response["content-type"])
        file_path = File.join(output_dir, "#{filename}.#{ext}")
        File.binwrite(file_path, response.body)
      else
        raise Error, "No image data available to save"
      end
      file_path
    end

    # ---- hooks (override in adapter) -----------------------------------------

    def default_image_prefix
      "image"
    end

    def provider_label
      "Adapter"
    end

    private

    # Extension for a Content-Type header value. Parameters ("; charset=...") and
    # letter case are ignored so that e.g. "image/jpeg; charset=binary" still maps
    # to jpg; unknown or missing types fall back to png.
    def image_extension_for(content_type)
      media_type = content_type.to_s.split(";").first.to_s.strip.downcase
      IMAGE_EXTENSIONS.fetch(media_type, "png")
    end
  end
end