smart_prompt 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1477a83b116691863533a9b1726e40be07b03b4faa67b60ebc72fe6a290d60f1
4
- data.tar.gz: c9e71e998318d186f296495679573ccb2ad9b539420b3b9c3ee02314db8e2d8b
3
+ metadata.gz: 28b52596217dded1ca7b39d43e098de3b01f9db8e0a6acb06201399ef6c6877d
4
+ data.tar.gz: f29cb242e7900d1c1321451f05b5aa1075155653c6b894707ed7b172893d6833
5
5
  SHA512:
6
- metadata.gz: a433f3724915b38af6e3a1e66d3f52568ec305f85d3b038643ffeb1ae5522547a8e5c6769e6c53cf1a081ca353587ddd0156987a4d2c705f25f267e388a7f5b9
7
- data.tar.gz: 90b3ba6033912705cc096b17765f0ad221d6f8a6e6b3b2a4350345d4b0777ebdee359d30604320918891ee7965cb55528a0aab4c12b9d9eebd67237ff52d7876
6
+ metadata.gz: ba8c550d079e37c5b5155393522568bef857821cb1440d810517f37d1067b9b7276ef46e52a5ef9e0605e066e65b53a6dc100c4a42a71585d9d99d0354322160
7
+ data.tar.gz: 4a2cb608d7b3aeb757a813e75ef5710ad4d106811e743d91e3dbdbb9c07f44c6d184e334eb1528a8999b551b32bbf2c4ab6037bfaabb405055fd4ab053ca4eec
@@ -0,0 +1,95 @@
1
+ # 硅基流动 (SiliconFlow / SiliconCloud) Configuration for SmartPrompt
2
+ #
3
+ # Get an API key from https://cloud.siliconflow.cn/ and export it as
4
+ # SILICONFLOW_API_KEY. A single SiliconFlowAdapter covers every model category —
5
+ # just point a different llm entry at each model. Defaults use the free-tier models
6
+ # so it runs out-of-box once the key is set.
7
+ #
8
+ # Base URL: https://api.siliconflow.cn/v1
9
+ # Auth: Authorization: Bearer <API_KEY>
10
+ #
11
+ # All categories share one base URL + Bearer auth. Notable SiliconFlow quirks
12
+ # (handled by the adapter):
13
+ # * image response is images[].url (not OpenAI's data[])
14
+ # * video is async: POST /video/submit -> {requestId}; POST /video/status
15
+ # (body {requestId}) -> status Succeed/InQueue/InProgress/Failed,
16
+ # result at results.videos[].url
17
+ # * rerank score field is relevance_score (not score)
18
+ # * TTS /audio/speech returns raw binary audio
19
+
20
+ adapters:
21
+ siliconflow: "SiliconFlowAdapter"
22
+
23
+ llms:
24
+ # 1. 文本对话 (free-tier Qwen2.5-7B; reasoning: deepseek-ai/DeepSeek-R1)
25
+ sf_chat:
26
+ adapter: "siliconflow"
27
+ url: "https://api.siliconflow.cn/v1"
28
+ api_key: ENV["SILICONFLOW_API_KEY"]
29
+ model: "Qwen/Qwen2.5-7B-Instruct"
30
+ temperature: 0.7
31
+ # Optional sampling extras (forwarded to /chat/completions):
32
+ # top_p: 0.7
33
+ # max_tokens: 2048
34
+ # enable_thinking: true # for Qwen3 / GLM thinking models
35
+ # thinking_budget: 4096
36
+
37
+ # 2. 多模态 (vision; Qwen2.5-VL. audio/video needs an omni model like Qwen3-Omni)
38
+ sf_vision:
39
+ adapter: "siliconflow"
40
+ url: "https://api.siliconflow.cn/v1"
41
+ api_key: ENV["SILICONFLOW_API_KEY"]
42
+ model: "Qwen/Qwen2.5-VL-72B-Instruct"
43
+ # Paid alternatives: Qwen/Qwen3-VL-32B-Instruct, Qwen/Qwen3-Omni-30B-A3B-Instruct
44
+
45
+ # 3. 向量模型 (BAAI/bge-m3 — fixed 1024 dims. For custom dimensions use
46
+ # Qwen/Qwen3-Embedding-8B and set `dimensions`.)
47
+ sf_embed:
48
+ adapter: "siliconflow"
49
+ url: "https://api.siliconflow.cn/v1"
50
+ api_key: ENV["SILICONFLOW_API_KEY"]
51
+ model: "BAAI/bge-m3"
52
+ # dimensions: 1024 # only honored by Qwen3-Embedding series
53
+
54
+ # 4. 重排 (rerank). relevance_score field, not score.
55
+ sf_rerank:
56
+ adapter: "siliconflow"
57
+ url: "https://api.siliconflow.cn/v1"
58
+ api_key: ENV["SILICONFLOW_API_KEY"]
59
+ model: "BAAI/bge-reranker-v2-m3"
60
+
61
+ # 5. 文生图 (free-tier Kolors; edit: Qwen/Qwen-Image-Edit-2509)
62
+ sf_image:
63
+ adapter: "siliconflow"
64
+ url: "https://api.siliconflow.cn/v1"
65
+ api_key: ENV["SILICONFLOW_API_KEY"]
66
+ model: "Kwai-Kolors/Kolors"
67
+
68
+ # 6. 文生视频 / 图生视频 (async; only Wan2.2 series is available)
69
+ sf_video:
70
+ adapter: "siliconflow"
71
+ url: "https://api.siliconflow.cn/v1"
72
+ api_key: ENV["SILICONFLOW_API_KEY"]
73
+ model: "Wan-AI/Wan2.2-T2V-A14B"
74
+ # image-to-video: model "Wan-AI/Wan2.2-I2V-A14B" and pass an image
75
+
76
+ # 7. 语音合成 (CosyVoice2; preset voices need the model prefix, e.g.
77
+ # "FunAudioLLM/CosyVoice2-0.5B:alex")
78
+ sf_tts:
79
+ adapter: "siliconflow"
80
+ url: "https://api.siliconflow.cn/v1"
81
+ api_key: ENV["SILICONFLOW_API_KEY"]
82
+ model: "FunAudioLLM/CosyVoice2-0.5B"
83
+
84
+ # 8. 语音识别 (SenseVoiceSmall, free, multilingual)
85
+ sf_asr:
86
+ adapter: "siliconflow"
87
+ url: "https://api.siliconflow.cn/v1"
88
+ api_key: ENV["SILICONFLOW_API_KEY"]
89
+ model: "FunAudioLLM/SenseVoiceSmall"
90
+
91
+ default_llm: "sf_chat"
92
+
93
+ template_path: "./templates"
94
+ worker_path: "./workers"
95
+ logger_file: "./logs/smart_prompt.log"
@@ -0,0 +1,175 @@
1
+ # 硅基流动 (SiliconFlow / SiliconCloud) Example for SmartPrompt
2
+ #
3
+ # Demonstrates every SiliconFlow model category through one SiliconFlowAdapter:
4
+ # 1. 文本对话 (chat) — sync + streaming
5
+ # 2. 多模态 (vision)
6
+ # 3. 向量模型 (embeddings)
7
+ # 4. 重排 (rerank)
8
+ # 5. 文生图 (image)
9
+ # 6. 文生视频 (async submit -> poll -> download)
10
+ # 7. 语音合成 (TTS)
11
+ # 8. 语音识别 (ASR)
12
+ #
13
+ # Requires a valid SiliconFlow API key in SILICONFLOW_API_KEY
14
+ # (https://cloud.siliconflow.cn/). Defaults use free-tier models so it works
15
+ # out-of-box once the key is set.
16
+
17
require "yaml"
require_relative "../lib/smart_prompt"

# Demo driver: builds a temporary SmartPrompt config for SiliconFlow, then runs
# one example per model category. Each example rescues its own errors so a
# failure in one category does not stop the others.

api_key = ENV["SILICONFLOW_API_KEY"]
base = "https://api.siliconflow.cn/v1"
config_path = "siliconflow_config.yml"

config = {
  "adapters" => { "siliconflow" => "SiliconFlowAdapter" },
  "llms" => {
    "sf_chat" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "Qwen/Qwen2.5-7B-Instruct" },
    "sf_vision" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "Qwen/Qwen2.5-VL-72B-Instruct" },
    "sf_embed" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "BAAI/bge-m3" },
    "sf_rerank" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "BAAI/bge-reranker-v2-m3" },
    "sf_image" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "Kwai-Kolors/Kolors" },
    "sf_video" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "Wan-AI/Wan2.2-T2V-A14B" },
    "sf_tts" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "FunAudioLLM/CosyVoice2-0.5B" },
    "sf_asr" => { "adapter" => "siliconflow", "url" => base, "api_key" => api_key, "model" => "FunAudioLLM/SenseVoiceSmall" },
  },
  "default_llm" => "sf_chat",
  "template_path" => "./templates",
  "worker_path" => "./workers",
  "logger_file" => "./logs/smart_prompt.log",
}

# The generated file contains the API key: restrict its permissions and
# register the cleanup immediately, so it is removed even when the engine
# fails to start or the script exits early.
File.write(config_path, config.to_yaml, perm: 0o600)
at_exit { File.delete(config_path) if File.exist?(config_path) }
engine = SmartPrompt::Engine.new(config_path)

puts "=== SmartPrompt 硅基流动 SiliconFlow Demo ==="
unless api_key
  puts "Note: SILICONFLOW_API_KEY is not set — the API calls below will fail at the network layer."
end

# 1. Chat (sync)
puts "\n=== Example 1: 文本对话 (sync) ==="
begin
  result = engine.call_worker(:siliconflow_chat, { prompt: "用一句话介绍硅基流动 SiliconFlow。" })
  puts "Reply: #{result}"
rescue => e
  puts "Error: #{e.message}"
end

# 2. Chat (streaming)
puts "\n=== Example 2: 文本对话 (streaming) ==="
begin
  engine.call_worker_by_stream(:siliconflow_chat, { prompt: "写两句关于春天的诗。" }) do |chunk, _|
    print chunk.dig("choices", 0, "delta", "content").to_s
  end
  puts
rescue => e
  puts "Error: #{e.message}"
end

# 3. Multimodal vision
puts "\n=== Example 3: 多模态 ==="
begin
  result = engine.call_worker(:siliconflow_vision, {
    image_url: "https://img1.baidu.com/it/u=1966616150,2146512490&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=282",
    question: "图片里有什么?",
  })
  puts "Vision result: #{result}"
rescue => e
  puts "Error: #{e.message}"
end

# 4. Embeddings (BAAI/bge-m3)
puts "\n=== Example 4: 向量模型 ==="
begin
  vector = engine.call_worker(:siliconflow_embed, { text: "硅基流动大模型", length: 1024 })
  # Branch explicitly on the result type instead of using an inline `rescue`,
  # which would hide any unrelated error raised while formatting.
  if vector.is_a?(Array)
    puts "Embedding dim: #{vector.size} (first 5: #{vector.first(5)})"
  else
    puts "Embedding dim: #{vector} (first 5: #{vector})"
  end
rescue => e
  puts "Error: #{e.message}"
end

# 5. Rerank — reorder documents by relevance to a query.
puts "\n=== Example 5: 重排 (rerank) ==="
begin
  result = engine.call_worker(:siliconflow_rerank, {
    query: "如何用 Python 读取文件?",
    documents: [
      "在 Python 中可以用 open() 函数打开文件并读取内容。",
      "JavaScript 是一种运行在浏览器中的脚本语言。",
      "使用 with open(path) as f 可以安全地读取文件。",
    ],
    top_n: 2,
  })
  result.each { |r| puts " idx=#{r[:index]} score=#{r[:relevance_score]}" }
rescue => e
  puts "Error: #{e.message}"
end

# 6. Text-to-image (Kolors)
puts "\n=== Example 6: 文生图 ==="
begin
  result = engine.call_worker(:siliconflow_image, {
    prompt: "一只在书房里读书的猫,水墨画风格",
    image_size: "1024x1024",
    save_to_file: true,
    output_dir: "./generated_images",
    filename_prefix: "siliconflow_cat",
  })
  if result.is_a?(Hash) && result[:images]
    puts "Generated #{result[:images].size} image(s); first URL: #{result[:images].first[:url]}"
    puts "Saved files: #{result[:saved_files]}"
  else
    puts "Result: #{result}"
  end
rescue => e
  puts "Error: #{e.message}"
end

# 7. Text-to-video (Wan2.2, async) — may take a couple of minutes.
puts "\n=== Example 7: 文生视频 (async) ==="
begin
  result = engine.call_worker(:siliconflow_video, {
    prompt: "一只猫在阳光下打盹",
    wait_for_completion: true,
    download_to_file: true,
    output_dir: "./generated_videos",
    timeout: 600,
  })
  if result[:video]
    puts "Video ready: #{result[:video][:video_url]}"
    puts "Downloaded: #{result[:downloaded_file]}" if result[:downloaded_file]
  else
    puts "Submitted request: #{result[:submitted]}"
  end
rescue => e
  puts "Error: #{e.message}"
end

# 8. TTS (CosyVoice2)
puts "\n=== Example 8: 语音合成 (TTS) ==="
begin
  info = engine.call_worker(:siliconflow_tts, {
    text: "你好,这是硅基流动语音合成的测试。",
    voice: "FunAudioLLM/CosyVoice2-0.5B:alex",
    output_path: "./generated_audio/siliconflow_tts.mp3",
  })
  puts "Audio saved: #{info[:file_path]}"
rescue => e
  puts "Error: #{e.message}"
end

# 9. ASR (SenseVoiceSmall) — needs a real audio file path.
puts "\n=== Example 9: 语音识别 (ASR) ==="
audio = ENV["SILICONFLOW_ASR_SAMPLE"] || "./generated_audio/siliconflow_tts.mp3"
if File.exist?(audio)
  begin
    result = engine.call_worker(:siliconflow_asr, { audio_file: audio })
    puts "Transcription: #{result[:text]}"
  rescue => e
    puts "Error: #{e.message}"
  end
else
  puts "Skipped: set SILICONFLOW_ASR_SAMPLE to an audio file path (or run TTS first) to test ASR."
end

puts "\n=== All examples completed ==="
@@ -0,0 +1,33 @@
1
+ module SmartPrompt
2
+ module SiliconFlow
3
+ # Embeddings (BAAI/bge-m3 default; Qwen3-Embedding supports custom dimensions).
4
+ module Embed
5
# Request embeddings from the "#{@base_url}/embeddings" endpoint.
#
# The model is the +model+ argument, else config "embedding_model", else
# config "model". A non-Array +text+ is stringified and wrapped in a
# one-element input list. "dimensions" and "encoding_format" are sent only
# when present in the config.
#
# Returns the "embedding" of the first item in the response "data" list.
# Raises LLMAPIError when the call fails or no embedding is present.
def embeddings(text, model)
  chosen_model = model || @config["embedding_model"] || @config["model"]
  SmartPrompt.logger.info "SiliconFlowAdapter: embeddings model=#{chosen_model}"

  inputs = text.is_a?(Array) ? text : [text.to_s]
  payload = { "model" => chosen_model, "input" => inputs }
  %w[dimensions encoding_format].each do |option|
    payload[option] = @config[option] if @config[option]
  end

  begin
    reply = http_post_json("#{@base_url}/embeddings", payload)
  rescue LLMAPIError, Error
    # Errors already raised in the adapter's own classes pass through untouched.
    raise
  rescue => e
    raise LLMAPIError, "Failed to call SiliconFlow embeddings: #{e.message}"
  end

  rows = reply["data"]
  first_vector = rows.is_a?(Array) && rows.any? && rows[0]["embedding"]
  raise LLMAPIError, "No embedding vector in SiliconFlow response: #{reply.inspect}" unless first_vector

  first_vector
end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,103 @@
1
+ module SmartPrompt
2
+ module SiliconFlow
3
+ # Text-to-image (Kolors) + image editing (Qwen-Image-Edit). save_image comes from
4
+ # the ImagePersistence concern.
5
+ module Image
6
+ # Default resolution for text-to-image (Kolors accepts these "WxH" values).
7
+ DEFAULT_IMAGE_SIZE = "1024x1024".freeze
8
+
9
# Text-to-image generation via a POST to @image_url.
#
# +prompt+ must be non-blank. The model comes from params[:model], else
# config "image_model", else config "model". "image_size" is always sent
# (resolved from params[:image_size] or params[:size]); negative_prompt,
# seed, num_inference_steps, guidance_scale and batch_size (params[:batch_size]
# or params[:n]) are sent only when the caller supplied them.
#
# Returns whatever parse_image_response builds from the response.
# Raises Error for invalid input or an unexpected failure of the call.
def generate_image(prompt, params = {})
  SmartPrompt.logger.info "SiliconFlowAdapter: generating image"
  raise Error, "Prompt cannot be empty" if prompt.to_s.strip.empty?

  chosen_model = params[:model] || @config["image_model"] || @config["model"]
  raise Error, "No model configured for image generation" if chosen_model.nil? || chosen_model.to_s.strip.empty?

  payload = {
    "model" => chosen_model,
    "prompt" => prompt.to_s,
    "image_size" => resolve_image_size(params[:image_size] || params[:size]),
  }
  %i[negative_prompt seed num_inference_steps guidance_scale].each do |option|
    payload[option.to_s] = params[option] if params[option]
  end
  # batch_size is only forwarded on explicit request.
  batch_size = params[:batch_size] || params[:n]
  payload["batch_size"] = batch_size if batch_size

  # The prompt is left out of the log line.
  loggable = payload.reject { |key, _| key == "prompt" }
  SmartPrompt.logger.info "SiliconFlow image params: #{loggable.inspect}"

  begin
    reply = http_post_json(@image_url, payload)
  rescue LLMAPIError, Error
    raise
  rescue => e
    raise Error, "Failed to call SiliconFlow image generation: #{e.message}"
  end

  generated = parse_image_response(reply)
  SmartPrompt.logger.info "SiliconFlowAdapter: generated #{generated.size} image(s)"
  generated
end
44
+
45
# Image editing / image-to-image via a POST to @image_url.
#
# Requires a non-blank +prompt+ and an input image in params[:image] or
# params[:image_file]; params[:image2] / params[:image3] are optional extra
# inputs. Every image goes through normalize_input_image before being sent.
# No "image_size" is included in the request. negative_prompt, seed and
# guidance_scale are forwarded only when given.
#
# Returns whatever parse_image_response builds from the response.
# Raises Error for invalid input or an unexpected failure of the call.
def edit_image(prompt, params = {})
  SmartPrompt.logger.info "SiliconFlowAdapter: editing image"
  raise Error, "Prompt cannot be empty" if prompt.to_s.strip.empty?
  if params[:image].nil? && params[:image_file].nil?
    raise Error, "An input image is required for image editing"
  end

  chosen_model = params[:model] || @config["image_model"] || @config["model"]
  raise Error, "No model configured for image generation" if chosen_model.nil? || chosen_model.to_s.strip.empty?

  payload = {
    "model" => chosen_model,
    "prompt" => prompt.to_s,
    "image" => normalize_input_image(params[:image] || params[:image_file]),
  }
  %i[image2 image3].each do |slot|
    payload[slot.to_s] = normalize_input_image(params[slot]) if params[slot]
  end
  %i[negative_prompt seed guidance_scale].each do |option|
    payload[option.to_s] = params[option] if params[option]
  end

  # The prompt and the image payloads are left out of the log line.
  hidden = %w[prompt image image2 image3]
  loggable = payload.reject { |key, _| hidden.include?(key) }
  SmartPrompt.logger.info "SiliconFlow image edit params: #{loggable.inspect}"

  begin
    reply = http_post_json(@image_url, payload)
  rescue LLMAPIError, Error
    raise
  rescue => e
    raise Error, "Failed to call SiliconFlow image edit: #{e.message}"
  end

  edited = parse_image_response(reply)
  SmartPrompt.logger.info "SiliconFlowAdapter: edited into #{edited.size} image(s)"
  edited
end
78
+
79
+ private
80
+
81
# Turn an image response into an Array of {url:, b64_json:, seed:} hashes.
# Items are read from response["images"], falling back to response["data"];
# an item that is a bare String is treated as the url. Logs and raises
# LLMAPIError when no non-empty Array of items is found.
def parse_image_response(response)
  entries = response["images"] || response["data"]
  unless entries.is_a?(Array) && !entries.empty?
    SmartPrompt.logger.error "No image data in SiliconFlow response: #{response.inspect}"
    raise LLMAPIError, "No image data in SiliconFlow response"
  end

  entries.map do |entry|
    record = entry.is_a?(String) ? { "url" => entry } : entry
    %w[url b64_json seed].to_h { |field| [field.to_sym, record[field]] }
  end
end
95
+
96
# Return +size+ as a String, or DEFAULT_IMAGE_SIZE when it is nil or blank.
def resolve_image_size(size)
  requested = size.to_s
  requested.strip.empty? ? DEFAULT_IMAGE_SIZE : requested
end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,41 @@
1
+ module SmartPrompt
2
+ module SiliconFlow
3
+ # Rerank (reorder documents by relevance to a query).
4
+ module Rerank
5
# Rerank +documents+ against +query+ via a POST to "#{@base_url}/rerank".
#
# The model is +model+, else config "rerank_model", else config "model".
# Recognised options in +opts+: :top_n, :return_documents (forwarded whenever
# it is not nil, so an explicit false is sent), :max_chunks_per_doc,
# :chunk_overlap_tokens and :instruction.
#
# Returns the result of parse_rerank_response on the response. Unexpected
# failures of the call are re-raised as LLMAPIError.
def rerank(query, documents, model: nil, **opts)
  chosen_model = model || @config["rerank_model"] || @config["model"]
  SmartPrompt.logger.info "SiliconFlowAdapter: rerank model=#{chosen_model}"

  payload = { "model" => chosen_model, "query" => query.to_s, "documents" => documents }
  payload["top_n"] = opts[:top_n] if opts[:top_n]
  payload["return_documents"] = opts[:return_documents] unless opts[:return_documents].nil?
  %i[max_chunks_per_doc chunk_overlap_tokens instruction].each do |option|
    value = opts[option]
    payload[option.to_s] = value if value
  end

  begin
    reply = http_post_json("#{@base_url}/rerank", payload)
  rescue LLMAPIError, Error
    raise
  rescue => e
    raise LLMAPIError, "Failed to call SiliconFlow rerank: #{e.message}"
  end

  parse_rerank_response(reply)
end
30
+
31
+ private
32
+
33
# Convert a rerank response into an Array of result hashes.
#
# Each entry carries :index and :relevance_score (read from "relevance_score",
# falling back to "score"). When a result also contains a "document" key —
# which callers can request through the return_documents option — it is
# passed through unchanged as :document instead of being dropped.
#
# A missing "results" key yields []. A "results" value that is not an Array
# is logged as a warning and also yields [], rather than failing inside `map`.
def parse_rerank_response(response)
  results = response["results"]
  return [] if results.nil?

  unless results.is_a?(Array)
    SmartPrompt.logger.warn "Unexpected SiliconFlow rerank results: #{results.inspect}"
    return []
  end

  results.map do |item|
    entry = { index: item["index"], relevance_score: item["relevance_score"] || item["score"] }
    entry[:document] = item["document"] if item.key?("document")
    entry
  end
end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,54 @@
1
+ module SmartPrompt
2
+ module SiliconFlow
3
+ # Text chat + multimodal vision (OpenAI-compatible /chat/completions, SSE streaming,
4
+ # reasoning_content passthrough).
5
+ module Text
6
+ CHAT_OPTIONAL_KEYS = %w[
7
+ top_p top_k frequency_penalty presence_penalty
8
+ max_tokens max_completion_tokens stop response_format
9
+ enable_thinking thinking_budget min_p reasoning_effort seed
10
+ ].freeze
11
+
12
# Send a chat request to "#{@base_url}/chat/completions".
#
# Without +proc+: posts the body, passes the raw reply through
# build_completion_response, stores the result in @last_response and returns it.
# With +proc+: sets "stream" => true, and for every item yielded by stream_chat
# calls proc with (build_stream_chunk(item), 0); returns nil.
#
# LLMAPIError and Error propagate unchanged; any other exception is logged and
# re-raised as LLMAPIError.
def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
  chosen_model = model || @config["model"]
  payload = build_chat_body(messages, chosen_model, temperature, tools)
  SmartPrompt.logger.info "SiliconFlowAdapter: chat request model=#{chosen_model} stream=#{!proc.nil?}"

  endpoint = "#{@base_url}/chat/completions"
  unless proc
    completion = build_completion_response(http_post_json(endpoint, payload))
    @last_response = completion
    SmartPrompt.logger.info "SiliconFlowAdapter: received chat response"
    return completion
  end

  payload["stream"] = true
  stream_chat(endpoint, payload) { |event| proc.call(build_stream_chunk(event), 0) }
  SmartPrompt.logger.info "SiliconFlowAdapter: streaming request finished"
  nil
rescue LLMAPIError, Error
  raise
rescue => e
  SmartPrompt.logger.error "SiliconFlow chat error: #{e.message}"
  raise LLMAPIError, "Failed to call SiliconFlow chat: #{e.message}"
end
39
+
40
+ private
41
+
42
# Assemble the request body for a chat call.
#
# Temperature is taken from config "temperature" first, then the +temperature+
# argument, then 0.7. Every key in CHAT_OPTIONAL_KEYS that exists in the config
# is copied over as-is. "tools" is added only for a non-empty +tools+ value.
def build_chat_body(messages, model_name, temperature, tools)
  payload = {
    "model" => model_name,
    "messages" => process_multimodal_messages(messages),
    "temperature" => @config["temperature"] || temperature || 0.7,
  }

  CHAT_OPTIONAL_KEYS
    .select { |option| @config.key?(option) }
    .each { |option| payload[option] = @config[option] }

  payload["tools"] = tools if tools && !tools.empty?
  payload
end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,111 @@
1
+ module SmartPrompt
2
+ module SiliconFlow
3
+ # Text-to-video / image-to-video (Wan2.2, async submit -> poll -> download).
4
+ module Video
5
+ # Text-to-video image_size enum (SiliconFlow rejects anything else).
6
+ VALID_VIDEO_SIZES = %w[1280x720 720x1280 960x960].freeze
7
+ DEFAULT_VIDEO_SIZE = "1280x720".freeze
8
+
9
# Submit a video generation job via a POST to @video_submit_url.
#
# The model comes from params[:model], else config "video_model", else config
# "model". "image_size" is always sent (resolved from params[:image_size] or
# params[:size]); negative_prompt and seed are sent when given, and
# params[:image] is sent after passing through normalize_input_image.
#
# Returns {request_id:, model:, raw:}, where the id is read from "requestId"
# (falling back to "request_id") in the response. Raises Error when no model
# is configured or the call fails unexpectedly, and LLMAPIError when the
# response carries no request id.
def generate_video(prompt, params = {})
  SmartPrompt.logger.info "SiliconFlowAdapter: submitting video job"
  chosen_model = params[:model] || @config["video_model"] || @config["model"]
  raise Error, "No model configured for video generation" if chosen_model.nil? || chosen_model.to_s.strip.empty?

  payload = {
    "model" => chosen_model,
    "prompt" => prompt.to_s,
    "image_size" => resolve_video_size(params[:image_size] || params[:size]),
  }
  %i[negative_prompt seed].each do |option|
    payload[option.to_s] = params[option] if params[option]
  end
  payload["image"] = normalize_input_image(params[:image]) if params[:image]

  # The prompt is left out of the log line.
  loggable = payload.reject { |key, _| key == "prompt" }
  SmartPrompt.logger.info "SiliconFlow video params: #{loggable.inspect}"

  begin
    reply = http_post_json(@video_submit_url, payload)
  rescue LLMAPIError, Error
    raise
  rescue => e
    raise Error, "Failed to submit SiliconFlow video job: #{e.message}"
  end

  job_id = reply["requestId"] || reply["request_id"]
  raise LLMAPIError, "No requestId in SiliconFlow video response: #{reply.inspect}" unless job_id

  SmartPrompt.logger.info "SiliconFlowAdapter: video request #{job_id} submitted"
  { request_id: job_id, model: chosen_model, raw: reply }
end
37
+
38
# Query the state of a video job by POSTing {"requestId" => request_id} to
# @video_status_url, and return the response as-is. LLMAPIError and Error
# propagate unchanged; any other failure is re-raised as LLMAPIError.
def check_video_status(request_id)
  SmartPrompt.logger.info "SiliconFlowAdapter: polling video request #{request_id}"
  begin
    payload = { "requestId" => request_id }
    http_post_json(@video_status_url, payload)
  rescue LLMAPIError, Error
    raise
  rescue => e
    raise LLMAPIError, "Failed to query SiliconFlow video status: #{e.message}"
  end
end
48
+
49
# Block until the video job finishes (or the timeout elapses), then return
# {request_id:, status: "Succeeded", video_url:, raw:}.
#
# The job is polled with check_video_status every +check_interval+ seconds.
# The success state is matched as either "Succeed" or "Succeeded": accepting
# both spellings means a finished job can never be mistaken for a pending one
# and polled until the timeout. "Failed" raises immediately; any other state
# (e.g. InQueue / InProgress) keeps polling.
#
# Raises LLMAPIError when the job fails, when it succeeds without a video url,
# or when more than +timeout+ seconds have elapsed while it is still pending.
def wait_for_video_completion(request_id, check_interval: 10, timeout: 600)
  # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  loop do
    status = check_video_status(request_id)
    state = video_status_of(status)

    case state
    when "Succeed", "Succeeded"
      url = video_url_of(status)
      raise LLMAPIError, "Video succeeded but no url in: #{status.inspect}" unless url

      SmartPrompt.logger.info "SiliconFlowAdapter: video ready #{url}"
      # The returned :status is kept as "Succeeded" regardless of the spelling
      # received, so existing callers see the same value as before.
      return { request_id: request_id, status: "Succeeded", video_url: url, raw: status }
    when "Failed"
      raise LLMAPIError, "SiliconFlow video generation failed: #{status["reason"] || status.inspect}"
    else
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
      raise LLMAPIError, "SiliconFlow video generation timeout after #{timeout}s" if elapsed > timeout

      SmartPrompt.logger.info "SiliconFlow video request #{request_id} #{state}..."
      sleep(check_interval)
    end
  end
end
72
+
73
# Fetch +video_url+ with a single HTTP GET and write the response body to
# +output_path+ (creating the parent directory first). Returns +output_path+.
#
# Any failure is re-raised with the message prefix
# "Error downloading SiliconFlow video: ": as the same exception class when it
# is already a SmartPrompt::Error, otherwise as Error.
def download_video(video_url, output_path)
  target = URI.parse(video_url)
  client = Net::HTTP.new(target.host, target.port)
  client.use_ssl = (target.scheme == "https")

  reply = client.request(Net::HTTP::Get.new(target.request_uri))
  raise Error, "Failed to download video: #{reply.code}" unless reply.is_a?(Net::HTTPSuccess)

  FileUtils.mkdir_p(File.dirname(output_path))
  File.binwrite(output_path, reply.body)
  SmartPrompt.logger.info "SiliconFlow video saved to #{output_path}"
  output_path
rescue => e
  raised_as = e.is_a?(SmartPrompt::Error) ? e : Error
  raise raised_as, "Error downloading SiliconFlow video: #{e.message}"
end
85
+
86
+ private
87
+
88
# Read the job state from the top-level "status" field of a status response,
# treating a missing value as "InQueue".
def video_status_of(status)
  reported = status["status"]
  reported ? reported : "InQueue"
end
92
+
93
# Extract the video url from a status response. Looks at results.videos,
# taking the first element when it is an Array (or the value itself
# otherwise), and reads "url" with "video_url" as a fallback. Returns nil
# when that element is not a Hash.
def video_url_of(status)
  videos = status.dig("results", "videos")
  candidate = videos.is_a?(Array) ? videos.first : videos
  return nil unless candidate.is_a?(Hash)

  candidate["url"] || candidate["video_url"]
end
99
+
100
# Return the image_size to send for a video job: +size+ as a String, or
# DEFAULT_VIDEO_SIZE when it is nil or blank. A value outside
# VALID_VIDEO_SIZES is still returned, but a warning is logged first.
def resolve_video_size(size)
  requested = size.to_s
  requested = DEFAULT_VIDEO_SIZE if requested.strip.empty?
  return requested if VALID_VIDEO_SIZES.include?(requested)

  SmartPrompt.logger.warn "SiliconFlow video image_size '#{requested}' is not in the known-valid list " \
                          "(#{VALID_VIDEO_SIZES.join(', ')}); the API may reject it."
  requested
end
109
+ end
110
+ end
111
+ end