smart_prompt 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/README.cn.md +305 -11
- data/README.md +309 -11
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/docs/ANTHROPIC_EXAMPLES.md +559 -0
- data/docs/CONVERSATION_INTEGRATION_SUMMARY.md +155 -0
- data/docs/HISTORY_EXAMPLES_README.md +533 -0
- data/docs/HISTORY_MANAGEMENT_GUIDE.md +797 -0
- data/docs/MONITORING_GUIDE.md +278 -0
- data/docs/MULTIMODAL_README.md +265 -0
- data/docs/RELEVANCE_BASED_STRATEGY_IMPLEMENTATION.md +124 -0
- data/docs/STT_README.md +302 -0
- data/docs/TTS_README.md +303 -0
- data/docs/VIDEO_GENERATION_README.md +246 -0
- data/docs/delete_files_list.md +124 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +407 -298
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +47 -4
- data/lib/smart_prompt/engine.rb +29 -2
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +28 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +21 -0
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +88 -1
data/README.md
CHANGED
|
@@ -11,13 +11,24 @@ SmartPrompt is a powerful Ruby gem that provides an elegant domain-specific lang
|
|
|
11
11
|
|
|
12
12
|
### Multi-LLM Support
|
|
13
13
|
- **OpenAI API Compatible**: Full support for OpenAI GPT models and compatible APIs
|
|
14
|
-
- **
|
|
14
|
+
- **Anthropic Claude**: Native support for Claude models with multimodal capabilities
|
|
15
|
+
- **SenseNova (商汤日日新)**: One adapter covers chat (商量), multimodal vision (图文多模态), Cupido embeddings (向量), and 秒画 text-to-image — see `examples/sensenova_example.rb`
|
|
16
|
+
- **智谱 AI (BigModel / GLM)**: One adapter covers all categories — chat (GLM-4), vision (GLM-4V), embeddings (embedding-3), text-to-image (CogView), text-to-video (CogVideoX), TTS (GLM-TTS), ASR (GLM-ASR) — see `examples/zhipu_example.rb`
|
|
17
|
+
- **Llama.cpp Integration**: Direct integration with local Llama.cpp servers
|
|
15
18
|
- **Extensible Adapters**: Easy-to-extend adapter system for new LLM providers
|
|
16
19
|
- **Unified Interface**: Same API regardless of the underlying LLM provider
|
|
17
20
|
|
|
21
|
+
### Multimodal AI Capabilities
|
|
22
|
+
- **Vision Models**: Support for image understanding and analysis
|
|
23
|
+
- **Image Generation**: Create images from text prompts using diffusion models
|
|
24
|
+
- **Video Generation**: Generate videos from text or image prompts
|
|
25
|
+
- **Text-to-Speech**: Convert text to natural-sounding speech
|
|
26
|
+
- **Speech-to-Text**: Transcribe audio files to text with multi-language support
|
|
27
|
+
|
|
18
28
|
### Flexible Architecture
|
|
19
29
|
- **Worker-based Tasks**: Define reusable workers for specific AI tasks
|
|
20
30
|
- **Template System**: ERB-based prompt templates with parameter injection
|
|
31
|
+
- **Intelligent History Management**: Session isolation, automatic compression, and multiple context strategies
|
|
21
32
|
- **Conversation Management**: Built-in conversation history and context management
|
|
22
33
|
- **Streaming Support**: Real-time response streaming for better user experience
|
|
23
34
|
|
|
@@ -26,6 +37,8 @@ SmartPrompt is a powerful Ruby gem that provides an elegant domain-specific lang
|
|
|
26
37
|
- **Retry Logic**: Robust error handling with configurable retry mechanisms
|
|
27
38
|
- **Embeddings**: Text embedding generation for semantic search and RAG applications
|
|
28
39
|
- **Configuration-driven**: YAML-based configuration for easy deployment management
|
|
40
|
+
- **Batch Processing**: Efficient processing of multiple files and tasks
|
|
41
|
+
- **Language Detection**: Automatic language identification from text and audio
|
|
29
42
|
|
|
30
43
|
### Production Ready
|
|
31
44
|
- **Comprehensive Logging**: Detailed logging for debugging and monitoring
|
|
@@ -61,6 +74,7 @@ Create a YAML configuration file (`config/smart_prompt.yml`):
|
|
|
61
74
|
# Adapter definitions
|
|
62
75
|
adapters:
|
|
63
76
|
openai: OpenAIAdapter
|
|
77
|
+
anthropic: AnthropicAdapter
|
|
64
78
|
# LLM configurations
|
|
65
79
|
llms:
|
|
66
80
|
SiliconFlow:
|
|
@@ -68,7 +82,13 @@ llms:
|
|
|
68
82
|
url: https://api.siliconflow.cn/v1/
|
|
69
83
|
api_key: ENV["APIKey"]
|
|
70
84
|
default_model: Qwen/Qwen2.5-7B-Instruct
|
|
71
|
-
|
|
85
|
+
claude:
|
|
86
|
+
adapter: anthropic
|
|
87
|
+
api_key: ENV["ANTHROPIC_API_KEY"]
|
|
88
|
+
model: claude-3-5-sonnet-20241022
|
|
89
|
+
temperature: 0.7
|
|
90
|
+
max_tokens: 4096
|
|
91
|
+
llamacpp:
|
|
72
92
|
adapter: openai
|
|
73
93
|
url: http://localhost:8080/
|
|
74
94
|
ollama:
|
|
@@ -238,7 +258,10 @@ end
|
|
|
238
258
|
|
|
239
259
|
### Conversation History
|
|
240
260
|
|
|
261
|
+
SmartPrompt provides intelligent conversation history management with session isolation, automatic compression, and multiple context strategies.
|
|
262
|
+
|
|
241
263
|
```ruby
|
|
264
|
+
# Basic usage with automatic history management
|
|
242
265
|
SmartPrompt.define_worker :conversational_chat do
|
|
243
266
|
use "deepseek"
|
|
244
267
|
model "deepseek-chat"
|
|
@@ -246,8 +269,38 @@ SmartPrompt.define_worker :conversational_chat do
|
|
|
246
269
|
prompt(params[:message], with_history: true)
|
|
247
270
|
send_msg
|
|
248
271
|
end
|
|
272
|
+
|
|
273
|
+
# Advanced usage with explicit session management
|
|
274
|
+
SmartPrompt.define_worker :session_chat do
|
|
275
|
+
use "deepseek"
|
|
276
|
+
model "deepseek-chat"
|
|
277
|
+
|
|
278
|
+
# Use session_id for isolated conversations
|
|
279
|
+
session_id = params[:session_id] || "default"
|
|
280
|
+
|
|
281
|
+
# Configure session behavior
|
|
282
|
+
session_config = {
|
|
283
|
+
max_messages: 100,
|
|
284
|
+
max_tokens: 4000,
|
|
285
|
+
context_strategy: :sliding_window # or :relevance_based, :summary_based, :hybrid
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
sys_msg("You are a helpful assistant.", params)
|
|
289
|
+
prompt(params[:message], with_history: true)
|
|
290
|
+
params.merge(session_id: session_id, session_config: session_config)
|
|
291
|
+
send_msg
|
|
292
|
+
end
|
|
249
293
|
```
|
|
250
294
|
|
|
295
|
+
**History Management Features:**
|
|
296
|
+
- **Session Isolation**: Each conversation has independent history
|
|
297
|
+
- **Context Strategies**: Choose from sliding window, relevance-based, summary-based, or hybrid
|
|
298
|
+
- **Automatic Compression**: Reduce token usage while preserving context
|
|
299
|
+
- **Persistence**: Save and restore conversations across restarts
|
|
300
|
+
- **Performance**: LRU caching and async I/O for optimal performance
|
|
301
|
+
|
|
302
|
+
See [History Management Guide](docs/HISTORY_MANAGEMENT_GUIDE.md) for detailed documentation.
|
|
303
|
+
|
|
251
304
|
### Embeddings Generation
|
|
252
305
|
|
|
253
306
|
```ruby
|
|
@@ -265,6 +318,78 @@ embeddings = engine.call_worker(:text_embedder, {
|
|
|
265
318
|
})
|
|
266
319
|
```
|
|
267
320
|
|
|
321
|
+
### Multimodal AI Examples
|
|
322
|
+
|
|
323
|
+
#### Image Generation
|
|
324
|
+
```ruby
|
|
325
|
+
# Generate image from text prompt (SiliconFlow /v1/images/generations)
|
|
326
|
+
result = engine.call_worker(:image_generator, {
|
|
327
|
+
prompt: "A beautiful sunset over mountains",
|
|
328
|
+
image_size: "1024x1024", # "widthxheight"; aliases: size:
|
|
329
|
+
batch_size: 1, # only Kolors; aliases: n:
|
|
330
|
+
negative_prompt: "blurry, low quality",
|
|
331
|
+
save_to_file: true,
|
|
332
|
+
output_dir: "./generated_images"
|
|
333
|
+
})
|
|
334
|
+
|
|
335
|
+
puts "Generated #{result[:images].size} image(s)"
|
|
336
|
+
puts "First image URL: #{result[:images].first[:url]}"
|
|
337
|
+
puts "Saved files: #{result[:saved_files]}"
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
#### Video Generation
|
|
341
|
+
```ruby
|
|
342
|
+
# Generate video from text prompt
|
|
343
|
+
result = engine.call_worker(:video_generator, {
|
|
344
|
+
prompt: "A cat playing with a ball of yarn",
|
|
345
|
+
duration: 5,
|
|
346
|
+
resolution: "720p",
|
|
347
|
+
save_to_file: true,
|
|
348
|
+
output_dir: "./generated_videos"
|
|
349
|
+
})
|
|
350
|
+
|
|
351
|
+
puts "Video generation started: #{result[:video_id]}"
|
|
352
|
+
puts "Check status with: engine.call_worker(:video_status, {video_id: '#{result[:video_id]}'})"
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
#### Text-to-Speech
|
|
356
|
+
```ruby
|
|
357
|
+
# Convert text to speech
|
|
358
|
+
result = engine.call_worker(:tts_synthesizer, {
|
|
359
|
+
text: "Welcome to SmartPrompt, your AI assistant",
|
|
360
|
+
voice: "alloy",
|
|
361
|
+
speed: 1.0,
|
|
362
|
+
save_to_file: true,
|
|
363
|
+
output_dir: "./generated_audio"
|
|
364
|
+
})
|
|
365
|
+
|
|
366
|
+
puts "Audio file created: #{result[:audio_file][:file_path]}"
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
#### Speech-to-Text
|
|
370
|
+
```ruby
|
|
371
|
+
# Transcribe audio to text
|
|
372
|
+
result = engine.call_worker(:stt_transcriber, {
|
|
373
|
+
audio_file: "./audio.wav",
|
|
374
|
+
language: "en",
|
|
375
|
+
response_format: "json"
|
|
376
|
+
})
|
|
377
|
+
|
|
378
|
+
puts "Transcribed text: #{result[:transcription][:text]}"
|
|
379
|
+
puts "Language: #{result[:transcription][:language]}"
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
#### Vision Analysis
|
|
383
|
+
```ruby
|
|
384
|
+
# Analyze image with vision model
|
|
385
|
+
result = engine.call_worker(:vision_analyzer, {
|
|
386
|
+
image_file: "./image.jpg",
|
|
387
|
+
prompt: "Describe what you see in this image"
|
|
388
|
+
})
|
|
389
|
+
|
|
390
|
+
puts "Analysis: #{result[:response]}"
|
|
391
|
+
```
|
|
392
|
+
|
|
268
393
|
## 🏗️ Architecture Overview
|
|
269
394
|
|
|
270
395
|
SmartPrompt follows a modular architecture:
|
|
@@ -282,6 +407,13 @@ SmartPrompt follows a modular architecture:
|
|
|
282
407
|
│Workers│ │Conv.│ │Template│
|
|
283
408
|
│ │ │Mgmt │ │ System │
|
|
284
409
|
└───────┘ └─────┘ └────────┘
|
|
410
|
+
│
|
|
411
|
+
┌────────┴────────┐
|
|
412
|
+
│ │
|
|
413
|
+
┌───▼────────┐ ┌─────▼──────┐
|
|
414
|
+
│ History │ │Persistence │
|
|
415
|
+
│ Manager │ │ Layer │
|
|
416
|
+
└────────────┘ └────────────┘
|
|
285
417
|
```
|
|
286
418
|
|
|
287
419
|
### Core Components
|
|
@@ -289,8 +421,10 @@ SmartPrompt follows a modular architecture:
|
|
|
289
421
|
- **Engine**: Central orchestrator managing configuration, adapters, and workers
|
|
290
422
|
- **Workers**: Reusable task definitions with embedded business logic
|
|
291
423
|
- **Conversation**: Context and message history management
|
|
292
|
-
- **
|
|
424
|
+
- **History Manager**: Intelligent conversation history with session isolation and context strategies
|
|
425
|
+
- **Adapters**: LLM provider integrations (OpenAI, Anthropic, Llama.cpp, etc.)
|
|
293
426
|
- **Templates**: ERB-based prompt template system
|
|
427
|
+
- **Persistence Layer**: Save and restore conversation history across restarts
|
|
294
428
|
|
|
295
429
|
## 🔧 Configuration Reference
|
|
296
430
|
|
|
@@ -298,20 +432,177 @@ SmartPrompt follows a modular architecture:
|
|
|
298
432
|
|
|
299
433
|
```yaml
|
|
300
434
|
adapters:
|
|
301
|
-
openai: "OpenAIAdapter"
|
|
435
|
+
openai: "OpenAIAdapter" # For OpenAI API
|
|
436
|
+
anthropic: "AnthropicAdapter" # For Anthropic Claude API
|
|
437
|
+
sensenova: "SenseNovaAdapter" # For 商汤 SenseNova (chat/vision/embeddings/image)
|
|
438
|
+
zhipu: "ZhipuAIAdapter" # For 智谱 BigModel/GLM (chat/vision/embed/image/video/tts/asr)
|
|
439
|
+
multimodal: "MultimodalAdapter" # For vision models
|
|
440
|
+
image_generation: "ImageGenerationAdapter" # For image generation
|
|
441
|
+
video_generation: "VideoGenerationAdapter" # For video generation
|
|
442
|
+
tts: "TTSAdapter" # For text-to-speech
|
|
443
|
+
stt: "STTAdapter" # For speech-to-text
|
|
302
444
|
```
|
|
303
445
|
|
|
304
446
|
### LLM Configuration
|
|
305
447
|
|
|
306
448
|
```yaml
|
|
307
449
|
llms:
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
model: "
|
|
450
|
+
# Text models
|
|
451
|
+
gpt:
|
|
452
|
+
adapter: "openai"
|
|
453
|
+
api_key: ENV["OPENAI_API_KEY"]
|
|
454
|
+
model: "gpt-4"
|
|
455
|
+
temperature: 0.7
|
|
456
|
+
|
|
457
|
+
# Anthropic Claude models
|
|
458
|
+
claude:
|
|
459
|
+
adapter: "anthropic"
|
|
460
|
+
api_key: ENV["ANTHROPIC_API_KEY"]
|
|
461
|
+
model: "claude-3-5-sonnet-20241022"
|
|
462
|
+
temperature: 0.7
|
|
463
|
+
max_tokens: 4096
|
|
464
|
+
|
|
465
|
+
claude_opus:
|
|
466
|
+
adapter: "anthropic"
|
|
467
|
+
api_key: ENV["ANTHROPIC_API_KEY"]
|
|
468
|
+
model: "claude-3-opus-20240229"
|
|
469
|
+
temperature: 0.7
|
|
470
|
+
max_tokens: 4096
|
|
471
|
+
|
|
472
|
+
claude_haiku:
|
|
473
|
+
adapter: "anthropic"
|
|
474
|
+
api_key: ENV["ANTHROPIC_API_KEY"]
|
|
475
|
+
model: "claude-3-5-haiku-20241022"
|
|
476
|
+
temperature: 0.7
|
|
477
|
+
max_tokens: 4096
|
|
478
|
+
|
|
479
|
+
# Custom Anthropic endpoint (for proxy or custom deployment)
|
|
480
|
+
claude_custom:
|
|
481
|
+
adapter: "anthropic"
|
|
482
|
+
api_key: ENV["ANTHROPIC_API_KEY"]
|
|
483
|
+
url: "https://your-custom-endpoint.com"
|
|
484
|
+
model: "claude-3-5-sonnet-20241022"
|
|
485
|
+
temperature: 0.7
|
|
486
|
+
max_tokens: 4096
|
|
487
|
+
|
|
488
|
+
# 商汤 SenseNova — one adapter covers all four model categories; just change `model`.
|
|
489
|
+
# Free-tier models run on token.sensenova.cn/v1; paid models (SenseChat-5, SenseNova-V6-*,
|
|
490
|
+
# Cupido) run on api.sensenova.cn/compatible-mode/v2 (returns 403 if your key lacks them).
|
|
491
|
+
sensechat: # 商量 文本对话 (free-tier)
|
|
492
|
+
adapter: "sensenova"
|
|
493
|
+
url: "https://token.sensenova.cn/v1"
|
|
494
|
+
api_key: ENV["SENSENOVA_API_KEY"]
|
|
495
|
+
model: "sensenova-6.7-flash-lite"
|
|
496
|
+
temperature: 0.7
|
|
497
|
+
# Optional SenseNova sampling extras (forwarded to /chat/completions):
|
|
498
|
+
# reasoning_effort: "medium"
|
|
499
|
+
# max_completion_tokens: 4096
|
|
500
|
+
# Paid: url https://api.sensenova.cn/compatible-mode/v2, model SenseChat-5
|
|
501
|
+
|
|
502
|
+
sensevision: # 商量 图文多模态 (flash-lite is natively multimodal)
|
|
503
|
+
adapter: "sensenova"
|
|
504
|
+
url: "https://token.sensenova.cn/v1"
|
|
505
|
+
api_key: ENV["SENSENOVA_API_KEY"]
|
|
506
|
+
model: "sensenova-6.7-flash-lite"
|
|
507
|
+
# Paid: url https://api.sensenova.cn/compatible-mode/v2, model SenseNova-V6-Pro
|
|
508
|
+
|
|
509
|
+
senseembedding: # Cupido 向量模型 (paid; native endpoint)
|
|
510
|
+
adapter: "sensenova"
|
|
511
|
+
url: "https://api.sensenova.cn/compatible-mode/v2"
|
|
512
|
+
embeddings_url: "https://api.sensenova.cn/v1/llm/embeddings"
|
|
513
|
+
api_key: ENV["SENSENOVA_API_KEY"]
|
|
514
|
+
model: "Cupido"
|
|
515
|
+
|
|
516
|
+
senseimage: # 秒画 文生图 (sensenova-u1-fast; token.sensenova.cn base)
|
|
517
|
+
adapter: "sensenova"
|
|
518
|
+
url: "https://token.sensenova.cn/v1"
|
|
519
|
+
image_url: "https://token.sensenova.cn/v1/images/generations"
|
|
520
|
+
api_key: ENV["SENSENOVA_API_KEY"]
|
|
521
|
+
model: "sensenova-u1-fast"
|
|
522
|
+
# sensenova-u1-fast only accepts specific sizes (default 2048x2048); see
|
|
523
|
+
# VALID_IMAGE_SIZES in sensenova_adapter.rb.
|
|
524
|
+
|
|
525
|
+
# 智谱 AI (BigModel/GLM) — one adapter covers all categories; just change `model`.
|
|
526
|
+
# Base https://open.bigmodel.cn/api/paas/v4 ; Bearer auth. Defaults use free-tier models.
|
|
527
|
+
glm: # 文本对话 (free glm-4-flash; paid glm-4-plus/glm-5.2)
|
|
528
|
+
adapter: "zhipu"
|
|
529
|
+
url: "https://open.bigmodel.cn/api/paas/v4"
|
|
530
|
+
api_key: ENV["ZHIPUAI_API_KEY"]
|
|
531
|
+
model: "glm-4-flash"
|
|
313
532
|
temperature: 0.7
|
|
314
|
-
#
|
|
533
|
+
# CodeGeeX-4: set `coding: true` and model: codegeex-4 (uses the coding base)
|
|
534
|
+
|
|
535
|
+
glm_vision: # 图文多模态 (free glm-4v-flash; paid glm-4v-plus)
|
|
536
|
+
adapter: "zhipu"
|
|
537
|
+
url: "https://open.bigmodel.cn/api/paas/v4"
|
|
538
|
+
api_key: ENV["ZHIPUAI_API_KEY"]
|
|
539
|
+
model: "glm-4v-flash"
|
|
540
|
+
|
|
541
|
+
embedding: # 向量模型 (embedding-3; custom dimensions 256/512/1024/2048)
|
|
542
|
+
adapter: "zhipu"
|
|
543
|
+
url: "https://open.bigmodel.cn/api/paas/v4"
|
|
544
|
+
api_key: ENV["ZHIPUAI_API_KEY"]
|
|
545
|
+
model: "embedding-3"
|
|
546
|
+
dimensions: 1024
|
|
547
|
+
|
|
548
|
+
cogview: # 文生图 (free cogview-3-flash; paid cogview-4/glm-image)
|
|
549
|
+
adapter: "zhipu"
|
|
550
|
+
url: "https://open.bigmodel.cn/api/paas/v4"
|
|
551
|
+
api_key: ENV["ZHIPUAI_API_KEY"]
|
|
552
|
+
model: "cogview-3-flash"
|
|
553
|
+
|
|
554
|
+
cogvideo: # 文生视频 (async submit->poll->download; free cogvideox-flash)
|
|
555
|
+
adapter: "zhipu"
|
|
556
|
+
url: "https://open.bigmodel.cn/api/paas/v4"
|
|
557
|
+
api_key: ENV["ZHIPUAI_API_KEY"]
|
|
558
|
+
model: "cogvideox-flash"
|
|
559
|
+
|
|
560
|
+
glm_tts: # 语音合成 (GLM-TTS)
|
|
561
|
+
adapter: "zhipu"
|
|
562
|
+
url: "https://open.bigmodel.cn/api/paas/v4"
|
|
563
|
+
api_key: ENV["ZHIPUAI_API_KEY"]
|
|
564
|
+
model: "glm-tts"
|
|
565
|
+
|
|
566
|
+
glm_asr: # 语音识别 (GLM-ASR-2512)
|
|
567
|
+
adapter: "zhipu"
|
|
568
|
+
url: "https://open.bigmodel.cn/api/paas/v4"
|
|
569
|
+
api_key: ENV["ZHIPUAI_API_KEY"]
|
|
570
|
+
model: "glm-asr-2512"
|
|
571
|
+
|
|
572
|
+
# Vision models
|
|
573
|
+
vision:
|
|
574
|
+
adapter: "multimodal"
|
|
575
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
576
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
577
|
+
model: "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
578
|
+
|
|
579
|
+
# Image generation (Kolors supports batch_size/guidance_scale; see Qwen-Image for cfg)
|
|
580
|
+
image_gen:
|
|
581
|
+
adapter: "image_generation"
|
|
582
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
583
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
584
|
+
model: "Kwai-Kolors/Kolors"
|
|
585
|
+
|
|
586
|
+
# Video generation
|
|
587
|
+
video_gen:
|
|
588
|
+
adapter: "video_generation"
|
|
589
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
590
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
591
|
+
model: "Wan-AI/Wan2.2-T2V-A14B"
|
|
592
|
+
|
|
593
|
+
# Text-to-speech
|
|
594
|
+
tts_service:
|
|
595
|
+
adapter: "tts"
|
|
596
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
597
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
598
|
+
model: "FunAudioLLM/CosyVoice2-0.5B"
|
|
599
|
+
|
|
600
|
+
# Speech-to-text
|
|
601
|
+
stt_service:
|
|
602
|
+
adapter: "stt"
|
|
603
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
604
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
605
|
+
model: "FunAudioLLM/SenseVoiceSmall"
|
|
315
606
|
```
|
|
316
607
|
|
|
317
608
|
### Model Alias Configuration
|
|
@@ -398,20 +689,27 @@ end
|
|
|
398
689
|
## 🚀 Real-world Use Cases
|
|
399
690
|
|
|
400
691
|
- **Chatbots and Conversational AI**: Build sophisticated chatbots with context awareness
|
|
401
|
-
- **Content Generation**: Automated content creation with template-driven prompts
|
|
692
|
+
- **Content Generation**: Automated content creation with template-driven prompts
|
|
402
693
|
- **Code Analysis**: AI-powered code review and documentation generation
|
|
403
694
|
- **Customer Support**: Intelligent ticket routing and response suggestions
|
|
404
695
|
- **Data Processing**: LLM-powered data extraction and transformation
|
|
405
696
|
- **Educational Tools**: AI tutors and learning assistance systems
|
|
697
|
+
- **Multimedia Content Creation**: Generate images, videos, and audio content
|
|
698
|
+
- **Voice Interfaces**: Build voice-enabled applications with TTS and STT
|
|
699
|
+
- **Visual Analysis**: Image understanding and object detection applications
|
|
700
|
+
- **Accessibility Tools**: Audio descriptions and text-to-speech for visually impaired users
|
|
406
701
|
|
|
407
702
|
## 🛣️ Roadmap
|
|
408
703
|
|
|
704
|
+
- [x] **Multimodal AI Support** - Vision, Image Generation, Video Generation, TTS, STT
|
|
409
705
|
- [ ] Additional LLM provider adapters (e.g. Google PaLM)
|
|
410
706
|
- [ ] Visual prompt builder and management interface
|
|
411
707
|
- [ ] Enhanced caching and performance optimizations
|
|
412
708
|
- [ ] Integration with vector databases for RAG applications
|
|
413
709
|
- [ ] Built-in evaluation and testing framework for prompts
|
|
414
710
|
- [ ] Distributed worker execution support
|
|
711
|
+
- [ ] Real-time audio/video streaming support
|
|
712
|
+
- [ ] Advanced multimodal prompt chaining
|
|
415
713
|
|
|
416
714
|
## 🤝 Contributing
|
|
417
715
|
|
data/Rakefile
CHANGED
|
@@ -1,4 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "bundler/gem_tasks"
|
|
4
|
-
|
|
4
|
+
require "rake/testtask"
|
|
5
|
+
|
|
6
|
+
Rake::TestTask.new(:test) do |t|
|
|
7
|
+
t.libs << "lib"
|
|
8
|
+
t.libs << "test"
|
|
9
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
|
10
|
+
t.verbose = true
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
task default: :test
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Anthropic Configuration for SmartPrompt
|
|
2
|
+
# This configuration enables Anthropic Claude models
|
|
3
|
+
|
|
4
|
+
# Adapter definitions
|
|
5
|
+
adapters:
|
|
6
|
+
openai: "OpenAIAdapter"
|
|
7
|
+
anthropic: "AnthropicAdapter"
|
|
8
|
+
|
|
9
|
+
# LLM configurations
|
|
10
|
+
llms:
|
|
11
|
+
deepseek_anthropic:
|
|
12
|
+
adapter: anthropic
|
|
13
|
+
api_key: ENV["ANTHROPIC_AUTH_TOKEN"]
|
|
14
|
+
url: "https://api.deepseek.com/anthropic"
|
|
15
|
+
temperature: 0.7
|
|
16
|
+
max_tokens: 4096
|
|
17
|
+
deepseek:
|
|
18
|
+
adapter: openai
|
|
19
|
+
api_key: ENV["DSKEY"]
|
|
20
|
+
url: "https://api.deepseek.com"
|
|
21
|
+
|
|
22
|
+
# Path configurations
|
|
23
|
+
template_path: "./templates"
|
|
24
|
+
worker_path: "./workers"
|
|
25
|
+
logger_file: "./logs/smart_prompt.log"
|
|
26
|
+
|
|
27
|
+
# Advanced settings
|
|
28
|
+
advanced:
|
|
29
|
+
# Timeout settings (in seconds)
|
|
30
|
+
request_timeout: 240
|
|
31
|
+
connection_timeout: 30
|
|
32
|
+
|
|
33
|
+
# Retry settings
|
|
34
|
+
max_retries: 3
|
|
35
|
+
retry_delay: 2
|
|
36
|
+
|
|
37
|
+
# Rate limiting
|
|
38
|
+
requests_per_minute: 60
|
|
39
|
+
|
|
40
|
+
# History Management Configuration
|
|
41
|
+
# SmartPrompt provides intelligent conversation history management with session isolation,
|
|
42
|
+
# automatic compression, and multiple context strategies.
|
|
43
|
+
history:
|
|
44
|
+
# Cache Configuration
|
|
45
|
+
# Maximum number of sessions to keep in memory (LRU eviction)
|
|
46
|
+
cache_size: 100
|
|
47
|
+
|
|
48
|
+
# Default Session Configuration
|
|
49
|
+
# These settings apply to all sessions unless overridden
|
|
50
|
+
session_defaults:
|
|
51
|
+
max_messages: 100 # Maximum messages per session (older messages removed)
|
|
52
|
+
max_tokens: 4000 # Maximum tokens per session (enforced during context retrieval)
|
|
53
|
+
context_strategy: sliding_window # Default strategy: sliding_window, relevance_based, summary_based, hybrid
|
|
54
|
+
preserve_system_messages: true # Always keep system messages regardless of limits
|
|
55
|
+
|
|
56
|
+
# Context Strategy Configurations
|
|
57
|
+
# Each strategy has specific parameters for fine-tuning behavior
|
|
58
|
+
strategies:
|
|
59
|
+
# Sliding Window: Keep the most recent N messages
|
|
60
|
+
sliding_window:
|
|
61
|
+
window_size: 10 # Number of recent messages to keep
|
|
62
|
+
preserve_system: true # Always include system messages
|
|
63
|
+
|
|
64
|
+
# Relevance-Based: Select messages based on semantic similarity
|
|
65
|
+
relevance_based:
|
|
66
|
+
top_k: 10 # Number of most relevant messages to select
|
|
67
|
+
recency_weight: 0.3 # Weight for recency (0.0-1.0)
|
|
68
|
+
relevance_weight: 0.7 # Weight for relevance (0.0-1.0)
|
|
69
|
+
embedding_service: null # Optional: embedding service for semantic similarity
|
|
70
|
+
|
|
71
|
+
# Summary-Based: Automatically compress old messages into summaries
|
|
72
|
+
summary_based:
|
|
73
|
+
summary_threshold: 20 # Trigger summarization after this many messages
|
|
74
|
+
keep_recent: 5 # Number of recent messages to keep uncompressed
|
|
75
|
+
compression_ratio: 0.5 # Target compression ratio (0.0-1.0)
|
|
76
|
+
|
|
77
|
+
# Hybrid: Adaptively combine multiple strategies
|
|
78
|
+
hybrid:
|
|
79
|
+
mode: adaptive # Mode: 'adaptive' (auto-select) or 'combined' (merge results)
|
|
80
|
+
sliding_window: {} # Override sliding window config
|
|
81
|
+
relevance_based: {} # Override relevance-based config
|
|
82
|
+
summary_based: {} # Override summary-based config
|
|
83
|
+
|
|
84
|
+
# Compression Configuration
|
|
85
|
+
# Automatic summarization to reduce token usage
|
|
86
|
+
compression:
|
|
87
|
+
enabled: true # Enable automatic compression
|
|
88
|
+
auto_compress_threshold: 50 # Auto-compress when session exceeds this many messages
|
|
89
|
+
compression_ratio: 0.5 # Target compression ratio
|
|
90
|
+
llm_adapter: null # LLM to use for summarization (uses default if null)
|
|
91
|
+
|
|
92
|
+
# Persistence Configuration
|
|
93
|
+
# Save and restore conversation history across restarts
|
|
94
|
+
persistence:
|
|
95
|
+
enabled: true # Enable persistence to disk
|
|
96
|
+
backend: filesystem # Backend type: 'filesystem' (more backends coming soon)
|
|
97
|
+
storage_path: "./history_data" # Directory for storing session data
|
|
98
|
+
async: true # Use async writes for better performance
|
|
99
|
+
|
|
100
|
+
# Cleanup Configuration
|
|
101
|
+
# Automatic cleanup of old or expired sessions
|
|
102
|
+
cleanup:
|
|
103
|
+
auto_cleanup: false # Enable automatic cleanup thread
|
|
104
|
+
cleanup_interval: 3600 # Cleanup interval in seconds (1 hour)
|
|
105
|
+
session_ttl: 86400 # Session time-to-live in seconds (24 hours)
|
|
106
|
+
cleanup_callback: null # Optional: custom cleanup logic (Ruby proc)
|
|
107
|
+
|
|
108
|
+
# Monitoring Configuration
|
|
109
|
+
# Logging and metrics for debugging and monitoring
|
|
110
|
+
monitoring:
|
|
111
|
+
enabled: true # Enable monitoring and logging
|
|
112
|
+
log_level: info # Log level: debug, info, warn, error
|
|
113
|
+
metrics_format: prometheus # Metrics format: prometheus, json, hash
|
|
114
|
+
|
|
115
|
+
# Example Configurations for Different Use Cases:
|
|
116
|
+
#
|
|
117
|
+
# 1. High-Volume Chat Application (optimize for performance):
|
|
118
|
+
# cache_size: 1000
|
|
119
|
+
# session_defaults:
|
|
120
|
+
# max_messages: 50
|
|
121
|
+
# max_tokens: 2000
|
|
122
|
+
# context_strategy: sliding_window
|
|
123
|
+
# cleanup:
|
|
124
|
+
# auto_cleanup: true
|
|
125
|
+
# session_ttl: 3600 # 1 hour
|
|
126
|
+
#
|
|
127
|
+
# 2. Long-Running Conversations (optimize for context retention):
|
|
128
|
+
# session_defaults:
|
|
129
|
+
# max_messages: 500
|
|
130
|
+
# max_tokens: 16000
|
|
131
|
+
# context_strategy: summary_based
|
|
132
|
+
# compression:
|
|
133
|
+
# enabled: true
|
|
134
|
+
# auto_compress_threshold: 100
|
|
135
|
+
#
|
|
136
|
+
# 3. Semantic Search Application (optimize for relevance):
|
|
137
|
+
# session_defaults:
|
|
138
|
+
# context_strategy: relevance_based
|
|
139
|
+
# strategies:
|
|
140
|
+
# relevance_based:
|
|
141
|
+
# top_k: 20
|
|
142
|
+
# recency_weight: 0.2
|
|
143
|
+
# relevance_weight: 0.8
|
|
144
|
+
#
|
|
145
|
+
# 4. Development/Testing (disable persistence and cleanup):
|
|
146
|
+
# persistence:
|
|
147
|
+
# enabled: false
|
|
148
|
+
# cleanup:
|
|
149
|
+
# auto_cleanup: false
|
|
150
|
+
# monitoring:
|
|
151
|
+
# log_level: debug
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Configuration for SiliconFlow image generation.
|
|
2
|
+
#
|
|
3
|
+
# Get an API key from https://siliconflow.cn and export it as SILICONFLOW_API_KEY.
|
|
4
|
+
# Available image models: Kwai-Kolors/Kolors, Qwen/Qwen-Image,
|
|
5
|
+
# Qwen/Qwen-Image-Edit (image editing). See:
|
|
6
|
+
# https://api-docs.siliconflow.cn/docs/api/images-generations-post
|
|
7
|
+
|
|
8
|
+
adapters:
|
|
9
|
+
image_generation: "ImageGenerationAdapter"
|
|
10
|
+
|
|
11
|
+
llms:
|
|
12
|
+
image_gen:
|
|
13
|
+
adapter: "image_generation"
|
|
14
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
15
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
16
|
+
# Kolors supports batch_size, guidance_scale and a range of image_size values.
|
|
17
|
+
model: "Kwai-Kolors/Kolors"
|
|
18
|
+
|
|
19
|
+
default_llm: "image_gen"
|
|
20
|
+
template_path: "./templates"
|
|
21
|
+
worker_path: "./workers"
|
|
22
|
+
logger_file: "./logs/smart_prompt.log"
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Multimodal Configuration for SmartPrompt
|
|
2
|
+
# This configuration enables multimodal capabilities with SiliconFlow
|
|
3
|
+
|
|
4
|
+
# Adapter definitions
|
|
5
|
+
adapters:
|
|
6
|
+
openai: "OpenAIAdapter"
|
|
7
|
+
multimodal: "MultimodalAdapter"
|
|
8
|
+
|
|
9
|
+
# LLM configurations
|
|
10
|
+
llms:
|
|
11
|
+
# Multimodal models for vision and video understanding
|
|
12
|
+
qwen_vl:
|
|
13
|
+
adapter: "multimodal"
|
|
14
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
15
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
16
|
+
default_model: "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
17
|
+
temperature: 0.7
|
|
18
|
+
|
|
19
|
+
qwen_omni:
|
|
20
|
+
adapter: "multimodal"
|
|
21
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
22
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
23
|
+
default_model: "Qwen/Qwen3-Omni-7B-Instruct"
|
|
24
|
+
temperature: 0.7
|
|
25
|
+
|
|
26
|
+
deepseek_vl:
|
|
27
|
+
adapter: "multimodal"
|
|
28
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
29
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
30
|
+
default_model: "deepseek-ai/DeepSeek-VL2"
|
|
31
|
+
temperature: 0.7
|
|
32
|
+
|
|
33
|
+
# Text-only models for comparison
|
|
34
|
+
siliconflow_text:
|
|
35
|
+
adapter: "openai"
|
|
36
|
+
url: "https://api.siliconflow.cn/v1/"
|
|
37
|
+
api_key: ENV["SILICONFLOW_API_KEY"]
|
|
38
|
+
default_model: "Qwen/Qwen2.5-7B-Instruct"
|
|
39
|
+
temperature: 0.7
|
|
40
|
+
|
|
41
|
+
# Default settings
|
|
42
|
+
default_llm: "qwen_vl"
|
|
43
|
+
|
|
44
|
+
# Path configurations
|
|
45
|
+
template_path: "./templates"
|
|
46
|
+
worker_path: "./workers"
|
|
47
|
+
logger_file: "./logs/smart_prompt.log"
|
|
48
|
+
|
|
49
|
+
# Multimodal specific settings
|
|
50
|
+
multimodal:
|
|
51
|
+
# Default image detail level ("low", "high", "auto")
|
|
52
|
+
default_image_detail: "auto"
|
|
53
|
+
|
|
54
|
+
# Default video extraction settings
|
|
55
|
+
default_max_frames: 10
|
|
56
|
+
default_fps: 1
|
|
57
|
+
|
|
58
|
+
# Supported file formats
|
|
59
|
+
supported_image_formats:
|
|
60
|
+
- "jpg"
|
|
61
|
+
- "jpeg"
|
|
62
|
+
- "png"
|
|
63
|
+
- "gif"
|
|
64
|
+
- "bmp"
|
|
65
|
+
- "webp"
|
|
66
|
+
|
|
67
|
+
supported_video_formats:
|
|
68
|
+
- "mp4"
|
|
69
|
+
- "mov"
|
|
70
|
+
- "avi"
|
|
71
|
+
- "mkv"
|
|
72
|
+
- "webm"
|
|
73
|
+
|
|
74
|
+
# Advanced settings
|
|
75
|
+
advanced:
|
|
76
|
+
# Timeout settings (in seconds)
|
|
77
|
+
request_timeout: 240
|
|
78
|
+
connection_timeout: 30
|
|
79
|
+
|
|
80
|
+
# Retry settings
|
|
81
|
+
max_retries: 3
|
|
82
|
+
retry_delay: 2
|
|
83
|
+
|
|
84
|
+
# Rate limiting
|
|
85
|
+
requests_per_minute: 60
|