smart_prompt 0.4.4 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/README.cn.md +305 -11
  4. data/README.md +309 -11
  5. data/Rakefile +10 -1
  6. data/config/anthropic_config.yml +151 -0
  7. data/config/image_generation_config.yml +22 -0
  8. data/config/multimodal_config.yml +85 -0
  9. data/config/sensenova_config.yml +63 -0
  10. data/config/zhipu_config.yml +73 -0
  11. data/docs/ANTHROPIC_EXAMPLES.md +559 -0
  12. data/docs/CONVERSATION_INTEGRATION_SUMMARY.md +155 -0
  13. data/docs/HISTORY_EXAMPLES_README.md +533 -0
  14. data/docs/HISTORY_MANAGEMENT_GUIDE.md +797 -0
  15. data/docs/MONITORING_GUIDE.md +278 -0
  16. data/docs/MULTIMODAL_README.md +265 -0
  17. data/docs/RELEVANCE_BASED_STRATEGY_IMPLEMENTATION.md +124 -0
  18. data/docs/STT_README.md +302 -0
  19. data/docs/TTS_README.md +303 -0
  20. data/docs/VIDEO_GENERATION_README.md +246 -0
  21. data/docs/delete_files_list.md +124 -0
  22. data/examples/anthropic_basic_chat.rb +143 -0
  23. data/examples/anthropic_example.rb +232 -0
  24. data/examples/anthropic_multimodal.rb +212 -0
  25. data/examples/anthropic_streaming.rb +312 -0
  26. data/examples/anthropic_tool_calling.rb +393 -0
  27. data/examples/automatic_cleanup_example.rb +109 -0
  28. data/examples/history_management_examples.rb +522 -0
  29. data/examples/image_generation_example.rb +130 -0
  30. data/examples/monitoring_example.rb +121 -0
  31. data/examples/multimodal_example.rb +63 -0
  32. data/examples/relevance_based_strategy_example.rb +87 -0
  33. data/examples/sensenova_example.rb +129 -0
  34. data/examples/stt_example.rb +287 -0
  35. data/examples/tts_example.rb +244 -0
  36. data/examples/video_generation_example.rb +189 -0
  37. data/examples/zhipu_example.rb +151 -0
  38. data/lib/smart_prompt/anthropic_adapter.rb +407 -298
  39. data/lib/smart_prompt/compression_engine.rb +201 -0
  40. data/lib/smart_prompt/context_strategy.rb +22 -0
  41. data/lib/smart_prompt/conversation.rb +47 -4
  42. data/lib/smart_prompt/engine.rb +29 -2
  43. data/lib/smart_prompt/history_manager.rb +596 -0
  44. data/lib/smart_prompt/hybrid_strategy.rb +222 -0
  45. data/lib/smart_prompt/image_generation_adapter.rb +297 -0
  46. data/lib/smart_prompt/lru_cache.rb +133 -0
  47. data/lib/smart_prompt/message.rb +57 -0
  48. data/lib/smart_prompt/multimodal_adapter.rb +277 -0
  49. data/lib/smart_prompt/persistence_layer.rb +197 -0
  50. data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
  51. data/lib/smart_prompt/sensenova_adapter.rb +410 -0
  52. data/lib/smart_prompt/session.rb +140 -0
  53. data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
  54. data/lib/smart_prompt/stt_adapter.rb +381 -0
  55. data/lib/smart_prompt/summary_based_strategy.rb +152 -0
  56. data/lib/smart_prompt/token_counter.rb +74 -0
  57. data/lib/smart_prompt/tts_adapter.rb +403 -0
  58. data/lib/smart_prompt/version.rb +1 -1
  59. data/lib/smart_prompt/video_generation_adapter.rb +330 -0
  60. data/lib/smart_prompt/worker.rb +28 -3
  61. data/lib/smart_prompt/zhipu_adapter.rb +616 -0
  62. data/lib/smart_prompt.rb +21 -0
  63. data/workers/history_management_examples.rb +407 -0
  64. data/workers/image_generation_workers.rb +119 -0
  65. data/workers/multimodal_workers.rb +110 -0
  66. data/workers/sensenova_workers.rb +62 -0
  67. data/workers/stt_workers.rb +195 -0
  68. data/workers/tts_workers.rb +388 -0
  69. data/workers/video_generation_workers.rb +264 -0
  70. data/workers/zhipu_workers.rb +113 -0
  71. metadata +88 -1
data/README.md CHANGED
@@ -11,13 +11,24 @@ SmartPrompt is a powerful Ruby gem that provides an elegant domain-specific lang
11
11
 
12
12
  ### Multi-LLM Support
13
13
  - **OpenAI API Compatible**: Full support for OpenAI GPT models and compatible APIs
14
- - **Llama.cpp Integration**: Direct integration with local Llama.cpp servers
14
+ - **Anthropic Claude**: Native support for Claude models with multimodal capabilities
15
+ - **SenseNova (商汤日日新)**: One adapter covers chat (商量), multimodal vision (图文多模态), Cupido embeddings (向量), and 秒画 text-to-image — see `examples/sensenova_example.rb`
16
+ - **智谱 AI (BigModel / GLM)**: One adapter covers all categories — chat (GLM-4), vision (GLM-4V), embeddings (embedding-3), text-to-image (CogView), text-to-video (CogVideoX), TTS (GLM-TTS), ASR (GLM-ASR) — see `examples/zhipu_example.rb`
17
+ - **Llama.cpp Integration**: Direct integration with local Llama.cpp servers
15
18
  - **Extensible Adapters**: Easy-to-extend adapter system for new LLM providers
16
19
  - **Unified Interface**: Same API regardless of the underlying LLM provider
17
20
 
21
+ ### Multimodal AI Capabilities
22
+ - **Vision Models**: Support for image understanding and analysis
23
+ - **Image Generation**: Create images from text prompts using diffusion models
24
+ - **Video Generation**: Generate videos from text or image prompts
25
+ - **Text-to-Speech**: Convert text to natural-sounding speech
26
+ - **Speech-to-Text**: Transcribe audio files to text with multi-language support
27
+
18
28
  ### Flexible Architecture
19
29
  - **Worker-based Tasks**: Define reusable workers for specific AI tasks
20
30
  - **Template System**: ERB-based prompt templates with parameter injection
31
+ - **Intelligent History Management**: Session isolation, automatic compression, and multiple context strategies
21
32
  - **Conversation Management**: Built-in conversation history and context management
22
33
  - **Streaming Support**: Real-time response streaming for better user experience
23
34
 
@@ -26,6 +37,8 @@ SmartPrompt is a powerful Ruby gem that provides an elegant domain-specific lang
26
37
  - **Retry Logic**: Robust error handling with configurable retry mechanisms
27
38
  - **Embeddings**: Text embedding generation for semantic search and RAG applications
28
39
  - **Configuration-driven**: YAML-based configuration for easy deployment management
40
+ - **Batch Processing**: Efficient processing of multiple files and tasks
41
+ - **Language Detection**: Automatic language identification from text and audio
29
42
 
30
43
  ### Production Ready
31
44
  - **Comprehensive Logging**: Detailed logging for debugging and monitoring
@@ -61,6 +74,7 @@ Create a YAML configuration file (`config/smart_prompt.yml`):
61
74
  # Adapter definitions
62
75
  adapters:
63
76
  openai: OpenAIAdapter
77
+ anthropic: AnthropicAdapter
64
78
  # LLM configurations
65
79
  llms:
66
80
  SiliconFlow:
@@ -68,7 +82,13 @@ llms:
68
82
  url: https://api.siliconflow.cn/v1/
69
83
  api_key: ENV["APIKey"]
70
84
  default_model: Qwen/Qwen2.5-7B-Instruct
71
- local:
85
+ claude:
86
+ adapter: anthropic
87
+ api_key: ENV["ANTHROPIC_API_KEY"]
88
+ model: claude-3-5-sonnet-20241022
89
+ temperature: 0.7
90
+ max_tokens: 4096
91
+ llamacpp:
72
92
  adapter: openai
73
93
  url: http://localhost:8080/
74
94
  ollama:
@@ -238,7 +258,10 @@ end
238
258
 
239
259
  ### Conversation History
240
260
 
261
+ SmartPrompt provides intelligent conversation history management with session isolation, automatic compression, and multiple context strategies.
262
+
241
263
  ```ruby
264
+ # Basic usage with automatic history management
242
265
  SmartPrompt.define_worker :conversational_chat do
243
266
  use "deepseek"
244
267
  model "deepseek-chat"
@@ -246,8 +269,38 @@ SmartPrompt.define_worker :conversational_chat do
246
269
  prompt(params[:message], with_history: true)
247
270
  send_msg
248
271
  end
272
+
273
+ # Advanced usage with explicit session management
274
+ SmartPrompt.define_worker :session_chat do
275
+ use "deepseek"
276
+ model "deepseek-chat"
277
+
278
+ # Use session_id for isolated conversations
279
+ session_id = params[:session_id] || "default"
280
+
281
+ # Configure session behavior
282
+ session_config = {
283
+ max_messages: 100,
284
+ max_tokens: 4000,
285
+ context_strategy: :sliding_window # or :relevance_based, :summary_based, :hybrid
286
+ }
287
+
288
+ sys_msg("You are a helpful assistant.", params)
289
+ prompt(params[:message], with_history: true)
290
+ params.merge(session_id: session_id, session_config: session_config)
291
+ send_msg
292
+ end
249
293
  ```
250
294
 
295
+ **History Management Features:**
296
+ - **Session Isolation**: Each conversation has independent history
297
+ - **Context Strategies**: Choose from sliding window, relevance-based, summary-based, or hybrid
298
+ - **Automatic Compression**: Reduce token usage while preserving context
299
+ - **Persistence**: Save and restore conversations across restarts
300
+ - **Performance**: LRU caching and async I/O for optimal performance
301
+
302
+ See [History Management Guide](docs/HISTORY_MANAGEMENT_GUIDE.md) for detailed documentation.
303
+
251
304
  ### Embeddings Generation
252
305
 
253
306
  ```ruby
@@ -265,6 +318,78 @@ embeddings = engine.call_worker(:text_embedder, {
265
318
  })
266
319
  ```
267
320
 
321
+ ### Multimodal AI Examples
322
+
323
+ #### Image Generation
324
+ ```ruby
325
+ # Generate image from text prompt (SiliconFlow /v1/images/generations)
326
+ result = engine.call_worker(:image_generator, {
327
+ prompt: "A beautiful sunset over mountains",
328
+ image_size: "1024x1024", # "widthxheight"; aliases: size:
329
+ batch_size: 1, # only Kolors; aliases: n:
330
+ negative_prompt: "blurry, low quality",
331
+ save_to_file: true,
332
+ output_dir: "./generated_images"
333
+ })
334
+
335
+ puts "Generated #{result[:images].size} image(s)"
336
+ puts "First image URL: #{result[:images].first[:url]}"
337
+ puts "Saved files: #{result[:saved_files]}"
338
+ ```
339
+
340
+ #### Video Generation
341
+ ```ruby
342
+ # Generate video from text prompt
343
+ result = engine.call_worker(:video_generator, {
344
+ prompt: "A cat playing with a ball of yarn",
345
+ duration: 5,
346
+ resolution: "720p",
347
+ save_to_file: true,
348
+ output_dir: "./generated_videos"
349
+ })
350
+
351
+ puts "Video generation started: #{result[:video_id]}"
352
+ puts "Check status with: engine.call_worker(:video_status, {video_id: '#{result[:video_id]}'})"
353
+ ```
354
+
355
+ #### Text-to-Speech
356
+ ```ruby
357
+ # Convert text to speech
358
+ result = engine.call_worker(:tts_synthesizer, {
359
+ text: "Welcome to SmartPrompt, your AI assistant",
360
+ voice: "alloy",
361
+ speed: 1.0,
362
+ save_to_file: true,
363
+ output_dir: "./generated_audio"
364
+ })
365
+
366
+ puts "Audio file created: #{result[:audio_file][:file_path]}"
367
+ ```
368
+
369
+ #### Speech-to-Text
370
+ ```ruby
371
+ # Transcribe audio to text
372
+ result = engine.call_worker(:stt_transcriber, {
373
+ audio_file: "./audio.wav",
374
+ language: "en",
375
+ response_format: "json"
376
+ })
377
+
378
+ puts "Transcribed text: #{result[:transcription][:text]}"
379
+ puts "Language: #{result[:transcription][:language]}"
380
+ ```
381
+
382
+ #### Vision Analysis
383
+ ```ruby
384
+ # Analyze image with vision model
385
+ result = engine.call_worker(:vision_analyzer, {
386
+ image_file: "./image.jpg",
387
+ prompt: "Describe what you see in this image"
388
+ })
389
+
390
+ puts "Analysis: #{result[:response]}"
391
+ ```
392
+
268
393
  ## 🏗️ Architecture Overview
269
394
 
270
395
  SmartPrompt follows a modular architecture:
@@ -282,6 +407,13 @@ SmartPrompt follows a modular architecture:
282
407
  │Workers│ │Conv.│ │Template│
283
408
  │ │ │Mgmt │ │ System │
284
409
  └───────┘ └─────┘ └────────┘
410
+
411
+ ┌────────┴────────┐
412
+ │ │
413
+ ┌───▼────────┐ ┌─────▼──────┐
414
+ │ History │ │Persistence │
415
+ │ Manager │ │ Layer │
416
+ └────────────┘ └────────────┘
285
417
  ```
286
418
 
287
419
  ### Core Components
@@ -289,8 +421,10 @@ SmartPrompt follows a modular architecture:
289
421
  - **Engine**: Central orchestrator managing configuration, adapters, and workers
290
422
  - **Workers**: Reusable task definitions with embedded business logic
291
423
  - **Conversation**: Context and message history management
292
- - **Adapters**: LLM provider integrations (OpenAI, Llama.cpp, etc.)
424
+ - **History Manager**: Intelligent conversation history with session isolation and context strategies
425
+ - **Adapters**: LLM provider integrations (OpenAI, Anthropic, Llama.cpp, etc.)
293
426
  - **Templates**: ERB-based prompt template system
427
+ - **Persistence Layer**: Save and restore conversation history across restarts
294
428
 
295
429
  ## 🔧 Configuration Reference
296
430
 
@@ -298,20 +432,177 @@ SmartPrompt follows a modular architecture:
298
432
 
299
433
  ```yaml
300
434
  adapters:
301
- openai: "OpenAIAdapter" # For OpenAI API
435
+ openai: "OpenAIAdapter" # For OpenAI API
436
+ anthropic: "AnthropicAdapter" # For Anthropic Claude API
437
+ sensenova: "SenseNovaAdapter" # For 商汤 SenseNova (chat/vision/embeddings/image)
438
+ zhipu: "ZhipuAIAdapter" # For 智谱 BigModel/GLM (chat/vision/embed/image/video/tts/asr)
439
+ multimodal: "MultimodalAdapter" # For vision models
440
+ image_generation: "ImageGenerationAdapter" # For image generation
441
+ video_generation: "VideoGenerationAdapter" # For video generation
442
+ tts: "TTSAdapter" # For text-to-speech
443
+ stt: "STTAdapter" # For speech-to-text
302
444
  ```
303
445
 
304
446
  ### LLM Configuration
305
447
 
306
448
  ```yaml
307
449
  llms:
308
- model_name:
309
- adapter: "adapter_name"
310
- api_key: "your_api_key" # Can use ENV['KEY_NAME']
311
- url: "https://api.url"
312
- model: "model_identifier"
450
+ # Text models
451
+ gpt:
452
+ adapter: "openai"
453
+ api_key: ENV["OPENAI_API_KEY"]
454
+ model: "gpt-4"
455
+ temperature: 0.7
456
+
457
+ # Anthropic Claude models
458
+ claude:
459
+ adapter: "anthropic"
460
+ api_key: ENV["ANTHROPIC_API_KEY"]
461
+ model: "claude-3-5-sonnet-20241022"
462
+ temperature: 0.7
463
+ max_tokens: 4096
464
+
465
+ claude_opus:
466
+ adapter: "anthropic"
467
+ api_key: ENV["ANTHROPIC_API_KEY"]
468
+ model: "claude-3-opus-20240229"
469
+ temperature: 0.7
470
+ max_tokens: 4096
471
+
472
+ claude_haiku:
473
+ adapter: "anthropic"
474
+ api_key: ENV["ANTHROPIC_API_KEY"]
475
+ model: "claude-3-5-haiku-20241022"
476
+ temperature: 0.7
477
+ max_tokens: 4096
478
+
479
+ # Custom Anthropic endpoint (for proxy or custom deployment)
480
+ claude_custom:
481
+ adapter: "anthropic"
482
+ api_key: ENV["ANTHROPIC_API_KEY"]
483
+ url: "https://your-custom-endpoint.com"
484
+ model: "claude-3-5-sonnet-20241022"
485
+ temperature: 0.7
486
+ max_tokens: 4096
487
+
488
+ # 商汤 SenseNova — one adapter covers all four model categories; just change `model`.
489
+ # Free-tier models run on token.sensenova.cn/v1; paid models (SenseChat-5,
490
+ # SenseNova-V6-*, Cupido) run on api.sensenova.cn/compatible-mode/v2 (returns 403 if your key lacks them).
491
+ sensechat: # 商量 文本对话 (free-tier)
492
+ adapter: "sensenova"
493
+ url: "https://token.sensenova.cn/v1"
494
+ api_key: ENV["SENSENOVA_API_KEY"]
495
+ model: "sensenova-6.7-flash-lite"
496
+ temperature: 0.7
497
+ # Optional SenseNova sampling extras (forwarded to /chat/completions):
498
+ # reasoning_effort: "medium"
499
+ # max_completion_tokens: 4096
500
+ # Paid: url https://api.sensenova.cn/compatible-mode/v2, model SenseChat-5
501
+
502
+ sensevision: # 商量 图文多模态 (flash-lite is natively multimodal)
503
+ adapter: "sensenova"
504
+ url: "https://token.sensenova.cn/v1"
505
+ api_key: ENV["SENSENOVA_API_KEY"]
506
+ model: "sensenova-6.7-flash-lite"
507
+ # Paid: url https://api.sensenova.cn/compatible-mode/v2, model SenseNova-V6-Pro
508
+
509
+ senseembedding: # Cupido 向量模型 (paid; native endpoint)
510
+ adapter: "sensenova"
511
+ url: "https://api.sensenova.cn/compatible-mode/v2"
512
+ embeddings_url: "https://api.sensenova.cn/v1/llm/embeddings"
513
+ api_key: ENV["SENSENOVA_API_KEY"]
514
+ model: "Cupido"
515
+
516
+ senseimage: # 秒画 文生图 (sensenova-u1-fast; token.sensenova.cn base)
517
+ adapter: "sensenova"
518
+ url: "https://token.sensenova.cn/v1"
519
+ image_url: "https://token.sensenova.cn/v1/images/generations"
520
+ api_key: ENV["SENSENOVA_API_KEY"]
521
+ model: "sensenova-u1-fast"
522
+ # sensenova-u1-fast only accepts specific sizes (default 2048x2048); see
523
+ # VALID_IMAGE_SIZES in sensenova_adapter.rb.
524
+
525
+ # 智谱 AI (BigModel/GLM) — one adapter covers all categories; just change `model`.
526
+ # Base https://open.bigmodel.cn/api/paas/v4 ; Bearer auth. Defaults use free-tier models.
527
+ glm: # 文本对话 (free glm-4-flash; paid glm-4-plus/glm-5.2)
528
+ adapter: "zhipu"
529
+ url: "https://open.bigmodel.cn/api/paas/v4"
530
+ api_key: ENV["ZHIPUAI_API_KEY"]
531
+ model: "glm-4-flash"
313
532
  temperature: 0.7
314
- # Additional provider-specific options
533
+ # CodeGeeX-4: set `coding: true` and model: codegeex-4 (uses the coding base)
534
+
535
+ glm_vision: # 图文多模态 (free glm-4v-flash; paid glm-4v-plus)
536
+ adapter: "zhipu"
537
+ url: "https://open.bigmodel.cn/api/paas/v4"
538
+ api_key: ENV["ZHIPUAI_API_KEY"]
539
+ model: "glm-4v-flash"
540
+
541
+ embedding: # 向量模型 (embedding-3; custom dimensions 256/512/1024/2048)
542
+ adapter: "zhipu"
543
+ url: "https://open.bigmodel.cn/api/paas/v4"
544
+ api_key: ENV["ZHIPUAI_API_KEY"]
545
+ model: "embedding-3"
546
+ dimensions: 1024
547
+
548
+ cogview: # 文生图 (free cogview-3-flash; paid cogview-4/glm-image)
549
+ adapter: "zhipu"
550
+ url: "https://open.bigmodel.cn/api/paas/v4"
551
+ api_key: ENV["ZHIPUAI_API_KEY"]
552
+ model: "cogview-3-flash"
553
+
554
+ cogvideo: # 文生视频 (async submit->poll->download; free cogvideox-flash)
555
+ adapter: "zhipu"
556
+ url: "https://open.bigmodel.cn/api/paas/v4"
557
+ api_key: ENV["ZHIPUAI_API_KEY"]
558
+ model: "cogvideox-flash"
559
+
560
+ glm_tts: # 语音合成 (GLM-TTS)
561
+ adapter: "zhipu"
562
+ url: "https://open.bigmodel.cn/api/paas/v4"
563
+ api_key: ENV["ZHIPUAI_API_KEY"]
564
+ model: "glm-tts"
565
+
566
+ glm_asr: # 语音识别 (GLM-ASR-2512)
567
+ adapter: "zhipu"
568
+ url: "https://open.bigmodel.cn/api/paas/v4"
569
+ api_key: ENV["ZHIPUAI_API_KEY"]
570
+ model: "glm-asr-2512"
571
+
572
+ # Vision models
573
+ vision:
574
+ adapter: "multimodal"
575
+ url: "https://api.siliconflow.cn/v1/"
576
+ api_key: ENV["SILICONFLOW_API_KEY"]
577
+ model: "Qwen/Qwen2.5-VL-7B-Instruct"
578
+
579
+ # Image generation (Kolors supports batch_size/guidance_scale; see Qwen-Image for cfg)
580
+ image_gen:
581
+ adapter: "image_generation"
582
+ url: "https://api.siliconflow.cn/v1/"
583
+ api_key: ENV["SILICONFLOW_API_KEY"]
584
+ model: "Kwai-Kolors/Kolors"
585
+
586
+ # Video generation
587
+ video_gen:
588
+ adapter: "video_generation"
589
+ url: "https://api.siliconflow.cn/v1/"
590
+ api_key: ENV["SILICONFLOW_API_KEY"]
591
+ model: "Wan-AI/Wan2.2-T2V-A14B"
592
+
593
+ # Text-to-speech
594
+ tts_service:
595
+ adapter: "tts"
596
+ url: "https://api.siliconflow.cn/v1/"
597
+ api_key: ENV["SILICONFLOW_API_KEY"]
598
+ model: "FunAudioLLM/CosyVoice2-0.5B"
599
+
600
+ # Speech-to-text
601
+ stt_service:
602
+ adapter: "stt"
603
+ url: "https://api.siliconflow.cn/v1/"
604
+ api_key: ENV["SILICONFLOW_API_KEY"]
605
+ model: "FunAudioLLM/SenseVoiceSmall"
315
606
  ```
316
607
 
317
608
  ### Model Alias Configuration
@@ -398,20 +689,27 @@ end
398
689
  ## 🚀 Real-world Use Cases
399
690
 
400
691
  - **Chatbots and Conversational AI**: Build sophisticated chatbots with context awareness
401
- - **Content Generation**: Automated content creation with template-driven prompts
692
+ - **Content Generation**: Automated content creation with template-driven prompts
402
693
  - **Code Analysis**: AI-powered code review and documentation generation
403
694
  - **Customer Support**: Intelligent ticket routing and response suggestions
404
695
  - **Data Processing**: LLM-powered data extraction and transformation
405
696
  - **Educational Tools**: AI tutors and learning assistance systems
697
+ - **Multimedia Content Creation**: Generate images, videos, and audio content
698
+ - **Voice Interfaces**: Build voice-enabled applications with TTS and STT
699
+ - **Visual Analysis**: Image understanding and object detection applications
700
+ - **Accessibility Tools**: Audio descriptions and text-to-speech for visually impaired users
406
701
 
407
702
  ## 🛣️ Roadmap
408
703
 
704
+ - [x] **Multimodal AI Support** - Vision, Image Generation, Video Generation, TTS, STT
409
705
  - [ ] Additional LLM provider adapters (e.g. Google PaLM/Gemini)
410
706
  - [ ] Visual prompt builder and management interface
411
707
  - [ ] Enhanced caching and performance optimizations
412
708
  - [ ] Integration with vector databases for RAG applications
413
709
  - [ ] Built-in evaluation and testing framework for prompts
414
710
  - [ ] Distributed worker execution support
711
+ - [ ] Real-time audio/video streaming support
712
+ - [ ] Advanced multimodal prompt chaining
415
713
 
416
714
  ## 🤝 Contributing
417
715
 
data/Rakefile CHANGED
@@ -1,4 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "bundler/gem_tasks"
4
- task default: %i[]
4
+ require "rake/testtask"
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "lib"
8
+ t.libs << "test"
9
+ t.test_files = FileList["test/**/*_test.rb"]
10
+ t.verbose = true
11
+ end
12
+
13
+ task default: :test
@@ -0,0 +1,151 @@
1
+ # Anthropic Configuration for SmartPrompt
2
+ # This configuration enables Anthropic Claude models
3
+
4
+ # Adapter definitions
5
+ adapters:
6
+ openai: "OpenAIAdapter"
7
+ anthropic: "AnthropicAdapter"
8
+
9
+ # LLM configurations
10
+ llms:
11
+ deepseek_anthropic:
12
+ adapter: anthropic
13
+ api_key: ENV["ANTHROPIC_AUTH_TOKEN"]
14
+ url: "https://api.deepseek.com/anthropic"
15
+ temperature: 0.7
16
+ max_tokens: 4096
17
+ deepseek:
18
+ adapter: openai
19
+ api_key: ENV["DSKEY"]
20
+ url: "https://api.deepseek.com"
21
+
22
+ # Path configurations
23
+ template_path: "./templates"
24
+ worker_path: "./workers"
25
+ logger_file: "./logs/smart_prompt.log"
26
+
27
+ # Advanced settings
28
+ advanced:
29
+ # Timeout settings (in seconds)
30
+ request_timeout: 240
31
+ connection_timeout: 30
32
+
33
+ # Retry settings
34
+ max_retries: 3
35
+ retry_delay: 2
36
+
37
+ # Rate limiting
38
+ requests_per_minute: 60
39
+
40
+ # History Management Configuration
41
+ # SmartPrompt provides intelligent conversation history management with session isolation,
42
+ # automatic compression, and multiple context strategies.
43
+ history:
44
+ # Cache Configuration
45
+ # Maximum number of sessions to keep in memory (LRU eviction)
46
+ cache_size: 100
47
+
48
+ # Default Session Configuration
49
+ # These settings apply to all sessions unless overridden
50
+ session_defaults:
51
+ max_messages: 100 # Maximum messages per session (older messages removed)
52
+ max_tokens: 4000 # Maximum tokens per session (enforced during context retrieval)
53
+ context_strategy: sliding_window # Default strategy: sliding_window, relevance_based, summary_based, hybrid
54
+ preserve_system_messages: true # Always keep system messages regardless of limits
55
+
56
+ # Context Strategy Configurations
57
+ # Each strategy has specific parameters for fine-tuning behavior
58
+ strategies:
59
+ # Sliding Window: Keep the most recent N messages
60
+ sliding_window:
61
+ window_size: 10 # Number of recent messages to keep
62
+ preserve_system: true # Always include system messages
63
+
64
+ # Relevance-Based: Select messages based on semantic similarity
65
+ relevance_based:
66
+ top_k: 10 # Number of most relevant messages to select
67
+ recency_weight: 0.3 # Weight for recency (0.0-1.0)
68
+ relevance_weight: 0.7 # Weight for relevance (0.0-1.0)
69
+ embedding_service: null # Optional: embedding service for semantic similarity
70
+
71
+ # Summary-Based: Automatically compress old messages into summaries
72
+ summary_based:
73
+ summary_threshold: 20 # Trigger summarization after this many messages
74
+ keep_recent: 5 # Number of recent messages to keep uncompressed
75
+ compression_ratio: 0.5 # Target compression ratio (0.0-1.0)
76
+
77
+ # Hybrid: Adaptively combine multiple strategies
78
+ hybrid:
79
+ mode: adaptive # Mode: 'adaptive' (auto-select) or 'combined' (merge results)
80
+ sliding_window: {} # Override sliding window config
81
+ relevance_based: {} # Override relevance-based config
82
+ summary_based: {} # Override summary-based config
83
+
84
+ # Compression Configuration
85
+ # Automatic summarization to reduce token usage
86
+ compression:
87
+ enabled: true # Enable automatic compression
88
+ auto_compress_threshold: 50 # Auto-compress when session exceeds this many messages
89
+ compression_ratio: 0.5 # Target compression ratio
90
+ llm_adapter: null # LLM to use for summarization (uses default if null)
91
+
92
+ # Persistence Configuration
93
+ # Save and restore conversation history across restarts
94
+ persistence:
95
+ enabled: true # Enable persistence to disk
96
+ backend: filesystem # Backend type: 'filesystem' (more backends coming soon)
97
+ storage_path: "./history_data" # Directory for storing session data
98
+ async: true # Use async writes for better performance
99
+
100
+ # Cleanup Configuration
101
+ # Automatic cleanup of old or expired sessions
102
+ cleanup:
103
+ auto_cleanup: false # Enable automatic cleanup thread
104
+ cleanup_interval: 3600 # Cleanup interval in seconds (1 hour)
105
+ session_ttl: 86400 # Session time-to-live in seconds (24 hours)
106
+ cleanup_callback: null # Optional: custom cleanup logic (Ruby proc)
107
+
108
+ # Monitoring Configuration
109
+ # Logging and metrics for debugging and monitoring
110
+ monitoring:
111
+ enabled: true # Enable monitoring and logging
112
+ log_level: info # Log level: debug, info, warn, error
113
+ metrics_format: prometheus # Metrics format: prometheus, json, hash
114
+
115
+ # Example Configurations for Different Use Cases:
116
+ #
117
+ # 1. High-Volume Chat Application (optimize for performance):
118
+ # cache_size: 1000
119
+ # session_defaults:
120
+ # max_messages: 50
121
+ # max_tokens: 2000
122
+ # context_strategy: sliding_window
123
+ # cleanup:
124
+ # auto_cleanup: true
125
+ # session_ttl: 3600 # 1 hour
126
+ #
127
+ # 2. Long-Running Conversations (optimize for context retention):
128
+ # session_defaults:
129
+ # max_messages: 500
130
+ # max_tokens: 16000
131
+ # context_strategy: summary_based
132
+ # compression:
133
+ # enabled: true
134
+ # auto_compress_threshold: 100
135
+ #
136
+ # 3. Semantic Search Application (optimize for relevance):
137
+ # session_defaults:
138
+ # context_strategy: relevance_based
139
+ # strategies:
140
+ # relevance_based:
141
+ # top_k: 20
142
+ # recency_weight: 0.2
143
+ # relevance_weight: 0.8
144
+ #
145
+ # 4. Development/Testing (disable persistence and cleanup):
146
+ # persistence:
147
+ # enabled: false
148
+ # cleanup:
149
+ # auto_cleanup: false
150
+ # monitoring:
151
+ # log_level: debug
@@ -0,0 +1,22 @@
1
+ # Configuration for SiliconFlow image generation.
2
+ #
3
+ # Get an API key from https://siliconflow.cn and export it as SILICONFLOW_API_KEY.
4
+ # Available image models: Kwai-Kolors/Kolors, Qwen/Qwen-Image,
5
+ # Qwen/Qwen-Image-Edit (image editing). See:
6
+ # https://api-docs.siliconflow.cn/docs/api/images-generations-post
7
+
8
+ adapters:
9
+ image_generation: "ImageGenerationAdapter"
10
+
11
+ llms:
12
+ image_gen:
13
+ adapter: "image_generation"
14
+ url: "https://api.siliconflow.cn/v1/"
15
+ api_key: ENV["SILICONFLOW_API_KEY"]
16
+ # Kolors supports batch_size, guidance_scale and a range of image_size values.
17
+ model: "Kwai-Kolors/Kolors"
18
+
19
+ default_llm: "image_gen"
20
+ template_path: "./templates"
21
+ worker_path: "./workers"
22
+ logger_file: "./logs/smart_prompt.log"
@@ -0,0 +1,85 @@
1
+ # Multimodal Configuration for SmartPrompt
2
+ # This configuration enables multimodal capabilities with SiliconFlow
3
+
4
+ # Adapter definitions
5
+ adapters:
6
+ openai: "OpenAIAdapter"
7
+ multimodal: "MultimodalAdapter"
8
+
9
+ # LLM configurations
10
+ llms:
11
+ # Multimodal models for vision and video understanding
12
+ qwen_vl:
13
+ adapter: "multimodal"
14
+ url: "https://api.siliconflow.cn/v1/"
15
+ api_key: ENV["SILICONFLOW_API_KEY"]
16
+ default_model: "Qwen/Qwen2.5-VL-7B-Instruct"
17
+ temperature: 0.7
18
+
19
+ qwen_omni:
20
+ adapter: "multimodal"
21
+ url: "https://api.siliconflow.cn/v1/"
22
+ api_key: ENV["SILICONFLOW_API_KEY"]
23
+ default_model: "Qwen/Qwen3-Omni-7B-Instruct"
24
+ temperature: 0.7
25
+
26
+ deepseek_vl:
27
+ adapter: "multimodal"
28
+ url: "https://api.siliconflow.cn/v1/"
29
+ api_key: ENV["SILICONFLOW_API_KEY"]
30
+ default_model: "deepseek-ai/DeepSeek-VL2"
31
+ temperature: 0.7
32
+
33
+ # Text-only models for comparison
34
+ siliconflow_text:
35
+ adapter: "openai"
36
+ url: "https://api.siliconflow.cn/v1/"
37
+ api_key: ENV["SILICONFLOW_API_KEY"]
38
+ default_model: "Qwen/Qwen2.5-7B-Instruct"
39
+ temperature: 0.7
40
+
41
+ # Default settings
42
+ default_llm: "qwen_vl"
43
+
44
+ # Path configurations
45
+ template_path: "./templates"
46
+ worker_path: "./workers"
47
+ logger_file: "./logs/smart_prompt.log"
48
+
49
+ # Multimodal specific settings
50
+ multimodal:
51
+ # Default image detail level ("low", "high", "auto")
52
+ default_image_detail: "auto"
53
+
54
+ # Default video extraction settings
55
+ default_max_frames: 10
56
+ default_fps: 1
57
+
58
+ # Supported file formats
59
+ supported_image_formats:
60
+ - "jpg"
61
+ - "jpeg"
62
+ - "png"
63
+ - "gif"
64
+ - "bmp"
65
+ - "webp"
66
+
67
+ supported_video_formats:
68
+ - "mp4"
69
+ - "mov"
70
+ - "avi"
71
+ - "mkv"
72
+ - "webm"
73
+
74
+ # Advanced settings
75
+ advanced:
76
+ # Timeout settings (in seconds)
77
+ request_timeout: 240
78
+ connection_timeout: 30
79
+
80
+ # Retry settings
81
+ max_retries: 3
82
+ retry_delay: 2
83
+
84
+ # Rate limiting
85
+ requests_per_minute: 60