desiru 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.env.example +34 -0
  3. data/.rubocop.yml +7 -4
  4. data/.ruby-version +1 -0
  5. data/CLAUDE.md +4 -0
  6. data/Gemfile +21 -2
  7. data/Gemfile.lock +87 -12
  8. data/README.md +295 -2
  9. data/Rakefile +1 -0
  10. data/db/migrations/001_create_initial_tables.rb +96 -0
  11. data/db/migrations/002_create_job_results.rb +39 -0
  12. data/desiru.db +0 -0
  13. data/desiru.gemspec +2 -5
  14. data/docs/background_processing_roadmap.md +87 -0
  15. data/docs/job_scheduling.md +167 -0
  16. data/dspy-analysis-swarm.yml +60 -0
  17. data/dspy-feature-analysis.md +121 -0
  18. data/examples/README.md +69 -0
  19. data/examples/api_with_persistence.rb +122 -0
  20. data/examples/assertions_example.rb +232 -0
  21. data/examples/async_processing.rb +2 -0
  22. data/examples/few_shot_learning.rb +1 -2
  23. data/examples/graphql_api.rb +4 -2
  24. data/examples/graphql_integration.rb +3 -3
  25. data/examples/graphql_optimization_summary.md +143 -0
  26. data/examples/graphql_performance_benchmark.rb +247 -0
  27. data/examples/persistence_example.rb +102 -0
  28. data/examples/react_agent.rb +203 -0
  29. data/examples/rest_api.rb +173 -0
  30. data/examples/rest_api_advanced.rb +333 -0
  31. data/examples/scheduled_job_example.rb +116 -0
  32. data/examples/simple_qa.rb +1 -2
  33. data/examples/sinatra_api.rb +109 -0
  34. data/examples/typed_signatures.rb +1 -2
  35. data/graphql_optimization_summary.md +53 -0
  36. data/lib/desiru/api/grape_integration.rb +284 -0
  37. data/lib/desiru/api/persistence_middleware.rb +148 -0
  38. data/lib/desiru/api/sinatra_integration.rb +217 -0
  39. data/lib/desiru/api.rb +42 -0
  40. data/lib/desiru/assertions.rb +74 -0
  41. data/lib/desiru/async_status.rb +65 -0
  42. data/lib/desiru/cache.rb +1 -1
  43. data/lib/desiru/configuration.rb +2 -1
  44. data/lib/desiru/errors.rb +160 -0
  45. data/lib/desiru/field.rb +17 -14
  46. data/lib/desiru/graphql/batch_loader.rb +85 -0
  47. data/lib/desiru/graphql/data_loader.rb +242 -75
  48. data/lib/desiru/graphql/enum_builder.rb +75 -0
  49. data/lib/desiru/graphql/executor.rb +37 -4
  50. data/lib/desiru/graphql/schema_generator.rb +62 -158
  51. data/lib/desiru/graphql/type_builder.rb +138 -0
  52. data/lib/desiru/graphql/type_cache_warmer.rb +91 -0
  53. data/lib/desiru/jobs/async_predict.rb +1 -1
  54. data/lib/desiru/jobs/base.rb +67 -0
  55. data/lib/desiru/jobs/batch_processor.rb +6 -6
  56. data/lib/desiru/jobs/retriable.rb +119 -0
  57. data/lib/desiru/jobs/retry_strategies.rb +169 -0
  58. data/lib/desiru/jobs/scheduler.rb +219 -0
  59. data/lib/desiru/jobs/webhook_notifier.rb +242 -0
  60. data/lib/desiru/models/anthropic.rb +164 -0
  61. data/lib/desiru/models/base.rb +37 -3
  62. data/lib/desiru/models/open_ai.rb +151 -0
  63. data/lib/desiru/models/open_router.rb +161 -0
  64. data/lib/desiru/module.rb +59 -9
  65. data/lib/desiru/modules/chain_of_thought.rb +3 -3
  66. data/lib/desiru/modules/majority.rb +51 -0
  67. data/lib/desiru/modules/multi_chain_comparison.rb +204 -0
  68. data/lib/desiru/modules/predict.rb +8 -1
  69. data/lib/desiru/modules/program_of_thought.rb +139 -0
  70. data/lib/desiru/modules/react.rb +273 -0
  71. data/lib/desiru/modules/retrieve.rb +4 -2
  72. data/lib/desiru/optimizers/base.rb +2 -4
  73. data/lib/desiru/optimizers/bootstrap_few_shot.rb +2 -2
  74. data/lib/desiru/optimizers/copro.rb +268 -0
  75. data/lib/desiru/optimizers/knn_few_shot.rb +185 -0
  76. data/lib/desiru/persistence/database.rb +71 -0
  77. data/lib/desiru/persistence/models/api_request.rb +38 -0
  78. data/lib/desiru/persistence/models/job_result.rb +138 -0
  79. data/lib/desiru/persistence/models/module_execution.rb +37 -0
  80. data/lib/desiru/persistence/models/optimization_result.rb +28 -0
  81. data/lib/desiru/persistence/models/training_example.rb +25 -0
  82. data/lib/desiru/persistence/models.rb +11 -0
  83. data/lib/desiru/persistence/repositories/api_request_repository.rb +98 -0
  84. data/lib/desiru/persistence/repositories/base_repository.rb +77 -0
  85. data/lib/desiru/persistence/repositories/job_result_repository.rb +116 -0
  86. data/lib/desiru/persistence/repositories/module_execution_repository.rb +85 -0
  87. data/lib/desiru/persistence/repositories/optimization_result_repository.rb +67 -0
  88. data/lib/desiru/persistence/repositories/training_example_repository.rb +102 -0
  89. data/lib/desiru/persistence/repository.rb +29 -0
  90. data/lib/desiru/persistence/setup.rb +77 -0
  91. data/lib/desiru/persistence.rb +49 -0
  92. data/lib/desiru/registry.rb +3 -5
  93. data/lib/desiru/signature.rb +91 -24
  94. data/lib/desiru/version.rb +1 -1
  95. data/lib/desiru.rb +23 -8
  96. data/missing-features-analysis.md +192 -0
  97. metadata +63 -45
  98. data/lib/desiru/models/raix_adapter.rb +0 -210
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ Sequel.migration do
4
+ up do
5
+ # API Requests table
6
+ create_table(:api_requests) do
7
+ primary_key :id
8
+ String :method, null: false
9
+ String :path, null: false
10
+ String :remote_ip
11
+ Integer :status_code, null: false
12
+ Float :response_time
13
+ String :headers, text: true # JSON
14
+ String :params, text: true # JSON
15
+ String :response_body, text: true # JSON
16
+ String :error_message
17
+ DateTime :created_at, null: false
18
+ DateTime :updated_at, null: false
19
+
20
+ index :path
21
+ index :status_code
22
+ index :created_at
23
+ end
24
+
25
+ # Module Executions table
26
+ create_table(:module_executions) do
27
+ primary_key :id
28
+ foreign_key :api_request_id, :api_requests, on_delete: :cascade
29
+ String :module_name, null: false
30
+ String :module_type
31
+ String :status, null: false, default: 'pending'
32
+ String :inputs, text: true # JSON
33
+ String :outputs, text: true # JSON
34
+ String :metadata, text: true # JSON
35
+ String :error_message
36
+ String :error_backtrace, text: true
37
+ DateTime :started_at, null: false
38
+ DateTime :finished_at
39
+ DateTime :created_at, null: false
40
+ DateTime :updated_at, null: false
41
+
42
+ index :module_name
43
+ index :status
44
+ index :started_at
45
+ index %i[module_name status]
46
+ end
47
+
48
+ # Optimization Results table
49
+ create_table(:optimization_results) do
50
+ primary_key :id
51
+ String :module_name, null: false
52
+ String :optimizer_type, null: false
53
+ Float :score, null: false
54
+ Float :baseline_score
55
+ Integer :training_size
56
+ Integer :validation_size
57
+ String :parameters, text: true # JSON
58
+ String :metrics, text: true # JSON
59
+ String :best_prompts, text: true # JSON
60
+ DateTime :started_at
61
+ DateTime :finished_at
62
+ DateTime :created_at, null: false
63
+ DateTime :updated_at, null: false
64
+
65
+ index :module_name
66
+ index :optimizer_type
67
+ index :score
68
+ index %i[module_name optimizer_type]
69
+ end
70
+
71
+ # Training Examples table
72
+ create_table(:training_examples) do
73
+ primary_key :id
74
+ String :module_name, null: false
75
+ String :dataset_type, default: 'training'
76
+ String :inputs, text: true, null: false # JSON
77
+ String :expected_outputs, text: true # JSON
78
+ String :metadata, text: true # JSON
79
+ Integer :used_count, default: 0
80
+ DateTime :last_used_at
81
+ DateTime :created_at, null: false
82
+ DateTime :updated_at, null: false
83
+
84
+ index :module_name
85
+ index :dataset_type
86
+ index %i[module_name dataset_type]
87
+ end
88
+ end
89
+
90
+ down do
91
+ drop_table(:training_examples)
92
+ drop_table(:optimization_results)
93
+ drop_table(:module_executions)
94
+ drop_table(:api_requests)
95
+ end
96
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ Sequel.migration do
4
+ up do
5
+ # Job Results table for persisting background job results
6
+ create_table(:job_results) do
7
+ primary_key :id
8
+ String :job_id, null: false, unique: true
9
+ String :job_class, null: false
10
+ String :queue, null: false
11
+ String :status, null: false, default: 'pending' # pending, processing, completed, failed
12
+ Integer :progress, default: 0
13
+ String :message
14
+ String :inputs, text: true # JSON
15
+ String :result, text: true # JSON
16
+ String :error_message
17
+ String :error_backtrace, text: true
18
+ Integer :retry_count, default: 0
19
+ DateTime :enqueued_at, null: false
20
+ DateTime :started_at
21
+ DateTime :finished_at
22
+ DateTime :expires_at
23
+ DateTime :created_at, null: false
24
+ DateTime :updated_at, null: false
25
+
26
+ index :job_id
27
+ index :job_class
28
+ index :status
29
+ index :queue
30
+ index :created_at
31
+ index :expires_at
32
+ index %i[job_class status]
33
+ end
34
+ end
35
+
36
+ down do
37
+ drop_table(:job_results)
38
+ end
39
+ end
data/desiru.db ADDED
Binary file
data/desiru.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
13
13
  'enabling reliable, maintainable, and portable AI programming.'
14
14
  spec.homepage = 'https://github.com/obie/desiru'
15
15
  spec.license = 'MIT'
16
- spec.required_ruby_version = '>= 3.4.2'
16
+ spec.required_ruby_version = '>= 3.3.0'
17
17
 
18
18
  spec.metadata['homepage_uri'] = spec.homepage
19
19
  spec.metadata['source_code_uri'] = 'https://github.com/obie/desiru'
@@ -36,9 +36,6 @@ Gem::Specification.new do |spec|
36
36
  spec.add_dependency 'sidekiq', '~> 7.2'
37
37
  spec.add_dependency 'singleton', '~> 0.1'
38
38
 
39
- # Development dependencies (basic ones, others in Gemfile)
40
- spec.add_development_dependency 'bundler', '~> 2.0'
41
- spec.add_development_dependency 'rake', '~> 13.0'
42
- spec.add_development_dependency 'rspec', '~> 3.0'
39
+ # Development dependencies moved to Gemfile
43
40
  spec.metadata['rubygems_mfa_required'] = 'false'
44
41
  end
@@ -0,0 +1,87 @@
1
+ # Background Processing Roadmap for Desiru
2
+
3
+ ## Current State
4
+
5
+ The Desiru background processing system now includes:
6
+ - ✅ Sidekiq integration for job processing
7
+ - ✅ Redis for fast job status tracking
8
+ - ✅ Database persistence for long-term job result storage
9
+ - ✅ Async module capabilities
10
+ - ✅ Job status tracking and progress updates
11
+ - ✅ Batch processing support
12
+
13
+ ## Proposed Enhancements
14
+
15
+ ### 1. Advanced Retry Strategies (Priority: High)
16
+ Implement sophisticated retry mechanisms for failed jobs:
17
+ - **Exponential backoff** with jitter to prevent thundering herd
18
+ - **Circuit breaker pattern** for external service failures
19
+ - **Dead letter queue** for jobs that exceed retry limits
20
+ - **Custom retry policies** per job type
21
+
22
+ ### 2. Job Scheduling and Cron Support (Priority: Medium)
23
+ Add support for scheduled and recurring jobs:
24
+ - **Cron-style scheduling** using Sidekiq-cron or similar
25
+ - **Delayed job execution** with precise timing
26
+ - **Recurring optimization tasks** for model improvements
27
+ - **Scheduled cleanup jobs** for expired data
28
+
29
+ ### 3. Workflow Orchestration (Priority: Medium)
30
+ Enable complex multi-step workflows:
31
+ - **Job dependencies** - jobs that wait for others to complete
32
+ - **Parallel execution** with fan-out/fan-in patterns
33
+ - **Conditional branching** based on job results
34
+ - **Workflow visualization** and monitoring
35
+
36
+ ### 4. Enhanced Monitoring and Alerting (Priority: High)
37
+ Improve visibility into job processing:
38
+ - **Real-time dashboards** for job metrics
39
+ - **Performance analytics** per job type
40
+ - **Alert thresholds** for queue depth and processing time
41
+ - **Integration with monitoring services** (Datadog, New Relic, etc.)
42
+
43
+ ### 5. Webhook and Callback System (Priority: Low)
44
+ Notify external systems of job events:
45
+ - **Configurable webhooks** for job completion/failure
46
+ - **Event streaming** for real-time updates
47
+ - **Retry logic** for failed webhook deliveries
48
+ - **Security features** (HMAC signatures, etc.)
49
+
50
+ ### 6. Resource Management (Priority: Medium)
51
+ Optimize resource usage:
52
+ - **Dynamic worker scaling** based on queue depth
53
+ - **Memory limits** per job type
54
+ - **CPU throttling** for resource-intensive jobs
55
+ - **Priority-based resource allocation**
56
+
57
+ ### 7. Testing Improvements (Priority: High)
58
+ Enhance testing capabilities:
59
+ - **Job testing helpers** for easier unit tests
60
+ - **Performance benchmarking** framework
61
+ - **Chaos engineering** tools for resilience testing
62
+ - **Mock job execution** for integration tests
63
+
64
+ ## Implementation Priority
65
+
66
+ 1. **Phase 1** (Immediate):
67
+ - Advanced retry strategies
68
+ - Enhanced monitoring and alerting
69
+ - Testing improvements
70
+
71
+ 2. **Phase 2** (Near-term):
72
+ - Job scheduling and cron support
73
+ - Workflow orchestration basics
74
+ - Resource management
75
+
76
+ 3. **Phase 3** (Long-term):
77
+ - Full workflow orchestration
78
+ - Webhook and callback system
79
+ - Advanced resource optimization
80
+
81
+ ## Benefits
82
+
83
+ - **Reliability**: Better retry strategies reduce job failures
84
+ - **Scalability**: Resource management enables efficient scaling
85
+ - **Visibility**: Enhanced monitoring provides operational insights
86
+ - **Flexibility**: Workflow orchestration enables complex use cases
87
+ - **Integration**: Webhooks allow seamless external system integration
@@ -0,0 +1,167 @@
1
+ # Job Scheduling in Desiru
2
+
3
+ Desiru provides a built-in job scheduling system that allows you to run background jobs periodically without adding external dependencies like sidekiq-cron.
4
+
5
+ ## Features
6
+
7
+ - Simple cron-like scheduling expressions
8
+ - Interval-based scheduling ("every 5 minutes")
9
+ - Support for basic standard cron expressions (see "Supported Cron Formats" below for the patterns currently handled)
10
+ - Lightweight implementation with no additional dependencies beyond Sidekiq (no sidekiq-cron required)
11
+ - Thread-safe singleton scheduler
12
+ - Easy mixin for making jobs schedulable
13
+
14
+ ## Usage
15
+
16
+ ### Making a Job Schedulable
17
+
18
+ Include the `Schedulable` mixin in your job class:
19
+
20
+ ```ruby
21
+ class MyPeriodicJob < Desiru::Jobs::Base
22
+ include Desiru::Jobs::Schedulable
23
+
24
+ def perform(job_id = nil)
25
+ # Your job logic here
26
+ puts "Running periodic job: #{job_id}"
27
+
28
+ # Store result if needed
29
+ store_result(job_id, { status: 'completed', timestamp: Time.now })
30
+ end
31
+ end
32
+ ```
33
+
34
+ ### Scheduling Jobs
35
+
36
+ #### Simple Interval Scheduling
37
+
38
+ ```ruby
39
+ # Run every 60 seconds
40
+ MyPeriodicJob.schedule(cron: '60')
41
+
42
+ # Run every 5 minutes
43
+ MyPeriodicJob.schedule(cron: 'every 5 minutes')
44
+
45
+ # Run every 2 hours
46
+ MyPeriodicJob.schedule(cron: 'every 2 hours')
47
+
48
+ # Run daily
49
+ MyPeriodicJob.schedule(cron: 'every 1 day')
50
+ ```
51
+
52
+ #### Cron Expression Scheduling
53
+
54
+ ```ruby
55
+ # Run every minute
56
+ MyPeriodicJob.schedule(cron: '* * * * *')
57
+
58
+ # Run every hour at minute 0
59
+ MyPeriodicJob.schedule(cron: '0 * * * *')
60
+
61
+ # Run daily at 9:30 AM
62
+ MyPeriodicJob.schedule(cron: '30 9 * * *')
63
+ ```
64
+
65
+ #### Advanced Scheduling Options
66
+
67
+ ```ruby
68
+ # Schedule with custom name
69
+ MyPeriodicJob.schedule(
70
+ name: 'custom_job_name',
71
+ cron: 'every 30 minutes'
72
+ )
73
+
74
+ # Schedule with arguments
75
+ MyPeriodicJob.schedule(
76
+ cron: 'every 1 hour',
77
+ args: ['arg1', 'arg2']
78
+ )
79
+
80
+ # Schedule with additional options
81
+ MyPeriodicJob.schedule(
82
+ name: 'important_job',
83
+ cron: '0 */6 * * *', # Intended: every 6 hours (patterns beyond the basic forms may fall back to hourly; see "Supported Cron Formats")
84
+ args: ['production'],
85
+ priority: 'high' # Additional options passed through
86
+ )
87
+ ```
88
+
89
+ ### Managing the Scheduler
90
+
91
+ ```ruby
92
+ scheduler = Desiru::Jobs::Scheduler.instance
93
+
94
+ # Start the scheduler
95
+ scheduler.start
96
+
97
+ # Check if scheduler is running
98
+ scheduler.running? # => true
99
+
100
+ # Stop the scheduler
101
+ scheduler.stop
102
+
103
+ # Get information about a scheduled job
104
+ info = scheduler.job_info('MyPeriodicJob')
105
+ # => { job_class: MyPeriodicJob, cron: '60', next_run: Time, ... }
106
+
107
+ # Clear all scheduled jobs
108
+ scheduler.clear
109
+ ```
110
+
111
+ ### Checking Job Status
112
+
113
+ ```ruby
114
+ # Check if a job is scheduled
115
+ MyPeriodicJob.scheduled? # => true
116
+
117
+ # Check by custom name
118
+ MyPeriodicJob.scheduled?(name: 'custom_job_name') # => true
119
+
120
+ # Unschedule a job
121
+ MyPeriodicJob.unschedule
122
+
123
+ # Unschedule by name
124
+ MyPeriodicJob.unschedule(name: 'custom_job_name')
125
+ ```
126
+
127
+ ## Supported Cron Formats
128
+
129
+ ### Interval Expressions
130
+
131
+ - Simple seconds: `"60"` (runs every 60 seconds)
132
+ - Natural language: `"every N [second(s)|minute(s)|hour(s)|day(s)]"`
133
+
134
+ ### Cron Expressions
135
+
136
+ Currently supports basic cron patterns:
137
+
138
+ - `* * * * *` - Every minute
139
+ - `0 * * * *` - Every hour at minute 0
140
+ - `30 10 * * *` - Daily at 10:30 AM
141
+
142
+ Cron patterns more complex than the ones listed above are not yet supported and fall back to hourly execution.
143
+
144
+ ## Implementation Details
145
+
146
+ The scheduler:
147
+ - Runs in a background thread
148
+ - Checks once per second for jobs that are due to run
149
+ - Generates unique job IDs for each scheduled execution
150
+ - Logs job execution and errors
151
+ - Handles job execution errors gracefully without stopping the scheduler
152
+
153
+ ## Example
154
+
155
+ See `examples/scheduled_job_example.rb` for a complete working example of scheduled jobs.
156
+
157
+ ## Best Practices
158
+
159
+ 1. **Idempotent Jobs**: Make your scheduled jobs idempotent since they may run multiple times
160
+ 2. **Error Handling**: Include proper error handling in your job's perform method
161
+ 3. **Logging**: Use Desiru.logger for consistent logging
162
+ 4. **Resource Cleanup**: Stop the scheduler gracefully when shutting down your application
163
+ 5. **Monitoring**: Monitor scheduled job execution through job results and logs
164
+
165
+ ## Integration with Sidekiq
166
+
167
+ The scheduler integrates seamlessly with Sidekiq. When a scheduled job's time comes, the scheduler calls `perform_async` on the job class, which enqueues it into Sidekiq for processing.
@@ -0,0 +1,60 @@
1
+ version: 1
2
+ swarm:
3
+ name: "DSPy Analysis & Documentation Team"
4
+ main: lead_analyst
5
+ instances:
6
+ lead_analyst:
7
+ description: "Lead analyst coordinating DSPy feature analysis and documentation strategy"
8
+ directory: .
9
+ model: opus
10
+ connections: [feature_researcher, integration_tester, documentation_writer]
11
+ prompt: "You are the lead analyst for a Ruby port of DSPy. Your role is to coordinate analysis of missing features compared to Python DSPy, oversee integration testing, and guide documentation preparation. Focus on strategic decisions and high-level architecture analysis. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
12
+ allowed_tools:
13
+ - Read
14
+ - Edit
15
+ - MultiEdit
16
+ - WebSearch
17
+ - WebFetch
18
+ - Bash
19
+
20
+ feature_researcher:
21
+ description: "DSPy expert researching Python DSPy features to identify gaps in Ruby implementation"
22
+ directory: .
23
+ model: opus
24
+ prompt: "You specialize in analyzing the Python DSPy library to identify missing features in this Ruby port. Research DSPy documentation, compare with current Ruby implementation, and document feature gaps. Focus on core DSPy concepts like modules, optimizers, retrievals, and signatures. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
25
+ allowed_tools:
26
+ - Read
27
+ - WebSearch
28
+ - WebFetch
29
+ - Edit
30
+ - MultiEdit
31
+ - Write
32
+ - Bash
33
+
34
+ integration_tester:
35
+ description: "Integration testing specialist ensuring all Ruby DSPy features work correctly through comprehensive tests"
36
+ directory: .
37
+ model: opus
38
+ connections: [feature_researcher]
39
+ prompt: "You are responsible for creating and running comprehensive integration tests to verify that all DSPy features work correctly in the Ruby implementation. Focus on end-to-end workflows, real API interactions, and complex module compositions. Use RSpec exclusively for all testing. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
40
+ allowed_tools:
41
+ - Read
42
+ - Edit
43
+ - MultiEdit
44
+ - Write
45
+ - Bash
46
+
47
+ documentation_writer:
48
+ description: "Technical writer preparing GitHub wiki documentation following DSPy documentation patterns"
49
+ directory: .
50
+ model: opus
51
+ connections: [feature_researcher]
52
+ prompt: "You create comprehensive GitHub wiki documentation for the Ruby DSPy port, following the structure and style of the original Python DSPy documentation. Focus on API references, usage examples, tutorials, and migration guides. Research the original DSPy docs for inspiration and maintain consistency with Ruby conventions. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
53
+ allowed_tools:
54
+ - Read
55
+ - Write
56
+ - Edit
57
+ - MultiEdit
58
+ - WebSearch
59
+ - WebFetch
60
+ - Bash
@@ -0,0 +1,121 @@
1
+ # DSPy Feature Analysis for Desiru Implementation
2
+
3
+ This document provides a comprehensive analysis of the Python DSPy library's core features, modules, and components to guide the Ruby implementation of Desiru.
4
+
5
+ ## Core Concepts
6
+
7
+ ### 1. Programming Model
8
+ - **Declarative Approach**: DSPy separates program flow (modules and logic) from parameters (prompts) that control LLM behavior
9
+ - **Compositional**: Build complex systems by composing simple modules
10
+ - **Self-Improving**: Programs can be automatically optimized through compilation
11
+
12
+ ### 2. Signatures
13
+ - Function declarations that specify what a text transformation should do (not how)
14
+ - Format: `"input1, input2 -> output1, output2"`
15
+ - Examples:
16
+ - `"question -> answer"` for basic Q&A
17
+ - `"context, question -> answer"` for retrieval-augmented generation
18
+ - `"sentence -> sentiment: bool"` for classification
19
+ - Include field names and optional metadata
20
+ - Support type hints to shape LM behavior
21
+
22
+ ### 3. Modules
23
+ Core building blocks inspired by PyTorch modules:
24
+ - **dspy.Predict**: Basic predictor, handles instructions and demonstrations
25
+ - **dspy.ChainOfThought**: Adds step-by-step reasoning before output
26
+ - **dspy.ProgramOfThought**: Outputs executable code
27
+ - **dspy.ReAct**: Agent that can use tools to implement signatures
28
+ - **dspy.MultiChainComparison**: Compares multiple ChainOfThought outputs
29
+ - **dspy.Retrieve**: Information retrieval module
30
+ - **dspy.BestOfN**: Runs module N times, returns best result
31
+ - **dspy.Refine**: Iterative refinement of outputs
32
+
33
+ ### 4. Data Handling
34
+ - **Example**: Core data type, similar to a Python dict, with extra utilities
35
+ - **Prediction**: Special subclass of Example returned by modules
36
+ - Supports loading from HuggingFace datasets and CSV files
37
+ - Built-in train/test split capabilities
38
+
39
+ ### 5. Metrics
40
+ - Functions that take (example, prediction, optional trace) and return a score
41
+ - Can be simple boolean checks or complex DSPy programs
42
+ - Used for both evaluation and optimization
43
+ - Support for LLM-as-Judge metrics
44
+
45
+ ### 6. Optimizers (Teleprompters)
46
+ Automated prompt optimization strategies:
47
+ - **LabeledFewShot**: Uses provided labeled examples
48
+ - **BootstrapFewShot**: Generates demonstrations from program execution
49
+ - **BootstrapFewShotWithRandomSearch**: Multiple runs with random search
50
+ - **MIPROv2**: Advanced optimizer using Bayesian optimization
51
+ - **BootstrapFinetune**: Generates data for finetuning
52
+ - **COPRO**: Collaborative prompt optimization
53
+ - **KNNFewShot**: K-nearest neighbor example selection
54
+ - **Ensemble**: Combines multiple optimized programs
55
+
56
+ ### 7. Compilation Process
57
+ 1. **Bootstrapping**: Run program on training data to collect execution traces
58
+ 2. **Filtering**: Keep only traces that pass the metric
59
+ 3. **Demonstration Selection**: Choose best examples for few-shot prompts
60
+ 4. **Instruction Generation**: Create optimized instructions (some optimizers)
61
+ 5. **Parameter Updates**: Update module prompts and demonstrations
62
+
63
+ ### 8. Assertions and Constraints
64
+ - **dspy.Assert**: Hard constraints that must be satisfied
65
+ - **dspy.Suggest**: Soft constraints for guidance
66
+ - **dspy.Refine**: Iterative refinement based on constraints
67
+ - **dspy.BestOfN**: Sample multiple outputs, select best
68
+
69
+ ### 9. Retrieval and RAG
70
+ - Built-in support for retrieval-augmented generation
71
+ - **ColBERTv2** integration for semantic search
72
+ - Composable retrieval modules
73
+ - Support for various vector databases
74
+
75
+ ### 10. Agent Capabilities
76
+ - **ReAct** module for tool use and multi-step reasoning
77
+ - Support for building complex agent loops
78
+ - Integration with external tools and APIs
79
+
80
+ ## Key Architectural Patterns
81
+
82
+ 1. **Separation of Concerns**: Program logic separate from LM parameters
83
+ 2. **Modular Composition**: Build complex systems from simple modules
84
+ 3. **Automatic Optimization**: Compile programs to improve performance
85
+ 4. **Trace-Based Learning**: Learn from execution traces, not just outputs
86
+ 5. **Metric-Driven Development**: Define success metrics, let DSPy optimize
87
+
88
+ ## Implementation Priorities for Desiru
89
+
90
+ ### Phase 1: Core Foundation
91
+ 1. Signature parsing and representation
92
+ 2. Basic Predict module
93
+ 3. Example and Prediction data structures
94
+ 4. Simple metrics system
95
+
96
+ ### Phase 2: Essential Modules
97
+ 1. ChainOfThought module
98
+ 2. Basic optimizer (BootstrapFewShot)
99
+ 3. Compilation infrastructure
100
+ 4. Trace collection system
101
+
102
+ ### Phase 3: Advanced Features
103
+ 1. ReAct agent module
104
+ 2. Retrieval modules
105
+ 3. Advanced optimizers (MIPROv2)
106
+ 4. Assertion system
107
+
108
+ ### Phase 4: Ecosystem
109
+ 1. Data loaders
110
+ 2. Integration with Ruby ML libraries
111
+ 3. Performance optimizations
112
+ 4. Documentation and examples
113
+
114
+ ## Design Considerations for Ruby
115
+
116
+ 1. **Module System**: Leverage Ruby's module system for composability
117
+ 2. **DSL**: Create Ruby-idiomatic DSL for signatures
118
+ 3. **Blocks**: Use blocks for metric definitions
119
+ 4. **`method_missing`**: Consider using it for dynamic module composition
120
+ 5. **Lazy Evaluation**: For efficient trace collection
121
+ 6. **Concurrent Processing**: For parallel optimization runs
data/examples/README.md CHANGED
@@ -39,6 +39,75 @@ Shows how to use the BootstrapFewShot optimizer to improve module performance wi
39
39
  ruby examples/few_shot_learning.rb
40
40
  ```
41
41
 
42
+ ### rest_api.rb
43
+ Creates a REST API server using Grape integration, exposing Desiru modules as HTTP endpoints.
44
+
45
+ ```bash
46
+ ruby examples/rest_api.rb
47
+ # Visit http://localhost:9292 for API documentation
48
+ ```
49
+
50
+ ### rest_api_advanced.rb
51
+ Advanced REST API with authentication, rate limiting, and tool-using AI agents.
52
+
53
+ ```bash
54
+ ruby examples/rest_api_advanced.rb
55
+ # API keys: demo-key-123, test-key-456
56
+ ```
57
+
58
+ ### sinatra_api.rb
59
+ Lightweight REST API using Sinatra integration as an alternative to Grape.
60
+
61
+ ```bash
62
+ ruby examples/sinatra_api.rb
63
+ # Visit http://localhost:9293 for simpler API endpoints
64
+ ```
65
+
66
+ ### graphql_integration.rb
67
+ GraphQL API server with automatic schema generation from Desiru signatures.
68
+
69
+ ```bash
70
+ ruby examples/graphql_integration.rb
71
+ # GraphiQL interface at http://localhost:9292/graphiql
72
+ ```
73
+
74
+ ### react_agent.rb
75
+ Demonstrates the ReAct module for building tool-using AI agents.
76
+
77
+ ```bash
78
+ ruby examples/react_agent.rb
79
+ ```
80
+
81
+ ### async_processing.rb
82
+ Shows how to use background job processing with Sidekiq for long-running operations.
83
+
84
+ ```bash
85
+ # Start Redis first
86
+ redis-server
87
+
88
+ # In another terminal, start Sidekiq workers
89
+ bundle exec sidekiq
90
+
91
+ # Run the example
92
+ ruby examples/async_processing.rb
93
+ ```
94
+
95
+ ### persistence_example.rb
96
+ Demonstrates Sequel-based persistence for tracking module executions, API requests, and training data.
97
+
98
+ ```bash
99
+ ruby examples/persistence_example.rb
100
+ # Creates a SQLite database with execution history
101
+ ```
102
+
103
+ ### api_with_persistence.rb
104
+ REST API server with automatic request tracking and analytics dashboard.
105
+
106
+ ```bash
107
+ ruby examples/api_with_persistence.rb
108
+ # Visit http://localhost:9294 for the dashboard
109
+ ```
110
+
42
111
  ## Creating Your Own Examples
43
112
 
44
113
  When creating new examples: