desiru 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.example +34 -0
- data/.rubocop.yml +7 -4
- data/.ruby-version +1 -0
- data/CLAUDE.md +4 -0
- data/Gemfile +21 -2
- data/Gemfile.lock +87 -12
- data/README.md +295 -2
- data/Rakefile +1 -0
- data/db/migrations/001_create_initial_tables.rb +96 -0
- data/db/migrations/002_create_job_results.rb +39 -0
- data/desiru.db +0 -0
- data/desiru.gemspec +2 -5
- data/docs/background_processing_roadmap.md +87 -0
- data/docs/job_scheduling.md +167 -0
- data/dspy-analysis-swarm.yml +60 -0
- data/dspy-feature-analysis.md +121 -0
- data/examples/README.md +69 -0
- data/examples/api_with_persistence.rb +122 -0
- data/examples/assertions_example.rb +232 -0
- data/examples/async_processing.rb +2 -0
- data/examples/few_shot_learning.rb +1 -2
- data/examples/graphql_api.rb +4 -2
- data/examples/graphql_integration.rb +3 -3
- data/examples/graphql_optimization_summary.md +143 -0
- data/examples/graphql_performance_benchmark.rb +247 -0
- data/examples/persistence_example.rb +102 -0
- data/examples/react_agent.rb +203 -0
- data/examples/rest_api.rb +173 -0
- data/examples/rest_api_advanced.rb +333 -0
- data/examples/scheduled_job_example.rb +116 -0
- data/examples/simple_qa.rb +1 -2
- data/examples/sinatra_api.rb +109 -0
- data/examples/typed_signatures.rb +1 -2
- data/graphql_optimization_summary.md +53 -0
- data/lib/desiru/api/grape_integration.rb +284 -0
- data/lib/desiru/api/persistence_middleware.rb +148 -0
- data/lib/desiru/api/sinatra_integration.rb +217 -0
- data/lib/desiru/api.rb +42 -0
- data/lib/desiru/assertions.rb +74 -0
- data/lib/desiru/async_status.rb +65 -0
- data/lib/desiru/cache.rb +1 -1
- data/lib/desiru/configuration.rb +2 -1
- data/lib/desiru/errors.rb +160 -0
- data/lib/desiru/field.rb +17 -14
- data/lib/desiru/graphql/batch_loader.rb +85 -0
- data/lib/desiru/graphql/data_loader.rb +242 -75
- data/lib/desiru/graphql/enum_builder.rb +75 -0
- data/lib/desiru/graphql/executor.rb +37 -4
- data/lib/desiru/graphql/schema_generator.rb +62 -158
- data/lib/desiru/graphql/type_builder.rb +138 -0
- data/lib/desiru/graphql/type_cache_warmer.rb +91 -0
- data/lib/desiru/jobs/async_predict.rb +1 -1
- data/lib/desiru/jobs/base.rb +67 -0
- data/lib/desiru/jobs/batch_processor.rb +6 -6
- data/lib/desiru/jobs/retriable.rb +119 -0
- data/lib/desiru/jobs/retry_strategies.rb +169 -0
- data/lib/desiru/jobs/scheduler.rb +219 -0
- data/lib/desiru/jobs/webhook_notifier.rb +242 -0
- data/lib/desiru/models/anthropic.rb +164 -0
- data/lib/desiru/models/base.rb +37 -3
- data/lib/desiru/models/open_ai.rb +151 -0
- data/lib/desiru/models/open_router.rb +161 -0
- data/lib/desiru/module.rb +59 -9
- data/lib/desiru/modules/chain_of_thought.rb +3 -3
- data/lib/desiru/modules/majority.rb +51 -0
- data/lib/desiru/modules/multi_chain_comparison.rb +204 -0
- data/lib/desiru/modules/predict.rb +8 -1
- data/lib/desiru/modules/program_of_thought.rb +139 -0
- data/lib/desiru/modules/react.rb +273 -0
- data/lib/desiru/modules/retrieve.rb +4 -2
- data/lib/desiru/optimizers/base.rb +2 -4
- data/lib/desiru/optimizers/bootstrap_few_shot.rb +2 -2
- data/lib/desiru/optimizers/copro.rb +268 -0
- data/lib/desiru/optimizers/knn_few_shot.rb +185 -0
- data/lib/desiru/persistence/database.rb +71 -0
- data/lib/desiru/persistence/models/api_request.rb +38 -0
- data/lib/desiru/persistence/models/job_result.rb +138 -0
- data/lib/desiru/persistence/models/module_execution.rb +37 -0
- data/lib/desiru/persistence/models/optimization_result.rb +28 -0
- data/lib/desiru/persistence/models/training_example.rb +25 -0
- data/lib/desiru/persistence/models.rb +11 -0
- data/lib/desiru/persistence/repositories/api_request_repository.rb +98 -0
- data/lib/desiru/persistence/repositories/base_repository.rb +77 -0
- data/lib/desiru/persistence/repositories/job_result_repository.rb +116 -0
- data/lib/desiru/persistence/repositories/module_execution_repository.rb +85 -0
- data/lib/desiru/persistence/repositories/optimization_result_repository.rb +67 -0
- data/lib/desiru/persistence/repositories/training_example_repository.rb +102 -0
- data/lib/desiru/persistence/repository.rb +29 -0
- data/lib/desiru/persistence/setup.rb +77 -0
- data/lib/desiru/persistence.rb +49 -0
- data/lib/desiru/registry.rb +3 -5
- data/lib/desiru/signature.rb +91 -24
- data/lib/desiru/version.rb +1 -1
- data/lib/desiru.rb +23 -8
- data/missing-features-analysis.md +192 -0
- metadata +63 -45
- data/lib/desiru/models/raix_adapter.rb +0 -210
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
Sequel.migration do
|
4
|
+
up do
|
5
|
+
# API Requests table
|
6
|
+
create_table(:api_requests) do
|
7
|
+
primary_key :id
|
8
|
+
String :method, null: false
|
9
|
+
String :path, null: false
|
10
|
+
String :remote_ip
|
11
|
+
Integer :status_code, null: false
|
12
|
+
Float :response_time
|
13
|
+
String :headers, text: true # JSON
|
14
|
+
String :params, text: true # JSON
|
15
|
+
String :response_body, text: true # JSON
|
16
|
+
String :error_message
|
17
|
+
DateTime :created_at, null: false
|
18
|
+
DateTime :updated_at, null: false
|
19
|
+
|
20
|
+
index :path
|
21
|
+
index :status_code
|
22
|
+
index :created_at
|
23
|
+
end
|
24
|
+
|
25
|
+
# Module Executions table
|
26
|
+
create_table(:module_executions) do
|
27
|
+
primary_key :id
|
28
|
+
foreign_key :api_request_id, :api_requests, on_delete: :cascade
|
29
|
+
String :module_name, null: false
|
30
|
+
String :module_type
|
31
|
+
String :status, null: false, default: 'pending'
|
32
|
+
String :inputs, text: true # JSON
|
33
|
+
String :outputs, text: true # JSON
|
34
|
+
String :metadata, text: true # JSON
|
35
|
+
String :error_message
|
36
|
+
String :error_backtrace, text: true
|
37
|
+
DateTime :started_at, null: false
|
38
|
+
DateTime :finished_at
|
39
|
+
DateTime :created_at, null: false
|
40
|
+
DateTime :updated_at, null: false
|
41
|
+
|
42
|
+
index :module_name
|
43
|
+
index :status
|
44
|
+
index :started_at
|
45
|
+
index %i[module_name status]
|
46
|
+
end
|
47
|
+
|
48
|
+
# Optimization Results table
|
49
|
+
create_table(:optimization_results) do
|
50
|
+
primary_key :id
|
51
|
+
String :module_name, null: false
|
52
|
+
String :optimizer_type, null: false
|
53
|
+
Float :score, null: false
|
54
|
+
Float :baseline_score
|
55
|
+
Integer :training_size
|
56
|
+
Integer :validation_size
|
57
|
+
String :parameters, text: true # JSON
|
58
|
+
String :metrics, text: true # JSON
|
59
|
+
String :best_prompts, text: true # JSON
|
60
|
+
DateTime :started_at
|
61
|
+
DateTime :finished_at
|
62
|
+
DateTime :created_at, null: false
|
63
|
+
DateTime :updated_at, null: false
|
64
|
+
|
65
|
+
index :module_name
|
66
|
+
index :optimizer_type
|
67
|
+
index :score
|
68
|
+
index %i[module_name optimizer_type]
|
69
|
+
end
|
70
|
+
|
71
|
+
# Training Examples table
|
72
|
+
create_table(:training_examples) do
|
73
|
+
primary_key :id
|
74
|
+
String :module_name, null: false
|
75
|
+
String :dataset_type, default: 'training'
|
76
|
+
String :inputs, text: true, null: false # JSON
|
77
|
+
String :expected_outputs, text: true # JSON
|
78
|
+
String :metadata, text: true # JSON
|
79
|
+
Integer :used_count, default: 0
|
80
|
+
DateTime :last_used_at
|
81
|
+
DateTime :created_at, null: false
|
82
|
+
DateTime :updated_at, null: false
|
83
|
+
|
84
|
+
index :module_name
|
85
|
+
index :dataset_type
|
86
|
+
index %i[module_name dataset_type]
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
down do
|
91
|
+
drop_table(:training_examples)
|
92
|
+
drop_table(:optimization_results)
|
93
|
+
drop_table(:module_executions)
|
94
|
+
drop_table(:api_requests)
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
Sequel.migration do
|
4
|
+
up do
|
5
|
+
# Job Results table for persisting background job results
|
6
|
+
create_table(:job_results) do
|
7
|
+
primary_key :id
|
8
|
+
String :job_id, null: false, unique: true
|
9
|
+
String :job_class, null: false
|
10
|
+
String :queue, null: false
|
11
|
+
String :status, null: false, default: 'pending' # pending, processing, completed, failed
|
12
|
+
Integer :progress, default: 0
|
13
|
+
String :message
|
14
|
+
String :inputs, text: true # JSON
|
15
|
+
String :result, text: true # JSON
|
16
|
+
String :error_message
|
17
|
+
String :error_backtrace, text: true
|
18
|
+
Integer :retry_count, default: 0
|
19
|
+
DateTime :enqueued_at, null: false
|
20
|
+
DateTime :started_at
|
21
|
+
DateTime :finished_at
|
22
|
+
DateTime :expires_at
|
23
|
+
DateTime :created_at, null: false
|
24
|
+
DateTime :updated_at, null: false
|
25
|
+
|
26
|
+
index :job_id
|
27
|
+
index :job_class
|
28
|
+
index :status
|
29
|
+
index :queue
|
30
|
+
index :created_at
|
31
|
+
index :expires_at
|
32
|
+
index %i[job_class status]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
down do
|
37
|
+
drop_table(:job_results)
|
38
|
+
end
|
39
|
+
end
|
data/desiru.db
ADDED
Binary file
|
data/desiru.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
|
|
13
13
|
'enabling reliable, maintainable, and portable AI programming.'
|
14
14
|
spec.homepage = 'https://github.com/obie/desiru'
|
15
15
|
spec.license = 'MIT'
|
16
|
-
spec.required_ruby_version = '>= 3.
|
16
|
+
spec.required_ruby_version = '>= 3.3.0'
|
17
17
|
|
18
18
|
spec.metadata['homepage_uri'] = spec.homepage
|
19
19
|
spec.metadata['source_code_uri'] = 'https://github.com/obie/desiru'
|
@@ -36,9 +36,6 @@ Gem::Specification.new do |spec|
|
|
36
36
|
spec.add_dependency 'sidekiq', '~> 7.2'
|
37
37
|
spec.add_dependency 'singleton', '~> 0.1'
|
38
38
|
|
39
|
-
# Development dependencies
|
40
|
-
spec.add_development_dependency 'bundler', '~> 2.0'
|
41
|
-
spec.add_development_dependency 'rake', '~> 13.0'
|
42
|
-
spec.add_development_dependency 'rspec', '~> 3.0'
|
39
|
+
# Development dependencies moved to Gemfile
|
43
40
|
spec.metadata['rubygems_mfa_required'] = 'false'
|
44
41
|
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Background Processing Roadmap for Desiru
|
2
|
+
|
3
|
+
## Current State
|
4
|
+
|
5
|
+
The Desiru background processing system now includes:
|
6
|
+
- ✅ Sidekiq integration for job processing
|
7
|
+
- ✅ Redis for fast job status tracking
|
8
|
+
- ✅ Database persistence for long-term job result storage
|
9
|
+
- ✅ Async module capabilities
|
10
|
+
- ✅ Job status tracking and progress updates
|
11
|
+
- ✅ Batch processing support
|
12
|
+
|
13
|
+
## Proposed Enhancements
|
14
|
+
|
15
|
+
### 1. Advanced Retry Strategies (Priority: High)
|
16
|
+
Implement sophisticated retry mechanisms for failed jobs:
|
17
|
+
- **Exponential backoff** with jitter to prevent thundering herd
|
18
|
+
- **Circuit breaker pattern** for external service failures
|
19
|
+
- **Dead letter queue** for jobs that exceed retry limits
|
20
|
+
- **Custom retry policies** per job type
|
21
|
+
|
22
|
+
### 2. Job Scheduling and Cron Support (Priority: Medium)
|
23
|
+
Add support for scheduled and recurring jobs:
|
24
|
+
- **Cron-style scheduling** using Sidekiq-cron or similar
|
25
|
+
- **Delayed job execution** with precise timing
|
26
|
+
- **Recurring optimization tasks** for model improvements
|
27
|
+
- **Scheduled cleanup jobs** for expired data
|
28
|
+
|
29
|
+
### 3. Workflow Orchestration (Priority: Medium)
|
30
|
+
Enable complex multi-step workflows:
|
31
|
+
- **Job dependencies** - jobs that wait for others to complete
|
32
|
+
- **Parallel execution** with fan-out/fan-in patterns
|
33
|
+
- **Conditional branching** based on job results
|
34
|
+
- **Workflow visualization** and monitoring
|
35
|
+
|
36
|
+
### 4. Enhanced Monitoring and Alerting (Priority: High)
|
37
|
+
Improve visibility into job processing:
|
38
|
+
- **Real-time dashboards** for job metrics
|
39
|
+
- **Performance analytics** per job type
|
40
|
+
- **Alert thresholds** for queue depth and processing time
|
41
|
+
- **Integration with monitoring services** (Datadog, New Relic, etc.)
|
42
|
+
|
43
|
+
### 5. Webhook and Callback System (Priority: Low)
|
44
|
+
Notify external systems of job events:
|
45
|
+
- **Configurable webhooks** for job completion/failure
|
46
|
+
- **Event streaming** for real-time updates
|
47
|
+
- **Retry logic** for failed webhook deliveries
|
48
|
+
- **Security features** (HMAC signatures, etc.)
|
49
|
+
|
50
|
+
### 6. Resource Management (Priority: Medium)
|
51
|
+
Optimize resource usage:
|
52
|
+
- **Dynamic worker scaling** based on queue depth
|
53
|
+
- **Memory limits** per job type
|
54
|
+
- **CPU throttling** for resource-intensive jobs
|
55
|
+
- **Priority-based resource allocation**
|
56
|
+
|
57
|
+
### 7. Testing Improvements (Priority: High)
|
58
|
+
Enhance testing capabilities:
|
59
|
+
- **Job testing helpers** for easier unit tests
|
60
|
+
- **Performance benchmarking** framework
|
61
|
+
- **Chaos engineering** tools for resilience testing
|
62
|
+
- **Mock job execution** for integration tests
|
63
|
+
|
64
|
+
## Implementation Priority
|
65
|
+
|
66
|
+
1. **Phase 1** (Immediate):
|
67
|
+
- Advanced retry strategies
|
68
|
+
- Enhanced monitoring and alerting
|
69
|
+
- Testing improvements
|
70
|
+
|
71
|
+
2. **Phase 2** (Near-term):
|
72
|
+
- Job scheduling and cron support
|
73
|
+
- Workflow orchestration basics
|
74
|
+
- Resource management
|
75
|
+
|
76
|
+
3. **Phase 3** (Long-term):
|
77
|
+
- Full workflow orchestration
|
78
|
+
- Webhook and callback system
|
79
|
+
- Advanced resource optimization
|
80
|
+
|
81
|
+
## Benefits
|
82
|
+
|
83
|
+
- **Reliability**: Better retry strategies reduce job failures
|
84
|
+
- **Scalability**: Resource management enables efficient scaling
|
85
|
+
- **Visibility**: Enhanced monitoring provides operational insights
|
86
|
+
- **Flexibility**: Workflow orchestration enables complex use cases
|
87
|
+
- **Integration**: Webhooks allow seamless external system integration
|
@@ -0,0 +1,167 @@
|
|
1
|
+
# Job Scheduling in Desiru
|
2
|
+
|
3
|
+
Desiru provides a built-in job scheduling system that allows you to run background jobs periodically without adding external dependencies like sidekiq-cron.
|
4
|
+
|
5
|
+
## Features
|
6
|
+
|
7
|
+
- Simple cron-like scheduling expressions
|
8
|
+
- Interval-based scheduling ("every 5 minutes")
|
9
|
+
- Standard cron expressions support
|
10
|
+
- Lightweight implementation with no additional dependencies beyond Sidekiq
|
11
|
+
- Thread-safe singleton scheduler
|
12
|
+
- Easy mixin for making jobs schedulable
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
### Making a Job Schedulable
|
17
|
+
|
18
|
+
Include the `Schedulable` mixin in your job class:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
class MyPeriodicJob < Desiru::Jobs::Base
|
22
|
+
include Desiru::Jobs::Schedulable
|
23
|
+
|
24
|
+
def perform(job_id = nil)
|
25
|
+
# Your job logic here
|
26
|
+
puts "Running periodic job: #{job_id}"
|
27
|
+
|
28
|
+
# Store result if needed
|
29
|
+
store_result(job_id, { status: 'completed', timestamp: Time.now })
|
30
|
+
end
|
31
|
+
end
|
32
|
+
```
|
33
|
+
|
34
|
+
### Scheduling Jobs
|
35
|
+
|
36
|
+
#### Simple Interval Scheduling
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
# Run every 60 seconds
|
40
|
+
MyPeriodicJob.schedule(cron: '60')
|
41
|
+
|
42
|
+
# Run every 5 minutes
|
43
|
+
MyPeriodicJob.schedule(cron: 'every 5 minutes')
|
44
|
+
|
45
|
+
# Run every 2 hours
|
46
|
+
MyPeriodicJob.schedule(cron: 'every 2 hours')
|
47
|
+
|
48
|
+
# Run daily
|
49
|
+
MyPeriodicJob.schedule(cron: 'every 1 day')
|
50
|
+
```
|
51
|
+
|
52
|
+
#### Cron Expression Scheduling
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
# Run every minute
|
56
|
+
MyPeriodicJob.schedule(cron: '* * * * *')
|
57
|
+
|
58
|
+
# Run every hour at minute 0
|
59
|
+
MyPeriodicJob.schedule(cron: '0 * * * *')
|
60
|
+
|
61
|
+
# Run daily at 9:30 AM
|
62
|
+
MyPeriodicJob.schedule(cron: '30 9 * * *')
|
63
|
+
```
|
64
|
+
|
65
|
+
#### Advanced Scheduling Options
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
# Schedule with custom name
|
69
|
+
MyPeriodicJob.schedule(
|
70
|
+
name: 'custom_job_name',
|
71
|
+
cron: 'every 30 minutes'
|
72
|
+
)
|
73
|
+
|
74
|
+
# Schedule with arguments
|
75
|
+
MyPeriodicJob.schedule(
|
76
|
+
cron: 'every 1 hour',
|
77
|
+
args: ['arg1', 'arg2']
|
78
|
+
)
|
79
|
+
|
80
|
+
# Schedule with additional options
|
81
|
+
MyPeriodicJob.schedule(
|
82
|
+
name: 'important_job',
|
83
|
+
cron: '0 */6 * * *', # Every 6 hours
|
84
|
+
args: ['production'],
|
85
|
+
priority: 'high' # Additional options passed through
|
86
|
+
)
|
87
|
+
```
|
88
|
+
|
89
|
+
### Managing the Scheduler
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
scheduler = Desiru::Jobs::Scheduler.instance
|
93
|
+
|
94
|
+
# Start the scheduler
|
95
|
+
scheduler.start
|
96
|
+
|
97
|
+
# Check if scheduler is running
|
98
|
+
scheduler.running? # => true
|
99
|
+
|
100
|
+
# Stop the scheduler
|
101
|
+
scheduler.stop
|
102
|
+
|
103
|
+
# Get information about a scheduled job
|
104
|
+
info = scheduler.job_info('MyPeriodicJob')
|
105
|
+
# => { job_class: MyPeriodicJob, cron: '60', next_run: Time, ... }
|
106
|
+
|
107
|
+
# Clear all scheduled jobs
|
108
|
+
scheduler.clear
|
109
|
+
```
|
110
|
+
|
111
|
+
### Checking Job Status
|
112
|
+
|
113
|
+
```ruby
|
114
|
+
# Check if a job is scheduled
|
115
|
+
MyPeriodicJob.scheduled? # => true
|
116
|
+
|
117
|
+
# Check by custom name
|
118
|
+
MyPeriodicJob.scheduled?(name: 'custom_job_name') # => true
|
119
|
+
|
120
|
+
# Unschedule a job
|
121
|
+
MyPeriodicJob.unschedule
|
122
|
+
|
123
|
+
# Unschedule by name
|
124
|
+
MyPeriodicJob.unschedule(name: 'custom_job_name')
|
125
|
+
```
|
126
|
+
|
127
|
+
## Supported Cron Formats
|
128
|
+
|
129
|
+
### Interval Expressions
|
130
|
+
|
131
|
+
- Simple seconds: `"60"` (runs every 60 seconds)
|
132
|
+
- Natural language: `"every N [second(s)|minute(s)|hour(s)|day(s)]"`
|
133
|
+
|
134
|
+
### Cron Expressions
|
135
|
+
|
136
|
+
Currently supports basic cron patterns:
|
137
|
+
|
138
|
+
- `* * * * *` - Every minute
|
139
|
+
- `0 * * * *` - Every hour at minute 0
|
140
|
+
- `30 10 * * *` - Daily at 10:30 AM
|
141
|
+
|
142
|
+
Any cron pattern more complex than the forms listed above falls back to hourly execution.
|
143
|
+
|
144
|
+
## Implementation Details
|
145
|
+
|
146
|
+
The scheduler:
|
147
|
+
- Runs in a background thread
|
148
|
+
- Checks once per second for jobs that are due to run
|
149
|
+
- Generates unique job IDs for each scheduled execution
|
150
|
+
- Logs job execution and errors
|
151
|
+
- Handles job execution errors gracefully without stopping the scheduler
|
152
|
+
|
153
|
+
## Example
|
154
|
+
|
155
|
+
See `examples/scheduled_job_example.rb` for a complete working example of scheduled jobs.
|
156
|
+
|
157
|
+
## Best Practices
|
158
|
+
|
159
|
+
1. **Idempotent Jobs**: Make your scheduled jobs idempotent since they may run multiple times
|
160
|
+
2. **Error Handling**: Include proper error handling in your job's perform method
|
161
|
+
3. **Logging**: Use Desiru.logger for consistent logging
|
162
|
+
4. **Resource Cleanup**: Stop the scheduler gracefully when shutting down your application
|
163
|
+
5. **Monitoring**: Monitor scheduled job execution through job results and logs
|
164
|
+
|
165
|
+
## Integration with Sidekiq
|
166
|
+
|
167
|
+
The scheduler integrates seamlessly with Sidekiq. When a scheduled job's time comes, the scheduler calls `perform_async` on the job class, which enqueues it into Sidekiq for processing.
|
@@ -0,0 +1,60 @@
|
|
1
|
+
version: 1
|
2
|
+
swarm:
|
3
|
+
name: "DSPy Analysis & Documentation Team"
|
4
|
+
main: lead_analyst
|
5
|
+
instances:
|
6
|
+
lead_analyst:
|
7
|
+
description: "Lead analyst coordinating DSPy feature analysis and documentation strategy"
|
8
|
+
directory: .
|
9
|
+
model: opus
|
10
|
+
connections: [feature_researcher, integration_tester, documentation_writer]
|
11
|
+
prompt: "You are the lead analyst for a Ruby port of DSPy. Your role is to coordinate analysis of missing features compared to Python DSPy, oversee integration testing, and guide documentation preparation. Focus on strategic decisions and high-level architecture analysis. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
|
12
|
+
allowed_tools:
|
13
|
+
- Read
|
14
|
+
- Edit
|
15
|
+
- MultiEdit
|
16
|
+
- WebSearch
|
17
|
+
- WebFetch
|
18
|
+
- Bash
|
19
|
+
|
20
|
+
feature_researcher:
|
21
|
+
description: "DSPy expert researching Python DSPy features to identify gaps in Ruby implementation"
|
22
|
+
directory: .
|
23
|
+
model: opus
|
24
|
+
prompt: "You specialize in analyzing the Python DSPy library to identify missing features in this Ruby port. Research DSPy documentation, compare with current Ruby implementation, and document feature gaps. Focus on core DSPy concepts like modules, optimizers, retrievals, and signatures. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
|
25
|
+
allowed_tools:
|
26
|
+
- Read
|
27
|
+
- WebSearch
|
28
|
+
- WebFetch
|
29
|
+
- Edit
|
30
|
+
- MultiEdit
|
31
|
+
- Write
|
32
|
+
- Bash
|
33
|
+
|
34
|
+
integration_tester:
|
35
|
+
description: "Integration testing specialist ensuring all Ruby DSPy features work correctly through comprehensive tests"
|
36
|
+
directory: .
|
37
|
+
model: opus
|
38
|
+
connections: [feature_researcher]
|
39
|
+
prompt: "You are responsible for creating and running comprehensive integration tests to verify that all DSPy features work correctly in the Ruby implementation. Focus on end-to-end workflows, real API interactions, and complex module compositions. Use RSpec exclusively for all testing. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
|
40
|
+
allowed_tools:
|
41
|
+
- Read
|
42
|
+
- Edit
|
43
|
+
- MultiEdit
|
44
|
+
- Write
|
45
|
+
- Bash
|
46
|
+
|
47
|
+
documentation_writer:
|
48
|
+
description: "Technical writer preparing GitHub wiki documentation following DSPy documentation patterns"
|
49
|
+
directory: .
|
50
|
+
model: opus
|
51
|
+
connections: [feature_researcher]
|
52
|
+
prompt: "You create comprehensive GitHub wiki documentation for the Ruby DSPy port, following the structure and style of the original Python DSPy documentation. Focus on API references, usage examples, tutorials, and migration guides. Research the original DSPy docs for inspiration and maintain consistency with Ruby conventions. For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially."
|
53
|
+
allowed_tools:
|
54
|
+
- Read
|
55
|
+
- Write
|
56
|
+
- Edit
|
57
|
+
- MultiEdit
|
58
|
+
- WebSearch
|
59
|
+
- WebFetch
|
60
|
+
- Bash
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# DSPy Feature Analysis for Desiru Implementation
|
2
|
+
|
3
|
+
This document provides a comprehensive analysis of the Python DSPy library's core features, modules, and components to guide the Ruby implementation of Desiru.
|
4
|
+
|
5
|
+
## Core Concepts
|
6
|
+
|
7
|
+
### 1. Programming Model
|
8
|
+
- **Declarative Approach**: DSPy separates program flow (modules and logic) from parameters (prompts) that control LLM behavior
|
9
|
+
- **Compositional**: Build complex systems by composing simple modules
|
10
|
+
- **Self-Improving**: Programs can be automatically optimized through compilation
|
11
|
+
|
12
|
+
### 2. Signatures
|
13
|
+
- Function declarations that specify what a text transformation should do (not how)
|
14
|
+
- Format: `"input1, input2 -> output1, output2"`
|
15
|
+
- Examples:
|
16
|
+
- `"question -> answer"` for basic Q&A
|
17
|
+
- `"context, question -> answer"` for retrieval-augmented generation
|
18
|
+
- `"sentence -> sentiment: bool"` for classification
|
19
|
+
- Include field names and optional metadata
|
20
|
+
- Support type hints to shape LM behavior
|
21
|
+
|
22
|
+
### 3. Modules
|
23
|
+
Core building blocks inspired by PyTorch modules:
|
24
|
+
- **dspy.Predict**: Basic predictor, handles instructions and demonstrations
|
25
|
+
- **dspy.ChainOfThought**: Adds step-by-step reasoning before output
|
26
|
+
- **dspy.ProgramOfThought**: Outputs executable code
|
27
|
+
- **dspy.ReAct**: Agent that can use tools to implement signatures
|
28
|
+
- **dspy.MultiChainComparison**: Compares multiple ChainOfThought outputs
|
29
|
+
- **dspy.Retrieve**: Information retrieval module
|
30
|
+
- **dspy.BestOfN**: Runs module N times, returns best result
|
31
|
+
- **dspy.Refine**: Iterative refinement of outputs
|
32
|
+
|
33
|
+
### 4. Data Handling
|
34
|
+
- **Example**: Core data type, similar to a Python dict with additional utilities
|
35
|
+
- **Prediction**: Special subclass of Example returned by modules
|
36
|
+
- Supports loading from HuggingFace datasets and CSV files
|
37
|
+
- Built-in train/test split capabilities
|
38
|
+
|
39
|
+
### 5. Metrics
|
40
|
+
- Functions that take (example, prediction, optional trace) and return a score
|
41
|
+
- Can be simple boolean checks or complex DSPy programs
|
42
|
+
- Used for both evaluation and optimization
|
43
|
+
- Support for LLM-as-Judge metrics
|
44
|
+
|
45
|
+
### 6. Optimizers (Teleprompters)
|
46
|
+
Automated prompt optimization strategies:
|
47
|
+
- **LabeledFewShot**: Uses provided labeled examples
|
48
|
+
- **BootstrapFewShot**: Generates demonstrations from program execution
|
49
|
+
- **BootstrapFewShotWithRandomSearch**: Multiple runs with random search
|
50
|
+
- **MIPROv2**: Advanced optimizer using Bayesian optimization
|
51
|
+
- **BootstrapFinetune**: Generates data for finetuning
|
52
|
+
- **COPRO**: Generates and refines instructions for each step, optimizing them with coordinate ascent
|
53
|
+
- **KNNFewShot**: K-nearest neighbor example selection
|
54
|
+
- **Ensemble**: Combines multiple optimized programs
|
55
|
+
|
56
|
+
### 7. Compilation Process
|
57
|
+
1. **Bootstrapping**: Run program on training data to collect execution traces
|
58
|
+
2. **Filtering**: Keep only traces that pass the metric
|
59
|
+
3. **Demonstration Selection**: Choose best examples for few-shot prompts
|
60
|
+
4. **Instruction Generation**: Create optimized instructions (some optimizers)
|
61
|
+
5. **Parameter Updates**: Update module prompts and demonstrations
|
62
|
+
|
63
|
+
### 8. Assertions and Constraints
|
64
|
+
- **dspy.Assert**: Hard constraints that must be satisfied
|
65
|
+
- **dspy.Suggest**: Soft constraints for guidance
|
66
|
+
- **dspy.Refine**: Iterative refinement based on constraints
|
67
|
+
- **dspy.BestOfN**: Sample multiple outputs, select best
|
68
|
+
|
69
|
+
### 9. Retrieval and RAG
|
70
|
+
- Built-in support for retrieval-augmented generation
|
71
|
+
- **ColBERTv2** integration for semantic search
|
72
|
+
- Composable retrieval modules
|
73
|
+
- Support for various vector databases
|
74
|
+
|
75
|
+
### 10. Agent Capabilities
|
76
|
+
- **ReAct** module for tool use and multi-step reasoning
|
77
|
+
- Support for building complex agent loops
|
78
|
+
- Integration with external tools and APIs
|
79
|
+
|
80
|
+
## Key Architectural Patterns
|
81
|
+
|
82
|
+
1. **Separation of Concerns**: Program logic separate from LM parameters
|
83
|
+
2. **Modular Composition**: Build complex systems from simple modules
|
84
|
+
3. **Automatic Optimization**: Compile programs to improve performance
|
85
|
+
4. **Trace-Based Learning**: Learn from execution traces, not just outputs
|
86
|
+
5. **Metric-Driven Development**: Define success metrics, let DSPy optimize
|
87
|
+
|
88
|
+
## Implementation Priorities for Desiru
|
89
|
+
|
90
|
+
### Phase 1: Core Foundation
|
91
|
+
1. Signature parsing and representation
|
92
|
+
2. Basic Predict module
|
93
|
+
3. Example and Prediction data structures
|
94
|
+
4. Simple metrics system
|
95
|
+
|
96
|
+
### Phase 2: Essential Modules
|
97
|
+
1. ChainOfThought module
|
98
|
+
2. Basic optimizer (BootstrapFewShot)
|
99
|
+
3. Compilation infrastructure
|
100
|
+
4. Trace collection system
|
101
|
+
|
102
|
+
### Phase 3: Advanced Features
|
103
|
+
1. ReAct agent module
|
104
|
+
2. Retrieval modules
|
105
|
+
3. Advanced optimizers (MIPROv2)
|
106
|
+
4. Assertion system
|
107
|
+
|
108
|
+
### Phase 4: Ecosystem
|
109
|
+
1. Data loaders
|
110
|
+
2. Integration with Ruby ML libraries
|
111
|
+
3. Performance optimizations
|
112
|
+
4. Documentation and examples
|
113
|
+
|
114
|
+
## Design Considerations for Ruby
|
115
|
+
|
116
|
+
1. **Module System**: Leverage Ruby's module system for composability
|
117
|
+
2. **DSL**: Create Ruby-idiomatic DSL for signatures
|
118
|
+
3. **Blocks**: Use blocks for metric definitions
|
119
|
+
4. **Method Missing**: Consider for dynamic module composition
|
120
|
+
5. **Lazy Evaluation**: For efficient trace collection
|
121
|
+
6. **Concurrent Processing**: For parallel optimization runs
|
data/examples/README.md
CHANGED
@@ -39,6 +39,75 @@ Shows how to use the BootstrapFewShot optimizer to improve module performance wi
|
|
39
39
|
ruby examples/few_shot_learning.rb
|
40
40
|
```
|
41
41
|
|
42
|
+
### rest_api.rb
|
43
|
+
Creates a REST API server using Grape integration, exposing Desiru modules as HTTP endpoints.
|
44
|
+
|
45
|
+
```bash
|
46
|
+
ruby examples/rest_api.rb
|
47
|
+
# Visit http://localhost:9292 for API documentation
|
48
|
+
```
|
49
|
+
|
50
|
+
### rest_api_advanced.rb
|
51
|
+
Advanced REST API with authentication, rate limiting, and tool-using AI agents.
|
52
|
+
|
53
|
+
```bash
|
54
|
+
ruby examples/rest_api_advanced.rb
|
55
|
+
# API keys: demo-key-123, test-key-456
|
56
|
+
```
|
57
|
+
|
58
|
+
### sinatra_api.rb
|
59
|
+
Lightweight REST API using Sinatra integration as an alternative to Grape.
|
60
|
+
|
61
|
+
```bash
|
62
|
+
ruby examples/sinatra_api.rb
|
63
|
+
# Visit http://localhost:9293 for simpler API endpoints
|
64
|
+
```
|
65
|
+
|
66
|
+
### graphql_integration.rb
|
67
|
+
GraphQL API server with automatic schema generation from Desiru signatures.
|
68
|
+
|
69
|
+
```bash
|
70
|
+
ruby examples/graphql_integration.rb
|
71
|
+
# GraphiQL interface at http://localhost:9292/graphiql
|
72
|
+
```
|
73
|
+
|
74
|
+
### react_agent.rb
|
75
|
+
Demonstrates the ReAct module for building tool-using AI agents.
|
76
|
+
|
77
|
+
```bash
|
78
|
+
ruby examples/react_agent.rb
|
79
|
+
```
|
80
|
+
|
81
|
+
### async_processing.rb
|
82
|
+
Shows how to use background job processing with Sidekiq for long-running operations.
|
83
|
+
|
84
|
+
```bash
|
85
|
+
# Start Redis first
|
86
|
+
redis-server
|
87
|
+
|
88
|
+
# In another terminal, start Sidekiq workers
|
89
|
+
bundle exec sidekiq
|
90
|
+
|
91
|
+
# Run the example
|
92
|
+
ruby examples/async_processing.rb
|
93
|
+
```
|
94
|
+
|
95
|
+
### persistence_example.rb
|
96
|
+
Demonstrates Sequel-based persistence for tracking module executions, API requests, and training data.
|
97
|
+
|
98
|
+
```bash
|
99
|
+
ruby examples/persistence_example.rb
|
100
|
+
# Creates a SQLite database with execution history
|
101
|
+
```
|
102
|
+
|
103
|
+
### api_with_persistence.rb
|
104
|
+
REST API server with automatic request tracking and an analytics dashboard.
|
105
|
+
|
106
|
+
```bash
|
107
|
+
ruby examples/api_with_persistence.rb
|
108
|
+
# Visit http://localhost:9294 for the dashboard
|
109
|
+
```
|
110
|
+
|
42
111
|
## Creating Your Own Examples
|
43
112
|
|
44
113
|
When creating new examples:
|