desiru 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/settings.local.json +11 -0
- data/.env.example +34 -0
- data/.rubocop.yml +7 -4
- data/.ruby-version +1 -0
- data/CHANGELOG.md +73 -0
- data/CLAUDE.local.md +3 -0
- data/CLAUDE.md +10 -1
- data/Gemfile +21 -2
- data/Gemfile.lock +88 -13
- data/README.md +301 -2
- data/Rakefile +1 -0
- data/db/migrations/001_create_initial_tables.rb +96 -0
- data/db/migrations/002_create_job_results.rb +39 -0
- data/desiru-development-swarm.yml +185 -0
- data/desiru.db +0 -0
- data/desiru.gemspec +2 -5
- data/docs/background_processing_roadmap.md +87 -0
- data/docs/job_scheduling.md +167 -0
- data/dspy-analysis-swarm.yml +60 -0
- data/dspy-feature-analysis.md +121 -0
- data/examples/README.md +69 -0
- data/examples/api_with_persistence.rb +122 -0
- data/examples/assertions_example.rb +232 -0
- data/examples/async_processing.rb +2 -0
- data/examples/few_shot_learning.rb +1 -2
- data/examples/graphql_api.rb +4 -2
- data/examples/graphql_integration.rb +3 -3
- data/examples/graphql_optimization_summary.md +143 -0
- data/examples/graphql_performance_benchmark.rb +247 -0
- data/examples/persistence_example.rb +102 -0
- data/examples/react_agent.rb +203 -0
- data/examples/rest_api.rb +173 -0
- data/examples/rest_api_advanced.rb +333 -0
- data/examples/scheduled_job_example.rb +116 -0
- data/examples/simple_qa.rb +1 -2
- data/examples/sinatra_api.rb +109 -0
- data/examples/typed_signatures.rb +1 -2
- data/graphql_optimization_summary.md +53 -0
- data/lib/desiru/api/grape_integration.rb +284 -0
- data/lib/desiru/api/persistence_middleware.rb +148 -0
- data/lib/desiru/api/sinatra_integration.rb +217 -0
- data/lib/desiru/api.rb +42 -0
- data/lib/desiru/assertions.rb +74 -0
- data/lib/desiru/async_status.rb +65 -0
- data/lib/desiru/cache.rb +1 -1
- data/lib/desiru/configuration.rb +2 -1
- data/lib/desiru/core/compiler.rb +231 -0
- data/lib/desiru/core/example.rb +96 -0
- data/lib/desiru/core/prediction.rb +108 -0
- data/lib/desiru/core/trace.rb +330 -0
- data/lib/desiru/core/traceable.rb +61 -0
- data/lib/desiru/core.rb +12 -0
- data/lib/desiru/errors.rb +160 -0
- data/lib/desiru/field.rb +17 -14
- data/lib/desiru/graphql/batch_loader.rb +85 -0
- data/lib/desiru/graphql/data_loader.rb +242 -75
- data/lib/desiru/graphql/enum_builder.rb +75 -0
- data/lib/desiru/graphql/executor.rb +37 -4
- data/lib/desiru/graphql/schema_generator.rb +62 -158
- data/lib/desiru/graphql/type_builder.rb +138 -0
- data/lib/desiru/graphql/type_cache_warmer.rb +91 -0
- data/lib/desiru/jobs/async_predict.rb +1 -1
- data/lib/desiru/jobs/base.rb +67 -0
- data/lib/desiru/jobs/batch_processor.rb +6 -6
- data/lib/desiru/jobs/retriable.rb +119 -0
- data/lib/desiru/jobs/retry_strategies.rb +169 -0
- data/lib/desiru/jobs/scheduler.rb +219 -0
- data/lib/desiru/jobs/webhook_notifier.rb +242 -0
- data/lib/desiru/models/anthropic.rb +164 -0
- data/lib/desiru/models/base.rb +37 -3
- data/lib/desiru/models/open_ai.rb +151 -0
- data/lib/desiru/models/open_router.rb +161 -0
- data/lib/desiru/module.rb +67 -9
- data/lib/desiru/modules/best_of_n.rb +306 -0
- data/lib/desiru/modules/chain_of_thought.rb +3 -3
- data/lib/desiru/modules/majority.rb +51 -0
- data/lib/desiru/modules/multi_chain_comparison.rb +256 -0
- data/lib/desiru/modules/predict.rb +15 -1
- data/lib/desiru/modules/program_of_thought.rb +338 -0
- data/lib/desiru/modules/react.rb +273 -0
- data/lib/desiru/modules/retrieve.rb +4 -2
- data/lib/desiru/optimizers/base.rb +32 -4
- data/lib/desiru/optimizers/bootstrap_few_shot.rb +2 -2
- data/lib/desiru/optimizers/copro.rb +268 -0
- data/lib/desiru/optimizers/knn_few_shot.rb +185 -0
- data/lib/desiru/optimizers/mipro_v2.rb +889 -0
- data/lib/desiru/persistence/database.rb +71 -0
- data/lib/desiru/persistence/models/api_request.rb +38 -0
- data/lib/desiru/persistence/models/job_result.rb +138 -0
- data/lib/desiru/persistence/models/module_execution.rb +37 -0
- data/lib/desiru/persistence/models/optimization_result.rb +28 -0
- data/lib/desiru/persistence/models/training_example.rb +25 -0
- data/lib/desiru/persistence/models.rb +11 -0
- data/lib/desiru/persistence/repositories/api_request_repository.rb +98 -0
- data/lib/desiru/persistence/repositories/base_repository.rb +77 -0
- data/lib/desiru/persistence/repositories/job_result_repository.rb +116 -0
- data/lib/desiru/persistence/repositories/module_execution_repository.rb +85 -0
- data/lib/desiru/persistence/repositories/optimization_result_repository.rb +67 -0
- data/lib/desiru/persistence/repositories/training_example_repository.rb +102 -0
- data/lib/desiru/persistence/repository.rb +29 -0
- data/lib/desiru/persistence/setup.rb +77 -0
- data/lib/desiru/persistence.rb +49 -0
- data/lib/desiru/registry.rb +3 -5
- data/lib/desiru/signature.rb +91 -24
- data/lib/desiru/version.rb +1 -1
- data/lib/desiru.rb +33 -8
- data/missing-features-analysis.md +192 -0
- metadata +75 -45
- data/lib/desiru/models/raix_adapter.rb +0 -210
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Desiru
|
|
4
|
+
module Modules
|
|
5
|
+
# MultiChainComparison module that generates multiple chain-of-thought
|
|
6
|
+
# reasoning paths and compares them to produce the best answer
|
|
7
|
+
class MultiChainComparison < Desiru::Module
|
|
8
|
+
DEFAULT_SIGNATURE = 'question: string -> answer: string, reasoning: string'
|
|
9
|
+
|
|
10
|
+
# Builds a comparison module. Options owned by this class are removed from
# kwargs before the remaining keyword arguments are handed to the parent
# initializer.
#
# @param signature [Object, nil] signature to use; DEFAULT_SIGNATURE when nil/false
# @param model [Object, nil] passed through to the parent initializer
# @param kwargs [Hash] :num_chains (default 3), :comparison_strategy
#   (default :vote) and :temperature (default 0.7) are consumed here; all
#   other keys are passed through to the parent
def initialize(signature = nil, model: nil, **kwargs)
  @num_chains = kwargs.delete(:num_chains) || 3
  @comparison_strategy = kwargs.delete(:comparison_strategy) || :vote
  @temperature = kwargs.delete(:temperature) || 0.7

  # Explicit arguments mirror what a bare `super` would forward.
  super(signature || DEFAULT_SIGNATURE, model: model, **kwargs)
end
|
|
22
|
+
|
|
23
|
+
# Generates the reasoning chains and returns the one chosen by the configured
# comparison strategy.
#
# @param inputs [Hash] keyword inputs passed on to chain generation
# @return [Hash] the selected chain ({} when no chain is available or when
#   @num_chains is zero or negative). When the signature declares a
#   comparison_data output, a :comparison_data entry is added to that hash.
def forward(**inputs)
  # No chains requested: nothing to generate or compare.
  return {} if @num_chains <= 0

  chains = generate_chains(inputs)

  strategies = {
    vote: -> { vote_on_chains(chains) },
    llm_judge: -> { llm_judge_chains(chains, inputs) },
    confidence: -> { select_by_confidence(chains) }
  }
  # Unknown strategies fall back to the first generated chain.
  selector = strategies.fetch(@comparison_strategy) { -> { chains.first } }
  chosen = selector.call || {}

  declared = signature.output_fields
  if declared.key?('comparison_data') || declared.key?(:comparison_data)
    chosen[:comparison_data] = {
      num_chains: chains.length,
      strategy: @comparison_strategy,
      all_chains: chains.map { |chain| chain[:reasoning] }
    }
  end

  chosen
end
|
|
56
|
+
|
|
57
|
+
private
|
|
58
|
+
|
|
59
|
+
# Asks the model for @num_chains independent completions and parses each one.
#
# @param inputs [Hash] inputs rendered into every chain prompt
# @return [Array] one parsed chain per completion, in generation order
def generate_chains(inputs)
  @num_chains.times.map do |index|
    prompt_text = build_chain_prompt(inputs, index)
    completion = model.complete(
      messages: [{ role: 'user', content: prompt_text }],
      temperature: @temperature
    )
    parse_chain_response(completion[:content])
  end
end
|
|
76
|
+
|
|
77
|
+
# Renders the prompt for a single reasoning chain.
#
# @param inputs [Hash] input names and values, listed one per line
# @param chain_index [Integer] zero-based chain number (shown one-based)
# @return [String] prompt asking for a REASONING:/ANSWER: formatted reply
def build_chain_prompt(inputs, chain_index)
  pieces = ["Please solve this problem step by step (Approach #{chain_index + 1}):\n\n"]
  pieces.concat(inputs.map { |key, value| "#{key}: #{value}\n" })
  pieces << "\nProvide your reasoning step by step, then give your final answer.\n" \
            "Format your response as:\n" \
            "REASONING: [Your step-by-step reasoning]\n" \
            "ANSWER: [Your final answer]\n"

  declared = signature.output_fields
  if declared.any?
    pieces << "\nMake sure your answer includes:\n"
    declared.each do |name, field|
      # reasoning and comparison_data are not listed as answer content.
      next if %w[reasoning comparison_data].include?(name.to_s)

      pieces << "- #{name}: #{field.description || field.type}\n"
    end
  end

  pieces.join
end
|
|
102
|
+
|
|
103
|
+
# Parses one model reply into a chain hash.
#
# The reply is expected to contain "REASONING: ..." followed by "ANSWER: ...".
# Any "name: value" line whose name is a declared output field is also picked
# up, unless that field was already filled.
#
# @param response [String] raw completion text
# @return [Hash] always contains :reasoning (the whole reply when no
#   REASONING: section exists); other keys are declared output field names
def parse_chain_response(response)
  result = {}
  declared = signature.output_fields

  # \z rather than $: in Ruby `$` matches at every line end, which would stop
  # the lazy capture after the first line of a multi-line reasoning section.
  reasoning_match = response.match(/REASONING:\s*(.+?)(?=ANSWER:|\z)/mi)
  result[:reasoning] = reasoning_match ? reasoning_match[1].strip : response

  answer_match = response.match(/ANSWER:\s*(.+)/mi)
  if answer_match
    answer_text = answer_match[1].strip
    structured = {}

    # A ':' or '{' suggests key/value content; try that reading first.
    if answer_text.include?(':') || answer_text.include?('{')
      structured = parse_structured_answer(answer_text)
      result.merge!(structured)
    end

    # Plain answer, or text that merely contains a colon (e.g. "3:30"):
    # store it under the first output field that is not metadata.
    if structured.empty? && !answer_text.empty?
      main_output_field = declared.keys.map(&:to_sym).find do |name|
        !%i[reasoning comparison_data].include?(name)
      end
      result[main_output_field] = answer_text if main_output_field
    end
  end
  # Without an ANSWER: section no main field is set; there is no clear answer.

  # Pick up any additional declared fields written as "name: value" lines.
  response.scan(/(\w+):\s*([^\n]+)/).each do |key, value|
    key_sym = key.downcase.to_sym
    known = declared.key?(key_sym) || declared.key?(key.downcase)
    result[key_sym] = value.strip if known && !result.key?(key_sym)
  end

  result
end
|
|
143
|
+
|
|
144
|
+
# Extracts "name: value" pairs from an answer and keeps those whose name is a
# declared output field (looked up as a symbol or as a string).
#
# @param answer_text [String] text following "ANSWER:"
# @return [Hash{Symbol=>String}] stripped values keyed by lower-cased field name
def parse_structured_answer(answer_text)
  declared = signature.output_fields

  answer_text.scan(/(\w+):\s*([^\n,}]+)/).each_with_object({}) do |(label, raw_value), found|
    field_name = label.downcase
    next unless declared.key?(field_name.to_sym) || declared.key?(field_name)

    found[field_name.to_sym] = raw_value.strip
  end
end
|
|
158
|
+
|
|
159
|
+
# Picks the chain whose main answer occurs most often.
#
# The main answer field is the first declared output that is neither
# :reasoning nor :comparison_data. Chains whose answer is nil, false or blank
# do not vote. For the winning answer, the first chain that produced it is
# returned.
#
# @param chains [Array<Hash>] parsed chains
# @return [Hash] the winning chain; the first chain when nobody voted;
#   {} when chains is empty
def vote_on_chains(chains)
  return {} if chains.empty?

  answer_key = signature.output_fields.keys.map(&:to_sym).find do |name|
    !%i[reasoning comparison_data].include?(name)
  end

  voters = chains.select do |chain|
    answer = chain[answer_key]
    answer && !answer.to_s.empty?
  end
  return chains.first || {} if voters.empty?

  _winning_answer, supporters = voters
                                .group_by { |chain| chain[answer_key] }
                                .max_by { |_answer, members| members.size }
  supporters.first
end
|
|
187
|
+
|
|
188
|
+
# Asks the model to choose the best chain.
#
# The reply is searched for "attempt N", "option N" or "choice N"; if none is
# found, a reply that starts with a number is read as the attempt number.
# Anything else selects the first attempt. Out-of-range numbers are clamped to
# the available attempts.
#
# @param chains [Array<Hash>] parsed chains to judge
# @param original_inputs [Hash] the problem inputs shown to the judge
# @return [Hash] the selected chain, or {} when there are no chains
def llm_judge_chains(chains, original_inputs)
  # Nothing to judge: skip the model call (clamp(0, -1) below would raise).
  return {} if chains.empty?

  answer_key = signature.output_fields.keys.map(&:to_sym).find do |name|
    !%i[reasoning comparison_data].include?(name)
  end

  sections = ["Given the following problem and multiple solution attempts, select the best answer:\n\n"]
  sections << "Original Problem:\n"
  original_inputs.each { |key, value| sections << "#{key}: #{value}\n" }

  sections << "\nSolution Attempts:\n"
  chains.each_with_index do |chain, index|
    sections << "\n--- Attempt #{index + 1} ---\n"
    sections << "Reasoning: #{chain[:reasoning]}\n"
    sections << "Answer: #{chain[answer_key]}\n" if chain[answer_key]
  end
  sections << "\nSelect the best attempt (1-#{chains.length}) and explain why:"

  response = model.complete(
    messages: [{ role: 'user', content: sections.join }],
    temperature: 0.1 # Low temperature for more consistent judgment
  )

  verdict = response[:content].to_s
  selection_match = verdict.match(/(?:attempt|option|choice)\s*#?(\d+)/i) ||
                    verdict.match(/\A\s*#?(\d+)\b/)
  selected_index = selection_match ? selection_match[1].to_i - 1 : 0

  chains[selected_index.clamp(0, chains.length - 1)]
end
|
|
223
|
+
|
|
224
|
+
# Has the model rate each chain and returns the highest-rated one.
#
# The rating is the first run of digits in the model's reply; 50 is used when
# the reply contains no digits.
#
# @param chains [Array<Hash>] parsed chains
# @return [Hash, nil] a copy of the best chain with :confidence added, or nil
#   when chains is empty
def select_by_confidence(chains)
  rated = chains.map do |chain|
    answer_key = signature.output_fields.keys.map(&:to_sym).find do |name|
      !%i[reasoning comparison_data].include?(name)
    end

    lines = [
      "Rate your confidence (0-100) in this reasoning and answer:\n",
      "Reasoning: #{chain[:reasoning]}\n"
    ]
    lines << "Answer: #{chain[answer_key]}\n" if chain[answer_key]
    lines << "\nRespond with just a number between 0 and 100:"

    reply = model.complete(
      messages: [{ role: 'user', content: lines.join }],
      temperature: 0.1
    )

    digits = reply[:content][/\d+/]
    chain.merge(confidence: digits ? digits.to_i : 50)
  end

  rated.max_by { |entry| entry[:confidence] }
end
|
|
249
|
+
end
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
|
|
253
|
+
# Register in the main module namespace for convenience
|
|
254
|
+
# Shortcut so the class is also reachable as Desiru::MultiChainComparison.
Desiru::MultiChainComparison = Desiru::Modules::MultiChainComparison
|
|
@@ -4,6 +4,13 @@ module Desiru
|
|
|
4
4
|
module Modules
|
|
5
5
|
# Basic prediction module - the fundamental building block
|
|
6
6
|
class Predict < Module
|
|
7
|
+
DEFAULT_SIGNATURE = 'question: string -> answer: string'
|
|
8
|
+
|
|
9
|
+
# Substitutes DEFAULT_SIGNATURE when no signature is supplied, then defers to
# the parent initializer with the same arguments.
def initialize(signature = nil, model: nil, **)
  signature = DEFAULT_SIGNATURE unless signature
  super
end
|
|
13
|
+
|
|
7
14
|
def forward(inputs)
|
|
8
15
|
prompt = build_prompt(inputs)
|
|
9
16
|
|
|
@@ -14,6 +21,8 @@ module Desiru
|
|
|
14
21
|
demos: demos
|
|
15
22
|
)
|
|
16
23
|
|
|
24
|
+
Desiru.logger.info("Predict response: #{response}")
|
|
25
|
+
|
|
17
26
|
parse_response(response[:content])
|
|
18
27
|
end
|
|
19
28
|
|
|
@@ -32,7 +41,12 @@ module Desiru
|
|
|
32
41
|
|
|
33
42
|
#{format_signature}
|
|
34
43
|
|
|
35
|
-
|
|
44
|
+
Format your response with each output field on its own line using the pattern:
|
|
45
|
+
field_name: value
|
|
46
|
+
|
|
47
|
+
For example, if the output field is "answer", write:
|
|
48
|
+
answer: Your answer here
|
|
49
|
+
|
|
36
50
|
#{format_descriptions}
|
|
37
51
|
PROMPT
|
|
38
52
|
end
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'timeout'
|
|
4
|
+
|
|
5
|
+
module Desiru
|
|
6
|
+
module Modules
|
|
7
|
+
# ProgramOfThought module that generates executable code to solve problems
|
|
8
|
+
# Similar to ChainOfThought but produces code instead of reasoning steps
|
|
9
|
+
# Supports both Ruby and Python code generation
|
|
10
|
+
class ProgramOfThought < Desiru::Module
|
|
11
|
+
DEFAULT_SIGNATURE = 'question: string -> answer: string, code: string'
|
|
12
|
+
|
|
13
|
+
# Builds a code-generating module. Options owned by this class are removed
# from kwargs before the rest is handed to the parent initializer.
#
# @param signature [Object, nil] signature to use; DEFAULT_SIGNATURE when nil/false
# @param model [Object, nil] model; the stand-in path below falls back to
#   Desiru.configuration.default_model
# @param kwargs [Hash] :max_iterations (default 1), :code_language
#   (default 'ruby'), :timeout in seconds (default 5) and :safe_mode (on
#   unless explicitly false) are consumed here; :config, :demos and
#   :metadata are used by the stand-in path or forwarded to the parent
# @raise [ModuleError] when :code_language is not supported
def initialize(signature = nil, model: nil, **kwargs)
  @max_iterations = kwargs.delete(:max_iterations) || 1
  @code_language = validate_language(kwargs.delete(:code_language) || 'ruby')
  @timeout = kwargs.delete(:timeout) || 5 # seconds
  @safe_mode = kwargs.delete(:safe_mode) != false # only an explicit false disables it

  signature ||= DEFAULT_SIGNATURE

  # An object that exposes input/output fields but is neither a Signature nor
  # a String (e.g. a test double) is stored as-is instead of being handed to
  # the parent initializer.
  stand_in = signature.respond_to?(:output_fields) && signature.respond_to?(:input_fields) &&
             !signature.is_a?(Signature) && !signature.is_a?(String)

  unless stand_in
    super(signature, model: model, **kwargs)
    return
  end

  @signature = signature
  @model = model || Desiru.configuration.default_model
  @config = default_config.merge(kwargs[:config] || {})
  @demos = kwargs[:demos] || []
  @metadata = kwargs[:metadata] || {}
  @call_count = 0
  validate_model! if respond_to?(:validate_model!, true)
  register_module if respond_to?(:register_module, true)
end
|
|
39
|
+
|
|
40
|
+
# Asks the model for code that solves the problem, optionally screens it, runs
# it, and maps the result onto the signature's outputs.
#
# @param inputs [Hash] keyword inputs described to the model and passed to the
#   generated code
# @return [Hash] formatted outputs; any StandardError raised along the way is
#   logged and turned into an error output instead of propagating
def forward(**inputs)
  trace_metadata = { code_language: @code_language, safe_mode: @safe_mode }

  # Attach metadata to whichever trace context is available and active.
  tracer =
    if defined?(Desiru::TraceContext) && Desiru::TraceContext.respond_to?(:current) && Desiru::TraceContext.current
      Desiru::TraceContext
    elsif defined?(Desiru::Core) && Desiru::Core.respond_to?(:trace_context) &&
          Desiru::Core.trace_context.respond_to?(:current) && Desiru::Core.trace_context.current
      Desiru::Core.trace_context
    end
  tracer&.add_metadata(trace_metadata)

  code_prompt = build_code_prompt(inputs)
  completion = model.complete(
    messages: [{ role: 'user', content: code_prompt }],
    temperature: 0.3 # Lower temperature for more deterministic code
  )

  generated_code = extract_code(completion[:content])
  Desiru.logger.debug("Generated #{@code_language} code: #{generated_code}")

  rejected = @safe_mode && !safe_to_execute?(generated_code)
  result = rejected ? { error: "Generated code deemed unsafe to execute" } : execute_code(generated_code, inputs)

  format_outputs(result, generated_code)
rescue StandardError => e
  Desiru.logger.error("ProgramOfThought error: #{e.message}")
  format_error_output(e, '')
end
|
|
76
|
+
|
|
77
|
+
private
|
|
78
|
+
|
|
79
|
+
# Normalizes a language name and checks that it is supported.
#
# @param language [#to_s] requested language, in any letter case
# @return [String] the lower-cased language name
# @raise [ModuleError] when the language is neither ruby nor python
def validate_language(language)
  supported = %w[ruby python]
  normalized = language.to_s.downcase
  return normalized if supported.include?(normalized)

  raise ModuleError, "Unsupported language: #{language}. Supported: #{supported.join(', ')}"
end
|
|
87
|
+
|
|
88
|
+
# Renders the prompt that asks the model for a `solve` implementation.
#
# @param inputs [Hash] input names and values; the "Given inputs" section is
#   left out when empty
# @return [String] the complete prompt text
def build_code_prompt(inputs)
  parts = ["You are a programming assistant. Generate #{@code_language} code to solve this problem.\n\n"]

  if inputs.any?
    parts << "Given inputs:\n"
    inputs.each { |key, value| parts << "#{key}: #{format_input_value(value)}\n" }
    parts << "\n"
  end

  parts << "Expected outputs:\n"
  signature.output_fields.each do |name, field|
    next if name == :code # the code output is what the model is being asked to write

    parts << "- #{name} (#{field.type}): #{field.description || 'No description'}\n"
  end

  parts << "\nGenerate executable #{@code_language} code that processes the inputs " \
           "and returns the expected outputs. " \
           "Wrap your code in triple backticks with the language identifier.\n"

  # Any language other than ruby gets the python wording.
  callable, container = @code_language == 'ruby' ? %w[method hash] : %w[function dictionary]
  parts << "The code should define a #{callable} called 'solve' that takes the inputs " \
           "as keyword arguments and returns a #{container} with the output values."

  parts.join
end
|
|
122
|
+
|
|
123
|
+
# Renders an input value for inclusion in the prompt.
#
# @param value [Object] arrays are rendered element by element inside square
#   brackets, hashes as JSON, everything else via #to_s
# @return [String]
def format_input_value(value)
  if value.is_a?(Array)
    rendered = value.map { |element| format_input_value(element) }
    return "[#{rendered.join(', ')}]"
  end
  return value.to_json if value.is_a?(Hash)

  value.to_s
end
|
|
133
|
+
|
|
134
|
+
# Pulls the generated code out of a model reply.
#
# Preference order:
# 1. a fenced block tagged with the configured language (case-insensitive,
#    tolerant of spaces around the tag and CRLF line endings);
# 2. the first fenced block with any tag or none (e.g. ```rb, ```python3);
# 3. the whole reply, stripped.
#
# @param response [String] raw completion text
# @return [String] the extracted code without surrounding whitespace
def extract_code(response)
  # Escape and group the language so the whole tag is matched literally,
  # rather than only its last character being made optional.
  language = Regexp.escape(@code_language.to_s)

  tagged = response.match(/```[ \t]*#{language}[ \t]*\r?\n(.*?)```/mi)
  return tagged[1].strip if tagged

  fenced = response.match(/```[^\n`]*\r?\n(.*?)```/m)
  return fenced[1].strip if fenced

  # Last resort: assume the entire response is code.
  response.strip
end
|
|
146
|
+
|
|
147
|
+
# Screens generated code against the deny-list for the configured language.
#
# @param code [String] generated source code
# @return [Boolean] true when safe mode is off or no deny-list pattern matches
def safe_to_execute?(code)
  return true unless @safe_mode

  deny_list =
    if @code_language == 'ruby'
      ruby_dangerous_patterns
    elsif @code_language == 'python'
      python_dangerous_patterns
    else
      []
    end

  !deny_list.any? { |pattern| code.match?(pattern) }
end
|
|
162
|
+
|
|
163
|
+
# Deny-list of patterns used to reject generated Ruby code in safe mode.
#
# NOTE(review): this is a best-effort textual screen, not a sandbox. It cannot
# catch every way of reaching the shell, file system or network (for example
# `eval some_variable` or constant lookups built at runtime).
#
# @return [Array<Regexp>] patterns; code matching any of them is rejected
def ruby_dangerous_patterns
  [
    /system\s*\(/,
    /exec\s*\(/,
    /eval\s*\(/,
    /%x\{/,
    /`.*`/,
    /File\s*\.\s*delete/,
    /FileUtils\s*\.\s*rm/,
    /Dir\s*\.\s*delete/,
    /require\s+['"]net/,
    /Socket/,
    /Process\s*\.\s*kill/,
    /IO\s*\.\s*popen/,
    /Open3/,
    /\$SAFE\s*=/,
    # Paren-less calls with a string literal argument: system "ls", exec 'ls'.
    /\b(?:system|exec|eval|spawn)\s+['"]/,
    # %x command literals with delimiters other than braces: %x(ls), %x[ls].
    /%x[(\[<|!]/,
    /\bspawn\s*\(/,
    # File.unlink is an alias of File.delete.
    /File\s*\.\s*unlink/,
    # Dynamic dispatch to the same methods: send(:system, "ls").
    /\b(?:send|public_send|__send__)\s*\(?\s*:(?:system|exec|eval|spawn)\b/
  ]
end
|
|
181
|
+
|
|
182
|
+
# Deny-list of patterns used to reject generated Python code in safe mode.
#
# @return [Array<Regexp>] patterns; code matching any of them is rejected
def python_dangerous_patterns
  process_spawning = [/os\.system/, /subprocess/]
  dynamic_code = [/eval\s*\(/, /exec\s*\(/, /compile\s*\(/, /__import__/]
  # open(...) is only flagged with a mode string starting with "w" or "a".
  file_changes = [/open\s*\([^,)]*,\s*['"][wa]/, /os\.remove/, /shutil\.rmtree/]
  networking = [/socket/, /requests/, /urllib/]

  process_spawning + dynamic_code + file_changes + networking
end
|
|
198
|
+
|
|
199
|
+
# Dispatches execution to the runner for the configured language.
#
# @param code [String] generated source code
# @param inputs [Hash] inputs handed to the runner
# @return [Hash] the runner's result, or an :error hash for an unknown language
def execute_code(code, inputs)
  return execute_ruby_code(code, inputs) if @code_language == 'ruby'
  return execute_python_code(code, inputs) if @code_language == 'python'

  { error: "Unsupported language for execution: #{@code_language}" }
end
|
|
209
|
+
|
|
210
|
+
# Evaluates generated Ruby code on a fresh object and calls its `solve` method.
#
# SECURITY: the code is evaluated in-process with instance_eval. A fresh
# receiver and a timeout do not sandbox it; generated code still has the full
# privileges of the host process. Only run model output you are prepared to
# trust, or execute it in an isolated process.
#
# @param code [String] Ruby source expected to define `solve`
# @param inputs [Hash] passed to `solve` as keyword arguments (keys symbolized)
# @return [Hash] the hash returned by `solve`; { result: value } for non-hash
#   returns; an :error hash on timeout, on any StandardError or ScriptError, or
#   when `solve` is not defined
def execute_ruby_code(code, inputs)
  context = Object.new

  result = Timeout.timeout(@timeout) do
    # `def solve` inside instance_eval defines the method on this object only.
    context.instance_eval(code)

    if context.respond_to?(:solve)
      context.solve(**inputs.transform_keys(&:to_sym))
    else
      { error: "Generated code does not define a 'solve' method" }
    end
  end

  # Ensure result is a hash
  result.is_a?(Hash) ? result : { result: result }
rescue Timeout::Error
  { error: "Code execution timed out after #{@timeout} seconds" }
rescue StandardError, ScriptError => e
  # ScriptError covers SyntaxError raised for malformed generated code; it is
  # not a StandardError and would otherwise propagate to the caller.
  { error: "Code execution failed: #{e.message}" }
end
|
|
234
|
+
|
|
235
|
+
# Placeholder for Python execution: nothing is run. The generated code is
# returned together with an error message and a note.
#
# @param code [String] generated Python source
# @param _inputs [Hash] unused
# @return [Hash] :error, :python_code and :note entries
def execute_python_code(code, _inputs)
  failure = "Python code execution not yet implemented. Generated code saved."
  guidance = "To execute Python code, integrate with a Python runtime or use system calls in non-safe mode."

  { error: failure, python_code: code, note: guidance }
end
|
|
244
|
+
|
|
245
|
+
# Maps an execution result onto the signature's output fields.
#
# @param result [Hash] execution result; an :error entry marks a failure
# @param generated_code [String] code returned in :code when the signature
#   declares a :code output
# @return [Hash] one entry per declared output. On failure: :error (plus
#   :python_code/:note when present) and each remaining field's default. On
#   success: values looked up by field name, then by its string form, and
#   coerced to the field type; missing values fall back to the field default.
def format_outputs(result, generated_code)
  declared = signature.output_fields
  outputs = {}

  # Always include the generated code if requested in signature
  outputs[:code] = generated_code if declared.key?(:code)

  if result[:error]
    outputs[:error] = result[:error]
    outputs[:python_code] = result[:python_code] if result[:python_code]
    outputs[:note] = result[:note] if result[:note]

    declared.each do |name, field|
      next if outputs.key?(name)

      # Use the default as-is: `default || nil` would turn a false default into nil.
      outputs[name] = field.default
    end
  else
    declared.each do |name, field|
      next if name == :code # Already handled

      # key? rather than || so that an explicit false result is kept.
      value = result.key?(name) ? result[name] : result[name.to_s]
      outputs[name] = value.nil? ? field.default : coerce_output_value(value, field)
    end
  end

  outputs
end
|
|
282
|
+
|
|
283
|
+
# Builds the output hash returned when the module itself fails.
#
# @param error [Exception] the failure; its message is reported under :error
# @param code [String] code to report under :code when the signature declares
#   that output (empty by default)
# @return [Hash] :error, optionally :code, and each remaining declared output
#   set to its field default
def format_error_output(error, code = '')
  declared = signature.output_fields
  outputs = {}

  # Always include code field if it's in the signature, even if empty
  outputs[:code] = code if declared.key?(:code)
  outputs[:error] = "ProgramOfThought error: #{error.message}"

  declared.each do |name, field|
    next if outputs.key?(name)

    # Use the default as-is: `default || nil` would turn a false default into nil.
    outputs[name] = field.default
  end

  outputs
end
|
|
299
|
+
|
|
300
|
+
# Converts a result value to the declared field type where that is clearly
# possible; otherwise the value is returned unchanged.
#
# @param value [Object] raw value from the executed code
# @param field [#type] field whose type is :int, :float, :bool, :list, :hash
#   or anything else (no conversion)
# @return [Object] the converted value, or the original value when it is
#   nil/false, the field has no type, the text is not a valid number, or any
#   StandardError occurs during conversion
def coerce_output_value(value, field)
  return value unless value && field.type

  kind = field.type
  if kind == :int
    # Only digits with an optional leading minus count as an integer.
    value.to_s.match?(/\A-?\d+\z/) ? value.to_i : value
  elsif kind == :float
    Float(value.to_s, exception: false) || value
  elsif kind == :bool
    text = value.to_s.downcase
    if text == 'true'
      true
    elsif text == 'false'
      false
    else
      !!value
    end
  elsif kind == :list
    Array(value)
  elsif kind == :hash
    value.is_a?(Hash) ? value : { value: value }
  else
    value
  end
rescue StandardError
  value
end
|
|
331
|
+
end
|
|
332
|
+
end
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
# Register in the main module namespace for convenience
|
|
336
|
+
# Shortcut so the class is also reachable as Desiru::ProgramOfThought.
Desiru::ProgramOfThought = Desiru::Modules::ProgramOfThought
|