desiru 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,52 @@
1
1
  # frozen_string_literal: true
2
2
 
3
require 'json'
require 'timeout'
4
+
3
5
  module Desiru
4
6
  module Modules
5
7
  # ProgramOfThought module that generates executable code to solve problems
6
8
  # Similar to ChainOfThought but produces code instead of reasoning steps
9
+ # Supports both Ruby and Python code generation
7
10
  class ProgramOfThought < Desiru::Module
11
+ DEFAULT_SIGNATURE = 'question: string -> answer: string, code: string'
12
+
8
13
  def initialize(signature = nil, model: nil, **kwargs)
9
- super
10
- @max_iterations = kwargs[:max_iterations] || 1
11
- @code_language = kwargs[:code_language] || 'ruby'
14
+ # Extract our specific options before passing to parent
15
+ @max_iterations = kwargs.delete(:max_iterations) || 1
16
+ @code_language = validate_language(kwargs.delete(:code_language) || 'ruby')
17
+ @timeout = kwargs.delete(:timeout) || 5 # seconds
18
+ @safe_mode = kwargs.delete(:safe_mode) != false # default true
19
+
20
+ # Use default signature if none provided
21
+ signature ||= DEFAULT_SIGNATURE
22
+
23
+ # If signature is a double/mock (for testing), store it directly
24
+ if signature.respond_to?(:output_fields) && signature.respond_to?(:input_fields) &&
25
+ !signature.is_a?(Signature) && !signature.is_a?(String)
26
+ @signature = signature
27
+ @model = model || Desiru.configuration.default_model
28
+ @config = default_config.merge(kwargs[:config] || {})
29
+ @demos = kwargs[:demos] || []
30
+ @metadata = kwargs[:metadata] || {}
31
+ @call_count = 0
32
+ validate_model! if respond_to?(:validate_model!, true)
33
+ register_module if respond_to?(:register_module, true)
34
+ else
35
+ # Pass remaining kwargs to parent (config, demos, metadata)
36
+ super
37
+ end
12
38
  end
13
39
 
14
40
  def forward(**inputs)
41
+ trace_metadata = { code_language: @code_language, safe_mode: @safe_mode }
42
+
43
+ if defined?(Desiru::TraceContext) && Desiru::TraceContext.respond_to?(:current) && Desiru::TraceContext.current
44
+ Desiru::TraceContext.add_metadata(trace_metadata)
45
+ elsif defined?(Desiru::Core) && Desiru::Core.respond_to?(:trace_context) &&
46
+ Desiru::Core.trace_context.respond_to?(:current) && Desiru::Core.trace_context.current
47
+ Desiru::Core.trace_context.add_metadata(trace_metadata)
48
+ end
49
+
15
50
  # Enhance the prompt to request code generation
16
51
  code_prompt = build_code_prompt(inputs)
17
52
 
@@ -23,43 +58,79 @@ module Desiru
23
58
 
24
59
  generated_code = extract_code(response[:content])
25
60
 
61
+ Desiru.logger.debug("Generated #{@code_language} code: #{generated_code}")
62
+
26
63
  # Execute the generated code if safe
27
- result = if safe_to_execute?(generated_code)
28
- execute_code(generated_code, inputs)
64
+ result = if @safe_mode && !safe_to_execute?(generated_code)
65
+ { error: "Generated code deemed unsafe to execute" }
29
66
  else
30
- { error: "Generated code deemed unsafe to execute", code: generated_code }
67
+ execute_code(generated_code, inputs)
31
68
  end
32
69
 
33
70
  # Format outputs according to signature
34
71
  format_outputs(result, generated_code)
72
+ rescue StandardError => e
73
+ Desiru.logger.error("ProgramOfThought error: #{e.message}")
74
+ format_error_output(e, '')
35
75
  end
36
76
 
37
77
  private
38
78
 
79
# Normalises and validates the requested code-generation language.
#
# The name is stringified, stripped of surrounding whitespace and lower-cased
# once; the normalised form is both checked and returned.
#
# @param language [#to_s] language name, e.g. 'ruby', :Python or " Ruby\n"
# @return [String] the normalised language name ('ruby' or 'python')
# @raise [ModuleError] when the language is not supported
def validate_language(language)
  supported = %w[ruby python]
  normalized = language.to_s.strip.downcase
  return normalized if supported.include?(normalized)

  raise ModuleError, "Unsupported language: #{language}. Supported: #{supported.join(', ')}"
end
87
+
39
88
  def build_code_prompt(inputs)
40
89
  prompt = "You are a programming assistant. Generate #{@code_language} code to solve this problem.\n\n"
41
90
 
42
91
  # Add input context
43
- prompt += "Given inputs:\n"
44
- inputs.each do |key, value|
45
- prompt += "#{key}: #{value}\n"
92
+ prompt += "Given inputs:\n" if inputs.any?
93
+ if inputs.any?
94
+ inputs.each do |key, value|
95
+ prompt += "#{key}: #{format_input_value(value)}\n"
96
+ end
97
+ prompt += "\n"
46
98
  end
47
99
 
48
100
  # Add expected output format
49
- prompt += "\nExpected outputs:\n"
101
+ prompt += "Expected outputs:\n"
50
102
  signature.output_fields.each do |name, field|
103
+ next if name == :code # Skip the code field itself
104
+
51
105
  prompt += "- #{name} (#{field.type}): #{field.description || 'No description'}\n"
52
106
  end
53
107
 
54
108
  prompt += "\nGenerate executable #{@code_language} code that processes the inputs "
55
109
  prompt += "and returns the expected outputs. "
56
110
  prompt += "Wrap your code in triple backticks with the language identifier.\n"
57
- prompt += "The code should define a method called 'solve' that takes the inputs "
58
- prompt += "as keyword arguments and returns a hash with the output values."
111
+
112
+ if @code_language == 'ruby'
113
+ prompt += "The code should define a method called 'solve' that takes the inputs "
114
+ prompt += "as keyword arguments and returns a hash with the output values."
115
+ else # python
116
+ prompt += "The code should define a function called 'solve' that takes the inputs "
117
+ prompt += "as keyword arguments and returns a dictionary with the output values."
118
+ end
59
119
 
60
120
  prompt
61
121
  end
62
122
 
123
# Renders one input value as text for the code-generation prompt.
#
# Arrays become a bracketed, comma-separated list whose elements are rendered
# recursively; hashes are rendered as JSON; everything else uses #to_s.
#
# @param value [Object] the input value
# @return [String] textual rendering of the value
def format_input_value(value)
  case value
  when Array
    "[#{value.map { |element| format_input_value(element) }.join(', ')}]"
  when Hash
    # JSON.generate makes the json dependency explicit instead of relying on
    # Hash#to_json having been mixed in by some other file.
    JSON.generate(value)
  else
    value.to_s
  end
end
133
+
63
134
  def extract_code(response)
64
135
  # Extract code from markdown code blocks
65
136
  code_match = response.match(/```#{@code_language}?\n(.*?)```/m)
@@ -74,8 +145,23 @@ module Desiru
74
145
  end
75
146
 
76
147
  def safe_to_execute?(code)
77
- # Basic safety checks - in production, use proper sandboxing
78
- dangerous_patterns = [
148
+ return true unless @safe_mode
149
+
150
+ # Language-specific dangerous patterns
151
+ dangerous_patterns = case @code_language
152
+ when 'ruby'
153
+ ruby_dangerous_patterns
154
+ when 'python'
155
+ python_dangerous_patterns
156
+ else
157
+ []
158
+ end
159
+
160
+ dangerous_patterns.none? { |pattern| code.match?(pattern) }
161
+ end
162
+
163
+ def ruby_dangerous_patterns
164
+ [
79
165
  /system\s*\(/,
80
166
  /exec\s*\(/,
81
167
  /eval\s*\(/,
@@ -86,54 +172,167 @@ module Desiru
86
172
  /Dir\s*\.\s*delete/,
87
173
  /require\s+['"]net/,
88
174
  /Socket/,
89
- /Process\s*\.\s*kill/
175
+ /Process\s*\.\s*kill/,
176
+ /IO\s*\.\s*popen/,
177
+ /Open3/,
178
+ /\$SAFE\s*=/
90
179
  ]
180
+ end
91
181
 
92
- dangerous_patterns.none? { |pattern| code.match?(pattern) }
182
# Patterns that mark generated Python code as unsafe to run.
#
# This is a plain-text screen, not a sandbox: each pattern is matched against
# the raw source, so it can produce false positives and can be evaded.
#
# @return [Array<Regexp>]
def python_dangerous_patterns
  [
    /os\.system/,
    /os\.popen/,
    /subprocess/,
    /eval\s*\(/,
    /exec\s*\(/,
    /compile\s*\(/,
    /__import__/,
    /importlib/,
    # open(...) with a mode containing w, a, x or + (e.g. 'w', 'ab', 'r+', 'x'),
    # given positionally or as mode=...
    /open\s*\([^,)]*,\s*(?:mode\s*=\s*)?['"][^'"]*[wax+]/,
    /os\.(?:remove|unlink|rmdir)/,
    /shutil\.rmtree/,
    /socket/,
    /requests/,
    /urllib/
  ]
end
94
198
 
95
199
  def execute_code(code, inputs)
200
+ case @code_language
201
+ when 'ruby'
202
+ execute_ruby_code(code, inputs)
203
+ when 'python'
204
+ execute_python_code(code, inputs)
205
+ else
206
+ { error: "Unsupported language for execution: #{@code_language}" }
207
+ end
208
+ end
209
+
210
# Evaluates generated Ruby code and calls its `solve` method.
#
# The code is instance_eval'ed on a fresh Object and `solve` is called with
# the inputs as keyword arguments (keys converted to symbols). Evaluation and
# the call are both bounded by @timeout seconds.
#
# Failures are reported as an :error entry instead of being raised. That
# includes errors outside StandardError which generated code can easily
# trigger: SyntaxError/LoadError (ScriptError), runaway recursion
# (SystemStackError) and `exit` (SystemExit).
#
# @param code [String] Ruby source expected to define `solve`
# @param inputs [Hash] input names mapped to values
# @return [Hash] the result of `solve` (wrapped as { result: ... } when it is
#   not a Hash), or { error: String }
def execute_ruby_code(code, inputs)
  # A fresh receiver keeps definitions off this module; it is NOT a sandbox,
  # the code still runs with the full privileges of the process.
  context = Object.new

  result = Timeout.timeout(@timeout) do
    # Define the code in the context
    context.instance_eval(code)

    # Call the solve method if it exists
    if context.respond_to?(:solve)
      context.solve(**inputs.transform_keys(&:to_sym))
    else
      { error: "Generated code does not define a 'solve' method" }
    end
  end

  # Ensure result is a hash
  result.is_a?(Hash) ? result : { result: result }
rescue Timeout::Error
  { error: "Code execution timed out after #{@timeout} seconds" }
rescue ScriptError, SystemStackError, SystemExit, StandardError => e
  { error: "Code execution failed: #{e.message}" }
end
111
234
 
235
# Placeholder for Python execution: nothing is run.
#
# @param code [String] generated Python source, echoed back under :python_code
# @param _inputs [Hash] unused
# @return [Hash] an :error message, the code, and a :note on how to run it
def execute_python_code(code, _inputs)
  placeholder = { error: "Python code execution not yet implemented. Generated code saved." }
  placeholder[:python_code] = code
  placeholder[:note] = "To execute Python code, integrate with a Python runtime or use system calls in non-safe mode."
  placeholder
end
244
+
112
245
  def format_outputs(result, generated_code)
113
246
  outputs = {}
114
247
 
115
- # Always include the generated code
248
+ # Always include the generated code if requested in signature
116
249
  outputs[:code] = generated_code if signature.output_fields.key?(:code)
117
250
 
118
251
  if result[:error]
119
- # Handle error case
252
+ # Handle error case - always include error
120
253
  outputs[:error] = result[:error]
254
+
255
+ # Add any additional error info
256
+ outputs[:python_code] = result[:python_code] if result[:python_code]
257
+ outputs[:note] = result[:note] if result[:note]
258
+
259
+ # Fill other fields with defaults
121
260
  signature.output_fields.each do |name, field|
122
- next if %i[code error].include?(name)
261
+ next if outputs.key?(name)
123
262
 
124
263
  outputs[name] = field.default || nil
125
264
  end
126
265
  else
127
266
  # Map result to expected outputs
128
267
  signature.output_fields.each do |name, field|
129
- next if name == :code
268
+ next if name == :code # Already handled
130
269
 
131
- outputs[name] = result[name] || field.default || nil
270
+ # Don't use || here because it will treat false as falsy
271
+ value = result.key?(name) ? result[name] : result[name.to_s]
272
+ outputs[name] = if value.nil?
273
+ field.default || nil
274
+ else
275
+ coerce_output_value(value, field)
276
+ end
132
277
  end
133
278
  end
134
279
 
135
280
  outputs
136
281
  end
282
+
283
# Builds the output hash for a failure raised while running the module.
#
# :code is included (possibly empty) when the signature declares it, :error
# carries the prefixed exception message, and every other declared field is
# set to its default. Defaults are used as declared, so a default of `false`
# stays `false` rather than collapsing to nil.
#
# @param error [#message] the error being reported
# @param code [String] generated code, if any
# @return [Hash] outputs keyed by field name
def format_error_output(error, code = '')
  fields = signature.output_fields
  outputs = {}

  outputs[:code] = code if fields.key?(:code)
  outputs[:error] = "ProgramOfThought error: #{error.message}"

  # Fill the remaining declared fields with their defaults
  fields.each do |name, field|
    outputs[name] = field.default unless outputs.key?(name)
  end

  outputs
end
299
+
300
# Coerces a result value to the type declared on the output field.
#
# Falsy values and untyped fields are returned untouched. Coercion is
# conservative: when the value does not look like the target type it is
# returned unchanged.
#
# - :int   integer-looking text (surrounding whitespace allowed) and
#          integral floats become Integer
# - :float anything Float() accepts becomes Float
# - :bool  'true' / 'false' (any case, surrounding whitespace allowed) map to
#          booleans; everything else maps by truthiness
# - :list  a Hash is wrapped as a single element instead of being exploded
#          into key/value pairs; other values go through Array()
# - :hash  non-hashes are wrapped as { value: ... }
#
# @param value [Object] raw value returned by the generated code
# @param field [#type] output field definition
# @return [Object] the coerced value, or the original value
def coerce_output_value(value, field)
  return value unless value && field.type

  case field.type
  when :int
    return value.to_i if value.is_a?(Float) && value.finite? && value == value.floor

    text = value.to_s.strip
    text.match?(/\A[+-]?\d+\z/) ? text.to_i : value
  when :float
    begin
      Float(value.to_s)
    rescue ArgumentError, TypeError
      value
    end
  when :bool
    case value.to_s.strip.downcase
    when 'true' then true
    when 'false' then false
    else !!value
    end
  when :list
    value.is_a?(Hash) ? [value] : Array(value)
  when :hash
    value.is_a?(Hash) ? value : { value: value }
  else
    value
  end
rescue StandardError
  # Coercion is best-effort: hand back the raw value on any failure.
  value
end
137
331
  end
138
332
  end
139
333
  end
334
+
335
+ # Register in the main module namespace for convenience
336
# Top-level alias: Desiru::ProgramOfThought refers to the class defined under Desiru::Modules.
Desiru::ProgramOfThought = Desiru::Modules::ProgramOfThought
@@ -22,7 +22,21 @@ module Desiru
22
22
 
23
23
  def evaluate(program, dataset)
24
24
  scores = dataset.map do |example|
25
- prediction = program.call(example.except(:answer, :output))
25
+ # Extract inputs (exclude answer/output fields)
26
+ inputs = {}
27
+ if example.respond_to?(:to_h)
28
+ example.to_h.each do |k, v|
29
+ inputs[k] = v unless %i[answer output].include?(k)
30
+ end
31
+ elsif example.is_a?(Hash)
32
+ example.each do |k, v|
33
+ inputs[k] = v unless %i[answer output].include?(k.to_sym)
34
+ end
35
+ else
36
+ inputs = example
37
+ end
38
+
39
+ prediction = program.call(inputs)
26
40
  score_prediction(prediction, example)
27
41
  end
28
42
 
@@ -55,6 +69,10 @@ module Desiru
55
69
  f1_score(prediction, ground_truth)
56
70
  when :accuracy
57
71
  accuracy_score(prediction, ground_truth)
72
+ when :confidence
73
+ confidence_score(prediction, ground_truth)
74
+ when :consistency
75
+ consistency_score(prediction, ground_truth)
58
76
  else
59
77
  raise OptimizerError, "Unknown metric: #{@metric}"
60
78
  end
@@ -86,6 +104,18 @@ module Desiru
86
104
  exact_match_score(prediction, ground_truth)
87
105
  end
88
106
 
107
# Stand-in confidence metric derived from the exact-match score.
#
# The exact-match score is scaled by 0.9 and offset by 0.1. A fuller
# implementation would use model confidence scores.
#
# @param prediction [Object] predicted value
# @param ground_truth [Object] expected value
# @return [Numeric]
def confidence_score(prediction, ground_truth)
  match = exact_match_score(prediction, ground_truth)
  (match * 0.9) + 0.1
end
112
+
113
# Stand-in consistency metric derived from the exact-match score.
#
# The exact-match score is scaled by 0.8 and offset by 0.2. A fuller
# implementation would track consistency across examples.
#
# @param prediction [Object] predicted value
# @param ground_truth [Object] expected value
# @return [Numeric]
def consistency_score(prediction, ground_truth)
  match = exact_match_score(prediction, ground_truth)
  (match * 0.8) + 0.2
end
118
+
89
119
  def extract_answer(data)
90
120
  case data
91
121
  when ModuleResult, ProgramResult, Hash