dspy 0.28.0 → 0.28.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,40 +11,78 @@ module DSPy
11
11
  class GroundedProposer
12
12
  extend T::Sig
13
13
 
14
- # Configuration for instruction proposal
14
+ # Python-compatible TIPS dictionary for instruction generation
15
+ TIPS = {
16
+ "none" => "",
17
+ "creative" => "Don't be afraid to be creative when creating the new instruction!",
18
+ "simple" => "Keep the instruction clear and concise.",
19
+ "description" => "Make sure your instruction is very informative and descriptive.",
20
+ "high_stakes" => "The instruction should include a high stakes scenario in which the LM must solve the task!",
21
+ "persona" => 'Include a persona that is relevant to the task in the instruction (ie. "You are a ...")'
22
+ }.freeze
23
+
24
+ # Configuration for instruction proposal (Python-compatible)
15
25
  class Config
16
26
  extend T::Sig
17
27
 
28
+ # Core parameters
18
29
  sig { returns(Integer) }
19
30
  attr_accessor :num_instruction_candidates
20
31
 
32
+ # Python-compatible awareness flags (match Python defaults exactly)
33
+ sig { returns(T::Boolean) }
34
+ attr_accessor :program_aware
35
+
36
+ sig { returns(T::Boolean) }
37
+ attr_accessor :use_dataset_summary
38
+
39
+ sig { returns(T::Boolean) }
40
+ attr_accessor :use_task_demos
41
+
42
+ sig { returns(T::Boolean) }
43
+ attr_accessor :use_tip
44
+
45
+ sig { returns(T::Boolean) }
46
+ attr_accessor :use_instruct_history
47
+
48
+ # Additional parameters
21
49
  sig { returns(Integer) }
22
- attr_accessor :max_examples_for_analysis
50
+ attr_accessor :view_data_batch_size
23
51
 
24
52
  sig { returns(Integer) }
25
- attr_accessor :max_instruction_length
53
+ attr_accessor :num_demos_in_context
26
54
 
27
55
  sig { returns(T::Boolean) }
28
- attr_accessor :use_task_description
56
+ attr_accessor :set_tip_randomly
29
57
 
30
58
  sig { returns(T::Boolean) }
31
- attr_accessor :use_input_output_analysis
59
+ attr_accessor :set_history_randomly
32
60
 
33
- sig { returns(T::Boolean) }
34
- attr_accessor :use_few_shot_examples
61
+ sig { returns(Float) }
62
+ attr_accessor :init_temperature
35
63
 
36
- sig { returns(String) }
37
- attr_accessor :proposal_model
64
+ sig { returns(T::Boolean) }
65
+ attr_accessor :verbose
38
66
 
39
67
  sig { void }
40
68
  def initialize
69
+ # Core parameters
41
70
  @num_instruction_candidates = 5
42
- @max_examples_for_analysis = 10
43
- @max_instruction_length = 200
44
- @use_task_description = true
45
- @use_input_output_analysis = true
46
- @use_few_shot_examples = true
47
- @proposal_model = "gpt-4o-mini"
71
+
72
+ # Python-compatible awareness flags (match Python defaults)
73
+ @program_aware = true
74
+ @use_dataset_summary = true
75
+ @use_task_demos = true
76
+ @use_tip = true
77
+ @use_instruct_history = true
78
+
79
+ # Additional parameters
80
+ @view_data_batch_size = 10
81
+ @num_demos_in_context = 3
82
+ @set_tip_randomly = true
83
+ @set_history_randomly = true
84
+ @init_temperature = 1.0
85
+ @verbose = false
48
86
  end
49
87
  end
50
88
 
@@ -88,11 +126,66 @@ module DSPy
88
126
  sig { returns(Config) }
89
127
  attr_reader :config
90
128
 
91
- sig { params(config: T.nilable(Config)).void }
92
- def initialize(config: nil)
129
+ sig do
130
+ params(
131
+ config: T.nilable(Config),
132
+ program: T.nilable(T.untyped),
133
+ trainset: T.nilable(T::Array[DSPy::Example])
134
+ ).void
135
+ end
136
# Set up the proposer and eagerly compute the optional context pieces.
#
# @param config [Config, nil] settings; a default Config is built when nil
# @param program [Object, nil] program (instance or class) to describe when
#   program-aware mode is enabled
# @param trainset [Array, nil] examples used for the dataset summary
def initialize(config: nil, program: nil, trainset: nil)
  @config = config || Config.new
  @program = program
  @trainset = trainset
  @dataset_summary = nil
  @program_code_string = nil

  # Data-aware mode: summarize the training set once, up front.
  summary_requested = @config.use_dataset_summary && trainset && !trainset.empty?
  if summary_requested
    begin
      # Loaded lazily so the generator is only required when it is needed.
      require_relative 'dataset_summary_generator'
      @dataset_summary = DatasetSummaryGenerator.create_dataset_summary(
        trainset, @config.view_data_batch_size, DSPy.current_lm, verbose: @config.verbose
      )
    rescue => error
      # Best effort: a failed summary must not prevent construction.
      DSPy.logger.warn("Failed to generate dataset summary: #{error.message}")
      @dataset_summary = nil
    end
  end

  # Program-aware mode: capture a description of the program, if one was given.
  return unless @config.program_aware && program

  @program_code_string = extract_program_source(program)
end
164
+
165
+ private
166
+
167
+ # Extract source code from program for program-aware mode
168
+ sig { params(program: T.untyped).returns(T.nilable(String)) }
169
# Describe where the program's `forward` method is defined.
#
# The source file itself is not read; the result only names the program class
# and the file/line reported by Ruby for `forward`.
#
# @param program [Object, Class] a program instance or a program class
# @return [String, nil] "Program: <class>\nSource: <file>:<line>", or nil when
#   the class has no `forward` method, Ruby reports no source location for it,
#   or an unexpected error occurs (which is logged as a warning)
def extract_program_source(program)
  klass = program.is_a?(Class) ? program : program.class

  # Test for `forward` explicitly instead of blanket-rescuing the lookup, so
  # that unexpected errors reach the logging rescue below rather than vanishing.
  # instance_method also resolves private methods, hence both checks.
  has_forward = klass.method_defined?(:forward) || klass.private_method_defined?(:forward)
  return nil unless has_forward

  # source_location is nil for methods that were not defined in Ruby source.
  location = klass.instance_method(:forward).source_location
  return nil unless location

  file, line = location
  # Anonymous classes have no name; fall back to their inspect string.
  "Program: #{klass.name || klass.inspect}\nSource: #{file}:#{line}"
rescue => e
  DSPy.logger.warn("Could not extract program source: #{e.message}")
  nil
end
95
186
 
187
+ public
188
+
96
189
  # Generate instruction candidates for a signature and training examples
97
190
  sig do
98
191
  params(
@@ -116,9 +209,10 @@ module DSPy
116
209
 
117
210
  # Generate instruction candidates
118
211
  candidates = generate_instruction_candidates(
119
- signature_class,
120
- analysis,
121
- current_instruction
212
+ signature_class,
213
+ analysis,
214
+ current_instruction,
215
+ few_shot_examples: few_shot_examples
122
216
  )
123
217
 
124
218
  # Filter and rank candidates
@@ -126,8 +220,8 @@ module DSPy
126
220
 
127
221
  metadata = {
128
222
  generation_timestamp: Time.now.iso8601,
129
- model_used: @config.proposal_model,
130
- num_examples_analyzed: [examples.size, @config.max_examples_for_analysis].min,
223
+ model_used: DSPy.current_lm.model,
224
+ num_examples_analyzed: [examples.size, @config.view_data_batch_size].min,
131
225
  original_instruction: current_instruction
132
226
  }
133
227
 
@@ -204,7 +298,7 @@ module DSPy
204
298
  # Analyze patterns in training examples
205
299
  sig { params(examples: T::Array[T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
206
300
  def analyze_example_patterns(examples)
207
- analysis_examples = examples.take(@config.max_examples_for_analysis)
301
+ analysis_examples = examples.take(@config.view_data_batch_size)
208
302
 
209
303
  {
210
304
  total_examples: examples.size,
@@ -323,12 +417,18 @@ module DSPy
323
417
  params(
324
418
  signature_class: T.class_of(DSPy::Signature),
325
419
  analysis: T::Hash[Symbol, T.untyped],
326
- current_instruction: T.nilable(String)
420
+ current_instruction: T.nilable(String),
421
+ few_shot_examples: T.nilable(T::Array[T.untyped])
327
422
  ).returns(T::Array[String])
328
423
  end
329
- def generate_instruction_candidates(signature_class, analysis, current_instruction)
424
+ def generate_instruction_candidates(signature_class, analysis, current_instruction, few_shot_examples: nil)
330
425
  # Build context for instruction generation
331
- context = build_generation_context(signature_class, analysis, current_instruction)
426
+ context = build_generation_context(
427
+ signature_class,
428
+ analysis,
429
+ current_instruction,
430
+ few_shot_examples: few_shot_examples
431
+ )
332
432
 
333
433
  # Create instruction generation signature
334
434
  instruction_signature = create_instruction_generation_signature
@@ -346,16 +446,7 @@ module DSPy
346
446
  )
347
447
 
348
448
  instruction = result.instruction.strip
349
-
350
- # Truncate if too long
351
- if instruction.length > @config.max_instruction_length
352
- instruction = instruction[0, @config.max_instruction_length].strip
353
- # Try to end at a word boundary
354
- if instruction.include?(' ')
355
- instruction = instruction.rpartition(' ').first + '.'
356
- end
357
- end
358
-
449
+
359
450
  candidates << instruction if instruction.length > 0
360
451
  rescue => error
361
452
  DSPy.logger.warn("Failed to generate instruction candidate #{i + 1}: #{error.message}")
@@ -375,32 +466,56 @@ module DSPy
375
466
  params(
376
467
  signature_class: T.class_of(DSPy::Signature),
377
468
  analysis: T::Hash[Symbol, T.untyped],
378
- current_instruction: T.nilable(String)
469
+ current_instruction: T.nilable(String),
470
+ few_shot_examples: T.nilable(T::Array[T.untyped])
379
471
  ).returns(String)
380
472
  end
381
- def build_generation_context(signature_class, analysis, current_instruction)
473
# Assemble the textual context handed to the instruction generator.
#
# Sections are appended in a fixed order and joined with blank lines:
# dataset summary, program code, task description, input/output fields,
# task demos, task themes, current instruction, tip. The task description and
# field sections are always present; the others depend on config flags and on
# the corresponding data being available.
#
# @param signature_class [Class] signature whose description names the task
# @param analysis [Hash] analysis with :input_fields, :output_fields and
#   optionally :common_themes
# @param current_instruction [String, nil] instruction being improved, if any
# @param few_shot_examples [Array, nil] examples to render as demos
# @return [String] the assembled context
def build_generation_context(signature_class, analysis, current_instruction, few_shot_examples: nil)
  sections = []

  # Optional: dataset summary and program description computed at construction.
  sections << "Dataset Summary: #{@dataset_summary}" if @config.use_dataset_summary && @dataset_summary
  sections << "Program Code:\n#{@program_code_string}" if @config.program_aware && @program_code_string

  # Always present: what the task is and what goes in and out of it.
  sections << "Task: #{signature_class.description}"

  inputs_text = analysis[:input_fields].map { |field| format_field_description(field) }.join(', ')
  outputs_text = analysis[:output_fields].map { |field| format_field_description(field) }.join(', ')
  sections << "Input fields: #{inputs_text}"
  sections << "Output fields: #{outputs_text}"

  # Optional: a bounded number of demos.
  if @config.use_task_demos && few_shot_examples && !few_shot_examples.empty?
    demos = few_shot_examples
            .take(@config.num_demos_in_context)
            .map { |example| format_example_as_demo(example) }
    sections << "Task Demos:\n#{demos.join("\n\n")}"
  end

  themes = analysis[:common_themes]
  sections << "Task themes: #{themes.join(', ')}" if themes && themes.any?

  sections << "Current instruction: \"#{current_instruction}\"" if current_instruction

  # Optional: a tip, skipped when the selected tip is blank.
  if @config.use_tip
    tip = select_tip
    sections << "Tip: #{tip}" if tip && !tip.empty?
  end

  sections.join("\n\n")
end
405
520
 
406
521
  # Format field description with enum values if applicable
@@ -414,6 +529,42 @@ module DSPy
414
529
  end
415
530
  end
416
531
 
532
+ # Format an example as a demo for context
533
+ sig { params(example: T.untyped).returns(String) }
534
# Render one example as a single-line demo for the generation context.
#
# Objects that do not expose both `inputs` and `expected` are rendered with
# `to_s`. Otherwise each non-empty side becomes "Label: key: value, ..." with
# values shown via `inspect`, and the sides are joined with " | ".
#
# @param example [Object] example-like object
# @return [String] demo text (empty when both sides are missing or empty)
def format_example_as_demo(example)
  unless example.respond_to?(:inputs) && example.respond_to?(:expected)
    return example.to_s
  end

  # Renders one labelled side, or nil when there is nothing to show.
  render_side = lambda do |label, fields|
    next nil unless fields && !fields.empty?

    pairs = fields.map { |name, value| "#{name}: #{value.inspect}" }
    "#{label}: #{pairs.join(', ')}"
  end

  [
    render_side.call("Inputs", example.inputs),
    render_side.call("Expected", example.expected)
  ].compact.join(" | ")
end
553
+
554
+ # Select a tip based on configuration
555
+ sig { returns(T.nilable(String)) }
556
# Pick the tip text to append to the generation context.
#
# With random tips enabled, one entry of TIPS other than "none" is sampled;
# otherwise the empty string is returned, which callers treat as "no tip".
#
# @return [String] tip text, possibly empty
def select_tip
  return "" unless @config.set_tip_randomly

  # "none" maps to an empty tip, so it is left out of the draw.
  candidate_keys = TIPS.keys - ["none"]
  TIPS[candidate_keys.sample]
end
567
+
417
568
  # Build requirements text for instruction generation
418
569
  sig { params(analysis: T::Hash[Symbol, T.untyped]).returns(String) }
419
570
  def build_requirements_text(analysis)
@@ -478,25 +629,21 @@ module DSPy
478
629
  # Filter out duplicates and empty candidates
479
630
  filtered = candidates.uniq.reject(&:empty?)
480
631
 
481
- # Simple ranking based on length and content quality
632
+ # Simple ranking based on content quality (Python-compatible: no length scoring)
482
633
  filtered.sort_by do |instruction|
483
634
  score = 0
484
-
485
- # Prefer moderate length instructions
486
- length_score = [instruction.length, @config.max_instruction_length].min / @config.max_instruction_length.to_f
487
- score += length_score * 0.3
488
-
635
+
489
636
  # Prefer instructions with action words
490
637
  action_words = %w[analyze classify generate explain solve determine identify]
491
638
  action_score = action_words.count { |word| instruction.downcase.include?(word) }
492
639
  score += action_score * 0.4
493
-
640
+
494
641
  # Prefer instructions that mention reasoning for complex tasks
495
642
  if analysis[:complexity_indicators][:requires_reasoning]
496
643
  reasoning_score = instruction.downcase.match?(/\b(step|think|reason|explain)\b/) ? 1 : 0
497
644
  score += reasoning_score * 0.3
498
645
  end
499
-
646
+
500
647
  -score # Negative for descending sort
501
648
  end
502
649
  end
@@ -588,7 +735,7 @@ module DSPy
588
735
  'proposal.num_candidates' => result.num_candidates,
589
736
  'proposal.best_instruction_length' => result.best_instruction.length,
590
737
  'proposal.analysis_themes' => result.analysis[:common_themes] || [],
591
- 'proposal.model_used' => @config.proposal_model
738
+ 'proposal.model_used' => DSPy.current_lm.model
592
739
  })
593
740
  end
594
741
  end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+ require_relative 'prompt'
5
+
6
+ module DSPy
7
+ # Optimized prompt for structured outputs that omits redundant schema information
8
+ # since the schema is already enforced by API parameters (response_format, generation_config, tools)
9
class StructuredOutputsPrompt < Prompt
  extend T::Sig

  # Build the system prompt: the input schema as a fenced JSON block, any
  # few-shot examples, then the objective. No output schema or JSON
  # formatting instructions are included.
  sig { returns(String) }
  def render_system_prompt
    lines = [
      "Your input schema fields are:",
      "```json",
      JSON.pretty_generate(@input_schema),
      "```"
    ]

    # Few-shot examples, numbered from 1, each followed by a blank line.
    if @few_shot_examples.any?
      lines.push("", "Here are some examples:", "")
      @few_shot_examples.each.with_index(1) do |example, number|
        lines.push("### Example #{number}", example.to_prompt_section, "")
      end
    end

    lines.push("", "Your objective is: #{@instruction}")
    lines.join("\n")
  end

  # Build the user prompt: the serialized input values as a fenced JSON block,
  # with no JSON formatting instructions.
  sig { params(input_values: T::Hash[Symbol, T.untyped]).returns(String) }
  def render_user_prompt(input_values)
    payload = JSON.pretty_generate(serialize_for_json(input_values))
    ["## Input Values", "```json", payload, "```"].join("\n")
  end
end
53
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+
5
+ module DSPy
6
+ module Teleprompt
7
+ # Bootstrap strategy enum for create_n_fewshot_demo_sets
8
+ # Provides type-safe alternatives to Python's magic number seeds
9
class BootstrapStrategy < T::Enum
  enums do
    # Zero-shot: no demonstrations at all (Python seed -3).
    ZeroShot = new

    # Labeled examples only; nothing is bootstrapped (Python seed -2).
    LabeledOnly = new

    # Bootstrapped demonstrations kept in order, no shuffling (Python seed -1).
    Unshuffled = new

    # Bootstrapped demonstrations, shuffled and randomly sized (Python seed >= 0).
    # Reproducibility needs a seed supplied through a separate parameter.
    Shuffled = new
  end
end
25
+ end
26
+ end