dspy 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+
5
+ module DSPy
6
+ module Memory
7
+ # Abstract base class for memory storage backends
8
class MemoryStore
  extend T::Sig
  extend T::Helpers
  abstract!

  # ---- Backend-specific primitives (every concrete store implements these) ----

  # Persist +record+. Returns a boolean (presumably true on success; confirm
  # against the concrete backends).
  sig { abstract.params(record: MemoryRecord).returns(T::Boolean) }
  def store(record); end

  # Fetch the record identified by +id+; the signature allows nil as a result.
  sig { abstract.params(id: String).returns(T.nilable(MemoryRecord)) }
  def retrieve(id); end

  # Replace an already-stored record with +record+.
  sig { abstract.params(record: MemoryRecord).returns(T::Boolean) }
  def update(record); end

  # Remove the record identified by +id+.
  sig { abstract.params(id: String).returns(T::Boolean) }
  def delete(id); end

  # Enumerate records, optionally scoped to +user_id+ and paged with
  # +limit+ / +offset+.
  sig { abstract.params(user_id: T.nilable(String), limit: T.nilable(Integer), offset: T.nilable(Integer)).returns(T::Array[MemoryRecord]) }
  def list(user_id: nil, limit: nil, offset: nil); end

  # Basic text search over memory content.
  sig { abstract.params(query: String, user_id: T.nilable(String), limit: T.nilable(Integer)).returns(T::Array[MemoryRecord]) }
  def search(query, user_id: nil, limit: nil); end

  # Find memories by +tags+.
  sig { abstract.params(tags: T::Array[String], user_id: T.nilable(String), limit: T.nilable(Integer)).returns(T::Array[MemoryRecord]) }
  def search_by_tags(tags, user_id: nil, limit: nil); end

  # Similarity search against +embedding+, for backends that support it
  # (see #supports_vector_search?).
  sig { abstract.params(embedding: T::Array[Float], user_id: T.nilable(String), limit: T.nilable(Integer), threshold: T.nilable(Float)).returns(T::Array[MemoryRecord]) }
  def vector_search(embedding, user_id: nil, limit: nil, threshold: nil); end

  # Number of stored memories, optionally scoped to +user_id+.
  sig { abstract.params(user_id: T.nilable(String)).returns(Integer) }
  def count(user_id: nil); end

  # Remove the memories of +user_id+, or every memory when +user_id+ is nil.
  sig { abstract.params(user_id: T.nilable(String)).returns(Integer) }
  def clear(user_id: nil); end

  # ---- Defaults shared by all backends ----

  # Whether this backend can serve #vector_search. The base answer is false;
  # backends that can do it are expected to override this.
  sig { returns(T::Boolean) }
  def supports_vector_search?
    false
  end

  # Summary of the store: the unscoped #count plus the vector-search flag.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def stats
    summary = {}
    summary[:total_memories] = count
    summary[:supports_vector_search] = supports_vector_search?
    summary
  end

  # ---- Batch helpers: call the single-record primitive once per element ----

  # Stores each record in order; one boolean result per input record.
  sig { params(records: T::Array[MemoryRecord]).returns(T::Array[T::Boolean]) }
  def store_batch(records)
    records.map { |entry| store(entry) }
  end

  # Retrieves each id in order; the result has one slot per id.
  sig { params(ids: T::Array[String]).returns(T::Array[T.nilable(MemoryRecord)]) }
  def retrieve_batch(ids)
    ids.map { |record_id| retrieve(record_id) }
  end

  # Updates each record in order; one boolean result per input record.
  sig { params(records: T::Array[MemoryRecord]).returns(T::Array[T::Boolean]) }
  def update_batch(records)
    records.map { |entry| update(entry) }
  end

  # Deletes each id in order; one boolean result per input id.
  sig { params(ids: T::Array[String]).returns(T::Array[T::Boolean]) }
  def delete_batch(ids)
    ids.map { |record_id| delete(record_id) }
  end
end
89
+ end
90
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'memory/memory_record'
4
+ require_relative 'memory/memory_store'
5
+ require_relative 'memory/in_memory_store'
6
+ require_relative 'memory/embedding_engine'
7
+ require_relative 'memory/local_embedding_engine'
8
+ require_relative 'memory/memory_compactor'
9
+ require_relative 'memory/memory_manager'
10
+
11
module DSPy
  # Memory system for persistent, searchable agent memory.
  # Holds a single module-level MemoryManager that is built on first use.
  module Memory
    class << self
      extend T::Sig

      # Returns the module-wide MemoryManager, constructing it on the first
      # call and handing back the same instance afterwards.
      sig { returns(MemoryManager) }
      def manager
        return @manager if @manager

        @manager = MemoryManager.new
      end

      # Forgets the cached manager so the next #manager call builds a new one
      # (useful for testing).
      sig { void }
      def reset!
        @manager = nil
      end
    end
  end
end
@@ -65,7 +65,7 @@ module DSPy
65
65
  # Smart consolidation: skip nested events when higher-level events are being emitted
66
66
  if is_nested_context?
67
67
  # If we're in a nested context, only emit higher-level events
68
- event_name.match?(/^dspy\.(chain_of_thought|react)$/)
68
+ event_name.match?(/^dspy\.(chain_of_thought|react|codeact)$/)
69
69
  else
70
70
  # If we're not in a nested context, emit all events normally
71
71
  true
@@ -103,11 +103,9 @@ module DSPy
103
103
  return false if caller_locations.nil?
104
104
 
105
105
  # Look for higher-level DSPy modules in the call stack
106
- # We consider ChainOfThought and ReAct as higher-level modules
106
+ # We consider ChainOfThought, ReAct, and CodeAct as higher-level modules
107
107
  higher_level_modules = caller_locations.select do |loc|
108
- loc.path.include?('chain_of_thought') ||
109
- loc.path.include?('re_act') ||
110
- loc.path.include?('react')
108
+ loc.path.match?(/(?:chain_of_thought|re_act|react|code_act)/)
111
109
  end
112
110
 
113
111
  # If we have higher-level modules in the call stack, we're in a nested context
@@ -25,6 +25,9 @@ module DSPy
25
25
  sig { params(value: T.untyped, prop_type: T.untyped).returns(T.untyped) }
26
26
  def coerce_value_to_type(value, prop_type)
27
27
  return value unless prop_type
28
+
29
+ # If value is nil, return it as-is for nilable types
30
+ return value if value.nil?
28
31
 
29
32
  case prop_type
30
33
  when ->(type) { enum_type?(type) }
data/lib/dspy/prompt.rb CHANGED
@@ -121,7 +121,7 @@ module DSPy
121
121
 
122
122
  sections << "## Input Values"
123
123
  sections << "```json"
124
- sections << JSON.pretty_generate(input_values)
124
+ sections << JSON.pretty_generate(serialize_for_json(input_values))
125
125
  sections << "```"
126
126
 
127
127
  sections << ""
@@ -218,5 +218,52 @@ module DSPy
218
218
  output_fields: @output_schema.dig(:properties)&.keys&.length || 0
219
219
  }
220
220
  end
221
+
222
+ private
223
+
224
+ # Recursively serialize complex objects for JSON representation
225
+ sig { params(obj: T.untyped).returns(T.untyped) }
226
# Recursively converts +obj+ into a structure built from hashes, arrays and
# plain values so it can be handed to JSON generation.
#
# - T::Struct: converted through #to_h when the object responds to it,
#   otherwise through serialize_struct_to_hash; the result is processed again.
# - Hash: values are converted; keys are left untouched.
# - Array: every element is converted.
# - T::Enum: replaced by its #serialize value.
# - Anything else is returned unchanged.
def serialize_for_json(obj)
  if obj.is_a?(T::Struct)
    return serialize_struct_to_hash(obj) unless obj.respond_to?(:to_h)

    serialize_for_json(obj.to_h)
  elsif obj.is_a?(Hash)
    obj.transform_values { |entry| serialize_for_json(entry) }
  elsif obj.is_a?(Array)
    obj.map { |element| serialize_for_json(element) }
  elsif obj.is_a?(T::Enum)
    obj.serialize
  else
    obj
  end
end
250
+
251
+ # Fallback method to serialize T::Struct to hash when to_h is not available
252
+ sig { params(struct_obj: T::Struct).returns(T::Hash[Symbol, T.untyped]) }
253
# Fallback conversion for a T::Struct that has no #to_h: walks the props
# declared on the struct's class and collects each readable prop into a hash.
#
# Returns an empty hash when the class does not respond to +props+. Props the
# instance does not respond to are skipped. Every value is passed through
# serialize_for_json.
def serialize_struct_to_hash(struct_obj)
  struct_class = struct_obj.class
  return {} unless struct_class.respond_to?(:props)

  struct_class.props.each_with_object({}) do |(prop_name, _prop_info), collected|
    next unless struct_obj.respond_to?(prop_name)

    collected[prop_name] = serialize_for_json(struct_obj.public_send(prop_name))
  end
end
221
268
  end
222
269
  end
@@ -52,6 +52,18 @@ module DSPy
52
52
  log_react_tool_call(event)
53
53
  end
54
54
 
55
+ DSPy::Instrumentation.subscribe('dspy.codeact') do |event|
56
+ log_codeact(event)
57
+ end
58
+
59
+ DSPy::Instrumentation.subscribe('dspy.codeact.iteration_complete') do |event|
60
+ log_codeact_iteration_complete(event)
61
+ end
62
+
63
+ DSPy::Instrumentation.subscribe('dspy.codeact.code_execution') do |event|
64
+ log_codeact_code_execution(event)
65
+ end
66
+
55
67
  # Subscribe to optimization events
56
68
  DSPy::Instrumentation.subscribe('dspy.optimization.start') do |event|
57
69
  log_optimization_start(event)
@@ -236,7 +248,7 @@ module DSPy
236
248
  "status=#{status}",
237
249
  "duration_ms=#{duration}"
238
250
  ]
239
- log_parts << "thought=\"#{thought&.truncate(100)}\"" if thought
251
+ log_parts << "thought=\"#{thought && thought.length > 100 ? thought[0..97] + '...' : thought}\"" if thought
240
252
  log_parts << "action=\"#{action}\"" if action
241
253
  log_parts << "error=\"#{payload[:error_message]}\"" if status == 'error' && payload[:error_message]
242
254
 
@@ -263,6 +275,84 @@ module DSPy
263
275
  logger.info(log_parts.join(' '))
264
276
  end
265
277
 
278
+ sig { params(event: T.untyped).void }
279
# Logs a single key=value line summarising a CodeAct run.
#
# Reads :signature_class, :duration_ms, :status, :iteration_count,
# :code_executions, :final_answer and :error_message from +event.payload+.
# Optional fields are appended only when present; the error message is
# appended only when status is 'error'.
def log_codeact(event)
  payload = event.payload
  signature = payload[:signature_class]
  duration = payload[:duration_ms]&.round(2)
  status = payload[:status]
  iteration_count = payload[:iteration_count]
  code_executions = payload[:code_executions]
  final_answer = payload[:final_answer]
  timestamp = format_timestamp(payload)

  # compact drops the timestamp slot when format_timestamp returns nil.
  log_parts = [
    "event=codeact",
    timestamp,
    "signature=#{signature}",
    "status=#{status}",
    "duration_ms=#{duration}"
  ].compact
  log_parts << "iterations=#{iteration_count}" if iteration_count
  log_parts << "code_executions=#{code_executions}" if code_executions

  if final_answer
    # String#truncate exists only with ActiveSupport loaded, so shorten by
    # hand using the same 100-character rule as the other CodeAct/ReAct log
    # lines. to_s keeps a non-String answer from raising here.
    answer_text = final_answer.to_s
    answer_text = "#{answer_text[0..97]}..." if answer_text.length > 100
    log_parts << "final_answer=\"#{answer_text}\""
  end

  log_parts << "error=\"#{payload[:error_message]}\"" if status == 'error' && payload[:error_message]

  logger.info(log_parts.join(' '))
end
303
+
304
+ sig { params(event: T.untyped).void }
305
# Logs one key=value line for a finished CodeAct iteration.
#
# Reads :iteration, :thought, :ruby_code, :observation, :duration_ms, :status
# and :error_message from +event.payload+. Thought, code and observation are
# each shortened once they exceed 100 characters.
def log_codeact_iteration_complete(event)
  data = event.payload
  thought = data[:thought]
  ruby_code = data[:ruby_code]
  observation = data[:observation]
  elapsed = data[:duration_ms]&.round(2)
  state = data[:status]

  # Shared shortening rule for the free-text fields below.
  clip = ->(text) { text.length > 100 ? text[0..97] + '...' : text }

  # compact drops the timestamp slot when format_timestamp returns nil.
  fields = [
    "event=codeact_iteration",
    format_timestamp(data),
    "iteration=#{data[:iteration]}",
    "status=#{state}",
    "duration_ms=#{elapsed}"
  ].compact

  fields << "thought=\"#{clip.call(thought)}\"" if thought
  fields << "code=\"#{clip.call(ruby_code)}\"" if ruby_code
  fields << "observation=\"#{clip.call(observation)}\"" if observation
  fields << "error=\"#{data[:error_message]}\"" if state == 'error' && data[:error_message]

  logger.info(fields.join(' '))
end
329
+
330
+ sig { params(event: T.untyped).void }
331
# Logs one key=value line for a single CodeAct code execution.
#
# Reads :iteration, :ruby_code, :execution_result, :execution_error,
# :duration_ms, :status and :error_message from +event.payload+. The code is
# shortened past 50 characters and the result past 100; the execution error
# is logged in full.
def log_codeact_code_execution(event)
  data = event.payload
  ruby_code = data[:ruby_code]
  execution_result = data[:execution_result]
  execution_error = data[:execution_error]
  elapsed = data[:duration_ms]&.round(2)
  state = data[:status]

  # Keeps the first (limit - 2) characters and appends '...' once the text is
  # longer than +limit+.
  clip = ->(text, limit) { text.length > limit ? text[0..(limit - 3)] + '...' : text }

  # compact drops the timestamp slot when format_timestamp returns nil.
  fields = [
    "event=code_execution",
    format_timestamp(data),
    "iteration=#{data[:iteration]}",
    "status=#{state}",
    "duration_ms=#{elapsed}"
  ].compact

  fields << "code=\"#{clip.call(ruby_code, 50)}\"" if ruby_code
  fields << "result=\"#{clip.call(execution_result, 100)}\"" if execution_result
  fields << "execution_error=\"#{execution_error}\"" if execution_error
  fields << "error=\"#{data[:error_message]}\"" if state == 'error' && data[:error_message]

  logger.info(fields.join(' '))
end
355
+
266
356
  # Optimization event logging methods
267
357
  sig { params(event: T.untyped).void }
268
358
  def log_optimization_start(event)
@@ -195,7 +195,7 @@ module DSPy
195
195
 
196
196
  # Subclasses must implement their own call method with their own signature
197
197
 
198
- private
198
+ protected
199
199
 
200
200
  # Convert argument to the expected type based on JSON schema
201
201
  sig { params(value: T.untyped, schema: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+ require_relative 'toolset'
5
+
6
+ module DSPy
7
+ module Tools
8
+ # Example implementation of a memory toolset for agents
9
+ # Provides tools for storing, retrieving, and managing memory
10
+ class MemoryToolset < Toolset
11
+ extend T::Sig
12
+
13
+ toolset_name "memory"
14
+
15
+ # Expose methods as tools with descriptions
16
+ tool :store, description: "Store a key-value pair in memory with optional tags"
17
+ tool :retrieve, description: "Retrieve a value by key from memory"
18
+ tool :search, description: "Search memories by pattern in keys and/or values"
19
+ tool :list_keys, tool_name: "memory_list", description: "List all stored memory keys"
20
+ tool :update, description: "Update an existing memory value"
21
+ tool :delete, description: "Delete a memory by key"
22
+ tool :clear, description: "Clear all stored memories"
23
+ tool :count, description: "Get the count of stored memories"
24
+ tool :get_metadata, description: "Get metadata for a specific memory"
25
+
26
+ sig { void }
27
def initialize
  # Backing store for the tools below: memory key => entry hash
  # (:value, :tags, timestamps and :access_count are written by #store).
  empty_store = {}
  @memory = T.let(empty_store, T::Hash[String, T::Hash[Symbol, T.untyped]])
end
30
+
31
+ sig { params(key: String, value: String, tags: T.nilable(T::Array[String])).returns(String) }
32
# Stores +value+ under +key+, replacing any existing entry for that key
# (its access counter and creation time start over).
#
# key   - memory key
# value - text to remember
# tags  - optional labels; nil is stored as an empty array
#
# Returns a confirmation message naming the key.
def store(key:, value:, tags: nil)
  # Take the clock reading once so created_at and updated_at are exactly
  # equal on a freshly stored entry; two Time.now calls differ slightly.
  now = Time.now

  @memory[key] = {
    value: value,
    tags: tags || [],
    created_at: now,
    updated_at: now,
    access_count: 0
  }
  "Stored memory '#{key}' successfully"
end
42
+
43
+ sig { params(key: String).returns(T.nilable(String)) }
44
# Returns the value stored under +key+, or nil when the key is unknown.
# A successful read bumps the entry's :access_count and stamps
# :last_accessed_at with the current time.
def retrieve(key:)
  record = @memory[key]

  if record
    record[:access_count] += 1
    record[:last_accessed_at] = Time.now
    record[:value]
  end
end
53
+
54
+ sig { params(pattern: String, in_keys: T::Boolean, in_values: T::Boolean).returns(T::Array[T::Hash[Symbol, String]]) }
55
# Finds memories whose key and/or value match +pattern+, which is compiled
# as a case-insensitive regular expression.
#
# in_keys   - test the memory key
# in_values - test the stored value
#
# Returns an array of { key:, value: } hashes in stored order.
def search(pattern:, in_keys: true, in_values: true)
  matcher = Regexp.new(pattern, Regexp::IGNORECASE)

  @memory.each_with_object([]) do |(name, record), hits|
    text = record[:value]
    found = (in_keys && name.match?(matcher)) || (in_values && text.match?(matcher))
    hits << { key: name, value: text } if found
  end
end
66
+
67
+ sig { returns(T::Array[String]) }
68
# Returns every stored memory key, sorted.
def list_keys
  @memory.each_key.sort
end
71
+
72
+ sig { params(key: String, value: String).returns(String) }
73
# Overwrites the value of an existing memory and refreshes its :updated_at.
# Returns a confirmation message, or a "not found" message when +key+ has
# never been stored (nothing is created in that case).
def update(key:, value:)
  if @memory.key?(key)
    record = @memory[key]
    record[:value] = value
    record[:updated_at] = Time.now
    "Updated memory '#{key}' successfully"
  else
    "Memory '#{key}' not found"
  end
end
80
+
81
+ sig { params(key: String).returns(String) }
82
# Removes the memory stored under +key+.
# Returns a confirmation message, or a "not found" message when the key is
# unknown.
def delete(key:)
  if @memory.key?(key)
    @memory.delete(key)
    "Deleted memory '#{key}' successfully"
  else
    "Memory '#{key}' not found"
  end
end
88
+
89
+ sig { returns(String) }
90
# Empties the memory store and reports how many entries were removed.
def clear
  removed = @memory.length
  @memory.clear
  "Cleared #{removed} memories"
end
95
+
96
+ sig { returns(Integer) }
97
# Number of memories currently stored.
def count
  @memory.length
end
100
+
101
+ sig { params(key: String).returns(T.nilable(T::Hash[Symbol, T.untyped])) }
102
# Returns bookkeeping data for the memory stored under +key+, or nil when the
# key is unknown.
#
# The hash holds :created_at, :updated_at, :access_count, :last_accessed_at
# (nil when the entry has no such stamp yet), :tags, and :value_length (the
# length of the stored value). The value itself is not included.
def get_metadata(key:)
  record = @memory[key]
  return nil unless record

  copied_fields = %i[created_at updated_at access_count last_accessed_at tags]
  details = copied_fields.each_with_object({}) { |field, acc| acc[field] = record[field] }
  details[:value_length] = record[:value].length
  details
end
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+ require 'tempfile'
5
+ require 'set'
6
+ require_relative 'toolset'
7
+
8
+ module DSPy
9
+ module Tools
10
+ # Text processing toolset that provides text analysis and manipulation tools
11
+ # Includes grep, word count, ripgrep, and other text processing utilities
12
+ class TextProcessingToolset < Toolset
13
+ extend T::Sig
14
+
15
+ toolset_name "text"
16
+
17
+ # Expose methods as tools with descriptions
18
+ tool :grep, description: "Search for patterns in text using grep"
19
+ tool :word_count, tool_name: "text_wc", description: "Count lines, words, and characters in text"
20
+ tool :ripgrep, tool_name: "text_rg", description: "Fast text search using ripgrep"
21
+ tool :extract_lines, description: "Extract specific line ranges from text"
22
+ tool :filter_lines, description: "Filter lines matching or not matching a pattern"
23
+ tool :unique_lines, description: "Get unique lines from text"
24
+ tool :sort_lines, description: "Sort lines in text"
25
+ tool :summarize_text, description: "Generate statistical summary of text content"
26
+
27
+ sig { void }
28
def initialize
  # Deliberately empty: the text tools keep no state between calls.
end
31
+
32
+ sig { params(text: String, pattern: String, ignore_case: T::Boolean, count_only: T::Boolean).returns(String) }
33
# Searches +text+ for +pattern+ with the system `grep`.
#
# text        - text to search; it is written to a temporary file for grep
# pattern     - grep pattern
# ignore_case - pass -i (default true)
# count_only  - pass -c and report the number of matching lines
#
# Returns grep's output, a "No matches found" message when the output is
# empty, a "Found N matches" message with count_only, or an "Error running
# grep" message if anything raises (including grep not being installed).
def grep(text:, pattern:, ignore_case: true, count_only: false)
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  argv = ['grep']
  argv << '-i' if ignore_case
  argv << '-c' if count_only
  # -e marks the next argument as the pattern even when it starts with '-';
  # '--' ends option parsing before the file path.
  argv.push('-e', pattern, '--', temp_file.path)

  # The array form of IO.popen executes grep directly without a shell, so
  # quotes or metacharacters in the pattern cannot inject commands. stderr is
  # discarded, as the original `2>/dev/null` did.
  result = IO.popen(argv, err: File::NULL, &:read)

  if count_only
    "Found #{result.strip} matches for pattern '#{pattern}'"
  elsif result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running grep: #{e.message}"
ensure
  # Close and delete the temp file on every path, including errors.
  temp_file&.close!
end
58
+
59
+ sig { params(text: String, lines_only: T::Boolean, words_only: T::Boolean, chars_only: T::Boolean).returns(String) }
60
# Counts lines, whitespace-separated words and characters in +text+.
#
# At most one of the *_only flags is honoured, checked in the order lines,
# words, chars; with none set all three counts are reported.
def word_count(text:, lines_only: false, words_only: false, chars_only: false)
  line_total = text.lines.size
  word_total = text.split(/\s+/).count { |token| !token.empty? }
  char_total = text.length

  return "Lines: #{line_total}" if lines_only
  return "Words: #{word_total}" if words_only
  return "Characters: #{char_total}" if chars_only

  "Lines: #{line_total}, Words: #{word_total}, Characters: #{char_total}"
end
75
+
76
+ sig { params(text: String, pattern: String, context: Integer).returns(String) }
77
# Searches +text+ for +pattern+ with ripgrep (`rg`).
#
# text    - text to search; it is written to a temporary file for rg
# pattern - ripgrep pattern
# context - number of context lines around each match (-C), used when > 0
#
# Returns rg's output, a "No matches found" message when the output is empty,
# or an "Error running ripgrep" message if anything raises (including rg not
# being installed).
def ripgrep(text:, pattern:, context: 0)
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  argv = ['rg']
  argv.push('-C', context.to_s) if context > 0
  # -e marks the next argument as the pattern even when it starts with '-';
  # '--' ends option parsing before the file path.
  argv.push('-e', pattern, '--', temp_file.path)

  # The array form of IO.popen executes rg directly without a shell, so
  # quotes or metacharacters in the pattern cannot inject commands. stderr is
  # discarded, as the original `2>/dev/null` did.
  result = IO.popen(argv, err: File::NULL, &:read)

  if result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running ripgrep: #{e.message}"
ensure
  # Close and delete the temp file on every path, including errors.
  temp_file&.close!
end
98
+
99
+ sig { params(text: String, start_line: Integer, end_line: T.nilable(Integer)).returns(String) }
100
# Extracts lines from +text+ using 1-based, inclusive line numbers.
#
# start_line - first line to return; values below 1 are treated as 1
# end_line   - last line to return; clamped to the final line. When nil,
#              only the single line at start_line is returned.
#
# Returns the selected lines joined together, or "" when the range selects
# nothing (start beyond the text, or end_line before start_line).
def extract_lines(text:, start_line:, end_line: nil)
  lines = text.lines
  start_idx = [start_line - 1, 0].max # Convert to 0-based, ensure >= 0

  # No end given: just the one line at start_idx (nil when out of range).
  return lines[start_idx, 1]&.join || "" if end_line.nil?

  end_idx = [end_line - 1, lines.length - 1].min # Convert to 0-based, ensure <= last line

  # An end before the start selects nothing. Without this guard an
  # end_line <= 0 becomes a negative index, which Ruby counts from the end
  # of the array and would return lines the caller never asked for.
  return "" if end_idx < start_idx

  lines[start_idx..end_idx].join
end
113
+
114
+ sig { params(text: String, pattern: String, invert: T::Boolean).returns(String) }
115
# Keeps the lines of +text+ that match +pattern+ (compiled as a
# case-insensitive regular expression), or the lines that do not match when
# +invert+ is set. Line endings are preserved.
def filter_lines(text:, pattern:, invert: false)
  rows = text.lines
  matcher = Regexp.new(pattern, Regexp::IGNORECASE)

  matching, remaining = rows.partition { |row| row.match?(matcher) }
  (invert ? remaining : matching).join
end
127
+
128
+ sig { params(text: String, preserve_order: T::Boolean).returns(String) }
129
# Removes duplicate lines from +text+.
#
# With preserve_order the first occurrence of each line keeps its position;
# otherwise the distinct lines are returned sorted. Every output line ends
# with a newline, including the last one.
def unique_lines(text:, preserve_order: true)
  distinct = text.lines.map(&:chomp).uniq
  distinct = distinct.sort unless preserve_order

  distinct.map { |row| "#{row}\n" }.join
end
140
+
141
+ sig { params(text: String, reverse: T::Boolean, numeric: T::Boolean).returns(String) }
142
# Sorts the lines of +text+.
#
# numeric - order by each line's to_f value instead of by string comparison
# reverse - reverse the sorted order
#
# Every output line ends with a newline, including the last one.
def sort_lines(text:, reverse: false, numeric: false)
  rows = text.lines.map(&:chomp)
  ordered = numeric ? rows.sort_by(&:to_f) : rows.sort
  ordered = ordered.reverse if reverse

  ordered.map { |row| "#{row}\n" }.join
end
154
+
155
+ sig { params(text: String).returns(String) }
156
# Builds a plain-text statistical report for +text+: line, word and character
# counts, average line and word length (rounded to 2 decimals, 0 for empty
# input), and up to five of the most frequent words.
#
# For the frequency list, words are lower-cased and stripped of non-word
# characters, and words shorter than three characters are ignored.
def summarize_text(text:)
  rows = text.lines
  tokens = text.split(/\s+/).reject(&:empty?)
  char_total = text.length

  # Tally normalised words in first-seen order.
  frequencies = Hash.new(0)
  tokens.each { |token| frequencies[token.downcase.gsub(/[^\w]/, '')] += 1 }
  leaders = frequencies.select { |term, _| term.length >= 3 }.sort_by { |_, hits| -hits }.first(5)

  mean_line = rows.empty? ? 0 : (char_total.to_f / rows.count).round(2)
  mean_word = tokens.empty? ? 0 : (tokens.sum(&:length).to_f / tokens.count).round(2)

  report = [
    "Text Summary:",
    " Lines: #{rows.count}",
    " Words: #{tokens.count}",
    " Characters: #{char_total}",
    " Average line length: #{mean_line}",
    " Average word length: #{mean_word}"
  ]

  unless leaders.empty?
    report << " Most frequent words:"
    leaders.each { |term, hits| report << " #{term}: #{hits}" }
  end

  report.join("\n")
end
184
+ end
185
+ end
186
+ end