dspy 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/dspy/code_act.rb +463 -0
- data/lib/dspy/instrumentation.rb +15 -0
- data/lib/dspy/lm/adapters/anthropic_adapter.rb +106 -0
- data/lib/dspy/lm.rb +21 -7
- data/lib/dspy/memory/embedding_engine.rb +68 -0
- data/lib/dspy/memory/in_memory_store.rb +216 -0
- data/lib/dspy/memory/local_embedding_engine.rb +241 -0
- data/lib/dspy/memory/memory_compactor.rb +299 -0
- data/lib/dspy/memory/memory_manager.rb +248 -0
- data/lib/dspy/memory/memory_record.rb +163 -0
- data/lib/dspy/memory/memory_store.rb +90 -0
- data/lib/dspy/memory.rb +30 -0
- data/lib/dspy/mixins/instrumentation_helpers.rb +3 -5
- data/lib/dspy/mixins/type_coercion.rb +3 -0
- data/lib/dspy/prompt.rb +48 -1
- data/lib/dspy/subscribers/logger_subscriber.rb +91 -1
- data/lib/dspy/tools/base.rb +1 -1
- data/lib/dspy/tools/memory_toolset.rb +117 -0
- data/lib/dspy/tools/text_processing_toolset.rb +186 -0
- data/lib/dspy/tools/toolset.rb +223 -0
- data/lib/dspy/tools.rb +1 -0
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +2 -0
- metadata +28 -2
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sorbet-runtime'
|
4
|
+
|
5
|
+
module DSPy
|
6
|
+
module Memory
|
7
|
+
# Abstract base class for memory storage backends
|
8
|
+
class MemoryStore
  extend T::Sig
  extend T::Helpers
  abstract!

  # ---- Operations every backend must implement ----------------------

  # Persists a single record.
  # NOTE(review): the Boolean result presumably signals success - confirm
  # against the concrete stores.
  sig { abstract.params(record: MemoryRecord).returns(T::Boolean) }
  def store(record); end

  # Looks a record up by its ID; nil is a permitted result.
  sig { abstract.params(id: String).returns(T.nilable(MemoryRecord)) }
  def retrieve(id); end

  # Replaces the stored state of an existing record.
  sig { abstract.params(record: MemoryRecord).returns(T::Boolean) }
  def update(record); end

  # Removes the record with the given ID.
  sig { abstract.params(id: String).returns(T::Boolean) }
  def delete(id); end

  # Lists records, optionally scoped to one user and paged with
  # limit/offset.
  sig { abstract.params(user_id: T.nilable(String), limit: T.nilable(Integer), offset: T.nilable(Integer)).returns(T::Array[MemoryRecord]) }
  def list(user_id: nil, limit: nil, offset: nil); end

  # Basic text search over record content.
  sig { abstract.params(query: String, user_id: T.nilable(String), limit: T.nilable(Integer)).returns(T::Array[MemoryRecord]) }
  def search(query, user_id: nil, limit: nil); end

  # Finds records by tag.
  sig { abstract.params(tags: T::Array[String], user_id: T.nilable(String), limit: T.nilable(Integer)).returns(T::Array[MemoryRecord]) }
  def search_by_tags(tags, user_id: nil, limit: nil); end

  # Vector similarity search, for backends that support it
  # (see #supports_vector_search?).
  sig { abstract.params(embedding: T::Array[Float], user_id: T.nilable(String), limit: T.nilable(Integer), threshold: T.nilable(Float)).returns(T::Array[MemoryRecord]) }
  def vector_search(embedding, user_id: nil, limit: nil, threshold: nil); end

  # Number of stored memories, optionally for a single user.
  sig { abstract.params(user_id: T.nilable(String)).returns(Integer) }
  def count(user_id: nil); end

  # Clears one user's memories, or every memory when user_id is nil.
  sig { abstract.params(user_id: T.nilable(String)).returns(Integer) }
  def clear(user_id: nil); end

  # ---- Defaults shared by all backends ------------------------------

  # Whether this store can serve #vector_search. The base class answers
  # false; backends with vector support are expected to override it.
  sig { returns(T::Boolean) }
  def supports_vector_search?
    false
  end

  # Summary of the store: the unscoped #count plus the vector-search flag.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def stats
    summary = {}
    summary[:total_memories] = count
    summary[:supports_vector_search] = supports_vector_search?
    summary
  end

  # ---- Batch helpers --------------------------------------------------
  # Each helper calls the single-record operation once per element and
  # returns the individual results in input order.

  sig { params(records: T::Array[MemoryRecord]).returns(T::Array[T::Boolean]) }
  def store_batch(records)
    records.map { |memory| store(memory) }
  end

  sig { params(ids: T::Array[String]).returns(T::Array[T.nilable(MemoryRecord)]) }
  def retrieve_batch(ids)
    ids.map { |memory_id| retrieve(memory_id) }
  end

  sig { params(records: T::Array[MemoryRecord]).returns(T::Array[T::Boolean]) }
  def update_batch(records)
    records.map { |memory| update(memory) }
  end

  sig { params(ids: T::Array[String]).returns(T::Array[T::Boolean]) }
  def delete_batch(ids)
    ids.map { |memory_id| delete(memory_id) }
  end
end
|
89
|
+
end
|
90
|
+
end
|
data/lib/dspy/memory.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'memory/memory_record'
|
4
|
+
require_relative 'memory/memory_store'
|
5
|
+
require_relative 'memory/in_memory_store'
|
6
|
+
require_relative 'memory/embedding_engine'
|
7
|
+
require_relative 'memory/local_embedding_engine'
|
8
|
+
require_relative 'memory/memory_compactor'
|
9
|
+
require_relative 'memory/memory_manager'
|
10
|
+
|
11
|
+
module DSPy
|
12
|
+
# Memory system for persistent, searchable agent memory
|
13
|
+
module Memory
  class << self
    extend T::Sig

    # Returns the module-wide MemoryManager, building it on first use and
    # reusing the same instance on later calls.
    sig { returns(MemoryManager) }
    def manager
      return @manager if @manager

      @manager = MemoryManager.new
    end

    # Drops the memoized manager so the next #manager call builds a fresh
    # one (useful for testing).
    sig { void }
    def reset!
      @manager = nil
    end
  end
end
|
30
|
+
end
|
@@ -65,7 +65,7 @@ module DSPy
|
|
65
65
|
# Smart consolidation: skip nested events when higher-level events are being emitted
|
66
66
|
if is_nested_context?
|
67
67
|
# If we're in a nested context, only emit higher-level events
|
68
|
-
event_name.match?(/^dspy\.(chain_of_thought|react)$/)
|
68
|
+
event_name.match?(/^dspy\.(chain_of_thought|react|codeact)$/)
|
69
69
|
else
|
70
70
|
# If we're not in a nested context, emit all events normally
|
71
71
|
true
|
@@ -103,11 +103,9 @@ module DSPy
|
|
103
103
|
return false if caller_locations.nil?
|
104
104
|
|
105
105
|
# Look for higher-level DSPy modules in the call stack
|
106
|
-
# We consider ChainOfThought and
|
106
|
+
# We consider ChainOfThought, ReAct, and CodeAct as higher-level modules
|
107
107
|
higher_level_modules = caller_locations.select do |loc|
|
108
|
-
loc.path.
|
109
|
-
loc.path.include?('re_act') ||
|
110
|
-
loc.path.include?('react')
|
108
|
+
loc.path.match?(/(?:chain_of_thought|re_act|react|code_act)/)
|
111
109
|
end
|
112
110
|
|
113
111
|
# If we have higher-level modules in the call stack, we're in a nested context
|
@@ -25,6 +25,9 @@ module DSPy
|
|
25
25
|
sig { params(value: T.untyped, prop_type: T.untyped).returns(T.untyped) }
|
26
26
|
def coerce_value_to_type(value, prop_type)
|
27
27
|
return value unless prop_type
|
28
|
+
|
29
|
+
# If value is nil, return it as-is for nilable types
|
30
|
+
return value if value.nil?
|
28
31
|
|
29
32
|
case prop_type
|
30
33
|
when ->(type) { enum_type?(type) }
|
data/lib/dspy/prompt.rb
CHANGED
@@ -121,7 +121,7 @@ module DSPy
|
|
121
121
|
|
122
122
|
sections << "## Input Values"
|
123
123
|
sections << "```json"
|
124
|
-
sections << JSON.pretty_generate(input_values)
|
124
|
+
sections << JSON.pretty_generate(serialize_for_json(input_values))
|
125
125
|
sections << "```"
|
126
126
|
|
127
127
|
sections << ""
|
@@ -218,5 +218,52 @@ module DSPy
|
|
218
218
|
output_fields: @output_schema.dig(:properties)&.keys&.length || 0
|
219
219
|
}
|
220
220
|
end
|
221
|
+
|
222
|
+
private
|
223
|
+
|
224
|
+
# Recursively serialize complex objects for JSON representation
|
225
|
+
sig { params(obj: T.untyped).returns(T.untyped) }
|
226
|
+
def serialize_for_json(obj)
  # Walk the value recursively, replacing Sorbet structs and enums with
  # plain data; every other value is returned untouched.
  case obj
  when T::Struct
    # Structs without #to_h go through the props-based fallback.
    return serialize_struct_to_hash(obj) unless obj.respond_to?(:to_h)

    serialize_for_json(obj.to_h)
  when Hash
    # Only the values are converted; keys are kept as they are.
    obj.transform_values { |entry| serialize_for_json(entry) }
  when Array
    obj.map { |element| serialize_for_json(element) }
  when T::Enum
    obj.serialize
  else
    obj
  end
end
|
250
|
+
|
251
|
+
# Fallback method to serialize T::Struct to hash when to_h is not available
|
252
|
+
sig { params(struct_obj: T::Struct).returns(T::Hash[Symbol, T.untyped]) }
|
253
|
+
def serialize_struct_to_hash(struct_obj)
  struct_class = struct_obj.class
  # Without a props listing there is nothing to enumerate.
  return {} unless struct_class.respond_to?(:props)

  # One entry per declared prop that the instance can actually read,
  # with the value passed back through serialize_for_json.
  struct_class.props.each_with_object({}) do |(prop_name, _prop_info), serialized|
    next unless struct_obj.respond_to?(prop_name)

    serialized[prop_name] = serialize_for_json(struct_obj.public_send(prop_name))
  end
end
|
221
268
|
end
|
222
269
|
end
|
@@ -52,6 +52,18 @@ module DSPy
|
|
52
52
|
log_react_tool_call(event)
|
53
53
|
end
|
54
54
|
|
55
|
+
DSPy::Instrumentation.subscribe('dspy.codeact') do |event|
|
56
|
+
log_codeact(event)
|
57
|
+
end
|
58
|
+
|
59
|
+
DSPy::Instrumentation.subscribe('dspy.codeact.iteration_complete') do |event|
|
60
|
+
log_codeact_iteration_complete(event)
|
61
|
+
end
|
62
|
+
|
63
|
+
DSPy::Instrumentation.subscribe('dspy.codeact.code_execution') do |event|
|
64
|
+
log_codeact_code_execution(event)
|
65
|
+
end
|
66
|
+
|
55
67
|
# Subscribe to optimization events
|
56
68
|
DSPy::Instrumentation.subscribe('dspy.optimization.start') do |event|
|
57
69
|
log_optimization_start(event)
|
@@ -236,7 +248,7 @@ module DSPy
|
|
236
248
|
"status=#{status}",
|
237
249
|
"duration_ms=#{duration}"
|
238
250
|
]
|
239
|
-
log_parts << "thought=\"#{thought
|
251
|
+
log_parts << "thought=\"#{thought && thought.length > 100 ? thought[0..97] + '...' : thought}\"" if thought
|
240
252
|
log_parts << "action=\"#{action}\"" if action
|
241
253
|
log_parts << "error=\"#{payload[:error_message]}\"" if status == 'error' && payload[:error_message]
|
242
254
|
|
@@ -263,6 +275,84 @@ module DSPy
|
|
263
275
|
logger.info(log_parts.join(' '))
|
264
276
|
end
|
265
277
|
|
278
|
+
sig { params(event: T.untyped).void }
|
279
|
+
# Writes a single key=value info line summarising a CodeAct run.
#
# Reads these keys from event.payload: :signature_class, :duration_ms,
# :status, :iteration_count, :code_executions, :final_answer and
# :error_message (the last is logged only when status is 'error').
#
# The final answer is capped at 100 characters (97 characters plus '...').
# The cap is done inline because String#truncate only exists when
# ActiveSupport is loaded; core Ruby strings raise NoMethodError for it.
def log_codeact(event)
  payload = event.payload
  status = payload[:status]
  duration = payload[:duration_ms]&.round(2)
  iteration_count = payload[:iteration_count]
  code_executions = payload[:code_executions]
  final_answer = payload[:final_answer]

  # compact drops the timestamp entry when format_timestamp yields nil.
  log_parts = [
    "event=codeact",
    format_timestamp(payload),
    "signature=#{payload[:signature_class]}",
    "status=#{status}",
    "duration_ms=#{duration}"
  ].compact
  log_parts << "iterations=#{iteration_count}" if iteration_count
  log_parts << "code_executions=#{code_executions}" if code_executions

  if final_answer
    # to_s keeps non-String answers from raising while being logged.
    answer = final_answer.to_s
    answer = "#{answer[0, 97]}..." if answer.length > 100
    log_parts << "final_answer=\"#{answer}\""
  end

  log_parts << "error=\"#{payload[:error_message]}\"" if status == 'error' && payload[:error_message]

  logger.info(log_parts.join(' '))
end
|
303
|
+
|
304
|
+
sig { params(event: T.untyped).void }
|
305
|
+
# Writes a single key=value info line for one finished CodeAct iteration.
#
# Reads these keys from event.payload: :iteration, :thought, :ruby_code,
# :observation, :duration_ms, :status and :error_message (the last is
# logged only when status is 'error').
#
# Thought, code and observation are each capped at 100 characters.
def log_codeact_iteration_complete(event)
  payload = event.payload
  status = payload[:status]
  duration = payload[:duration_ms]&.round(2)

  # Caps a value at 100 characters in total (97 characters plus '...').
  # A 0..97 slice would keep 98 characters and overshoot the limit by one.
  # to_s keeps non-String values from raising while being logged.
  shorten = lambda do |value|
    text = value.to_s
    text.length > 100 ? "#{text[0, 97]}..." : text
  end

  # compact drops the timestamp entry when format_timestamp yields nil.
  log_parts = [
    "event=codeact_iteration",
    format_timestamp(payload),
    "iteration=#{payload[:iteration]}",
    "status=#{status}",
    "duration_ms=#{duration}"
  ].compact

  thought = payload[:thought]
  ruby_code = payload[:ruby_code]
  observation = payload[:observation]
  log_parts << "thought=\"#{shorten.call(thought)}\"" if thought
  log_parts << "code=\"#{shorten.call(ruby_code)}\"" if ruby_code
  log_parts << "observation=\"#{shorten.call(observation)}\"" if observation
  log_parts << "error=\"#{payload[:error_message]}\"" if status == 'error' && payload[:error_message]

  logger.info(log_parts.join(' '))
end
|
329
|
+
|
330
|
+
sig { params(event: T.untyped).void }
|
331
|
+
# Writes a single key=value info line for one CodeAct code execution.
#
# Reads these keys from event.payload: :iteration, :ruby_code,
# :execution_result, :execution_error, :duration_ms, :status and
# :error_message (the last is logged only when status is 'error').
#
# The code snippet is capped at 50 characters and the result at 100;
# the execution error is logged in full.
def log_codeact_code_execution(event)
  payload = event.payload
  status = payload[:status]
  duration = payload[:duration_ms]&.round(2)

  # Caps a value at `limit` characters in total, ellipsis included.
  # A 0..(limit - 3) slice would overshoot the limit by one character.
  # to_s keeps non-String results from raising while being logged.
  shorten = lambda do |value, limit|
    text = value.to_s
    text.length > limit ? "#{text[0, limit - 3]}..." : text
  end

  # compact drops the timestamp entry when format_timestamp yields nil.
  log_parts = [
    "event=code_execution",
    format_timestamp(payload),
    "iteration=#{payload[:iteration]}",
    "status=#{status}",
    "duration_ms=#{duration}"
  ].compact

  ruby_code = payload[:ruby_code]
  execution_result = payload[:execution_result]
  execution_error = payload[:execution_error]
  log_parts << "code=\"#{shorten.call(ruby_code, 50)}\"" if ruby_code
  log_parts << "result=\"#{shorten.call(execution_result, 100)}\"" if execution_result
  log_parts << "execution_error=\"#{execution_error}\"" if execution_error
  log_parts << "error=\"#{payload[:error_message]}\"" if status == 'error' && payload[:error_message]

  logger.info(log_parts.join(' '))
end
|
355
|
+
|
266
356
|
# Optimization event logging methods
|
267
357
|
sig { params(event: T.untyped).void }
|
268
358
|
def log_optimization_start(event)
|
data/lib/dspy/tools/base.rb
CHANGED
@@ -195,7 +195,7 @@ module DSPy
|
|
195
195
|
|
196
196
|
# Subclasses must implement their own call method with their own signature
|
197
197
|
|
198
|
-
|
198
|
+
protected
|
199
199
|
|
200
200
|
# Convert argument to the expected type based on JSON schema
|
201
201
|
sig { params(value: T.untyped, schema: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sorbet-runtime'
|
4
|
+
require_relative 'toolset'
|
5
|
+
|
6
|
+
module DSPy
|
7
|
+
module Tools
|
8
|
+
# Example implementation of a memory toolset for agents
|
9
|
+
# Provides tools for storing, retrieving, and managing memory
|
10
|
+
class MemoryToolset < Toolset
  extend T::Sig

  toolset_name "memory"

  # Expose methods as tools with descriptions
  tool :store, description: "Store a key-value pair in memory with optional tags"
  tool :retrieve, description: "Retrieve a value by key from memory"
  tool :search, description: "Search memories by pattern in keys and/or values"
  tool :list_keys, tool_name: "memory_list", description: "List all stored memory keys"
  tool :update, description: "Update an existing memory value"
  tool :delete, description: "Delete a memory by key"
  tool :clear, description: "Clear all stored memories"
  tool :count, description: "Get the count of stored memories"
  tool :get_metadata, description: "Get metadata for a specific memory"

  # Starts with an empty in-process hash; entries live only as long as
  # this toolset instance.
  sig { void }
  def initialize
    @memory = T.let({}, T::Hash[String, T::Hash[Symbol, T.untyped]])
  end

  # Stores +value+ under +key+, replacing any existing entry (which also
  # resets its timestamps and access count).
  sig { params(key: String, value: String, tags: T.nilable(T::Array[String])).returns(String) }
  def store(key:, value:, tags: nil)
    # One clock read so created_at and updated_at are identical on a
    # freshly stored entry.
    now = Time.now
    @memory[key] = {
      value: value,
      tags: tags || [],
      created_at: now,
      updated_at: now,
      access_count: 0
    }
    "Stored memory '#{key}' successfully"
  end

  # Returns the value stored under +key+, or nil when the key is unknown.
  # A successful read bumps the entry's access counter and access time.
  sig { params(key: String).returns(T.nilable(String)) }
  def retrieve(key:)
    entry = @memory[key]
    return nil unless entry

    # Track access
    entry[:access_count] += 1
    entry[:last_accessed_at] = Time.now
    entry[:value]
  end

  # Case-insensitive search over keys and/or values.
  #
  # +pattern+ is compiled as a regular expression. A pattern that is not
  # a valid regex (e.g. "foo(") is matched as literal text instead of
  # raising RegexpError, so a malformed tool argument still returns a
  # result list.
  sig { params(pattern: String, in_keys: T::Boolean, in_values: T::Boolean).returns(T::Array[T::Hash[Symbol, String]]) }
  def search(pattern:, in_keys: true, in_values: true)
    regex = begin
      Regexp.new(pattern, Regexp::IGNORECASE)
    rescue RegexpError
      Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE)
    end

    @memory.each_with_object([]) do |(key, entry), results|
      matched = (in_keys && key.match?(regex)) || (in_values && entry[:value].match?(regex))
      results << { key: key, value: entry[:value] } if matched
    end
  end

  # All stored keys in sorted order.
  sig { returns(T::Array[String]) }
  def list_keys
    @memory.keys.sort
  end

  # Replaces the value of an existing entry and refreshes updated_at.
  # Returns a "not found" message when the key is unknown.
  sig { params(key: String, value: String).returns(String) }
  def update(key:, value:)
    entry = @memory[key]
    return "Memory '#{key}' not found" unless entry

    entry[:value] = value
    entry[:updated_at] = Time.now
    "Updated memory '#{key}' successfully"
  end

  # Removes the entry for +key+, or reports that it does not exist.
  sig { params(key: String).returns(String) }
  def delete(key:)
    return "Memory '#{key}' not found" unless @memory.key?(key)

    @memory.delete(key)
    "Deleted memory '#{key}' successfully"
  end

  # Empties the store and reports how many entries were removed.
  sig { returns(String) }
  def clear
    # Named `removed` so the local does not shadow the #count method.
    removed = @memory.size
    @memory.clear
    "Cleared #{removed} memories"
  end

  # Number of entries currently stored.
  sig { returns(Integer) }
  def count
    @memory.size
  end

  # Bookkeeping for one entry (timestamps, access count, tags, value
  # length), or nil when the key is unknown. last_accessed_at is nil
  # until the entry has been read through #retrieve.
  sig { params(key: String).returns(T.nilable(T::Hash[Symbol, T.untyped])) }
  def get_metadata(key:)
    entry = @memory[key]
    return nil unless entry

    {
      created_at: entry[:created_at],
      updated_at: entry[:updated_at],
      access_count: entry[:access_count],
      last_accessed_at: entry[:last_accessed_at],
      tags: entry[:tags],
      value_length: entry[:value].length
    }
  end
end
|
116
|
+
end
|
117
|
+
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sorbet-runtime'
|
4
|
+
require 'tempfile'
|
5
|
+
require 'set'
|
6
|
+
require_relative 'toolset'
|
7
|
+
|
8
|
+
module DSPy
|
9
|
+
module Tools
|
10
|
+
# Text processing toolset that provides text analysis and manipulation tools
|
11
|
+
# Includes grep, word count, ripgrep, and other text processing utilities
|
12
|
+
class TextProcessingToolset < Toolset
  extend T::Sig

  toolset_name "text"

  # Expose methods as tools with descriptions
  tool :grep, description: "Search for patterns in text using grep"
  tool :word_count, tool_name: "text_wc", description: "Count lines, words, and characters in text"
  tool :ripgrep, tool_name: "text_rg", description: "Fast text search using ripgrep"
  tool :extract_lines, description: "Extract specific line ranges from text"
  tool :filter_lines, description: "Filter lines matching or not matching a pattern"
  tool :unique_lines, description: "Get unique lines from text"
  tool :sort_lines, description: "Sort lines in text"
  tool :summarize_text, description: "Generate statistical summary of text content"

  sig { void }
  def initialize
    # No persistent state needed for text processing
  end

  # Searches +text+ with the system `grep` binary.
  #
  # With count_only the reply reports grep's `-c` count; otherwise it is
  # the matching lines or a "no matches" message. Failures (grep missing,
  # invalid pattern, ...) are reported as an error string, never raised.
  sig { params(text: String, pattern: String, ignore_case: T::Boolean, count_only: T::Boolean).returns(String) }
  def grep(text:, pattern:, ignore_case: true, count_only: false)
    command = ['grep']
    command << '-i' if ignore_case
    command << '-c' if count_only

    output, exit_code = run_search_command(command, pattern, text)
    # grep exits 0 on a match and 1 on no match; anything else is a failure
    # whose output must not be presented as a result.
    return "Error running grep: grep exited with status #{exit_code.inspect}" unless [0, 1].include?(exit_code)

    if count_only
      "Found #{output.strip} matches for pattern '#{pattern}'"
    elsif output.empty?
      "No matches found for pattern '#{pattern}'"
    else
      output
    end
  rescue => e
    "Error running grep: #{e.message}"
  end

  # Counts lines, whitespace-separated words and characters in +text+.
  # The first *_only flag that is set (lines, then words, then chars)
  # selects a single figure; with none set all three are reported.
  sig { params(text: String, lines_only: T::Boolean, words_only: T::Boolean, chars_only: T::Boolean).returns(String) }
  def word_count(text:, lines_only: false, words_only: false, chars_only: false)
    lines = text.lines.count
    words = text.split(/\s+/).reject(&:empty?).count
    chars = text.length

    if lines_only
      "Lines: #{lines}"
    elsif words_only
      "Words: #{words}"
    elsif chars_only
      "Characters: #{chars}"
    else
      "Lines: #{lines}, Words: #{words}, Characters: #{chars}"
    end
  end

  # Searches +text+ with the `rg` (ripgrep) binary, optionally showing
  # +context+ lines around each match. Failures are reported as an error
  # string, never raised.
  sig { params(text: String, pattern: String, context: Integer).returns(String) }
  def ripgrep(text:, pattern:, context: 0)
    command = ['rg']
    command.push('-C', context.to_s) if context.positive?

    output, exit_code = run_search_command(command, pattern, text)
    # rg follows grep's convention: 0 = match, 1 = no match, other = error.
    return "Error running ripgrep: rg exited with status #{exit_code.inspect}" unless [0, 1].include?(exit_code)

    if output.empty?
      "No matches found for pattern '#{pattern}'"
    else
      output
    end
  rescue => e
    "Error running ripgrep: #{e.message}"
  end

  # Returns lines start_line..end_line (1-based, inclusive) of +text+, or
  # only start_line when end_line is omitted. Out-of-range requests and
  # an end before the start yield an empty string.
  sig { params(text: String, start_line: Integer, end_line: T.nilable(Integer)).returns(String) }
  def extract_lines(text:, start_line:, end_line: nil)
    lines = text.lines
    start_idx = [start_line - 1, 0].max # Convert to 0-based, ensure >= 0

    unless end_line
      single = lines[start_idx, 1] # Just one line; nil when past the end
      return single ? single.join : ""
    end

    end_idx = [end_line - 1, lines.length - 1].min # Convert to 0-based, ensure <= last line
    # Without this guard a zero/negative end_line becomes a negative index,
    # which Ruby would interpret as counting from the end of the array.
    return "" if end_idx < start_idx

    lines[start_idx..end_idx].join
  end

  # Keeps the lines matching +pattern+ (case-insensitive regex), or the
  # non-matching lines when +invert+ is true. An invalid regex is reported
  # as an error string, matching how grep/ripgrep report failures.
  sig { params(text: String, pattern: String, invert: T::Boolean).returns(String) }
  def filter_lines(text:, pattern:, invert: false)
    regex = Regexp.new(pattern, Regexp::IGNORECASE)
    lines = text.lines

    filtered = if invert
      lines.reject { |line| line.match?(regex) }
    else
      lines.select { |line| line.match?(regex) }
    end

    filtered.join
  rescue RegexpError => e
    "Error filtering lines: #{e.message}"
  end

  # Removes duplicate lines. With preserve_order the first occurrence of
  # each line keeps its position; otherwise the unique lines are sorted.
  # Every output line ends with a newline.
  sig { params(text: String, preserve_order: T::Boolean).returns(String) }
  def unique_lines(text:, preserve_order: true)
    unique = text.lines.map(&:chomp).uniq
    unique = unique.sort unless preserve_order

    unique.map { |line| "#{line}\n" }.join
  end

  # Sorts lines lexically, or by their leading numeric value (String#to_f)
  # when +numeric+ is true; +reverse+ flips the final order. Every output
  # line ends with a newline.
  sig { params(text: String, reverse: T::Boolean, numeric: T::Boolean).returns(String) }
  def sort_lines(text:, reverse: false, numeric: false)
    lines = text.lines.map(&:chomp)

    sorted = if numeric
      lines.sort_by { |line| line.to_f }
    else
      lines.sort
    end

    sorted.reverse! if reverse
    sorted.map { |line| "#{line}\n" }.join
  end

  # Builds a plain-text statistical summary: line/word/character counts,
  # average line and word length, and up to five of the most frequent
  # words of three or more characters.
  sig { params(text: String).returns(String) }
  def summarize_text(text:)
    lines = text.lines
    words = text.split(/\s+/).reject(&:empty?)
    chars = text.length

    # Find most common words (simple analysis): case-folded, with
    # non-word characters stripped before counting.
    word_freq = words.each_with_object(Hash.new(0)) { |word, hash| hash[word.downcase.gsub(/[^\w]/, '')] += 1 }
    top_words = word_freq.reject { |word, _| word.length < 3 }.sort_by { |_, count| -count }.first(5)

    # Basic text statistics
    avg_line_length = lines.empty? ? 0 : (chars.to_f / lines.count).round(2)
    avg_word_length = words.empty? ? 0 : (words.sum(&:length).to_f / words.count).round(2)

    summary = []
    summary << "Text Summary:"
    summary << "  Lines: #{lines.count}"
    summary << "  Words: #{words.count}"
    summary << "  Characters: #{chars}"
    summary << "  Average line length: #{avg_line_length}"
    summary << "  Average word length: #{avg_word_length}"

    unless top_words.empty?
      summary << "  Most frequent words:"
      top_words.each { |word, count| summary << "    #{word}: #{count}" }
    end

    summary.join("\n")
  end

  private

  # Runs an external grep-style command against +text+ and returns its
  # stdout together with the process exit status.
  #
  # The command is passed to IO.popen as an argument array, so no shell is
  # involved and +pattern+ cannot inject extra commands. `-e` marks the
  # pattern explicitly (safe for patterns starting with '-') and `--` ends
  # option parsing before the file path. The temporary file holding +text+
  # is removed when the block exits, including when an exception is raised.
  sig { params(command: T::Array[String], pattern: String, text: String).returns([String, T.nilable(Integer)]) }
  def run_search_command(command, pattern, text)
    Tempfile.create('text_processing') do |file|
      file.write(text)
      file.close

      output = IO.popen([*command, '-e', pattern, '--', file.path], err: File::NULL, &:read)
      [output, Process.last_status&.exitstatus]
    end
  end
end
|
185
|
+
end
|
186
|
+
end
|