dexter_llm 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +1246 -0
  4. data/lib/dexter_llm/adapters/anthropic.rb +513 -0
  5. data/lib/dexter_llm/adapters/base.rb +61 -0
  6. data/lib/dexter_llm/adapters/google.rb +392 -0
  7. data/lib/dexter_llm/adapters/openai.rb +415 -0
  8. data/lib/dexter_llm/agent/agent.rb +277 -0
  9. data/lib/dexter_llm/agent/agent_busy_error.rb +9 -0
  10. data/lib/dexter_llm/agent/console.rb +525 -0
  11. data/lib/dexter_llm/agent/error.rb +5 -0
  12. data/lib/dexter_llm/agent/event.rb +27 -0
  13. data/lib/dexter_llm/agent/loop.rb +256 -0
  14. data/lib/dexter_llm/agent/max_iterations_error.rb +9 -0
  15. data/lib/dexter_llm/agent/session.rb +271 -0
  16. data/lib/dexter_llm/agent/state.rb +75 -0
  17. data/lib/dexter_llm/api.rb +9 -0
  18. data/lib/dexter_llm/api_error.rb +55 -0
  19. data/lib/dexter_llm/assistant_message.rb +47 -0
  20. data/lib/dexter_llm/authentication_error.rb +5 -0
  21. data/lib/dexter_llm/built_in_tool.rb +68 -0
  22. data/lib/dexter_llm/built_in_tools/web_fetch.rb +92 -0
  23. data/lib/dexter_llm/built_in_tools/web_search.rb +84 -0
  24. data/lib/dexter_llm/cancellation_signal.rb +31 -0
  25. data/lib/dexter_llm/cancelled_error.rb +12 -0
  26. data/lib/dexter_llm/client.rb +410 -0
  27. data/lib/dexter_llm/configuration.rb +119 -0
  28. data/lib/dexter_llm/content.rb +338 -0
  29. data/lib/dexter_llm/context_overflow_error.rb +5 -0
  30. data/lib/dexter_llm/documents/ingestor.rb +107 -0
  31. data/lib/dexter_llm/documents/store.rb +46 -0
  32. data/lib/dexter_llm/documents/stored_document.rb +27 -0
  33. data/lib/dexter_llm/documents/stores/file_system.rb +131 -0
  34. data/lib/dexter_llm/error.rb +5 -0
  35. data/lib/dexter_llm/instrumentation.rb +11 -0
  36. data/lib/dexter_llm/invalid_request_error.rb +5 -0
  37. data/lib/dexter_llm/message.rb +30 -0
  38. data/lib/dexter_llm/message_transformer.rb +90 -0
  39. data/lib/dexter_llm/model.rb +52 -0
  40. data/lib/dexter_llm/models/catalog.yml +324 -0
  41. data/lib/dexter_llm/models.rb +99 -0
  42. data/lib/dexter_llm/pricing.rb +46 -0
  43. data/lib/dexter_llm/prompt/materializer.rb +121 -0
  44. data/lib/dexter_llm/provider.rb +9 -0
  45. data/lib/dexter_llm/rate_limit_error.rb +5 -0
  46. data/lib/dexter_llm/retry_policy.rb +25 -0
  47. data/lib/dexter_llm/schema/builder.rb +258 -0
  48. data/lib/dexter_llm/schema/coercer.rb +159 -0
  49. data/lib/dexter_llm/schema/validator.rb +212 -0
  50. data/lib/dexter_llm/schema.rb +66 -0
  51. data/lib/dexter_llm/session/compaction.rb +216 -0
  52. data/lib/dexter_llm/session/compaction_settings.rb +17 -0
  53. data/lib/dexter_llm/session/entry.rb +589 -0
  54. data/lib/dexter_llm/session/error.rb +10 -0
  55. data/lib/dexter_llm/session/loaded_session.rb +18 -0
  56. data/lib/dexter_llm/session/manager.rb +181 -0
  57. data/lib/dexter_llm/session/store.rb +17 -0
  58. data/lib/dexter_llm/session/stores/jsonl_file.rb +99 -0
  59. data/lib/dexter_llm/stop_reason.rb +11 -0
  60. data/lib/dexter_llm/stream_event.rb +225 -0
  61. data/lib/dexter_llm/streaming/events.rb +7 -0
  62. data/lib/dexter_llm/streaming/sse_parser.rb +69 -0
  63. data/lib/dexter_llm/summary_message.rb +27 -0
  64. data/lib/dexter_llm/thinking_level.rb +31 -0
  65. data/lib/dexter_llm/token_estimator.rb +58 -0
  66. data/lib/dexter_llm/tool.rb +208 -0
  67. data/lib/dexter_llm/tool_result_message.rb +32 -0
  68. data/lib/dexter_llm/unsupported_content_error.rb +5 -0
  69. data/lib/dexter_llm/usage.rb +107 -0
  70. data/lib/dexter_llm/user_message.rb +23 -0
  71. data/lib/dexter_llm/version.rb +5 -0
  72. data/lib/dexter_llm.rb +103 -0
  73. metadata +158 -0
@@ -0,0 +1,212 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DexterLlm
4
+ module Schema
5
# Raised by Validator#validate! when the arguments do not satisfy the schema.
class ValidationError < DexterLlm::Error
  # The individual error messages; they are joined with ", " to build the
  # exception message.
  attr_reader :errors

  # @param errors [Array<String>] one message per validation failure
  def initialize(errors)
    @errors = errors
    details = errors.join(", ")
    super("Validation failed: #{details}")
  end
end
13
+
14
# Validates an arguments Hash against a JSON-Schema-style Hash with String keys.
#
# Supported keywords: type, properties, required, anyOf, enum, minLength,
# maxLength, pattern, minimum, maximum, exclusiveMinimum, exclusiveMaximum,
# multipleOf, minItems, maxItems and items. Properties the schema does not
# declare are ignored, and a nil value is accepted wherever the value is not
# listed in the enclosing object's "required" array.
class Validator
  # @param schema [Hash] root schema; nothing is checked unless its "type" is "object"
  def initialize(schema)
    @schema = schema
  end

  # @param arguments [Hash] arguments keyed by String or Symbol
  # @return [true] when the arguments satisfy the schema
  # @raise [ValidationError] carrying every collected error message
  def validate!(arguments)
    errors = validate(arguments)
    raise ValidationError.new(errors) if errors.any?

    true
  end

  # @param arguments [Hash] arguments keyed by String or Symbol
  # @return [Array<String>] error messages, empty when the arguments are valid
  def validate(arguments)
    errors = []
    validate_object(arguments, @schema, [], errors)
    errors
  end

  # @param arguments [Hash] arguments keyed by String or Symbol
  # @return [Boolean] whether the arguments satisfy the schema
  def valid?(arguments)
    validate(arguments).empty?
  end

  private

  # Checks required keys and recurses into every declared property.
  def validate_object(value, schema, path, errors)
    return unless schema["type"] == "object"

    unless value.is_a?(Hash)
      errors << "#{format_path(path)}: expected object, got #{value.class}"
      return
    end

    properties = schema["properties"] || {}
    required = schema["required"] || []

    # A required field may be supplied under either a String or a Symbol key.
    required.each do |field|
      next if value.key?(field) || value.key?(field.to_sym)

      errors << "#{format_path(path + [ field ])}: is required"
    end

    value.each do |key, val|
      key_str = key.to_s
      prop_schema = properties[key_str]
      next unless prop_schema

      validate_value(val, prop_schema, path + [ key_str ], errors, required: required.include?(key_str))
    end
  end

  # Dispatches on "anyOf" first, then on "type". Unknown types are accepted.
  def validate_value(value, schema, path, errors, required: false)
    return if value.nil? && !required

    any_of = schema["anyOf"]
    if any_of
      validate_any_of(value, any_of, path, errors, required: required)
      return
    end

    case schema["type"]
    when "string"
      validate_string(value, schema, path, errors)
    when "number"
      validate_number(value, schema, path, errors)
    when "integer"
      validate_integer(value, schema, path, errors)
    when "boolean"
      validate_boolean(value, schema, path, errors)
    when "null"
      validate_null(value, path, errors)
    when "array"
      validate_array(value, schema, path, errors)
    when "object"
      validate_object(value, schema, path, errors)
    end
  end

  def validate_string(value, schema, path, errors)
    unless value.is_a?(String)
      errors << "#{format_path(path)}: expected string, got #{value.class}"
      return
    end

    validate_enum(value, schema, path, errors)

    if schema["minLength"] && value.length < schema["minLength"]
      errors << "#{format_path(path)}: must be at least #{schema['minLength']} characters"
    end

    if schema["maxLength"] && value.length > schema["maxLength"]
      errors << "#{format_path(path)}: must be at most #{schema['maxLength']} characters"
    end

    if schema["pattern"] && !Regexp.new(schema["pattern"]).match?(value)
      errors << "#{format_path(path)}: must match pattern #{schema['pattern']}"
    end
  end

  def validate_number(value, schema, path, errors)
    unless value.is_a?(Numeric)
      errors << "#{format_path(path)}: expected number, got #{value.class}"
      return
    end

    validate_enum(value, schema, path, errors)
    validate_numeric_constraints(value, schema, path, errors)
  end

  def validate_integer(value, schema, path, errors)
    unless value.is_a?(Integer)
      errors << "#{format_path(path)}: expected integer, got #{value.class}"
      return
    end

    validate_enum(value, schema, path, errors)
    validate_numeric_constraints(value, schema, path, errors)
  end

  # Applies the "enum" keyword to an already type-checked scalar.
  def validate_enum(value, schema, path, errors)
    allowed = schema["enum"]
    return unless allowed
    return if allowed.include?(value)

    errors << "#{format_path(path)}: must be one of #{allowed.inspect}"
  end

  def validate_numeric_constraints(value, schema, path, errors)
    if schema["minimum"] && value < schema["minimum"]
      errors << "#{format_path(path)}: must be >= #{schema['minimum']}"
    end

    if schema["maximum"] && value > schema["maximum"]
      errors << "#{format_path(path)}: must be <= #{schema['maximum']}"
    end

    if schema["exclusiveMinimum"] && value <= schema["exclusiveMinimum"]
      errors << "#{format_path(path)}: must be > #{schema['exclusiveMinimum']}"
    end

    if schema["exclusiveMaximum"] && value >= schema["exclusiveMaximum"]
      errors << "#{format_path(path)}: must be < #{schema['exclusiveMaximum']}"
    end

    # A zero divisor is not a usable constraint; skip it instead of letting
    # Integer#% raise ZeroDivisionError in the middle of validation.
    multiple = schema["multipleOf"]
    if multiple && !multiple.zero? && !multiple_of?(value, multiple)
      errors << "#{format_path(path)}: must be a multiple of #{multiple}"
    end
  end

  # Whether +value+ is an exact multiple of the non-zero +divisor+.
  #
  # Float operands are compared as rationals because binary float modulo
  # rejects valid decimals (0.3 % 0.1 is 0.09999999999999998, not 0).
  def multiple_of?(value, divisor)
    return (value % divisor).zero? if value.is_a?(Integer) && divisor.is_a?(Integer)
    # NaN and Infinity cannot be converted to a Rational and are never multiples.
    return false unless value.finite? && divisor.finite?

    (decimal_rational(value) % decimal_rational(divisor)).zero?
  end

  # Float#rationalize yields the simplest rational that rounds to the float,
  # which recovers the decimal that was written (0.3 becomes 3/10).
  def decimal_rational(number)
    number.is_a?(Float) ? number.rationalize : number.to_r
  end

  def validate_boolean(value, _schema, path, errors)
    return if value == true || value == false

    errors << "#{format_path(path)}: expected boolean, got #{value.class}"
  end

  def validate_null(value, path, errors)
    return if value.nil?

    errors << "#{format_path(path)}: expected null, got #{value.class}"
  end

  def validate_array(value, schema, path, errors)
    unless value.is_a?(Array)
      errors << "#{format_path(path)}: expected array, got #{value.class}"
      return
    end

    if schema["minItems"] && value.length < schema["minItems"]
      errors << "#{format_path(path)}: must have at least #{schema['minItems']} items"
    end

    if schema["maxItems"] && value.length > schema["maxItems"]
      errors << "#{format_path(path)}: must have at most #{schema['maxItems']} items"
    end

    items_schema = schema["items"]
    return unless items_schema

    value.each_with_index do |item, index|
      validate_value(item, items_schema, path + [ index.to_s ], errors)
    end
  end

  # Accepts the value as soon as one variant validates without errors. Each
  # variant is tried against a scratch list so failed attempts do not leak
  # into the caller's errors.
  def validate_any_of(value, variants, path, errors, required: false)
    matched = variants.any? do |variant_schema|
      variant_errs = []
      validate_value(value, variant_schema, path, variant_errs, required: required)
      variant_errs.empty?
    end

    errors << "#{format_path(path)}: does not match any of the allowed types" unless matched
  end

  # Dotted path for error messages; the top-level value is reported as "root".
  def format_path(path)
    return "root" if path.empty?

    path.join(".")
  end
end
211
+ end
212
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DexterLlm
4
+ module Schema
5
# Base class for defining reusable schemas.
#
# Subclasses declare fields with the class-level macros below. Each call is
# recorded as [type, name, options, block] and replayed onto a Builder when
# #to_json_schema is invoked. A subclass starts with a copy of the
# definitions its parent had at the moment of subclassing.
class Base
  class << self
    def string(name, **options, &block)
      add_schema_definition(:string, name, options, block)
    end

    def number(name, **options, &block)
      add_schema_definition(:number, name, options, block)
    end

    def integer(name, **options, &block)
      add_schema_definition(:integer, name, options, block)
    end

    def boolean(name, **options, &block)
      add_schema_definition(:boolean, name, options, block)
    end

    # Null fields never carry a block.
    def null(name, **options)
      add_schema_definition(:null, name, options, nil)
    end

    def object(name, **options, &block)
      add_schema_definition(:object, name, options, block)
    end

    def array(name, **options, &block)
      add_schema_definition(:array, name, options, block)
    end

    def any_of(name, **options, &block)
      add_schema_definition(:any_of, name, options, block)
    end

    # @return [Array<Array>] recorded [type, name, options, block] tuples
    def schema_definitions
      @schema_definitions ||= []
    end

    def inherited(subclass)
      super
      # Hand the subclass its own copy so later additions stay local to it
      inherited_definitions = schema_definitions.dup
      subclass.instance_variable_set(:@schema_definitions, inherited_definitions)
    end

    private

    # Appends one recorded macro call and returns the definitions list.
    def add_schema_definition(type, name, options, block)
      schema_definitions << [ type, name, options, block ]
    end
  end

  # Replays every recorded definition onto a fresh Builder and returns the
  # builder's JSON schema.
  def to_json_schema
    builder = self.class.schema_definitions.each_with_object(Builder.new) do |(type, name, options, block), target|
      # A nil block passes no block at all, so one call covers both cases
      target.send(type, name, **options, &block)
    end

    builder.to_json_schema
  end
end
65
+ end
66
+ end
@@ -0,0 +1,216 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DexterLlm::Session
4
+ module Compaction
5
# Instruction text that generate_summary appends as the final user message
# when asking the model for a handoff summary of older history.
SUMMARIZATION_PROMPT = <<~PROMPT
  You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.

  Include:
  - Current progress and key decisions made
  - Important context, constraints, or user preferences
  - Absolute file paths of any relevant files that were read or modified
  - What remains to be done (clear next steps)
  - Any critical data, examples, or references needed to continue

  Be concise, structured, and focused on helping the next LLM seamlessly continue the work.
PROMPT
17
+
18
# Value object describing where a compaction cuts the entry list.
#
# turn_start_index defaults to -1 and is_split_turn to false, which is what
# find_cut_point reports when the cut does not fall inside a turn.
class CutPointResult
  attr_reader :first_kept_entry_id,
              :first_kept_entry_index,
              :turn_start_index,
              :is_split_turn

  def initialize(first_kept_entry_id:, first_kept_entry_index:, turn_start_index: -1, is_split_turn: false)
    @first_kept_entry_id, @first_kept_entry_index = first_kept_entry_id, first_kept_entry_index
    @turn_start_index, @is_split_turn = turn_start_index, is_split_turn
  end
end
28
+
29
+ class << self
30
# Whether the prompt has grown past the context window minus the reserve.
# Always false when compaction is disabled in the settings.
#
# @param prompt_tokens [Integer] tokens used by the current prompt
# @param context_window [Integer] the model's context window size
# @param settings [#enabled, #reserve_tokens] compaction settings
# @return [Boolean]
def should_compact?(prompt_tokens, context_window, settings)
  settings.enabled ? prompt_tokens > context_window - settings.reserve_tokens : false
end
34
+
35
# Builds a compaction entry that replaces older history with an LLM summary.
#
# Only entries after the most recent compaction are considered. Everything
# from that boundary up to (but excluding) the first kept entry is
# summarized; the previous compaction's summary, if any, is prepended so the
# new summary carries it forward.
#
# @param entries [Array] session entries responding to #type (and #message
#   for :message entries, #summary for :compaction entries)
# @param model [Object] passed through to the summarization request
# @param client [#complete] client used to generate the summary
# @param settings [#keep_recent_tokens, #reserve_tokens] compaction settings
# @param signal [#throw_if_cancelled!, nil] optional cancellation signal
# @param custom_instructions [String, nil] extra focus for the summary
# @return [Entry::Compaction]
# @raise [AlreadyCompactedError] when the last entry is already a compaction
def compact(entries:, model:, client:, settings:, signal: nil, custom_instructions: nil)
  # Don't compact if last entry is already a compaction
  raise AlreadyCompactedError, "Session was just compacted" if entries.last&.type == :compaction

  # Find previous compaction boundary
  prev_compaction_idx = entries.rindex { |e| e.type == :compaction } || -1
  boundary_start = prev_compaction_idx + 1

  # Token count comes from the last assistant message usage; nil.to_i makes
  # it 0 when no usage is available
  last_usage = find_last_assistant_usage(entries)
  tokens_before = last_usage&.prompt_tokens.to_i

  cut_result = find_cut_point(entries, boundary_start, entries.length, settings.keep_recent_tokens)

  # Summarize every message that will not be kept. When the cut splits a
  # turn, this deliberately includes the start of that turn (the user
  # request and the early assistant/tool messages): they sit before the
  # first kept entry, so leaving them out of the summary would drop them
  # from the resumed context entirely.
  history_messages = entries[boundary_start...cut_result.first_kept_entry_index]
    .select { |e| e.type == :message }
    .map(&:message)

  # Include previous summary if exists
  if prev_compaction_idx >= 0
    prev_summary = entries[prev_compaction_idx].summary
    history_messages.unshift(DexterLlm::UserMessage.new("Previous session summary:\n#{prev_summary}"))
  end

  # Generate summary via LLM
  summary = generate_summary(
    messages: history_messages,
    model: model,
    client: client,
    reserve_tokens: settings.reserve_tokens,
    signal: signal,
    custom_instructions: custom_instructions
  )

  Entry::Compaction.new(
    summary: summary,
    first_kept_entry_id: cut_result.first_kept_entry_id,
    tokens_before: tokens_before
  )
end
79
+
80
# Chooses the entry at which the kept (recent) region starts.
#
# Walks backwards from end_index accumulating estimated message tokens.
# Once the total reaches keep_recent_tokens, the cut is placed on the
# nearest user or assistant message at or after that position. If the budget
# is never reached, or no such message follows, the cut stays at start_index.
#
# @param entries [Array] session entries
# @param start_index [Integer] first index that may be cut (inclusive)
# @param end_index [Integer] end of the range (exclusive)
# @param keep_recent_tokens [Integer] token budget for the kept region
# @return [CutPointResult]
def find_cut_point(entries, start_index, end_index, keep_recent_tokens)
  window = (start_index...end_index)

  # Only user and assistant messages may start the kept region (never tool_result)
  cut_candidates = window.select do |idx|
    candidate = entries[idx]
    candidate.type == :message && [ :user, :assistant ].include?(candidate.message.role)
  end

  if cut_candidates.empty?
    first = entries[start_index]
    return CutPointResult.new(
      first_kept_entry_id: first.respond_to?(:id) ? first.id : nil,
      first_kept_entry_index: start_index
    )
  end

  cut_index = start_index
  running_total = 0

  window.reverse_each do |idx|
    current = entries[idx]
    next unless current.type == :message

    running_total += DexterLlm::TokenEstimator.estimate_message_tokens(current.message)
    next if running_total < keep_recent_tokens

    # Budget reached: snap forward to the closest allowed cut point
    cut_index = cut_candidates.find { |c| c >= idx } || cut_index
    break
  end

  cut_entry = entries[cut_index]
  cut_entry_id = cut_entry.respond_to?(:id) ? cut_entry.id : nil

  unless cut_entry&.type == :message
    return CutPointResult.new(first_kept_entry_id: cut_entry_id, first_kept_entry_index: cut_index)
  end

  # Cutting on a user message keeps the whole turn; cutting on an assistant
  # message splits the turn when an earlier user message exists in range
  starts_with_user = cut_entry.message.role == :user
  turn_start = starts_with_user ? -1 : find_turn_start(entries, cut_index, start_index)

  CutPointResult.new(
    first_kept_entry_id: cut_entry_id,
    first_kept_entry_index: cut_index,
    turn_start_index: turn_start,
    is_split_turn: !starts_with_user && turn_start != -1
  )
end
134
+
135
# Estimated token count for a list of messages, as computed by
# DexterLlm::TokenEstimator.estimate_tokens.
def estimate_prompt_tokens(messages)
  estimator = DexterLlm::TokenEstimator
  estimator.estimate_tokens(messages)
end
138
+
139
# Prompt token count recorded on a usage object, as an Integer.
# Returns 0 when usage is nil or its prompt_tokens is nil.
def calculate_prompt_tokens_from_usage(usage)
  return 0 if usage.nil?

  usage.prompt_tokens.to_i
end
142
+
143
+ private
144
+
145
# Usage of the most recent assistant message, or nil when the entries
# contain no assistant message.
def find_last_assistant_usage(entries)
  latest = entries.reverse_each.find do |candidate|
    candidate.type == :message && candidate.message.role == :assistant
  end

  latest ? latest.message.usage : nil
end
153
+
154
# Index of the closest user message before from_index, searching no further
# back than min_index. Returns -1 when there is none.
def find_turn_start(entries, from_index, min_index)
  turn_start = (min_index...from_index).reverse_each.find do |idx|
    candidate = entries[idx]
    candidate.type == :message && candidate.message.role == :user
  end

  turn_start || -1
end
163
+
164
# Asks the model for a handoff summary of the given messages.
#
# The history is first trimmed by limit_messages_for_summary, then the
# summarization prompt (plus any custom focus) is appended as the final user
# message. The response budget is 80% of reserve_tokens, clamped to
# 1000..16_000.
#
# @return [Object] the text of the client's response
# @raise [DexterLlm::Session::CompactionError] when the completion call fails
def generate_summary(messages:, model:, client:, reserve_tokens:, signal: nil, custom_instructions: nil)
  signal&.throw_if_cancelled!

  output_budget = (0.8 * reserve_tokens).floor.clamp(1000, 16_000)

  instructions = SUMMARIZATION_PROMPT
  instructions = "#{instructions}\n\nAdditional focus: #{custom_instructions}" if custom_instructions

  trimmed_history = limit_messages_for_summary(messages, model: model, reserve_tokens: reserve_tokens)
  request_messages = trimmed_history + [ DexterLlm::UserMessage.new(instructions) ]

  # Only the completion call is wrapped; cancellation and trimming errors
  # raised above propagate unchanged
  begin
    client.complete(model: model, messages: request_messages, max_tokens: output_budget).text
  rescue StandardError => e
    raise DexterLlm::Session::CompactionError, "Summarization failed: #{e.message}"
  end
end
193
+
194
# Trims the history so the summarization request fits the model.
#
# Keeps the most recent messages whose estimated tokens fit within
# context_window - reserve_tokens; the newest message is always kept, even
# if it alone exceeds the budget. The messages are returned untouched when
# the model exposes no context window or the budget is not positive.
#
# @return [Array] messages in their original order
def limit_messages_for_summary(messages, model:, reserve_tokens:)
  return messages unless model.respond_to?(:context_window)

  window = model.context_window
  return messages if window.nil?

  budget = window - reserve_tokens
  return messages if budget <= 0

  selected = []
  spent = 0

  # Newest first; prepending keeps the result in chronological order
  messages.reverse_each do |message|
    cost = DexterLlm::TokenEstimator.estimate_message_tokens(message)
    over_budget = spent + cost > budget
    break if over_budget && !selected.empty?

    selected.unshift(message)
    spent += cost
  end

  selected
end
214
+ end
215
+ end
216
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DexterLlm::Session
4
# Read-only settings for session compaction.
#
# Defaults: enabled, 16_384 reserved tokens, 20_000 recent tokens kept.
class CompactionSettings
  attr_reader :enabled,
              :reserve_tokens,
              :keep_recent_tokens

  def initialize(enabled: true, reserve_tokens: 16_384, keep_recent_tokens: 20_000)
    @enabled, @reserve_tokens, @keep_recent_tokens = enabled, reserve_tokens, keep_recent_tokens
  end
end
17
+ end