llm_optimizer 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: b5f6d0b99af3e0801e77df0316ac767e0e10e0d4e7bba9dc19623797681a2961
-   data.tar.gz: d8644df814cb0c7f219a51620d3cd409e1bb5822228278245b2572dbaf666fdc
+   metadata.gz: 3a0ec4bdfa750f16155927a3e00c9fe2c1c39da7e85866eb6c65855ac6eebaef
+   data.tar.gz: 0e5820f0503fbef14dc1ad858dfaa7527e3dba278fbf7640df377d82fbc61ad7
  SHA512:
-   metadata.gz: 1396d95f7e3f498e600cf6e3b99627ee2f746692a1f002be989ce1b13859f5a1af8f50656a82ff0fa853d3e42b0c219de49ac152baa2107d9c9529fc82bf63e4
-   data.tar.gz: 6b45bae664e4d43fd54fe47c2c8c9ebdeea2b4442f78bf22daee56d730095a471f58faf0e6432c7e5ef18a58cfc20a9279dc8706c9f41e4ca55db0cb441df1e8
+   metadata.gz: 8c2f376e324a7678063e66a89b6ad89e476bd699fd3a816c7c91a79b16ba40e09111cfdfacb1206946e2d111122e63cf70babc09a0467821723b2b286eda235a
+   data.tar.gz: 5bba8c343627f230c13f0671cd8b1374ab0405f6c6369457b92e9093ac1cd2f780797797a26fabdc865981ca1e131b6dc80ae4a97342f3de2f3297255d8e13c9
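For readers who want to verify these digests locally: a `.gem` file is a plain tar archive containing `metadata.gz` and `data.tar.gz`, so the SHA256 values above can be recomputed after unpacking. A minimal sketch; the fetch/unpack commands and file names follow the standard gem layout, and the expected digests are the 0.1.6 values above:

```ruby
# Sketch: verify the 0.1.6 SHA256 checksums above.
# Assumes the gem was fetched and unpacked first, e.g.:
#   gem fetch llm_optimizer -v 0.1.6
#   tar -xf llm_optimizer-0.1.6.gem   # yields metadata.gz and data.tar.gz
require "digest"

expected = {
  "metadata.gz" => "3a0ec4bdfa750f16155927a3e00c9fe2c1c39da7e85866eb6c65855ac6eebaef",
  "data.tar.gz" => "0e5820f0503fbef14dc1ad858dfaa7527e3dba278fbf7640df377d82fbc61ad7"
}

expected.each do |file, digest|
  actual = Digest::SHA256.file(file).hexdigest
  puts "#{file}: #{actual == digest ? 'OK' : 'MISMATCH'}"
end
```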
data/CHANGELOG.md CHANGED
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  
  ## [Unreleased]
  
+ ## [0.1.6] - 2026-05-04
+
+ ### Added
+ - `with_tools` configuration option (aliased as `tools`) — allows passing function/tool definitions to LLM calls via the `optimize` method
+ - Tool support for both `llm_caller` and `messages_caller` — `tools:` keyword argument is now passed to all underlying LLM callers
+ - `with_tools` examples in the README and Rails initializer template
+ - `cache_scope` configuration option — isolates semantic cache entries into separate namespaces; useful for ensuring cache hits only occur within specific contexts (e.g., user IDs, account types, or dynamic categories)
+
+ ### Changed
+ - `Pipeline#raw_llm_call` refactored to handle global and per-call tools consistently
+ - Refactored `Pipeline` to remove duplicate internal method definitions (`semantic_cache_lookup`, `store_in_cache`)
+ - `SemanticCache#lookup` return format updated to `[response, token_info]` to support better metadata tracking
+
+ ### Fixed
+ - RuboCop `Metrics/ParameterLists` offense in `OptimizeResult#initialize`, resolved by adding a targeted override for the necessary result fields
+
  ## [0.1.5] - 2026-04-22
  
  ### Added
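Taken together, the new options can be exercised like this. A hedged sketch: `weather_tool` is a hypothetical tool definition in OpenAI function format, not something shipped with the gem; only `with_tools` and `cache_scope` come from this release.

```ruby
# Sketch of the 0.1.6 additions. weather_tool is a made-up example
# tool definition in OpenAI function format.
weather_tool = {
  type: "function",
  function: {
    name: "get_weather",
    description: "Look up current weather for a city",
    parameters: {
      type: "object",
      properties: { city: { type: "string" } },
      required: ["city"]
    }
  }
}

LlmOptimizer.configure do |config|
  config.with_tools  = [weather_tool]  # forwarded as tools: to every caller
  config.cache_scope = "support_bot"   # namespaces semantic cache keys
end
```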
data/README.md CHANGED
@@ -200,6 +200,7 @@ end
  | `messages_caller` | Lambda | `nil` | `(messages, model:) -> String` — used when `conversation_id` is present; receives full history including current user turn |
  | `system_prompt` | String | `nil` | Seeded as the first system message when a new conversation is created via `conversation_id` |
  | `conversation_ttl` | Integer | `86400` | TTL in seconds for Redis-backed conversation history (`0` for no expiry) |
+ | `with_tools` | Array | `nil` | Tools (functions) available to the LLM; passed as `tools:` keyword to callers |
  
  ## Per-call configuration
  
@@ -212,19 +213,6 @@ result = LlmOptimizer.optimize(prompt) do |config|
  end
  ```
  
- ## Conversation history
-
- Pass a `messages` array to enable history management:
-
- ```ruby
- messages = [
-   { role: "user", content: "Tell me about Redis" },
-   { role: "assistant", content: "Redis is an in-memory data store..." },
-   # ... more messages
- ]
-
- result = LlmOptimizer.optimize("What else can it do?", messages: messages)
-
  ## OptimizeResult
  
  Every call returns an `OptimizeResult` struct:
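A short sketch of reading the result, including the token fields this release adds (the prompt is illustrative):

```ruby
# Sketch: reading OptimizeResult fields, including the token fields
# added in 0.1.6. The prompt is illustrative.
result = LlmOptimizer.optimize("Summarize Redis persistence options")

result.response       # the LLM reply (or cached response)
result.cache_status   # :hit or :miss
result.input_tokens   # from caller-reported usage when available
result.output_tokens  # unset unless the caller reports usage
result.latency_ms     # elapsed time for the call
```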
@@ -27,6 +27,9 @@ LlmOptimizer.configure do |config|
    config.cache_ttl = 86_400  # cache entry TTL in seconds (default: 24h)
    config.timeout_seconds = 5 # timeout for embedding / external API calls
  
+   # --- Tools ---
+   # config.with_tools = [] # Array of tool definitions (OpenAI/Anthropic format)
+
    # --- Logging ---
    config.logger = Rails.logger
    config.debug_logging = Rails.env.development?
@@ -81,36 +84,39 @@ LlmOptimizer.configure do |config|
    # Messages caller for history manager/conversation summary - Optional
    # config.system_prompt = "You are a helpful person who gives responses in a non harmful way. " \
    #                        "If any serious question is asked, handle it in effectively."
-   # OpenAI implmeentation -
-   # config.messages_caller = ->(messages, model:) {
-   #   response = $openai.chat(
-   #     parameters: {
-   #       model: model,
-   #       messages: messages.map { |m| { role: m[:role], content: m[:content] } }
-   #     }
-   #   )
+   # OpenAI implementation -
+   # config.messages_caller = ->(messages, model:, tools: nil) {
+   #   parameters = {
+   #     model: model,
+   #     messages: messages.map { |m| { role: m[:role], content: m[:content] } }
+   #   }
+   #   parameters[:tools] = tools if tools&.any?
+   #
+   #   response = $openai.chat(parameters: parameters)
    #   response.dig("choices", 0, "message", "content")
    # }
  
    # RubyLLM implementation -
-   # config.messages_caller = ->(messages, model:) {
+   # config.messages_caller = ->(messages, model:, tools: nil) {
    #   chat = RubyLLM.chat(model: model)
+   #   chat.with_tools(*tools) if tools&.any?
    #   messages[0..-2].each { |m| chat.add_message(role: m[:role], content: m[:content]) }
    #   chat.ask(messages.last[:content]).content
    # }
  
    # Anthropic implementation -
-   # config.messages_caller = ->(messages, model:) {
+   # config.messages_caller = ->(messages, model:, tools: nil) {
    #   # Anthropic separates system messages from the messages array
    #   system_msg = messages.find { |m| m[:role] == "system" }&.dig(:content)
    #   chat_msgs = messages.reject { |m| m[:role] == "system" }
    #                       .map { |m| { role: m[:role], content: m[:content] } }
-
+   #
    #   response = $anthropic.messages(
    #     model: model,
    #     max_tokens: 1024,
    #     system: system_msg,
-   #     messages: chat_msgs
+   #     messages: chat_msgs,
+   #     tools: tools
    #   )
    #   response["content"].first["text"]
    # }
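The template above only updates `messages_caller`, but 0.1.6 passes `tools:` to the single-prompt `llm_caller` as well. A sketch in the template's style, assuming the same `$openai` client:

```ruby
LlmOptimizer.configure do |config|
  # Sketch: a single-prompt llm_caller accepting the tools: keyword
  # added in 0.1.6. Mirrors the template's OpenAI examples; $openai is
  # assumed to be a configured OpenAI client.
  config.llm_caller = ->(prompt, model:, tools: nil) {
    parameters = { model: model, messages: [{ role: "user", content: prompt }] }
    parameters[:tools] = tools if tools&.any?

    response = $openai.chat(parameters: parameters)
    response.dig("choices", 0, "message", "content")
  }
end
```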
@@ -25,6 +25,10 @@ module LlmOptimizer
      conversation_ttl
      system_prompt
      messages_caller
+     cache_scope
+     tools
+     with_tools
+     tools_caller
    ].freeze
  
    # Define readers for all known keys (setters below track explicit sets)
@@ -52,6 +56,7 @@ module LlmOptimizer
      @classifier_caller = nil
      @conversation_ttl = 86_400
      @system_prompt = nil
+     @with_tools = nil
    end
  
    # Copies only explicitly set keys from other_config without resetting unmentioned keys.
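Because `cache_scope`, `tools`, and `with_tools` are now registered keys, they can also be set per call, and per the comment above only explicitly set keys are copied onto the call configuration. A sketch, where `search_tool` and `user_id` are hypothetical:

```ruby
# Sketch: per-call overrides via the optimize block. `tools` is read as
# an alias for `with_tools`; search_tool and user_id are hypothetical.
search_tool = { type: "function", function: { name: "web_search" } }
user_id     = 42

result = LlmOptimizer.optimize("Find the Ruby 3.3 release notes") do |config|
  config.tools       = [search_tool]      # applies to this call only
  config.cache_scope = "user:#{user_id}"  # per-user cache namespace
end
```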
@@ -1,8 +1,43 @@
  # frozen_string_literal: true
  
  module LlmOptimizer
-   OptimizeResult = Struct.new(
-     :response, :model, :model_tier, :cache_status,
-     :original_tokens, :compressed_tokens, :latency_ms, :messages
-   )
+   class OptimizeResult
+     attr_accessor :response, :model, :model_tier, :cache_status,
+                   :original_tokens, :compressed_tokens, :input_tokens,
+                   :output_tokens, :cached_tokens, :latency_ms, :messages
+
+     # rubocop:disable Metrics/ParameterLists
+     def initialize(response: nil, model: nil, model_tier: nil, cache_status: nil,
+                    original_tokens: 0, compressed_tokens: 0, input_tokens: 0,
+                    output_tokens: 0, cached_tokens: 0, latency_ms: 0, messages: [])
+       @response = response
+       @model = model
+       @model_tier = model_tier
+       @cache_status = cache_status
+       @original_tokens = original_tokens
+       @compressed_tokens = compressed_tokens
+       @input_tokens = input_tokens
+       @output_tokens = output_tokens
+       @cached_tokens = cached_tokens
+       @latency_ms = latency_ms
+       @messages = messages
+     end
+     # rubocop:enable Metrics/ParameterLists
+
+     def to_h
+       {
+         response: @response,
+         model: @model,
+         model_tier: @model_tier,
+         cache_status: @cache_status,
+         original_tokens: @original_tokens,
+         compressed_tokens: @compressed_tokens,
+         input_tokens: @input_tokens,
+         output_tokens: @output_tokens,
+         cached_tokens: @cached_tokens,
+         latency_ms: @latency_ms,
+         messages: @messages
+       }
+     end
+   end
  end
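With the Struct replaced by a keyword-initialized class, numeric fields default to `0` instead of `nil`, and `to_h` keeps hash-style access available. A quick sketch with illustrative values:

```ruby
# Sketch: the 0.1.6 constructor and to_h. Field values are illustrative.
result = LlmOptimizer::OptimizeResult.new(
  response: "Hello!", model: "some-model", cache_status: :miss,
  input_tokens: 42, output_tokens: 7
)
result.cached_tokens        # => 0 (numeric fields now default to 0)
result.to_h[:output_tokens] # => 7
```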
@@ -44,25 +44,6 @@ module LlmOptimizer
      [model_tier, model]
    end
  
-   def semantic_cache_lookup(prompt, model, model_tier, original_tokens,
-                             compressed_tokens, original_prompt, start, config)
-     return [nil, nil] unless config.use_semantic_cache
-
-     emb_client = EmbeddingClient.new(
-       model: config.embedding_model,
-       timeout_seconds: config.timeout_seconds,
-       embedding_caller: config.embedding_caller
-     )
-     embedding = emb_client.embed(prompt)
-     embedding, result = check_cache_hit(embedding, prompt, model, model_tier,
-                                         original_tokens, compressed_tokens,
-                                         original_prompt, start, config)
-     [embedding, result]
-   rescue EmbeddingError => e
-     config.logger.warn("[llm_optimizer] EmbeddingError (treating as cache miss): #{e.message}")
-     [nil, nil]
-   end
-
    def load_conversation(conversation_id, options, config)
      return [options[:messages], nil] unless conversation_id
  
@@ -93,41 +74,42 @@ module LlmOptimizer
      store.save(conversation_id, messages, prompt, response) || messages
    end
  
-   def store_in_cache(embedding, response, config)
-     return unless config.use_semantic_cache && embedding && config.redis_url
-
-     redis = build_redis(config.redis_url)
-     cache = SemanticCache.new(redis, threshold: config.similarity_threshold, ttl: config.cache_ttl)
-     cache.store(embedding, response)
-   rescue StandardError => e
-     config.logger.warn("[llm_optimizer] SemanticCache store failed: #{e.message}")
-   end
-
    def build_result(response, model, model_tier, cache_status,
-                    original_tokens, compressed_tokens, latency_ms, messages)
+                    original_tokens, compressed_tokens, latency_ms, messages, token_info = {})
      OptimizeResult.new(
        response: response, model: model, model_tier: model_tier,
        cache_status: cache_status, original_tokens: original_tokens,
-       compressed_tokens: compressed_tokens, latency_ms: latency_ms,
+       compressed_tokens: compressed_tokens,
+       input_tokens: token_info[:input_tokens] || compressed_tokens || original_tokens,
+       output_tokens: token_info[:output_tokens],
+       cached_tokens: token_info[:cached_tokens],
+       latency_ms: latency_ms,
        messages: messages
      )
    end
  
    def fallback_result(original_prompt, original_tokens, options, start)
      latency_ms = elapsed_ms(start)
-     response = raw_llm_call(original_prompt, model: nil, config: configuration)
+     response, _token_info = raw_llm_call(original_prompt, model: nil, config: configuration)
      build_result(response, nil, nil, :miss, original_tokens || 0, nil,
                   latency_ms, options[:messages])
    end
  
    def raw_llm_call(prompt, model:, messages: nil, config: nil)
-     if messages && !messages.empty? && config&.messages_caller
-       config.messages_caller.call(messages + [{ role: "user", content: prompt }], model: model)
+     tools = config&.with_tools || config&.tools
+     result = if messages && !messages.empty? && config&.messages_caller
+       config.messages_caller.call(messages + [{ role: "user", content: prompt }], model: model, tools: tools)
+     else
+       llm = config&.llm_caller || @_current_llm_caller
+       raise ConfigurationError, "No llm_caller configured." unless llm
+
+       llm.call(prompt, model: model, tools: tools)
+     end
+
+     if result.is_a?(Hash)
+       [result[:content], result]
      else
-       llm = config&.llm_caller || @_current_llm_caller
-       raise ConfigurationError, "No llm_caller configured." unless llm
-
-       llm.call(prompt, model: model)
+       [result, {}]
      end
    end
  
@@ -152,22 +134,41 @@ module LlmOptimizer
      Redis.new(url: redis_url)
    end
  
-   def check_cache_hit(embedding, _prompt, model, model_tier, original_tokens,
-                       compressed_tokens, original_prompt, start, config)
-     return [embedding, nil] unless config.redis_url
+   def semantic_cache_lookup(prompt, model, model_tier, original_tokens,
+                             compressed_tokens, original_prompt, start, config)
+     return [nil, nil] unless config.use_semantic_cache
+
+     embedding = config.embedding_caller.call(prompt)
+     cache = SemanticCache.new(build_redis(config.redis_url),
+                               threshold: config.similarity_threshold,
+                               ttl: config.cache_ttl,
+                               cache_scope: config.cache_scope)
+     cached, token_info = cache.lookup(embedding)
+
+     if cached
+       latency_ms = elapsed_ms(start)
+       emit_log(config.logger, config,
+                cache_status: :hit, model_tier: model_tier,
+                original_tokens: original_tokens, compressed_tokens: compressed_tokens,
+                latency_ms: latency_ms, prompt: original_prompt, response: cached)
+
+       [embedding, build_result(cached, model, model_tier, :hit,
+                                original_tokens, compressed_tokens, latency_ms, nil, token_info)]
+     else
+       [embedding, nil]
+     end
+   rescue StandardError => e
+     config.logger.warn("[llm_optimizer] semantic_cache_lookup failed: #{e.message}")
+     [nil, nil]
+   end
  
-     redis = build_redis(config.redis_url)
-     cache = SemanticCache.new(redis, threshold: config.similarity_threshold, ttl: config.cache_ttl)
-     cached = cache.lookup(embedding)
-     return [embedding, nil] unless cached
+   def store_in_cache(embedding, response, config, token_info = {})
+     return unless config.use_semantic_cache && embedding
  
-     latency_ms = elapsed_ms(start)
-     emit_log(config.logger, config,
-              cache_status: :hit, model_tier: model_tier,
-              original_tokens: original_tokens, compressed_tokens: compressed_tokens,
-              latency_ms: latency_ms, prompt: original_prompt, response: cached)
-     [embedding, build_result(cached, model, model_tier, :hit,
-                              original_tokens, compressed_tokens, latency_ms, nil)]
+     SemanticCache.new(build_redis(config.redis_url),
+                       threshold: config.similarity_threshold,
+                       ttl: config.cache_ttl,
+                       cache_scope: config.cache_scope).store(embedding, response, token_info)
    end
  end
end
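`raw_llm_call` now normalizes whatever the caller returns: a String becomes `[result, {}]`, while a Hash is split into `[result[:content], result]` so token counts reach `build_result`. A sketch of a caller opting into the richer form; the `usage` paths follow OpenAI's response shape, and `$openai` is an assumed client:

```ruby
LlmOptimizer.configure do |config|
  # Sketch: an llm_caller that returns a Hash so raw_llm_call yields
  # [content, token_info]. :content is required; the token keys match
  # what build_result reads above.
  config.llm_caller = ->(prompt, model:, tools: nil) {
    params = { model: model, messages: [{ role: "user", content: prompt }] }
    params[:tools] = tools if tools&.any?
    response = $openai.chat(parameters: params)

    {
      content: response.dig("choices", 0, "message", "content"),
      input_tokens: response.dig("usage", "prompt_tokens"),
      output_tokens: response.dig("usage", "completion_tokens"),
      cached_tokens: response.dig("usage", "prompt_tokens_details", "cached_tokens")
    }
  }
end
```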
@@ -7,20 +7,19 @@ module LlmOptimizer
  class SemanticCache
    KEY_NAMESPACE = "llm_optimizer:cache:"
  
-   def initialize(redis_client, threshold:, ttl:)
-     @redis = redis_client
-     @threshold = threshold
-     @ttl = ttl
+   def initialize(redis_client, threshold:, ttl:, cache_scope: nil)
+     @redis = redis_client
+     @threshold = threshold
+     @ttl = ttl
+     @cache_scope = cache_scope
    end
  
-   def store(embedding, response)
+   def store(embedding, response, token_info = {})
      key = cache_key(embedding)
-     # Serialize embedding as raw 64-bit big-endian doubles to preserve full
-     # Float precision. MessagePack silently downcasts Ruby Float to 32-bit,
-     # which corrupts cosine similarity on deserialization.
      payload = MessagePack.pack({
-       "embedding" => embedding.pack("G*"), # binary string, lossless
-       "response" => response
+       "embedding" => embedding.pack("G*"),
+       "response" => response,
+       "token_info" => token_info
      })
      @redis.set(key, payload, ex: @ttl)
    rescue ::Redis::BaseError => e
@@ -28,28 +27,32 @@ module LlmOptimizer
    end
  
    def lookup(embedding)
-     keys = @redis.keys("#{KEY_NAMESPACE}*")
+     prefix = KEY_NAMESPACE
+     prefix += "#{@cache_scope}:" if @cache_scope
+     keys = @redis.keys("#{prefix}*")
+
+     keys.reject! { |k| k.count(":") > 2 } unless @cache_scope
+
      return nil if keys.empty?
  
      best_score = -Float::INFINITY
-     best_response = nil
+     best_entry = nil
  
      keys.each do |key|
        raw = @redis.get(key)
        next unless raw
  
        entry = MessagePack.unpack(raw)
-       # Unpack the binary string back to 64-bit doubles
        stored_embedding = entry["embedding"].unpack("G*")
        score = cosine_similarity(embedding, stored_embedding)
  
        if score > best_score
          best_score = score
-         best_response = entry["response"]
+         best_entry = entry
        end
      end
  
-     best_score >= @threshold ? best_response : nil
+     [best_entry["response"], best_entry["token_info"] || {}] if best_score >= @threshold
    rescue ::Redis::BaseError => e
      warn "[llm_optimizer] SemanticCache lookup failed: #{e.message}"
      nil
@@ -70,7 +73,9 @@ module LlmOptimizer
      # Use "G*" (64-bit big-endian double) to match Ruby's native Float precision.
      # "f*" (32-bit) truncates precision and produces inconsistent hashes for the
      # same embedding across serialize/deserialize round trips.
-     KEY_NAMESPACE + Digest::SHA256.hexdigest(embedding.pack("G*"))
+     prefix = KEY_NAMESPACE
+     prefix += "#{@cache_scope}:" if @cache_scope
+     prefix + Digest::SHA256.hexdigest(embedding.pack("G*"))
    end
  end
end
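The key layout follows from `cache_key` above: unscoped entries look like `llm_optimizer:cache:<digest>` (two colons) and scoped ones like `llm_optimizer:cache:<scope>:<digest>` (three), so the colon-count filter in `lookup` keeps unscoped lookups from ever matching scoped entries. A sketch, where the Redis connection and embedding vector are stand-ins:

```ruby
# Sketch: how cache_scope partitions keys. The embedding is a stand-in
# vector; real ones come from your embedding_caller.
require "redis"

redis     = Redis.new             # assumes a local Redis
embedding = Array.new(8) { rand } # stand-in embedding vector

scoped = LlmOptimizer::SemanticCache.new(redis, threshold: 0.95,
                                         ttl: 3600, cache_scope: "user_7")
scoped.store(embedding, "scoped answer")
# key: "llm_optimizer:cache:user_7:<sha256-of-embedding>" (3 colons)

unscoped = LlmOptimizer::SemanticCache.new(redis, threshold: 0.95, ttl: 3600)
unscoped.lookup(embedding)
# => nil, because unscoped lookups drop keys that carry a scope segment,
#    so scoped entries can never produce cross-scope hits
```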
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
  
  module LlmOptimizer
-   VERSION = "0.1.5"
+   VERSION = "0.1.6"
  end
data/lib/llm_optimizer.rb CHANGED
@@ -103,17 +103,18 @@ module LlmOptimizer
  
      messages, store = load_conversation(conversation_id, options, call_config)
      messages = apply_history_manager(messages, call_config)
-     response = raw_llm_call(prompt, messages: messages, model: model, config: call_config)
-     messages = persist_conversation(store, conversation_id, messages, prompt, response)
-     store_in_cache(embedding, response, call_config)
+     response, token_info = raw_llm_call(prompt, messages: messages, model: model, config: call_config)
+     messages = persist_conversation(store, conversation_id, messages, prompt, response)
+     store_in_cache(embedding, response, call_config, token_info)
  
      latency_ms = elapsed_ms(start)
      emit_log(call_config.logger, call_config,
               cache_status: :miss, model_tier: model_tier,
               original_tokens: original_tokens, compressed_tokens: compressed_tokens,
               latency_ms: latency_ms, prompt: original_prompt, response: response)
+
      build_result(response, model, model_tier, :miss, original_tokens, compressed_tokens,
-                  latency_ms, messages)
+                  latency_ms, messages, token_info)
    rescue EmbeddingError => e
      configuration.logger.warn("[llm_optimizer] EmbeddingError (outer rescue): #{e.message}")
      fallback_result(original_prompt, original_tokens, options, start)
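Putting the miss path together: the caller's `token_info` is stored next to the cached response and replayed on a later hit. A sketch, assuming a Hash-returning `llm_caller` like the one sketched earlier and prompts similar enough to clear the similarity threshold:

```ruby
# Sketch: on a miss, token_info from the caller is stored with the
# cached response; a later similar prompt replays it on the hit.
first = LlmOptimizer.optimize("Explain Redis persistence")
first.cache_status    # => :miss

second = LlmOptimizer.optimize("Explain persistence in Redis")
second.cache_status   # => :hit (if similarity clears the threshold)
second.output_tokens  # replayed from the stored token_info
```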
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: llm_optimizer
  version: !ruby/object:Gem::Version
-   version: 0.1.5
+   version: 0.1.6
  platform: ruby
  authors:
  - arun kumar