llm_optimizer 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.overcommit.yml +17 -0
- data/CHANGELOG.md +17 -0
- data/CONTRIBUTING.md +228 -0
- data/README.md +3 -0
- data/lib/generators/llm_optimizer/templates/initializer.rb +4 -4
- data/lib/llm_optimizer/compressor.rb +1 -1
- data/lib/llm_optimizer/configuration.rb +2 -3
- data/lib/llm_optimizer/embedding_client.rb +5 -2
- data/lib/llm_optimizer/history_manager.rb +3 -2
- data/lib/llm_optimizer/model_router.rb +1 -1
- data/lib/llm_optimizer/optimize_result.rb +1 -2
- data/lib/llm_optimizer/version.rb +1 -1
- data/lib/llm_optimizer.rb +50 -49
- metadata +33 -16
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d382e2ae48971edae81c24fa4e05bbacf9394c04dabed28b0277ca429e75a98d
|
|
4
|
+
data.tar.gz: d807840237cf09e8b271063660242ae1c460b682425a50389c60f396b086e2c4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8cac9e17c1f243c17d997e799daf25d886b329c09e83c84d9151f55abbb50d36a7e1b486171e401a645443022bb4de05e4430d0e303e05587dd1b244eda18cbe
|
|
7
|
+
data.tar.gz: 598b000eabc6a4c0000b3b9bd2162231c619d7618653ca6356948b623f0524db048c7b1f9a8a589905c6b611a10fe6bdc42e7490b432fe8f6047b75dcc35038a
|
data/.overcommit.yml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
gemfile: Gemfile
|
|
2
|
+
|
|
3
|
+
PreCommit:
|
|
4
|
+
RuboCop:
|
|
5
|
+
enabled: true
|
|
6
|
+
on_warn: fail
|
|
7
|
+
command: ["bundle", "exec", "rubocop"]
|
|
8
|
+
description: "Check RuboCop offenses"
|
|
9
|
+
|
|
10
|
+
MiniTest:
|
|
11
|
+
enabled: true
|
|
12
|
+
description: "Run Minitest suite"
|
|
13
|
+
required_executable: "bundle"
|
|
14
|
+
command: ["bundle", "exec", "rake", "test"]
|
|
15
|
+
include:
|
|
16
|
+
- "lib/**/*.rb"
|
|
17
|
+
- "test/**/*.rb"
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.1] - 2026-04-10
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- RuboCop offenses across all source and test files
|
|
14
|
+
- `missing keyword: :_model` error in test lambdas — use `**_kwargs` pattern
|
|
15
|
+
- HistoryManager summarization tests failing due to keyword argument mismatch
|
|
16
|
+
- Suppress third-party gem warnings in test output
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
- Full unit test suite with positive and negative scenarios (Minitest + Mocha)
|
|
20
|
+
- Mock JSON fixtures for OpenAI embedding API responses
|
|
21
|
+
- `CONTRIBUTING.md` with fork setup, issue guidelines, PR checklist, and overcommit instructions
|
|
22
|
+
- Pre-commit hooks via overcommit (RuboCop + Minitest)
|
|
23
|
+
- Rails generator: `rails generate llm_optimizer:install`
|
|
24
|
+
- `embedding_caller` and `llm_caller` injectable lambdas — no forced provider dependency
|
|
25
|
+
- `logger` gem explicit dependency for Ruby 3.5+ compatibility
|
|
26
|
+
|
|
10
27
|
## [0.1.0] - 2026-04-10
|
|
11
28
|
|
|
12
29
|
### Added
|
data/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Contributing to llm_optimizer
|
|
2
|
+
|
|
3
|
+
Thanks for taking the time to contribute. This document covers everything you need to get set up, raise issues, and submit pull requests.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
- [Setting Up the Repo](#setting-up-the-repo)
|
|
8
|
+
- [Running Tests and Linting](#running-tests-and-linting)
|
|
9
|
+
- [Pre-commit Hooks with Overcommit](#pre-commit-hooks-with-overcommit)
|
|
10
|
+
- [Raising an Issue](#raising-an-issue)
|
|
11
|
+
- [Submitting a Pull Request](#submitting-a-pull-request)
|
|
12
|
+
- [Code Style](#code-style)
|
|
13
|
+
- [Commit Message Guidelines](#commit-message-guidelines)
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Setting Up the Repo
|
|
18
|
+
|
|
19
|
+
**1. Fork the repository**
|
|
20
|
+
|
|
21
|
+
Click "Fork" on [github.com/arunkumarry/llm_optimizer](https://github.com/arunkumarry/llm_optimizer).
|
|
22
|
+
|
|
23
|
+
**2. Clone your fork**
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/<your-username>/llm_optimizer.git
|
|
27
|
+
cd llm_optimizer
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**3. Add the upstream remote**
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
git remote add upstream https://github.com/arunkumarry/llm_optimizer.git
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
**4. Install dependencies**
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
bundle install
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**5. Verify everything works**
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
bundle exec rake test
|
|
46
|
+
bundle exec rubocop
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Both should pass before you start making changes.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Running Tests and Linting
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Run the full test suite
|
|
57
|
+
bundle exec rake test
|
|
58
|
+
|
|
59
|
+
# Run RuboCop
|
|
60
|
+
bundle exec rubocop
|
|
61
|
+
|
|
62
|
+
# Run both (default rake task)
|
|
63
|
+
bundle exec rake
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Add tests for any new behaviour you introduce. Tests live in `test/unit/`.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Pre-commit Hooks with Overcommit
|
|
71
|
+
|
|
72
|
+
This repo uses [overcommit](https://github.com/sds/overcommit) to run RuboCop and the test suite before every commit.
|
|
73
|
+
|
|
74
|
+
**Install the hooks after cloning:**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
bundle exec overcommit --install
|
|
78
|
+
bundle exec overcommit --sign
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Every `git commit` will now automatically run RuboCop and the test suite. If either fails, the commit is blocked.
|
|
82
|
+
|
|
83
|
+
**Bypass for a WIP commit:**
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
git commit --no-verify -m "wip: work in progress"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Re-enable checks before opening a PR — all hooks must pass.
|
|
90
|
+
|
|
91
|
+
**Note on rebasing:** If overcommit hooks cause issues during `git pull --rebase`, uninstall them first:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
bundle exec overcommit --uninstall
|
|
95
|
+
git pull --rebase origin main
|
|
96
|
+
bundle exec overcommit --install
|
|
97
|
+
bundle exec overcommit --sign
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Raising an Issue
|
|
103
|
+
|
|
104
|
+
Search [existing issues](https://github.com/arunkumarry/llm_optimizer/issues) before opening a new one.
|
|
105
|
+
|
|
106
|
+
**Bug reports should include:**
|
|
107
|
+
- Ruby version (`ruby -v`)
|
|
108
|
+
- Gem version (`bundle exec gem list llm_optimizer`)
|
|
109
|
+
- Minimal reproduction script
|
|
110
|
+
- Expected vs actual behaviour
|
|
111
|
+
- Full error message and backtrace
|
|
112
|
+
|
|
113
|
+
**Feature requests should include:**
|
|
114
|
+
- The problem you're solving
|
|
115
|
+
- What the API should look like
|
|
116
|
+
- Alternatives you've considered
|
|
117
|
+
|
|
118
|
+
Use labels: `bug`, `enhancement`, `question`, or `documentation`.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Submitting a Pull Request
|
|
123
|
+
|
|
124
|
+
**1. Sync with upstream before starting**
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
git fetch upstream
|
|
128
|
+
git rebase upstream/main
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
**2. Create a branch**
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
git checkout -b feature/your-feature-name
|
|
135
|
+
# or
|
|
136
|
+
git checkout -b fix/issue-description
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Branch naming:
|
|
140
|
+
- `feature/` — new functionality
|
|
141
|
+
- `fix/` — bug fixes
|
|
142
|
+
- `docs/` — documentation only
|
|
143
|
+
- `refactor/` — no behaviour change
|
|
144
|
+
- `test/` — test additions or fixes
|
|
145
|
+
|
|
146
|
+
**3. Make your changes**
|
|
147
|
+
|
|
148
|
+
- Write tests for new behaviour — PRs without tests won't be merged
|
|
149
|
+
- Keep changes focused — one concern per PR
|
|
150
|
+
- Update `CHANGELOG.md` under `[Unreleased]`
|
|
151
|
+
|
|
152
|
+
**4. Ensure all checks pass**
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
bundle exec rake
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**5. Push and open a PR**
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
git push origin feature/your-feature-name
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Open a PR against `main` on GitHub.
|
|
165
|
+
|
|
166
|
+
**PR description should include:**
|
|
167
|
+
- What the PR does and why
|
|
168
|
+
- Link to the related issue (`Closes #42`)
|
|
169
|
+
- How to test it manually if applicable
|
|
170
|
+
- Any breaking changes clearly called out
|
|
171
|
+
|
|
172
|
+
**PR checklist:**
|
|
173
|
+
- [ ] Tests added or updated
|
|
174
|
+
- [ ] `bundle exec rake` passes locally
|
|
175
|
+
- [ ] `CHANGELOG.md` updated under `[Unreleased]`
|
|
176
|
+
- [ ] No unrelated changes included
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Code Style
|
|
181
|
+
|
|
182
|
+
RuboCop is configured in `.rubocop.yml`. Run before committing:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
bundle exec rubocop
|
|
186
|
+
|
|
187
|
+
# Auto-fix safe offenses
|
|
188
|
+
bundle exec rubocop -a
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Key conventions:
|
|
192
|
+
- Double-quoted strings
|
|
193
|
+
- `# frozen_string_literal: true` in every file
|
|
194
|
+
- No monkey-patching without explicit opt-in
|
|
195
|
+
- Errors handled gracefully — never let the optimizer break the app
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Commit Message Guidelines
|
|
200
|
+
|
|
201
|
+
Follow [Conventional Commits](https://www.conventionalcommits.org/):
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
<type>: <short description>
|
|
205
|
+
|
|
206
|
+
[optional body explaining why]
|
|
207
|
+
|
|
208
|
+
[optional footer: Closes #issue]
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Types: `feat`, `fix`, `docs`, `refactor`, `test`, `chore`
|
|
212
|
+
|
|
213
|
+
Examples:
|
|
214
|
+
```
|
|
215
|
+
feat: add embedding_caller config for custom providers
|
|
216
|
+
fix: handle nil model in raw_llm_call fallback
|
|
217
|
+
docs: update README with per-call config example
|
|
218
|
+
test: add negative scenarios for SemanticCache lookup
|
|
219
|
+
chore: add overcommit pre-commit hooks
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Subject line under 72 characters. Body explains *why*, not *what*.
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Questions?
|
|
227
|
+
|
|
228
|
+
Open a [GitHub Discussion](https://github.com/arunkumarry/llm_optimizer/discussions) or file an issue with the `question` label.
|
data/README.md
CHANGED
|
@@ -24,8 +24,8 @@ LlmOptimizer.configure do |config|
|
|
|
24
24
|
# --- Tuning ---
|
|
25
25
|
config.similarity_threshold = 0.96 # cosine similarity cutoff for a cache hit
|
|
26
26
|
config.token_budget = 4000 # token limit before history summarization kicks in
|
|
27
|
-
config.cache_ttl =
|
|
28
|
-
config.timeout_seconds = 5
|
|
27
|
+
config.cache_ttl = 86_400 # cache entry TTL in seconds (default: 24h)
|
|
28
|
+
config.timeout_seconds = 5 # timeout for embedding / external API calls
|
|
29
29
|
|
|
30
30
|
# --- Logging ---
|
|
31
31
|
config.logger = Rails.logger
|
|
@@ -52,9 +52,9 @@ LlmOptimizer.configure do |config|
|
|
|
52
52
|
# RubyLLM.chat(model: model, provider: provider, assume_model_exists: true) }
|
|
53
53
|
# end
|
|
54
54
|
#
|
|
55
|
-
config.llm_caller =
|
|
55
|
+
config.llm_caller = lambda { |_prompt, **_kwargs|
|
|
56
56
|
raise NotImplementedError, "[llm_optimizer] llm_caller is not configured. " \
|
|
57
|
-
|
|
57
|
+
"Edit config/initializers/llm_optimizer.rb and wire it to your LLM client."
|
|
58
58
|
}
|
|
59
59
|
|
|
60
60
|
# --- Embeddings caller (optional) ---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "logger"
|
|
4
|
-
require "set"
|
|
5
4
|
|
|
6
5
|
module LlmOptimizer
|
|
7
6
|
class Configuration
|
|
@@ -43,7 +42,7 @@ module LlmOptimizer
|
|
|
43
42
|
@logger = Logger.new($stdout)
|
|
44
43
|
@debug_logging = false
|
|
45
44
|
@timeout_seconds = 5
|
|
46
|
-
@cache_ttl =
|
|
45
|
+
@cache_ttl = 86_400
|
|
47
46
|
@llm_caller = nil
|
|
48
47
|
@embedding_caller = nil
|
|
49
48
|
end
|
|
@@ -56,7 +55,7 @@ module LlmOptimizer
|
|
|
56
55
|
self
|
|
57
56
|
end
|
|
58
57
|
|
|
59
|
-
def method_missing(name, *args, &
|
|
58
|
+
def method_missing(name, *args, &)
|
|
60
59
|
key = name.to_s.chomp("=").to_sym
|
|
61
60
|
raise ConfigurationError, "Unknown configuration key: #{key}" unless KNOWN_KEYS.include?(key)
|
|
62
61
|
|
|
@@ -29,8 +29,11 @@ module LlmOptimizer
|
|
|
29
29
|
private
|
|
30
30
|
|
|
31
31
|
def embed_via_openai(text)
|
|
32
|
-
api_key = ENV
|
|
33
|
-
|
|
32
|
+
api_key = ENV.fetch("OPENAI_API_KEY", nil)
|
|
33
|
+
if api_key.nil? || api_key.empty?
|
|
34
|
+
raise EmbeddingError,
|
|
35
|
+
"OPENAI_API_KEY is not set and no embedding_caller configured"
|
|
36
|
+
end
|
|
34
37
|
|
|
35
38
|
uri = URI(OPENAI_ENDPOINT)
|
|
36
39
|
body = JSON.generate({ model: @model, input: text })
|
|
@@ -18,7 +18,7 @@ module LlmOptimizer
|
|
|
18
18
|
def process(messages)
|
|
19
19
|
return messages if estimate_tokens(messages) <= @token_budget
|
|
20
20
|
|
|
21
|
-
count
|
|
21
|
+
count = [SUMMARIZE_COUNT, messages.length].min
|
|
22
22
|
to_summarize = messages.first(count)
|
|
23
23
|
remainder = messages.drop(count)
|
|
24
24
|
|
|
@@ -32,7 +32,8 @@ module LlmOptimizer
|
|
|
32
32
|
|
|
33
33
|
def summarize(messages)
|
|
34
34
|
conversation = messages.map { |m| "#{m[:role] || m["role"]}: #{m[:content] || m["content"]}" }.join("\n")
|
|
35
|
-
prompt = "Summarize the following conversation history concisely,
|
|
35
|
+
prompt = "Summarize the following conversation history concisely, " \
|
|
36
|
+
"preserving key facts and decisions:\n\n#{conversation}"
|
|
36
37
|
|
|
37
38
|
@llm_caller.call(prompt, model: @simple_model)
|
|
38
39
|
rescue StandardError => e
|
|
@@ -12,7 +12,7 @@ module LlmOptimizer
|
|
|
12
12
|
|
|
13
13
|
def route(prompt)
|
|
14
14
|
# explicit override
|
|
15
|
-
return @config.route_to if
|
|
15
|
+
return @config.route_to if %i[simple complex].include?(@config.route_to)
|
|
16
16
|
|
|
17
17
|
# fenced code block
|
|
18
18
|
return :complex if CODE_BLOCK_RE.match?(prompt)
|
data/lib/llm_optimizer.rb
CHANGED
|
@@ -37,13 +37,13 @@ module LlmOptimizer
|
|
|
37
37
|
|
|
38
38
|
# Warns about misconfigured options rather than failing silently at call time.
|
|
39
39
|
def self.validate_configuration!(config)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
40
|
+
return unless config.use_semantic_cache && config.embedding_caller.nil?
|
|
41
|
+
|
|
42
|
+
config.logger.warn(
|
|
43
|
+
"[llm_optimizer] use_semantic_cache is true but no embedding_caller is configured. " \
|
|
44
|
+
"Semantic caching will be skipped. Set config.embedding_caller to enable it."
|
|
45
|
+
)
|
|
46
|
+
config.use_semantic_cache = false
|
|
47
47
|
end
|
|
48
48
|
|
|
49
49
|
# Returns the current global Configuration, lazy-initializing if nil.
|
|
@@ -59,11 +59,11 @@ module LlmOptimizer
|
|
|
59
59
|
|
|
60
60
|
# Opt-in client wrapping
|
|
61
61
|
module WrapperModule
|
|
62
|
-
def chat(params, &
|
|
62
|
+
def chat(params, &)
|
|
63
63
|
prompt = params[:messages] || params[:prompt]
|
|
64
64
|
optimized = LlmOptimizer.optimize(prompt)
|
|
65
65
|
params = params.merge(messages: optimized.messages, model: optimized.model)
|
|
66
|
-
super
|
|
66
|
+
super
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
|
|
@@ -80,7 +80,7 @@ module LlmOptimizer
|
|
|
80
80
|
# options hash keys mirror Configuration attr_accessors and are merged over
|
|
81
81
|
# the global config for this call only. An optional block is yielded a
|
|
82
82
|
# per-call Configuration for fine-grained control.
|
|
83
|
-
def self.optimize(prompt, options = {}
|
|
83
|
+
def self.optimize(prompt, options = {})
|
|
84
84
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
85
85
|
|
|
86
86
|
# Resolve per-call configuration — only pass known config keys
|
|
@@ -88,6 +88,7 @@ module LlmOptimizer
|
|
|
88
88
|
call_config.merge!(configuration)
|
|
89
89
|
options.each do |k, v|
|
|
90
90
|
next unless LlmOptimizer::Configuration::KNOWN_KEYS.include?(k.to_sym)
|
|
91
|
+
|
|
91
92
|
call_config.public_send(:"#{k}=", v)
|
|
92
93
|
end
|
|
93
94
|
yield call_config if block_given?
|
|
@@ -118,8 +119,8 @@ module LlmOptimizer
|
|
|
118
119
|
if call_config.use_semantic_cache
|
|
119
120
|
begin
|
|
120
121
|
emb_client = EmbeddingClient.new(
|
|
121
|
-
model:
|
|
122
|
-
timeout_seconds:
|
|
122
|
+
model: call_config.embedding_model,
|
|
123
|
+
timeout_seconds: call_config.timeout_seconds,
|
|
123
124
|
embedding_caller: call_config.embedding_caller
|
|
124
125
|
)
|
|
125
126
|
embedding = emb_client.embed(prompt)
|
|
@@ -136,14 +137,14 @@ module LlmOptimizer
|
|
|
136
137
|
original_tokens: original_tokens, compressed_tokens: compressed_tokens,
|
|
137
138
|
latency_ms: latency_ms, prompt: original_prompt, response: cached)
|
|
138
139
|
return OptimizeResult.new(
|
|
139
|
-
response:
|
|
140
|
-
model:
|
|
141
|
-
model_tier:
|
|
142
|
-
cache_status:
|
|
143
|
-
original_tokens:
|
|
140
|
+
response: cached,
|
|
141
|
+
model: model,
|
|
142
|
+
model_tier: model_tier,
|
|
143
|
+
cache_status: :hit,
|
|
144
|
+
original_tokens: original_tokens,
|
|
144
145
|
compressed_tokens: compressed_tokens,
|
|
145
|
-
latency_ms:
|
|
146
|
-
messages:
|
|
146
|
+
latency_ms: latency_ms,
|
|
147
|
+
messages: options[:messages]
|
|
147
148
|
)
|
|
148
149
|
end
|
|
149
150
|
end
|
|
@@ -159,7 +160,7 @@ module LlmOptimizer
|
|
|
159
160
|
if call_config.manage_history && messages
|
|
160
161
|
llm_caller = ->(p, model:) { raw_llm_call(p, model: model) }
|
|
161
162
|
history_mgr = HistoryManager.new(
|
|
162
|
-
llm_caller:
|
|
163
|
+
llm_caller: llm_caller,
|
|
163
164
|
simple_model: call_config.simple_model,
|
|
164
165
|
token_budget: call_config.token_budget
|
|
165
166
|
)
|
|
@@ -188,16 +189,15 @@ module LlmOptimizer
|
|
|
188
189
|
latency_ms: latency_ms, prompt: original_prompt, response: response)
|
|
189
190
|
|
|
190
191
|
OptimizeResult.new(
|
|
191
|
-
response:
|
|
192
|
-
model:
|
|
193
|
-
model_tier:
|
|
194
|
-
cache_status:
|
|
195
|
-
original_tokens:
|
|
192
|
+
response: response,
|
|
193
|
+
model: model,
|
|
194
|
+
model_tier: model_tier,
|
|
195
|
+
cache_status: :miss,
|
|
196
|
+
original_tokens: original_tokens,
|
|
196
197
|
compressed_tokens: compressed_tokens,
|
|
197
|
-
latency_ms:
|
|
198
|
-
messages:
|
|
198
|
+
latency_ms: latency_ms,
|
|
199
|
+
messages: messages
|
|
199
200
|
)
|
|
200
|
-
|
|
201
201
|
rescue EmbeddingError => e
|
|
202
202
|
# Treat embedding failures as cache miss — continue to raw LLM call
|
|
203
203
|
logger = configuration.logger
|
|
@@ -205,30 +205,29 @@ module LlmOptimizer
|
|
|
205
205
|
latency_ms = elapsed_ms(start)
|
|
206
206
|
response = raw_llm_call(original_prompt, model: nil, config: configuration)
|
|
207
207
|
OptimizeResult.new(
|
|
208
|
-
response:
|
|
209
|
-
model:
|
|
210
|
-
model_tier:
|
|
211
|
-
cache_status:
|
|
212
|
-
original_tokens:
|
|
208
|
+
response: response,
|
|
209
|
+
model: nil,
|
|
210
|
+
model_tier: nil,
|
|
211
|
+
cache_status: :miss,
|
|
212
|
+
original_tokens: original_tokens || 0,
|
|
213
213
|
compressed_tokens: nil,
|
|
214
|
-
latency_ms:
|
|
215
|
-
messages:
|
|
214
|
+
latency_ms: latency_ms,
|
|
215
|
+
messages: options[:messages]
|
|
216
216
|
)
|
|
217
|
-
|
|
218
217
|
rescue LlmOptimizer::Error, StandardError => e
|
|
219
218
|
logger = configuration.logger
|
|
220
219
|
logger.error("[llm_optimizer] #{e.class}: #{e.message}\n#{e.backtrace&.first(5)&.join("\n")}")
|
|
221
220
|
latency_ms = elapsed_ms(start)
|
|
222
221
|
response = raw_llm_call(original_prompt, model: nil, config: configuration)
|
|
223
222
|
OptimizeResult.new(
|
|
224
|
-
response:
|
|
225
|
-
model:
|
|
226
|
-
model_tier:
|
|
227
|
-
cache_status:
|
|
228
|
-
original_tokens:
|
|
223
|
+
response: response,
|
|
224
|
+
model: nil,
|
|
225
|
+
model_tier: nil,
|
|
226
|
+
cache_status: :miss,
|
|
227
|
+
original_tokens: original_tokens || 0,
|
|
229
228
|
compressed_tokens: nil,
|
|
230
|
-
latency_ms:
|
|
231
|
-
messages:
|
|
229
|
+
latency_ms: latency_ms,
|
|
230
|
+
messages: options[:messages]
|
|
232
231
|
)
|
|
233
232
|
end
|
|
234
233
|
|
|
@@ -239,8 +238,11 @@ module LlmOptimizer
|
|
|
239
238
|
|
|
240
239
|
def raw_llm_call(prompt, model:, config: nil)
|
|
241
240
|
caller = config&.llm_caller || @_current_llm_caller
|
|
242
|
-
|
|
243
|
-
|
|
241
|
+
unless caller
|
|
242
|
+
raise ConfigurationError,
|
|
243
|
+
"No llm_caller configured. " \
|
|
244
|
+
"Set it via LlmOptimizer.configure { |c| c.llm_caller = ->(prompt, model:) { ... } }"
|
|
245
|
+
end
|
|
244
246
|
|
|
245
247
|
caller.call(prompt, model: model)
|
|
246
248
|
end
|
|
@@ -251,7 +253,6 @@ module LlmOptimizer
|
|
|
251
253
|
|
|
252
254
|
def emit_log(logger, config, cache_status:, model_tier:, original_tokens:,
|
|
253
255
|
compressed_tokens:, latency_ms:, prompt:, response:)
|
|
254
|
-
|
|
255
256
|
logger.info(
|
|
256
257
|
"[llm_optimizer] { cache_status: #{cache_status.inspect}, " \
|
|
257
258
|
"model_tier: #{model_tier.inspect}, " \
|
|
@@ -260,9 +261,9 @@ module LlmOptimizer
|
|
|
260
261
|
"latency_ms: #{latency_ms.inspect} }"
|
|
261
262
|
)
|
|
262
263
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
264
|
+
return unless config.debug_logging
|
|
265
|
+
|
|
266
|
+
logger.debug("[llm_optimizer] prompt=#{prompt.inspect} response=#{response.inspect}")
|
|
266
267
|
end
|
|
267
268
|
|
|
268
269
|
def build_redis(redis_url)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llm_optimizer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- arun kumar
|
|
@@ -10,19 +10,19 @@ cert_chain: []
|
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
|
-
name:
|
|
13
|
+
name: logger
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
15
15
|
requirements:
|
|
16
16
|
- - "~>"
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version: '
|
|
18
|
+
version: '1.6'
|
|
19
19
|
type: :runtime
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
23
|
- - "~>"
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version: '
|
|
25
|
+
version: '1.6'
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: msgpack
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -38,58 +38,74 @@ dependencies:
|
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
39
|
version: '1.7'
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
|
-
name:
|
|
41
|
+
name: redis
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
|
43
43
|
requirements:
|
|
44
44
|
- - "~>"
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: '
|
|
46
|
+
version: '5.0'
|
|
47
47
|
type: :runtime
|
|
48
48
|
prerelease: false
|
|
49
49
|
version_requirements: !ruby/object:Gem::Requirement
|
|
50
50
|
requirements:
|
|
51
51
|
- - "~>"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: '
|
|
53
|
+
version: '5.0'
|
|
54
54
|
- !ruby/object:Gem::Dependency
|
|
55
|
-
name:
|
|
55
|
+
name: mocha
|
|
56
56
|
requirement: !ruby/object:Gem::Requirement
|
|
57
57
|
requirements:
|
|
58
58
|
- - "~>"
|
|
59
59
|
- !ruby/object:Gem::Version
|
|
60
|
-
version: '
|
|
60
|
+
version: '2.0'
|
|
61
61
|
type: :development
|
|
62
62
|
prerelease: false
|
|
63
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
64
64
|
requirements:
|
|
65
65
|
- - "~>"
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
|
-
version: '
|
|
67
|
+
version: '2.0'
|
|
68
68
|
- !ruby/object:Gem::Dependency
|
|
69
|
-
name:
|
|
69
|
+
name: overcommit
|
|
70
70
|
requirement: !ruby/object:Gem::Requirement
|
|
71
71
|
requirements:
|
|
72
72
|
- - "~>"
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: '
|
|
74
|
+
version: '0.65'
|
|
75
75
|
type: :development
|
|
76
76
|
prerelease: false
|
|
77
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - "~>"
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: '
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
81
|
+
version: '0.65'
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: prop_check
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - "~>"
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '1.0'
|
|
89
|
+
type: :development
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - "~>"
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: '1.0'
|
|
96
|
+
description: llm_optimizer reduces LLM API costs by up to 80% through semantic caching,
|
|
97
|
+
intelligent model routing, token pruning, and conversation history summarization.
|
|
98
|
+
Strictly opt-in and non-invasive.
|
|
85
99
|
email:
|
|
86
100
|
- arunr.rubydev@gmail.com
|
|
87
101
|
executables: []
|
|
88
102
|
extensions: []
|
|
89
103
|
extra_rdoc_files: []
|
|
90
104
|
files:
|
|
105
|
+
- ".overcommit.yml"
|
|
91
106
|
- CHANGELOG.md
|
|
92
107
|
- CODE_OF_CONDUCT.md
|
|
108
|
+
- CONTRIBUTING.md
|
|
93
109
|
- LICENSE.txt
|
|
94
110
|
- README.md
|
|
95
111
|
- Rakefile
|
|
@@ -114,6 +130,7 @@ metadata:
|
|
|
114
130
|
homepage_uri: https://github.com/arunkumarry/llm_optimizer
|
|
115
131
|
source_code_uri: https://github.com/arunkumarry/llm_optimizer/tree/main
|
|
116
132
|
changelog_uri: https://github.com/arunkumarry/llm_optimizer/blob/main/CHANGELOG.md
|
|
133
|
+
rubygems_mfa_required: 'true'
|
|
117
134
|
rdoc_options: []
|
|
118
135
|
require_paths:
|
|
119
136
|
- lib
|