llmemory 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -1
- data/lib/llmemory/cli/commands/stats.rb +5 -0
- data/lib/llmemory/instrumentation.rb +4 -2
- data/lib/llmemory/llm/anthropic.rb +8 -3
- data/lib/llmemory/llm/base.rb +42 -0
- data/lib/llmemory/llm/openai.rb +27 -12
- data/lib/llmemory/llm/response.rb +18 -0
- data/lib/llmemory/llm/tracking_client.rb +61 -0
- data/lib/llmemory/llm/usage.rb +31 -0
- data/lib/llmemory/llm/usage_ledger.rb +118 -0
- data/lib/llmemory/llm/usage_recorder.rb +37 -0
- data/lib/llmemory/llm.rb +5 -0
- data/lib/llmemory/long_term/episodic/memory.rb +12 -1
- data/lib/llmemory/long_term/graph_based/memory.rb +13 -0
- data/lib/llmemory/long_term/procedural/memory.rb +12 -1
- data/lib/llmemory/mcp/tools/memory_stats.rb +13 -0
- data/lib/llmemory/memory.rb +34 -15
- data/lib/llmemory/short_term/checkpoint.rb +2 -0
- data/lib/llmemory/vector_store/active_record_store.rb +6 -0
- data/lib/llmemory/vector_store/memory_store.rb +6 -0
- data/lib/llmemory/vector_store/openai_embeddings.rb +23 -2
- data/lib/llmemory/version.rb +1 -1
- metadata +6 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 296b9d61d6c474145ecaa607653b37438b2491c846aac602f65d5fd850dae9ef
|
|
4
|
+
data.tar.gz: 521fd05b577c6c17a7dbc5d3771ff9fb3f7cddeaeef31938efabdcfd40db74a5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: edf6ee6b41cb366f2ddef5ac2deec9c3c03090d4920d2386f9578e31999d135b34ad7f995ee9a9e803cf43627f969a3fab9ee191c68e2362a4d62fa8bade0729
|
|
7
|
+
data.tar.gz: 83266918faeb2bb4f7b57e89007b642283d7d64600bf83513948d5378a625fa911aefa6823c54115961bafe34dfdd147f4fb297a8226caad49aaceb6a01372e0
|
data/README.md
CHANGED
|
@@ -51,6 +51,39 @@ memory.clear_session!
|
|
|
51
51
|
- **`prune!(mode: nil)`** — Prunes oversized tool results (soft-trim or hard-clear). Only when `prune_tool_results_enabled` is true.
|
|
52
52
|
- **`check_context_window!`** — Triggers consolidate and compact when context exceeds configured thresholds.
|
|
53
53
|
- **`clear_session!`** — Clears short-term only.
|
|
54
|
+
- **`llm_usage`** — Returns cumulative LLM token usage for this `user_id` (chat/completions + embeddings), persisted in the short-term store.
|
|
55
|
+
|
|
56
|
+
## LLM token usage
|
|
57
|
+
|
|
58
|
+
llmemory captures **real token counts** from OpenAI and Anthropic API responses (chat and embeddings), accumulates them per `user_id`, and exposes them for cost monitoring.
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
memory = Llmemory::Memory.new(user_id: "user_123")
|
|
62
|
+
memory.consolidate!
|
|
63
|
+
memory.maintain!
|
|
64
|
+
|
|
65
|
+
usage = memory.llm_usage
|
|
66
|
+
# => {
|
|
67
|
+
# invoke: { input_tokens: 1200, output_tokens: 400, total_tokens: 1600, calls: 3 },
|
|
68
|
+
# embed: { total_tokens: 48, calls: 2 },
|
|
69
|
+
# updated_at: "2026-07-02T12:00:00Z"
|
|
70
|
+
# }
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
| What | Details |
|
|
74
|
+
|------|---------|
|
|
75
|
+
| **Counted** | `consolidate!`, reflection, skill mining, compaction summaries, iterative retrieval, graph/file extraction, OpenAI embeddings (index + search) |
|
|
76
|
+
| **Scope** | Cumulative per `user_id` (not per session); stored under pseudo-session `__llm_usage__` |
|
|
77
|
+
| **Not counted** | `context_tokens` (local byte estimate), retrieval context budget, MCP auth tokens |
|
|
78
|
+
| **Cache** | Embedding cache hits record zero tokens |
|
|
79
|
+
|
|
80
|
+
**Other surfaces:**
|
|
81
|
+
|
|
82
|
+
- **CLI:** `llmemory stats USER_ID` prints an `LLM TOKEN USAGE` section.
|
|
83
|
+
- **MCP:** `memory_stats` includes the same totals.
|
|
84
|
+
- **Rails metrics:** subscribe to `llm_invoke.llmemory` and `llm_embed.llmemory` (payload includes `input_tokens`, `output_tokens`, `total_tokens`, `response_chars`).
|
|
85
|
+
|
|
86
|
+
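Dollar cost is not computed — multiply the token counts by your model's pricing externally. For lower-level access, `Llmemory::LLM::OpenAI#invoke` and `Llmemory::LLM::Anthropic#invoke` return a `Response` that exposes the generated text as `#content` (also returned by `#to_s`) and the token counts as `#usage`.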
|
|
54
87
|
|
|
55
88
|
## Configuration
|
|
56
89
|
|
|
@@ -685,7 +718,7 @@ MCP_TOKEN=your-secret-token llmemory mcp serve --http --port 443 \
|
|
|
685
718
|
| `memory_timeline_context` | Get N items before/after a specific memory |
|
|
686
719
|
| `memory_add_message` | Add message to short-term conversation (roles: user, assistant, system, tool, tool_result) |
|
|
687
720
|
| `memory_consolidate` | Extract facts from conversation to long-term |
|
|
688
|
-
| `memory_stats` | Get memory statistics for a user |
|
|
721
|
+
| `memory_stats` | Get memory statistics for a user (includes LLM token usage) |
|
|
689
722
|
| `memory_info` | Documentation on how to use the tools |
|
|
690
723
|
| `memory_episode_record` / `memory_episodes` | Record / list episodic trajectories |
|
|
691
724
|
| `memory_skill_register` / `memory_skill_report` / `memory_skills` | Register / outcome-track / list procedural skills |
|
|
@@ -41,6 +41,11 @@ module Llmemory
|
|
|
41
41
|
puts "Long-term (file) categories: #{storage.list_categories(user_id).size}"
|
|
42
42
|
puts "Long-term (file) resources: #{storage.list_resources(user_id: user_id).size}"
|
|
43
43
|
end
|
|
44
|
+
|
|
45
|
+
puts "---"
|
|
46
|
+
puts Llmemory::LLM::UsageLedger.format_text(
|
|
47
|
+
Llmemory::LLM::UsageLedger.new(store: short_store).totals(user_id)
|
|
48
|
+
)
|
|
44
49
|
end
|
|
45
50
|
|
|
46
51
|
def print_global_stats(short_store, long_type)
|
|
@@ -10,8 +10,10 @@ module Llmemory
|
|
|
10
10
|
# Events (payload keys are best-effort; subscribers should treat them as
|
|
11
11
|
# optional):
|
|
12
12
|
#
|
|
13
|
-
# llm_invoke.llmemory provider:, model:, prompt_chars:, response_chars
|
|
14
|
-
#
|
|
13
|
+
# llm_invoke.llmemory provider:, model:, prompt_chars:, response_chars:,
|
|
14
|
+
# input_tokens:, output_tokens:, total_tokens:
|
|
15
|
+
# llm_embed.llmemory provider:, model:, text_chars:, input_tokens:,
|
|
16
|
+
# output_tokens:, total_tokens:
|
|
15
17
|
# memory_write.llmemory memory_type:, user_id:
|
|
16
18
|
# memory_forget.llmemory memory_type:, user_id:, count:
|
|
17
19
|
# retrieve.llmemory query_chars:, candidates:, results:
|
|
@@ -11,6 +11,7 @@ module Llmemory
|
|
|
11
11
|
DEFAULT_MODEL = "claude-sonnet-4-6"
|
|
12
12
|
|
|
13
13
|
def initialize(api_key: nil, model: nil, base_url: nil)
|
|
14
|
+
super()
|
|
14
15
|
@api_key = api_key || config.llm_api_key || ENV["ANTHROPIC_API_KEY"]
|
|
15
16
|
@model = model || config.llm_model || DEFAULT_MODEL
|
|
16
17
|
@base_url = base_url || config.llm_base_url || DEFAULT_BASE_URL
|
|
@@ -18,7 +19,8 @@ module Llmemory
|
|
|
18
19
|
|
|
19
20
|
def invoke(prompt)
|
|
20
21
|
result = nil
|
|
21
|
-
|
|
22
|
+
payload = { provider: :anthropic, model: @model, prompt_chars: prompt.to_s.length }
|
|
23
|
+
Llmemory::Instrumentation.instrument(:llm_invoke, payload) do
|
|
22
24
|
response = connection.post("v1/messages") do |req|
|
|
23
25
|
req.body = {
|
|
24
26
|
model: @model,
|
|
@@ -33,8 +35,11 @@ module Llmemory
|
|
|
33
35
|
raise Llmemory::LLMError, "Anthropic API error: #{response.body}" unless response.success?
|
|
34
36
|
|
|
35
37
|
body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
|
|
36
|
-
content = body.dig("content", 0, "text")
|
|
37
|
-
|
|
38
|
+
content = body.dig("content", 0, "text")&.strip || ""
|
|
39
|
+
usage = parse_anthropic_usage(body["usage"])
|
|
40
|
+
record_usage(usage)
|
|
41
|
+
payload.merge!(instrumentation_payload(usage, content))
|
|
42
|
+
result = Response.new(content, usage: usage)
|
|
38
43
|
end
|
|
39
44
|
result
|
|
40
45
|
end
|
data/lib/llmemory/llm/base.rb
CHANGED
|
@@ -1,8 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "usage"
|
|
4
|
+
require_relative "response"
|
|
5
|
+
|
|
3
6
|
module Llmemory
|
|
4
7
|
module LLM
|
|
5
8
|
class Base
|
|
9
|
+
attr_reader :last_usage
|
|
10
|
+
|
|
11
|
+
# Starts every client with a zeroed usage record, so +last_usage+ has a
# value even before the first call. Positional arguments are accepted and
# ignored.
def initialize(*_ignored)
  @last_usage = Usage.zero
end
|
|
14
|
+
|
|
6
15
|
def invoke(prompt)
|
|
7
16
|
raise NotImplementedError, "#{self.class}#invoke must be implemented"
|
|
8
17
|
end
|
|
@@ -18,6 +27,39 @@ module Llmemory
|
|
|
18
27
|
def config
|
|
19
28
|
Llmemory.configuration
|
|
20
29
|
end
|
|
30
|
+
|
|
31
|
+
# Builds a Usage from the "usage" object of an OpenAI chat-completion body.
#
# Keys are looked up as strings first, then as symbols. Missing prompt or
# completion counts default to 0; a missing total is passed on as nil.
# Anything that is not a Hash yields Usage.zero.
def parse_openai_chat_usage(raw)
  return Usage.zero unless raw.is_a?(Hash)

  prompt = raw["prompt_tokens"] || raw[:prompt_tokens] || 0
  completion = raw["completion_tokens"] || raw[:completion_tokens] || 0
  total = raw["total_tokens"] || raw[:total_tokens]
  Usage.new(input_tokens: prompt, output_tokens: completion, total_tokens: total)
end
|
|
40
|
+
|
|
41
|
+
# Builds a Usage from the "usage" object of an Anthropic messages body.
#
# Keys are looked up as strings first, then as symbols; missing counts
# default to 0. No total is passed to Usage.new. Anything that is not a
# Hash yields Usage.zero.
def parse_anthropic_usage(raw)
  return Usage.zero unless raw.is_a?(Hash)

  Usage.new(
    input_tokens: raw["input_tokens"] || raw[:input_tokens] || 0,
    output_tokens: raw["output_tokens"] || raw[:output_tokens] || 0
  )
end
|
|
48
|
+
|
|
49
|
+
# Builds a Usage from the "usage" object of an OpenAI embeddings body.
#
# Embedding requests only consume input, so +prompt_tokens+ is reported as
# +input_tokens+ and +output_tokens+ is always 0. When the body omits
# +total_tokens+, the prompt count is used as the total. Keys are looked up
# as strings first, then as symbols. Anything that is not a Hash yields
# Usage.zero.
def parse_openai_embed_usage(raw)
  return Usage.zero unless raw.is_a?(Hash)

  prompt = raw["prompt_tokens"] || raw[:prompt_tokens]
  total = raw["total_tokens"] || raw[:total_tokens] || prompt || 0
  Usage.new(input_tokens: prompt || 0, output_tokens: 0, total_tokens: total)
end
|
|
55
|
+
|
|
56
|
+
# Stores +usage+ as the usage of the most recent call and returns it.
def record_usage(usage)
  @last_usage = usage
end
|
|
59
|
+
|
|
60
|
+
# Assembles the instrumentation payload for a finished call: the token
# counts from +usage+, the character length of +content+, and finally any
# +extra+ entries (which win on key collisions).
def instrumentation_payload(usage, content, extra = {})
  payload = usage.to_h
  payload = payload.merge(response_chars: content.to_s.length)
  payload.merge(extra)
end
|
|
21
63
|
end
|
|
22
64
|
end
|
|
23
65
|
end
|
data/lib/llmemory/llm/openai.rb
CHANGED
|
@@ -11,6 +11,7 @@ module Llmemory
|
|
|
11
11
|
DEFAULT_MODEL = "gpt-4"
|
|
12
12
|
|
|
13
13
|
def initialize(api_key: nil, model: nil, base_url: nil)
|
|
14
|
+
super()
|
|
14
15
|
@api_key = api_key || config.llm_api_key
|
|
15
16
|
@model = model || config.llm_model || DEFAULT_MODEL
|
|
16
17
|
@base_url = base_url || config.llm_base_url || DEFAULT_BASE_URL
|
|
@@ -18,7 +19,8 @@ module Llmemory
|
|
|
18
19
|
|
|
19
20
|
def invoke(prompt)
|
|
20
21
|
result = nil
|
|
21
|
-
|
|
22
|
+
payload = { provider: :openai, model: @model, prompt_chars: prompt.to_s.length }
|
|
23
|
+
Llmemory::Instrumentation.instrument(:llm_invoke, payload) do
|
|
22
24
|
response = connection.post("chat/completions") do |req|
|
|
23
25
|
req.body = {
|
|
24
26
|
model: @model,
|
|
@@ -32,7 +34,11 @@ module Llmemory
|
|
|
32
34
|
raise Llmemory::LLMError, "OpenAI API error: #{response.body}" unless response.success?
|
|
33
35
|
|
|
34
36
|
body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
|
|
35
|
-
|
|
37
|
+
content = body.dig("choices", 0, "message", "content")&.strip || ""
|
|
38
|
+
usage = parse_openai_chat_usage(body["usage"])
|
|
39
|
+
record_usage(usage)
|
|
40
|
+
payload.merge!(instrumentation_payload(usage, content))
|
|
41
|
+
result = Response.new(content, usage: usage)
|
|
36
42
|
end
|
|
37
43
|
result
|
|
38
44
|
end
|
|
@@ -54,18 +60,27 @@ module Llmemory
|
|
|
54
60
|
}
|
|
55
61
|
}
|
|
56
62
|
}
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
63
|
+
parsed = nil
|
|
64
|
+
instrument_payload = { provider: :openai, model: @model, prompt_chars: prompt.to_s.length }
|
|
65
|
+
Llmemory::Instrumentation.instrument(:llm_invoke, instrument_payload) do
|
|
66
|
+
response = connection.post("chat/completions") do |req|
|
|
67
|
+
req.body = payload.to_json
|
|
68
|
+
req.headers["Content-Type"] = "application/json"
|
|
69
|
+
req.headers["Authorization"] = "Bearer #{@api_key}"
|
|
70
|
+
end
|
|
62
71
|
|
|
63
|
-
|
|
72
|
+
raise Llmemory::LLMError, "OpenAI API error: #{response.body}" unless response.success?
|
|
73
|
+
|
|
74
|
+
body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
|
|
75
|
+
content = body.dig("choices", 0, "message", "content")&.strip
|
|
76
|
+
usage = parse_openai_chat_usage(body["usage"])
|
|
77
|
+
record_usage(usage)
|
|
78
|
+
instrument_payload.merge!(instrumentation_payload(usage, content.to_s))
|
|
79
|
+
return {} if content.nil? || content.empty?
|
|
64
80
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
JSON.parse(content)
|
|
81
|
+
parsed = JSON.parse(content)
|
|
82
|
+
end
|
|
83
|
+
parsed
|
|
69
84
|
rescue JSON::ParserError => e
|
|
70
85
|
raise Llmemory::LLMError, "Failed to parse JSON response: #{e.message}"
|
|
71
86
|
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Llmemory
  module LLM
    # Result of a single LLM call: the generated text plus the token usage
    # reported for that call.
    #
    # A Response converts implicitly to a String (+to_str+), so it can be
    # passed to APIs that require a real String, such as JSON.parse or
    # String#+.
    class Response
      attr_reader :content, :usage

      # @param content [#to_s] generated text; coerced with #to_s, so nil becomes ""
      # @param usage [Usage] token usage of the call (defaults to Usage.zero)
      def initialize(content, usage: Usage.zero)
        @content = content.to_s
        @usage = usage
      end

      # @return [String] the generated text (same value as #content)
      def to_s
        @content
      end

      # Implicit String conversion; see the class comment.
      alias to_str to_s
    end
  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "usage_recorder"
|
|
4
|
+
|
|
5
|
+
module Llmemory
  module LLM
    # Transparent wrapper around an LLM client that records the token usage
    # of every call to the per-user ledger through UsageRecorder.
    #
    # The wrapped client is resolved lazily: when +inner+ is nil it is built
    # on first use with Llmemory::LLM.client(api_key:). Any method not defined
    # here is forwarded to the wrapped client, including keyword arguments
    # and blocks.
    class TrackingClient
      # @param inner [Object, nil] client to wrap; nil defers to Llmemory::LLM.client
      # @param user_id [String] user whose ledger receives the usage
      # @param store [Object, nil] short-term store handed to UsageRecorder
      # @param api_key [String, nil] key used when the client is built lazily
      def initialize(inner, user_id:, store: nil, api_key: nil)
        @inner = inner
        @user_id = user_id
        @store = store
        @api_key = api_key
      end

      # Calls the wrapped client and records the usage of the call. The usage
      # is taken from the response when it exposes +usage+, otherwise from the
      # client's +last_usage+.
      #
      # @return [Object] whatever the wrapped client returned
      def invoke(prompt)
        response = inner_client.invoke(prompt)
        track(response.respond_to?(:usage) ? response.usage : last_usage)
        response
      end

      # Structured-output variant. The result is parsed data without usage
      # information, so the usage always comes from the client's +last_usage+.
      #
      # @return [Object] whatever the wrapped client returned
      def invoke_with_json_schema(prompt, json_schema)
        result = inner_client.invoke_with_json_schema(prompt, json_schema)
        track(last_usage)
        result
      end

      # @return [Object] usage of the wrapped client's most recent call, or
      #   Usage.zero when the client does not expose +last_usage+
      def last_usage
        client = inner_client
        client.respond_to?(:last_usage) ? client.last_usage : Usage.zero
      end

      # Keeps +respond_to?+ and +method+ consistent with the forwarding done
      # in +method_missing+.
      def respond_to_missing?(method, include_private = false)
        inner_client.respond_to?(method, include_private) || super
      end

      # Forwards unknown public methods to the wrapped client. Keyword
      # arguments are captured separately so they are not turned into a
      # positional Hash on Ruby 3.
      def method_missing(method, *args, **kwargs, &block)
        return super unless inner_client.respond_to?(method)

        inner_client.public_send(method, *args, **kwargs, &block)
      end

      private

      # Writes one :invoke entry to the ledger of the configured user.
      def track(usage)
        UsageRecorder.record(user_id: @user_id, usage: usage, operation: :invoke, store: @store)
      end

      def inner_client
        @inner_client ||= @inner || Llmemory::LLM.client(api_key: @api_key)
      end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Llmemory
  module LLM
    # Immutable token counts of one or more LLM calls.
    #
    # Two Usage objects with the same counts are equal (+==+, +eql?+) and
    # hash alike, so they compare by value and work as Hash keys.
    class Usage
      attr_reader :input_tokens, :output_tokens, :total_tokens

      # All counts are coerced with #to_i. When +total_tokens+ is nil the
      # total is the sum of input and output tokens.
      def initialize(input_tokens:, output_tokens:, total_tokens: nil)
        @input_tokens = input_tokens.to_i
        @output_tokens = output_tokens.to_i
        @total_tokens = total_tokens.nil? ? @input_tokens + @output_tokens : total_tokens.to_i
      end

      # @return [Usage] a usage with every count set to 0
      def self.zero
        new(input_tokens: 0, output_tokens: 0, total_tokens: 0)
      end

      # @return [Usage] a new usage holding the field-wise sums
      def +(other)
        self.class.new(
          input_tokens: @input_tokens + other.input_tokens,
          output_tokens: @output_tokens + other.output_tokens,
          total_tokens: @total_tokens + other.total_tokens
        )
      end

      # @return [Hash{Symbol => Integer}] the three counts keyed by name
      def to_h
        { input_tokens: @input_tokens, output_tokens: @output_tokens, total_tokens: @total_tokens }
      end

      # Value equality: true only for another Usage with identical counts.
      def ==(other)
        other.is_a?(self.class) && to_h == other.to_h
      end
      alias eql? ==

      # Hash code consistent with #eql?.
      def hash
        [self.class, @input_tokens, @output_tokens, @total_tokens].hash
      end
    end
  end
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
require_relative "../short_term/stores"
|
|
5
|
+
|
|
6
|
+
module Llmemory
  module LLM
    # Cumulative LLM token usage per user, persisted in the short-term store
    # under a pseudo-session key (same pattern as ForgetLog).
    #
    # Totals are returned with symbol keys:
    #   invoke:     input_tokens, output_tokens, total_tokens, calls
    #   embed:      total_tokens, calls
    #   updated_at: ISO 8601 UTC timestamp of the last recorded call, or nil
    # The state is saved to the store with string keys.
    class UsageLedger
      SESSION_KEY = "__llm_usage__"

      INVOKE_FIELDS = %i[input_tokens output_tokens total_tokens calls].freeze
      EMBED_FIELDS = %i[total_tokens calls].freeze

      class << self
        # Renders totals as the multi-line "LLM TOKEN USAGE" text block.
        # Accepts symbol- or string-keyed totals; missing buckets or counts
        # are shown as 0.
        #
        # @param totals [Hash, nil]
        # @return [String]
        def format_text(totals)
          state = normalize(totals)
          inv = state[:invoke]
          emb = state[:embed]
          lines = [
            "LLM TOKEN USAGE:",
            " Chat/completions: #{inv[:total_tokens]} total (#{inv[:input_tokens]} in, #{inv[:output_tokens]} out, #{inv[:calls]} calls)",
            " Embeddings: #{emb[:total_tokens]} total (#{emb[:calls]} calls)"
          ]
          lines << " Last updated: #{state[:updated_at]}" if state[:updated_at]
          lines.join("\n")
        end

        # Coerces any stored or partial state into the canonical symbol-keyed
        # shape with Integer counts. Always returns fresh hashes. Internal
        # helper shared by the instance methods and +format_text+.
        def normalize(state)
          state = {} unless state.is_a?(Hash)
          {
            invoke: bucket(state[:invoke] || state["invoke"], INVOKE_FIELDS),
            embed: bucket(state[:embed] || state["embed"], EMBED_FIELDS),
            updated_at: state[:updated_at] || state["updated_at"]
          }
        end

        private

        def bucket(raw, fields)
          raw = {} unless raw.is_a?(Hash)
          fields.to_h { |field| [field, (raw[field] || raw[field.to_s] || 0).to_i] }
        end
      end

      # @param store [Object, nil] short-term store responding to +load+ and
      #   +save+; built with ShortTerm::Stores.build when nil
      def initialize(store: nil)
        @store = store || ShortTerm::Stores.build
      end

      # Adds one call to the user's running totals and saves them.
      #
      # @param user_id [String]
      # @param usage [#input_tokens, #output_tokens, #total_tokens]
      # @param operation [Symbol, String] :invoke or :embed; any other value
      #   leaves the ledger untouched
      # @return [Hash] the totals after the update
      def record(user_id, usage, operation:)
        state = load_raw(user_id)
        case operation.to_sym
        when :invoke
          tally = state[:invoke]
          tally[:input_tokens] += usage.input_tokens
          tally[:output_tokens] += usage.output_tokens
          tally[:total_tokens] += usage.total_tokens
          tally[:calls] += 1
        when :embed
          tally = state[:embed]
          tally[:total_tokens] += usage.total_tokens
          tally[:calls] += 1
        else
          return state
        end
        # UTC keeps the timestamp independent of the host time zone.
        state[:updated_at] = Time.now.utc.iso8601
        @store.save(user_id, SESSION_KEY, stringify(state))
        totals(user_id)
      end

      # @return [Hash] current totals for +user_id+ (zeros when nothing is stored)
      def totals(user_id)
        load_raw(user_id)
      end

      # Overwrites the user's totals with zeros.
      #
      # @return [Hash] the empty totals
      def reset!(user_id)
        empty = default_state
        @store.save(user_id, SESSION_KEY, stringify(empty))
        empty
      end

      private

      def load_raw(user_id)
        self.class.normalize(@store.load(user_id, SESSION_KEY))
      end

      def default_state
        self.class.normalize(nil)
      end

      # Converts the top-level and bucket keys to strings for storage.
      def stringify(state)
        state.transform_keys(&:to_s).transform_values do |v|
          v.is_a?(Hash) ? v.transform_keys(&:to_s) : v
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "usage_ledger"
|
|
4
|
+
|
|
5
|
+
module Llmemory
  module LLM
    # Stateless helpers that push usage figures into the per-user UsageLedger.
    module UsageRecorder
      module_function

      # Records +usage+ for +user_id+ under +operation+. Does nothing when
      # the user id is nil/empty or the usage is nil.
      def record(user_id:, usage:, operation:, store: nil)
        return if user_id.nil? || user_id.to_s.empty? || usage.nil?

        ledger = UsageLedger.new(store: store)
        ledger.record(user_id, usage, operation: operation)
      end

      # Looks up the usage of the vector store's latest embedding call and
      # records it as an :embed operation. Does nothing when no usage is found.
      def record_embed_from_store(user_id:, vector_store:, store: nil)
        found = embed_usage_from(vector_store)
        record(user_id: user_id, usage: found, operation: :embed, store: store) if found
      end

      # Returns the vector store's own +last_usage+ when it has one; otherwise
      # falls back to the +last_usage+ of its @embedding_provider instance
      # variable. Returns nil when neither is available.
      def embed_usage_from(vector_store)
        return nil unless vector_store

        direct = vector_store.last_usage if vector_store.respond_to?(:last_usage)
        return direct unless direct.nil?

        return nil unless vector_store.instance_variable_defined?(:@embedding_provider)

        provider = vector_store.instance_variable_get(:@embedding_provider)
        return nil unless provider&.respond_to?(:last_usage)

        provider.last_usage
      end
    end
  end
end
|
data/lib/llmemory/llm.rb
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "llm/base"
|
|
4
|
+
require_relative "llm/usage"
|
|
5
|
+
require_relative "llm/response"
|
|
6
|
+
require_relative "llm/usage_ledger"
|
|
7
|
+
require_relative "llm/usage_recorder"
|
|
8
|
+
require_relative "llm/tracking_client"
|
|
4
9
|
require_relative "llm/openai"
|
|
5
10
|
require_relative "llm/anthropic"
|
|
6
11
|
|
|
@@ -135,6 +135,7 @@ module Llmemory
|
|
|
135
135
|
vs = vector_store
|
|
136
136
|
return if vs.nil? || text.to_s.strip.empty?
|
|
137
137
|
embedding = vs.embed(text)
|
|
138
|
+
record_embed_usage(vs)
|
|
138
139
|
return unless embedding
|
|
139
140
|
vs.store(id: id, embedding: embedding, metadata: { text: text, created_at: Time.now }, user_id: @user_id)
|
|
140
141
|
rescue StandardError
|
|
@@ -142,7 +143,9 @@ module Llmemory
|
|
|
142
143
|
end
|
|
143
144
|
|
|
144
145
|
def vector_candidates(query, top_k, vs)
|
|
145
|
-
vs.search_by_text(query.to_s, top_k: top_k, user_id: @user_id)
|
|
146
|
+
results = vs.search_by_text(query.to_s, top_k: top_k, user_id: @user_id)
|
|
147
|
+
record_embed_usage(vs)
|
|
148
|
+
results.filter_map do |r|
|
|
146
149
|
raw = @storage.get_episode(@user_id, r[:id] || r["id"])
|
|
147
150
|
raw && candidate_for(raw, (r[:score] || r["score"] || 1.0).to_f)
|
|
148
151
|
end
|
|
@@ -183,6 +186,14 @@ module Llmemory
|
|
|
183
186
|
return nil if actions.empty?
|
|
184
187
|
"Episode with #{normalized.size} step(s): #{actions.join(' -> ')}"
|
|
185
188
|
end
|
|
189
|
+
|
|
190
|
+
# Records the usage reported by +vector_store+ for its latest embedding
# call in this user's LLM usage ledger. Returns early for a nil store, so
# no short-term store is built when there is nothing to record.
def record_embed_usage(vector_store)
  return unless vector_store

  Llmemory::LLM::UsageRecorder.record_embed_from_store(
    user_id: @user_id,
    vector_store: vector_store,
    store: Llmemory::ShortTerm::Stores.build(cipher: @cipher)
  )
end
|
|
186
197
|
end
|
|
187
198
|
end
|
|
188
199
|
end
|
|
@@ -162,6 +162,7 @@ module Llmemory
|
|
|
162
162
|
|
|
163
163
|
edge_text = "#{subject} #{predicate} #{object}"
|
|
164
164
|
embedding = @vector_store.respond_to?(:embed) ? @vector_store.embed(edge_text) : nil
|
|
165
|
+
record_embed_usage(@vector_store) if embedding
|
|
165
166
|
if embedding && @vector_store.respond_to?(:store)
|
|
166
167
|
@vector_store.store(id: edge_id, embedding: embedding, metadata: { text: edge_text, created_at: Time.now }, user_id: @user_id)
|
|
167
168
|
end
|
|
@@ -172,8 +173,10 @@ module Llmemory
|
|
|
172
173
|
vector_results = []
|
|
173
174
|
if @vector_store.respond_to?(:search_by_text)
|
|
174
175
|
vector_results = @vector_store.search_by_text(query.to_s, top_k: top_k, user_id: @user_id)
|
|
176
|
+
record_embed_usage(@vector_store)
|
|
175
177
|
elsif @vector_store.respond_to?(:embed) && @vector_store.respond_to?(:search)
|
|
176
178
|
emb = @vector_store.embed(query.to_s)
|
|
179
|
+
record_embed_usage(@vector_store)
|
|
177
180
|
vector_results = @vector_store.search(emb, top_k: top_k, user_id: @user_id)
|
|
178
181
|
end
|
|
179
182
|
|
|
@@ -231,6 +234,16 @@ module Llmemory
|
|
|
231
234
|
lines << "=== END MEMORIES ==="
|
|
232
235
|
lines.join("\n")
|
|
233
236
|
end
|
|
237
|
+
|
|
238
|
+
# Records the usage reported by +vector_store+ for its latest embedding
# call in this user's LLM usage ledger. A nil store is ignored.
def record_embed_usage(vector_store)
  return unless vector_store

  ledger_store = Llmemory::ShortTerm::Stores.build(cipher: @cipher)
  Llmemory::LLM::UsageRecorder.record_embed_from_store(
    user_id: @user_id, vector_store: vector_store, store: ledger_store
  )
end
|
|
234
247
|
end
|
|
235
248
|
end
|
|
236
249
|
end
|
|
@@ -141,6 +141,7 @@ module Llmemory
|
|
|
141
141
|
vs = vector_store
|
|
142
142
|
return if vs.nil? || text.to_s.strip.empty?
|
|
143
143
|
embedding = vs.embed(text)
|
|
144
|
+
record_embed_usage(vs)
|
|
144
145
|
return unless embedding
|
|
145
146
|
vs.store(id: id, embedding: embedding, metadata: { text: text, created_at: Time.now }, user_id: @user_id)
|
|
146
147
|
rescue StandardError
|
|
@@ -148,7 +149,9 @@ module Llmemory
|
|
|
148
149
|
end
|
|
149
150
|
|
|
150
151
|
def vector_candidates(query, top_k, vs)
|
|
151
|
-
vs.search_by_text(query.to_s, top_k: top_k, user_id: @user_id)
|
|
152
|
+
results = vs.search_by_text(query.to_s, top_k: top_k, user_id: @user_id)
|
|
153
|
+
record_embed_usage(vs)
|
|
154
|
+
results.filter_map do |r|
|
|
152
155
|
raw = @storage.get_skill(@user_id, r[:id] || r["id"])
|
|
153
156
|
raw && candidate_for(raw, (r[:score] || r["score"] || 1.0).to_f)
|
|
154
157
|
end
|
|
@@ -178,6 +181,14 @@ module Llmemory
|
|
|
178
181
|
end
|
|
179
182
|
by_id.values.sort_by { |c| -c[:score].to_f }.first(top_k)
|
|
180
183
|
end
|
|
184
|
+
|
|
185
|
+
# Records the usage reported by +vector_store+ for its latest embedding
# call in this user's LLM usage ledger. Returns early for a nil store, so
# no short-term store is built when there is nothing to record.
def record_embed_usage(vector_store)
  return unless vector_store

  Llmemory::LLM::UsageRecorder.record_embed_from_store(
    user_id: @user_id,
    vector_store: vector_store,
    store: Llmemory::ShortTerm::Stores.build(cipher: @cipher)
  )
end
|
|
181
192
|
end
|
|
182
193
|
end
|
|
183
194
|
end
|
|
@@ -52,6 +52,8 @@ module Llmemory
|
|
|
52
52
|
stats[:long_term] = { error: e.message }
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
+
stats[:llm_usage] = Llmemory::LLM::UsageLedger.new(store: store).totals(user_id)
|
|
56
|
+
|
|
55
57
|
::MCP::Tool::Response.new([{
|
|
56
58
|
type: "text",
|
|
57
59
|
text: format_stats(stats)
|
|
@@ -102,8 +104,19 @@ module Llmemory
|
|
|
102
104
|
output << " Resources: #{stats[:long_term][:resources]}"
|
|
103
105
|
end
|
|
104
106
|
|
|
107
|
+
output << ""
|
|
108
|
+
output << Llmemory::LLM::UsageLedger.format_text(stats[:llm_usage] || default_llm_usage)
|
|
109
|
+
|
|
105
110
|
output.join("\n")
|
|
106
111
|
end
|
|
112
|
+
|
|
113
|
+
# All-zero usage totals, used when the stats carry no :llm_usage entry.
def default_llm_usage
  chat = { input_tokens: 0, output_tokens: 0, total_tokens: 0, calls: 0 }
  embeddings = { total_tokens: 0, calls: 0 }
  { invoke: chat, embed: embeddings, updated_at: nil }
end
|
|
107
120
|
end
|
|
108
121
|
end
|
|
109
122
|
end
|
data/lib/llmemory/memory.rb
CHANGED
|
@@ -15,22 +15,28 @@ module Llmemory
|
|
|
15
15
|
@session_id = session_id
|
|
16
16
|
resolved_key = encryption_key == :inherit ? nil : encryption_key
|
|
17
17
|
@cipher = Llmemory.build_cipher(resolved_key)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
if checkpoint
|
|
19
|
+
@checkpoint = checkpoint
|
|
20
|
+
@short_term_store = checkpoint.store
|
|
21
|
+
else
|
|
22
|
+
@short_term_store = build_short_term_store(@cipher)
|
|
23
|
+
@checkpoint = ShortTerm::Checkpoint.new(
|
|
24
|
+
user_id: user_id,
|
|
25
|
+
session_id: session_id,
|
|
26
|
+
store: @short_term_store,
|
|
27
|
+
cipher: @cipher
|
|
28
|
+
)
|
|
29
|
+
end
|
|
23
30
|
@working_memory = working_memory
|
|
24
31
|
@episodic = episodic
|
|
25
32
|
@procedural = procedural
|
|
26
|
-
@
|
|
33
|
+
@api_key = api_key unless api_key.to_s.empty?
|
|
27
34
|
type = long_term_type || Llmemory.configuration.long_term_type || :file_based
|
|
28
35
|
@long_term = long_term || build_long_term(type)
|
|
29
|
-
short_term_store = build_short_term_store(@cipher)
|
|
30
36
|
@retrieval_engine = retrieval_engine || Retrieval::Engine.new(
|
|
31
37
|
@long_term,
|
|
32
|
-
llm:
|
|
33
|
-
feedback: Retrieval::FeedbackStore.new(store: short_term_store)
|
|
38
|
+
llm: tracked_llm_client,
|
|
39
|
+
feedback: Retrieval::FeedbackStore.new(store: @short_term_store)
|
|
34
40
|
)
|
|
35
41
|
end
|
|
36
42
|
|
|
@@ -66,14 +72,14 @@ module Llmemory
|
|
|
66
72
|
# Reflects over recent episodes and writes distilled insights to the
|
|
67
73
|
# semantic store (file/graph) with provenance back to source episodes.
|
|
68
74
|
def reflect!(window: 10, category: "insights")
|
|
69
|
-
Reflection::Reflector.new(episodic: episodic, semantic: @long_term, llm:
|
|
75
|
+
Reflection::Reflector.new(episodic: episodic, semantic: @long_term, llm: tracked_llm_client)
|
|
70
76
|
.reflect(window: window, category: category)
|
|
71
77
|
end
|
|
72
78
|
|
|
73
79
|
# Reasoning action: render a prompt from working memory, call the LLM, write
|
|
74
80
|
# the result back. Composable; does not touch long-term memory.
|
|
75
81
|
def reason(template:, into: Actions::Reason::DEFAULT_SLOT, parse: nil)
|
|
76
|
-
Actions::Reason.call(working_memory: working_memory, template: template, into: into, parse: parse, llm:
|
|
82
|
+
Actions::Reason.call(working_memory: working_memory, template: template, into: into, parse: parse, llm: tracked_llm_client)
|
|
77
83
|
end
|
|
78
84
|
|
|
79
85
|
# Mines recent episodes for reusable skills (Voyager-style). Human-in-the-loop
|
|
@@ -81,7 +87,7 @@ module Llmemory
|
|
|
81
87
|
# `auto_register: true`, registers them in procedural memory (with provenance
|
|
82
88
|
# back to the source episodes) and returns the new skill ids.
|
|
83
89
|
def mine_skills!(window: SkillMining::Miner::DEFAULT_WINDOW, outcomes: nil, auto_register: false)
|
|
84
|
-
SkillMining::Miner.new(episodic: episodic, procedural: procedural, llm:
|
|
90
|
+
SkillMining::Miner.new(episodic: episodic, procedural: procedural, llm: tracked_llm_client)
|
|
85
91
|
.mine(window: window, outcomes: outcomes, auto_register: auto_register)
|
|
86
92
|
end
|
|
87
93
|
|
|
@@ -91,7 +97,7 @@ module Llmemory
|
|
|
91
97
|
def maintain!(**opts)
|
|
92
98
|
Maintenance::CognitivePass.run!(
|
|
93
99
|
@user_id,
|
|
94
|
-
memory: self, episodic: episodic, procedural: procedural, semantic: @long_term, llm:
|
|
100
|
+
memory: self, episodic: episodic, procedural: procedural, semantic: @long_term, llm: tracked_llm_client,
|
|
95
101
|
**opts
|
|
96
102
|
)
|
|
97
103
|
end
|
|
@@ -245,6 +251,10 @@ module Llmemory
|
|
|
245
251
|
@user_id
|
|
246
252
|
end
|
|
247
253
|
|
|
254
|
+
# Cumulative LLM token usage for this user, read from the usage ledger kept
# in the short-term store.
def llm_usage
  ledger = Llmemory::LLM::UsageLedger.new(store: @short_term_store)
  ledger.totals(@user_id)
end
|
|
257
|
+
|
|
248
258
|
private
|
|
249
259
|
|
|
250
260
|
def summarize_messages(msgs)
|
|
@@ -263,7 +273,16 @@ module Llmemory
|
|
|
263
273
|
end
|
|
264
274
|
|
|
265
275
|
def llm_client
|
|
266
|
-
|
|
276
|
+
tracked_llm_client
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
def tracked_llm_client
|
|
280
|
+
@tracked_llm_client ||= Llmemory::LLM::TrackingClient.new(
|
|
281
|
+
nil,
|
|
282
|
+
user_id: @user_id,
|
|
283
|
+
store: @short_term_store,
|
|
284
|
+
api_key: @api_key
|
|
285
|
+
)
|
|
267
286
|
end
|
|
268
287
|
|
|
269
288
|
def flush_memory_before_compaction!(msgs)
|
|
@@ -339,7 +358,7 @@ module Llmemory
|
|
|
339
358
|
end
|
|
340
359
|
|
|
341
360
|
def build_long_term(long_term_type)
|
|
342
|
-
llm_opts =
|
|
361
|
+
llm_opts = { llm: tracked_llm_client }
|
|
343
362
|
case long_term_type.to_s.to_sym
|
|
344
363
|
when :graph_based
|
|
345
364
|
LongTerm::GraphBased::Memory.new(
|
|
@@ -26,6 +26,12 @@ module Llmemory
|
|
|
26
26
|
@embedding_provider.embed(text)
|
|
27
27
|
end
|
|
28
28
|
|
|
29
|
+
def last_usage
|
|
30
|
+
return @embedding_provider.last_usage if @embedding_provider&.respond_to?(:last_usage)
|
|
31
|
+
|
|
32
|
+
Llmemory::LLM::Usage.zero
|
|
33
|
+
end
|
|
34
|
+
|
|
29
35
|
def store(id:, embedding:, metadata: {}, user_id: nil)
|
|
30
36
|
return id if user_id.nil? || user_id.to_s.empty?
|
|
31
37
|
text_content = (metadata || {}).dig("text") || (metadata || {}).dig(:text)
|
|
@@ -16,6 +16,12 @@ module Llmemory
|
|
|
16
16
|
@embedding_provider.embed(text)
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
+
def last_usage
|
|
20
|
+
return @embedding_provider.last_usage if @embedding_provider&.respond_to?(:last_usage)
|
|
21
|
+
|
|
22
|
+
Llmemory::LLM::Usage.zero
|
|
23
|
+
end
|
|
24
|
+
|
|
19
25
|
def store(id:, embedding:, metadata: {}, user_id: nil)
|
|
20
26
|
key = user_id ? "#{user_id}:#{id}" : id.to_s
|
|
21
27
|
meta = (metadata || {}).dup
|
|
@@ -4,6 +4,7 @@ require "faraday"
|
|
|
4
4
|
require "json"
|
|
5
5
|
require "digest"
|
|
6
6
|
require_relative "base"
|
|
7
|
+
require_relative "../llm/usage"
|
|
7
8
|
|
|
8
9
|
module Llmemory
|
|
9
10
|
module VectorStore
|
|
@@ -11,11 +12,14 @@ module Llmemory
|
|
|
11
12
|
DEFAULT_MODEL = "text-embedding-3-small"
|
|
12
13
|
DEFAULT_DIMS = 1536
|
|
13
14
|
|
|
15
|
+
attr_reader :last_usage
|
|
16
|
+
|
|
14
17
|
def initialize(api_key: nil, model: nil)
|
|
15
18
|
@api_key = api_key || Llmemory.configuration.llm_api_key
|
|
16
19
|
@model = model || DEFAULT_MODEL
|
|
17
20
|
@cache = {}
|
|
18
21
|
@cache_order = []
|
|
22
|
+
@last_usage = Llmemory::LLM::Usage.zero
|
|
19
23
|
end
|
|
20
24
|
|
|
21
25
|
def embed(text)
|
|
@@ -23,7 +27,10 @@ module Llmemory
|
|
|
23
27
|
|
|
24
28
|
if Llmemory.configuration.embedding_cache_enabled
|
|
25
29
|
key = cache_key(text)
|
|
26
|
-
|
|
30
|
+
if @cache.key?(key)
|
|
31
|
+
@last_usage = Llmemory::LLM::Usage.zero
|
|
32
|
+
return @cache[key].dup
|
|
33
|
+
end
|
|
27
34
|
end
|
|
28
35
|
|
|
29
36
|
result = fetch_embedding(text)
|
|
@@ -55,7 +62,8 @@ module Llmemory
|
|
|
55
62
|
|
|
56
63
|
def fetch_embedding(text)
|
|
57
64
|
result = nil
|
|
58
|
-
|
|
65
|
+
payload = { provider: :openai, model: @model, text_chars: text.to_s.length }
|
|
66
|
+
Llmemory::Instrumentation.instrument(:llm_embed, payload) do
|
|
59
67
|
response = connection.post("embeddings") do |req|
|
|
60
68
|
req.headers["Authorization"] = "Bearer #{@api_key}"
|
|
61
69
|
req.headers["Content-Type"] = "application/json"
|
|
@@ -63,11 +71,24 @@ module Llmemory
|
|
|
63
71
|
end
|
|
64
72
|
raise Llmemory::LLMError, "OpenAI Embeddings API error: #{response.body}" unless response.success?
|
|
65
73
|
body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
|
|
74
|
+
@last_usage = parse_embed_usage(body["usage"])
|
|
75
|
+
payload.merge!(
|
|
76
|
+
input_tokens: @last_usage.input_tokens,
|
|
77
|
+
output_tokens: @last_usage.output_tokens,
|
|
78
|
+
total_tokens: @last_usage.total_tokens
|
|
79
|
+
)
|
|
66
80
|
result = body.dig("data", 0, "embedding")&.map(&:to_f) || Array.new(DEFAULT_DIMS, 0.0)
|
|
67
81
|
end
|
|
68
82
|
result
|
|
69
83
|
end
|
|
70
84
|
|
|
85
|
+
def parse_embed_usage(raw)
|
|
86
|
+
return Llmemory::LLM::Usage.zero unless raw.is_a?(Hash)
|
|
87
|
+
|
|
88
|
+
total = raw["total_tokens"] || raw[:total_tokens] || 0
|
|
89
|
+
Llmemory::LLM::Usage.new(input_tokens: 0, output_tokens: 0, total_tokens: total)
|
|
90
|
+
end
|
|
91
|
+
|
|
71
92
|
def connection
|
|
72
93
|
@connection ||= Faraday.new(url: "https://api.openai.com/v1") do |f|
|
|
73
94
|
f.request :json
|
data/lib/llmemory/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llmemory
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- llmemory
|
|
@@ -175,6 +175,11 @@ files:
|
|
|
175
175
|
- lib/llmemory/llm/anthropic.rb
|
|
176
176
|
- lib/llmemory/llm/base.rb
|
|
177
177
|
- lib/llmemory/llm/openai.rb
|
|
178
|
+
- lib/llmemory/llm/response.rb
|
|
179
|
+
- lib/llmemory/llm/tracking_client.rb
|
|
180
|
+
- lib/llmemory/llm/usage.rb
|
|
181
|
+
- lib/llmemory/llm/usage_ledger.rb
|
|
182
|
+
- lib/llmemory/llm/usage_recorder.rb
|
|
178
183
|
- lib/llmemory/long_term.rb
|
|
179
184
|
- lib/llmemory/long_term/episodic.rb
|
|
180
185
|
- lib/llmemory/long_term/episodic/episode.rb
|