llmemory 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +65 -1
  3. data/lib/llmemory/cli/commands/stats.rb +5 -0
  4. data/lib/llmemory/configuration.rb +22 -2
  5. data/lib/llmemory/crypto/cipher.rb +147 -0
  6. data/lib/llmemory/crypto/field_helpers.rb +110 -0
  7. data/lib/llmemory/instrumentation.rb +4 -2
  8. data/lib/llmemory/llm/anthropic.rb +10 -4
  9. data/lib/llmemory/llm/base.rb +42 -0
  10. data/lib/llmemory/llm/openai.rb +29 -13
  11. data/lib/llmemory/llm/response.rb +18 -0
  12. data/lib/llmemory/llm/tracking_client.rb +61 -0
  13. data/lib/llmemory/llm/usage.rb +31 -0
  14. data/lib/llmemory/llm/usage_ledger.rb +118 -0
  15. data/lib/llmemory/llm/usage_recorder.rb +37 -0
  16. data/lib/llmemory/llm.rb +5 -0
  17. data/lib/llmemory/long_term/episodic/memory.rb +16 -4
  18. data/lib/llmemory/long_term/episodic/storage.rb +11 -4
  19. data/lib/llmemory/long_term/episodic/storages/active_record_storage.rb +19 -6
  20. data/lib/llmemory/long_term/episodic/storages/database_storage.rb +25 -3
  21. data/lib/llmemory/long_term/episodic/storages/file_storage.rb +22 -5
  22. data/lib/llmemory/long_term/file_based/storage.rb +11 -4
  23. data/lib/llmemory/long_term/file_based/storages/active_record_storage.rb +16 -10
  24. data/lib/llmemory/long_term/file_based/storages/database_storage.rb +24 -8
  25. data/lib/llmemory/long_term/file_based/storages/file_storage.rb +28 -14
  26. data/lib/llmemory/long_term/graph_based/memory.rb +17 -3
  27. data/lib/llmemory/long_term/graph_based/storage.rb +3 -2
  28. data/lib/llmemory/long_term/graph_based/storages/active_record_storage.rb +47 -21
  29. data/lib/llmemory/long_term/procedural/memory.rb +16 -4
  30. data/lib/llmemory/long_term/procedural/storage.rb +11 -4
  31. data/lib/llmemory/long_term/procedural/storages/active_record_storage.rb +33 -13
  32. data/lib/llmemory/long_term/procedural/storages/database_storage.rb +25 -4
  33. data/lib/llmemory/long_term/procedural/storages/file_storage.rb +23 -6
  34. data/lib/llmemory/mcp/tools/memory_stats.rb +13 -0
  35. data/lib/llmemory/memory.rb +66 -15
  36. data/lib/llmemory/short_term/checkpoint.rb +5 -2
  37. data/lib/llmemory/short_term/stores/active_record_store.rb +12 -10
  38. data/lib/llmemory/short_term/stores/memory_store.rb +1 -1
  39. data/lib/llmemory/short_term/stores/postgres_store.rb +11 -5
  40. data/lib/llmemory/short_term/stores/redis_store.rb +7 -5
  41. data/lib/llmemory/short_term/stores.rb +7 -6
  42. data/lib/llmemory/vector_store/active_record_store.rb +30 -3
  43. data/lib/llmemory/vector_store/memory_store.rb +29 -3
  44. data/lib/llmemory/vector_store/openai_embeddings.rb +23 -2
  45. data/lib/llmemory/vector_store.rb +4 -3
  46. data/lib/llmemory/version.rb +1 -1
  47. data/lib/llmemory.rb +2 -0
  48. metadata +8 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fdcf202249038554cae18d79da76c261a9c7a80687081126ce985562ef8607ae
4
- data.tar.gz: 9b28b0ba29d4444712c2592a808f6b08bbf38f804c031e6b8b246914f7b86699
3
+ metadata.gz: 296b9d61d6c474145ecaa607653b37438b2491c846aac602f65d5fd850dae9ef
4
+ data.tar.gz: 521fd05b577c6c17a7dbc5d3771ff9fb3f7cddeaeef31938efabdcfd40db74a5
5
5
  SHA512:
6
- metadata.gz: 4bddb0f7e9a4bfe6cfd488a341efce98ab4194c36fd6ebcb22b024db338224d0191e3bfb5bbc9638ad43092cac66bb29d159e87d81b97f6d17c8ee82b400c716
7
- data.tar.gz: 8e5fb5edddabce0b1b57903282bb868a076d1595c4d187c973ccb7a7fa61f11847c9889e185459438423553aee0bf52f216e3bbdf0d9ac870f9f8196c230c1f1
6
+ metadata.gz: edf6ee6b41cb366f2ddef5ac2deec9c3c03090d4920d2386f9578e31999d135b34ad7f995ee9a9e803cf43627f969a3fab9ee191c68e2362a4d62fa8bade0729
7
+ data.tar.gz: 83266918faeb2bb4f7b57e89007b642283d7d64600bf83513948d5378a625fa911aefa6823c54115961bafe34dfdd147f4fb297a8226caad49aaceb6a01372e0
data/README.md CHANGED
@@ -51,6 +51,39 @@ memory.clear_session!
51
51
  - **`prune!(mode: nil)`** — Prunes oversized tool results (soft-trim or hard-clear). Only when `prune_tool_results_enabled` is true.
52
52
  - **`check_context_window!`** — Triggers consolidate and compact when context exceeds configured thresholds.
53
53
  - **`clear_session!`** — Clears short-term only.
54
+ - **`llm_usage`** — Returns cumulative LLM token usage for this `user_id` (chat/completions + embeddings), persisted in the short-term store.
55
+
56
+ ## LLM token usage
57
+
58
+ llmemory captures **real token counts** from OpenAI and Anthropic API responses (chat and embeddings), accumulates them per `user_id`, and exposes them for cost monitoring.
59
+
60
+ ```ruby
61
+ memory = Llmemory::Memory.new(user_id: "user_123")
62
+ memory.consolidate!
63
+ memory.maintain!
64
+
65
+ usage = memory.llm_usage
66
+ # => {
67
+ # invoke: { input_tokens: 1200, output_tokens: 400, total_tokens: 1600, calls: 3 },
68
+ # embed: { total_tokens: 48, calls: 2 },
69
+ # updated_at: "2026-07-02T12:00:00Z"
70
+ # }
71
+ ```
72
+
73
+ | What | Details |
74
+ |------|---------|
75
+ | **Counted** | `consolidate!`, reflection, skill mining, compaction summaries, iterative retrieval, graph/file extraction, OpenAI embeddings (index + search) |
76
+ | **Scope** | Cumulative per `user_id` (not per session); stored under pseudo-session `__llm_usage__` |
77
+ | **Not counted** | `context_tokens` (local byte estimate), retrieval context budget, MCP auth tokens |
78
+ | **Cache** | Embedding cache hits record zero tokens |
79
+
80
+ **Other surfaces:**
81
+
82
+ - **CLI:** `llmemory stats USER_ID` prints an `LLM TOKEN USAGE` section.
83
+ - **MCP:** `memory_stats` includes the same totals.
84
+ - **Rails metrics:** subscribe to `llm_invoke.llmemory` and `llm_embed.llmemory` (payload includes `input_tokens`, `output_tokens`, `total_tokens`, `response_chars`).
85
+
86
+ Dollar cost is not computed — multiply the token counts by your model's pricing externally. For lower-level access, `Llmemory::LLM::OpenAI#invoke` returns a `Response` that exposes the text as `#content` (also returned by `#to_s`) and the token counts as `#usage`.
54
87
 
55
88
  ## Configuration
56
89
 
@@ -65,6 +98,12 @@ Llmemory.configure do |config|
65
98
  config.long_term_store = :memory # or :file, :postgres, :active_record
66
99
  config.long_term_storage_path = "./llmemory_data" # for :file
67
100
  config.database_url = ENV["DATABASE_URL"] # for :postgres
101
+
102
+ # Optional encryption at rest (AES-256-GCM). Requires a key; isolates data
103
+ # cryptographically per key (e.g. per agent/user). See "Encryption at rest".
104
+ config.encryption_enabled = false
105
+ config.encryption_key = ENV["LLMEMORY_ENCRYPTION_KEY"]
106
+
68
107
  config.time_decay_half_life_days = 30
69
108
  config.max_retrieval_tokens = 2000
70
109
  config.prune_after_days = 90
@@ -112,6 +151,31 @@ Llmemory.configure do |config|
112
151
  end
113
152
  ```
114
153
 
154
+ ## Encryption at rest
155
+
156
+ Optional AES-256-GCM encryption protects persisted memory. Without the key, stored data is unreadable — useful for isolating agents or tenants.
157
+
158
+ ```ruby
159
+ # Global default key (applies to all Memory instances)
160
+ Llmemory.configure do |config|
161
+ config.encryption_enabled = true
162
+ config.encryption_key = ENV["LLMEMORY_ENCRYPTION_KEY"]
163
+ end
164
+
165
+ memory = Llmemory::Memory.new(user_id: "agent-1")
166
+
167
+ # Per-instance key override (isolates this agent even if global config differs)
168
+ memory = Llmemory::Memory.new(user_id: "agent-1", encryption_key: "tenant-specific-secret")
169
+ ```
170
+
171
+ **What is encrypted:** conversation checkpoints (redis/postgres/active_record), file-based facts/resources/categories, episodic/procedural documents, graph node names/types/predicates (deterministic) and properties (random IV). **Vector embeddings are not encrypted** (required for pgvector search); associated `text_content` metadata is encrypted.
172
+
173
+ **Trade-offs:**
174
+ - Database keyword search (`LIKE`, BM25) no longer works on encrypted columns, because the database only sees ciphertext; file backends still search in memory after decryption.
175
+ - `:memory` backends are in-process only and are **not** encrypted at rest.
176
+ - Existing plaintext data remains readable: encrypted values carry an `enc:v1:` / `encd:v1:` marker, and values without a marker are returned as-is; new writes are encrypted when enabled.
177
+ - Deterministic encryption on graph identifiers leaks equality (same name ⇒ same ciphertext) but keeps graph traversal working.
178
+
115
179
  ## Long-Term Storage
116
180
 
117
181
  Long-term memory can use different backends:
@@ -654,7 +718,7 @@ MCP_TOKEN=your-secret-token llmemory mcp serve --http --port 443 \
654
718
  | `memory_timeline_context` | Get N items before/after a specific memory |
655
719
  | `memory_add_message` | Add message to short-term conversation (roles: user, assistant, system, tool, tool_result) |
656
720
  | `memory_consolidate` | Extract facts from conversation to long-term |
657
- | `memory_stats` | Get memory statistics for a user |
721
+ | `memory_stats` | Get memory statistics for a user (includes LLM token usage) |
658
722
  | `memory_info` | Documentation on how to use the tools |
659
723
  | `memory_episode_record` / `memory_episodes` | Record / list episodic trajectories |
660
724
  | `memory_skill_register` / `memory_skill_report` / `memory_skills` | Register / outcome-track / list procedural skills |
@@ -41,6 +41,11 @@ module Llmemory
41
41
  puts "Long-term (file) categories: #{storage.list_categories(user_id).size}"
42
42
  puts "Long-term (file) resources: #{storage.list_resources(user_id: user_id).size}"
43
43
  end
44
+
45
+ puts "---"
46
+ puts Llmemory::LLM::UsageLedger.format_text(
47
+ Llmemory::LLM::UsageLedger.new(store: short_store).totals(user_id)
48
+ )
44
49
  end
45
50
 
46
51
  def print_global_stats(short_store, long_type)
@@ -48,12 +48,14 @@ module Llmemory
48
48
  :message_sanitizer_enabled,
49
49
  :ttl_episodic_days,
50
50
  :ttl_procedural_days,
51
- :skill_mining_enabled
51
+ :skill_mining_enabled,
52
+ :encryption_enabled,
53
+ :encryption_key
52
54
 
53
55
  def initialize
54
56
  @llm_provider = :openai
55
57
  @llm_api_key = ENV["OPENAI_API_KEY"]
56
- @llm_model = "gpt-4"
58
+ @llm_model = nil # falls back to the active provider's DEFAULT_MODEL
57
59
  @llm_base_url = nil
58
60
  @short_term_store = :memory
59
61
  @redis_url = ENV["REDIS_URL"] || "redis://localhost:6379/0"
@@ -98,6 +100,8 @@ module Llmemory
98
100
  @embedding_cache_max_entries = 10_000
99
101
  @max_message_chars = 32_000
100
102
  @message_sanitizer_enabled = false
103
+ @encryption_enabled = false
104
+ @encryption_key = ENV["LLMEMORY_ENCRYPTION_KEY"]
101
105
  end
102
106
  end
103
107
 
@@ -113,5 +117,21 @@ module Llmemory
113
117
  def reset_configuration!
114
118
  @configuration = Configuration.new
115
119
  end
120
+
121
+ # Builds a Crypto::Cipher when encryption is enabled and a key is present;
122
+ # otherwise returns Crypto::NullCipher. An explicit non-empty instance key
123
+ # enables encryption even when the global flag is off.
124
+ def build_cipher(key = nil)
125
+ explicit_key = !key.nil? && !key.to_s.empty?
126
+ resolved = key.nil? ? configuration.encryption_key : key
127
+ enabled = configuration.encryption_enabled || explicit_key
128
+ if enabled && !resolved.to_s.empty?
129
+ require_relative "crypto/cipher"
130
+ Crypto::Cipher.new(resolved)
131
+ else
132
+ require_relative "crypto/cipher"
133
+ Crypto::NullCipher.new
134
+ end
135
+ end
116
136
  end
117
137
  end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "openssl"
4
+ require "json"
5
+
6
+ module Llmemory
7
+ module Crypto
8
+ class DecryptionError < Llmemory::Error; end
9
+
10
+ # No-op cipher when encryption is disabled or no key is configured.
11
+ class NullCipher
12
+ def enabled?
13
+ false
14
+ end
15
+
16
+ def encrypt(str)
17
+ str.to_s
18
+ end
19
+
20
+ def encrypt_deterministic(str)
21
+ str.to_s
22
+ end
23
+
24
+ def decrypt(str)
25
+ str.to_s
26
+ end
27
+
28
+ def encrypt_json(obj)
29
+ JSON.generate(obj)
30
+ end
31
+
32
+ def decrypt_json(str)
33
+ JSON.parse(str.to_s, symbolize_names: true)
34
+ end
35
+
36
+ def encrypted?(str)
37
+ false
38
+ end
39
+ end
40
+
41
+ # AES-256-GCM encryption with separate content (random IV) and index
42
+ # (deterministic IV) subkeys derived from the master key via HMAC-SHA256.
43
+ class Cipher
44
+ MARKER = "enc:v1:"
45
+ DETERMINISTIC_MARKER = "encd:v1:"
46
+ IV_LENGTH = 12
47
+ TAG_LENGTH = 16
48
+
49
+ def initialize(key)
50
+ @master_key = derive_master_key(key)
51
+ @content_key = derive_subkey("content")
52
+ @index_key = derive_subkey("index")
53
+ end
54
+
55
+ def enabled?
56
+ true
57
+ end
58
+
59
+ def encrypt(plaintext)
60
+ str = plaintext.to_s
61
+ return str if str.empty?
62
+
63
+ encrypt_with_key(str, @content_key, iv: OpenSSL::Random.random_bytes(IV_LENGTH), marker: MARKER)
64
+ end
65
+
66
+ def encrypt_deterministic(plaintext)
67
+ str = plaintext.to_s
68
+ return str if str.empty?
69
+
70
+ iv = OpenSSL::HMAC.digest("SHA256", @index_key, str)[0, IV_LENGTH]
71
+ encrypt_with_key(str, @index_key, iv: iv, marker: DETERMINISTIC_MARKER)
72
+ end
73
+
74
+ def decrypt(ciphertext)
75
+ str = ciphertext.to_s
76
+ return str if str.empty?
77
+ return str unless encrypted?(str)
78
+
79
+ marker, key = if str.start_with?(DETERMINISTIC_MARKER)
80
+ [DETERMINISTIC_MARKER, @index_key]
81
+ else
82
+ [MARKER, @content_key]
83
+ end
84
+
85
+ payload = decode64(str.delete_prefix(marker))
86
+ iv = payload[0, IV_LENGTH]
87
+ tag = payload[IV_LENGTH, TAG_LENGTH]
88
+ ct = payload[(IV_LENGTH + TAG_LENGTH)..]
89
+
90
+ cipher = OpenSSL::Cipher.new("aes-256-gcm")
91
+ cipher.decrypt
92
+ cipher.key = key
93
+ cipher.iv = iv
94
+ cipher.auth_tag = tag
95
+ cipher.auth_data = ""
96
+ cipher.update(ct) + cipher.final
97
+ rescue OpenSSL::Cipher::CipherError, ArgumentError => e
98
+ raise DecryptionError, "Failed to decrypt data: #{e.message}"
99
+ end
100
+
101
+ def encrypt_json(obj)
102
+ encrypt(JSON.generate(obj))
103
+ end
104
+
105
+ def decrypt_json(str)
106
+ JSON.parse(decrypt(str), symbolize_names: true)
107
+ end
108
+
109
+ def encrypted?(str)
110
+ s = str.to_s
111
+ s.start_with?(MARKER) || s.start_with?(DETERMINISTIC_MARKER)
112
+ end
113
+
114
+ private
115
+
116
+ def encrypt_with_key(plaintext, key, iv:, marker:)
117
+ cipher = OpenSSL::Cipher.new("aes-256-gcm")
118
+ cipher.encrypt
119
+ cipher.key = key
120
+ cipher.iv = iv
121
+ cipher.auth_data = ""
122
+ ct = cipher.update(plaintext) + cipher.final
123
+ tag = cipher.auth_tag
124
+ marker + encode64(iv + tag + ct)
125
+ end
126
+
127
+ def encode64(bin)
128
+ [bin].pack("m0")
129
+ end
130
+
131
+ def decode64(str)
132
+ str.unpack1("m0")
133
+ end
134
+
135
+ def derive_master_key(key)
136
+ raw = key.to_s
137
+ raise ConfigurationError, "encryption_key cannot be empty when encryption is enabled" if raw.empty?
138
+
139
+ OpenSSL::Digest::SHA256.digest(raw)
140
+ end
141
+
142
+ def derive_subkey(label)
143
+ OpenSSL::HMAC.digest("SHA256", @master_key, "llmemory:#{label}")[0, 32]
144
+ end
145
+ end
146
+ end
147
+ end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Llmemory
6
+ module Crypto
7
+ # Shared encrypt/decrypt helpers for storage backends.
8
+ module FieldHelpers
9
+ private
10
+
11
+ def cipher
12
+ @cipher || Llmemory.build_cipher
13
+ end
14
+
15
+ def enc(str)
16
+ return str if str.nil?
17
+ return str.to_s unless cipher.enabled?
18
+
19
+ cipher.encrypt(str.to_s)
20
+ end
21
+
22
+ def dec(str)
23
+ return str if str.nil?
24
+ return str unless str.is_a?(String) && cipher.encrypted?(str)
25
+
26
+ cipher.decrypt(str)
27
+ end
28
+
29
+ def enc_det(str)
30
+ return str if str.nil?
31
+ return str.to_s unless cipher.enabled?
32
+
33
+ cipher.encrypt_deterministic(str.to_s)
34
+ end
35
+
36
+ def enc_json(obj)
37
+ return obj if obj.nil?
38
+ return obj unless cipher.enabled?
39
+
40
+ cipher.encrypt_json(obj)
41
+ end
42
+
43
+ def dec_json(value)
44
+ return value if value.nil?
45
+ return value.transform_keys(&:to_sym) if value.is_a?(Hash)
46
+ return value unless value.is_a?(String) && cipher.encrypted?(value)
47
+
48
+ cipher.decrypt_json(value)
49
+ end
50
+
51
+ def write_encrypted_file(path, data)
52
+ payload = JSON.generate(data)
53
+ File.write(path, cipher.enabled? ? cipher.encrypt(payload) : payload)
54
+ end
55
+
56
+ def read_encrypted_file(path)
57
+ raw = File.read(path)
58
+ json = cipher.enabled? && cipher.encrypted?(raw) ? cipher.decrypt(raw) : raw
59
+ JSON.parse(json, symbolize_names: true)
60
+ end
61
+
62
+ def write_encrypted_text_file(path, content, append: false)
63
+ text = content.to_s
64
+ if cipher.enabled?
65
+ if append && File.file?(path)
66
+ existing = read_encrypted_text_file(path)
67
+ text = existing + text
68
+ end
69
+ File.write(path, cipher.encrypt(text))
70
+ elsif append && File.file?(path)
71
+ File.write(path, File.read(path) + text)
72
+ else
73
+ File.write(path, text)
74
+ end
75
+ end
76
+
77
+ def read_encrypted_text_file(path)
78
+ raw = File.read(path)
79
+ cipher.enabled? && cipher.encrypted?(raw) ? cipher.decrypt(raw) : raw
80
+ end
81
+
82
+ def serialize_state(state)
83
+ json = JSON.generate(state)
84
+ return json unless cipher.enabled?
85
+
86
+ cipher.encrypt(json)
87
+ end
88
+
89
+ def deserialize_state(data)
90
+ if data.is_a?(Hash)
91
+ return data.transform_keys(&:to_sym)
92
+ end
93
+
94
+ str = data.to_s
95
+ json = cipher.enabled? && cipher.encrypted?(str) ? cipher.decrypt(str) : str
96
+ JSON.parse(json, symbolize_names: true)
97
+ end
98
+
99
+ def parse_provenance(value)
100
+ return nil if value.nil?
101
+ return value.transform_keys(&:to_sym) if value.is_a?(Hash)
102
+ return dec_json(value) if value.is_a?(String) && cipher.encrypted?(value)
103
+
104
+ JSON.parse(value.to_s, symbolize_names: true)
105
+ rescue JSON::ParserError
106
+ nil
107
+ end
108
+ end
109
+ end
110
+ end
@@ -10,8 +10,10 @@ module Llmemory
10
10
  # Events (payload keys are best-effort; subscribers should treat them as
11
11
  # optional):
12
12
  #
13
- # llm_invoke.llmemory provider:, model:, prompt_chars:, response_chars:
14
- # llm_embed.llmemory provider:, model:, text_chars:, dimensions:
13
+ # llm_invoke.llmemory provider:, model:, prompt_chars:, response_chars:,
14
+ # input_tokens:, output_tokens:, total_tokens:
15
+ # llm_embed.llmemory provider:, model:, text_chars:, input_tokens:,
16
+ # output_tokens:, total_tokens:
15
17
  # memory_write.llmemory memory_type:, user_id:
16
18
  # memory_forget.llmemory memory_type:, user_id:, count:
17
19
  # retrieve.llmemory query_chars:, candidates:, results:
@@ -8,16 +8,19 @@ module Llmemory
8
8
  module LLM
9
9
  class Anthropic < Base
10
10
  DEFAULT_BASE_URL = "https://api.anthropic.com"
11
+ DEFAULT_MODEL = "claude-sonnet-4-6"
11
12
 
12
13
  def initialize(api_key: nil, model: nil, base_url: nil)
14
+ super()
13
15
  @api_key = api_key || config.llm_api_key || ENV["ANTHROPIC_API_KEY"]
14
- @model = model || config.llm_model || "claude-3-sonnet-20240229"
16
+ @model = model || config.llm_model || DEFAULT_MODEL
15
17
  @base_url = base_url || config.llm_base_url || DEFAULT_BASE_URL
16
18
  end
17
19
 
18
20
  def invoke(prompt)
19
21
  result = nil
20
- Llmemory::Instrumentation.instrument(:llm_invoke, provider: :anthropic, model: @model, prompt_chars: prompt.to_s.length) do
22
+ payload = { provider: :anthropic, model: @model, prompt_chars: prompt.to_s.length }
23
+ Llmemory::Instrumentation.instrument(:llm_invoke, payload) do
21
24
  response = connection.post("v1/messages") do |req|
22
25
  req.body = {
23
26
  model: @model,
@@ -32,8 +35,11 @@ module Llmemory
32
35
  raise Llmemory::LLMError, "Anthropic API error: #{response.body}" unless response.success?
33
36
 
34
37
  body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
35
- content = body.dig("content", 0, "text")
36
- result = content&.strip || ""
38
+ content = body.dig("content", 0, "text")&.strip || ""
39
+ usage = parse_anthropic_usage(body["usage"])
40
+ record_usage(usage)
41
+ payload.merge!(instrumentation_payload(usage, content))
42
+ result = Response.new(content, usage: usage)
37
43
  end
38
44
  result
39
45
  end
@@ -1,8 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "usage"
4
+ require_relative "response"
5
+
3
6
  module Llmemory
4
7
  module LLM
5
8
  class Base
9
+ attr_reader :last_usage
10
+
11
+ def initialize(*)
12
+ @last_usage = Usage.zero
13
+ end
14
+
6
15
  def invoke(prompt)
7
16
  raise NotImplementedError, "#{self.class}#invoke must be implemented"
8
17
  end
@@ -18,6 +27,39 @@ module Llmemory
18
27
  def config
19
28
  Llmemory.configuration
20
29
  end
30
+
31
+ def parse_openai_chat_usage(raw)
32
+ return Usage.zero unless raw.is_a?(Hash)
33
+
34
+ Usage.new(
35
+ input_tokens: raw["prompt_tokens"] || raw[:prompt_tokens] || 0,
36
+ output_tokens: raw["completion_tokens"] || raw[:completion_tokens] || 0,
37
+ total_tokens: raw["total_tokens"] || raw[:total_tokens]
38
+ )
39
+ end
40
+
41
+ def parse_anthropic_usage(raw)
42
+ return Usage.zero unless raw.is_a?(Hash)
43
+
44
+ input = raw["input_tokens"] || raw[:input_tokens] || 0
45
+ output = raw["output_tokens"] || raw[:output_tokens] || 0
46
+ Usage.new(input_tokens: input, output_tokens: output)
47
+ end
48
+
49
+ def parse_openai_embed_usage(raw)
50
+ return Usage.zero unless raw.is_a?(Hash)
51
+
52
+ total = raw["total_tokens"] || raw[:total_tokens] || 0
53
+ Usage.new(input_tokens: 0, output_tokens: 0, total_tokens: total)
54
+ end
55
+
56
+ def record_usage(usage)
57
+ @last_usage = usage
58
+ end
59
+
60
+ def instrumentation_payload(usage, content, extra = {})
61
+ usage.to_h.merge(response_chars: content.to_s.length).merge(extra)
62
+ end
21
63
  end
22
64
  end
23
65
  end
@@ -8,16 +8,19 @@ module Llmemory
8
8
  module LLM
9
9
  class OpenAI < Base
10
10
  DEFAULT_BASE_URL = "https://api.openai.com/v1"
11
+ DEFAULT_MODEL = "gpt-4"
11
12
 
12
13
  def initialize(api_key: nil, model: nil, base_url: nil)
14
+ super()
13
15
  @api_key = api_key || config.llm_api_key
14
- @model = model || config.llm_model
16
+ @model = model || config.llm_model || DEFAULT_MODEL
15
17
  @base_url = base_url || config.llm_base_url || DEFAULT_BASE_URL
16
18
  end
17
19
 
18
20
  def invoke(prompt)
19
21
  result = nil
20
- Llmemory::Instrumentation.instrument(:llm_invoke, provider: :openai, model: @model, prompt_chars: prompt.to_s.length) do
22
+ payload = { provider: :openai, model: @model, prompt_chars: prompt.to_s.length }
23
+ Llmemory::Instrumentation.instrument(:llm_invoke, payload) do
21
24
  response = connection.post("chat/completions") do |req|
22
25
  req.body = {
23
26
  model: @model,
@@ -31,7 +34,11 @@ module Llmemory
31
34
  raise Llmemory::LLMError, "OpenAI API error: #{response.body}" unless response.success?
32
35
 
33
36
  body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
34
- result = body.dig("choices", 0, "message", "content")&.strip || ""
37
+ content = body.dig("choices", 0, "message", "content")&.strip || ""
38
+ usage = parse_openai_chat_usage(body["usage"])
39
+ record_usage(usage)
40
+ payload.merge!(instrumentation_payload(usage, content))
41
+ result = Response.new(content, usage: usage)
35
42
  end
36
43
  result
37
44
  end
@@ -53,18 +60,27 @@ module Llmemory
53
60
  }
54
61
  }
55
62
  }
56
- response = connection.post("chat/completions") do |req|
57
- req.body = payload.to_json
58
- req.headers["Content-Type"] = "application/json"
59
- req.headers["Authorization"] = "Bearer #{@api_key}"
60
- end
63
+ parsed = nil
64
+ instrument_payload = { provider: :openai, model: @model, prompt_chars: prompt.to_s.length }
65
+ Llmemory::Instrumentation.instrument(:llm_invoke, instrument_payload) do
66
+ response = connection.post("chat/completions") do |req|
67
+ req.body = payload.to_json
68
+ req.headers["Content-Type"] = "application/json"
69
+ req.headers["Authorization"] = "Bearer #{@api_key}"
70
+ end
61
71
 
62
- raise Llmemory::LLMError, "OpenAI API error: #{response.body}" unless response.success?
72
+ raise Llmemory::LLMError, "OpenAI API error: #{response.body}" unless response.success?
73
+
74
+ body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
75
+ content = body.dig("choices", 0, "message", "content")&.strip
76
+ usage = parse_openai_chat_usage(body["usage"])
77
+ record_usage(usage)
78
+ instrument_payload.merge!(instrumentation_payload(usage, content.to_s))
79
+ return {} if content.nil? || content.empty?
63
80
 
64
- body = response.body.is_a?(Hash) ? response.body : JSON.parse(response.body.to_s)
65
- content = body.dig("choices", 0, "message", "content")&.strip
66
- return {} if content.nil? || content.empty?
67
- JSON.parse(content)
81
+ parsed = JSON.parse(content)
82
+ end
83
+ parsed
68
84
  rescue JSON::ParserError => e
69
85
  raise Llmemory::LLMError, "Failed to parse JSON response: #{e.message}"
70
86
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Llmemory
4
+ module LLM
5
+ class Response
6
+ attr_reader :content, :usage
7
+
8
+ def initialize(content, usage: Usage.zero)
9
+ @content = content.to_s
10
+ @usage = usage
11
+ end
12
+
13
+ def to_s
14
+ @content
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "usage_recorder"
4
+
5
+ module Llmemory
6
+ module LLM
7
+ # Transparent wrapper that records token usage to the per-user ledger.
8
+ class TrackingClient
9
+ def initialize(inner, user_id:, store: nil, api_key: nil)
10
+ @inner = inner
11
+ @user_id = user_id
12
+ @store = store
13
+ @api_key = api_key
14
+ end
15
+
16
+ def invoke(prompt)
17
+ response = inner_client.invoke(prompt)
18
+ usage = if response.respond_to?(:usage)
19
+ response.usage
20
+ elsif inner_client.respond_to?(:last_usage)
21
+ inner_client.last_usage
22
+ else
23
+ Usage.zero
24
+ end
25
+ UsageRecorder.record(user_id: @user_id, usage: usage, operation: :invoke, store: @store)
26
+ response
27
+ end
28
+
29
+ def invoke_with_json_schema(prompt, json_schema)
30
+ result = inner_client.invoke_with_json_schema(prompt, json_schema)
31
+ usage = inner_client.respond_to?(:last_usage) ? inner_client.last_usage : Usage.zero
32
+ UsageRecorder.record(user_id: @user_id, usage: usage, operation: :invoke, store: @store)
33
+ result
34
+ end
35
+
36
+ def last_usage
37
+ return inner_client.last_usage if inner_client.respond_to?(:last_usage)
38
+
39
+ Usage.zero
40
+ end
41
+
42
+ def respond_to?(method, include_private = false)
43
+ inner_client.respond_to?(method, include_private) || super
44
+ end
45
+
46
+ def method_missing(method, *args, &block)
47
+ if inner_client.respond_to?(method)
48
+ inner_client.public_send(method, *args, &block)
49
+ else
50
+ super
51
+ end
52
+ end
53
+
54
+ private
55
+
56
+ def inner_client
57
+ @inner_client ||= @inner || Llmemory::LLM.client(api_key: @api_key)
58
+ end
59
+ end
60
+ end
61
+ end