lex-llm-ledger 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a69d4ccf5975f45c795e6e54ab6ed59272a269855394bbef4d1149a48bd338e7
4
- data.tar.gz: 60bfc3f268526d0cf427ccfa90f2e230f9e0491fd88584ff685604c93faf63c4
3
+ metadata.gz: bfcca825db5b0e313cbeafc6c9f396b3f964dd4debd911854c89affe3bcb79ef
4
+ data.tar.gz: 1a4ea16c0c4ca34cc1ae4e3d0efa19cab3948c932162d9072c21127dae1ea375
5
5
  SHA512:
6
- metadata.gz: e166ea0dc31b758dd45d5c1595f6a72c78629b7dfd69d09d9928a699ce14f475658d9bf77f08018608bc98f394532f4c19b5887c3b2649bb1761aecbe2b4b5c8
7
- data.tar.gz: 226be9d0592d67fcca0ff956fe0c62db56a311cc3f6bde5a16523f21801d43079d976a6d6582e2a0f1022396089555495df37ccf81ccfd56ddfd3f2344f9d27f
6
+ metadata.gz: 362b96b3b385cfcca6c2edbe3fa9c6e30a23fbaa80bd5136fa1f95cc35af8ea5aa93ea89cd7cf1415999ac82ce881311aeea084ee44ba19cffadcbef0858a71b
7
+ data.tar.gz: 89c63b437c02355a9cc39182d42ac0a1971b7dcdf80473d7ef237d92bbb39cef96c13dd0ea3b34f13d6ed1f662cb6bb30aa473658dde119739b641bbf7b92dac
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.3] - 2026-05-17
4
+
5
+ ### Fixed
6
+ - Extract inline `<think>` / `<thinking>` tags from string responses into `response_thinking_json` at write time instead of leaving them in `response_json`.
7
+ - Fall back to `ThinkingExtractor` when `response_thinking` is absent from the audit payload (covers Ollama, vLLM, and OpenAI-compatible gateways that pass thinking inline).
8
+ - Guard `finish_reason` and `thinking_response` against `String#dig` TypeError when `body[:response]` is a plain string.
9
+
10
+ ## [0.3.2] - 2026-05-13
11
+
12
+ ### Fixed
13
+ - Keep metering-only writes from creating placeholder conversation messages so later prompt audits can attach the real user and assistant messages without sequence collisions.
14
+ - Use the request reference as the default inference metric idempotency key so metering and prompt audit events enrich the same metric row.
15
+ - Suppress duplicate insert warnings for unique races handled by the official ledger writer while retaining debug-level collision messages.
16
+
3
17
  ## [0.3.1] - 2026-05-13
4
18
 
5
19
  ### Fixed
@@ -184,7 +184,7 @@ module Legion
184
184
  end
185
185
 
186
186
  def official_metric_uuid(payload)
187
- ref = payload[:message_id] || "metric:#{Writers::OfficialRecordWriter.request_ref(payload)}"
187
+ ref = payload[:metric_id] || payload[:metric_ref] || "metric:#{Writers::OfficialRecordWriter.request_ref(payload)}"
188
188
  Writers::OfficialRecordWriter.stable_uuid(ref)
189
189
  end
190
190
 
@@ -19,13 +19,15 @@ module Legion
19
19
 
20
20
  module_function
21
21
 
22
- def insert_row(db, table, attributes, operation:)
22
+ def insert_row(db, table, attributes, operation:, warn_on_unique: true)
23
23
  row_id = db[table].insert(attributes)
24
24
  log.info(log_message('inserted', table, operation, row_id, attributes))
25
25
  row_id
26
26
  rescue Sequel::UniqueConstraintViolation => e
27
- log.warn(log_message('insert_failed', table, operation, nil, attributes,
28
- error_class: e.class, error: e.message))
27
+ if warn_on_unique
28
+ log.warn(log_message('insert_failed', table, operation, nil, attributes,
29
+ error_class: e.class, error: e.message))
30
+ end
29
31
  raise
30
32
  rescue StandardError => e
31
33
  log.error(log_message('insert_failed', table, operation, nil, attributes,
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/extensions/llm/responses/thinking_extractor'
3
4
  require_relative '../helpers/caller_identity'
4
5
  require_relative '../helpers/json'
5
6
 
@@ -96,7 +97,30 @@ module Legion
96
97
  end
97
98
 
98
99
  def response_thinking(body)
99
- body[:response_thinking] || body[:thinking] || body.dig(:response, :thinking) || {}
100
+ thinking = body[:response_thinking] || body[:thinking]
101
+ thinking ||= body.dig(:response, :thinking) if body[:response].is_a?(Hash)
102
+ if thinking
103
+ thinking.is_a?(Hash) ? thinking : { content: thinking }
104
+ else
105
+ extract_thinking_from_content(body)
106
+ end
107
+ end
108
+
109
+ def extract_thinking_from_content(body)
110
+ content_str = body[:response_content] || body[:response] || body[:content]
111
+ return {} unless content_str.is_a?(String)
112
+
113
+ _clean, extracted = extract_inline_thinking(content_str)
114
+ extracted ? { content: extracted } : {}
115
+ end
116
+
117
+ def extract_inline_thinking(text)
118
+ if defined?(::Legion::Extensions::Llm::Responses::ThinkingExtractor)
119
+ extraction = ::Legion::Extensions::Llm::Responses::ThinkingExtractor.extract(text)
120
+ [extraction.content, extraction.thinking]
121
+ else
122
+ [text, nil]
123
+ end
100
124
  end
101
125
 
102
126
  def official_prompt_payload(body, ctx, props, headers, expires_at)
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Ledger
7
- VERSION = '0.3.1'
7
+ VERSION = '0.3.3'
8
8
  end
9
9
  end
10
10
  end
@@ -3,6 +3,7 @@
3
3
  require 'digest'
4
4
  require 'securerandom'
5
5
  require 'legion/logging'
6
+ require 'legion/extensions/llm/responses/thinking_extractor'
6
7
  require_relative '../helpers/json'
7
8
  require_relative '../helpers/persistence_logging'
8
9
 
@@ -42,8 +43,7 @@ module Legion
42
43
 
43
44
  db.transaction do
44
45
  conversation = find_or_create_conversation(db, body)
45
- user_message = find_or_create_user_message(db, conversation, body)
46
- request = find_or_create_request(db, conversation, user_message, body)
46
+ request = find_or_create_request(db, conversation, nil, body)
47
47
  response = find_or_create_response(db, request, nil, body)
48
48
  metric = find_or_create_metric(db, request, response, body)
49
49
  result = { result: :ok, request_id: request[:id], response_id: response[:id], metric_id: metric[:id] }
@@ -109,17 +109,14 @@ module Legion
109
109
  def find_or_create_request(db, conversation, latest_message, body)
110
110
  request_id = request_ref(body)
111
111
  existing = db[:llm_message_inference_requests].where(request_ref: request_id).first
112
- if existing
113
- enrich_request!(db, existing, body)
114
- return existing
115
- end
112
+ return enrich_request!(db, existing, body, latest_message) if existing
116
113
 
117
114
  operation = operation(body)
118
115
  caller_refs = caller_identity_refs(db, body)
119
116
  id = insert_with_savepoint(db, :llm_message_inference_requests, {
120
117
  uuid: stable_uuid(request_id),
121
118
  conversation_id: conversation[:id],
122
- latest_message_id: latest_message[:id],
119
+ latest_message_id: latest_message&.dig(:id),
123
120
  caller_principal_id: caller_refs[:principal_id],
124
121
  caller_identity_id: caller_refs[:identity_id],
125
122
  runtime_caller_type: caller_type(body),
@@ -144,10 +141,7 @@ module Legion
144
141
  rescue Sequel::UniqueConstraintViolation => e
145
142
  log.debug("[ledger] request collision resolved request_ref=#{request_id} error=#{e.class}")
146
143
  existing = db[:llm_message_inference_requests].where(request_ref: request_id).first
147
- if existing
148
- enrich_request!(db, existing, body)
149
- return existing
150
- end
144
+ return enrich_request!(db, existing, body, latest_message) if existing
151
145
 
152
146
  raise
153
147
  end
@@ -252,7 +246,7 @@ module Legion
252
246
  end
253
247
 
254
248
  def find_or_create_metric(db, request, response, body)
255
- metric_uuid = stable_uuid(reference(body, :message_id) || "metric:#{request_ref(body)}")
249
+ metric_uuid = stable_uuid(reference(body, :metric_id, :metric_ref) || "metric:#{request_ref(body)}")
256
250
  existing = db[:llm_message_inference_metrics].where(uuid: metric_uuid).first
257
251
  return existing if existing
258
252
 
@@ -287,7 +281,7 @@ module Legion
287
281
  end
288
282
 
289
283
  def insert_row(db, table, attributes, operation:)
290
- Helpers::PersistenceLogging.insert_row(db, table, attributes, operation: operation)
284
+ Helpers::PersistenceLogging.insert_row(db, table, attributes, operation: operation, warn_on_unique: false)
291
285
  end
292
286
 
293
287
  def insert_with_savepoint(db, table, attributes, operation:)
@@ -309,8 +303,9 @@ module Legion
309
303
  db[:llm_messages].where(id: response_message[:id]).update(message_inference_response_id: response[:id])
310
304
  end
311
305
 
312
- def enrich_request!(db, existing, body)
306
+ def enrich_request!(db, existing, body, latest_message = nil)
313
307
  updates = {}
308
+ update_if_missing(updates, existing, :latest_message_id, latest_message&.dig(:id))
314
309
  caller_refs = caller_identity_refs(db, body)
315
310
  updates[:caller_identity_id] = caller_refs[:identity_id] if existing[:caller_identity_id].nil? && caller_refs[:identity_id]
316
311
  updates[:caller_principal_id] = caller_refs[:principal_id] if existing[:caller_principal_id].nil? && caller_refs[:principal_id]
@@ -322,10 +317,11 @@ module Legion
322
317
  msg_count = Array(body.dig(:request, :messages) || body[:messages]).size
323
318
  updates[:context_message_count] = msg_count if existing[:context_message_count].to_i.zero? && msg_count.positive?
324
319
 
325
- return if updates.empty?
320
+ return existing if updates.empty?
326
321
 
327
322
  db[:llm_message_inference_requests].where(id: existing[:id]).update(updates)
328
323
  log.info("[ledger] enriched request id=#{existing[:id]} fields=#{updates.keys.join(',')}")
324
+ existing.merge(updates)
329
325
  end
330
326
 
331
327
  def caller_identity(body)
@@ -585,18 +581,38 @@ module Legion
585
581
 
586
582
  def visible_response(body)
587
583
  response = body[:response] || body[:response_content] || body[:content] || {}
588
- return { content: response } if response.is_a?(String)
584
+ if response.is_a?(String)
585
+ clean, _thinking = extract_inline_thinking(response)
586
+ return { content: clean }
587
+ end
589
588
  return { content: response[:content] } if response.is_a?(Hash) && response.key?(:content)
590
589
 
591
590
  response.is_a?(Hash) ? response.except(:thinking) : { content: response.to_s }
592
591
  end
593
592
 
594
593
  def thinking_response(body)
595
- thinking = body[:response_thinking] || body[:thinking] || body.dig(:response, :thinking)
596
- return {} if thinking.nil?
597
- return { content: thinking } if thinking.is_a?(String)
594
+ thinking = body[:response_thinking] || body[:thinking]
595
+ thinking ||= body.dig(:response, :thinking) if body[:response].is_a?(Hash)
596
+ if thinking
597
+ return { content: thinking } if thinking.is_a?(String)
598
+
599
+ return thinking
600
+ end
598
601
 
599
- thinking
602
+ content_str = body[:response_content] || body[:response] || body[:content]
603
+ return {} unless content_str.is_a?(String)
604
+
605
+ _clean, extracted = extract_inline_thinking(content_str)
606
+ extracted ? { content: extracted } : {}
607
+ end
608
+
609
+ def extract_inline_thinking(text)
610
+ if defined?(::Legion::Extensions::Llm::Responses::ThinkingExtractor)
611
+ extraction = ::Legion::Extensions::Llm::Responses::ThinkingExtractor.extract(text)
612
+ [extraction.content, extraction.thinking]
613
+ else
614
+ [text, nil]
615
+ end
600
616
  end
601
617
 
602
618
  def response_content(body)
@@ -604,7 +620,10 @@ module Legion
604
620
  end
605
621
 
606
622
  def finish_reason(body)
607
- body[:finish_reason] || body.dig(:response, :finish_reason) || body.dig(:response, :stop, :reason)
623
+ return body[:finish_reason] if body[:finish_reason]
624
+ return nil unless body[:response].is_a?(Hash)
625
+
626
+ body.dig(:response, :finish_reason) || body.dig(:response, :stop, :reason)
608
627
  end
609
628
 
610
629
  def classification_level(body)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-ledger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity