lex-llm-ledger 0.1.12 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -0
- data/Gemfile +2 -0
- data/README.md +129 -14
- data/lex-llm-ledger.gemspec +3 -3
- data/lib/legion/extensions/llm/ledger/actors/{metering_writer.rb → metering.rb} +8 -1
- data/lib/legion/extensions/llm/ledger/actors/{prompt_writer.rb → prompts.rb} +6 -4
- data/lib/legion/extensions/llm/ledger/actors/{registry_availability_writer.rb → registry_availability.rb} +6 -4
- data/lib/legion/extensions/llm/ledger/actors/spool_flush.rb +1 -1
- data/lib/legion/extensions/llm/ledger/actors/{tool_writer.rb → tools.rb} +6 -4
- data/lib/legion/extensions/llm/ledger/backfill/legacy_llm_records.rb +223 -0
- data/lib/legion/extensions/llm/ledger/data/migrations/005_rename_tables_add_llm_prefix.rb +37 -0
- data/lib/legion/extensions/llm/ledger/data/migrations/006_relax_not_null_constraints.rb +61 -0
- data/lib/legion/extensions/llm/ledger/data/migrations/007_relax_remaining_not_null.rb +37 -0
- data/lib/legion/extensions/llm/ledger/data/migrations/008_relax_message_id_not_null.rb +19 -0
- data/lib/legion/extensions/llm/ledger/data/migrations/009_add_caller_to_metering.rb +17 -0
- data/lib/legion/extensions/llm/ledger/data/migrations/010_add_response_thinking_json_to_prompt_records.rb +15 -0
- data/lib/legion/extensions/llm/ledger/helpers/caller_identity.rb +82 -0
- data/lib/legion/extensions/llm/ledger/helpers/decryption.rb +17 -5
- data/lib/legion/extensions/llm/ledger/helpers/json.rb +45 -0
- data/lib/legion/extensions/llm/ledger/helpers/persistence_logging.rb +64 -0
- data/lib/legion/extensions/llm/ledger/helpers/retention.rb +35 -17
- data/lib/legion/extensions/llm/ledger/helpers/subscription_actor.rb +31 -0
- data/lib/legion/extensions/llm/ledger/helpers/subscription_message.rb +26 -14
- data/lib/legion/extensions/llm/ledger/runners/metering.rb +61 -20
- data/lib/legion/extensions/llm/ledger/runners/prompts.rb +110 -44
- data/lib/legion/extensions/llm/ledger/runners/provider_stats.rb +44 -24
- data/lib/legion/extensions/llm/ledger/runners/registry_availability.rb +16 -4
- data/lib/legion/extensions/llm/ledger/runners/tools.rb +45 -23
- data/lib/legion/extensions/llm/ledger/runners/usage_reporter.rb +58 -22
- data/lib/legion/extensions/llm/ledger/version.rb +1 -1
- data/lib/legion/extensions/llm/ledger/writers/official_metering_writer.rb +21 -0
- data/lib/legion/extensions/llm/ledger/writers/official_prompt_writer.rb +21 -0
- data/lib/legion/extensions/llm/ledger/writers/official_record_writer.rb +372 -0
- data/lib/legion/extensions/llm/ledger.rb +25 -5
- metadata +29 -15
- /data/lib/legion/extensions/llm/ledger/{migrations → data/migrations}/001_create_metering_records.rb +0 -0
- /data/lib/legion/extensions/llm/ledger/{migrations → data/migrations}/002_create_prompt_records.rb +0 -0
- /data/lib/legion/extensions/llm/ledger/{migrations → data/migrations}/003_create_tool_records.rb +0 -0
- /data/lib/legion/extensions/llm/ledger/{migrations → data/migrations}/004_create_registry_availability_records.rb +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 83376ade1924f74e2c734ccf0b9e67b1a1db0ad0494a883469fb86a4eb1763a1
|
|
4
|
+
data.tar.gz: 83c5f37ff7b8d63c2865e3f758f0ecbb116746d93817a2b67aadc7a14f66f503
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dcd276003a26041f9b0e5aa373547d7d03f74010c1a7fd011cd0a6fd1a9ded022425476ff95e7efc5a466c94c8362b699e5d1e3f8468609eec2bc6391e02b016
|
|
7
|
+
data.tar.gz: 0b0168807bc938bdcba4d3a2955c3d5e86f58a6138a93970fea1addbd2bc1f38fef44ed5bbedfcd7d01c3d26e23c451920c645d79acce603b26b98ae7bfffddc
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,53 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.2.5] - 2026-05-06
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Log every successful ledger audit and metric database insert at `info` with safe row context.
|
|
7
|
+
- Log duplicate insert failures at `warn` and unexpected insert failures at `error` before returning or re-raising.
|
|
8
|
+
|
|
9
|
+
## [0.2.4] - 2026-05-06
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
- Replace generated runner subscription actors with runner-named ledger-owned subscription actors so audit queues are consumed through the ledger decoder.
|
|
13
|
+
- Route ledger subscription actor payload decoding through the ledger decoder so encrypted audit messages preserve metadata and missing-IV messages dead-letter before core decryption.
|
|
14
|
+
|
|
15
|
+
## [0.2.3] - 2026-05-06
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- Use the real `legion-json` load contract for ledger JSON parsing and remove root `JSON` fallbacks from runtime code.
|
|
19
|
+
- Route retention TTL overrides through extension-scoped Legion settings and add default retention settings metadata.
|
|
20
|
+
- Send handled runner/backfill errors through `handle_exception` for structured Legion logging.
|
|
21
|
+
- Reject encrypted audit payloads that are missing the required `iv` header before attempting decryption.
|
|
22
|
+
|
|
23
|
+
## [0.2.2] - 2026-05-06
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
- Persist official response-message foreign keys, keep generated request references stable within a write, and remove raw payload logging from ledger runners.
|
|
27
|
+
- Make legacy backfill counts idempotent and attach legacy tool rows only to existing official inference responses.
|
|
28
|
+
- Clarify README cutover status for tool and registry projection tables.
|
|
29
|
+
|
|
30
|
+
## [0.2.1] - 2026-05-06
|
|
31
|
+
|
|
32
|
+
### Fixed
|
|
33
|
+
- Preserve namespaced caller identities from current LLM audit and metering envelopes instead of storing ambiguous display identities such as `system`.
|
|
34
|
+
|
|
35
|
+
## [0.2.0] - 2026-05-06
|
|
36
|
+
|
|
37
|
+
### Changed
|
|
38
|
+
- Write prompt audit and metering events into the official `legion-data` LLM lifecycle schema instead of legacy ledger-only tables.
|
|
39
|
+
- Move provider stats and usage reporting to official inference request, response, and metric tables grouped by provider, provider instance, model, and operation.
|
|
40
|
+
- Bumped the transport dependency floor to `legion-transport >= 1.4.14` for the coordinated fleet envelope sweep.
|
|
41
|
+
|
|
42
|
+
### Added
|
|
43
|
+
- Add official prompt and metering writers plus legacy LLM ledger backfill for prompt, metering, tool, and registry availability records.
|
|
44
|
+
- Add a hard stop for legacy-only writer mode after official cutover.
|
|
45
|
+
|
|
46
|
+
## [0.1.13] - 2026-05-03
|
|
47
|
+
|
|
48
|
+
### Added
|
|
49
|
+
- Add `response_thinking_json` to prompt audit records so provider thinking payloads are stored separately from assistant response content.
|
|
50
|
+
|
|
3
51
|
## [0.1.12] - 2026-04-28
|
|
4
52
|
|
|
5
53
|
### Added
|
data/Gemfile
CHANGED
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
source 'https://rubygems.org'
|
|
4
4
|
|
|
5
5
|
group :test do
|
|
6
|
+
legion_data_path = ENV.fetch('LEGION_DATA_PATH', File.expand_path('../../legion-data', __dir__))
|
|
6
7
|
llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
|
|
8
|
+
gem 'legion-data', path: legion_data_path if File.directory?(legion_data_path)
|
|
7
9
|
gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
|
|
8
10
|
end
|
|
9
11
|
|
data/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# lex-llm-ledger
|
|
2
2
|
|
|
3
3
|
LLM observability persistence for LegionIO. Consumes metering and audit messages from
|
|
4
|
-
AMQP queues, decrypts audit payloads, enforces retention policies, and writes
|
|
5
|
-
to a database for usage reporting and compliance.
|
4
|
+
AMQP queues, decrypts audit payloads, enforces retention policies, and writes official
|
|
5
|
+
`legion-data` LLM lifecycle records for usage reporting and compliance.
|
|
6
6
|
|
|
7
7
|
## Queues Consumed
|
|
8
8
|
|
|
@@ -12,11 +12,61 @@ to a database for usage reporting and compliance.
|
|
|
12
12
|
| `llm.audit.prompts` | `llm.audit` (topic) | `audit.prompt.#` | Encrypted prompt+response pairs |
|
|
13
13
|
| `llm.audit.tools` | `llm.audit` (topic) | `audit.tool.#` | Encrypted tool call records |
|
|
14
14
|
|
|
15
|
-
## Tables
|
|
15
|
+
## Official Tables
|
|
16
16
|
|
|
17
|
-
- `
|
|
18
|
-
- `
|
|
19
|
-
- `
|
|
17
|
+
- `llm_conversations` - Conversation container and retention/classification metadata
|
|
18
|
+
- `llm_messages` - Model-visible user and assistant messages
|
|
19
|
+
- `llm_message_inference_requests` - Operation, correlation, request payload, and policy context
|
|
20
|
+
- `llm_message_inference_responses` - Provider, provider instance, model, dispatch path, visible response, and thinking payload
|
|
21
|
+
- `llm_message_inference_metrics` - Tokens, latency, cost, and finance allocation
|
|
22
|
+
- `llm_tool_calls` - Provider-requested tool call lineage
|
|
23
|
+
- `llm_registry_events` - Provider/model availability events
|
|
24
|
+
|
|
25
|
+
Prompt and metering consumers write the official lifecycle tables directly.
|
|
26
|
+
`llm_tool_records` and `llm_registry_availability_records` remain operational
|
|
27
|
+
projection tables while the official tool/registry event cutover continues.
|
|
28
|
+
The legacy backfill reconciles those rows into `llm_tool_calls` and
|
|
29
|
+
`llm_registry_events` when they can be linked to official inference responses.
|
|
30
|
+
Legacy-only prompt/metering writer mode hard-stops instead of silently writing
|
|
31
|
+
stale projections.
|
|
32
|
+
|
|
33
|
+
## Event Spine Target
|
|
34
|
+
|
|
35
|
+
The existing tables are useful reporting projections, but the uplift target is end-to-end visibility for every LLM-related lifecycle event. Ledger should add a canonical `llm_events` stream/table and keep `metering_records`, `prompt_records`, and `tool_records` as specialized query views or companion tables.
|
|
36
|
+
|
|
37
|
+
Every event should share these correlation keys:
|
|
38
|
+
|
|
39
|
+
- `conversation_id`
|
|
40
|
+
- `request_id`
|
|
41
|
+
- `exchange_id`
|
|
42
|
+
- `message_id`
|
|
43
|
+
- `parent_message_id`
|
|
44
|
+
- `message_seq`
|
|
45
|
+
- `correlation_id`
|
|
46
|
+
- `trace_id`
|
|
47
|
+
- `span_id`
|
|
48
|
+
- `event_id`
|
|
49
|
+
- `event_seq`
|
|
50
|
+
|
|
51
|
+
Event types should cover at least:
|
|
52
|
+
|
|
53
|
+
- request received, normalized, classified, enriched, and context-assembled
|
|
54
|
+
- routing candidates built, candidates excluded, offering selected, failover attempted, escalation attempted
|
|
55
|
+
- provider request started, provider response received, provider error/timeout/cancel
|
|
56
|
+
- response normalized, streamed chunk emitted, final response returned
|
|
57
|
+
- MCP/tool call planned, started, completed, failed, denied, or timed out
|
|
58
|
+
- fleet request published, broker accepted/unroutable, worker accepted, worker rejected, fleet response received
|
|
59
|
+
- metering emitted, audit emitted, ledger write queued, ledger write succeeded/failed/spooled
|
|
60
|
+
|
|
61
|
+
This lets operators reconstruct a conversation without replaying prompt bodies. Example: conversation `123` had 32 messages, one failed, five executed on Anthropic direct, four locally, the rest on GPU fleet, with per-step response time, token totals, cost allocation, and failover history.
|
|
62
|
+
|
|
63
|
+
Ledger has three distinct outputs:
|
|
64
|
+
|
|
65
|
+
1. **Legal/evidence reconstruction** - immutable, correlated, retention-controlled event evidence sufficient to answer a legal or security request. This favors completeness, ordering, integrity, and capture-mode correctness.
|
|
66
|
+
2. **Operational analytics** - structured projections for high-level patterns, cost, latency, quality, routing behavior, fleet utilization, tool usage, and failure rates. This favors queryability and aggregation without requiring raw prompt bodies.
|
|
67
|
+
3. **Governed training/evaluation datasets** - policy-approved derived datasets for model improvement, team/org use-case tuning, eval generation, routing-quality analysis, and tool-use learning. This must be derived from ledger events through explicit consent, classification, redaction/de-identification, retention, and export controls.
|
|
68
|
+
|
|
69
|
+
Training/eval export is not automatic reuse of raw audit. A future dataset builder should select eligible events, apply redaction and capture-mode policy, preserve provenance back to `event_id`/`conversation_id`, and write a dataset manifest that records data classes, consent basis, source filters, transform versions, and approval state.
|
|
20
70
|
|
|
21
71
|
## Key Design Decisions
|
|
22
72
|
|
|
@@ -24,29 +74,94 @@ to a database for usage reporting and compliance.
|
|
|
24
74
|
- **Passive exchange references** - does not declare `llm.metering` or `llm.audit` (owned by legion-llm)
|
|
25
75
|
- **DecryptionUnavailable causes NACK** - messages requeue until the node has Vault credentials
|
|
26
76
|
- **PHI TTL cap** - records flagged `contains_phi` are capped at 30 days regardless of retention label
|
|
27
|
-
- **Idempotent writes** - duplicate
|
|
77
|
+
- **Idempotent official writes** - duplicate request/response/message references resolve to existing official rows
|
|
78
|
+
|
|
79
|
+
## Routing Uplift Target
|
|
80
|
+
|
|
81
|
+
The 2026-04-25 `legion-llm` routing redesign moves routing to operation-aware model offerings. Ledger should persist the enriched metadata published by `legion-llm` without owning routing policy.
|
|
82
|
+
|
|
83
|
+
Target metering, prompt, and tool records should be able to store:
|
|
84
|
+
|
|
85
|
+
- selected offering identity: `offering_id`, `provider_family`, `instance_id`, `canonical_model`, `provider_model`, `operation`, `transport`, `region`, `endpoint_hash`
|
|
86
|
+
- routing details: requested route, selected route, excluded candidates, lateral failover chain, vertical escalation chain, and policy decisions
|
|
87
|
+
- identity details: caller principal/canonical name/kind/source, accepting runtime identity, executing runtime identity for fleet requests, fleet lane, fleet class, network boundary, placement policy, fleet correlation ID, hashed reply target, and credential lease/grant metadata
|
|
88
|
+
- token and cost allocation: conversation ID, input/output/total tokens, selected-offering cost, pricing tier, configured baseline/comparable provider cost, avoided cost, and aggregation keys for tier, fleet class, provider family, instance, model, transport, and lane
|
|
89
|
+
- compliance details: `contains_pii`, `contains_phi`, `contains_pci`, `data_classes`, `jurisdictions`, `retention_policy`, and `capture_mode`
|
|
90
|
+
- model provenance: management state, model depot registry ID, artifact digest, signature verification status, rollout ring, and approval state
|
|
91
|
+
- tool provenance: source type/server, policy tags, approval/denial state, redacted or hashed resource identifiers, and input/output classification flags
|
|
92
|
+
- registry/availability events: worker heartbeat, lane availability, offering availability, model sync state, degraded/draining/blocked transitions, and capacity changes from `llm.registry`
|
|
93
|
+
|
|
94
|
+
The uplift must validate the existing runners and migrations against this target. Current tables already capture core metering, prompt audit, and tool audit, but they need additional correlation fields, routing/offering fields, token context fields, cost allocation fields, identity/fleet fields, and event-spine coverage for request/response/MCP lifecycle events that are not prompt or tool records.
|
|
95
|
+
|
|
96
|
+
Audit capture modes expected from `legion-llm`:
|
|
97
|
+
|
|
98
|
+
- `none` - do not publish prompt/tool body audit
|
|
99
|
+
- `metadata_only` - store routing/classification/token/cost metadata only
|
|
100
|
+
- `redacted` - store redacted bodies plus redaction metadata
|
|
101
|
+
- `encrypted_raw` - store encrypted full payloads for approved consumers
|
|
102
|
+
- `raw` - plaintext full payloads for local/dev or explicitly approved environments
|
|
103
|
+
|
|
104
|
+
Prompt/tool audit should be durable. If transport is unavailable, `legion-llm` should spool audit records or use a durable local audit queue unless capture mode is `none` or policy explicitly allows best-effort audit.
|
|
105
|
+
|
|
106
|
+
For async `:fleet` inference, ledger records should preserve the original caller identity and record both runtimes: the process that accepted/enqueued the request and the worker process that executed the provider call. Fleet records should also persist the selected lane, worker fleet class (`endpoint`, `datacenter`, `cloud_vpc`, etc.), placement policy, and model provenance so investigators can tell whether a request ran on the caller's own machine, another endpoint, a datacenter GPU, or a cloud-adjacent worker. The raw RabbitMQ `reply_to` queue should remain transport-only; persisted records should use a stable hash plus the `correlation_id` for reconstruction.
|
|
107
|
+
|
|
108
|
+
Fleet registry history should arrive through RabbitMQ rather than endpoint workers writing directly to the database. `legion-llm` and provider workers publish availability events to `llm.registry`; ledger consumes those events and persists durable history for operator diagnostics, audit, and legal reconstruction.
|
|
109
|
+
|
|
110
|
+
Ledger should be able to answer spend-allocation questions without replaying raw prompts: how many input/output tokens a conversation used, how tokens split across Anthropic direct versus fleet GPU versus endpoint MacBook fleet, and estimated dollars saved by local/fleet execution compared with a configured cloud/frontier baseline.
|
|
111
|
+
|
|
112
|
+
Ledger is not on the LLM execution critical path. If the database is unavailable, ledger consumers should retry, requeue, DLQ, or spool according to transport policy while `legion-llm` continues routing and executing requests. Compliance profiles that require durable audit before response are the explicit exception and should fail closed upstream with a clear policy error.
|
|
28
113
|
|
|
29
114
|
## Requirements
|
|
30
115
|
|
|
31
|
-
- `legion-data` >= 1.
|
|
116
|
+
- `legion-data` >= 1.8.0 (official LLM lifecycle schema)
|
|
32
117
|
- `legion-json` >= 1.2 (JSON serialization)
|
|
33
|
-
- `legion-transport` >= 1.4 (AMQP transport)
|
|
118
|
+
- `legion-logging` >= 1.3 (structured exception logging)
|
|
119
|
+
- `legion-settings` >= 1.3 (extension-scoped retention settings)
|
|
120
|
+
- `legion-transport` >= 1.4.14 (AMQP transport)
|
|
34
121
|
- `legion-crypt` >= 1.5 (for decrypting audit messages, optional at runtime)
|
|
35
122
|
|
|
123
|
+
## Configuration
|
|
124
|
+
|
|
125
|
+
Ledger runs with safe defaults and reads extension settings from
|
|
126
|
+
`extensions.llm.ledger`:
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"extensions": {
|
|
131
|
+
"llm": {
|
|
132
|
+
"ledger": {
|
|
133
|
+
"retention": {
|
|
134
|
+
"default_days": 90,
|
|
135
|
+
"phi_ttl_days": 30
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
`default_days` controls records with the `default` retention label. `phi_ttl_days`
|
|
144
|
+
caps PHI records even when the event asks for longer or permanent retention.
|
|
145
|
+
Encrypted audit messages must include an `iv` header; missing-IV messages are
|
|
146
|
+
rejected as malformed encrypted audit records rather than retried.
|
|
147
|
+
|
|
36
148
|
## Usage
|
|
37
149
|
|
|
38
150
|
```ruby
|
|
39
|
-
# Metering write (called by MeteringWriter actor)
|
|
40
|
-
Legion::Extensions::
|
|
151
|
+
# Metering write (called by Metering actor)
|
|
152
|
+
Legion::Extensions::Llm::Ledger::Runners::Metering.write_metering_record(payload, metadata)
|
|
41
153
|
|
|
42
154
|
# Usage summary
|
|
43
|
-
Legion::Extensions::
|
|
155
|
+
Legion::Extensions::Llm::Ledger::Runners::UsageReporter.summary(period: 'day', group_by: 'provider_instance')
|
|
44
156
|
|
|
45
157
|
# Budget check
|
|
46
|
-
Legion::Extensions::
|
|
158
|
+
Legion::Extensions::Llm::Ledger::Runners::UsageReporter.budget_check(budget_id: 'budget_q1', budget_usd: 100.0)
|
|
47
159
|
|
|
48
160
|
# Provider health
|
|
49
|
-
Legion::Extensions::
|
|
161
|
+
Legion::Extensions::Llm::Ledger::Runners::ProviderStats.health_report
|
|
162
|
+
|
|
163
|
+
# One-time legacy reconciliation
|
|
164
|
+
Legion::Extensions::Llm::Ledger::Backfill::LegacyLlmRecords.run
|
|
50
165
|
```
|
|
51
166
|
|
|
52
167
|
## Development
|
data/lex-llm-ledger.gemspec
CHANGED
|
@@ -28,12 +28,12 @@ Gem::Specification.new do |spec|
|
|
|
28
28
|
end
|
|
29
29
|
spec.require_paths = ['lib']
|
|
30
30
|
|
|
31
|
-
spec.add_dependency 'legion-data', '>= 1.
|
|
31
|
+
spec.add_dependency 'legion-data', '>= 1.8.0'
|
|
32
32
|
spec.add_dependency 'legion-json', '>= 1.2'
|
|
33
33
|
spec.add_dependency 'legion-logging', '>= 1.3'
|
|
34
34
|
spec.add_dependency 'legion-settings', '>= 1.3'
|
|
35
|
-
spec.add_dependency 'legion-transport', '>= 1.4'
|
|
36
|
-
spec.add_dependency 'lex-llm', '>= 0.
|
|
35
|
+
spec.add_dependency 'legion-transport', '>= 1.4.14'
|
|
36
|
+
spec.add_dependency 'lex-llm', '>= 0.4.0'
|
|
37
37
|
|
|
38
38
|
spec.add_development_dependency 'rspec'
|
|
39
39
|
spec.add_development_dependency 'rubocop'
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/actors/subscription'
|
|
4
|
+
require_relative '../helpers/subscription_actor'
|
|
4
5
|
|
|
5
6
|
module Legion
|
|
6
7
|
module Extensions
|
|
7
8
|
module Llm
|
|
8
9
|
module Ledger
|
|
9
10
|
module Actor
|
|
10
|
-
class MeteringWriter < Legion::Extensions::Actors::Subscription
|
|
11
|
+
class Metering < Legion::Extensions::Actors::Subscription
|
|
12
|
+
include Helpers::SubscriptionActor
|
|
13
|
+
|
|
11
14
|
def runner_class = Legion::Extensions::Llm::Ledger::Runners::Metering
|
|
12
15
|
|
|
13
16
|
def runner_function
|
|
@@ -17,6 +20,10 @@ module Legion
|
|
|
17
20
|
def use_runner?
|
|
18
21
|
false
|
|
19
22
|
end
|
|
23
|
+
|
|
24
|
+
def queue
|
|
25
|
+
Legion::Extensions::Llm::Ledger::Transport::Queues::MeteringWrite
|
|
26
|
+
end
|
|
20
27
|
end
|
|
21
28
|
end
|
|
22
29
|
end
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/actors/subscription'
|
|
4
|
-
require_relative '../helpers/
|
|
4
|
+
require_relative '../helpers/subscription_actor'
|
|
5
5
|
|
|
6
6
|
module Legion
|
|
7
7
|
module Extensions
|
|
8
8
|
module Llm
|
|
9
9
|
module Ledger
|
|
10
10
|
module Actor
|
|
11
|
-
class PromptWriter < Legion::Extensions::Actors::Subscription
|
|
11
|
+
class Prompts < Legion::Extensions::Actors::Subscription
|
|
12
|
+
include Helpers::SubscriptionActor
|
|
13
|
+
|
|
12
14
|
def runner_class = Legion::Extensions::Llm::Ledger::Runners::Prompts
|
|
13
15
|
|
|
14
16
|
def runner_function
|
|
@@ -19,8 +21,8 @@ module Legion
|
|
|
19
21
|
false
|
|
20
22
|
end
|
|
21
23
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
+
def queue
|
|
25
|
+
Legion::Extensions::Llm::Ledger::Transport::Queues::AuditPrompts
|
|
24
26
|
end
|
|
25
27
|
end
|
|
26
28
|
end
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/actors/subscription'
|
|
4
|
-
require_relative '../helpers/
|
|
4
|
+
require_relative '../helpers/subscription_actor'
|
|
5
5
|
|
|
6
6
|
module Legion
|
|
7
7
|
module Extensions
|
|
8
8
|
module Llm
|
|
9
9
|
module Ledger
|
|
10
10
|
module Actor
|
|
11
|
-
class RegistryAvailabilityWriter < Legion::Extensions::Actors::Subscription
|
|
11
|
+
class RegistryAvailability < Legion::Extensions::Actors::Subscription
|
|
12
|
+
include Helpers::SubscriptionActor
|
|
13
|
+
|
|
12
14
|
def runner_class = Legion::Extensions::Llm::Ledger::Runners::RegistryAvailability
|
|
13
15
|
|
|
14
16
|
def runner_function
|
|
@@ -19,8 +21,8 @@ module Legion
|
|
|
19
21
|
false
|
|
20
22
|
end
|
|
21
23
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
+
def queue
|
|
25
|
+
Legion::Extensions::Llm::Ledger::Transport::Queues::RegistryAvailability
|
|
24
26
|
end
|
|
25
27
|
end
|
|
26
28
|
end
|
|
@@ -24,7 +24,7 @@ module Legion
|
|
|
24
24
|
|
|
25
25
|
Legion::LLM::Metering.flush_spool
|
|
26
26
|
rescue StandardError => e
|
|
27
|
-
|
|
27
|
+
handle_exception(e, level: :warn, handled: true, operation: 'spool_flush')
|
|
28
28
|
end
|
|
29
29
|
|
|
30
30
|
def run_now?
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/actors/subscription'
|
|
4
|
-
require_relative '../helpers/
|
|
4
|
+
require_relative '../helpers/subscription_actor'
|
|
5
5
|
|
|
6
6
|
module Legion
|
|
7
7
|
module Extensions
|
|
8
8
|
module Llm
|
|
9
9
|
module Ledger
|
|
10
10
|
module Actor
|
|
11
|
-
class ToolWriter < Legion::Extensions::Actors::Subscription
|
|
11
|
+
class Tools < Legion::Extensions::Actors::Subscription
|
|
12
|
+
include Helpers::SubscriptionActor
|
|
13
|
+
|
|
12
14
|
def runner_class = Legion::Extensions::Llm::Ledger::Runners::Tools
|
|
13
15
|
|
|
14
16
|
def runner_function
|
|
@@ -19,8 +21,8 @@ module Legion
|
|
|
19
21
|
false
|
|
20
22
|
end
|
|
21
23
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
+
def queue
|
|
25
|
+
Legion::Extensions::Llm::Ledger::Transport::Queues::AuditTools
|
|
24
26
|
end
|
|
25
27
|
end
|
|
26
28
|
end
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/logging'
|
|
4
|
+
require_relative '../helpers/json'
|
|
5
|
+
require_relative '../helpers/persistence_logging'
|
|
6
|
+
|
|
7
|
+
module Legion
|
|
8
|
+
module Extensions
|
|
9
|
+
module Llm
|
|
10
|
+
module Ledger
|
|
11
|
+
module Backfill
|
|
12
|
+
module LegacyLlmRecords
|
|
13
|
+
extend Legion::Logging::Helper
|
|
14
|
+
|
|
15
|
+
LEGACY_TABLES = %i[
|
|
16
|
+
llm_prompt_records
|
|
17
|
+
llm_metering_records
|
|
18
|
+
llm_tool_records
|
|
19
|
+
llm_registry_availability_records
|
|
20
|
+
].freeze
|
|
21
|
+
|
|
22
|
+
module_function
|
|
23
|
+
|
|
24
|
+
def run(limit: nil, writer_mode: :official)
|
|
25
|
+
ensure_no_legacy_writer_mode!(writer_mode)
|
|
26
|
+
|
|
27
|
+
LEGACY_TABLES.to_h do |table|
|
|
28
|
+
[table, table_present?(table) ? backfill_table(table, limit:) : 0]
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def ensure_no_legacy_writer_mode!(mode)
|
|
33
|
+
return unless %i[legacy legacy_only legacy_table_only].include?(mode.to_sym)
|
|
34
|
+
|
|
35
|
+
raise ArgumentError, 'Legacy LLM writer mode is disabled after official backfill; configure official LLM writers.'
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def backfill_table(table, limit:)
|
|
39
|
+
dataset = db[table].order(:id)
|
|
40
|
+
dataset = dataset.limit(limit) if limit
|
|
41
|
+
dataset.all.sum { |row| backfill_row(table, row) }
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def backfill_row(table, row)
|
|
45
|
+
case table
|
|
46
|
+
when :llm_prompt_records
|
|
47
|
+
backfill_prompt(row)
|
|
48
|
+
when :llm_metering_records
|
|
49
|
+
backfill_metering(row)
|
|
50
|
+
when :llm_tool_records
|
|
51
|
+
backfill_tool(row)
|
|
52
|
+
when :llm_registry_availability_records
|
|
53
|
+
backfill_registry(row)
|
|
54
|
+
end
|
|
55
|
+
rescue Sequel::UniqueConstraintViolation => e
|
|
56
|
+
handle_exception(e, level: :warn, handled: true, operation: 'legacy_llm_backfill.duplicate')
|
|
57
|
+
0
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def backfill_prompt(row)
|
|
61
|
+
payload = prompt_payload(row)
|
|
62
|
+
return 0 if official_metric_exists?(payload)
|
|
63
|
+
|
|
64
|
+
Writers::OfficialPromptWriter.write(payload)
|
|
65
|
+
1
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def backfill_metering(row)
|
|
69
|
+
payload = metering_payload(row)
|
|
70
|
+
return 0 if official_metric_exists?(payload)
|
|
71
|
+
|
|
72
|
+
Writers::OfficialMeteringWriter.write(payload)
|
|
73
|
+
1
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def prompt_payload(row)
|
|
77
|
+
{
|
|
78
|
+
message_id: row[:message_id],
|
|
79
|
+
correlation_id: row[:correlation_id],
|
|
80
|
+
conversation_id: row[:conversation_id],
|
|
81
|
+
response_message_id: row[:response_message_id],
|
|
82
|
+
request_id: row[:request_id],
|
|
83
|
+
exchange_id: row[:exchange_id],
|
|
84
|
+
operation: row[:request_type],
|
|
85
|
+
provider: row[:provider],
|
|
86
|
+
model_id: row[:model_id],
|
|
87
|
+
tier: row[:tier],
|
|
88
|
+
request: json_load(row[:request_json]),
|
|
89
|
+
response: json_load(row[:response_json]),
|
|
90
|
+
response_thinking: json_load(row[:response_thinking_json]),
|
|
91
|
+
input_tokens: row[:input_tokens],
|
|
92
|
+
output_tokens: row[:output_tokens],
|
|
93
|
+
total_tokens: row[:total_tokens],
|
|
94
|
+
cost_usd: row[:cost_usd],
|
|
95
|
+
classification_level: row[:classification_level],
|
|
96
|
+
contains_phi: row[:contains_phi],
|
|
97
|
+
contains_pii: row[:contains_pii],
|
|
98
|
+
retention_policy: row[:retention_policy],
|
|
99
|
+
expires_at: row[:expires_at],
|
|
100
|
+
recorded_at: row[:recorded_at]
|
|
101
|
+
}
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def metering_payload(row)
|
|
105
|
+
{
|
|
106
|
+
message_id: row[:message_id],
|
|
107
|
+
correlation_id: row[:correlation_id],
|
|
108
|
+
conversation_id: row[:conversation_id],
|
|
109
|
+
request_id: row[:request_id],
|
|
110
|
+
exchange_id: row[:exchange_id],
|
|
111
|
+
operation: row[:request_type],
|
|
112
|
+
provider: row[:provider],
|
|
113
|
+
provider_instance: row[:worker_id],
|
|
114
|
+
model_id: row[:model_id],
|
|
115
|
+
tier: row[:tier],
|
|
116
|
+
input_tokens: row[:input_tokens],
|
|
117
|
+
output_tokens: row[:output_tokens],
|
|
118
|
+
thinking_tokens: row[:thinking_tokens],
|
|
119
|
+
total_tokens: row[:total_tokens],
|
|
120
|
+
latency_ms: row[:latency_ms],
|
|
121
|
+
wall_clock_ms: row[:wall_clock_ms],
|
|
122
|
+
cost_usd: row[:cost_usd],
|
|
123
|
+
recorded_at: row[:recorded_at],
|
|
124
|
+
billing: {
|
|
125
|
+
cost_center: row[:cost_center],
|
|
126
|
+
budget_id: row[:budget_id]
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def backfill_tool(row)
|
|
132
|
+
response = response_for_request(row[:request_id])
|
|
133
|
+
return 0 unless response
|
|
134
|
+
|
|
135
|
+
tool_uuid = Writers::OfficialRecordWriter.stable_uuid(row[:tool_call_id] || row[:message_id])
|
|
136
|
+
return 0 if db[:llm_tool_calls].where(uuid: tool_uuid).first
|
|
137
|
+
|
|
138
|
+
insert_row(:llm_tool_calls, {
|
|
139
|
+
uuid: tool_uuid,
|
|
140
|
+
message_inference_response_id: response[:id],
|
|
141
|
+
tool_call_index: next_tool_index(response[:id]),
|
|
142
|
+
provider_tool_call_ref: row[:tool_call_id],
|
|
143
|
+
tool_name: row[:tool_name],
|
|
144
|
+
tool_source_type: row[:tool_source_type],
|
|
145
|
+
tool_source_server: row[:tool_source_server],
|
|
146
|
+
status: row[:tool_status],
|
|
147
|
+
requested_at: row[:tool_start_at],
|
|
148
|
+
completed_at: row[:tool_end_at],
|
|
149
|
+
inserted_at: Time.now.utc
|
|
150
|
+
}, operation: 'legacy_llm_backfill.tool_call')
|
|
151
|
+
1
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def backfill_registry(row)
|
|
155
|
+
uuid = Writers::OfficialRecordWriter.stable_uuid(row[:event_id] || row[:message_id])
|
|
156
|
+
return 0 if db[:llm_registry_events].where(uuid: uuid).first
|
|
157
|
+
|
|
158
|
+
insert_row(:llm_registry_events, {
|
|
159
|
+
uuid: uuid,
|
|
160
|
+
provider: row[:provider_family],
|
|
161
|
+
model_key: row[:model_id],
|
|
162
|
+
event_type: row[:event_type],
|
|
163
|
+
status: registry_status(row),
|
|
164
|
+
reason: row[:metadata_json],
|
|
165
|
+
recorded_at: row[:occurred_at],
|
|
166
|
+
inserted_at: Time.now.utc
|
|
167
|
+
}, operation: 'legacy_llm_backfill.registry_event')
|
|
168
|
+
1
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def insert_row(table, attributes, operation:)
|
|
172
|
+
Helpers::PersistenceLogging.insert_row(db, table, attributes, operation: operation)
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def response_for_request(request_id)
|
|
176
|
+
request = db[:llm_message_inference_requests].where(request_ref: request_id).first
|
|
177
|
+
return nil unless request
|
|
178
|
+
|
|
179
|
+
db[:llm_message_inference_responses].where(message_inference_request_id: request[:id]).first
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def official_metric_exists?(payload)
|
|
183
|
+
db[:llm_message_inference_metrics].where(uuid: official_metric_uuid(payload)).first
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def official_metric_uuid(payload)
|
|
187
|
+
ref = payload[:message_id] || "metric:#{Writers::OfficialRecordWriter.request_ref(payload)}"
|
|
188
|
+
Writers::OfficialRecordWriter.stable_uuid(ref)
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def next_tool_index(response_id)
|
|
192
|
+
db[:llm_tool_calls].where(message_inference_response_id: response_id).max(:tool_call_index).to_i + 1
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
def registry_status(row)
|
|
196
|
+
health = json_load(row[:health_json])
|
|
197
|
+
health[:status] || health['status'] || row[:event_type] || 'unknown'
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def table_present?(table)
|
|
201
|
+
db.table_exists?(table)
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
def db
|
|
205
|
+
::Legion::Data.connection
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def json_load(value)
|
|
209
|
+
return {} if value.nil? || value.to_s.empty?
|
|
210
|
+
|
|
211
|
+
Helpers::Json.load(value)
|
|
212
|
+
rescue StandardError => e
|
|
213
|
+
raise unless Helpers::Json.parse_error?(e)
|
|
214
|
+
|
|
215
|
+
handle_exception(e, level: :warn, handled: true, operation: 'legacy_llm_backfill.json_load')
|
|
216
|
+
{ content: value }
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
Sequel.migration do
|
|
4
|
+
up do
|
|
5
|
+
rename_table :metering_records, :llm_metering_records
|
|
6
|
+
rename_table :prompt_records, :llm_prompt_records
|
|
7
|
+
rename_table :tool_records, :llm_tool_records
|
|
8
|
+
rename_table :registry_availability_records, :llm_registry_availability_records
|
|
9
|
+
|
|
10
|
+
alter_table(:llm_metering_records) do
|
|
11
|
+
set_column_allow_null :correlation_id
|
|
12
|
+
set_column_allow_null :conversation_id
|
|
13
|
+
set_column_allow_null :message_id_ctx
|
|
14
|
+
set_column_allow_null :request_id
|
|
15
|
+
set_column_allow_null :tier
|
|
16
|
+
set_column_allow_null :provider
|
|
17
|
+
set_column_allow_null :node_id
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
down do
|
|
22
|
+
alter_table(:llm_metering_records) do
|
|
23
|
+
set_column_not_null :correlation_id
|
|
24
|
+
set_column_not_null :conversation_id
|
|
25
|
+
set_column_not_null :message_id_ctx
|
|
26
|
+
set_column_not_null :request_id
|
|
27
|
+
set_column_not_null :tier
|
|
28
|
+
set_column_not_null :provider
|
|
29
|
+
set_column_not_null :node_id
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
rename_table :llm_metering_records, :metering_records
|
|
33
|
+
rename_table :llm_prompt_records, :prompt_records
|
|
34
|
+
rename_table :llm_tool_records, :tool_records
|
|
35
|
+
rename_table :llm_registry_availability_records, :registry_availability_records
|
|
36
|
+
end
|
|
37
|
+
end
|