lex-confabulation 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +15 -0
- data/LICENSE +21 -0
- data/README.md +71 -0
- data/lex-confabulation.gemspec +29 -0
- data/lib/legion/extensions/confabulation/actors/decay.rb +41 -0
- data/lib/legion/extensions/confabulation/client.rb +24 -0
- data/lib/legion/extensions/confabulation/helpers/claim.rb +63 -0
- data/lib/legion/extensions/confabulation/helpers/confabulation_engine.rb +116 -0
- data/lib/legion/extensions/confabulation/helpers/constants.rb +25 -0
- data/lib/legion/extensions/confabulation/runners/confabulation.rb +70 -0
- data/lib/legion/extensions/confabulation/version.rb +9 -0
- data/lib/legion/extensions/confabulation.rb +15 -0
- data/spec/legion/extensions/confabulation/client_spec.rb +34 -0
- data/spec/legion/extensions/confabulation/helpers/claim_spec.rb +119 -0
- data/spec/legion/extensions/confabulation/helpers/confabulation_engine_spec.rb +163 -0
- data/spec/legion/extensions/confabulation/helpers/constants_spec.rb +55 -0
- data/spec/legion/extensions/confabulation/runners/confabulation_spec.rb +119 -0
- data/spec/spec_helper.rb +20 -0
- metadata +79 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ee2c37bdb8c637bc9e4668ebd47d457a5b8deea44bba412c42c35752ac8f1cc8
|
|
4
|
+
data.tar.gz: 47875db096f924f3298fa4880588c3990ae219c3823d13277b1431939e91aac7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 8cb8165024fd17dfba7de1aad74eab7f6484aad5be58da44df11cba8b9536265be446ceacbcd5223058ab8bb52e9af0b72763d866ca7e1670f400c0a7c5f3c72
|
|
7
|
+
data.tar.gz: 5b8eb4dad3bf6866806538b7a363d1a112c4c2b5f261410ac821fc7a6cb039dea6bc7639dd5a7785e46dbf3ae4ba84a296be9000abaf9b0f45602c6e029892a4
|
data/Gemfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
source 'https://rubygems.org'
|
|
4
|
+
gemspec
|
|
5
|
+
|
|
6
|
+
group :test do
|
|
7
|
+
gem 'rake'
|
|
8
|
+
gem 'rspec'
|
|
9
|
+
gem 'rspec_junit_formatter'
|
|
10
|
+
gem 'rubocop', require: false
|
|
11
|
+
gem 'rubocop-rspec', require: false
|
|
12
|
+
gem 'simplecov'
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
gem 'legion-gaia', path: '../../legion-gaia'
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Matthew Iverson
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# lex-confabulation
|
|
2
|
+
|
|
3
|
+
A LegionIO cognitive architecture extension for confabulation detection and confidence calibration. Confabulation is the cognitive phenomenon of producing plausible but false narratives to fill gaps — occurring when an agent's confidence in a claim exceeds the evidence supporting it. This extension tracks cognitive claims, monitors the confidence-evidence gap, and surfaces high-risk assertions before they influence downstream decisions.
|
|
4
|
+
|
|
5
|
+
## What It Does
|
|
6
|
+
|
|
7
|
+
Manages a registry of **claims** — assertions made by the agent with an associated confidence level and an evidence strength rating. The gap between these two values is the **confabulation risk**.
|
|
8
|
+
|
|
9
|
+
- **Register claims** as the agent makes assertions, capturing confidence and evidence strength at the time
|
|
10
|
+
- **Verify claims** when supporting evidence is confirmed
|
|
11
|
+
- **Flag confabulation** when a claim is later found to be false or unsupported
|
|
12
|
+
- **Monitor calibration** — the average alignment between confidence and evidence across all claims
|
|
13
|
+
- **Surface high-risk claims** — those where confidence significantly exceeds evidence (risk >= 0.6)
|
|
14
|
+
|
|
15
|
+
A periodic `Decay` actor fires `confabulation_report` every 300 seconds for ongoing monitoring.
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
```ruby
|
|
20
|
+
require 'legion/extensions/confabulation/client'
|
|
21
|
+
|
|
22
|
+
client = Legion::Extensions::Confabulation::Client.new
|
|
23
|
+
|
|
24
|
+
# Register a claim made by the agent
|
|
25
|
+
claim = client.register_claim(
|
|
26
|
+
content: 'The database migration completed successfully',
|
|
27
|
+
claim_type: :factual,
|
|
28
|
+
confidence: 0.9,
|
|
29
|
+
evidence_strength: 0.4
|
|
30
|
+
)
|
|
31
|
+
# => { id: "uuid...", content: "...", claim_type: :factual,
|
|
32
|
+
# confidence: 0.9, evidence_strength: 0.4,
|
|
33
|
+
# confabulation_risk: 0.5, risk_label: :moderate,
|
|
34
|
+
# verified: false, confabulated: false, created_at: "..." }
|
|
35
|
+
|
|
36
|
+
claim_id = claim[:id]
|
|
37
|
+
|
|
38
|
+
# Verify a claim when evidence is confirmed
|
|
39
|
+
client.verify_claim(claim_id: claim_id)
|
|
40
|
+
# => { found: true, claim_id: "uuid...", verified: true }
|
|
41
|
+
|
|
42
|
+
# Flag a claim as confabulation when found to be false
|
|
43
|
+
client.flag_confabulation(claim_id: claim_id)
|
|
44
|
+
# => { found: true, claim_id: "uuid...", confabulated: true }
|
|
45
|
+
|
|
46
|
+
# Retrieve all claims with high confabulation risk (risk >= 0.6)
|
|
47
|
+
client.high_risk_claims
|
|
48
|
+
# => { claims: [...], count: 1 }
|
|
49
|
+
|
|
50
|
+
# Full report across all claims
|
|
51
|
+
client.confabulation_report
|
|
52
|
+
# => { total_claims: 5, high_risk_claims: 1, verified_claims: 2,
|
|
53
|
+
# confabulated_claims: 1, confabulation_rate: 0.2,
|
|
54
|
+
# average_calibration: 0.75, overall_risk: 0.2, risk_label: :minimal }
|
|
55
|
+
|
|
56
|
+
# Engine summary
|
|
57
|
+
client.confabulation_status
|
|
58
|
+
# => { engine: { claim_count: 5, confabulation_rate: 0.2, average_calibration: 0.75 } }
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Development
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
bundle install
|
|
65
|
+
bundle exec rspec
|
|
66
|
+
bundle exec rubocop
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## License
|
|
70
|
+
|
|
71
|
+
MIT
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/legion/extensions/confabulation/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
|
|
6
|
+
spec.name = 'lex-confabulation'
|
|
7
|
+
spec.version = Legion::Extensions::Confabulation::VERSION
|
|
8
|
+
spec.authors = ['Esity']
|
|
9
|
+
spec.email = ['matthewdiverson@gmail.com']
|
|
10
|
+
|
|
11
|
+
spec.summary = 'LEX Confabulation'
|
|
12
|
+
spec.description = 'Confabulation detection and confidence calibration for brain-modeled agentic AI'
|
|
13
|
+
spec.homepage = 'https://github.com/LegionIO/lex-confabulation'
|
|
14
|
+
spec.license = 'MIT'
|
|
15
|
+
spec.required_ruby_version = '>= 3.4'
|
|
16
|
+
|
|
17
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
|
18
|
+
spec.metadata['source_code_uri'] = 'https://github.com/LegionIO/lex-confabulation'
|
|
19
|
+
spec.metadata['documentation_uri'] = 'https://github.com/LegionIO/lex-confabulation'
|
|
20
|
+
spec.metadata['changelog_uri'] = 'https://github.com/LegionIO/lex-confabulation'
|
|
21
|
+
spec.metadata['bug_tracker_uri'] = 'https://github.com/LegionIO/lex-confabulation/issues'
|
|
22
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
23
|
+
|
|
24
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
|
25
|
+
Dir.glob('{lib,spec}/**/*') + %w[lex-confabulation.gemspec Gemfile LICENSE README.md]
|
|
26
|
+
end
|
|
27
|
+
spec.require_paths = ['lib']
|
|
28
|
+
spec.add_development_dependency 'legion-gaia'
|
|
29
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/actors/every'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Confabulation
|
|
8
|
+
module Actor
|
|
9
|
+
# Periodic actor bound to the confabulation runner's report function.
# The meaning of each hook below is defined by the
# Legion::Extensions::Actors::Every base class, which is not visible here.
class Decay < Legion::Extensions::Actors::Every
  # Runner module this actor points at.
  def runner_class = Legion::Extensions::Confabulation::Runners::Confabulation

  # Name of the runner function to invoke.
  def runner_function = 'confabulation_report'

  # Interval between invocations (the README states seconds).
  def time = 300

  # The remaining hooks are all switched off.
  def run_now? = false

  def use_runner? = false

  def check_subtask? = false

  def generate_task? = false
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/confabulation/helpers/constants'
|
|
4
|
+
require 'legion/extensions/confabulation/helpers/claim'
|
|
5
|
+
require 'legion/extensions/confabulation/helpers/confabulation_engine'
|
|
6
|
+
require 'legion/extensions/confabulation/runners/confabulation'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Confabulation
|
|
11
|
+
# Stand-alone entry point: exposes the runner methods and owns exactly one
# engine per instance.
class Client
  include Runners::Confabulation

  # Builds a fresh engine. Keyword arguments are accepted and ignored.
  def initialize(**_options)
    @confabulation_engine = Helpers::ConfabulationEngine.new
  end

  # Private reader for the engine created in #initialize; it takes precedence
  # over the lazily-memoizing reader supplied by the runner module.
  private attr_reader :confabulation_engine
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Confabulation
|
|
8
|
+
module Helpers
|
|
9
|
+
# A single assertion together with the confidence it was stated with and the
# strength of the evidence behind it. The gap between the two is the claim's
# confabulation risk.
class Claim
  attr_reader :id, :content, :claim_type, :confidence, :evidence_strength,
              :verified, :confabulated, :created_at

  # @param content [Object] the claim text, stored as given
  # @param claim_type [Symbol] stored as given (no validation happens here)
  # @param confidence [Numeric, String] converted with Float() and clamped to 0.0..1.0
  # @param evidence_strength [Numeric, String] converted with Float() and clamped to 0.0..1.0
  # @raise [ArgumentError, TypeError] when a score cannot be converted by Float()
  def initialize(content:, claim_type:, confidence:, evidence_strength:)
    @id = SecureRandom.uuid
    @content = content
    @claim_type = claim_type
    @confidence = unit_interval(confidence)
    @evidence_strength = unit_interval(evidence_strength)
    @verified = false
    @confabulated = false
    @created_at = Time.now.utc
  end

  # How far confidence exceeds evidence; never negative.
  #
  # @return [Float] value in 0.0..1.0
  def confabulation_risk
    (confidence - evidence_strength).clamp(0.0, 1.0)
  end

  # Marks the claim as verified.
  #
  # @return [Claim] self, to allow chaining
  def verify!
    @verified = true
    self
  end

  # Marks the claim as confabulated.
  #
  # @return [Claim] self, to allow chaining
  def mark_confabulated!
    @confabulated = true
    self
  end

  # Label of the first RISK_LABELS range that covers the current risk.
  # Ranges are checked in table order, so a value on a shared boundary gets
  # the earlier label. Falls back to :extreme when no range matches.
  #
  # @return [Symbol]
  def risk_label
    risk = confabulation_risk
    Constants::RISK_LABELS.find { |range, _label| range.cover?(risk) }&.last || :extreme
  end

  # Serializable snapshot of the claim; numeric fields are rounded to 10
  # decimal places and the timestamp is rendered as an ISO 8601 string.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    {
      id: id,
      content: content,
      claim_type: claim_type,
      confidence: confidence.round(10),
      evidence_strength: evidence_strength.round(10),
      confabulation_risk: confabulation_risk.round(10),
      risk_label: risk_label,
      verified: verified,
      confabulated: confabulated,
      created_at: created_at.iso8601
    }
  end

  private

  # Converts a score to Float before clamping so that Integer input (e.g. 1)
  # does not leak through as an Integer and numeric strings are accepted.
  def unit_interval(value)
    Float(value).clamp(0.0, 1.0)
  end
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Confabulation
|
|
6
|
+
module Helpers
|
|
7
|
+
# In-memory registry of claims keyed by claim id, with aggregate statistics
# over the confidence/evidence gap of the stored claims.
class ConfabulationEngine
  # @return [Hash{String => Claim}] stored claims, keyed by id
  attr_reader :claims

  def initialize
    @claims = {}
  end

  # Creates and stores a claim. Once the store holds MAX_CLAIMS entries, the
  # oldest claim is evicted first.
  #
  # @param content [Object] claim text
  # @param claim_type [Symbol, String, nil] anything that does not name one of
  #   Constants::CLAIM_TYPES (including nil) is stored as :factual
  # @param confidence [Numeric] passed through to Claim
  # @param evidence_strength [Numeric] passed through to Claim
  # @return [Claim] the stored claim
  def register_claim(content:, claim_type: :factual, confidence: 0.5, evidence_strength: 0.5)
    claim = Claim.new(
      content: content,
      claim_type: normalize_claim_type(claim_type),
      confidence: confidence,
      evidence_strength: evidence_strength
    )
    prune_if_needed
    @claims[claim.id] = claim
    claim
  end

  # Marks the claim with the given id as verified.
  #
  # @return [Hash] { found: false, claim_id: } when unknown, otherwise
  #   { found: true, claim_id:, verified: true }
  def verify_claim(claim_id:)
    claim = @claims[claim_id]
    return { found: false, claim_id: claim_id } unless claim

    claim.verify!
    { found: true, claim_id: claim_id, verified: true }
  end

  # Marks the claim with the given id as confabulated.
  #
  # @return [Hash] { found: false, claim_id: } when unknown, otherwise
  #   { found: true, claim_id:, confabulated: true }
  def flag_confabulation(claim_id:)
    claim = @claims[claim_id]
    return { found: false, claim_id: claim_id } unless claim

    claim.mark_confabulated!
    { found: true, claim_id: claim_id, confabulated: true }
  end

  # @return [Array<Claim>] claims whose risk is at or above CONFABULATION_THRESHOLD
  def high_risk_claims
    @claims.values.select { |claim| claim.confabulation_risk >= Constants::CONFABULATION_THRESHOLD }
  end

  # @return [Array<Claim>] claims that have been verified
  def verified_claims
    @claims.values.select(&:verified)
  end

  # @return [Float] share of stored claims flagged as confabulated
  #   (0.0 when the store is empty), rounded to 10 decimal places
  def confabulation_rate
    return 0.0 if @claims.empty?

    confabulated_count.fdiv(@claims.size).round(10)
  end

  # One minus the mean absolute confidence/evidence gap, so 1.0 means every
  # claim's confidence equals its evidence strength.
  #
  # @return [Float] value in 0.0..1.0 (0.0 when the store is empty)
  def average_calibration
    return 0.0 if @claims.empty?

    mean_gap = @claims.values.sum { |claim| (claim.confidence - claim.evidence_strength).abs } / @claims.size.to_f
    (1.0 - mean_gap).clamp(0.0, 1.0).round(10)
  end

  # Aggregate snapshot of the store. overall_risk is the share of claims that
  # are high risk, and risk_label is the RISK_LABELS label for that share.
  #
  # @return [Hash{Symbol => Object}]
  def confabulation_report
    total = @claims.size
    high_risk = high_risk_claims.size
    overall_risk = total.zero? ? 0.0 : high_risk.fdiv(total)

    {
      total_claims: total,
      high_risk_claims: high_risk,
      verified_claims: verified_claims.size,
      confabulated_claims: confabulated_count,
      confabulation_rate: confabulation_rate,
      average_calibration: average_calibration,
      overall_risk: overall_risk.round(10),
      risk_label: risk_label_for(overall_risk)
    }
  end

  # Evicts the claim with the smallest created_at once the store has reached
  # MAX_CLAIMS. Called before each insert, so at most one eviction is needed.
  def prune_if_needed
    return unless @claims.size >= Constants::MAX_CLAIMS

    oldest_key = @claims.min_by { |_id, claim| claim.created_at }&.first
    @claims.delete(oldest_key)
  end

  # @return [Hash{Symbol => Object}] compact engine summary
  def to_h
    {
      claim_count: @claims.size,
      confabulation_rate: confabulation_rate,
      average_calibration: average_calibration
    }
  end

  private

  # Resolves input to one of the known claim type symbols by comparing string
  # forms. This avoids calling #to_sym on arbitrary input, which raises for
  # nil and other non-symbolizable values; anything unknown becomes :factual.
  def normalize_claim_type(claim_type)
    wanted = claim_type.to_s
    Constants::CLAIM_TYPES.find { |known| known.to_s == wanted } || :factual
  end

  # Number of stored claims flagged as confabulated.
  def confabulated_count
    @claims.values.count(&:confabulated)
  end

  # Label of the first RISK_LABELS range covering value; :extreme if none does.
  def risk_label_for(value)
    Constants::RISK_LABELS.find { |range, _label| range.cover?(value) }&.last || :extreme
  end
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Confabulation
|
|
6
|
+
module Helpers
|
|
7
|
+
# Limits and lookup tables shared by the confabulation helpers.
module Constants
  # Store size at which the engine evicts its oldest claim before inserting.
  MAX_CLAIMS = 500

  # Minimum confidence-minus-evidence gap for a claim to count as high risk.
  CONFABULATION_THRESHOLD = 0.6

  # NOTE(review): not referenced by any code visible in this package — confirm intended use.
  EVIDENCE_DECAY = 0.02

  # Risk score range => label. Adjacent ranges share their boundary value;
  # lookups walk the table in order and take the first covering range, so a
  # boundary value (e.g. 0.2) receives the earlier label.
  RISK_LABELS = {
    (0.0..0.2) => :minimal,
    (0.2..0.4) => :low,
    (0.4..0.6) => :moderate,
    (0.6..0.8) => :high,
    (0.8..1.0) => :extreme
  }.freeze

  # Claim categories accepted by the engine.
  CLAIM_TYPES = [:factual, :causal, :explanatory, :predictive, :autobiographical].freeze
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Confabulation
|
|
6
|
+
module Runners
|
|
7
|
+
# Runner methods for the confabulation extension. Each method delegates to
# the host's ConfabulationEngine, logs the outcome, and returns plain hashes.
#
# Logging is skipped when Legion::Logging is not loaded: the gemspec declares
# no runtime dependency that would provide it, so stand-alone use must not
# raise NameError.
module Confabulation
  # Mix in the framework helper only when it has been loaded.
  include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)

  # Registers a claim and returns its hash form.
  #
  # @param content [Object] claim text
  # @param claim_type [Symbol] forwarded to the engine
  # @param confidence [Numeric] forwarded to the engine
  # @param evidence_strength [Numeric] forwarded to the engine
  # @return [Hash] the stored claim's #to_h
  def register_claim(content:, claim_type: :factual, confidence: 0.5, evidence_strength: 0.5, **)
    claim = confabulation_engine.register_claim(
      content: content,
      claim_type: claim_type,
      confidence: confidence,
      evidence_strength: evidence_strength
    )
    log_confabulation(:debug, "[confabulation] register: id=#{claim.id} type=#{claim.claim_type} " \
                              "risk=#{claim.confabulation_risk.round(2)} label=#{claim.risk_label}")
    claim.to_h
  end

  # Marks a claim as verified.
  #
  # @return [Hash] the engine's result hash (includes :found)
  def verify_claim(claim_id:, **)
    result = confabulation_engine.verify_claim(claim_id: claim_id)
    if result[:found]
      log_confabulation(:info, "[confabulation] verified: claim_id=#{claim_id}")
    else
      log_confabulation(:debug, "[confabulation] verify: claim_id=#{claim_id} not found")
    end
    result
  end

  # Marks a claim as confabulated.
  #
  # @return [Hash] the engine's result hash (includes :found)
  def flag_confabulation(claim_id:, **)
    result = confabulation_engine.flag_confabulation(claim_id: claim_id)
    if result[:found]
      log_confabulation(:warn, "[confabulation] flagged: claim_id=#{claim_id} marked as confabulated")
    else
      log_confabulation(:debug, "[confabulation] flag: claim_id=#{claim_id} not found")
    end
    result
  end

  # @return [Hash] the engine's aggregate report
  def confabulation_report(**)
    report = confabulation_engine.confabulation_report
    log_confabulation(:debug, "[confabulation] report: total=#{report[:total_claims]} " \
                              "high_risk=#{report[:high_risk_claims]} " \
                              "rate=#{report[:confabulation_rate].round(2)} label=#{report[:risk_label]}")
    report
  end

  # @return [Hash] { claims: [claim hashes], count: Integer }
  def high_risk_claims(**)
    claims = confabulation_engine.high_risk_claims
    log_confabulation(:debug, "[confabulation] high_risk_claims: count=#{claims.size}")
    { claims: claims.map(&:to_h), count: claims.size }
  end

  # @return [Hash] { engine: engine summary hash }
  def confabulation_status(**)
    { engine: confabulation_engine.to_h }
  end

  private

  # Lazily builds the engine unless the including class supplies its own reader.
  def confabulation_engine
    @confabulation_engine ||= Helpers::ConfabulationEngine.new
  end

  # Sends message to Legion::Logging at the given level, or does nothing
  # when the logging module has not been loaded.
  def log_confabulation(level, message)
    return unless defined?(Legion::Logging)

    Legion::Logging.public_send(level, message)
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/confabulation/version'
|
|
4
|
+
require 'legion/extensions/confabulation/helpers/constants'
|
|
5
|
+
require 'legion/extensions/confabulation/helpers/claim'
|
|
6
|
+
require 'legion/extensions/confabulation/helpers/confabulation_engine'
|
|
7
|
+
require 'legion/extensions/confabulation/runners/confabulation'
|
|
8
|
+
|
|
9
|
+
module Legion
|
|
10
|
+
module Extensions
|
|
11
|
+
# Root namespace of the extension. Hooks into the framework core only when
# Legion::Extensions already defines a Core constant.
module Confabulation
  extend(Legion::Extensions::Core) if Legion::Extensions.const_defined?(:Core)
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/confabulation/client'
|
|
4
|
+
|
|
5
|
+
RSpec.describe Legion::Extensions::Confabulation::Client do
|
|
6
|
+
let(:client) { described_class.new }
|
|
7
|
+
|
|
8
|
+
it 'responds to confabulation runner methods' do
|
|
9
|
+
expect(client).to respond_to(:register_claim)
|
|
10
|
+
expect(client).to respond_to(:verify_claim)
|
|
11
|
+
expect(client).to respond_to(:flag_confabulation)
|
|
12
|
+
expect(client).to respond_to(:confabulation_report)
|
|
13
|
+
expect(client).to respond_to(:high_risk_claims)
|
|
14
|
+
expect(client).to respond_to(:confabulation_status)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it 'starts with an empty engine' do
|
|
18
|
+
status = client.confabulation_status
|
|
19
|
+
expect(status[:engine][:claim_count]).to eq(0)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it 'persists claims across calls within the same instance' do
|
|
23
|
+
client.register_claim(content: 'persistent', claim_type: :factual,
|
|
24
|
+
confidence: 0.7, evidence_strength: 0.3)
|
|
25
|
+
expect(client.confabulation_status[:engine][:claim_count]).to eq(1)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it 'maintains separate state between instances' do
|
|
29
|
+
client.register_claim(content: 'instance A claim', claim_type: :factual,
|
|
30
|
+
confidence: 0.7, evidence_strength: 0.3)
|
|
31
|
+
other = described_class.new
|
|
32
|
+
expect(other.confabulation_status[:engine][:claim_count]).to eq(0)
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Legion::Extensions::Confabulation::Helpers::Claim do
|
|
4
|
+
let(:claim) do
|
|
5
|
+
described_class.new(
|
|
6
|
+
content: 'The sky is green',
|
|
7
|
+
claim_type: :factual,
|
|
8
|
+
confidence: 0.8,
|
|
9
|
+
evidence_strength: 0.3
|
|
10
|
+
)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
describe '#initialize' do
|
|
14
|
+
it 'generates a UUID id' do
|
|
15
|
+
expect(claim.id).to match(/\A[0-9a-f-]{36}\z/)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it 'stores content' do
|
|
19
|
+
expect(claim.content).to eq('The sky is green')
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it 'stores claim_type' do
|
|
23
|
+
expect(claim.claim_type).to eq(:factual)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it 'clamps confidence to [0, 1]' do
|
|
27
|
+
c = described_class.new(content: 'x', claim_type: :factual, confidence: 1.5, evidence_strength: 0.0)
|
|
28
|
+
expect(c.confidence).to eq(1.0)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it 'clamps evidence_strength to [0, 1]' do
|
|
32
|
+
c = described_class.new(content: 'x', claim_type: :factual, confidence: 0.5, evidence_strength: -0.5)
|
|
33
|
+
expect(c.evidence_strength).to eq(0.0)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it 'starts unverified' do
|
|
37
|
+
expect(claim.verified).to be false
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
it 'starts not confabulated' do
|
|
41
|
+
expect(claim.confabulated).to be false
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
it 'sets created_at to utc time' do
|
|
45
|
+
expect(claim.created_at).to be_a(Time)
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
describe '#confabulation_risk' do
|
|
50
|
+
it 'computes the gap between confidence and evidence_strength' do
|
|
51
|
+
expect(claim.confabulation_risk).to be_within(0.001).of(0.5)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it 'clamps to 0.0 when evidence >= confidence' do
|
|
55
|
+
c = described_class.new(content: 'x', claim_type: :factual, confidence: 0.3, evidence_strength: 0.9)
|
|
56
|
+
expect(c.confabulation_risk).to eq(0.0)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
it 'clamps to 1.0 when gap exceeds 1.0' do
|
|
60
|
+
c = described_class.new(content: 'x', claim_type: :factual, confidence: 1.0, evidence_strength: 0.0)
|
|
61
|
+
expect(c.confabulation_risk).to eq(1.0)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
describe '#verify!' do
|
|
66
|
+
it 'marks the claim as verified' do
|
|
67
|
+
claim.verify!
|
|
68
|
+
expect(claim.verified).to be true
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it 'returns self' do
|
|
72
|
+
expect(claim.verify!).to eq(claim)
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
describe '#mark_confabulated!' do
|
|
77
|
+
it 'marks the claim as confabulated' do
|
|
78
|
+
claim.mark_confabulated!
|
|
79
|
+
expect(claim.confabulated).to be true
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it 'returns self' do
|
|
83
|
+
expect(claim.mark_confabulated!).to eq(claim)
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
describe '#risk_label' do
|
|
88
|
+
it 'returns :extreme for risk >= 0.8' do
|
|
89
|
+
c = described_class.new(content: 'x', claim_type: :factual, confidence: 1.0, evidence_strength: 0.1)
|
|
90
|
+
expect(c.risk_label).to eq(:extreme)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it 'returns :minimal for risk <= 0.2' do
|
|
94
|
+
c = described_class.new(content: 'x', claim_type: :factual, confidence: 0.2, evidence_strength: 0.1)
|
|
95
|
+
expect(c.risk_label).to eq(:minimal)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it 'returns :moderate for mid-range risk' do
|
|
99
|
+
c = described_class.new(content: 'x', claim_type: :factual, confidence: 0.7, evidence_strength: 0.25)
|
|
100
|
+
expect(c.risk_label).to eq(:moderate)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
describe '#to_h' do
|
|
105
|
+
it 'includes all expected keys' do
|
|
106
|
+
h = claim.to_h
|
|
107
|
+
expect(h.keys).to include(
|
|
108
|
+
:id, :content, :claim_type, :confidence, :evidence_strength,
|
|
109
|
+
:confabulation_risk, :risk_label, :verified, :confabulated, :created_at
|
|
110
|
+
)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it 'rounds numeric fields to 10 decimal places' do
|
|
114
|
+
h = claim.to_h
|
|
115
|
+
expect(h[:confidence]).to eq(0.8)
|
|
116
|
+
expect(h[:confabulation_risk]).to be_a(Float)
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Legion::Extensions::Confabulation::Helpers::ConfabulationEngine do
|
|
4
|
+
subject(:engine) { described_class.new }
|
|
5
|
+
|
|
6
|
+
describe '#register_claim' do
|
|
7
|
+
it 'stores the claim and returns a Claim object' do
|
|
8
|
+
claim = engine.register_claim(content: 'Cats can fly', claim_type: :factual,
|
|
9
|
+
confidence: 0.9, evidence_strength: 0.1)
|
|
10
|
+
expect(claim).to be_a(Legion::Extensions::Confabulation::Helpers::Claim)
|
|
11
|
+
expect(engine.claims.size).to eq(1)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it 'assigns the claim an id' do
|
|
15
|
+
claim = engine.register_claim(content: 'test', claim_type: :causal,
|
|
16
|
+
confidence: 0.5, evidence_strength: 0.5)
|
|
17
|
+
expect(claim.id).not_to be_nil
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it 'defaults claim_type to :factual for unknown types' do
|
|
21
|
+
claim = engine.register_claim(content: 'x', claim_type: :unknown,
|
|
22
|
+
confidence: 0.5, evidence_strength: 0.5)
|
|
23
|
+
expect(claim.claim_type).to eq(:factual)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it 'accepts all valid claim types' do
|
|
27
|
+
Legion::Extensions::Confabulation::Helpers::Constants::CLAIM_TYPES.each do |type|
|
|
28
|
+
claim = engine.register_claim(content: 'x', claim_type: type,
|
|
29
|
+
confidence: 0.5, evidence_strength: 0.5)
|
|
30
|
+
expect(claim.claim_type).to eq(type)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
describe '#verify_claim' do
|
|
36
|
+
it 'marks claim as verified' do
|
|
37
|
+
claim = engine.register_claim(content: 'x', claim_type: :factual,
|
|
38
|
+
confidence: 0.5, evidence_strength: 0.5)
|
|
39
|
+
result = engine.verify_claim(claim_id: claim.id)
|
|
40
|
+
expect(result[:found]).to be true
|
|
41
|
+
expect(result[:verified]).to be true
|
|
42
|
+
expect(engine.claims[claim.id].verified).to be true
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it 'returns found: false for unknown id' do
|
|
46
|
+
result = engine.verify_claim(claim_id: 'no-such-id')
|
|
47
|
+
expect(result[:found]).to be false
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
describe '#flag_confabulation' do
|
|
52
|
+
it 'marks claim as confabulated' do
|
|
53
|
+
claim = engine.register_claim(content: 'x', claim_type: :factual,
|
|
54
|
+
confidence: 0.9, evidence_strength: 0.1)
|
|
55
|
+
result = engine.flag_confabulation(claim_id: claim.id)
|
|
56
|
+
expect(result[:found]).to be true
|
|
57
|
+
expect(result[:confabulated]).to be true
|
|
58
|
+
expect(engine.claims[claim.id].confabulated).to be true
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it 'returns found: false for unknown id' do
|
|
62
|
+
result = engine.flag_confabulation(claim_id: 'no-such-id')
|
|
63
|
+
expect(result[:found]).to be false
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
describe '#high_risk_claims' do
|
|
68
|
+
it 'returns claims above the confabulation threshold' do
|
|
69
|
+
engine.register_claim(content: 'risky', claim_type: :factual, confidence: 0.9, evidence_strength: 0.1)
|
|
70
|
+
engine.register_claim(content: 'safe', claim_type: :factual, confidence: 0.4, evidence_strength: 0.4)
|
|
71
|
+
high_risk = engine.high_risk_claims
|
|
72
|
+
expect(high_risk.size).to eq(1)
|
|
73
|
+
expect(high_risk.first.content).to eq('risky')
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
it 'returns empty array when no high-risk claims exist' do
|
|
77
|
+
engine.register_claim(content: 'safe', claim_type: :factual, confidence: 0.3, evidence_strength: 0.3)
|
|
78
|
+
expect(engine.high_risk_claims).to be_empty
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
describe '#verified_claims' do
|
|
83
|
+
it 'returns only verified claims' do
|
|
84
|
+
c1 = engine.register_claim(content: 'verified', claim_type: :factual, confidence: 0.5, evidence_strength: 0.5)
|
|
85
|
+
engine.register_claim(content: 'unverified', claim_type: :factual, confidence: 0.5, evidence_strength: 0.5)
|
|
86
|
+
engine.verify_claim(claim_id: c1.id)
|
|
87
|
+
expect(engine.verified_claims.size).to eq(1)
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
describe '#confabulation_rate' do
|
|
92
|
+
it 'returns 0.0 when no claims' do
|
|
93
|
+
expect(engine.confabulation_rate).to eq(0.0)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it 'returns fraction of confabulated claims' do
|
|
97
|
+
c1 = engine.register_claim(content: 'a', claim_type: :factual, confidence: 0.9, evidence_strength: 0.1)
|
|
98
|
+
engine.register_claim(content: 'b', claim_type: :factual, confidence: 0.4, evidence_strength: 0.4)
|
|
99
|
+
engine.flag_confabulation(claim_id: c1.id)
|
|
100
|
+
expect(engine.confabulation_rate).to be_within(0.001).of(0.5)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
describe '#average_calibration' do
|
|
105
|
+
it 'returns 0.0 when no claims' do
|
|
106
|
+
expect(engine.average_calibration).to eq(0.0)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it 'returns 1.0 when all claims are perfectly calibrated' do
|
|
110
|
+
engine.register_claim(content: 'x', claim_type: :factual, confidence: 0.5, evidence_strength: 0.5)
|
|
111
|
+
expect(engine.average_calibration).to eq(1.0)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
it 'returns lower value when confidence mismatches evidence' do
|
|
115
|
+
engine.register_claim(content: 'x', claim_type: :factual, confidence: 1.0, evidence_strength: 0.0)
|
|
116
|
+
expect(engine.average_calibration).to be < 1.0
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
describe '#confabulation_report' do
|
|
121
|
+
it 'returns a comprehensive report hash' do
|
|
122
|
+
engine.register_claim(content: 'x', claim_type: :factual, confidence: 0.8, evidence_strength: 0.2)
|
|
123
|
+
report = engine.confabulation_report
|
|
124
|
+
expect(report.keys).to include(
|
|
125
|
+
:total_claims, :high_risk_claims, :verified_claims,
|
|
126
|
+
:confabulated_claims, :confabulation_rate, :average_calibration,
|
|
127
|
+
:overall_risk, :risk_label
|
|
128
|
+
)
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
it 'risk_label is a symbol' do
|
|
132
|
+
report = engine.confabulation_report
|
|
133
|
+
expect(report[:risk_label]).to be_a(Symbol)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
it 'totals match registered claims' do
|
|
137
|
+
2.times { |i| engine.register_claim(content: "claim#{i}", claim_type: :factual, confidence: 0.5, evidence_strength: 0.5) }
|
|
138
|
+
expect(engine.confabulation_report[:total_claims]).to eq(2)
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
describe '#prune_if_needed' do
|
|
143
|
+
it 'prunes oldest claim when at MAX_CLAIMS capacity' do
|
|
144
|
+
max = Legion::Extensions::Confabulation::Helpers::Constants::MAX_CLAIMS
|
|
145
|
+
first_claim = engine.register_claim(content: 'first', claim_type: :factual, confidence: 0.5, evidence_strength: 0.5)
|
|
146
|
+
(max - 1).times { |i| engine.register_claim(content: "claim#{i}", claim_type: :factual, confidence: 0.5, evidence_strength: 0.5) }
|
|
147
|
+
expect(engine.claims.size).to eq(max)
|
|
148
|
+
engine.register_claim(content: 'overflow', claim_type: :factual, confidence: 0.5, evidence_strength: 0.5)
|
|
149
|
+
expect(engine.claims.size).to eq(max)
|
|
150
|
+
expect(engine.claims[first_claim.id]).to be_nil
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
describe '#to_h' do
|
|
155
|
+
it 'returns a summary hash' do
|
|
156
|
+
engine.register_claim(content: 'x', claim_type: :factual, confidence: 0.5, evidence_strength: 0.5)
|
|
157
|
+
h = engine.to_h
|
|
158
|
+
expect(h).to have_key(:claim_count)
|
|
159
|
+
expect(h).to have_key(:confabulation_rate)
|
|
160
|
+
expect(h).to have_key(:average_calibration)
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Pins the values and shape of the constants in Helpers::Constants.
RSpec.describe Legion::Extensions::Confabulation::Helpers::Constants do
  # Scalar constants: one nested group per constant, each holding a single
  # equality example. Columns are constant name, example title, expected value.
  [
    ['MAX_CLAIMS', 'is 500', 500],
    ['CONFABULATION_THRESHOLD', 'is 0.6', 0.6],
    ['EVIDENCE_DECAY', 'is 0.02', 0.02]
  ].each do |constant_name, title, expected_value|
    describe constant_name do
      it title do
        expect(described_class.const_get(constant_name)).to eq(expected_value)
      end
    end
  end

  describe 'RISK_LABELS' do
    # Returns the label of the first entry whose range key covers +score+,
    # or nil when no key covers it.
    def label_covering(score)
      match = described_class::RISK_LABELS.find { |range, _label| range.cover?(score) }
      match&.last
    end

    # Only the outermost bounds are checked; gaps between ranges are not.
    it 'covers the full 0.0-1.0 range' do
      lower_bounds = described_class::RISK_LABELS.keys.map(&:min)
      expect(lower_bounds.min).to eq(0.0)

      upper_bounds = described_class::RISK_LABELS.keys.map(&:max)
      expect(upper_bounds.max).to eq(1.0)
    end

    # `include` only requires these labels to be present; extras would pass.
    it 'includes all five risk levels' do
      expected_labels = %i[minimal low moderate high extreme]
      expect(described_class::RISK_LABELS.values).to include(*expected_labels)
    end

    it 'maps 0.0 to minimal' do
      expect(label_covering(0.0)).to eq(:minimal)
    end

    it 'maps 0.9 to extreme' do
      expect(label_covering(0.9)).to eq(:extreme)
    end
  end

  describe 'CLAIM_TYPES' do
    let(:claim_types) { described_class::CLAIM_TYPES }

    # `include` only requires these types to be present; extras would pass.
    it 'includes all five types' do
      expect(claim_types).to include(:factual, :causal, :explanatory, :predictive, :autobiographical)
    end

    it 'is frozen' do
      expect(claim_types).to be_frozen
    end
  end
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/confabulation/client'
|
|
4
|
+
|
|
5
|
+
# Exercises the runner methods through Client; `let` builds a new Client for
# each example.
RSpec.describe Legion::Extensions::Confabulation::Runners::Confabulation do
  let(:client) { Legion::Extensions::Confabulation::Client.new }

  # Positional shorthand for client.register_claim's four keyword arguments.
  def register(content, claim_type, confidence, evidence_strength)
    client.register_claim(content: content, claim_type: claim_type,
                          confidence: confidence, evidence_strength: evidence_strength)
  end

  describe '#register_claim' do
    it 'returns a claim hash with an id' do
      claim = register('Pigeons navigate by smell', :factual, 0.7, 0.2)
      expect(claim[:id]).not_to be_nil
      expect(claim[:claim_type]).to eq(:factual)
    end

    it 'includes confabulation_risk and risk_label' do
      claim = register('x', :causal, 0.9, 0.1)
      %i[confabulation_risk risk_label].each { |key| expect(claim).to have_key(key) }
    end

    it 'accepts all valid claim types' do
      valid_types = Legion::Extensions::Confabulation::Helpers::Constants::CLAIM_TYPES
      valid_types.each do |type|
        claim = register('x', type, 0.5, 0.5)
        expect(claim[:claim_type]).to eq(type)
      end
    end
  end

  describe '#verify_claim' do
    it 'verifies an existing claim' do
      claim = register('testable fact', :factual, 0.6, 0.6)
      outcome = client.verify_claim(claim_id: claim[:id])
      expect(outcome[:found]).to be true
      expect(outcome[:verified]).to be true
    end

    it 'returns found: false for a missing claim' do
      outcome = client.verify_claim(claim_id: 'does-not-exist')
      expect(outcome[:found]).to be false
    end
  end

  describe '#flag_confabulation' do
    it 'flags an existing claim as confabulated' do
      claim = register('false memory', :autobiographical, 0.9, 0.05)
      outcome = client.flag_confabulation(claim_id: claim[:id])
      expect(outcome[:found]).to be true
      expect(outcome[:confabulated]).to be true
    end

    it 'returns found: false for a missing claim' do
      outcome = client.flag_confabulation(claim_id: 'no-such-id')
      expect(outcome[:found]).to be false
    end
  end

  describe '#confabulation_report' do
    it 'returns a report hash' do
      register('risky', :factual, 0.9, 0.05)
      report = client.confabulation_report
      expect(report[:total_claims]).to eq(1)
      expect(report[:risk_label]).to be_a(Symbol)
    end

    # No claim is registered in this example before the report is requested.
    it 'includes overall_risk and average_calibration' do
      report = client.confabulation_report
      %i[overall_risk average_calibration].each { |key| expect(report).to have_key(key) }
    end
  end

  describe '#high_risk_claims' do
    it 'returns claims above the threshold' do
      register('overconfident', :predictive, 0.95, 0.05)
      register('calibrated', :predictive, 0.5, 0.5)

      listing = client.high_risk_claims
      expect(listing[:count]).to eq(1)
      expect(listing[:claims].first[:content]).to eq('overconfident')
    end

    it 'returns empty list when no high-risk claims' do
      register('ok', :factual, 0.5, 0.5)
      listing = client.high_risk_claims
      expect(listing[:count]).to eq(0)
    end
  end

  describe '#confabulation_status' do
    it 'returns engine summary' do
      engine_summary = client.confabulation_status[:engine]
      %i[claim_count confabulation_rate average_calibration].each do |key|
        expect(engine_summary).to have_key(key)
      end
    end
  end

  describe 'full cycle' do
    # One verified claim plus one flagged claim out of two gives a 0.5 rate.
    it 'registers, verifies, flags and reports correctly' do
      valid = register('valid fact', :factual, 0.6, 0.6)
      bogus = register('false memory', :autobiographical, 0.9, 0.05)

      client.verify_claim(claim_id: valid[:id])
      client.flag_confabulation(claim_id: bogus[:id])

      report = client.confabulation_report
      expect(report[:total_claims]).to eq(2)
      expect(report[:verified_claims]).to eq(1)
      expect(report[:confabulated_claims]).to eq(1)
      expect(report[:confabulation_rate]).to be_within(0.001).of(0.5)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/setup'
|
|
4
|
+
|
|
5
|
+
# No-op stand-in for Legion::Logging: each level method takes exactly one
# argument, ignores it and returns nil.
# NOTE(review): presumably defined here so the extension can be required
# without the real logging library being loaded — confirm.
module Legion
  module Logging
    %i[debug info warn error].each do |level|
      define_singleton_method(level) { |_msg| nil }
    end
  end
end
|
|
13
|
+
|
|
14
|
+
require 'legion/extensions/confabulation'
|
|
15
|
+
|
|
16
|
+
# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  # File where RSpec records each example's last status between runs.
  rspec.example_status_persistence_file_path = '.rspec_status'

  # Turns off RSpec's monkey-patched globals (e.g. top-level `describe`).
  rspec.disable_monkey_patching!

  # Allow only the `expect(...)` assertion syntax.
  rspec.expect_with :rspec do |expectations|
    expectations.syntax = :expect
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lex-confabulation
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Esity
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: legion-gaia
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
19
|
+
type: :development
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
26
|
+
description: Confabulation detection and confidence calibration for brain-modeled
|
|
27
|
+
agentic AI
|
|
28
|
+
email:
|
|
29
|
+
- matthewdiverson@gmail.com
|
|
30
|
+
executables: []
|
|
31
|
+
extensions: []
|
|
32
|
+
extra_rdoc_files: []
|
|
33
|
+
files:
|
|
34
|
+
- Gemfile
|
|
35
|
+
- LICENSE
|
|
36
|
+
- README.md
|
|
37
|
+
- lex-confabulation.gemspec
|
|
38
|
+
- lib/legion/extensions/confabulation.rb
|
|
39
|
+
- lib/legion/extensions/confabulation/actors/decay.rb
|
|
40
|
+
- lib/legion/extensions/confabulation/client.rb
|
|
41
|
+
- lib/legion/extensions/confabulation/helpers/claim.rb
|
|
42
|
+
- lib/legion/extensions/confabulation/helpers/confabulation_engine.rb
|
|
43
|
+
- lib/legion/extensions/confabulation/helpers/constants.rb
|
|
44
|
+
- lib/legion/extensions/confabulation/runners/confabulation.rb
|
|
45
|
+
- lib/legion/extensions/confabulation/version.rb
|
|
46
|
+
- spec/legion/extensions/confabulation/client_spec.rb
|
|
47
|
+
- spec/legion/extensions/confabulation/helpers/claim_spec.rb
|
|
48
|
+
- spec/legion/extensions/confabulation/helpers/confabulation_engine_spec.rb
|
|
49
|
+
- spec/legion/extensions/confabulation/helpers/constants_spec.rb
|
|
50
|
+
- spec/legion/extensions/confabulation/runners/confabulation_spec.rb
|
|
51
|
+
- spec/spec_helper.rb
|
|
52
|
+
homepage: https://github.com/LegionIO/lex-confabulation
|
|
53
|
+
licenses:
|
|
54
|
+
- MIT
|
|
55
|
+
metadata:
|
|
56
|
+
homepage_uri: https://github.com/LegionIO/lex-confabulation
|
|
57
|
+
source_code_uri: https://github.com/LegionIO/lex-confabulation
|
|
58
|
+
documentation_uri: https://github.com/LegionIO/lex-confabulation
|
|
59
|
+
changelog_uri: https://github.com/LegionIO/lex-confabulation
|
|
60
|
+
bug_tracker_uri: https://github.com/LegionIO/lex-confabulation/issues
|
|
61
|
+
rubygems_mfa_required: 'true'
|
|
62
|
+
rdoc_options: []
|
|
63
|
+
require_paths:
|
|
64
|
+
- lib
|
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - ">="
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: '3.4'
|
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '0'
|
|
75
|
+
requirements: []
|
|
76
|
+
rubygems_version: 3.6.9
|
|
77
|
+
specification_version: 4
|
|
78
|
+
summary: LEX Confabulation
|
|
79
|
+
test_files: []
|