lex-reward 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: 3289d5de024c3ca01934e873312a89a06e0ce8585c731ce66d484f843010fc59
+   data.tar.gz: 7bf0e6577cafb46e3740d9ec0d3e142111720c3fd5d84569c3f2ffb6969284e6
+ SHA512:
+   metadata.gz: 71327dd3001af4e5bc4a417f69ebf1e62bfafb454234570b34e5bce982db454816eb1e3fbecb77065914f8b9306ea27575940feeaab80aac98d85835c36dafed
+   data.tar.gz: 9f4d6bf4b3a7b278b19383e50ee902a9ba23c26dfa718575d99d0775b45cbfba4bc66fbf099decb671113f8b00ae029c54566f4f63cd7cd9ce3aee1ab00677e5
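
The SHA256 and SHA512 digests above cover the two artifacts packed inside the published .gem. As a quick sanity check, a consumer can recompute a digest with Ruby's standard Digest library; this is only a sketch, and it assumes data.tar.gz has already been extracted from the .gem into the working directory.

    require 'digest'

    # Compare the recomputed digest of the extracted archive with the published value.
    expected = '7bf0e6577cafb46e3740d9ec0d3e142111720c3fd5d84569c3f2ffb6969284e6'
    actual = Digest::SHA256.file('data.tar.gz').hexdigest
    raise 'checksum mismatch for data.tar.gz' unless actual == expected
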
lib/legion/extensions/reward/client.rb ADDED
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ require 'legion/extensions/reward/helpers/constants'
+ require 'legion/extensions/reward/helpers/reward_signal'
+ require 'legion/extensions/reward/helpers/reward_store'
+ require 'legion/extensions/reward/runners/reward'
+
+ module Legion
+   module Extensions
+     module Reward
+       class Client
+         include Runners::Reward
+
+         attr_reader :reward_store
+
+         def initialize(reward_store: nil, **)
+           @reward_store = reward_store || Helpers::RewardStore.new
+         end
+       end
+     end
+   end
+ end
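
A minimal usage sketch, assuming the gem is loaded inside a Legion runtime (the runner methods mixed in from Runners::Reward log via Legion::Logging). The public attr_reader :reward_store overrides the module's private memoized store, so the injected store is the one the runner methods use:

    # Default store:
    client = Legion::Extensions::Reward::Client.new

    # Or inject a shared store (hypothetical wiring):
    store = Legion::Extensions::Reward::Helpers::RewardStore.new
    client = Legion::Extensions::Reward::Client.new(reward_store: store)
    client.compute_reward(tick_results: {}) # runner method from Runners::Reward
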
lib/legion/extensions/reward/helpers/constants.rb ADDED
@@ -0,0 +1,63 @@
+ # frozen_string_literal: true
+
+ module Legion
+   module Extensions
+     module Reward
+       module Helpers
+         module Constants
+           # Reward sources with weights (sum to 1.0)
+           # Each source contributes independently to the composite reward signal
+           REWARD_SOURCES = {
+             prediction_accuracy: { weight: 0.20, description: 'Correct predictions reinforced' },
+             curiosity_resolved: { weight: 0.15, description: 'Wonder resolution satisfaction' },
+             goal_achieved: { weight: 0.20, description: 'Intention completion reward' },
+             social_approval: { weight: 0.10, description: 'Trust increase from peers' },
+             flow_state: { weight: 0.10, description: 'Intrinsic flow motivation' },
+             error_avoidance: { weight: 0.10, description: 'Low error rate maintenance' },
+             novelty_encounter: { weight: 0.10, description: 'Novel experience exploration' },
+             homeostatic_balance: { weight: 0.05, description: 'System stability maintenance' }
+           }.freeze
+
+           # EMA alpha for running reward average
+           REWARD_ALPHA = 0.15
+
+           # EMA alpha for reward prediction (expected reward baseline)
+           PREDICTION_ALPHA = 0.1
+
+           # Minimum RPE magnitude to trigger learning signal
+           RPE_THRESHOLD = 0.05
+
+           # Reward signal range
+           REWARD_RANGE = { min: -1.0, max: 1.0 }.freeze
+
+           # RPE classification thresholds
+           RPE_LEVELS = {
+             large_positive: 0.3, # "Way better than expected!" — strong reinforcement
+             positive: 0.1, # "Better than expected" — moderate reinforcement
+             neutral: 0.05, # "About as expected" — maintenance
+             negative: -0.1, # "Worse than expected" — mild suppression
+             large_negative: -0.3 # "Way worse than expected!" — strong suppression
+           }.freeze
+
+           # Temporal discount factor (per tick, for weighted history)
+           TEMPORAL_DISCOUNT = 0.95
+
+           # History cap
+           MAX_REWARD_HISTORY = 200
+
+           # Domain-specific reward history cap
+           MAX_DOMAIN_HISTORY = 50
+
+           # Anhedonia threshold — running average below this triggers concern
+           ANHEDONIA_THRESHOLD = -0.3
+
+           # Euphoria threshold — running average above this triggers concern
+           EUPHORIA_THRESHOLD = 0.7
+
+           # Reward momentum (how much prior reward influences next prediction)
+           MOMENTUM_WINDOW = 10
+         end
+       end
+     end
+   end
+ end
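
Since the REWARD_SOURCES weights sum to 1.0 and every source signal is kept within [-1.0, 1.0], the weighted composite is itself bounded by [-1.0, 1.0]. A worked sketch with hypothetical signal values (any source not supplied contributes 0.0):

    weights = Legion::Extensions::Reward::Helpers::Constants::REWARD_SOURCES
    signals = { prediction_accuracy: 0.6, goal_achieved: 1.0, flow_state: 0.5 }
    reward = weights.sum { |source, cfg| (signals[source] || 0.0) * cfg[:weight] }
    # (0.6 * 0.20) + (1.0 * 0.20) + (0.5 * 0.10) = 0.37
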
lib/legion/extensions/reward/helpers/reward_signal.rb ADDED
@@ -0,0 +1,174 @@
+ # frozen_string_literal: true
+
+ module Legion
+   module Extensions
+     module Reward
+       module Helpers
+         class RewardSignal
+           attr_reader :running_average, :predicted_reward, :last_rpe,
+                       :history, :domain_history, :tick_count
+
+           def initialize
+             @running_average = 0.0
+             @predicted_reward = 0.0
+             @last_rpe = 0.0
+             @history = []
+             @domain_history = {}
+             @tick_count = 0
+           end
+
+           def compute(source_signals)
+             @tick_count += 1
+             raw_reward = weighted_sum(source_signals)
+             reward = raw_reward.clamp(Constants::REWARD_RANGE[:min], Constants::REWARD_RANGE[:max])
+
+             @last_rpe = reward - @predicted_reward
+             @running_average = ema(@running_average, reward, Constants::REWARD_ALPHA)
+             @predicted_reward = ema(@predicted_reward, reward, Constants::PREDICTION_ALPHA)
+
+             record(reward, source_signals)
+
+             {
+               reward: reward.round(4),
+               rpe: @last_rpe.round(4),
+               rpe_class: classify_rpe(@last_rpe),
+               running_average: @running_average.round(4),
+               predicted_reward: @predicted_reward.round(4),
+               sources: source_signals,
+               learning_signal: learning_signal?
+             }
+           end
+
+           def record_domain_reward(domain, reward)
+             @domain_history[domain] ||= []
+             @domain_history[domain] << { reward: reward, at: Time.now.utc }
+             @domain_history[domain].shift while @domain_history[domain].size > Constants::MAX_DOMAIN_HISTORY
+           end
+
+           def domain_average(domain)
+             entries = @domain_history[domain]
+             return 0.0 if entries.nil? || entries.empty?
+
+             entries.sum { |e| e[:reward] } / entries.size.to_f
+           end
+
+           def domain_trend(domain)
+             entries = @domain_history[domain]
+             return :no_data if entries.nil? || entries.size < 5
+
+             recent = entries.last(10)
+             values = recent.map { |e| e[:reward] }
+             first_half = values[0...(values.size / 2)]
+             second_half = values[(values.size / 2)..]
+             diff = mean(second_half) - mean(first_half)
+
+             if diff > 0.05
+               :improving
+             elsif diff < -0.05
+               :declining
+             else
+               :stable
+             end
+           end
+
+           def anhedonic?
+             @running_average < Constants::ANHEDONIA_THRESHOLD
+           end
+
+           def euphoric?
+             @running_average > Constants::EUPHORIA_THRESHOLD
+           end
+
+           def learning_signal?
+             @last_rpe.abs >= Constants::RPE_THRESHOLD
+           end
+
+           def recent_rewards(limit = 20)
+             @history.last(limit)
+           end
+
+           def discounted_return(window = nil)
+             entries = window ? @history.last(window) : @history
+             return 0.0 if entries.empty?
+
+             total = 0.0
+             entries.reverse_each.with_index do |entry, idx|
+               total += entry[:reward] * (Constants::TEMPORAL_DISCOUNT**idx)
+             end
+             total
+           end
+
+           def reward_volatility
+             return 0.0 if @history.size < 3
+
+             recent = @history.last(Constants::MOMENTUM_WINDOW).map { |h| h[:reward] }
+             avg = mean(recent)
+             variance = recent.sum { |r| (r - avg)**2 } / recent.size.to_f
+             Math.sqrt(variance)
+           end
+
+           def to_h
+             {
+               running_average: @running_average.round(4),
+               predicted_reward: @predicted_reward.round(4),
+               last_rpe: @last_rpe.round(4),
+               rpe_class: classify_rpe(@last_rpe),
+               tick_count: @tick_count,
+               learning_signal: learning_signal?,
+               anhedonic: anhedonic?,
+               euphoric: euphoric?,
+               volatility: reward_volatility.round(4),
+               domains_tracked: @domain_history.keys.size,
+               history_size: @history.size
+             }
+           end
+
+           private
+
+           def weighted_sum(source_signals)
+             total = 0.0
+             Constants::REWARD_SOURCES.each do |source, config|
+               value = source_signals[source] || 0.0
+               total += value * config[:weight]
+             end
+             total
+           end
+
+           def classify_rpe(rpe)
+             if rpe >= Constants::RPE_LEVELS[:large_positive]
+               :large_positive
+             elsif rpe >= Constants::RPE_LEVELS[:positive]
+               :positive
+             elsif rpe >= -Constants::RPE_LEVELS[:neutral]
+               :neutral
+             elsif rpe >= Constants::RPE_LEVELS[:large_negative]
+               :negative
+             else
+               :large_negative
+             end
+           end
+
+           def ema(current, observed, alpha)
+             (current * (1.0 - alpha)) + (observed * alpha)
+           end
+
+           def mean(values)
+             return 0.0 if values.empty?
+
+             values.sum / values.size.to_f
+           end
+
+           def record(reward, sources)
+             @history << {
+               reward: reward,
+               rpe: @last_rpe,
+               sources: sources,
+               at: Time.now.utc
+             }
+             @history.shift while @history.size > Constants::MAX_REWARD_HISTORY
+           end
+         end
+       end
+     end
+   end
+ end
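
To make the update rule concrete: each compute call derives the reward prediction error as the observed reward minus the predicted baseline, then folds the reward into two EMAs (alpha 0.15 for the running average, 0.1 for the prediction). A worked first tick, assuming a fresh signal and a single hypothetical source value:

    signal = Legion::Extensions::Reward::Helpers::RewardSignal.new
    out = signal.compute({ goal_achieved: 1.0 }) # weighted composite = 1.0 * 0.20 = 0.2

    out[:rpe]              # => 0.2   (prediction starts at 0.0)
    out[:rpe_class]        # => :positive (0.1 <= 0.2 < 0.3)
    out[:running_average]  # => 0.03  ((0.0 * 0.85) + (0.2 * 0.15))
    out[:predicted_reward] # => 0.02  ((0.0 * 0.9) + (0.2 * 0.1))
    out[:learning_signal]  # => true  (|0.2| >= RPE_THRESHOLD of 0.05)

As the prediction baseline catches up over repeated similar rewards, the RPE shrinks toward the :neutral band and the learning signal switches off, which is the intended habituation behavior.
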
lib/legion/extensions/reward/helpers/reward_store.rb ADDED
@@ -0,0 +1,138 @@
+ # frozen_string_literal: true
+
+ module Legion
+   module Extensions
+     module Reward
+       module Helpers
+         class RewardStore
+           attr_reader :signal
+
+           def initialize(signal: nil)
+             @signal = signal || RewardSignal.new
+           end
+
+           def process_tick(tick_results)
+             source_signals = extract_signals(tick_results)
+             result = @signal.compute(source_signals)
+
+             domain = extract_domain(tick_results)
+             @signal.record_domain_reward(domain, result[:reward]) if domain
+
+             result
+           end
+
+           def domain_report(domain)
+             {
+               domain: domain,
+               average: @signal.domain_average(domain),
+               trend: @signal.domain_trend(domain),
+               history: @signal.domain_history[domain]&.last(10) || []
+             }
+           end
+
+           def all_domain_averages
+             @signal.domain_history.keys.to_h do |domain|
+               [domain, @signal.domain_average(domain).round(4)]
+             end
+           end
+
+           def health_assessment
+             avg = @signal.running_average
+             vol = @signal.reward_volatility
+
+             if @signal.anhedonic?
+               { status: :anhedonic, description: 'Persistently low reward — possible disengagement', severity: :high }
+             elsif @signal.euphoric?
+               { status: :euphoric, description: 'Persistently high reward — possible overconfidence', severity: :moderate }
+             elsif vol > 0.4
+               { status: :volatile, description: 'Highly variable reward — unstable learning signals', severity: :moderate }
+             elsif avg.between?(-0.1, 0.1)
+               { status: :neutral, description: 'Low reward signal — minimal learning happening', severity: :low }
+             else
+               { status: :healthy, description: 'Balanced reward signal — healthy learning', severity: :none }
+             end
+           end
+
+           private
+
+           def extract_signals(tick_results)
+             {
+               prediction_accuracy: extract_prediction_reward(tick_results),
+               curiosity_resolved: extract_curiosity_reward(tick_results),
+               goal_achieved: extract_goal_reward(tick_results),
+               social_approval: extract_social_reward(tick_results),
+               flow_state: extract_flow_reward(tick_results),
+               error_avoidance: extract_error_reward(tick_results),
+               novelty_encounter: extract_novelty_reward(tick_results),
+               homeostatic_balance: extract_homeostatic_reward(tick_results)
+             }
+           end
+
+           def extract_prediction_reward(tick_results)
+             accuracy = tick_results.dig(:prediction_engine, :rolling_accuracy)
+             return 0.0 unless accuracy
+
+             (accuracy - 0.5) * 2.0
+           end
+
+           def extract_curiosity_reward(tick_results)
+             resolved = tick_results.dig(:curiosity, :resolved_count) || 0
+             intensity = tick_results.dig(:curiosity, :intensity) || 0.0
+
+             resolved_signal = [resolved * 0.3, 1.0].min
+             (resolved_signal + (intensity * 0.2)).clamp(-1.0, 1.0)
+           end
+
+           def extract_goal_reward(tick_results)
+             completed = tick_results.dig(:volition, :completed_count) || 0
+             failed = tick_results.dig(:volition, :failed_count) || 0
+
+             ((completed * 0.4) - (failed * 0.3)).clamp(-1.0, 1.0)
+           end
+
+           def extract_social_reward(tick_results)
+             trust_delta = tick_results.dig(:trust, :composite_delta) || 0.0
+             (trust_delta * 2.0).clamp(-1.0, 1.0)
+           end
+
+           def extract_flow_reward(tick_results)
+             in_flow = tick_results.dig(:flow, :in_flow)
+             score = tick_results.dig(:flow, :score) || 0.0
+
+             return score * 0.8 if in_flow
+
+             -0.1
+           end
+
+           def extract_error_reward(tick_results)
+             error_rate = tick_results.dig(:prediction_engine, :error_rate)
+             return 0.0 unless error_rate
+
+             (1.0 - (error_rate * 2.0)).clamp(-1.0, 1.0)
+           end
+
+           def extract_novelty_reward(tick_results)
+             novelty = tick_results.dig(:attention, :novelty_score) || 0.0
+             spotlight_count = tick_results.dig(:attention, :spotlight_count) || 0
+
+             ((novelty * 0.5) + [spotlight_count * 0.1, 0.5].min).clamp(-1.0, 1.0)
+           end
+
+           def extract_homeostatic_reward(tick_results)
+             deviation = tick_results.dig(:homeostasis, :worst_deviation) || 0.0
+             allostatic = tick_results.dig(:homeostasis, :allostatic_load) || 0.0
+
+             stability = 1.0 - [deviation, allostatic].max
+             (stability - 0.5).clamp(-1.0, 1.0)
+           end
+
+           def extract_domain(tick_results)
+             tick_results.dig(:volition, :current_domain) ||
+               tick_results.dig(:curiosity, :active_domain) ||
+               tick_results.dig(:attention, :focus_domain)
+           end
+         end
+       end
+     end
+   end
+ end
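
process_tick expects a tick_results hash keyed by subsystem (prediction_engine, curiosity, volition, trust, flow, attention, homeostasis); each extractor normalizes its slice into [-1.0, 1.0], and missing keys fall back to neutral defaults. A sketch with a hypothetical tick payload and domain name:

    store = Legion::Extensions::Reward::Helpers::RewardStore.new
    result = store.process_tick(
      {
        prediction_engine: { rolling_accuracy: 0.75, error_rate: 0.25 },
        volition: { completed_count: 1, failed_count: 0, current_domain: :navigation },
        flow: { in_flow: true, score: 0.6 }
      }
    )
    result[:reward]                          # ~0.303 composite for this payload
    store.domain_report(:navigation)[:trend] # :no_data until 5+ entries accumulate

Note that an all-zero payload is not fully neutral: the flow extractor returns -0.1 when not in flow, and the homeostatic extractor rewards zero deviation, so the two partially offset.
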
lib/legion/extensions/reward/runners/reward.rb ADDED
@@ -0,0 +1,88 @@
+ # frozen_string_literal: true
+
+ module Legion
+   module Extensions
+     module Reward
+       module Runners
+         module Reward
+           include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
+                                                       Legion::Extensions::Helpers.const_defined?(:Lex)
+
+           def compute_reward(tick_results: {}, **)
+             result = reward_store.process_tick(tick_results)
+
+             Legion::Logging.debug "[reward] reward=#{result[:reward]} rpe=#{result[:rpe]} " \
+                                   "class=#{result[:rpe_class]} learning=#{result[:learning_signal]}"
+
+             result
+           end
+
+           def reward_status(**)
+             sig = reward_store.signal
+             health = reward_store.health_assessment
+
+             Legion::Logging.debug "[reward] status: avg=#{sig.running_average.round(3)} " \
+                                   "predicted=#{sig.predicted_reward.round(3)} health=#{health[:status]}"
+
+             sig.to_h.merge(health: health)
+           end
+
+           def reward_for(domain:, **)
+             report = reward_store.domain_report(domain)
+             Legion::Logging.debug "[reward] domain=#{domain} avg=#{report[:average].round(3)} trend=#{report[:trend]}"
+             report
+           end
+
+           def reward_history(limit: 20, **)
+             recent = reward_store.signal.recent_rewards(limit)
+             Legion::Logging.debug "[reward] history: #{recent.size} entries"
+
+             {
+               history: recent,
+               total: reward_store.signal.history.size,
+               discounted_return: reward_store.signal.discounted_return(limit).round(4)
+             }
+           end
+
+           def domain_rewards(**)
+             averages = reward_store.all_domain_averages
+             Legion::Logging.debug "[reward] domains: #{averages.size} tracked"
+
+             {
+               domains: averages,
+               domain_count: averages.size,
+               best_domain: averages.max_by { |_, v| v }&.first,
+               worst_domain: averages.min_by { |_, v| v }&.first
+             }
+           end
+
+           def reward_stats(**)
+             sig = reward_store.signal
+             health = reward_store.health_assessment
+
+             Legion::Logging.debug '[reward] stats'
+
+             {
+               running_average: sig.running_average.round(4),
+               predicted_reward: sig.predicted_reward.round(4),
+               volatility: sig.reward_volatility.round(4),
+               tick_count: sig.tick_count,
+               health: health,
+               domains_tracked: sig.domain_history.keys.size,
+               history_size: sig.history.size,
+               discounted_return: sig.discounted_return.round(4),
+               anhedonic: sig.anhedonic?,
+               euphoric: sig.euphoric?
+             }
+           end
+
+           private
+
+           def reward_store
+             @reward_store ||= Helpers::RewardStore.new
+           end
+         end
+       end
+     end
+   end
+ end
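
The runner methods all accept **kwargs so extra task arguments are ignored, and each memoizes a private Helpers::RewardStore on first use. A hedged sketch of driving the mixin directly, outside Legion's task runner (TickWorker is a hypothetical host class, and Legion::Logging is assumed to be loaded, as it would be in a Legion runtime):

    class TickWorker
      include Legion::Extensions::Reward::Runners::Reward
    end

    worker = TickWorker.new
    worker.compute_reward(tick_results: { flow: { in_flow: true, score: 0.7 } })
    worker.reward_status             # running average, predicted baseline, health
    worker.reward_for(domain: :navigation)
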
lib/legion/extensions/reward/version.rb ADDED
@@ -0,0 +1,9 @@
+ # frozen_string_literal: true
+
+ module Legion
+   module Extensions
+     module Reward
+       VERSION = '0.1.0'
+     end
+   end
+ end
lib/legion/extensions/reward.rb ADDED
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ require 'legion/extensions/reward/version'
+ require 'legion/extensions/reward/helpers/constants'
+ require 'legion/extensions/reward/helpers/reward_signal'
+ require 'legion/extensions/reward/helpers/reward_store'
+ require 'legion/extensions/reward/runners/reward'
+ require 'legion/extensions/reward/client'
+
+ module Legion
+   module Extensions
+     module Reward
+       extend Legion::Extensions::Core if Legion::Extensions.const_defined?(:Core)
+     end
+   end
+ end
metadata ADDED
@@ -0,0 +1,63 @@
+ --- !ruby/object:Gem::Specification
+ name: lex-reward
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Matthew Iverson
+ bindir: bin
+ cert_chain: []
+ date: 1980-01-02 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: legion-gaia
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Computes internal reward signals from cognitive outcomes, tracks reward
+   prediction error, and drives reinforcement learning
+ email:
+ - matt@legionIO.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/legion/extensions/reward.rb
+ - lib/legion/extensions/reward/client.rb
+ - lib/legion/extensions/reward/helpers/constants.rb
+ - lib/legion/extensions/reward/helpers/reward_signal.rb
+ - lib/legion/extensions/reward/helpers/reward_store.rb
+ - lib/legion/extensions/reward/runners/reward.rb
+ - lib/legion/extensions/reward/version.rb
+ homepage: https://github.com/LegionIO/lex-reward
+ licenses:
+ - MIT
+ metadata:
+   rubygems_mfa_required: 'true'
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '3.4'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.6.9
+ specification_version: 4
+ summary: Dopaminergic reward signal engine for LegionIO cognitive agents
+ test_files: []