lex-conscience 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8f0fe95528d279f5002b8b2f6e0c5c92f7066677c2e6b63b44518a8a538b3018
4
+ data.tar.gz: f05fc53a2a90c1cf1adc883de909ea0e9719d08c0437eda80f54ec381777d0e1
5
+ SHA512:
6
+ metadata.gz: da828802ee102cb9bab71d2334c93ce73fa7ddae5f38c1138e57c7f8387337cef7d698c57d5aa956104bb4a201a4559c65c652161d27a6c5adcf480477014fd9
7
+ data.tar.gz: a21dd9ab957be320e6fee7cd8f85290eff17f060935be89a94bc99907de0384e497df3475f4938bd7770e8f6a9efc597aec7903f1dfbe28822113652241cf5c1
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/conscience/helpers/constants'
4
+ require 'legion/extensions/conscience/helpers/moral_evaluator'
5
+ require 'legion/extensions/conscience/helpers/moral_store'
6
+ require 'legion/extensions/conscience/runners/conscience'
7
+
8
module Legion
  module Extensions
    module Conscience
      # Entry point for the conscience extension. Mixes in the
      # Runners::Conscience API and owns the MoralStore that backs it.
      class Client
        include Runners::Conscience

        attr_reader :moral_store

        # @param moral_store [Helpers::MoralStore, nil] injectable store for
        #   tests or shared state; a fresh Helpers::MoralStore is created when
        #   none is given. Any additional keywords are accepted and ignored.
        def initialize(moral_store: nil, **)
          @moral_store = moral_store || Helpers::MoralStore.new
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Conscience
      module Helpers
        # Tunable constants for the moral-reasoning engine.
        module Constants
          # The six foundations of Moral Foundations Theory (Haidt & Graham, 2007).
          # Weights express relative importance and sum to exactly 1.0.
          MORAL_FOUNDATIONS = {
            care: { weight: 0.25, description: 'Compassion and prevention of suffering' },
            fairness: { weight: 0.20, description: 'Justice, reciprocity, and proportionality' },
            loyalty: { weight: 0.15, description: 'Group allegiance and trustworthiness' },
            authority: { weight: 0.15, description: 'Respect for hierarchy and legitimate authority' },
            sanctity: { weight: 0.15, description: 'Purity and integrity of systems' },
            liberty: { weight: 0.10, description: 'Autonomy and freedom from domination' }
          }.freeze

          # Every verdict an evaluation can produce.
          MORAL_VERDICTS = %i[permitted cautioned conflicted prohibited].freeze

          # Smoothing factor for the sensitivity EMA; deliberately small so
          # moral sensitivity drifts very slowly.
          FOUNDATION_ALPHA = 0.05

          # Minimum per-foundation disagreement that counts as a dilemma.
          CONFLICT_THRESHOLD = 0.3

          # Weighted score at or below this is prohibited.
          PROHIBITION_THRESHOLD = -0.5

          # Weighted score below this (but above prohibition) is cautioned.
          CAUTION_THRESHOLD = -0.1

          # Cap on retained evaluation-history entries.
          MAX_MORAL_HISTORY = 100

          # Ethical-dilemma classifications used when foundations conflict.
          DILEMMA_TYPES = %i[utilitarian deontological virtue_ethics].freeze

          # Starting sensitivity for every foundation; decays through experience.
          INITIAL_SENSITIVITY = 1.0

          # Inclusive bounds for both per-foundation and weighted scores.
          MORAL_SCORE_RANGE = { min: -1.0, max: 1.0 }.freeze
        end
      end
    end
  end
end
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Conscience
      module Helpers
        # Scores proposed actions against the six Moral Foundations Theory
        # axes (care, fairness, loyalty, authority, sanctity, liberty).
        # Each foundation carries a learned sensitivity, adjusted slowly via
        # an exponential moving average as outcomes are observed.
        class MoralEvaluator
          # @return [Hash{Symbol=>Float}] current per-foundation sensitivity
          attr_reader :sensitivities

          def initialize
            # Every foundation starts fully sensitive; feedback decays it.
            @sensitivities = Constants::MORAL_FOUNDATIONS.keys.to_h do |foundation|
              [foundation, Constants::INITIAL_SENSITIVITY]
            end
          end

          # Evaluate a proposed action against all 6 moral foundations.
          #
          # @param action [String, Symbol] description of what is about to happen
          # @param context [Hash] moral context signals (:harm_to_others,
          #   :consent_present, etc.); defaults to {} so callers with no
          #   signals may omit it (neutral 0.0 is assumed per signal)
          # @return [Hash] per-foundation :scores, :weighted_score, :verdict,
          #   :dilemma info (or nil), a rounded sensitivity snapshot, and
          #   an :evaluated_at UTC timestamp
          def evaluate(action:, context: {})
            scores = per_foundation_scores(action, context)
            w_score = weighted_score(scores)
            v = verdict(w_score)
            dilemma = detect_dilemma(scores)

            {
              action: action,
              scores: scores,
              weighted_score: w_score.round(4),
              verdict: v,
              dilemma: dilemma,
              sensitivities: @sensitivities.transform_values { |s| s.round(4) },
              evaluated_at: Time.now.utc
            }
          end

          # Weighted sum of per-foundation scores * weights * sensitivity,
          # clamped to MORAL_SCORE_RANGE. Missing foundations score 0.0.
          def weighted_score(scores)
            total = 0.0
            Constants::MORAL_FOUNDATIONS.each do |foundation, config|
              score = scores[foundation] || 0.0
              sensitivity = @sensitivities[foundation]
              total += score * config[:weight] * sensitivity
            end
            total.clamp(Constants::MORAL_SCORE_RANGE[:min], Constants::MORAL_SCORE_RANGE[:max])
          end

          # Map a weighted score to an overall verdict:
          # <= PROHIBITION_THRESHOLD -> :prohibited,
          # <  CAUTION_THRESHOLD     -> :cautioned, otherwise :permitted.
          def verdict(score)
            if score <= Constants::PROHIBITION_THRESHOLD
              :prohibited
            elsif score < Constants::CAUTION_THRESHOLD
              :cautioned
            else
              :permitted
            end
          end

          # Detect a dilemma when foundations strongly disagree with each other.
          #
          # @return [Hash, nil] nil when no dilemma; otherwise the conflict
          #   :type, approving/opposing foundations, and mean tension on each side
          def detect_dilemma(scores)
            pos_foundations = scores.select { |_, v| v > Constants::CONFLICT_THRESHOLD }
            neg_foundations = scores.select { |_, v| v < -Constants::CONFLICT_THRESHOLD }

            # A dilemma requires strong voices on BOTH sides of the threshold.
            return nil if pos_foundations.empty? || neg_foundations.empty?

            dilemma_type = classify_dilemma(pos_foundations.keys, neg_foundations.keys)

            {
              type: dilemma_type,
              approving: pos_foundations.keys,
              opposing: neg_foundations.keys,
              tension: (pos_foundations.values.sum / pos_foundations.size.to_f).round(4),
              counter_tension: (neg_foundations.values.sum / neg_foundations.size.to_f).abs.round(4),
              detected_at: Time.now.utc
            }
          end

          # Feedback loop: update sensitivity for a foundation based on an
          # observed outcome.
          #
          # @param foundation [Symbol] one of the six foundation keys; unknown
          #   foundations are ignored
          # @param outcome [Float] in [-1.0, 1.0]; only its magnitude is used —
          #   the EMA moves sensitivity toward |outcome|
          def update_sensitivity(foundation, outcome)
            return unless @sensitivities.key?(foundation)

            current = @sensitivities[foundation]
            @sensitivities[foundation] = ema(current, outcome.abs.clamp(0.0, 1.0), Constants::FOUNDATION_ALPHA)
          end

          private

          # Score the action on every foundation; each scorer returns [-1.0, 1.0].
          def per_foundation_scores(action, context)
            {
              care: evaluate_care(action, context),
              fairness: evaluate_fairness(action, context),
              loyalty: evaluate_loyalty(action, context),
              authority: evaluate_authority(action, context),
              sanctity: evaluate_sanctity(action, context),
              liberty: evaluate_liberty(action, context)
            }
          end

          # Care/Harm — compassion axis.
          # Reads :harm_to_others and :benefit_to_others (floats); a truthy
          # :vulnerable_affected applies a fixed -0.2 penalty.
          def evaluate_care(_action, context)
            harm = context.fetch(:harm_to_others, 0.0).to_f.clamp(-1.0, 1.0)
            benef = context.fetch(:benefit_to_others, 0.0).to_f.clamp(-1.0, 1.0)
            vuln = context.fetch(:vulnerable_affected, false) ? -0.2 : 0.0

            score = (benef - harm.abs) + vuln
            score.clamp(-1.0, 1.0)
          end

          # Fairness/Cheating — justice axis.
          # Mean of :distributional_justice, :reciprocity, :proportionality.
          def evaluate_fairness(_action, context)
            justice = context.fetch(:distributional_justice, 0.0).to_f.clamp(-1.0, 1.0)
            reciprocity = context.fetch(:reciprocity, 0.0).to_f.clamp(-1.0, 1.0)
            proportional = context.fetch(:proportionality, 0.0).to_f.clamp(-1.0, 1.0)

            ((justice + reciprocity + proportional) / 3.0).clamp(-1.0, 1.0)
          end

          # Loyalty/Betrayal — group allegiance axis.
          # Mean of :alignment_with_group_norms and :trust_preservation.
          def evaluate_loyalty(_action, context)
            alignment = context.fetch(:alignment_with_group_norms, 0.0).to_f.clamp(-1.0, 1.0)
            trust_pres = context.fetch(:trust_preservation, 0.0).to_f.clamp(-1.0, 1.0)

            ((alignment + trust_pres) / 2.0).clamp(-1.0, 1.0)
          end

          # Authority/Subversion — hierarchy respect axis.
          # Mean of :legitimate_authority_compliance and :hierarchy_respect.
          def evaluate_authority(_action, context)
            compliance = context.fetch(:legitimate_authority_compliance, 0.0).to_f.clamp(-1.0, 1.0)
            hierarchy = context.fetch(:hierarchy_respect, 0.0).to_f.clamp(-1.0, 1.0)

            ((compliance + hierarchy) / 2.0).clamp(-1.0, 1.0)
          end

          # Sanctity/Degradation — system integrity axis.
          # Mean of :system_integrity and :degradation_prevention.
          def evaluate_sanctity(_action, context)
            integrity = context.fetch(:system_integrity, 0.0).to_f.clamp(-1.0, 1.0)
            degrad = context.fetch(:degradation_prevention, 0.0).to_f.clamp(-1.0, 1.0)

            ((integrity + degrad) / 2.0).clamp(-1.0, 1.0)
          end

          # Liberty/Oppression — autonomy axis.
          # :autonomy_preservation plus a +0.3 consent bonus / -0.2 penalty.
          def evaluate_liberty(_action, context)
            autonomy = context.fetch(:autonomy_preservation, 0.0).to_f.clamp(-1.0, 1.0)
            consent = context.fetch(:consent_present, false) ? 0.3 : -0.2

            (autonomy + consent).clamp(-1.0, 1.0)
          end

          # Classify the dilemma type from which foundations are in conflict:
          # care + fairness both involved -> :utilitarian; authority involved
          # -> :deontological; anything else -> :virtue_ethics.
          def classify_dilemma(approving, opposing)
            care_side = approving.include?(:care) || opposing.include?(:care)
            fair_side = approving.include?(:fairness) || opposing.include?(:fairness)
            auth_side = approving.include?(:authority) || opposing.include?(:authority)

            if care_side && fair_side
              :utilitarian
            elsif auth_side
              :deontological
            else
              :virtue_ethics
            end
          end

          # Standard exponential moving average step.
          def ema(current, observed, alpha)
            (current * (1.0 - alpha)) + (observed * alpha)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Conscience
      module Helpers
        # Persists moral-evaluation results, dilemmas, and sensitivity
        # snapshots, and tracks whether the agent follows its own verdicts.
        # All collections are bounded by Constants::MAX_MORAL_HISTORY so a
        # long-lived store cannot grow without limit.
        class MoralStore
          attr_reader :evaluator, :history, :dilemmas, :sensitivity_snapshots

          # @param evaluator [MoralEvaluator, nil] injectable evaluator;
          #   a fresh MoralEvaluator is created when none is given
          def initialize(evaluator: nil)
            @evaluator = evaluator || MoralEvaluator.new
            @history = []
            @dilemmas = []
            @sensitivity_snapshots = []
            @followed_count = 0
            @overridden_count = 0
          end

          # Store a completed moral evaluation result and snapshot the
          # evaluator's sensitivities at that moment.
          #
          # @param result [Hash] output of MoralEvaluator#evaluate
          # @return [Hash] the same result, for chaining
          def record_evaluation(result)
            @history << result
            @history.shift while @history.size > Constants::MAX_MORAL_HISTORY

            @dilemmas << result[:dilemma] if result[:dilemma]
            # Fix: dilemmas were previously unbounded; cap them like history
            # so the store cannot leak memory in a long-running agent.
            @dilemmas.shift while @dilemmas.size > Constants::MAX_MORAL_HISTORY

            snapshot_sensitivities(result[:verdict])

            result
          end

          # Record whether the agent followed its moral verdict or overrode it.
          #
          # @param verdict [Symbol] the verdict that was issued
          # @param outcome [Symbol] :followed, or anything else counts as overridden
          def record_follow_through(verdict, outcome)
            if outcome == :followed
              @followed_count += 1
            else
              @overridden_count += 1
            end

            # Feed back into evaluator sensitivities
            foundation_feedback(verdict, outcome)
          end

          # Ratio of evaluations where the agent followed its moral verdict.
          # Returns 1.0 when no follow-through has been recorded yet.
          def consistency_score
            total = @followed_count + @overridden_count
            return 1.0 if total.zero?

            (@followed_count.to_f / total).round(4)
          end

          # Current foundation sensitivities from the evaluator, rounded.
          def foundation_sensitivities
            @evaluator.sensitivities.transform_values { |s| s.round(4) }
          end

          # Recent evaluations, newest last.
          def recent_evaluations(limit = 20)
            @history.last(limit)
          end

          # Most recent dilemmas (up to 20), treated as still open.
          def open_dilemmas
            @dilemmas.last(20)
          end

          # Aggregate stats across all retained evaluations.
          def aggregate_stats
            verdict_counts = Hash.new(0)
            @history.each { |e| verdict_counts[e[:verdict]] += 1 }

            {
              total_evaluations: @history.size,
              verdict_counts: verdict_counts,
              dilemma_count: @dilemmas.size,
              consistency_score: consistency_score,
              followed_count: @followed_count,
              overridden_count: @overridden_count,
              foundation_sensitivities: foundation_sensitivities
            }
          end

          private

          # Append a bounded snapshot of the evaluator's sensitivities,
          # tagged with the verdict that triggered it.
          def snapshot_sensitivities(verdict)
            @sensitivity_snapshots << {
              verdict: verdict,
              sensitivities: @evaluator.sensitivities.dup,
              at: Time.now.utc
            }
            @sensitivity_snapshots.shift while @sensitivity_snapshots.size > Constants::MAX_MORAL_HISTORY
          end

          # Translate follow-through outcomes into sensitivity feedback.
          # Overriding a prohibition desensitizes care/sanctity; honoring a
          # caution sensitizes care/fairness; acting on permission reinforces
          # liberty. Other combinations leave sensitivities untouched.
          def foundation_feedback(verdict, outcome)
            case [verdict, outcome]
            when %i[prohibited overridden]
              @evaluator.update_sensitivity(:care, -0.5)
              @evaluator.update_sensitivity(:sanctity, -0.3)
            when %i[cautioned followed]
              @evaluator.update_sensitivity(:care, 0.8)
              @evaluator.update_sensitivity(:fairness, 0.6)
            when %i[permitted followed]
              @evaluator.update_sensitivity(:liberty, 0.7)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Conscience
      module Runners
        # Runner API exposed to the Legion framework: moral evaluation,
        # status, history, follow-through recording, dilemmas, and stats.
        # All entry points accept and ignore extra keywords (**) so the
        # framework can pass transport metadata freely.
        module Conscience
          include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
                                                      Legion::Extensions::Helpers.const_defined?(:Lex)

          # Full moral assessment of a proposed action.
          #
          # @param action [String, Symbol] what is about to happen
          # @param context [Hash] moral context signals (harm_to_others,
          #   consent_present, etc.)
          # @return [Hash] the evaluation result (also recorded in the store)
          def moral_evaluate(action:, context: {}, **)
            result = moral_store.evaluator.evaluate(action: action, context: context)
            moral_store.record_evaluation(result)

            Legion::Logging.debug "[conscience] action=#{action} verdict=#{result[:verdict]} " \
                                  "score=#{result[:weighted_score]} dilemma=#{result[:dilemma]&.dig(:type)}"

            result
          end

          # Current moral sensitivities, consistency score, and aggregate stats.
          def moral_status(**)
            stats = moral_store.aggregate_stats
            sensitivities = moral_store.foundation_sensitivities

            Legion::Logging.debug "[conscience] consistency=#{stats[:consistency_score]} " \
                                  "evaluations=#{stats[:total_evaluations]}"

            {
              sensitivities: sensitivities,
              consistency: stats[:consistency_score],
              stats: stats
            }
          end

          # Recent moral evaluation history, newest last.
          #
          # @param limit [Integer] maximum number of entries to return
          def moral_history(limit: 20, **)
            recent = moral_store.recent_evaluations(limit)
            Legion::Logging.debug "[conscience] history: #{recent.size} entries"

            {
              history: recent,
              total: moral_store.history.size,
              limit: limit
            }
          end

          # Record whether the agent actually followed or overrode its moral
          # verdict for a given action.
          #
          # @param action [String, Symbol] the action previously evaluated
          # @param outcome [Symbol] :followed | :overridden
          # @param verdict [Symbol, nil] explicit verdict; when nil, the most
          #   recent verdict recorded for this action is used
          def update_moral_outcome(action:, outcome:, verdict: nil, **)
            effective_verdict = verdict || infer_last_verdict(action)

            moral_store.record_follow_through(effective_verdict, outcome)

            Legion::Logging.debug "[conscience] follow_through action=#{action} " \
                                  "verdict=#{effective_verdict} outcome=#{outcome} " \
                                  "consistency=#{moral_store.consistency_score}"

            {
              action: action,
              verdict: effective_verdict,
              outcome: outcome,
              consistency: moral_store.consistency_score
            }
          end

          # List unresolved moral dilemmas (cases where foundations strongly disagreed).
          def moral_dilemmas(**)
            open = moral_store.open_dilemmas
            Legion::Logging.debug "[conscience] dilemmas: #{open.size} open"

            {
              dilemmas: open,
              count: open.size
            }
          end

          # Aggregate moral reasoning stats, extended with the normalized
          # verdict distribution and the static foundation weights.
          def conscience_stats(**)
            stats = moral_store.aggregate_stats
            Legion::Logging.debug '[conscience] stats'

            stats.merge(
              verdict_distribution: verdict_distribution(stats[:verdict_counts]),
              foundation_weights: Helpers::Constants::MORAL_FOUNDATIONS.transform_values { |v| v[:weight] }
            )
          end

          private

          # Lazy store; Client overrides this with its own injected store.
          def moral_store
            @moral_store ||= Helpers::MoralStore.new
          end

          # Most recent verdict recorded for this action, or :permitted when
          # the action has never been evaluated. reverse_each avoids copying
          # the whole history array just to scan it backwards.
          def infer_last_verdict(action)
            last = moral_store.history.reverse_each.find { |e| e[:action] == action }
            last ? last[:verdict] : :permitted
          end

          # Normalize verdict counts into fractions; {} when there are none.
          def verdict_distribution(verdict_counts)
            total = verdict_counts.values.sum.to_f
            return {} if total.zero?

            verdict_counts.transform_values { |count| (count / total).round(4) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    # Gem release version for the lex-conscience extension.
    module Conscience
      VERSION = '0.1.0'
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/conscience/version'
4
+ require 'legion/extensions/conscience/helpers/constants'
5
+ require 'legion/extensions/conscience/helpers/moral_evaluator'
6
+ require 'legion/extensions/conscience/helpers/moral_store'
7
+ require 'legion/extensions/conscience/runners/conscience'
8
+ require 'legion/extensions/conscience/client'
9
+
10
module Legion
  module Extensions
    # Root namespace for the lex-conscience extension. When the LegionIO
    # framework is present, Legion::Extensions::Core hooks this module into
    # the runtime; outside the framework the extend is skipped entirely.
    module Conscience
      extend Legion::Extensions::Core if Legion::Extensions.const_defined?(:Core)
    end
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lex-conscience
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Iverson
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: legion-gaia
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :development
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ description: Applies Moral Foundations Theory to evaluate proposed actions, track
27
+ moral consistency, and surface ethical dilemmas before execution
28
+ email:
29
+ - matt@legionIO.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - lib/legion/extensions/conscience.rb
35
+ - lib/legion/extensions/conscience/client.rb
36
+ - lib/legion/extensions/conscience/helpers/constants.rb
37
+ - lib/legion/extensions/conscience/helpers/moral_evaluator.rb
38
+ - lib/legion/extensions/conscience/helpers/moral_store.rb
39
+ - lib/legion/extensions/conscience/runners/conscience.rb
40
+ - lib/legion/extensions/conscience/version.rb
41
+ homepage: https://github.com/LegionIO/lex-conscience
42
+ licenses:
43
+ - MIT
44
+ metadata:
45
+ rubygems_mfa_required: 'true'
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '3.4'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubygems_version: 3.6.9
61
+ specification_version: 4
62
+ summary: Moral reasoning engine for LegionIO cognitive agents
63
+ test_files: []