lex-eval 0.3.8 → 0.3.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c6e527b6bd5b88d862de94408ddd8fa8a3ef6fb5a3916b1bb888ab1278c68bf5
4
- data.tar.gz: b10274099420ae181d751f6874cc029e76657e08bafa2c2152a3f08fd9896c28
3
+ metadata.gz: 21965c1d67ff7fe624874e1b2f9ac554cc3ffdbdf9c77158864754a5f5ef6425
4
+ data.tar.gz: eb2ea1c89bfb257868b63a9c5d8347210b7163d87bb0b9e64786317443f9464f
5
5
  SHA512:
6
- metadata.gz: 0b8014e5428708f9f04d90fea9cf48b16af90216998aa694c4723ef723dde6365840ceb50a2ef64c65801f613bcf9c3df40c402dd96013f8ce2180e6307cae88
7
- data.tar.gz: 973ce2e3f5f9aa4bea347600861ef1e9e4ec741445ca7f4b0ae9142466823920c2eabd1f9608780ad2fa7634fc711b98cfe99825a38ae8de0efdde6616038193
6
+ metadata.gz: fdfe064f47e393bc4762a74aebc1dd3e079b080df73cc2eb0e2a4446816abf6f14df9b8410049c6cc715ca7027f071f974a51a6f6aa9c688844532b272c8e473
7
+ data.tar.gz: 7009275960b186e2780c723e54882fe106a0a8c219872f9748c76a70fa94e5602e4cfafecbc3546b19d6ac42d823730f40f0067805339e0af8c2835b504887bf
@@ -9,10 +9,11 @@ module Legion
9
9
  module Runners
10
10
  module CodeReview
11
11
  extend self
12
+ include Legion::Logging::Helper
12
13
 
13
14
  SPEC_TIMEOUT = 30
14
15
 
15
- def review_generated(code:, spec_code:, context:) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
16
+ def review_generated(code:, spec_code:, context:) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
16
17
  settings = validation_settings
17
18
  stages = {}
18
19
  issues = []
@@ -48,6 +49,15 @@ module Legion
48
49
  issues.concat(stages[:llm_review][:issues] || [])
49
50
  end
50
51
 
52
+ # Stage 5: QualityGate (optional, requires lex-factory)
53
+ qg_settings = settings[:quality_gate] || {}
54
+ if quality_gate_available? && qg_settings[:enabled] != false
55
+ stages[:quality_gate] = run_quality_gate(stages, qg_settings)
56
+ unless stages[:quality_gate][:pass]
57
+ issues << "quality gate failed: aggregate #{stages[:quality_gate][:aggregate]} below threshold #{stages[:quality_gate][:threshold]}"
58
+ end
59
+ end
60
+
51
61
  confidence = calculate_confidence(stages)
52
62
  verdict = confidence >= 0.5 ? :approve : :revise
53
63
 
@@ -60,7 +70,8 @@ module Legion
60
70
  return {} unless defined?(Legion::Settings)
61
71
 
62
72
  Legion::Settings.dig(:codegen, :self_generate, :validation) || {}
63
- rescue StandardError
73
+ rescue StandardError => e
74
+ log.warn("validation_settings failed: #{e.message}")
64
75
  {}
65
76
  end
66
77
 
@@ -69,6 +80,7 @@ module Legion
69
80
  begin
70
81
  RubyVM::InstructionSequence.compile(code)
71
82
  rescue SyntaxError => e
83
+ log.debug("syntax check failed: #{e.message}")
72
84
  errors << "code: #{e.message}"
73
85
  end
74
86
 
@@ -76,6 +88,7 @@ module Legion
76
88
  begin
77
89
  RubyVM::InstructionSequence.compile(spec_code)
78
90
  rescue SyntaxError => e
91
+ log.debug("spec syntax check failed: #{e.message}")
79
92
  errors << "spec: #{e.message}"
80
93
  end
81
94
  end
@@ -104,6 +117,7 @@ module Legion
104
117
  { passed: status.success?, output: stdout, errors: stderr, exit_code: status.exitstatus }
105
118
  end
106
119
  rescue StandardError => e
120
+ log.warn("spec execution failed: #{e.message}")
107
121
  { passed: false, output: '', errors: e.message, exit_code: -1 }
108
122
  end
109
123
 
@@ -122,6 +136,7 @@ module Legion
122
136
  confidence: result[:confidence] || 0.5
123
137
  }
124
138
  rescue StandardError => e
139
+ log.warn("llm review failed: #{e.message}")
125
140
  { passed: true, issues: ["llm review failed: #{e.message}"], confidence: 0.5 }
126
141
  end
127
142
 
@@ -129,6 +144,38 @@ module Legion
129
144
  defined?(Legion::LLM) && Legion::LLM.respond_to?(:chat)
130
145
  end
131
146
 
147
+ def quality_gate_available?
148
+ defined?(Legion::Extensions::Factory::Helpers::QualityGate)
149
+ end
150
+
151
+ def run_quality_gate(stages, qg_settings)
152
+ kwargs = quality_gate_dimensions(stages)
153
+ kwargs[:threshold] = qg_settings[:threshold] if qg_settings[:threshold]
154
+ Legion::Extensions::Factory::Helpers::QualityGate.score(**kwargs)
155
+ rescue StandardError => e
156
+ log.warn("quality gate failed: #{e.message}")
157
+ { pass: true, aggregate: 1.0, threshold: 0.8, scores: {}, error: e.message }
158
+ end
159
+
160
+ def quality_gate_dimensions(stages)
161
+ {
162
+ completeness: stage_passed?(stages[:syntax]) ? 1.0 : 0.0,
163
+ correctness: qg_correctness(stages[:specs]),
164
+ quality: stages.dig(:llm_review, :confidence) || 1.0,
165
+ security: stage_passed?(stages[:security]) ? 1.0 : 0.0
166
+ }
167
+ end
168
+
169
+ def qg_correctness(specs_stage)
170
+ return 1.0 unless specs_stage
171
+
172
+ stage_passed?(specs_stage) ? 1.0 : 0.3
173
+ end
174
+
175
+ def stage_passed?(stage)
176
+ stage&.dig(:passed) == true
177
+ end
178
+
132
179
  def calculate_confidence(stages)
133
180
  scores = stage_scores(stages)
134
181
  return 0.5 if scores.empty?
@@ -136,12 +183,13 @@ module Legion
136
183
  scores.sum / scores.size
137
184
  end
138
185
 
139
- def stage_scores(stages) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
186
+ def stage_scores(stages) # rubocop:disable Metrics/PerceivedComplexity
140
187
  scores = []
141
- scores << (stages[:syntax]&.dig(:passed) ? 1.0 : 0.0) if stages[:syntax]
142
- scores << (stages[:security]&.dig(:passed) ? 1.0 : 0.0) if stages[:security]
143
- scores << (stages[:specs]&.dig(:passed) ? 1.0 : 0.3) if stages[:specs]
188
+ scores << (stage_passed?(stages[:syntax]) ? 1.0 : 0.0) if stages[:syntax]
189
+ scores << (stage_passed?(stages[:security]) ? 1.0 : 0.0) if stages[:security]
190
+ scores << (stage_passed?(stages[:specs]) ? 1.0 : 0.3) if stages[:specs]
144
191
  scores << (stages.dig(:llm_review, :confidence) || 0.5) if stages[:llm_review]
192
+ scores << stages.dig(:quality_gate, :aggregate) if stages[:quality_gate]
145
193
  scores
146
194
  end
147
195
 
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Eval
6
- VERSION = '0.3.8'
6
+ VERSION = '0.3.9'
7
7
  end
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-eval
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson