lex-eval 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 21965c1d67ff7fe624874e1b2f9ac554cc3ffdbdf9c77158864754a5f5ef6425
|
|
4
|
+
data.tar.gz: eb2ea1c89bfb257868b63a9c5d8347210b7163d87bb0b9e64786317443f9464f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fdfe064f47e393bc4762a74aebc1dd3e079b080df73cc2eb0e2a4446816abf6f14df9b8410049c6cc715ca7027f071f974a51a6f6aa9c688844532b272c8e473
|
|
7
|
+
data.tar.gz: 7009275960b186e2780c723e54882fe106a0a8c219872f9748c76a70fa94e5602e4cfafecbc3546b19d6ac42d823730f40f0067805339e0af8c2835b504887bf
|
|
@@ -9,10 +9,11 @@ module Legion
|
|
|
9
9
|
module Runners
|
|
10
10
|
module CodeReview
|
|
11
11
|
extend self
|
|
12
|
+
include Legion::Logging::Helper
|
|
12
13
|
|
|
13
14
|
SPEC_TIMEOUT = 30
|
|
14
15
|
|
|
15
|
-
def review_generated(code:, spec_code:, context:) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
16
|
+
def review_generated(code:, spec_code:, context:) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
|
16
17
|
settings = validation_settings
|
|
17
18
|
stages = {}
|
|
18
19
|
issues = []
|
|
@@ -48,6 +49,15 @@ module Legion
|
|
|
48
49
|
issues.concat(stages[:llm_review][:issues] || [])
|
|
49
50
|
end
|
|
50
51
|
|
|
52
|
+
# Stage 5: QualityGate (optional, requires lex-factory)
|
|
53
|
+
qg_settings = settings[:quality_gate] || {}
|
|
54
|
+
if quality_gate_available? && qg_settings[:enabled] != false
|
|
55
|
+
stages[:quality_gate] = run_quality_gate(stages, qg_settings)
|
|
56
|
+
unless stages[:quality_gate][:pass]
|
|
57
|
+
issues << "quality gate failed: aggregate #{stages[:quality_gate][:aggregate]} below threshold #{stages[:quality_gate][:threshold]}"
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
51
61
|
confidence = calculate_confidence(stages)
|
|
52
62
|
verdict = confidence >= 0.5 ? :approve : :revise
|
|
53
63
|
|
|
@@ -60,7 +70,8 @@ module Legion
|
|
|
60
70
|
return {} unless defined?(Legion::Settings)
|
|
61
71
|
|
|
62
72
|
Legion::Settings.dig(:codegen, :self_generate, :validation) || {}
|
|
63
|
-
rescue StandardError
|
|
73
|
+
rescue StandardError => e
|
|
74
|
+
log.warn("validation_settings failed: #{e.message}")
|
|
64
75
|
{}
|
|
65
76
|
end
|
|
66
77
|
|
|
@@ -69,6 +80,7 @@ module Legion
|
|
|
69
80
|
begin
|
|
70
81
|
RubyVM::InstructionSequence.compile(code)
|
|
71
82
|
rescue SyntaxError => e
|
|
83
|
+
log.debug("syntax check failed: #{e.message}")
|
|
72
84
|
errors << "code: #{e.message}"
|
|
73
85
|
end
|
|
74
86
|
|
|
@@ -76,6 +88,7 @@ module Legion
|
|
|
76
88
|
begin
|
|
77
89
|
RubyVM::InstructionSequence.compile(spec_code)
|
|
78
90
|
rescue SyntaxError => e
|
|
91
|
+
log.debug("spec syntax check failed: #{e.message}")
|
|
79
92
|
errors << "spec: #{e.message}"
|
|
80
93
|
end
|
|
81
94
|
end
|
|
@@ -104,6 +117,7 @@ module Legion
|
|
|
104
117
|
{ passed: status.success?, output: stdout, errors: stderr, exit_code: status.exitstatus }
|
|
105
118
|
end
|
|
106
119
|
rescue StandardError => e
|
|
120
|
+
log.warn("spec execution failed: #{e.message}")
|
|
107
121
|
{ passed: false, output: '', errors: e.message, exit_code: -1 }
|
|
108
122
|
end
|
|
109
123
|
|
|
@@ -122,6 +136,7 @@ module Legion
|
|
|
122
136
|
confidence: result[:confidence] || 0.5
|
|
123
137
|
}
|
|
124
138
|
rescue StandardError => e
|
|
139
|
+
log.warn("llm review failed: #{e.message}")
|
|
125
140
|
{ passed: true, issues: ["llm review failed: #{e.message}"], confidence: 0.5 }
|
|
126
141
|
end
|
|
127
142
|
|
|
@@ -129,6 +144,38 @@ module Legion
|
|
|
129
144
|
defined?(Legion::LLM) && Legion::LLM.respond_to?(:chat)
|
|
130
145
|
end
|
|
131
146
|
|
|
147
|
+
# Reports whether the optional QualityGate helper can actually be used.
#
# The answer is true only when the
# Legion::Extensions::Factory::Helpers::QualityGate constant is loaded AND it
# responds to +score+, the single method run_quality_gate invokes on it.
# Checking the method as well keeps a partially loaded or incompatible helper
# from being treated as available and then falling into the fail-open rescue.
#
# @return [Boolean] true when QualityGate.score can be called, false otherwise
def quality_gate_available?
  return false unless defined?(Legion::Extensions::Factory::Helpers::QualityGate)

  Legion::Extensions::Factory::Helpers::QualityGate.respond_to?(:score)
end
|
|
150
|
+
|
|
151
|
+
# Scores the collected stage results with the QualityGate helper.
#
# The dimension scores come from quality_gate_dimensions; a :threshold keyword
# is forwarded only when one is configured, otherwise QualityGate.score is left
# to use its own default. The helper's return value is passed through as-is.
#
# Fails open: any StandardError is logged at warn level and a passing result is
# returned with the error message under :error. The fallback reports the
# configured threshold (0.8 when none is configured) so the recorded stage
# does not contradict the settings.
#
# @param stages [Hash] stage results gathered so far
# @param qg_settings [Hash] quality-gate settings; only :threshold is read here
# @return [Hash] the QualityGate.score result, or the fail-open fallback
def run_quality_gate(stages, qg_settings)
  # Read first so the rescue clause below can still see the configured value.
  threshold = qg_settings[:threshold]
  gate_args = quality_gate_dimensions(stages)
  gate_args[:threshold] = threshold if threshold
  Legion::Extensions::Factory::Helpers::QualityGate.score(**gate_args)
rescue StandardError => e
  log.warn("quality gate failed: #{e.message}")
  { pass: true, aggregate: 1.0, threshold: threshold || 0.8, scores: {}, error: e.message }
end
|
|
159
|
+
|
|
160
|
+
# Translates stage results into the four keyword scores that run_quality_gate
# forwards to QualityGate.score.
#
# - completeness / security: 1.0 when the syntax / security stage passed, else 0.0
# - correctness: delegated to qg_correctness for the specs stage
# - quality: the LLM review confidence, or 1.0 when none is recorded
#
# @param stages [Hash] stage results gathered so far
# @return [Hash] :completeness, :correctness, :quality and :security scores
def quality_gate_dimensions(stages)
  pass_or_zero = ->(stage) { stage_passed?(stage) ? 1.0 : 0.0 }

  dimensions = {}
  dimensions[:completeness] = pass_or_zero.call(stages[:syntax])
  dimensions[:correctness] = qg_correctness(stages[:specs])
  dimensions[:quality] = stages.dig(:llm_review, :confidence) || 1.0
  dimensions[:security] = pass_or_zero.call(stages[:security])
  dimensions
end
|
|
168
|
+
|
|
169
|
+
# Correctness score derived from the specs stage.
#
# A missing specs stage is not penalised (1.0); a specs stage that ran and did
# not pass scores 0.3; a passing one scores 1.0.
#
# @param specs_stage [Hash, nil] result of the specs stage, if it ran
# @return [Float] 1.0 or 0.3
def qg_correctness(specs_stage)
  if specs_stage && !stage_passed?(specs_stage)
    0.3
  else
    1.0
  end
end
|
|
174
|
+
|
|
175
|
+
# Tells whether a stage result explicitly recorded success.
#
# Only a literal +true+ under :passed counts: a missing key or any other value
# (including truthy non-boolean values) yields false. Values that cannot be
# queried with #dig (nil, false, strings, ...) are treated as "not passed"
# rather than raising, so every falsy or malformed stage is handled uniformly.
#
# @param stage [Hash, nil] a stage result, or nil when the stage did not run
# @return [Boolean]
def stage_passed?(stage)
  return false unless stage.respond_to?(:dig)

  stage.dig(:passed) == true
end
|
|
178
|
+
|
|
132
179
|
def calculate_confidence(stages)
|
|
133
180
|
scores = stage_scores(stages)
|
|
134
181
|
return 0.5 if scores.empty?
|
|
@@ -136,12 +183,13 @@ module Legion
|
|
|
136
183
|
scores.sum / scores.size
|
|
137
184
|
end
|
|
138
185
|
|
|
139
|
-
def stage_scores(stages) # rubocop:disable Metrics/
|
|
186
|
+
# Collects one numeric score for each stage that actually ran; stages that are
# absent from +stages+ contribute nothing.
#
# - syntax, security: 1.0 when passed, otherwise 0.0
# - specs: 1.0 when passed, otherwise 0.3
# - llm_review: its :confidence, or 0.5 when none is recorded
# - quality_gate: its :aggregate, included only when it is numeric
#
# @param stages [Hash] stage results gathered so far
# @return [Array<Numeric>] scores in the order listed above
def stage_scores(stages) # rubocop:disable Metrics/PerceivedComplexity
  scores = []
  scores << (stage_passed?(stages[:syntax]) ? 1.0 : 0.0) if stages[:syntax]
  scores << (stage_passed?(stages[:security]) ? 1.0 : 0.0) if stages[:security]
  scores << (stage_passed?(stages[:specs]) ? 1.0 : 0.3) if stages[:specs]
  scores << (stages.dig(:llm_review, :confidence) || 0.5) if stages[:llm_review]

  # A gate result without a numeric :aggregate must not put nil into the list:
  # calculate_confidence sums these values and would raise TypeError.
  aggregate = stages.dig(:quality_gate, :aggregate)
  scores << aggregate if aggregate.is_a?(Numeric)
  scores
end
|
|
147
195
|
|