lex-eval 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1dd068d711cd3cc0c70d64f8c066e1bb03e929bc034073600a8e3946c7c65a77
4
- data.tar.gz: 6103505a44655acc55a78ac3677b2d8fef300e395d33acb47e5a545cd0f7e8e3
3
+ metadata.gz: 42e93ebdc972bbb947ca388705947747e0dc4050e7cb5483efc57471adfbca29
4
+ data.tar.gz: f3a45e7d94bd7c4670f92f11a6e838e9a0913fd8823726436efa889c017afea3
5
5
  SHA512:
6
- metadata.gz: 4b0ef19e8406c5eaf2914b22aaef87913a775a19b89b21f35fc2b9cfbfdb3f135013e027c3eefd854963700cd91ef08be0c1f3976cf597f1c2f358430a4cb565
7
- data.tar.gz: 543c853757732ced23ebdbf4d5caa1ef09a91ca9ae4f20b36d75d32cb383d5d56c50548ea873199fcb2324a44dd77ee9c8ac8efe51568a4c94d421b5e45e53d9
6
+ metadata.gz: 9597e0c4cf93d703c40d83a80107a1c2271022710dadfc666ea3ce510d327eefceb33005e456b0af1064d77c5d7d88502d3476cf7554382fc0b7d18e07421539
7
+ data.tar.gz: b8e858dbd043dfd71fe5d964c608d438ad34f56ba3da487fd3a58f8fd76ffd244f01266a0c72c892c1257ac87f0041a1a7e7aaf1fb09f6ff2ef48e53ab0c13f7
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/subscription' unless defined?(Legion::Extensions::Actors::Subscription)
4
+
5
+ module Legion
6
+ module Extensions
7
+ module Eval
8
+ module Actor
9
+ class Online < Legion::Extensions::Actors::Subscription
10
+ EXCHANGE = 'llm.response'
11
+ QUEUE = 'eval.online'
12
+
13
+ def runner_class
14
+ Legion::Extensions::Eval::Runners::Online
15
+ end
16
+
17
+ def runner_function
18
+ 'evaluate_response'
19
+ end
20
+
21
+ def check_subtask?
22
+ false
23
+ end
24
+
25
+ def generate_task?
26
+ false
27
+ end
28
+
29
+ def use_runner?
30
+ false
31
+ end
32
+
33
+ def enabled?
34
+ return false unless defined?(Legion::Transport)
35
+ return false unless defined?(Legion::Extensions::Eval::Runners::Online)
36
+
37
+ online_enabled?
38
+ rescue StandardError
39
+ false
40
+ end
41
+
42
+ private
43
+
44
+ def online_enabled?
45
+ return true unless defined?(Legion::Settings)
46
+
47
+ Legion::Settings.dig(:eval, :online, :enabled) != false
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
@@ -7,6 +7,7 @@ module Legion
7
7
  include Runners::Evaluation
8
8
  include Runners::Annotation
9
9
  include Runners::AgenticReview
10
+ include Runners::Online
10
11
 
11
12
  def initialize(db: nil, **opts)
12
13
  @db = db
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Eval
6
+ module Runners
7
+ module Online
8
+ include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
9
+ def evaluate_response(response:, evaluators: nil, sample_rate: 1.0, **)
10
+ evaluator_names = evaluators || configured_evaluators
11
+ effective_rate = sample_rate || configured_sample_rate
12
+
13
+ return { evaluated: false, reason: :sampled_out, sampled: false } unless rand <= effective_rate
14
+
15
+ scores = {}
16
+ evaluator_names.each do |name|
17
+ scores[name.to_sym] = run_single_evaluator(name, response)
18
+ end
19
+
20
+ { evaluated: true, scores: scores, sampled: true }
21
+ rescue StandardError => e
22
+ log.warn("lex-eval online: evaluate_response failed: #{e.message}")
23
+ { evaluated: false, reason: :error, error: e.message, sampled: true }
24
+ end
25
+
26
+ private
27
+
28
+ def run_single_evaluator(name, response)
29
+ loader = Helpers::TemplateLoader.new
30
+ config = loader.load_template(name.to_s) || {}
31
+ result = run_evaluation(
32
+ evaluator_name: name,
33
+ evaluator_config: config,
34
+ inputs: [{ input: response[:input] || '', output: response[:output] || '' }]
35
+ )
36
+ result.dig(:summary, :avg_score)
37
+ rescue StandardError => e
38
+ log.warn("lex-eval online: evaluator #{name} failed: #{e.message}")
39
+ nil
40
+ end
41
+
42
+ def configured_evaluators
43
+ return %w[toxicity] unless defined?(Legion::Settings)
44
+
45
+ Legion::Settings.dig(:eval, :online, :evaluators) || %w[toxicity]
46
+ end
47
+
48
+ def configured_sample_rate
49
+ return 1.0 unless defined?(Legion::Settings)
50
+
51
+ Legion::Settings.dig(:eval, :online, :sample_rate) || 1.0
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Eval
6
- VERSION = '0.2.1'
6
+ VERSION = '0.2.3'
7
7
  end
8
8
  end
9
9
  end
@@ -10,6 +10,7 @@ require_relative 'eval/helpers/guardrails'
10
10
  require_relative 'eval/runners/evaluation'
11
11
  require_relative 'eval/runners/annotation'
12
12
  require_relative 'eval/runners/agentic_review'
13
+ require_relative 'eval/runners/online'
13
14
  require_relative 'eval/client'
14
15
 
15
16
  module Legion
metadata CHANGED
@@ -1,14 +1,112 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-eval
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson
8
8
  bindir: bin
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
- dependencies: []
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: legion-cache
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 1.3.11
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: 1.3.11
26
+ - !ruby/object:Gem::Dependency
27
+ name: legion-crypt
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.4.9
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 1.4.9
40
+ - !ruby/object:Gem::Dependency
41
+ name: legion-data
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.4.17
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.4.17
54
+ - !ruby/object:Gem::Dependency
55
+ name: legion-json
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 1.2.1
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 1.2.1
68
+ - !ruby/object:Gem::Dependency
69
+ name: legion-logging
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 1.3.2
75
+ type: :runtime
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: 1.3.2
82
+ - !ruby/object:Gem::Dependency
83
+ name: legion-settings
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 1.3.14
89
+ type: :runtime
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: 1.3.14
96
+ - !ruby/object:Gem::Dependency
97
+ name: legion-transport
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 1.3.9
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 1.3.9
12
110
  description: Provides LLM-as-judge and code-based evaluators for scoring LLM outputs,
13
111
  with built-in templates for hallucination, relevance, and toxicity detection.
14
112
  email:
@@ -19,6 +117,7 @@ extra_rdoc_files: []
19
117
  files:
20
118
  - README.md
21
119
  - lib/legion/extensions/eval.rb
120
+ - lib/legion/extensions/eval/actors/online.rb
22
121
  - lib/legion/extensions/eval/client.rb
23
122
  - lib/legion/extensions/eval/evaluators/base.rb
24
123
  - lib/legion/extensions/eval/evaluators/code_evaluator.rb
@@ -32,6 +131,7 @@ files:
32
131
  - lib/legion/extensions/eval/runners/agentic_review.rb
33
132
  - lib/legion/extensions/eval/runners/annotation.rb
34
133
  - lib/legion/extensions/eval/runners/evaluation.rb
134
+ - lib/legion/extensions/eval/runners/online.rb
35
135
  - lib/legion/extensions/eval/templates/code_generation.yml
36
136
  - lib/legion/extensions/eval/templates/code_readability.yml
37
137
  - lib/legion/extensions/eval/templates/faithfulness.yml