active_genie 0.0.10 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +63 -57
- data/VERSION +1 -1
- data/lib/active_genie/battle/README.md +7 -7
- data/lib/active_genie/battle/basic.rb +75 -68
- data/lib/active_genie/battle.rb +4 -0
- data/lib/active_genie/clients/anthropic_client.rb +110 -0
- data/lib/active_genie/clients/google_client.rb +158 -0
- data/lib/active_genie/clients/helpers/retry.rb +29 -0
- data/lib/active_genie/clients/openai_client.rb +58 -38
- data/lib/active_genie/clients/unified_client.rb +5 -5
- data/lib/active_genie/concerns/loggable.rb +44 -0
- data/lib/active_genie/configuration/log_config.rb +1 -1
- data/lib/active_genie/configuration/providers/anthropic_config.rb +54 -0
- data/lib/active_genie/configuration/providers/base_config.rb +85 -0
- data/lib/active_genie/configuration/providers/deepseek_config.rb +54 -0
- data/lib/active_genie/configuration/providers/google_config.rb +56 -0
- data/lib/active_genie/configuration/providers/openai_config.rb +54 -0
- data/lib/active_genie/configuration/providers_config.rb +7 -4
- data/lib/active_genie/configuration/runtime_config.rb +35 -0
- data/lib/active_genie/configuration.rb +18 -4
- data/lib/active_genie/data_extractor/README.md +0 -1
- data/lib/active_genie/data_extractor/basic.rb +22 -19
- data/lib/active_genie/data_extractor/from_informal.rb +4 -15
- data/lib/active_genie/data_extractor.rb +4 -0
- data/lib/active_genie/logger.rb +60 -14
- data/lib/active_genie/{league → ranking}/README.md +7 -7
- data/lib/active_genie/ranking/elo_round.rb +134 -0
- data/lib/active_genie/ranking/free_for_all.rb +93 -0
- data/lib/active_genie/ranking/player.rb +92 -0
- data/lib/active_genie/{league → ranking}/players_collection.rb +19 -12
- data/lib/active_genie/ranking/ranking.rb +153 -0
- data/lib/active_genie/ranking/ranking_scoring.rb +71 -0
- data/lib/active_genie/ranking.rb +12 -0
- data/lib/active_genie/scoring/README.md +1 -1
- data/lib/active_genie/scoring/basic.rb +93 -49
- data/lib/active_genie/scoring/{recommended_reviews.rb → recommended_reviewers.rb} +18 -7
- data/lib/active_genie/scoring.rb +6 -3
- data/lib/active_genie.rb +1 -1
- data/lib/tasks/benchmark.rake +27 -0
- metadata +100 -100
- data/lib/active_genie/configuration/openai_config.rb +0 -56
- data/lib/active_genie/league/elo_ranking.rb +0 -121
- data/lib/active_genie/league/free_for_all.rb +0 -62
- data/lib/active_genie/league/league.rb +0 -120
- data/lib/active_genie/league/player.rb +0 -59
- data/lib/active_genie/league.rb +0 -12
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module ActiveGenie::Ranking
  # One competitor in a ranking run: the content being judged plus the
  # bookkeeping (score, Elo, free-for-all counters, elimination marker, rank)
  # that is updated while the ranking progresses.
  class Player
    # Elo given to a player whose score is exactly 50 (see #generate_elo_by_score).
    BASE_ELO = 1000

    attr_reader :id, :content, :score, :elo,
                :ffa_win_count, :ffa_lose_count, :ffa_draw_count,
                :eliminated
    attr_accessor :rank

    # @param params [String, Hash] either the raw content, or a hash that may
    #   carry :content, :id, :score, :elo, :ffa_win_count, :ffa_lose_count,
    #   :ffa_draw_count and :eliminated.
    def initialize(params)
      params = { content: params } if params.is_a?(String)

      @content = params[:content] || params
      # Without an explicit id, the MD5 of the content identifies the player.
      @id = params[:id] || Digest::MD5.hexdigest(@content)
      @score = params[:score] || nil
      # Mirror #score=: a player created with a score but no Elo gets the Elo
      # derived from that score instead of staying nil.
      @elo = params[:elo] || (@score.nil? ? nil : generate_elo_by_score)
      @ffa_win_count = params[:ffa_win_count] || 0
      @ffa_lose_count = params[:ffa_lose_count] || 0
      @ffa_draw_count = params[:ffa_draw_count] || 0
      # `|| nil` keeps an explicit `false` normalized to nil.
      @eliminated = params[:eliminated] || nil
    end

    # Stores a new score and re-derives the Elo from it.
    # A debug entry is logged only when the value actually changes.
    def score=(value)
      ActiveGenie::Logger.debug({ code: :new_score, player_id: id, score: value }) if value != @score
      @score = value
      @elo = generate_elo_by_score
    end

    # Stores a new Elo; logs a debug entry when the value changes.
    def elo=(value)
      ActiveGenie::Logger.debug({ code: :new_elo, player_id: id, elo: value }) if value != @elo
      @elo = value
    end

    # Stores the elimination marker; logs a debug entry when the value changes.
    def eliminated=(value)
      ActiveGenie::Logger.debug({ code: :new_eliminated, player_id: id, eliminated: value }) if value != @eliminated
      @eliminated = value
    end

    # Records one free-for-all draw.
    def draw!
      @ffa_draw_count += 1
      ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'draw', ffa_score: })
    end

    # Records one free-for-all win.
    def win!
      @ffa_win_count += 1
      ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'win', ffa_score: })
    end

    # Records one free-for-all loss.
    def lose!
      @ffa_lose_count += 1
      ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'lose', ffa_score: })
    end

    # Free-for-all points: 3 per win, 1 per draw, 0 per loss.
    def ffa_score
      @ffa_win_count * 3 + @ffa_draw_count
    end

    # @return [Hash] snapshot of the player's state (rank is not included).
    def to_h
      {
        id:, content:, score:, elo:,
        ffa_win_count:, ffa_lose_count:, ffa_draw_count:,
        eliminated:, ffa_score:
      }
    end

    # Hash-style read access, e.g. player[:score] or player['elo'].
    #
    # @param key [Symbol, String] name of a public reader on the player
    # @return [Object, nil] the reader's value, or nil when the player has no
    #   public method with that name
    def [](key)
      attr_name = key.to_sym
      return nil unless respond_to?(attr_name)

      # public_send so the lookup can never reach private methods.
      public_send(attr_name)
    end

    # Elo derived from the current score: BASE_ELO shifted by the distance of
    # the score from 50. A nil score is treated as 0.
    def generate_elo_by_score
      BASE_ELO + ((@score || 0) - 50)
    end
  end
end
|
@@ -1,7 +1,6 @@
|
|
1
|
-
require_relative '../utils/math'
|
2
1
|
require_relative './player'
|
3
2
|
|
4
|
-
module ActiveGenie::
|
3
|
+
module ActiveGenie::Ranking
|
5
4
|
class PlayersCollection
|
6
5
|
def initialize(param_players)
|
7
6
|
@players = build(param_players)
|
@@ -9,9 +8,11 @@ module ActiveGenie::Leaderboard
|
|
9
8
|
attr_reader :players
|
10
9
|
|
11
10
|
def coefficient_of_variation
|
12
|
-
score_list = eligible.map(&:score)
|
11
|
+
score_list = eligible.map(&:score).compact
|
12
|
+
return nil if score_list.empty?
|
13
|
+
|
13
14
|
mean = score_list.sum.to_f / score_list.size
|
14
|
-
return nil if mean == 0
|
15
|
+
return nil if mean == 0
|
15
16
|
|
16
17
|
variance = score_list.map { |num| (num - mean) ** 2 }.sum / score_list.size
|
17
18
|
standard_deviation = Math.sqrt(variance)
|
@@ -19,11 +20,11 @@ module ActiveGenie::Leaderboard
|
|
19
20
|
(standard_deviation / mean) * 100
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
+
# Returns the relegation tier: the last `tier_size` entries of `eligible`.
#
# Array#last is used instead of a negative range because `-0 == 0` would make
# `eligible[0..-1]` return every player for a tier size of 0, and a tier size
# larger than the list would make the range lookup return nil.
#
# @return [Array] at most `tier_size` players; empty when `tier_size` is 0
def calc_relegation_tier
  eligible.last(tier_size)
end
|
25
26
|
|
26
|
-
def
|
27
|
+
# Returns the defender tier: the `tier_size` entries of `eligible` that sit
# directly before the last `tier_size` entries (the relegation slice).
#
# Built from Array#first / Array#last so that a list shorter than
# `2 * tier_size` yields the players that are left (possibly none) instead of
# the nil a negative range lookup produces.
#
# @return [Array] at most `tier_size` players, never overlapping the last
#   `tier_size` entries of `eligible`
def calc_defender_tier
  candidates = eligible
  above_relegation = candidates.first([candidates.size - tier_size, 0].max)
  above_relegation.last(tier_size)
end
|
29
30
|
|
@@ -35,22 +36,28 @@ module ActiveGenie::Leaderboard
|
|
35
36
|
@players.reject(&:eliminated).size
|
36
37
|
end
|
37
38
|
|
39
|
+
# True while more than 15 players are still eligible.
def elo_eligible?
  remaining = eligible.size
  remaining > 15
end
|
42
|
+
|
43
|
+
# Orders the players by free-for-all score, then Elo, then score (all
# descending; a missing Elo or score counts as 0) and stamps each player's
# 1-based position into its `rank`.
#
# @return [Array] the players in ranking order
def sorted
  ordered = @players.sort_by do |player|
    [-player.ffa_score, -(player.elo || 0), -(player.score || 0)]
  end

  ordered.each.with_index(1) { |player, position| player.rank = position }
  ordered
end
|
48
|
+
|
38
49
|
# Returns the players in ranking order, each converted with its own #to_h.
def to_h
  sorted.map(&:to_h)
end
|
41
52
|
|
42
53
|
def method_missing(...)
|
43
54
|
@players.send(...)
|
44
55
|
end
|
45
56
|
|
46
|
-
def sorted
|
47
|
-
@players.sort_by { |p| [-p.league_score, -(p.elo || 0), -p.score] }
|
48
|
-
end
|
49
|
-
|
50
57
|
private
|
51
58
|
|
52
59
|
# Wraps every raw entry of `param_players` in a Player.
def build(param_players)
  param_players.map do |raw_player|
    Player.new(raw_player)
  end
end
|
55
62
|
|
56
63
|
# Returns the number of players to battle in each round
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require_relative '../concerns/loggable'
|
2
|
+
require_relative './players_collection'
|
3
|
+
require_relative './free_for_all'
|
4
|
+
require_relative './elo_round'
|
5
|
+
require_relative './ranking_scoring'
|
6
|
+
|
7
|
+
# This class orchestrates player ranking through multiple evaluation stages
|
8
|
+
# using Elo ranking and free-for-all match simulations.
|
9
|
+
# 1. Sets initial scores
|
10
|
+
# 2. Eliminates low performers
|
11
|
+
# 3. Runs Elo ranking (for large groups)
|
12
|
+
# 4. Conducts free-for-all matches
|
13
|
+
#
|
14
|
+
# @example Basic usage
|
15
|
+
# Ranking.call(players, criteria)
|
16
|
+
#
|
17
|
+
# @param param_players [Array<Hash|String>] Collection of player objects to evaluate
|
18
|
+
# Example: ["Circle", "Triangle", "Square"]
|
19
|
+
# or
|
20
|
+
# [
|
21
|
+
# { content: "Circle", score: 10 },
|
22
|
+
# { content: "Triangle", score: 7 },
|
23
|
+
# { content: "Square", score: 5 }
|
24
|
+
# ]
|
25
|
+
# @param criteria [String] Evaluation criteria configuration
|
26
|
+
# Example: "What is more similar to the letter 'O'?"
|
27
|
+
# @param config [Hash] Additional configuration options
|
28
|
+
# Example: { model: "gpt-4o", api_key: ENV['OPENAI_API_KEY'] }
|
29
|
+
# @return [Array<Player>] Players sorted from best to worst, each with its rank assigned
|
30
|
+
module ActiveGenie::Ranking
  class Ranking
    include ActiveGenie::Concerns::Loggable

    # Builds a ranking and runs it in one step; takes the same arguments as
    # #initialize.
    def self.call(...)
      new(...).call
    end

    # @param param_players [Array<Hash, String>] raw players, wrapped in a PlayersCollection
    # @param criteria [String] criteria handed to the scoring, Elo and free-for-all stages
    # @param reviewers [Array<String>, String, nil] optional reviewers; nils and duplicates are dropped
    # @param config [Hash] configuration, passed through ActiveGenie::Configuration.to_h
    def initialize(param_players, criteria, reviewers: [], config: {})
      @criteria = criteria
      @reviewers = Array(reviewers).compact.uniq
      @config = ActiveGenie::Configuration.to_h(config)
      @players = PlayersCollection.new(param_players)
      @elo_rounds_played = 0
      @elo_round_battle_count = 0
      @free_for_all_battle_count = 0
      @total_tokens = 0
      @start_time = Time.now
    end

    # Runs every stage in order: initial scoring, variation-based elimination,
    # Elo rounds while the collection is Elo-eligible, then the free-for-all.
    #
    # @return [Array] the players as returned by PlayersCollection#sorted
    def call
      initial_log

      set_initial_player_scores!
      eliminate_obvious_bad_players!

      while @players.elo_eligible?
        elo_report = run_elo_round!
        eliminate_relegation_players!
        rebalance_players!(elo_report)
      end

      run_free_for_all!
      final_logs

      @players.sorted
    end

    private

    SCORE_VARIATION_THRESHOLD = 15
    ELIMINATION_VARIATION = 'variation_too_high'
    ELIMINATION_RELEGATION = 'relegation_tier'

    # Accumulates the token usage reported by :llm_usage log entries.
    with_logging_context :log_context, ->(log) {
      @total_tokens += log[:total_tokens] if log[:code] == :llm_usage
    }

    # Logs every player once at debug level before the ranking starts.
    def initial_log
      @players.each { |p| ActiveGenie::Logger.debug({ code: :new_player, player: p.to_h }) }
    end

    # Delegates initial scoring to RankingScoring.
    def set_initial_player_scores!
      RankingScoring.call(@players, @criteria, reviewers: @reviewers, config: @config)
    end

    # Eliminates the last eligible player, one at a time, while the score
    # variation is at or above SCORE_VARIATION_THRESHOLD.
    #
    # coefficient_of_variation returns nil when no eligible player has a score
    # or when the mean score is 0; that ends the loop instead of raising on
    # `nil >= 15`.
    def eliminate_obvious_bad_players!
      loop do
        variation = @players.coefficient_of_variation
        break if variation.nil? || variation < SCORE_VARIATION_THRESHOLD

        @players.eligible.last.eliminated = ELIMINATION_VARIATION
      end
    end

    # Plays one Elo round and adds its battle count to the running total.
    #
    # @return [Hash] the report returned by EloRound.call
    def run_elo_round!
      @elo_rounds_played += 1

      elo_report = EloRound.call(@players, @criteria, config: @config)

      @elo_round_battle_count += elo_report[:battles_count]

      elo_report
    end

    # Marks every player of the current relegation tier as eliminated.
    def eliminate_relegation_players!
      @players.calc_relegation_tier.each { |player| player.eliminated = ELIMINATION_RELEGATION }
    end

    # Adds the round's highest Elo difference to every eligible player that
    # did not take part in the round. Skipped when that difference is negative.
    # NOTE(review): assumes every eligible player already has a numeric Elo.
    def rebalance_players!(elo_report)
      return if elo_report[:highest_elo_diff].negative?

      @players.eligible.each do |player|
        next if elo_report[:players_in_round].include?(player.id)

        player.elo += elo_report[:highest_elo_diff]
      end
    end

    # Plays the free-for-all stage and records its battle count.
    def run_free_for_all!
      ffa_report = FreeForAll.call(@players, @criteria, config: @config)

      @free_for_all_battle_count += ffa_report[:battles_count]
    end

    # Summary of the run, emitted by #final_logs.
    def report
      {
        ranking_id: ranking_id,
        players_count: @players.size,
        variation_too_high: @players.count { |player| player.eliminated == ELIMINATION_VARIATION },
        elo_rounds_played: @elo_rounds_played,
        elo_round_battle_count: @elo_round_battle_count,
        relegation_tier: @players.count { |player| player.eliminated == ELIMINATION_RELEGATION },
        ffa_round_battle_count: @free_for_all_battle_count,
        top3: @players.eligible.first(3).map(&:id),
        total_tokens: @total_tokens,
        duration_seconds: Time.now - @start_time,
      }
    end

    # Logs the final standings (debug) and the run summary (info).
    def final_logs
      ActiveGenie::Logger.debug({ code: :ranking_final, players: @players.sorted.map(&:to_h) })
      ActiveGenie::Logger.info({ code: :ranking, **report })
    end

    # Context referenced by the with_logging_context declaration above.
    def log_context
      { config: @config[:log], ranking_id: }
    end

    # MD5 over the player ids, the criteria and the JSON form of the config.
    def ranking_id
      player_ids = @players.map(&:id).join(',')
      ranking_unique_key = [player_ids, @criteria, @config.to_json].join('-')

      Digest::MD5.hexdigest(ranking_unique_key)
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require_relative '../scoring/recommended_reviewers'
|
2
|
+
|
3
|
+
module ActiveGenie::Ranking
  # Gives an initial score to every player that does not have one yet,
  # recommending reviewers first when none were supplied.
  class RankingScoring
    # Builds the scorer and runs it; takes the same arguments as #initialize.
    def self.call(...)
      new(...).call
    end

    # @param players [#select, #sample] collection of players (responds to the
    #   Array-style methods used below)
    # @param criteria [String] criteria passed to the scoring and reviewer calls
    # @param reviewers [Array<String>, String, nil] optional reviewers; nils and duplicates are dropped
    # @param config [Hash] configuration, passed through ActiveGenie::Configuration.to_h
    def initialize(players, criteria, reviewers: [], config: {})
      @players = players
      @criteria = criteria
      @config = ActiveGenie::Configuration.to_h(config)
      @reviewers = Array(reviewers).compact.uniq
    end

    # Scores every player whose score is nil, inside a logging context that
    # carries the ranking_scoring_id.
    def call
      ActiveGenie::Logger.with_context(log_context) do
        @reviewers = generate_reviewers

        players_without_score.each do |player|
          # TODO: This can take a while, can be parallelized
          player.score = generate_score(player)
        end
      end
    end

    private

    # Memoized on first use, so the list (and the id derived from it) stays
    # the same while scores are being assigned.
    def players_without_score
      @players_without_score ||= @players.select { |player| player.score.nil? }
    end

    # Scores one player's content and logs the score with its reasoning.
    #
    # @return [Object] the 'final_score' entry of the scoring result
    def generate_score(player)
      score, reasoning = ActiveGenie::Scoring.call(
        player.content,
        @criteria,
        @reviewers,
        config: @config
      ).values_at('final_score', 'final_reasoning')

      ActiveGenie::Logger.debug({ code: :new_score, player_id: player.id, score:, reasoning: })

      score
    end

    # Returns the supplied reviewers, or asks RecommendedReviewers for three
    # based on the content of up to two sampled players.
    def generate_reviewers
      return @reviewers unless @reviewers.empty?

      # sample(2) draws two *distinct* players; two separate `sample` calls
      # could return the same player twice and raise on an empty collection.
      sample_content = @players.sample(2).map(&:content).join("\n\n")

      reviewer1, reviewer2, reviewer3 = ActiveGenie::Scoring::RecommendedReviewers.call(
        sample_content,
        @criteria,
        config: @config
      ).values_at('reviewer1', 'reviewer2', 'reviewer3')

      ActiveGenie::Logger.debug({ code: :new_reviewers, reviewers: [reviewer1, reviewer2, reviewer3] })

      [reviewer1, reviewer2, reviewer3]
    end

    # Context attached to every log entry emitted during #call.
    def log_context
      { ranking_scoring_id: }
    end

    # MD5 over the ids of the players to be scored, the criteria and the JSON
    # form of the config.
    def ranking_scoring_id
      player_ids = players_without_score.map(&:id).join(',')
      ranking_unique_key = [player_ids, @criteria, @config.to_json].join('-')

      Digest::MD5.hexdigest(ranking_unique_key)
    end
  end
end
|
@@ -58,7 +58,7 @@ Main interface for scoring text content.
|
|
58
58
|
- `reviewers` [Array<String>] - Optional list of specific reviewers
|
59
59
|
- `config` [Hash] - Additional configuration options
|
60
60
|
|
61
|
-
### `
|
61
|
+
### `RecommendedReviewers.call(text, criteria, config: {})`
|
62
62
|
Recommends appropriate reviewers based on content and criteria.
|
63
63
|
|
64
64
|
#### Parameters
|
@@ -25,15 +25,15 @@ module ActiveGenie::Scoring
|
|
25
25
|
# @return [Hash] The evaluation result containing the scores and reasoning
|
26
26
|
# @return [Number] :final_score The final score of the text based on the criteria and reviewers
|
27
27
|
# @return [String] :final_reasoning Detailed explanation of why the final score was reached
|
28
|
-
def self.call(
|
29
|
-
new(
|
28
|
+
def self.call(...)
|
29
|
+
new(...).call
|
30
30
|
end
|
31
31
|
|
32
32
|
def initialize(text, criteria, reviewers = [], config: {})
|
33
33
|
@text = text
|
34
34
|
@criteria = criteria
|
35
35
|
@reviewers = Array(reviewers).compact.uniq
|
36
|
-
@config = config
|
36
|
+
@config = ActiveGenie::Configuration.to_h(config)
|
37
37
|
end
|
38
38
|
|
39
39
|
def call
|
@@ -43,6 +43,42 @@ module ActiveGenie::Scoring
|
|
43
43
|
{ role: 'user', content: "Text to score: #{@text}" },
|
44
44
|
]
|
45
45
|
|
46
|
+
properties = build_properties
|
47
|
+
|
48
|
+
function = {
|
49
|
+
name: 'scoring',
|
50
|
+
description: 'Score the text based on the given criteria.',
|
51
|
+
schema: {
|
52
|
+
type: "object",
|
53
|
+
properties:,
|
54
|
+
required: properties.keys
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
result = ::ActiveGenie::Clients::UnifiedClient.function_calling(
|
59
|
+
messages,
|
60
|
+
function,
|
61
|
+
model_tier: 'lower_tier',
|
62
|
+
config: @config
|
63
|
+
)
|
64
|
+
|
65
|
+
result['final_score'] = 0 if result['final_score'].nil?
|
66
|
+
|
67
|
+
ActiveGenie::Logger.debug({
|
68
|
+
code: :scoring,
|
69
|
+
text: @text[0..30],
|
70
|
+
criteria: @criteria[0..30],
|
71
|
+
reviewers: get_or_recommend_reviewers,
|
72
|
+
score: result['final_score'],
|
73
|
+
reasoning: result['final_reasoning']
|
74
|
+
})
|
75
|
+
|
76
|
+
result
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def build_properties
|
46
82
|
properties = {}
|
47
83
|
get_or_recommend_reviewers.each do |reviewer|
|
48
84
|
properties["#{reviewer}_reasoning"] = {
|
@@ -57,70 +93,78 @@ module ActiveGenie::Scoring
|
|
57
93
|
}
|
58
94
|
end
|
59
95
|
|
60
|
-
|
61
|
-
|
62
|
-
description: '
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
final_score: {
|
68
|
-
type: 'number',
|
69
|
-
description: 'The final score based on the previous reviewers',
|
70
|
-
},
|
71
|
-
final_reasoning: {
|
72
|
-
type: 'string',
|
73
|
-
description: 'The final reasoning based on the previous reviewers',
|
74
|
-
}
|
75
|
-
}
|
76
|
-
}
|
96
|
+
properties[:final_score] = {
|
97
|
+
type: 'number',
|
98
|
+
description: 'The final score based on the previous reviewers',
|
99
|
+
}
|
100
|
+
properties[:final_reasoning] = {
|
101
|
+
type: 'string',
|
102
|
+
description: 'The final reasoning based on the previous reviewers',
|
77
103
|
}
|
78
104
|
|
79
|
-
|
105
|
+
properties
|
80
106
|
end
|
81
107
|
|
82
|
-
private
|
83
|
-
|
84
108
|
# Returns the reviewers used for scoring, memoized after the first call:
# the reviewers given to the constructor when there are any, otherwise the
# three reviewers suggested by RecommendedReviewers for this text and criteria.
def get_or_recommend_reviewers
  @get_or_recommend_reviewers ||=
    if @reviewers.empty?
      recommendation = RecommendedReviewers.call(@text, @criteria, config: @config)

      %w[reviewer1 reviewer2 reviewer3].map { |key| recommendation[key] }
    else
      @reviewers
    end
end
|
93
117
|
|
94
118
|
PROMPT = <<~PROMPT
|
95
|
-
Evaluate and score the provided text based on predefined criteria,
|
119
|
+
Evaluate and score the provided text based on predefined criteria, using a scoring range of 0 to 100 with 100 representing the highest possible score.
|
120
|
+
|
121
|
+
Follow the instructions below to ensure a comprehensive and objective assessment.
|
96
122
|
|
97
123
|
# Evaluation Process
|
98
|
-
1. **Analysis**: Thoroughly compare the text against each criterion to ensure comprehensive evaluation.
|
99
|
-
2. **Document Deviations**: Clearly identify and document any areas where the content does not align with the specified criteria.
|
100
|
-
3. **Highlight Strengths**: Emphasize notable features or elements that enhance the overall quality or effectiveness of the content.
|
101
|
-
4. **Identify Weaknesses**: Specify areas where the content fails to meet the criteria or where improvements could be made.
|
102
124
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
-
|
125
|
+
1. **Analysis**:
|
126
|
+
- Thoroughly compare the text against each criterion for a comprehensive evaluation.
|
127
|
+
|
128
|
+
2. **Document Deviations**:
|
129
|
+
- Identify and document areas where the content does not align with the specified criteria.
|
130
|
+
|
131
|
+
3. **Highlight Strengths**:
|
132
|
+
- Note notable features or elements that enhance the quality or effectiveness of the content.
|
133
|
+
|
134
|
+
4. **Identify Weaknesses**:
|
135
|
+
- Specify areas where the content fails to meet the criteria or where improvements could be made.
|
136
|
+
|
137
|
+
# Scoring Fairness
|
138
|
+
|
139
|
+
- Ensure the assigned score reflects both the alignment with the criteria and the content's effectiveness.
|
140
|
+
- Consider if the fulfillment of other criteria compensates for areas lacking extreme details.
|
141
|
+
|
142
|
+
# Scoring Range
|
143
|
+
|
144
|
+
Segment scores into five parts before assigning a final score:
|
145
|
+
- **Terrible**: 0-20 - Content does not meet the criteria.
|
146
|
+
- **Bad**: 21-40 - Content is substandard but meets some criteria.
|
147
|
+
- **Average**: 41-60 - Content meets criteria with room for improvement.
|
148
|
+
- **Good**: 61-80 - Content exceeds criteria and is above average.
|
149
|
+
- **Great**: 81-100 - Content exceeds all expectations.
|
108
150
|
|
109
151
|
# Guidelines
|
110
|
-
- Maintain objectivity, avoiding biases or preconceived notions.
|
111
|
-
- Deconstruct each criterion into actionable components for a systematic evaluation.
|
112
|
-
- If the text lacks information, apply reasonable judgment to assign a score while clearly explaining the rationale.
|
113
|
-
PROMPT
|
114
152
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
153
|
+
- Maintain objectivity and avoid biases.
|
154
|
+
- Deconstruct each criterion into actionable components for systematic evaluation.
|
155
|
+
- Apply reasonable judgment in assigning a score, justifying your rationale clearly.
|
156
|
+
|
157
|
+
# Output Format
|
158
|
+
|
159
|
+
- Provide a detailed review including:
|
160
|
+
- A final score (0-100)
|
161
|
+
- Specific reasoning for the assigned score, detailing all evaluated criteria
|
162
|
+
- Include both positive aspects and suggested improvements
|
163
|
+
|
164
|
+
# Notes
|
165
|
+
|
166
|
+
- Consider edge cases where the text may partially align with criteria.
|
167
|
+
- If lacking information, reasonably judge and explain your scoring approach.
|
168
|
+
PROMPT
|
125
169
|
end
|
126
170
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
require_relative '../clients/unified_client'
|
4
4
|
|
5
5
|
module ActiveGenie::Scoring
|
6
|
-
# The
|
6
|
+
# The RecommendedReviewers class intelligently suggests appropriate reviewer roles
|
7
7
|
# for evaluating text content based on specific criteria. It uses AI to analyze
|
8
8
|
# the content and criteria to identify the most suitable subject matter experts.
|
9
9
|
#
|
@@ -11,14 +11,14 @@ module ActiveGenie::Scoring
|
|
11
11
|
# three distinct reviewer roles with complementary expertise and perspectives.
|
12
12
|
#
|
13
13
|
# @example Getting recommended reviewers for technical content
|
14
|
-
#
|
14
|
+
# RecommendedReviewers.call("Technical documentation about API design",
|
15
15
|
# "Evaluate technical accuracy and clarity")
|
16
16
|
# # => { reviewer1: "API Architect", reviewer2: "Technical Writer",
|
17
17
|
# # reviewer3: "Developer Advocate", reasoning: "..." }
|
18
18
|
#
|
19
|
-
class
|
20
|
-
def self.call(
|
21
|
-
new(
|
19
|
+
class RecommendedReviewers
|
20
|
+
def self.call(...)
|
21
|
+
new(...).call
|
22
22
|
end
|
23
23
|
|
24
24
|
# Initializes a new reviewer recommendation instance
|
@@ -29,7 +29,7 @@ module ActiveGenie::Scoring
|
|
29
29
|
def initialize(text, criteria, config: {})
|
30
30
|
@text = text
|
31
31
|
@criteria = criteria
|
32
|
-
@config = config
|
32
|
+
@config = ActiveGenie::Configuration.to_h(config)
|
33
33
|
end
|
34
34
|
|
35
35
|
def call
|
@@ -53,7 +53,14 @@ module ActiveGenie::Scoring
|
|
53
53
|
}
|
54
54
|
}
|
55
55
|
|
56
|
-
|
56
|
+
result = client.function_calling(
|
57
|
+
messages,
|
58
|
+
function,
|
59
|
+
model_tier: 'lower_tier',
|
60
|
+
config: @config
|
61
|
+
)
|
62
|
+
|
63
|
+
result
|
57
64
|
end
|
58
65
|
|
59
66
|
private
|
@@ -72,5 +79,9 @@ module ActiveGenie::Scoring
|
|
72
79
|
- Include reasoning for how each choice supports a thorough and insightful review.
|
73
80
|
- Avoid redundant or overly similar titles/roles to maintain diversity.
|
74
81
|
PROMPT
|
82
|
+
|
83
|
+
def client
|
84
|
+
::ActiveGenie::Clients::UnifiedClient
|
85
|
+
end
|
75
86
|
end
|
76
87
|
end
|
data/lib/active_genie/scoring.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require_relative 'scoring/basic'
|
2
|
-
require_relative 'scoring/
|
2
|
+
require_relative 'scoring/recommended_reviewers'
|
3
3
|
|
4
4
|
module ActiveGenie
|
5
5
|
# See the [Scoring README](lib/active_genie/scoring/README.md) for more information.
|
@@ -9,9 +9,12 @@ module ActiveGenie
|
|
9
9
|
def basic(...)
|
10
10
|
Basic.call(...)
|
11
11
|
end
|
12
|
+
def call(...)
|
13
|
+
Basic.call(...)
|
14
|
+
end
|
12
15
|
|
13
|
-
def
|
14
|
-
|
16
|
+
def recommended_reviewers(...)
|
17
|
+
RecommendedReviewers.call(...)
|
15
18
|
end
|
16
19
|
end
|
17
20
|
end
|
data/lib/active_genie.rb
CHANGED
@@ -5,7 +5,7 @@ module ActiveGenie
|
|
5
5
|
autoload :DataExtractor, File.join(__dir__, 'active_genie/data_extractor')
|
6
6
|
autoload :Battle, File.join(__dir__, 'active_genie/battle')
|
7
7
|
autoload :Scoring, File.join(__dir__, 'active_genie/scoring')
|
8
|
-
autoload :
|
8
|
+
autoload :Ranking, File.join(__dir__, 'active_genie/ranking')
|
9
9
|
|
10
10
|
class << self
|
11
11
|
def configure
|