active_genie 0.0.12 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +65 -22
- data/VERSION +1 -1
- data/lib/active_genie/battle/README.md +7 -7
- data/lib/active_genie/battle/basic.rb +48 -32
- data/lib/active_genie/battle.rb +4 -0
- data/lib/active_genie/clients/anthropic_client.rb +84 -0
- data/lib/active_genie/clients/base_client.rb +241 -0
- data/lib/active_genie/clients/google_client.rb +135 -0
- data/lib/active_genie/clients/helpers/retry.rb +29 -0
- data/lib/active_genie/clients/openai_client.rb +70 -91
- data/lib/active_genie/clients/unified_client.rb +4 -4
- data/lib/active_genie/concerns/loggable.rb +44 -0
- data/lib/active_genie/configuration/log_config.rb +1 -1
- data/lib/active_genie/configuration/providers/anthropic_config.rb +54 -0
- data/lib/active_genie/configuration/providers/base_config.rb +85 -0
- data/lib/active_genie/configuration/providers/deepseek_config.rb +54 -0
- data/lib/active_genie/configuration/providers/google_config.rb +56 -0
- data/lib/active_genie/configuration/providers/openai_config.rb +54 -0
- data/lib/active_genie/configuration/providers_config.rb +7 -4
- data/lib/active_genie/configuration/runtime_config.rb +35 -0
- data/lib/active_genie/configuration.rb +18 -4
- data/lib/active_genie/data_extractor/basic.rb +16 -3
- data/lib/active_genie/data_extractor.rb +4 -0
- data/lib/active_genie/logger.rb +40 -21
- data/lib/active_genie/ranking/elo_round.rb +71 -50
- data/lib/active_genie/ranking/free_for_all.rb +31 -14
- data/lib/active_genie/ranking/player.rb +11 -16
- data/lib/active_genie/ranking/players_collection.rb +4 -4
- data/lib/active_genie/ranking/ranking.rb +74 -19
- data/lib/active_genie/ranking/ranking_scoring.rb +3 -3
- data/lib/active_genie/scoring/basic.rb +44 -25
- data/lib/active_genie/scoring/recommended_reviewers.rb +1 -1
- data/lib/active_genie/scoring.rb +3 -0
- data/lib/tasks/benchmark.rake +27 -0
- metadata +92 -70
- data/lib/active_genie/configuration/openai_config.rb +0 -56
@@ -13,32 +13,39 @@ module ActiveGenie::Ranking
|
|
13
13
|
@criteria = criteria
|
14
14
|
@config = config
|
15
15
|
@tmp_defenders = []
|
16
|
+
@start_time = Time.now
|
17
|
+
@total_tokens = 0
|
18
|
+
@previous_elo = {}
|
19
|
+
@previous_highest_elo = @defender_tier.max_by(&:elo).elo
|
16
20
|
end
|
17
21
|
|
18
22
|
def call
|
19
23
|
ActiveGenie::Logger.with_context(log_context) do
|
20
|
-
|
24
|
+
save_previous_elo
|
25
|
+
matches.each do |player_1, player_2|
|
21
26
|
# TODO: battle can take a while, can be parallelized
|
22
|
-
winner, loser = battle(
|
23
|
-
|
27
|
+
winner, loser = battle(player_1, player_2)
|
24
28
|
next if winner.nil? || loser.nil?
|
25
29
|
|
26
|
-
|
27
|
-
|
28
|
-
winner.elo = new_winner_elo
|
29
|
-
loser.elo = new_loser_elo
|
30
|
+
winner.elo = calculate_new_elo(winner.elo, loser.elo, 1)
|
31
|
+
loser.elo = calculate_new_elo(loser.elo, winner.elo, 0)
|
30
32
|
end
|
31
|
-
|
32
|
-
# TODO: add a round report. Duration, Elo changes, etc.
|
33
33
|
end
|
34
|
+
|
35
|
+
ActiveGenie::Logger.info({ code: :elo_round_report, **report })
|
36
|
+
|
37
|
+
report
|
34
38
|
end
|
35
39
|
|
36
40
|
private
|
37
41
|
|
38
42
|
BATTLE_PER_PLAYER = 3
|
39
|
-
LOSE_PENALTY = 15
|
40
43
|
K = 32
|
41
44
|
|
45
|
+
def save_previous_elo
|
46
|
+
@previous_elo = @players.map { |player| [player.id, player.elo] }.to_h
|
47
|
+
end
|
48
|
+
|
42
49
|
def matches
|
43
50
|
@relegation_tier.reduce([]) do |matches, attack_player|
|
44
51
|
BATTLE_PER_PLAYER.times do
|
@@ -49,53 +56,35 @@ module ActiveGenie::Ranking
|
|
49
56
|
end
|
50
57
|
|
51
58
|
def next_defense_player
|
52
|
-
@tmp_defenders = @defender_tier if @tmp_defenders.size.zero?
|
59
|
+
@tmp_defenders = @defender_tier.shuffle if @tmp_defenders.size.zero?
|
53
60
|
|
54
|
-
@tmp_defenders.
|
61
|
+
@tmp_defenders.pop
|
55
62
|
end
|
56
63
|
|
57
|
-
def battle(
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
ActiveGenie::Logger.debug({
|
72
|
-
step: :elo_round_battle,
|
73
|
-
player_ids: [player_a.id, player_b.id],
|
74
|
-
winner_id: winner&.id,
|
75
|
-
loser_id: loser&.id,
|
76
|
-
reasoning: result['reasoning']
|
77
|
-
})
|
64
|
+
def battle(player_1, player_2)
|
65
|
+
ActiveGenie::Logger.with_context({ player_1_id: player_1.id, player_2_id: player_2.id }) do
|
66
|
+
result = ActiveGenie::Battle.basic(
|
67
|
+
player_1.content,
|
68
|
+
player_2.content,
|
69
|
+
@criteria,
|
70
|
+
config: @config
|
71
|
+
)
|
72
|
+
|
73
|
+
winner, loser = case result['winner']
|
74
|
+
when 'player_1' then [player_1, player_2]
|
75
|
+
when 'player_2' then [player_2, player_1]
|
76
|
+
when 'draw' then [nil, nil]
|
77
|
+
end
|
78
78
|
|
79
|
-
|
79
|
+
[winner, loser]
|
80
|
+
end
|
80
81
|
end
|
81
82
|
|
82
83
|
# INFO: Read more about the Elo rating system on https://en.wikipedia.org/wiki/Elo_rating_system
|
83
|
-
def calculate_new_elo(
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
new_winner_elo = [winner_elo + K * (1 - expected_score_a), max_defense_elo].min
|
88
|
-
new_loser_elo = [loser_elo + K * (1 - expected_score_b) - LOSE_PENALTY, min_relegation_elo].max
|
89
|
-
|
90
|
-
[new_winner_elo, new_loser_elo]
|
91
|
-
end
|
92
|
-
|
93
|
-
def max_defense_elo
|
94
|
-
@defender_tier.max_by(&:elo).elo
|
95
|
-
end
|
96
|
-
|
97
|
-
def min_relegation_elo
|
98
|
-
@relegation_tier.min_by(&:elo).elo
|
84
|
+
def calculate_new_elo(player_rating, opponent_rating, score)
|
85
|
+
expected_score = 1.0 / (1.0 + 10.0 ** ((opponent_rating - player_rating) / 400.0))
|
86
|
+
|
87
|
+
player_rating + (K * (score - expected_score)).round
|
99
88
|
end
|
100
89
|
|
101
90
|
def log_context
|
@@ -109,5 +98,37 @@ module ActiveGenie::Ranking
|
|
109
98
|
ranking_unique_key = [relegation_tier_ids, defender_tier_ids, @criteria, @config.to_json].join('-')
|
110
99
|
Digest::MD5.hexdigest(ranking_unique_key)
|
111
100
|
end
|
101
|
+
|
102
|
+
def report
|
103
|
+
{
|
104
|
+
elo_round_id:,
|
105
|
+
players_in_round: players_in_round.map(&:id),
|
106
|
+
battles_count: matches.size,
|
107
|
+
duration_seconds: Time.now - @start_time,
|
108
|
+
total_tokens: @total_tokens,
|
109
|
+
previous_highest_elo: @previous_highest_elo,
|
110
|
+
highest_elo:,
|
111
|
+
highest_elo_diff: highest_elo - @previous_highest_elo,
|
112
|
+
players_elo_diff:,
|
113
|
+
}
|
114
|
+
end
|
115
|
+
|
116
|
+
def players_in_round
|
117
|
+
@defender_tier + @relegation_tier
|
118
|
+
end
|
119
|
+
|
120
|
+
def highest_elo
|
121
|
+
players_in_round.max_by(&:elo).elo
|
122
|
+
end
|
123
|
+
|
124
|
+
def players_elo_diff
|
125
|
+
players_in_round.map do |player|
|
126
|
+
[player.id, player.elo - @previous_elo[player.id]]
|
127
|
+
end.sort_by { |_, diff| -diff }.to_h
|
128
|
+
end
|
129
|
+
|
130
|
+
def log_observer(log)
|
131
|
+
@total_tokens += log[:total_tokens] if log[:code] == :llm_usage
|
132
|
+
end
|
112
133
|
end
|
113
134
|
end
|
@@ -10,24 +10,28 @@ module ActiveGenie::Ranking
|
|
10
10
|
@players = players
|
11
11
|
@criteria = criteria
|
12
12
|
@config = config
|
13
|
+
@start_time = Time.now
|
14
|
+
@total_tokens = 0
|
13
15
|
end
|
14
16
|
|
15
17
|
def call
|
16
|
-
ActiveGenie::Logger.with_context(log_context) do
|
17
|
-
matches.each do |
|
18
|
-
winner, loser = battle(
|
18
|
+
ActiveGenie::Logger.with_context(log_context, observer: method(:log_observer)) do
|
19
|
+
matches.each do |player_1, player_2|
|
20
|
+
winner, loser = battle(player_1, player_2)
|
19
21
|
|
20
22
|
if winner.nil? || loser.nil?
|
21
|
-
|
22
|
-
|
23
|
+
player_1.draw!
|
24
|
+
player_2.draw!
|
23
25
|
else
|
24
26
|
winner.win!
|
25
27
|
loser.lose!
|
26
28
|
end
|
27
29
|
end
|
28
|
-
|
29
|
-
# TODO: add a freeForAll report. Duration, Elo changes, etc.
|
30
30
|
end
|
31
|
+
|
32
|
+
ActiveGenie::Logger.info({ code: :free_for_all_report, **report })
|
33
|
+
|
34
|
+
report
|
31
35
|
end
|
32
36
|
|
33
37
|
private
|
@@ -38,23 +42,23 @@ module ActiveGenie::Ranking
|
|
38
42
|
@players.eligible.combination(2).to_a
|
39
43
|
end
|
40
44
|
|
41
|
-
def battle(
|
45
|
+
def battle(player_1, player_2)
|
42
46
|
result = ActiveGenie::Battle.basic(
|
43
|
-
|
44
|
-
|
47
|
+
player_1.content,
|
48
|
+
player_2.content,
|
45
49
|
@criteria,
|
46
50
|
config: @config
|
47
51
|
)
|
48
52
|
|
49
53
|
winner, loser = case result['winner']
|
50
|
-
when '
|
51
|
-
when '
|
54
|
+
when 'player_1' then [player_1, player_2, result['reasoning']]
|
55
|
+
when 'player_2' then [player_2, player_1, result['reasoning']]
|
52
56
|
when 'draw' then [nil, nil, result['reasoning']]
|
53
57
|
end
|
54
58
|
|
55
59
|
ActiveGenie::Logger.debug({
|
56
|
-
|
57
|
-
player_ids: [
|
60
|
+
code: :free_for_all_battle,
|
61
|
+
player_ids: [player_1.id, player_2.id],
|
58
62
|
winner_id: winner&.id,
|
59
63
|
loser_id: loser&.id,
|
60
64
|
reasoning: result['reasoning']
|
@@ -72,5 +76,18 @@ module ActiveGenie::Ranking
|
|
72
76
|
ranking_unique_key = [eligible_ids, @criteria, @config.to_json].join('-')
|
73
77
|
Digest::MD5.hexdigest(ranking_unique_key)
|
74
78
|
end
|
79
|
+
|
80
|
+
def report
|
81
|
+
{
|
82
|
+
free_for_all_id:,
|
83
|
+
battles_count: matches.size,
|
84
|
+
duration_seconds: Time.now - @start_time,
|
85
|
+
total_tokens: @total_tokens,
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
def log_observer(log)
|
90
|
+
@total_tokens += log[:total_tokens] if log[:code] == :llm_usage
|
91
|
+
end
|
75
92
|
end
|
76
93
|
end
|
@@ -21,39 +21,34 @@ module ActiveGenie::Ranking
|
|
21
21
|
attr_accessor :rank
|
22
22
|
|
23
23
|
def score=(value)
|
24
|
+
ActiveGenie::Logger.debug({ code: :new_score, player_id: id, score: value }) if value != @score
|
24
25
|
@score = value
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
def elo
|
29
|
-
generate_elo_by_score if @elo.nil?
|
30
|
-
|
31
|
-
@elo
|
26
|
+
@elo = generate_elo_by_score
|
32
27
|
end
|
33
28
|
|
34
29
|
def elo=(value)
|
30
|
+
ActiveGenie::Logger.debug({ code: :new_elo, player_id: id, elo: value }) if value != @elo
|
35
31
|
@elo = value
|
36
|
-
ActiveGenie::Logger.debug({ step: :new_elo, player_id: id, elo: value })
|
37
32
|
end
|
38
33
|
|
39
34
|
def eliminated=(value)
|
35
|
+
ActiveGenie::Logger.debug({ code: :new_eliminated, player_id: id, eliminated: value }) if value != @eliminated
|
40
36
|
@eliminated = value
|
41
|
-
ActiveGenie::Logger.debug({ step: :new_eliminated, player_id: id, eliminated: value })
|
42
37
|
end
|
43
38
|
|
44
39
|
def draw!
|
45
40
|
@ffa_draw_count += 1
|
46
|
-
ActiveGenie::Logger.debug({
|
41
|
+
ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'draw', ffa_score: })
|
47
42
|
end
|
48
43
|
|
49
44
|
def win!
|
50
45
|
@ffa_win_count += 1
|
51
|
-
ActiveGenie::Logger.debug({
|
46
|
+
ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'win', ffa_score: })
|
52
47
|
end
|
53
48
|
|
54
49
|
def lose!
|
55
50
|
@ffa_lose_count += 1
|
56
|
-
ActiveGenie::Logger.debug({
|
51
|
+
ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'lose', ffa_score: })
|
57
52
|
end
|
58
53
|
|
59
54
|
def ffa_score
|
@@ -86,12 +81,12 @@ module ActiveGenie::Ranking
|
|
86
81
|
method_name == :[] || super
|
87
82
|
end
|
88
83
|
|
84
|
+
def generate_elo_by_score
|
85
|
+
BASE_ELO + ((@score || 0) - 50)
|
86
|
+
end
|
87
|
+
|
89
88
|
private
|
90
89
|
|
91
90
|
BASE_ELO = 1000
|
92
|
-
|
93
|
-
def generate_elo_by_score
|
94
|
-
@elo = BASE_ELO + ((@score || 0) - 50)
|
95
|
-
end
|
96
91
|
end
|
97
92
|
end
|
@@ -41,9 +41,9 @@ module ActiveGenie::Ranking
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def sorted
|
44
|
-
@players.sort_by { |p| [-p.ffa_score, -(p.elo || 0), -(p.score || 0)] }
|
45
|
-
|
46
|
-
|
44
|
+
sorted_players = @players.sort_by { |p| [-p.ffa_score, -(p.elo || 0), -(p.score || 0)] }
|
45
|
+
sorted_players.each_with_index { |p, i| p.rank = i + 1 }
|
46
|
+
sorted_players
|
47
47
|
end
|
48
48
|
|
49
49
|
def to_h
|
@@ -57,7 +57,7 @@ module ActiveGenie::Ranking
|
|
57
57
|
private
|
58
58
|
|
59
59
|
def build(param_players)
|
60
|
-
param_players.map { |
|
60
|
+
param_players.map { |p| Player.new(p) }
|
61
61
|
end
|
62
62
|
|
63
63
|
# Returns the number of players to battle in each round
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require_relative '../concerns/loggable'
|
1
2
|
require_relative './players_collection'
|
2
3
|
require_relative './free_for_all'
|
3
4
|
require_relative './elo_round'
|
@@ -28,39 +29,55 @@ require_relative './ranking_scoring'
|
|
28
29
|
# @return [Hash] Final ranked player results
|
29
30
|
module ActiveGenie::Ranking
|
30
31
|
class Ranking
|
32
|
+
include ActiveGenie::Concerns::Loggable
|
33
|
+
|
31
34
|
def self.call(...)
|
32
35
|
new(...).call
|
33
36
|
end
|
34
37
|
|
35
38
|
def initialize(param_players, criteria, reviewers: [], config: {})
|
36
|
-
@param_players = param_players
|
37
39
|
@criteria = criteria
|
38
40
|
@reviewers = Array(reviewers).compact.uniq
|
39
41
|
@config = ActiveGenie::Configuration.to_h(config)
|
40
|
-
@players =
|
42
|
+
@players = PlayersCollection.new(param_players)
|
43
|
+
@elo_rounds_played = 0
|
44
|
+
@elo_round_battle_count = 0
|
45
|
+
@free_for_all_battle_count = 0
|
46
|
+
@total_tokens = 0
|
47
|
+
@start_time = Time.now
|
41
48
|
end
|
42
49
|
|
43
50
|
def call
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
run_free_for_all!
|
51
|
+
initial_log
|
52
|
+
|
53
|
+
set_initial_player_scores!
|
54
|
+
eliminate_obvious_bad_players!
|
55
|
+
|
56
|
+
while @players.elo_eligible?
|
57
|
+
elo_report = run_elo_round!
|
58
|
+
eliminate_relegation_players!
|
59
|
+
rebalance_players!(elo_report)
|
56
60
|
end
|
57
61
|
|
62
|
+
run_free_for_all!
|
63
|
+
final_logs
|
64
|
+
|
58
65
|
@players.sorted
|
59
66
|
end
|
60
67
|
|
61
68
|
private
|
62
69
|
|
63
|
-
SCORE_VARIATION_THRESHOLD =
|
70
|
+
SCORE_VARIATION_THRESHOLD = 15
|
71
|
+
ELIMINATION_VARIATION = 'variation_too_high'
|
72
|
+
ELIMINATION_RELEGATION = 'relegation_tier'
|
73
|
+
|
74
|
+
with_logging_context :log_context, ->(log) {
|
75
|
+
@total_tokens += log[:total_tokens] || 0 if log[:code] == :llm_usage
|
76
|
+
}
|
77
|
+
|
78
|
+
def initial_log
|
79
|
+
@players.each { |p| ActiveGenie::Logger.debug({ code: :new_player, player: p.to_h }) }
|
80
|
+
end
|
64
81
|
|
65
82
|
def set_initial_player_scores!
|
66
83
|
RankingScoring.call(@players, @criteria, reviewers: @reviewers, config: @config)
|
@@ -68,20 +85,58 @@ module ActiveGenie::Ranking
|
|
68
85
|
|
69
86
|
def eliminate_obvious_bad_players!
|
70
87
|
while @players.coefficient_of_variation >= SCORE_VARIATION_THRESHOLD
|
71
|
-
@players.eligible.last.eliminated =
|
88
|
+
@players.eligible.last.eliminated = ELIMINATION_VARIATION
|
72
89
|
end
|
73
90
|
end
|
74
91
|
|
75
92
|
def run_elo_round!
|
76
|
-
|
93
|
+
@elo_rounds_played += 1
|
94
|
+
|
95
|
+
elo_report = EloRound.call(@players, @criteria, config: @config)
|
96
|
+
|
97
|
+
@elo_round_battle_count += elo_report[:battles_count]
|
98
|
+
|
99
|
+
elo_report
|
77
100
|
end
|
78
101
|
|
79
102
|
def eliminate_relegation_players!
|
80
|
-
@players.calc_relegation_tier.each { |player| player.eliminated =
|
103
|
+
@players.calc_relegation_tier.each { |player| player.eliminated = ELIMINATION_RELEGATION }
|
104
|
+
end
|
105
|
+
|
106
|
+
def rebalance_players!(elo_report)
|
107
|
+
return if elo_report[:highest_elo_diff].negative?
|
108
|
+
|
109
|
+
@players.eligible.each do |player|
|
110
|
+
next if elo_report[:players_in_round].include?(player.id)
|
111
|
+
|
112
|
+
player.elo += elo_report[:highest_elo_diff]
|
113
|
+
end
|
81
114
|
end
|
82
115
|
|
83
116
|
def run_free_for_all!
|
84
|
-
FreeForAll.call(@players, @criteria, config: @config)
|
117
|
+
ffa_report = FreeForAll.call(@players, @criteria, config: @config)
|
118
|
+
|
119
|
+
@free_for_all_battle_count += ffa_report[:battles_count]
|
120
|
+
end
|
121
|
+
|
122
|
+
def report
|
123
|
+
{
|
124
|
+
ranking_id: ranking_id,
|
125
|
+
players_count: @players.size,
|
126
|
+
variation_too_high: @players.select { |player| player.eliminated == ELIMINATION_VARIATION }.size,
|
127
|
+
elo_rounds_played: @elo_rounds_played,
|
128
|
+
elo_round_battle_count: @elo_round_battle_count,
|
129
|
+
relegation_tier: @players.select { |player| player.eliminated == ELIMINATION_RELEGATION }.size,
|
130
|
+
ffa_round_battle_count: @free_for_all_battle_count,
|
131
|
+
top3: @players.eligible[0..2].map(&:id),
|
132
|
+
total_tokens: @total_tokens,
|
133
|
+
duration_seconds: Time.now - @start_time,
|
134
|
+
}
|
135
|
+
end
|
136
|
+
|
137
|
+
def final_logs
|
138
|
+
ActiveGenie::Logger.debug({ code: :ranking_final, players: @players.sorted.map(&:to_h) })
|
139
|
+
ActiveGenie::Logger.info({ code: :ranking, **report })
|
85
140
|
end
|
86
141
|
|
87
142
|
def log_context
|
@@ -31,14 +31,14 @@ module ActiveGenie::Ranking
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def generate_score(player)
|
34
|
-
score, reasoning = ActiveGenie::Scoring
|
34
|
+
score, reasoning = ActiveGenie::Scoring.call(
|
35
35
|
player.content,
|
36
36
|
@criteria,
|
37
37
|
@reviewers,
|
38
38
|
config: @config
|
39
39
|
).values_at('final_score', 'final_reasoning')
|
40
40
|
|
41
|
-
ActiveGenie::Logger.debug({
|
41
|
+
ActiveGenie::Logger.debug({ code: :new_score, player_id: player.id, score:, reasoning: })
|
42
42
|
|
43
43
|
score
|
44
44
|
end
|
@@ -52,7 +52,7 @@ module ActiveGenie::Ranking
|
|
52
52
|
config: @config
|
53
53
|
).values_at('reviewer1', 'reviewer2', 'reviewer3')
|
54
54
|
|
55
|
-
ActiveGenie::Logger.debug({
|
55
|
+
ActiveGenie::Logger.debug({ code: :new_reviewers, reviewers: [reviewer1, reviewer2, reviewer3] })
|
56
56
|
|
57
57
|
[reviewer1, reviewer2, reviewer3]
|
58
58
|
end
|
@@ -43,6 +43,42 @@ module ActiveGenie::Scoring
|
|
43
43
|
{ role: 'user', content: "Text to score: #{@text}" },
|
44
44
|
]
|
45
45
|
|
46
|
+
properties = build_properties
|
47
|
+
|
48
|
+
function = {
|
49
|
+
name: 'scoring',
|
50
|
+
description: 'Score the text based on the given criteria.',
|
51
|
+
parameters: {
|
52
|
+
type: "object",
|
53
|
+
properties:,
|
54
|
+
required: properties.keys
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
result = ::ActiveGenie::Clients::UnifiedClient.function_calling(
|
59
|
+
messages,
|
60
|
+
function,
|
61
|
+
model_tier: 'lower_tier',
|
62
|
+
config: @config
|
63
|
+
)
|
64
|
+
|
65
|
+
result['final_score'] = 0 if result['final_score'].nil?
|
66
|
+
|
67
|
+
ActiveGenie::Logger.debug({
|
68
|
+
code: :scoring,
|
69
|
+
text: @text[0..30],
|
70
|
+
criteria: @criteria[0..30],
|
71
|
+
reviewers: get_or_recommend_reviewers,
|
72
|
+
score: result['final_score'],
|
73
|
+
reasoning: result['final_reasoning']
|
74
|
+
})
|
75
|
+
|
76
|
+
result
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def build_properties
|
46
82
|
properties = {}
|
47
83
|
get_or_recommend_reviewers.each do |reviewer|
|
48
84
|
properties["#{reviewer}_reasoning"] = {
|
@@ -57,35 +93,18 @@ module ActiveGenie::Scoring
|
|
57
93
|
}
|
58
94
|
end
|
59
95
|
|
60
|
-
|
61
|
-
|
62
|
-
description: '
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
final_score: {
|
68
|
-
type: 'number',
|
69
|
-
description: 'The final score based on the previous reviewers',
|
70
|
-
},
|
71
|
-
final_reasoning: {
|
72
|
-
type: 'string',
|
73
|
-
description: 'The final reasoning based on the previous reviewers',
|
74
|
-
}
|
75
|
-
}
|
76
|
-
}
|
96
|
+
properties[:final_score] = {
|
97
|
+
type: 'number',
|
98
|
+
description: 'The final score based on the previous reviewers',
|
99
|
+
}
|
100
|
+
properties[:final_reasoning] = {
|
101
|
+
type: 'string',
|
102
|
+
description: 'The final reasoning based on the previous reviewers',
|
77
103
|
}
|
78
104
|
|
79
|
-
|
80
|
-
messages,
|
81
|
-
function,
|
82
|
-
model_tier: 'lower_tier',
|
83
|
-
config: @config
|
84
|
-
)
|
105
|
+
properties
|
85
106
|
end
|
86
107
|
|
87
|
-
private
|
88
|
-
|
89
108
|
def get_or_recommend_reviewers
|
90
109
|
@get_or_recommend_reviewers ||= if @reviewers.count > 0
|
91
110
|
@reviewers
|
data/lib/active_genie/scoring.rb
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
namespace :active_genie do
|
3
|
+
desc "Run benchmarks, optionally for a specific module (e.g., rake active_genie:benchmark[data_extractor])"
|
4
|
+
task :benchmark, [:module_name] do |_, args|
|
5
|
+
Rake::TestTask.new(:run_benchmarks) do |t|
|
6
|
+
t.libs << "benchmark"
|
7
|
+
|
8
|
+
if args[:module_name]
|
9
|
+
module_name = args[:module_name]
|
10
|
+
module_path = "benchmark/test_cases/#{module_name}/"
|
11
|
+
t.test_files = FileList["#{module_path}**/*_test.rb"]
|
12
|
+
puts "Running benchmarks for module: #{module_name}"
|
13
|
+
else
|
14
|
+
t.test_files = FileList["benchmark/test_cases/**/*_test.rb"]
|
15
|
+
puts "Running all benchmarks"
|
16
|
+
end
|
17
|
+
|
18
|
+
t.warning = false
|
19
|
+
end
|
20
|
+
|
21
|
+
begin
|
22
|
+
Rake::Task[:run_benchmarks].invoke
|
23
|
+
rescue => e
|
24
|
+
puts e
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|