active_genie 0.0.12 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +65 -22
- data/VERSION +1 -1
- data/lib/active_genie/battle/README.md +7 -7
- data/lib/active_genie/battle/basic.rb +48 -32
- data/lib/active_genie/battle.rb +4 -0
- data/lib/active_genie/clients/anthropic_client.rb +84 -0
- data/lib/active_genie/clients/base_client.rb +241 -0
- data/lib/active_genie/clients/google_client.rb +135 -0
- data/lib/active_genie/clients/helpers/retry.rb +29 -0
- data/lib/active_genie/clients/openai_client.rb +70 -91
- data/lib/active_genie/clients/unified_client.rb +4 -4
- data/lib/active_genie/concerns/loggable.rb +44 -0
- data/lib/active_genie/configuration/log_config.rb +1 -1
- data/lib/active_genie/configuration/providers/anthropic_config.rb +54 -0
- data/lib/active_genie/configuration/providers/base_config.rb +85 -0
- data/lib/active_genie/configuration/providers/deepseek_config.rb +54 -0
- data/lib/active_genie/configuration/providers/google_config.rb +56 -0
- data/lib/active_genie/configuration/providers/openai_config.rb +54 -0
- data/lib/active_genie/configuration/providers_config.rb +7 -4
- data/lib/active_genie/configuration/runtime_config.rb +35 -0
- data/lib/active_genie/configuration.rb +18 -4
- data/lib/active_genie/data_extractor/basic.rb +16 -3
- data/lib/active_genie/data_extractor.rb +4 -0
- data/lib/active_genie/logger.rb +40 -21
- data/lib/active_genie/ranking/elo_round.rb +71 -50
- data/lib/active_genie/ranking/free_for_all.rb +31 -14
- data/lib/active_genie/ranking/player.rb +11 -16
- data/lib/active_genie/ranking/players_collection.rb +4 -4
- data/lib/active_genie/ranking/ranking.rb +74 -19
- data/lib/active_genie/ranking/ranking_scoring.rb +3 -3
- data/lib/active_genie/scoring/basic.rb +44 -25
- data/lib/active_genie/scoring/recommended_reviewers.rb +1 -1
- data/lib/active_genie/scoring.rb +3 -0
- data/lib/tasks/benchmark.rake +27 -0
- metadata +92 -70
- data/lib/active_genie/configuration/openai_config.rb +0 -56
| @@ -13,32 +13,39 @@ module ActiveGenie::Ranking | |
| 13 13 | 
             
                  @criteria = criteria
         | 
| 14 14 | 
             
                  @config = config
         | 
| 15 15 | 
             
                  @tmp_defenders = []
         | 
| 16 | 
            +
                  @start_time = Time.now
         | 
| 17 | 
            +
                  @total_tokens = 0
         | 
| 18 | 
            +
                  @previous_elo = {}
         | 
| 19 | 
            +
                  @previous_highest_elo = @defender_tier.max_by(&:elo).elo
         | 
| 16 20 | 
             
                end
         | 
| 17 21 |  | 
| 18 22 | 
             
                def call
         | 
| 19 23 | 
             
                  ActiveGenie::Logger.with_context(log_context) do
         | 
| 20 | 
            -
                     | 
| 24 | 
            +
                    save_previous_elo
         | 
| 25 | 
            +
                    matches.each do |player_1, player_2|
         | 
| 21 26 | 
             
                      # TODO: battle can take a while, can be parallelized
         | 
| 22 | 
            -
                      winner, loser = battle( | 
| 23 | 
            -
             | 
| 27 | 
            +
                      winner, loser = battle(player_1, player_2)
         | 
| 24 28 | 
             
                      next if winner.nil? || loser.nil?
         | 
| 25 29 |  | 
| 26 | 
            -
                       | 
| 27 | 
            -
             | 
| 28 | 
            -
                      winner.elo = new_winner_elo
         | 
| 29 | 
            -
                      loser.elo = new_loser_elo
         | 
| 30 | 
            +
                      winner.elo = calculate_new_elo(winner.elo, loser.elo, 1)
         | 
| 31 | 
            +
                      loser.elo = calculate_new_elo(loser.elo, winner.elo, 0)
         | 
| 30 32 | 
             
                    end
         | 
| 31 | 
            -
             | 
| 32 | 
            -
                    # TODO: add a round report. Duration, Elo changes, etc.
         | 
| 33 33 | 
             
                  end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                  ActiveGenie::Logger.info({ code: :elo_round_report, **report })
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                  report
         | 
| 34 38 | 
             
                end
         | 
| 35 39 |  | 
| 36 40 | 
             
                private
         | 
| 37 41 |  | 
| 38 42 | 
             
                BATTLE_PER_PLAYER = 3
         | 
| 39 | 
            -
                LOSE_PENALTY = 15
         | 
| 40 43 | 
             
                K = 32
         | 
| 41 44 |  | 
| 45 | 
            +
                def save_previous_elo
         | 
| 46 | 
            +
                  @previous_elo = @players.map { |player| [player.id, player.elo] }.to_h
         | 
| 47 | 
            +
                end
         | 
| 48 | 
            +
             | 
| 42 49 | 
             
                def matches
         | 
| 43 50 | 
             
                  @relegation_tier.reduce([]) do |matches, attack_player|
         | 
| 44 51 | 
             
                    BATTLE_PER_PLAYER.times do
         | 
| @@ -49,53 +56,35 @@ module ActiveGenie::Ranking | |
| 49 56 | 
             
                end
         | 
| 50 57 |  | 
| 51 58 | 
             
                def next_defense_player
         | 
| 52 | 
            -
                  @tmp_defenders = @defender_tier if @tmp_defenders.size.zero?
         | 
| 59 | 
            +
                  @tmp_defenders = @defender_tier.shuffle if @tmp_defenders.size.zero?
         | 
| 53 60 |  | 
| 54 | 
            -
                  @tmp_defenders. | 
| 61 | 
            +
                  @tmp_defenders.pop
         | 
| 55 62 | 
             
                end
         | 
| 56 63 |  | 
| 57 | 
            -
                def battle( | 
| 58 | 
            -
                   | 
| 59 | 
            -
                     | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
                     | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
                  ActiveGenie::Logger.debug({
         | 
| 72 | 
            -
                    step: :elo_round_battle,
         | 
| 73 | 
            -
                    player_ids: [player_a.id, player_b.id],
         | 
| 74 | 
            -
                    winner_id: winner&.id,
         | 
| 75 | 
            -
                    loser_id: loser&.id,
         | 
| 76 | 
            -
                    reasoning: result['reasoning']
         | 
| 77 | 
            -
                  })
         | 
| 64 | 
            +
                def battle(player_1, player_2)
         | 
| 65 | 
            +
                  ActiveGenie::Logger.with_context({ player_1_id: player_1.id, player_2_id: player_2.id }) do
         | 
| 66 | 
            +
                    result = ActiveGenie::Battle.basic(
         | 
| 67 | 
            +
                      player_1.content,
         | 
| 68 | 
            +
                      player_2.content,
         | 
| 69 | 
            +
                      @criteria,
         | 
| 70 | 
            +
                      config: @config
         | 
| 71 | 
            +
                    )
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                    winner, loser = case result['winner']
         | 
| 74 | 
            +
                      when 'player_1' then [player_1, player_2]
         | 
| 75 | 
            +
                      when 'player_2' then [player_2, player_1]
         | 
| 76 | 
            +
                      when 'draw' then [nil, nil]
         | 
| 77 | 
            +
                    end
         | 
| 78 78 |  | 
| 79 | 
            -
             | 
| 79 | 
            +
                    [winner, loser]
         | 
| 80 | 
            +
                  end
         | 
| 80 81 | 
             
                end
         | 
| 81 82 |  | 
| 82 83 | 
             
                # INFO: Read more about the Elo rating system on https://en.wikipedia.org/wiki/Elo_rating_system
         | 
| 83 | 
            -
                def calculate_new_elo( | 
| 84 | 
            -
                   | 
| 85 | 
            -
                   | 
| 86 | 
            -
             | 
| 87 | 
            -
                  new_winner_elo = [winner_elo + K * (1 - expected_score_a), max_defense_elo].min
         | 
| 88 | 
            -
                  new_loser_elo = [loser_elo + K * (1 - expected_score_b) - LOSE_PENALTY, min_relegation_elo].max
         | 
| 89 | 
            -
             | 
| 90 | 
            -
                  [new_winner_elo, new_loser_elo]
         | 
| 91 | 
            -
                end
         | 
| 92 | 
            -
             | 
| 93 | 
            -
                def max_defense_elo
         | 
| 94 | 
            -
                  @defender_tier.max_by(&:elo).elo
         | 
| 95 | 
            -
                end
         | 
| 96 | 
            -
             | 
| 97 | 
            -
                def min_relegation_elo
         | 
| 98 | 
            -
                  @relegation_tier.min_by(&:elo).elo
         | 
| 84 | 
            +
                def calculate_new_elo(player_rating, opponent_rating, score)
         | 
| 85 | 
            +
                  expected_score = 1.0 / (1.0 + 10.0 ** ((opponent_rating - player_rating) / 400.0))
         | 
| 86 | 
            +
                  
         | 
| 87 | 
            +
                  player_rating + (K * (score - expected_score)).round
         | 
| 99 88 | 
             
                end
         | 
| 100 89 |  | 
| 101 90 | 
             
                def log_context
         | 
| @@ -109,5 +98,37 @@ module ActiveGenie::Ranking | |
| 109 98 | 
             
                  ranking_unique_key = [relegation_tier_ids, defender_tier_ids, @criteria, @config.to_json].join('-')
         | 
| 110 99 | 
             
                  Digest::MD5.hexdigest(ranking_unique_key)
         | 
| 111 100 | 
             
                end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                def report
         | 
| 103 | 
            +
                  {
         | 
| 104 | 
            +
                    elo_round_id:,
         | 
| 105 | 
            +
                    players_in_round: players_in_round.map(&:id),
         | 
| 106 | 
            +
                    battles_count: matches.size,
         | 
| 107 | 
            +
                    duration_seconds: Time.now - @start_time,
         | 
| 108 | 
            +
                    total_tokens: @total_tokens,
         | 
| 109 | 
            +
                    previous_highest_elo: @previous_highest_elo,
         | 
| 110 | 
            +
                    highest_elo:,
         | 
| 111 | 
            +
                    highest_elo_diff: highest_elo - @previous_highest_elo,
         | 
| 112 | 
            +
                    players_elo_diff:,
         | 
| 113 | 
            +
                  }
         | 
| 114 | 
            +
                end
         | 
| 115 | 
            +
             | 
| 116 | 
            +
                def players_in_round
         | 
| 117 | 
            +
                  @defender_tier + @relegation_tier
         | 
| 118 | 
            +
                end
         | 
| 119 | 
            +
             | 
| 120 | 
            +
                def highest_elo
         | 
| 121 | 
            +
                  players_in_round.max_by(&:elo).elo
         | 
| 122 | 
            +
                end
         | 
| 123 | 
            +
             | 
| 124 | 
            +
                def players_elo_diff
         | 
| 125 | 
            +
                  players_in_round.map do |player|
         | 
| 126 | 
            +
                    [player.id, player.elo - @previous_elo[player.id]]
         | 
| 127 | 
            +
                  end.sort_by { |_, diff| -diff }.to_h
         | 
| 128 | 
            +
                end
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                def log_observer(log)
         | 
| 131 | 
            +
                  @total_tokens += log[:total_tokens] if log[:code] == :llm_usage
         | 
| 132 | 
            +
                end
         | 
| 112 133 | 
             
              end
         | 
| 113 134 | 
             
            end
         | 
| @@ -10,24 +10,28 @@ module ActiveGenie::Ranking | |
| 10 10 | 
             
                  @players = players
         | 
| 11 11 | 
             
                  @criteria = criteria
         | 
| 12 12 | 
             
                  @config = config
         | 
| 13 | 
            +
                  @start_time = Time.now
         | 
| 14 | 
            +
                  @total_tokens = 0
         | 
| 13 15 | 
             
                end
         | 
| 14 16 |  | 
| 15 17 | 
             
                def call
         | 
| 16 | 
            -
                  ActiveGenie::Logger.with_context(log_context) do
         | 
| 17 | 
            -
                    matches.each do | | 
| 18 | 
            -
                      winner, loser = battle( | 
| 18 | 
            +
                  ActiveGenie::Logger.with_context(log_context, observer: method(:log_observer)) do
         | 
| 19 | 
            +
                    matches.each do |player_1, player_2|
         | 
| 20 | 
            +
                      winner, loser = battle(player_1, player_2)
         | 
| 19 21 |  | 
| 20 22 | 
             
                      if winner.nil? || loser.nil?
         | 
| 21 | 
            -
                         | 
| 22 | 
            -
                         | 
| 23 | 
            +
                        player_1.draw!
         | 
| 24 | 
            +
                        player_2.draw!
         | 
| 23 25 | 
             
                      else
         | 
| 24 26 | 
             
                        winner.win!
         | 
| 25 27 | 
             
                        loser.lose!
         | 
| 26 28 | 
             
                      end
         | 
| 27 29 | 
             
                    end
         | 
| 28 | 
            -
             | 
| 29 | 
            -
                    # TODO: add a freeForAll report. Duration, Elo changes, etc.
         | 
| 30 30 | 
             
                  end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                  ActiveGenie::Logger.info({ code: :free_for_all_report, **report })
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                  report
         | 
| 31 35 | 
             
                end
         | 
| 32 36 |  | 
| 33 37 | 
             
                private
         | 
| @@ -38,23 +42,23 @@ module ActiveGenie::Ranking | |
| 38 42 | 
             
                  @players.eligible.combination(2).to_a
         | 
| 39 43 | 
             
                end
         | 
| 40 44 |  | 
| 41 | 
            -
                def battle( | 
| 45 | 
            +
                def battle(player_1, player_2)
         | 
| 42 46 | 
             
                  result = ActiveGenie::Battle.basic(
         | 
| 43 | 
            -
                     | 
| 44 | 
            -
                     | 
| 47 | 
            +
                    player_1.content,
         | 
| 48 | 
            +
                    player_2.content,
         | 
| 45 49 | 
             
                    @criteria,
         | 
| 46 50 | 
             
                    config: @config
         | 
| 47 51 | 
             
                  )
         | 
| 48 52 |  | 
| 49 53 | 
             
                  winner, loser = case result['winner']
         | 
| 50 | 
            -
                    when ' | 
| 51 | 
            -
                    when ' | 
| 54 | 
            +
                    when 'player_1' then [player_1, player_2, result['reasoning']]
         | 
| 55 | 
            +
                    when 'player_2' then [player_2, player_1, result['reasoning']]
         | 
| 52 56 | 
             
                    when 'draw' then [nil, nil, result['reasoning']]
         | 
| 53 57 | 
             
                  end
         | 
| 54 58 |  | 
| 55 59 | 
             
                  ActiveGenie::Logger.debug({
         | 
| 56 | 
            -
                     | 
| 57 | 
            -
                    player_ids: [ | 
| 60 | 
            +
                    code: :free_for_all_battle,
         | 
| 61 | 
            +
                    player_ids: [player_1.id, player_2.id],
         | 
| 58 62 | 
             
                    winner_id: winner&.id,
         | 
| 59 63 | 
             
                    loser_id: loser&.id,
         | 
| 60 64 | 
             
                    reasoning: result['reasoning']
         | 
| @@ -72,5 +76,18 @@ module ActiveGenie::Ranking | |
| 72 76 | 
             
                  ranking_unique_key = [eligible_ids, @criteria, @config.to_json].join('-')
         | 
| 73 77 | 
             
                  Digest::MD5.hexdigest(ranking_unique_key)
         | 
| 74 78 | 
             
                end
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                def report
         | 
| 81 | 
            +
                  {
         | 
| 82 | 
            +
                    free_for_all_id:,
         | 
| 83 | 
            +
                    battles_count: matches.size,
         | 
| 84 | 
            +
                    duration_seconds: Time.now - @start_time,
         | 
| 85 | 
            +
                    total_tokens: @total_tokens,
         | 
| 86 | 
            +
                  }
         | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                def log_observer(log)
         | 
| 90 | 
            +
                  @total_tokens += log[:total_tokens] if log[:code] == :llm_usage
         | 
| 91 | 
            +
                end
         | 
| 75 92 | 
             
              end
         | 
| 76 93 | 
             
            end
         | 
| @@ -21,39 +21,34 @@ module ActiveGenie::Ranking | |
| 21 21 | 
             
                attr_accessor :rank
         | 
| 22 22 |  | 
| 23 23 | 
             
                def score=(value)
         | 
| 24 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_score, player_id: id, score: value }) if value != @score
         | 
| 24 25 | 
             
                  @score = value
         | 
| 25 | 
            -
                   | 
| 26 | 
            -
                end
         | 
| 27 | 
            -
             | 
| 28 | 
            -
                def elo
         | 
| 29 | 
            -
                  generate_elo_by_score if @elo.nil?
         | 
| 30 | 
            -
             | 
| 31 | 
            -
                  @elo
         | 
| 26 | 
            +
                  @elo = generate_elo_by_score
         | 
| 32 27 | 
             
                end
         | 
| 33 28 |  | 
| 34 29 | 
             
                def elo=(value)
         | 
| 30 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_elo, player_id: id, elo: value }) if value != @elo
         | 
| 35 31 | 
             
                  @elo = value
         | 
| 36 | 
            -
                  ActiveGenie::Logger.debug({ step: :new_elo, player_id: id, elo: value })
         | 
| 37 32 | 
             
                end
         | 
| 38 33 |  | 
| 39 34 | 
             
                def eliminated=(value)
         | 
| 35 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_eliminated, player_id: id, eliminated: value }) if value != @eliminated
         | 
| 40 36 | 
             
                  @eliminated = value
         | 
| 41 | 
            -
                  ActiveGenie::Logger.debug({ step: :new_eliminated, player_id: id, eliminated: value })
         | 
| 42 37 | 
             
                end
         | 
| 43 38 |  | 
| 44 39 | 
             
                def draw!
         | 
| 45 40 | 
             
                  @ffa_draw_count += 1
         | 
| 46 | 
            -
                  ActiveGenie::Logger.debug({  | 
| 41 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'draw', ffa_score: })
         | 
| 47 42 | 
             
                end
         | 
| 48 43 |  | 
| 49 44 | 
             
                def win!
         | 
| 50 45 | 
             
                  @ffa_win_count += 1
         | 
| 51 | 
            -
                  ActiveGenie::Logger.debug({  | 
| 46 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'win', ffa_score: })
         | 
| 52 47 | 
             
                end
         | 
| 53 48 |  | 
| 54 49 | 
             
                def lose!
         | 
| 55 50 | 
             
                  @ffa_lose_count += 1
         | 
| 56 | 
            -
                  ActiveGenie::Logger.debug({  | 
| 51 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_ffa_score, player_id: id, result: 'lose', ffa_score: })
         | 
| 57 52 | 
             
                end
         | 
| 58 53 |  | 
| 59 54 | 
             
                def ffa_score
         | 
| @@ -86,12 +81,12 @@ module ActiveGenie::Ranking | |
| 86 81 | 
             
                  method_name == :[] || super
         | 
| 87 82 | 
             
                end
         | 
| 88 83 |  | 
| 84 | 
            +
                def generate_elo_by_score
         | 
| 85 | 
            +
                  BASE_ELO + ((@score || 0) - 50)
         | 
| 86 | 
            +
                end
         | 
| 87 | 
            +
             | 
| 89 88 | 
             
                private
         | 
| 90 89 |  | 
| 91 90 | 
             
                BASE_ELO = 1000
         | 
| 92 | 
            -
             | 
| 93 | 
            -
                def generate_elo_by_score
         | 
| 94 | 
            -
                  @elo = BASE_ELO + ((@score || 0) - 50)
         | 
| 95 | 
            -
                end
         | 
| 96 91 | 
             
              end
         | 
| 97 92 | 
             
            end
         | 
| @@ -41,9 +41,9 @@ module ActiveGenie::Ranking | |
| 41 41 | 
             
                end
         | 
| 42 42 |  | 
| 43 43 | 
             
                def sorted
         | 
| 44 | 
            -
                  @players.sort_by { |p| [-p.ffa_score, -(p.elo || 0), -(p.score || 0)] }
         | 
| 45 | 
            -
                   | 
| 46 | 
            -
                   | 
| 44 | 
            +
                  sorted_players = @players.sort_by { |p| [-p.ffa_score, -(p.elo || 0), -(p.score || 0)] }
         | 
| 45 | 
            +
                  sorted_players.each_with_index { |p, i| p.rank = i + 1 }
         | 
| 46 | 
            +
                  sorted_players
         | 
| 47 47 | 
             
                end
         | 
| 48 48 |  | 
| 49 49 | 
             
                def to_h
         | 
| @@ -57,7 +57,7 @@ module ActiveGenie::Ranking | |
| 57 57 | 
             
                private
         | 
| 58 58 |  | 
| 59 59 | 
             
                def build(param_players)
         | 
| 60 | 
            -
                  param_players.map { | | 
| 60 | 
            +
                  param_players.map { |p| Player.new(p) }
         | 
| 61 61 | 
             
                end
         | 
| 62 62 |  | 
| 63 63 | 
             
                # Returns the number of players to battle in each round
         | 
| @@ -1,3 +1,4 @@ | |
| 1 | 
            +
            require_relative '../concerns/loggable'
         | 
| 1 2 | 
             
            require_relative './players_collection'
         | 
| 2 3 | 
             
            require_relative './free_for_all'
         | 
| 3 4 | 
             
            require_relative './elo_round'
         | 
| @@ -28,39 +29,55 @@ require_relative './ranking_scoring' | |
| 28 29 | 
             
            # @return [Hash] Final ranked player results
         | 
| 29 30 | 
             
            module ActiveGenie::Ranking
         | 
| 30 31 | 
             
              class Ranking
         | 
| 32 | 
            +
                include ActiveGenie::Concerns::Loggable
         | 
| 33 | 
            +
             | 
| 31 34 | 
             
                def self.call(...)
         | 
| 32 35 | 
             
                  new(...).call
         | 
| 33 36 | 
             
                end
         | 
| 34 37 |  | 
| 35 38 | 
             
                def initialize(param_players, criteria, reviewers: [], config: {})
         | 
| 36 | 
            -
                  @param_players = param_players
         | 
| 37 39 | 
             
                  @criteria = criteria
         | 
| 38 40 | 
             
                  @reviewers = Array(reviewers).compact.uniq
         | 
| 39 41 | 
             
                  @config = ActiveGenie::Configuration.to_h(config)
         | 
| 40 | 
            -
                  @players =  | 
| 42 | 
            +
                  @players = PlayersCollection.new(param_players)
         | 
| 43 | 
            +
                  @elo_rounds_played = 0
         | 
| 44 | 
            +
                  @elo_round_battle_count = 0
         | 
| 45 | 
            +
                  @free_for_all_battle_count = 0
         | 
| 46 | 
            +
                  @total_tokens = 0
         | 
| 47 | 
            +
                  @start_time = Time.now
         | 
| 41 48 | 
             
                end
         | 
| 42 49 |  | 
| 43 50 | 
             
                def call
         | 
| 44 | 
            -
                   | 
| 45 | 
            -
             | 
| 46 | 
            -
                   | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
                     | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
                    end
         | 
| 54 | 
            -
              
         | 
| 55 | 
            -
                    run_free_for_all!
         | 
| 51 | 
            +
                  initial_log
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                  set_initial_player_scores!
         | 
| 54 | 
            +
                  eliminate_obvious_bad_players!
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                  while @players.elo_eligible?
         | 
| 57 | 
            +
                    elo_report = run_elo_round!
         | 
| 58 | 
            +
                    eliminate_relegation_players!
         | 
| 59 | 
            +
                    rebalance_players!(elo_report)
         | 
| 56 60 | 
             
                  end
         | 
| 57 61 |  | 
| 62 | 
            +
                  run_free_for_all!
         | 
| 63 | 
            +
                  final_logs
         | 
| 64 | 
            +
             | 
| 58 65 | 
             
                  @players.sorted
         | 
| 59 66 | 
             
                end
         | 
| 60 67 |  | 
| 61 68 | 
             
                private
         | 
| 62 69 |  | 
| 63 | 
            -
                SCORE_VARIATION_THRESHOLD =  | 
| 70 | 
            +
                SCORE_VARIATION_THRESHOLD = 15
         | 
| 71 | 
            +
                ELIMINATION_VARIATION = 'variation_too_high'
         | 
| 72 | 
            +
                ELIMINATION_RELEGATION = 'relegation_tier'
         | 
| 73 | 
            +
                
         | 
| 74 | 
            +
                with_logging_context :log_context, ->(log) { 
         | 
| 75 | 
            +
                  @total_tokens += log[:total_tokens] || 0 if log[:code] == :llm_usage
         | 
| 76 | 
            +
                }
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                def initial_log
         | 
| 79 | 
            +
                  @players.each { |p| ActiveGenie::Logger.debug({ code: :new_player, player: p.to_h }) }
         | 
| 80 | 
            +
                end
         | 
| 64 81 |  | 
| 65 82 | 
             
                def set_initial_player_scores!
         | 
| 66 83 | 
             
                  RankingScoring.call(@players, @criteria, reviewers: @reviewers, config: @config)
         | 
| @@ -68,20 +85,58 @@ module ActiveGenie::Ranking | |
| 68 85 |  | 
| 69 86 | 
             
                def eliminate_obvious_bad_players!
         | 
| 70 87 | 
             
                  while @players.coefficient_of_variation >= SCORE_VARIATION_THRESHOLD
         | 
| 71 | 
            -
                    @players.eligible.last.eliminated =  | 
| 88 | 
            +
                    @players.eligible.last.eliminated = ELIMINATION_VARIATION
         | 
| 72 89 | 
             
                  end
         | 
| 73 90 | 
             
                end
         | 
| 74 91 |  | 
| 75 92 | 
             
                def run_elo_round!
         | 
| 76 | 
            -
                   | 
| 93 | 
            +
                  @elo_rounds_played += 1
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                  elo_report = EloRound.call(@players, @criteria, config: @config)
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                  @elo_round_battle_count += elo_report[:battles_count]
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                  elo_report
         | 
| 77 100 | 
             
                end
         | 
| 78 101 |  | 
| 79 102 | 
             
                def eliminate_relegation_players!
         | 
| 80 | 
            -
                  @players.calc_relegation_tier.each { |player| player.eliminated =  | 
| 103 | 
            +
                  @players.calc_relegation_tier.each { |player| player.eliminated = ELIMINATION_RELEGATION }
         | 
| 104 | 
            +
                end
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                def rebalance_players!(elo_report)
         | 
| 107 | 
            +
                  return if elo_report[:highest_elo_diff].negative?
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                  @players.eligible.each do |player|
         | 
| 110 | 
            +
                    next if elo_report[:players_in_round].include?(player.id)
         | 
| 111 | 
            +
             | 
| 112 | 
            +
                    player.elo += elo_report[:highest_elo_diff]
         | 
| 113 | 
            +
                  end
         | 
| 81 114 | 
             
                end
         | 
| 82 115 |  | 
| 83 116 | 
             
                def run_free_for_all!
         | 
| 84 | 
            -
                  FreeForAll.call(@players, @criteria, config: @config)
         | 
| 117 | 
            +
                  ffa_report = FreeForAll.call(@players, @criteria, config: @config)
         | 
| 118 | 
            +
             | 
| 119 | 
            +
                  @free_for_all_battle_count += ffa_report[:battles_count]
         | 
| 120 | 
            +
                end
         | 
| 121 | 
            +
             | 
| 122 | 
            +
                def report
         | 
| 123 | 
            +
                  {
         | 
| 124 | 
            +
                    ranking_id: ranking_id,
         | 
| 125 | 
            +
                    players_count: @players.size,
         | 
| 126 | 
            +
                    variation_too_high: @players.select { |player| player.eliminated == ELIMINATION_VARIATION }.size,
         | 
| 127 | 
            +
                    elo_rounds_played: @elo_rounds_played,
         | 
| 128 | 
            +
                    elo_round_battle_count: @elo_round_battle_count,
         | 
| 129 | 
            +
                    relegation_tier: @players.select { |player| player.eliminated == ELIMINATION_RELEGATION }.size,
         | 
| 130 | 
            +
                    ffa_round_battle_count: @free_for_all_battle_count,
         | 
| 131 | 
            +
                    top3: @players.eligible[0..2].map(&:id),
         | 
| 132 | 
            +
                    total_tokens: @total_tokens,
         | 
| 133 | 
            +
                    duration_seconds: Time.now - @start_time,
         | 
| 134 | 
            +
                  }
         | 
| 135 | 
            +
                end
         | 
| 136 | 
            +
                
         | 
| 137 | 
            +
                def final_logs
         | 
| 138 | 
            +
                  ActiveGenie::Logger.debug({ code: :ranking_final, players: @players.sorted.map(&:to_h) })
         | 
| 139 | 
            +
                  ActiveGenie::Logger.info({ code: :ranking, **report })
         | 
| 85 140 | 
             
                end
         | 
| 86 141 |  | 
| 87 142 | 
             
                def log_context
         | 
| @@ -31,14 +31,14 @@ module ActiveGenie::Ranking | |
| 31 31 | 
             
                end
         | 
| 32 32 |  | 
| 33 33 | 
             
                def generate_score(player)
         | 
| 34 | 
            -
                  score, reasoning = ActiveGenie::Scoring | 
| 34 | 
            +
                  score, reasoning = ActiveGenie::Scoring.call(
         | 
| 35 35 | 
             
                    player.content,
         | 
| 36 36 | 
             
                    @criteria,
         | 
| 37 37 | 
             
                    @reviewers,
         | 
| 38 38 | 
             
                    config: @config
         | 
| 39 39 | 
             
                  ).values_at('final_score', 'final_reasoning')
         | 
| 40 40 |  | 
| 41 | 
            -
                  ActiveGenie::Logger.debug({ | 
| 41 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_score, player_id: player.id, score:, reasoning: })
         | 
| 42 42 |  | 
| 43 43 | 
             
                  score
         | 
| 44 44 | 
             
                end
         | 
| @@ -52,7 +52,7 @@ module ActiveGenie::Ranking | |
| 52 52 | 
             
                    config: @config
         | 
| 53 53 | 
             
                  ).values_at('reviewer1', 'reviewer2', 'reviewer3')
         | 
| 54 54 |  | 
| 55 | 
            -
                  ActiveGenie::Logger.debug({ | 
| 55 | 
            +
                  ActiveGenie::Logger.debug({ code: :new_reviewers, reviewers: [reviewer1, reviewer2, reviewer3] })
         | 
| 56 56 |  | 
| 57 57 | 
             
                  [reviewer1, reviewer2, reviewer3]
         | 
| 58 58 | 
             
                end
         | 
| @@ -43,6 +43,42 @@ module ActiveGenie::Scoring | |
| 43 43 | 
             
                    {  role: 'user', content: "Text to score: #{@text}" }, 
         | 
| 44 44 | 
             
                  ]
         | 
| 45 45 |  | 
| 46 | 
            +
                  properties = build_properties
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                  function = {
         | 
| 49 | 
            +
                    name: 'scoring',
         | 
| 50 | 
            +
                    description: 'Score the text based on the given criteria.',
         | 
| 51 | 
            +
                    parameters: {
         | 
| 52 | 
            +
                      type: "object",
         | 
| 53 | 
            +
                      properties:,
         | 
| 54 | 
            +
                      required: properties.keys
         | 
| 55 | 
            +
                    }
         | 
| 56 | 
            +
                  }
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                  result = ::ActiveGenie::Clients::UnifiedClient.function_calling(
         | 
| 59 | 
            +
                    messages,
         | 
| 60 | 
            +
                    function,
         | 
| 61 | 
            +
                    model_tier: 'lower_tier',
         | 
| 62 | 
            +
                    config: @config
         | 
| 63 | 
            +
                  )
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                  result['final_score'] = 0 if result['final_score'].nil?
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                  ActiveGenie::Logger.debug({
         | 
| 68 | 
            +
                    code: :scoring,
         | 
| 69 | 
            +
                    text: @text[0..30],
         | 
| 70 | 
            +
                    criteria: @criteria[0..30],
         | 
| 71 | 
            +
                    reviewers: get_or_recommend_reviewers,
         | 
| 72 | 
            +
                    score: result['final_score'],
         | 
| 73 | 
            +
                    reasoning: result['final_reasoning']
         | 
| 74 | 
            +
                  })
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                  result
         | 
| 77 | 
            +
                end
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                private
         | 
| 80 | 
            +
                
         | 
| 81 | 
            +
                def build_properties
         | 
| 46 82 | 
             
                  properties = {}
         | 
| 47 83 | 
             
                  get_or_recommend_reviewers.each do |reviewer|
         | 
| 48 84 | 
             
                    properties["#{reviewer}_reasoning"] = {
         | 
| @@ -57,35 +93,18 @@ module ActiveGenie::Scoring | |
| 57 93 | 
             
                    }
         | 
| 58 94 | 
             
                  end
         | 
| 59 95 |  | 
| 60 | 
            -
                   | 
| 61 | 
            -
                     | 
| 62 | 
            -
                    description: ' | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
                        final_score: {
         | 
| 68 | 
            -
                          type: 'number',
         | 
| 69 | 
            -
                          description: 'The final score based on the previous reviewers',
         | 
| 70 | 
            -
                        },
         | 
| 71 | 
            -
                        final_reasoning: {
         | 
| 72 | 
            -
                          type: 'string',
         | 
| 73 | 
            -
                          description: 'The final reasoning based on the previous reviewers',
         | 
| 74 | 
            -
                        }
         | 
| 75 | 
            -
                      }
         | 
| 76 | 
            -
                    }
         | 
| 96 | 
            +
                  properties[:final_score] = {
         | 
| 97 | 
            +
                    type: 'number',
         | 
| 98 | 
            +
                    description: 'The final score based on the previous reviewers',
         | 
| 99 | 
            +
                  }
         | 
| 100 | 
            +
                  properties[:final_reasoning] = {
         | 
| 101 | 
            +
                    type: 'string',
         | 
| 102 | 
            +
                    description: 'The final reasoning based on the previous reviewers',
         | 
| 77 103 | 
             
                  }
         | 
| 78 104 |  | 
| 79 | 
            -
                   | 
| 80 | 
            -
                    messages,
         | 
| 81 | 
            -
                    function,
         | 
| 82 | 
            -
                    model_tier: 'lower_tier',
         | 
| 83 | 
            -
                    config: @config
         | 
| 84 | 
            -
                  )
         | 
| 105 | 
            +
                  properties
         | 
| 85 106 | 
             
                end
         | 
| 86 107 |  | 
| 87 | 
            -
                private
         | 
| 88 | 
            -
             | 
| 89 108 | 
             
                def get_or_recommend_reviewers
         | 
| 90 109 | 
             
                  @get_or_recommend_reviewers ||= if @reviewers.count > 0 
         | 
| 91 110 | 
             
                    @reviewers
         | 
    
        data/lib/active_genie/scoring.rb
    CHANGED
    
    
| @@ -0,0 +1,27 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            namespace :active_genie do
         | 
| 3 | 
            +
              desc "Run benchmarks, optionally for a specific module (e.g., rake active_genie:benchmark[data_extractor])"
         | 
| 4 | 
            +
              task :benchmark, [:module_name] do |_, args|
         | 
| 5 | 
            +
                Rake::TestTask.new(:run_benchmarks) do |t|
         | 
| 6 | 
            +
                  t.libs << "benchmark"
         | 
| 7 | 
            +
                  
         | 
| 8 | 
            +
                  if args[:module_name]
         | 
| 9 | 
            +
                    module_name = args[:module_name]
         | 
| 10 | 
            +
                    module_path = "benchmark/test_cases/#{module_name}/"
         | 
| 11 | 
            +
                    t.test_files = FileList["#{module_path}**/*_test.rb"]
         | 
| 12 | 
            +
                    puts "Running benchmarks for module: #{module_name}"
         | 
| 13 | 
            +
                  else
         | 
| 14 | 
            +
                    t.test_files = FileList["benchmark/test_cases/**/*_test.rb"]
         | 
| 15 | 
            +
                    puts "Running all benchmarks"
         | 
| 16 | 
            +
                  end
         | 
| 17 | 
            +
                  
         | 
| 18 | 
            +
                  t.warning = false
         | 
| 19 | 
            +
                end
         | 
| 20 | 
            +
                
         | 
| 21 | 
            +
                begin
         | 
| 22 | 
            +
                  Rake::Task[:run_benchmarks].invoke
         | 
| 23 | 
            +
                rescue => e
         | 
| 24 | 
            +
                  puts e
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
            end
         |