active_genie 0.25.1 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -5
- data/VERSION +1 -1
- data/lib/active_genie/battle/README.md +7 -7
- data/lib/active_genie/battle/generalist.json +36 -0
- data/lib/active_genie/battle/generalist.md +16 -0
- data/lib/active_genie/battle/generalist.rb +16 -69
- data/lib/active_genie/clients/providers/anthropic_client.rb +61 -40
- data/lib/active_genie/clients/providers/base_client.rb +44 -57
- data/lib/active_genie/clients/providers/deepseek_client.rb +57 -52
- data/lib/active_genie/clients/providers/google_client.rb +58 -60
- data/lib/active_genie/clients/providers/openai_client.rb +52 -55
- data/lib/active_genie/clients/unified_client.rb +4 -4
- data/lib/active_genie/config/battle_config.rb +2 -0
- data/lib/active_genie/config/llm_config.rb +3 -1
- data/lib/active_genie/config/log_config.rb +38 -14
- data/lib/active_genie/config/providers/anthropic_config.rb +2 -2
- data/lib/active_genie/config/providers/deepseek_config.rb +2 -2
- data/lib/active_genie/config/providers/google_config.rb +2 -2
- data/lib/active_genie/config/providers/openai_config.rb +2 -2
- data/lib/active_genie/config/providers_config.rb +4 -4
- data/lib/active_genie/config/scoring_config.rb +2 -0
- data/lib/active_genie/configuration.rb +14 -8
- data/lib/active_genie/data_extractor/from_informal.json +11 -0
- data/lib/active_genie/data_extractor/from_informal.rb +5 -13
- data/lib/active_genie/data_extractor/generalist.json +9 -0
- data/lib/active_genie/data_extractor/generalist.rb +12 -11
- data/lib/active_genie/errors/invalid_log_output_error.rb +19 -0
- data/lib/active_genie/logger.rb +13 -5
- data/lib/active_genie/{concerns → ranking/concerns}/loggable.rb +2 -5
- data/lib/active_genie/ranking/elo_round.rb +30 -28
- data/lib/active_genie/ranking/free_for_all.rb +30 -22
- data/lib/active_genie/ranking/player.rb +53 -19
- data/lib/active_genie/ranking/players_collection.rb +17 -13
- data/lib/active_genie/ranking/ranking.rb +21 -20
- data/lib/active_genie/ranking/ranking_scoring.rb +2 -20
- data/lib/active_genie/scoring/generalist.json +9 -0
- data/lib/active_genie/scoring/generalist.md +46 -0
- data/lib/active_genie/scoring/generalist.rb +13 -65
- data/lib/active_genie/scoring/recommended_reviewers.rb +2 -2
- metadata +11 -4
@@ -1,10 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative '
|
4
|
-
require_relative '
|
5
|
-
require_relative '
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
3
|
+
require_relative 'concerns/loggable'
|
4
|
+
require_relative 'players_collection'
|
5
|
+
require_relative 'free_for_all'
|
6
|
+
require_relative 'elo_round'
|
7
|
+
require_relative 'ranking_scoring'
|
8
8
|
|
9
9
|
# This class orchestrates player ranking through multiple evaluation stages
|
10
10
|
# using Elo ranking and free-for-all match simulations.
|
@@ -38,7 +38,8 @@ module ActiveGenie
|
|
38
38
|
new(...).call
|
39
39
|
end
|
40
40
|
|
41
|
-
def initialize(
|
41
|
+
def initialize(param_players, criteria, reviewers: [], config: {})
|
42
|
+
@param_players = param_players
|
42
43
|
@criteria = criteria
|
43
44
|
@reviewers = Array(reviewers).compact.uniq
|
44
45
|
@config = ActiveGenie.configuration.merge(config)
|
@@ -48,30 +49,30 @@ module ActiveGenie
|
|
48
49
|
def call
|
49
50
|
@players = create_players
|
50
51
|
|
51
|
-
|
52
|
-
|
52
|
+
ActiveGenie::Logger.with_context(log_context) do
|
53
|
+
set_initial_player_scores!
|
54
|
+
eliminate_obvious_bad_players!
|
53
55
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
while @players.elo_eligible?
|
57
|
+
elo_report = run_elo_round!
|
58
|
+
eliminate_relegation_players!
|
59
|
+
rebalance_players!(elo_report)
|
60
|
+
end
|
59
61
|
|
60
|
-
|
62
|
+
run_free_for_all!
|
63
|
+
end
|
61
64
|
|
62
65
|
sorted_players
|
63
66
|
end
|
64
67
|
|
65
|
-
private
|
66
|
-
|
67
68
|
ELIMINATION_VARIATION = 'variation_too_high'
|
68
69
|
ELIMINATION_RELEGATION = 'relegation_tier'
|
69
70
|
|
70
|
-
|
71
|
+
private
|
71
72
|
|
72
73
|
def create_players
|
73
|
-
players = PlayersCollection.new(param_players)
|
74
|
-
players.each { |p|
|
74
|
+
players = PlayersCollection.new(@param_players)
|
75
|
+
players.each { |p| ActiveGenie::Logger.call({ code: :new_player, player: p.to_h }) }
|
75
76
|
|
76
77
|
players
|
77
78
|
end
|
@@ -110,7 +111,7 @@ module ActiveGenie
|
|
110
111
|
|
111
112
|
def sorted_players
|
112
113
|
players = @players.sorted
|
113
|
-
|
114
|
+
ActiveGenie::Logger.call({ code: :ranking_final, players: players.map(&:to_h) })
|
114
115
|
|
115
116
|
players.map(&:to_h)
|
116
117
|
end
|
@@ -20,26 +20,8 @@ module ActiveGenie
|
|
20
20
|
ActiveGenie::Logger.with_context(log_context) do
|
21
21
|
@reviewers = generate_reviewers
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
threads = []
|
26
|
-
mutex = Mutex.new
|
27
|
-
|
28
|
-
# Take up to 3 players for parallel processing
|
29
|
-
current_batch = players_to_score.shift(3)
|
30
|
-
|
31
|
-
current_batch.each do |player|
|
32
|
-
threads << Thread.new(player) do |p|
|
33
|
-
score = generate_score(p)
|
34
|
-
|
35
|
-
mutex.synchronize do
|
36
|
-
p.score = score
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# Wait for all threads in this batch to complete
|
42
|
-
threads.each(&:join)
|
23
|
+
players_without_score.each do |player|
|
24
|
+
player.score = generate_score(player)
|
43
25
|
end
|
44
26
|
end
|
45
27
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
Evaluate and score the provided text based on predefined criteria, using a scoring range of 0 to 100 with 100 representing the highest possible score.
|
2
|
+
|
3
|
+
Follow the instructions below to ensure a comprehensive and objective assessment.
|
4
|
+
|
5
|
+
# Evaluation Process
|
6
|
+
|
7
|
+
1. **Analysis**:
|
8
|
+
- Thoroughly compare the text against each criterion for a comprehensive evaluation.
|
9
|
+
2. **Document Deviations**:
|
10
|
+
- Identify and document areas where the content does not align with the specified criteria.
|
11
|
+
3. **Highlight Strengths**:
|
12
|
+
- Note notable features or elements that enhance the quality or effectiveness of the content.
|
13
|
+
4. **Identify Weaknesses**:
|
14
|
+
- Specify areas where the content fails to meet the criteria or where improvements could be made.
|
15
|
+
|
16
|
+
# Scoring Fairness
|
17
|
+
|
18
|
+
- Ensure the assigned score reflects both the alignment with the criteria and the content's effectiveness.
|
19
|
+
- Consider if the fulfillment of other criteria compensates for areas lacking extreme details.
|
20
|
+
|
21
|
+
# Scoring Range
|
22
|
+
|
23
|
+
Segment scores into five parts before assigning a final score:
|
24
|
+
- **Terrible**: 0-20 - Content does not meet the criteria.
|
25
|
+
- **Bad**: 21-40 - Content is substandard but meets some criteria.
|
26
|
+
- **Average**: 41-60 - Content meets criteria with room for improvement.
|
27
|
+
- **Good**: 61-80 - Content exceeds criteria and is above average.
|
28
|
+
- **Great**: 81-100 - Content exceeds all expectations.
|
29
|
+
|
30
|
+
# Guidelines
|
31
|
+
|
32
|
+
- Maintain objectivity and avoid biases.
|
33
|
+
- Deconstruct each criterion into actionable components for systematic evaluation.
|
34
|
+
- Apply reasonable judgment in assigning a score, justifying your rationale clearly.
|
35
|
+
|
36
|
+
# Output Format
|
37
|
+
|
38
|
+
- Provide a detailed review including:
|
39
|
+
- A final score (0-100)
|
40
|
+
- Specific reasoning for the assigned score, detailing all evaluated criteria
|
41
|
+
- Include both positive aspects and suggested improvements
|
42
|
+
|
43
|
+
# Notes
|
44
|
+
|
45
|
+
- Consider edge cases where the text may partially align with criteria.
|
46
|
+
- If lacking information, reasonably judge and explain your scoring approach.
|
@@ -44,21 +44,9 @@ module ActiveGenie
|
|
44
44
|
{ role: 'user', content: "Text to score: #{@text}" }
|
45
45
|
]
|
46
46
|
|
47
|
-
properties = build_properties
|
48
|
-
|
49
|
-
function = {
|
50
|
-
name: 'scoring',
|
51
|
-
description: 'Score the text based on the given criteria.',
|
52
|
-
parameters: {
|
53
|
-
type: 'object',
|
54
|
-
properties:,
|
55
|
-
required: properties.keys
|
56
|
-
}
|
57
|
-
}
|
58
|
-
|
59
47
|
result = ::ActiveGenie::Clients::UnifiedClient.function_calling(
|
60
48
|
messages,
|
61
|
-
|
49
|
+
build_function,
|
62
50
|
config: @config
|
63
51
|
)
|
64
52
|
|
@@ -76,8 +64,20 @@ module ActiveGenie
|
|
76
64
|
result
|
77
65
|
end
|
78
66
|
|
67
|
+
PROMPT = File.read(File.join(__dir__, 'generalist.md'))
|
68
|
+
|
79
69
|
private
|
80
70
|
|
71
|
+
def build_function
|
72
|
+
properties = build_properties
|
73
|
+
|
74
|
+
function = JSON.parse(File.read(File.join(__dir__, 'generalist.json')), symbolize_names: true)
|
75
|
+
function[:parameters][:properties] = properties
|
76
|
+
function[:parameters][:required] = properties.keys
|
77
|
+
|
78
|
+
function
|
79
|
+
end
|
80
|
+
|
81
81
|
def build_properties
|
82
82
|
properties = {}
|
83
83
|
reviewers.each do |reviewer|
|
@@ -114,58 +114,6 @@ module ActiveGenie
|
|
114
114
|
[result['reviewer1'], result['reviewer2'], result['reviewer3']]
|
115
115
|
end
|
116
116
|
end
|
117
|
-
|
118
|
-
PROMPT = <<~PROMPT
|
119
|
-
Evaluate and score the provided text based on predefined criteria, using a scoring range of 0 to 100 with 100 representing the highest possible score.
|
120
|
-
|
121
|
-
Follow the instructions below to ensure a comprehensive and objective assessment.
|
122
|
-
|
123
|
-
# Evaluation Process
|
124
|
-
|
125
|
-
1. **Analysis**:
|
126
|
-
- Thoroughly compare the text against each criterion for a comprehensive evaluation.
|
127
|
-
|
128
|
-
2. **Document Deviations**:
|
129
|
-
- Identify and document areas where the content does not align with the specified criteria.
|
130
|
-
|
131
|
-
3. **Highlight Strengths**:
|
132
|
-
- Note notable features or elements that enhance the quality or effectiveness of the content.
|
133
|
-
|
134
|
-
4. **Identify Weaknesses**:
|
135
|
-
- Specify areas where the content fails to meet the criteria or where improvements could be made.
|
136
|
-
|
137
|
-
# Scoring Fairness
|
138
|
-
|
139
|
-
- Ensure the assigned score reflects both the alignment with the criteria and the content's effectiveness.
|
140
|
-
- Consider if the fulfillment of other criteria compensates for areas lacking extreme details.
|
141
|
-
|
142
|
-
# Scoring Range
|
143
|
-
|
144
|
-
Segment scores into five parts before assigning a final score:
|
145
|
-
- **Terrible**: 0-20 - Content does not meet the criteria.
|
146
|
-
- **Bad**: 21-40 - Content is substandard but meets some criteria.
|
147
|
-
- **Average**: 41-60 - Content meets criteria with room for improvement.
|
148
|
-
- **Good**: 61-80 - Content exceeds criteria and is above average.
|
149
|
-
- **Great**: 81-100 - Content exceeds all expectations.
|
150
|
-
|
151
|
-
# Guidelines
|
152
|
-
|
153
|
-
- Maintain objectivity and avoid biases.
|
154
|
-
- Deconstruct each criterion into actionable components for systematic evaluation.
|
155
|
-
- Apply reasonable judgment in assigning a score, justifying your rationale clearly.
|
156
|
-
|
157
|
-
# Output Format
|
158
|
-
|
159
|
-
- Provide a detailed review including:
|
160
|
-
- A final score (0-100)
|
161
|
-
- Specific reasoning for the assigned score, detailing all evaluated criteria
|
162
|
-
- Include both positive aspects and suggested improvements
|
163
|
-
|
164
|
-
# Notes
|
165
|
-
|
166
|
-
- Consider edge cases where the text may partially align with criteria.
|
167
|
-
- If lacking information, reasonably judge and explain your scoring approach.
|
168
|
-
PROMPT
|
169
117
|
end
|
170
118
|
end
|
171
119
|
end
|
@@ -62,8 +62,6 @@ module ActiveGenie
|
|
62
62
|
)
|
63
63
|
end
|
64
64
|
|
65
|
-
private
|
66
|
-
|
67
65
|
PROMPT = <<~PROMPT
|
68
66
|
Identify the top 3 suitable reviewer titles or roles based on the provided text and criteria. Selected reviewers must possess subject matter expertise, offer valuable insights, and ensure diverse yet aligned perspectives on the content.
|
69
67
|
|
@@ -79,6 +77,8 @@ module ActiveGenie
|
|
79
77
|
- Avoid redundant or overly similar titles/roles to maintain diversity.
|
80
78
|
PROMPT
|
81
79
|
|
80
|
+
private
|
81
|
+
|
82
82
|
def client
|
83
83
|
::ActiveGenie::Clients::UnifiedClient
|
84
84
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_genie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.25.1
|
4
|
+
version: 0.26.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Radamés Roriz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-06-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |
|
14
14
|
The lodash for GenAI, stop reinventing the wheel
|
@@ -26,6 +26,8 @@ files:
|
|
26
26
|
- lib/active_genie.rb
|
27
27
|
- lib/active_genie/battle.rb
|
28
28
|
- lib/active_genie/battle/README.md
|
29
|
+
- lib/active_genie/battle/generalist.json
|
30
|
+
- lib/active_genie/battle/generalist.md
|
29
31
|
- lib/active_genie/battle/generalist.rb
|
30
32
|
- lib/active_genie/clients/providers/anthropic_client.rb
|
31
33
|
- lib/active_genie/clients/providers/base_client.rb
|
@@ -33,7 +35,6 @@ files:
|
|
33
35
|
- lib/active_genie/clients/providers/google_client.rb
|
34
36
|
- lib/active_genie/clients/providers/openai_client.rb
|
35
37
|
- lib/active_genie/clients/unified_client.rb
|
36
|
-
- lib/active_genie/concerns/loggable.rb
|
37
38
|
- lib/active_genie/config/battle_config.rb
|
38
39
|
- lib/active_genie/config/data_extractor_config.rb
|
39
40
|
- lib/active_genie/config/llm_config.rb
|
@@ -49,13 +50,17 @@ files:
|
|
49
50
|
- lib/active_genie/configuration.rb
|
50
51
|
- lib/active_genie/data_extractor.rb
|
51
52
|
- lib/active_genie/data_extractor/README.md
|
53
|
+
- lib/active_genie/data_extractor/from_informal.json
|
52
54
|
- lib/active_genie/data_extractor/from_informal.rb
|
55
|
+
- lib/active_genie/data_extractor/generalist.json
|
53
56
|
- lib/active_genie/data_extractor/generalist.md
|
54
57
|
- lib/active_genie/data_extractor/generalist.rb
|
58
|
+
- lib/active_genie/errors/invalid_log_output_error.rb
|
55
59
|
- lib/active_genie/errors/invalid_provider_error.rb
|
56
60
|
- lib/active_genie/logger.rb
|
57
61
|
- lib/active_genie/ranking.rb
|
58
62
|
- lib/active_genie/ranking/README.md
|
63
|
+
- lib/active_genie/ranking/concerns/loggable.rb
|
59
64
|
- lib/active_genie/ranking/elo_round.rb
|
60
65
|
- lib/active_genie/ranking/free_for_all.rb
|
61
66
|
- lib/active_genie/ranking/player.rb
|
@@ -64,6 +69,8 @@ files:
|
|
64
69
|
- lib/active_genie/ranking/ranking_scoring.rb
|
65
70
|
- lib/active_genie/scoring.rb
|
66
71
|
- lib/active_genie/scoring/README.md
|
72
|
+
- lib/active_genie/scoring/generalist.json
|
73
|
+
- lib/active_genie/scoring/generalist.md
|
67
74
|
- lib/active_genie/scoring/generalist.rb
|
68
75
|
- lib/active_genie/scoring/recommended_reviewers.rb
|
69
76
|
- lib/tasks/benchmark.rake
|
@@ -86,7 +93,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
86
93
|
requirements:
|
87
94
|
- - ">="
|
88
95
|
- !ruby/object:Gem::Version
|
89
|
-
version: 3.
|
96
|
+
version: 3.4.0
|
90
97
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
98
|
requirements:
|
92
99
|
- - ">="
|