active_genie 0.0.2 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +133 -47
- data/VERSION +1 -1
- data/lib/active_genie/battle/README.md +39 -0
- data/lib/active_genie/battle/basic.rb +125 -0
- data/lib/active_genie/battle.rb +13 -0
- data/lib/{requester → active_genie/clients}/openai.rb +5 -4
- data/lib/{requester/requester.rb → active_genie/clients/router.rb} +8 -8
- data/lib/active_genie/configuration.rb +3 -2
- data/lib/active_genie/data_extractor/README.md +132 -0
- data/lib/active_genie/data_extractor/basic.rb +88 -0
- data/lib/active_genie/data_extractor/from_informal.rb +58 -0
- data/lib/active_genie/data_extractor.rb +17 -0
- data/lib/active_genie/leaderboard/elo_ranking.rb +88 -0
- data/lib/active_genie/leaderboard/leaderboard.rb +72 -0
- data/lib/active_genie/leaderboard/league.rb +48 -0
- data/lib/active_genie/leaderboard/player.rb +52 -0
- data/lib/active_genie/leaderboard/players_collection.rb +68 -0
- data/lib/active_genie/leaderboard.rb +11 -0
- data/lib/active_genie/scoring/README.md +80 -0
- data/lib/active_genie/scoring/basic.rb +117 -0
- data/lib/active_genie/scoring/recommended_reviews.rb +78 -0
- data/lib/active_genie/scoring.rb +17 -0
- data/lib/active_genie/utils/math.rb +15 -0
- data/lib/active_genie.rb +20 -8
- data/lib/tasks/install.rake +1 -1
- data/lib/tasks/templates/{active_ai.yml → active_genie.yml} +1 -1
- metadata +122 -17
- data/lib/data_extractor/README.md +0 -103
- data/lib/data_extractor/data_extractor.rb +0 -88
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../clients/router'
|
4
|
+
|
5
|
+
module ActiveGenie::Scoring
|
6
|
+
# The Basic class provides a foundation for scoring text content against specified criteria
|
7
|
+
# using AI-powered evaluation. It supports both single and multiple reviewer scenarios,
|
8
|
+
# with the ability to automatically recommend reviewers when none are specified.
|
9
|
+
#
|
10
|
+
# The scoring process evaluates text based on given criteria and returns detailed feedback
|
11
|
+
# including individual reviewer scores, reasoning, and a final aggregated score.
|
12
|
+
#
|
13
|
+
# @example Basic usage with a single reviewer
|
14
|
+
# Basic.call("Sample text", "Evaluate grammar and clarity", ["Grammar Expert"])
|
15
|
+
#
|
16
|
+
# @example Usage with automatic reviewer recommendation
|
17
|
+
# Basic.call("Sample text", "Evaluate technical accuracy")
|
18
|
+
#
|
19
|
+
class Basic
|
20
|
+
# @param text [String] The text content to be evaluated
|
21
|
+
# @param criteria [String] The evaluation criteria or rubric to assess against
|
22
|
+
# @param reviewers [Array<String>] Optional list of specific reviewers. If empty,
|
23
|
+
# reviewers will be automatically recommended based on the content and criteria
|
24
|
+
# @param options [Hash] Additional configuration options that modify the scoring behavior
|
25
|
+
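# @option options [Boolean] :detailed_feedback Request more detailed feedback in the reasoning (not interpreted by this class; the options hash is only forwarded to Clients::Router and RecommendedReviews)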
|
26
|
+
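# @option options [Hash] :reviewer_weights Custom weights for different reviewers (not interpreted by this class; the options hash is only forwarded to Clients::Router and RecommendedReviews)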
|
27
|
+
# @return [Hash] The evaluation result containing the scores and reasoning
|
28
|
+
# @return [Number] :final_score The final score of the text based on the criteria and reviewers
|
29
|
+
# @return [String] :final_reasoning Detailed explanation of why the final score was reached
|
30
|
+
def self.call(text, criteria, reviewers = [], options: {})
|
31
|
+
new(text, criteria, reviewers, options:).call
|
32
|
+
end
|
33
|
+
|
34
|
+
def initialize(text, criteria, reviewers = [], options: {})
|
35
|
+
@text = text
|
36
|
+
@criteria = criteria
|
37
|
+
@reviewers = Array(reviewers).compact.uniq
|
38
|
+
@options = options
|
39
|
+
end
|
40
|
+
|
41
|
+
# Builds the scoring request and sends it through the client router.
#
# Three chat messages are sent: the system PROMPT, the scoring criteria and the text.
# The accompanying `scoring` function schema contains, for every reviewer returned by
# `get_or_recommend_reviewers`, a "<reviewer>_reasoning" string and a "<reviewer>_score"
# number, followed by the aggregated `final_score` and `final_reasoning` fields.
#
# @return [Object] whatever Clients::Router.function_calling returns for this request
def call
  messages = [
    { role: 'system', content: PROMPT },
    { role: 'user', content: "Scoring criteria: #{@criteria}" },
    { role: 'user', content: "Text to score: #{@text}" }
  ]

  # One reasoning/score pair per reviewer, keyed by the reviewer's title.
  reviewer_properties = get_or_recommend_reviewers.each_with_object({}) do |reviewer, properties|
    properties["#{reviewer}_reasoning"] = {
      type: 'string',
      description: "The reasoning of the scoring process by #{reviewer}."
    }
    properties["#{reviewer}_score"] = {
      type: 'number',
      description: "The score given by #{reviewer}.",
      # JSON Schema spells numeric bounds `minimum`/`maximum`; `min`/`max` are not
      # schema keywords, so the 0-100 range was never actually declared.
      minimum: 0,
      maximum: 100
    }
  end

  function = {
    name: 'scoring',
    description: 'Score the text based on the given criteria.',
    schema: {
      type: 'object',
      properties: {
        **reviewer_properties,
        final_score: {
          type: 'number',
          description: 'The final score based on the previous reviewers'
        },
        final_reasoning: {
          type: 'string',
          description: 'The final reasoning based on the previous reviewers'
        }
      }
    }
  }

  ::ActiveGenie::Clients::Router.function_calling(messages, function, options: @options)
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
# Returns the reviewers used to build the scoring schema (memoized).
#
# Explicitly supplied reviewers win. Otherwise RecommendedReviews is asked for
# suggestions and the :reviewer1, :reviewer2 and :reviewer3 entries of its result are used.
#
# @return [Array<String>] reviewer titles, without nil entries or duplicates
def get_or_recommend_reviewers
  @get_or_recommend_reviewers ||=
    if @reviewers.empty?
      recommended = RecommendedReviews.call(@text, @criteria, options: @options)

      # The recommendation schema does not mark the reviewers as required, so a
      # slot may be missing (nil) or repeated. Normalise the list the same way
      # explicit reviewers are normalised, otherwise a nil reviewer would yield
      # "_reasoning"/"_score" schema properties.
      %i[reviewer1 reviewer2 reviewer3].map { |slot| recommended[slot] }.compact.uniq
    else
      @reviewers
    end
end
|
95
|
+
|
96
|
+
PROMPT = <<~PROMPT
|
97
|
+
Evaluate and score the provided text based on predefined criteria, which may include rules, keywords, or patterns. Use a scoring range of 0 to 100, with 100 representing the highest possible score. Follow the instructions below to ensure an accurate and objective assessment.
|
98
|
+
|
99
|
+
# Evaluation Process
|
100
|
+
1. **Analysis**: Thoroughly compare the text against each criterion to ensure comprehensive evaluation.
|
101
|
+
2. **Document Deviations**: Clearly identify and document any areas where the content does not align with the specified criteria.
|
102
|
+
3. **Highlight Strengths**: Emphasize notable features or elements that enhance the overall quality or effectiveness of the content.
|
103
|
+
4. **Identify Weaknesses**: Specify areas where the content fails to meet the criteria or where improvements could be made.
|
104
|
+
|
105
|
+
# Output Requirements
|
106
|
+
Provide a detailed review, including:
|
107
|
+
- A final score (0-100)
|
108
|
+
- Specific reasoning for the assigned score, covering all evaluated criteria.
|
109
|
+
- Ensure the reasoning includes both positive aspects and suggested improvements.
|
110
|
+
|
111
|
+
# Guidelines
|
112
|
+
- Maintain objectivity, avoiding biases or preconceived notions.
|
113
|
+
- Deconstruct each criterion into actionable components for a systematic evaluation.
|
114
|
+
- If the text lacks information, apply reasonable judgment to assign a score while clearly explaining the rationale.
|
115
|
+
PROMPT
|
116
|
+
end
|
117
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../clients/router.rb'
|
4
|
+
|
5
|
+
module ActiveGenie::Scoring
|
6
|
+
# The RecommendedReviews class intelligently suggests appropriate reviewer roles
|
7
|
+
# for evaluating text content based on specific criteria. It uses AI to analyze
|
8
|
+
# the content and criteria to identify the most suitable subject matter experts.
|
9
|
+
#
|
10
|
+
# The class ensures a balanced and comprehensive review process by recommending
|
11
|
+
# three distinct reviewer roles with complementary expertise and perspectives.
|
12
|
+
#
|
13
|
+
# @example Getting recommended reviewers for technical content
|
14
|
+
# RecommendedReviews.call("Technical documentation about API design",
|
15
|
+
# "Evaluate technical accuracy and clarity")
|
16
|
+
# # => { reviewer1: "API Architect", reviewer2: "Technical Writer",
|
17
|
+
# # reviewer3: "Developer Advocate", reasoning: "..." }
|
18
|
+
#
|
19
|
+
class RecommendedReviews
|
20
|
+
def self.call(text, criteria, options: {})
|
21
|
+
new(text, criteria, options:).call
|
22
|
+
end
|
23
|
+
|
24
|
+
# Initializes a new reviewer recommendation instance
|
25
|
+
#
|
26
|
+
# @param text [String] The text content to analyze for reviewer recommendations
|
27
|
+
# @param criteria [String] The evaluation criteria that will guide reviewer selection
|
28
|
+
# @param options [Hash] Additional configuration options that modify the recommendation process
|
29
|
+
# @option options [Boolean] :prefer_technical Whether to favor technical expertise (not interpreted by this class; the options hash is only forwarded to Clients::Router)
|
30
|
+
# @option options [Boolean] :prefer_domain Whether to favor domain expertise (not interpreted by this class; the options hash is only forwarded to Clients::Router)
|
31
|
+
def initialize(text, criteria, options: {})
|
32
|
+
@text = text
|
33
|
+
@criteria = criteria
|
34
|
+
@options = options
|
35
|
+
end
|
36
|
+
|
37
|
+
def call
|
38
|
+
messages = [
|
39
|
+
{ role: 'system', content: PROMPT },
|
40
|
+
{ role: 'user', content: "Scoring criteria: #{@criteria}" },
|
41
|
+
{ role: 'user', content: "Text to score: #{@text}" },
|
42
|
+
]
|
43
|
+
|
44
|
+
function = {
|
45
|
+
name: 'identify_reviewers',
|
46
|
+
description: 'Discover reviewers based on the text and given criteria.',
|
47
|
+
schema: {
|
48
|
+
type: "object",
|
49
|
+
properties: {
|
50
|
+
reasoning: { type: 'string' },
|
51
|
+
reviewer1: { type: 'string' },
|
52
|
+
reviewer2: { type: 'string' },
|
53
|
+
reviewer3: { type: 'string' },
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
::ActiveGenie::Clients::Router.function_calling(messages, function, options: @options)
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
PROMPT = <<~PROMPT
|
64
|
+
Identify the top 3 suitable reviewer titles or roles based on the provided text and criteria. Selected reviewers must possess subject matter expertise, offer valuable insights, and ensure diverse yet aligned perspectives on the content.
|
65
|
+
|
66
|
+
# Instructions
|
67
|
+
1. **Analyze the Text and Criteria**: Examine the content and criteria to identify relevant reviewer titles or roles.
|
68
|
+
2. **Determine Subject Matter Expertise**: Select reviewers with substantial knowledge or experience in the subject area.
|
69
|
+
3. **Evaluate Insight Contribution**: Prioritize titles or roles capable of providing meaningful and actionable feedback on the content.
|
70
|
+
4. **Incorporate Perspective Diversity**: Ensure the selection includes reviewers with varied but complementary viewpoints while maintaining alignment with the criteria.
|
71
|
+
|
72
|
+
# Constraints
|
73
|
+
- Selected reviewers must align with the content’s subject matter and criteria.
|
74
|
+
- Include reasoning for how each choice supports a thorough and insightful review.
|
75
|
+
- Avoid redundant or overly similar titles/roles to maintain diversity.
|
76
|
+
PROMPT
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require_relative 'scoring/basic'
|
2
|
+
require_relative 'scoring/recommended_reviews'
|
3
|
+
|
4
|
+
module ActiveGenie
|
5
|
+
# Text evaluation system that provides detailed scoring and feedback using multiple expert reviewers
|
6
|
+
module Scoring
|
7
|
+
module_function
|
8
|
+
|
9
|
+
def basic(...)
|
10
|
+
Basic.call(...)
|
11
|
+
end
|
12
|
+
|
13
|
+
def recommended_reviews(...)
|
14
|
+
RecommendedReviews.call(...)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module ActiveGenie::Utils
|
2
|
+
module Math
|
3
|
+
module_function
|
4
|
+
|
5
|
+
# Computes both players' Elo ratings after a decided match.
#
# The winner's expected score is 1 / (1 + 10^((loser - winner) / 400)). The winner
# actually scored 1 and the loser 0, so the winner gains k * (1 - expected) points and
# the loser gives up exactly the same amount. The exchanged amount is rounded to a whole
# number of points, so Integer ratings stay Integer.
#
# @param winner [Numeric] the winner's rating before the match
# @param loser [Numeric] the loser's rating before the match
# @param k [Numeric] K-factor: the maximum number of points a single match can move
# @return [Array<Numeric>] `[new_winner_rating, new_loser_rating]`
def self.calculate_new_elo(winner, loser, k: 32)
  # Float arithmetic on purpose: with Integer ratings `/` truncates, which collapsed
  # the expected score to 0 (or 1) instead of a probability between them.
  expected_winner = 1.0 / (1.0 + 10.0**((loser - winner) / 400.0))
  points = (k * (1.0 - expected_winner)).round

  # Zero-sum update: the loser's change is k * (0 - expected_loser) == -points.
  [winner + points, loser - points]
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/active_genie.rb
CHANGED
@@ -1,22 +1,34 @@
|
|
1
1
|
module ActiveGenie
|
2
|
-
autoload :DataExtractor, File.join(__dir__, 'data_extractor/data_extractor')
|
3
2
|
autoload :Configuration, File.join(__dir__, 'active_genie/configuration')
|
4
3
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
# Modules
|
5
|
+
autoload :DataExtractor, File.join(__dir__, 'active_genie/data_extractor')
|
6
|
+
autoload :Battle, File.join(__dir__, 'active_genie/battle')
|
7
|
+
autoload :Scoring, File.join(__dir__, 'active_genie/scoring')
|
8
|
+
autoload :Leaderboard, File.join(__dir__, 'active_genie/leaderboard')
|
9
9
|
|
10
|
+
class << self
|
10
11
|
def configure
|
11
12
|
yield(config) if block_given?
|
12
13
|
end
|
13
|
-
|
14
|
+
|
15
|
+
def load_tasks
|
16
|
+
return unless defined?(Rake)
|
17
|
+
|
18
|
+
Rake::Task.define_task(:environment)
|
19
|
+
Dir.glob(File.join(__dir__, 'tasks', '*.rake')).each { |r| load r }
|
20
|
+
end
|
21
|
+
|
22
|
+
def config
|
23
|
+
@config ||= Configuration.new
|
24
|
+
end
|
25
|
+
|
14
26
|
def [](key)
|
15
27
|
config.values[key.to_s]
|
16
28
|
end
|
17
29
|
|
18
|
-
def config_by_model(model)
|
19
|
-
config.values[model&.to_s] || config.values.values.first || {}
|
30
|
+
def config_by_model(model = nil)
|
31
|
+
config.values[model&.to_s&.downcase&.strip] || config.values.values.first || {}
|
20
32
|
end
|
21
33
|
end
|
22
34
|
end
|
data/lib/tasks/install.rake
CHANGED
@@ -3,7 +3,7 @@ require 'fileutils'
|
|
3
3
|
namespace :active_genie do
|
4
4
|
desc 'Install active_genie configuration file'
|
5
5
|
task :install do
|
6
|
-
source = File.join(
|
6
|
+
source = File.join(__dir__, 'templates', 'active_genie.yml')
|
7
7
|
target = File.join('config', 'active_genie.yml')
|
8
8
|
|
9
9
|
FileUtils.cp(source, target)
|
metadata
CHANGED
@@ -1,22 +1,113 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_genie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Radamés Roriz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
13
|
+
description: "# ActiveGenie \U0001F9DE♂️\n> Transform your Ruby application with
|
14
|
+
powerful, production-ready GenAI features\n\n[data:image/s3,"s3://crabby-images/33f7f/33f7f59404c71f490e8eacbd9b04252987adbd4e" alt="Gem Version"](https://badge.fury.io/rb/active_genie)\n[data:image/s3,"s3://crabby-images/3f5bf/3f5bf589448c33a4908bd6c97d54fc02581c0a5c" alt="Ruby"](https://github.com/roriz/active_genie/actions/workflows/ruby.yml)\n\nActiveGenie
|
15
|
+
is a Ruby gem that provides a polished, production-ready interface for working with
|
16
|
+
Generative AI (GenAI) models. Just like ActiveStorage simplifies file handling in
|
17
|
+
Rails, ActiveGenie makes it effortless to integrate GenAI capabilities into your
|
18
|
+
Ruby applications.\n\n## Features\n\n- \U0001F3AF **Data Extraction**: Extract structured
|
19
|
+
data from unstructured text with type validation\n- \U0001F4CA **Smart Scoring**:
|
20
|
+
Multi-reviewer evaluation system with automatic expert selection\n- \U0001F4AD **Sentiment
|
21
|
+
Analysis**: Advanced sentiment analysis with customizable rules\n- \U0001F512 **Safe
|
22
|
+
& Secure**: Built-in validation and sanitization\n- \U0001F6E0️ **Configurable**:
|
23
|
+
Supports multiple GenAI providers and models\n\n## Installation\n\n1. Add to your
|
24
|
+
Gemfile:\n```ruby\ngem 'active_genie'\n```\n\n2. Install the gem:\n```shell\nbundle
|
25
|
+
install\n```\n\n3. Generate the configuration:\n```shell\necho \"ActiveGenie.load_tasks\"
|
26
|
+
>> Rakefile\nbundle exec rake active_genie:install\n```\n\n4. [Optional] Configure your credentials
|
27
|
+
in `config/active_genie.yml`:\n```yaml\nGPT-4o-mini:\n api_key: <%= ENV['OPENAI_API_KEY']
|
28
|
+
%>\n provider: \"openai\"\n\nclaude-3-5-sonnet:\n api_key: <%= ENV['ANTHROPIC_API_KEY']
|
29
|
+
%>\n provider: \"anthropic\"\n```\n\n> The first key will be used as default in
|
30
|
+
all modules, in this example `GPT-4o-mini`\n\n## Quick Start\n\n### Data Extractor\nExtract
|
31
|
+
structured data from text using AI-powered analysis, handling informal language
|
32
|
+
and complex expressions.\n\n```ruby\ntext = \"Nike Air Max 90 - Size 42 - $199.99\"\nschema
|
33
|
+
= {\n brand: { \n type: 'string',\n enum: [\"Nike\", \"Adidas\", \"Puma\"]\n
|
34
|
+
\ },\n price: { \n type: 'number',\n minimum: 0\n },\n size: {\n type:
|
35
|
+
'integer',\n minimum: 35,\n maximum: 46\n }\n}\n\nresult = ActiveGenie::DataExtractor.call(text,
|
36
|
+
schema)\n# => { \n# brand: \"Nike\", \n# brand_explanation: \"Brand name
|
37
|
+
found at start of text\",\n# price: 199.99,\n# price_explanation: \"Price
|
38
|
+
found in USD format at end\",\n# size: 42,\n# size_explanation: \"Size
|
39
|
+
explicitly stated in the middle\"\n# }\n```\n\nFeatures:\n- Structured data extraction
|
40
|
+
with type validation\n- Schema-based extraction with custom constraints\n- Informal
|
41
|
+
text analysis (litotes, hedging)\n- Detailed explanations for extracted values\n\nSee
|
42
|
+
the [Data Extractor README](lib/active_genie/data_extractor/README.md) for informal
|
43
|
+
text processing, advanced schemas, and detailed interface documentation.\n\n###
|
44
|
+
Scoring\nText evaluation system that provides detailed scoring and feedback using
|
45
|
+
multiple expert reviewers. Get balanced scoring through AI-powered expert reviewers
|
46
|
+
that automatically adapt to your content.\n\n```ruby\ntext = \"The code implements
|
47
|
+
a binary search algorithm with O(log n) complexity\"\ncriteria = \"Evaluate technical
|
48
|
+
accuracy and clarity\"\n\nresult = ActiveGenie::Scoring::Basic.call(text, criteria)\n#
|
49
|
+
=> {\n# algorithm_expert_score: 95,\n# algorithm_expert_reasoning: \"Accurately
|
50
|
+
describes binary search and its complexity\",\n# technical_writer_score: 90,\n#
|
51
|
+
\ technical_writer_reasoning: \"Clear and concise explanation of the algorithm\",\n#
|
52
|
+
\ final_score: 92.5\n# }\n```\n\nFeatures:\n- Multi-reviewer evaluation with
|
53
|
+
automatic expert selection\n- Detailed feedback with scoring reasoning\n- Customizable
|
54
|
+
reviewer weights\n- Flexible evaluation criteria\n\nSee the [Scoring README](lib/active_genie/scoring/README.md)
|
55
|
+
for advanced usage, custom reviewers, and detailed interface documentation.\n\n###
|
56
|
+
Battle\nAI-powered battle evaluation system that determines winners between two
|
57
|
+
players based on specified criteria.\n\n```ruby\nrequire 'active_genie'\n\nplayer_a
|
58
|
+
= \"Implementation uses dependency injection for better testability\"\nplayer_b
|
59
|
+
= \"Code has high test coverage but tightly coupled components\"\ncriteria = \"Evaluate
|
60
|
+
code quality and maintainability\"\n\nresult = ActiveGenie::Battle::Basic.call(player_a,
|
61
|
+
player_b, criteria)\n# => {\n# winner_player: \"Implementation uses dependency
|
62
|
+
injection for better testability\",\n# reasoning: \"Player A's implementation
|
63
|
+
demonstrates better maintainability through dependency injection, \n# which
|
64
|
+
allows for easier testing and component replacement. While Player B has good test
|
65
|
+
coverage, \n# the tight coupling makes the code harder to maintain
|
66
|
+
and modify.\",\n# what_could_be_changed_to_avoid_draw: \"Focus on specific
|
67
|
+
architectural patterns and design principles\"\n# }\n```\n\nFeatures:\n- Multi-reviewer
|
68
|
+
evaluation with automatic expert selection\n- Detailed feedback with scoring reasoning\n-
|
69
|
+
Customizable reviewer weights\n- Flexible evaluation criteria\n\nSee the [Battle
|
70
|
+
README](lib/active_genie/battle/README.md) for advanced usage, custom reviewers,
|
71
|
+
and detailed interface documentation.\n\n### Summarizer (WIP)\nThe summarizer is
|
72
|
+
a tool that can be used to summarize a given text. It uses a set of rules to summarize
|
73
|
+
the text out of the box. Uses the best practices of prompt engineering and engineering
|
74
|
+
to make the summarization as accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\ntext
|
75
|
+
= \"Example text to be summarized. The fox jumps over the dog\"\nsummarized_text
|
76
|
+
= ActiveGenie::Summarizer.call(text)\nputs summarized_text # => \"The fox jumps
|
77
|
+
over the dog\"\n```\n\n### Language detector (WIP)\nThe language detector is a tool
|
78
|
+
that can be used to detect the language of a given text. It uses a set of rules
|
79
|
+
to detect the language of the text out of the box. Uses the best practices of prompt
|
80
|
+
engineering and engineering to make the language detection as accurate as possible.\n\n```ruby\nrequire
|
81
|
+
'active_genie'\n\ntext = \"Example text to be detected\"\nlanguage = ActiveGenie::LanguageDetector.call(text)\nputs
|
82
|
+
language # => \"en\"\n```\n\n### Translator (WIP)\nThe translator is a tool that
|
83
|
+
can be used to translate a given text. It uses a set of rules to translate the text
|
84
|
+
out of the box. Uses the best practices of prompt engineering and engineering to
|
85
|
+
make the translation as accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\ntext
|
86
|
+
= \"Example text to be translated\"\ntranslated_text = ActiveGenie::Translator.call(text,
|
87
|
+
from: 'en', to: 'pt')\nputs translated_text # => \"Exemplo de texto a ser traduzido\"\n```\n\n###
|
88
|
+
Sentiment analyzer (WIP)\nThe sentiment analyzer is a tool that can be used to analyze
|
89
|
+
the sentiment of a given text. It uses a set of rules to analyze the sentiment of
|
90
|
+
the text out of the box. Uses the best practices of prompt engineering and engineering
|
91
|
+
to make the sentiment analysis as accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\ntext
|
92
|
+
= \"Example text to be analyzed\"\nsentiment = ActiveGenie::SentimentAnalyzer.call(text)\nputs
|
93
|
+
sentiment # => \"positive\"\n```\n\n### Elo ranking (WIP)\nThe Elo ranking is a
|
94
|
+
tool that can be used to rank a set of items. It uses a set of rules to rank the
|
95
|
+
items out of the box. Uses the best practices of prompt engineering and engineering
|
96
|
+
to make the ranking as accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\nitems
|
97
|
+
= ['Square', 'Circle', 'Triangle']\ncriterias = 'items that look rounded'\nranked_items
|
98
|
+
= ActiveGenie::EloRanking.call(items, criterias, rounds: 10)\nputs ranked_items
|
99
|
+
# => [{ name: \"Circle\", score: 1500 }, { name: \"Square\", score: 800 }, { name:
|
100
|
+
\"Triangle\", score: 800 }]\n```\n\n\n## Configuration Options\n\n| Option | Description
|
101
|
+
| Default |\n|--------|-------------|---------|\n| `provider` | LLM provider (openai,
|
102
|
+
anthropic, etc) | `nil` |\n| `model` | Model to use | `nil` |\n| `api_key` | Provider
|
103
|
+
API key | `nil` |\n| `timeout` | Request timeout in seconds | `5` |\n| `max_retries`
|
104
|
+
| Maximum retry attempts | `3` |\n\n> **Note:** Each module can append its own set
|
105
|
+
of configuration options, see the individual module documentation for details.\n\n##
|
106
|
+
Contributing\n\n1. Fork the repository\n2. Create your feature branch (`git checkout
|
107
|
+
-b feature/amazing-feature`)\n3. Commit your changes (`git commit -m 'Add amazing
|
108
|
+
feature'`)\n4. Push to the branch (`git push origin feature/amazing-feature`)\n5.
|
109
|
+
Open a Pull Request\n## License\n\nThis project is licensed under the MIT License
|
110
|
+
- see the [LICENSE](LICENSE) file for details.\n\n"
|
20
111
|
email:
|
21
112
|
- radames@roriz.dev
|
22
113
|
executables: []
|
@@ -27,13 +118,29 @@ files:
|
|
27
118
|
- README.md
|
28
119
|
- VERSION
|
29
120
|
- lib/active_genie.rb
|
121
|
+
- lib/active_genie/battle.rb
|
122
|
+
- lib/active_genie/battle/README.md
|
123
|
+
- lib/active_genie/battle/basic.rb
|
124
|
+
- lib/active_genie/clients/openai.rb
|
125
|
+
- lib/active_genie/clients/router.rb
|
30
126
|
- lib/active_genie/configuration.rb
|
31
|
-
- lib/data_extractor
|
32
|
-
- lib/data_extractor/
|
33
|
-
- lib/
|
34
|
-
- lib/
|
127
|
+
- lib/active_genie/data_extractor.rb
|
128
|
+
- lib/active_genie/data_extractor/README.md
|
129
|
+
- lib/active_genie/data_extractor/basic.rb
|
130
|
+
- lib/active_genie/data_extractor/from_informal.rb
|
131
|
+
- lib/active_genie/leaderboard.rb
|
132
|
+
- lib/active_genie/leaderboard/elo_ranking.rb
|
133
|
+
- lib/active_genie/leaderboard/leaderboard.rb
|
134
|
+
- lib/active_genie/leaderboard/league.rb
|
135
|
+
- lib/active_genie/leaderboard/player.rb
|
136
|
+
- lib/active_genie/leaderboard/players_collection.rb
|
137
|
+
- lib/active_genie/scoring.rb
|
138
|
+
- lib/active_genie/scoring/README.md
|
139
|
+
- lib/active_genie/scoring/basic.rb
|
140
|
+
- lib/active_genie/scoring/recommended_reviews.rb
|
141
|
+
- lib/active_genie/utils/math.rb
|
35
142
|
- lib/tasks/install.rake
|
36
|
-
- lib/tasks/templates/
|
143
|
+
- lib/tasks/templates/active_genie.yml
|
37
144
|
homepage: https://github.com/Roriz/active_genie
|
38
145
|
licenses:
|
39
146
|
- Apache-2.0
|
@@ -41,7 +148,6 @@ metadata:
|
|
41
148
|
homepage_uri: https://github.com/Roriz/active_genie
|
42
149
|
documentation_uri: https://github.com/Roriz/active_genie/wiki
|
43
150
|
changelog_uri: https://github.com/Roriz/active_genie/blob/master/CHANGELOG.md
|
44
|
-
source_code_uri: https://github.com/Roriz/active_genie
|
45
151
|
bug_tracker_uri: https://github.com/Roriz/active_genie/issues
|
46
152
|
rubygems_mfa_required: 'true'
|
47
153
|
post_install_message:
|
@@ -62,6 +168,5 @@ requirements: []
|
|
62
168
|
rubygems_version: 3.5.3
|
63
169
|
signing_key:
|
64
170
|
specification_version: 4
|
65
|
-
summary:
|
66
|
-
summarization, scoring, and ranking.
|
171
|
+
summary: Transform your Ruby application with powerful, production-ready GenAI features
|
67
172
|
test_files: []
|
@@ -1,103 +0,0 @@
|
|
1
|
-
# Data Extractor
|
2
|
-
Extract structured data from text using LLM-powered analysis, handling informal language and complex expressions.
|
3
|
-
|
4
|
-
## ✨ Features
|
5
|
-
- Simple extraction - Extract structured data from unstructured text
|
6
|
-
- Informal extraction - Identifies and handles informal language patterns
|
7
|
-
|
8
|
-
## Basic Usage
|
9
|
-
|
10
|
-
Extract structured data from text using predefined schemas:
|
11
|
-
|
12
|
-
```ruby
|
13
|
-
text = "iPhone 14 Pro Max"
|
14
|
-
schema = {
|
15
|
-
brand: { type: 'string' },
|
16
|
-
model: { type: 'string' }
|
17
|
-
}
|
18
|
-
result = ActiveGenie::DataExtractor.call(text, schema)
|
19
|
-
# => { brand: "iPhone", model: "14 Pro Max" }
|
20
|
-
|
21
|
-
product = "Nike Air Max 90 - Size 42 - $199.99"
|
22
|
-
schema = {
|
23
|
-
brand: {
|
24
|
-
type: 'string',
|
25
|
-
enum: ["Nike", "Adidas", "Puma"]
|
26
|
-
},
|
27
|
-
price: {
|
28
|
-
type: 'number',
|
29
|
-
minimum: 0
|
30
|
-
},
|
31
|
-
currency: {
|
32
|
-
type: 'string',
|
33
|
-
enum: ["USD", "EUR"]
|
34
|
-
},
|
35
|
-
size: {
|
36
|
-
type: 'integer',
|
37
|
-
minimum: 35,
|
38
|
-
maximum: 46
|
39
|
-
}
|
40
|
-
}
|
41
|
-
|
42
|
-
result = ActiveGenie::DataExtractor.call(product, schema)
|
43
|
-
# => { brand: "Nike", price: 199.99, size: 42, currency: "USD" }
|
44
|
-
```
|
45
|
-
|
46
|
-
## Informal Text Processing
|
47
|
-
|
48
|
-
The `from_informal` method helps extract structured data from casual, conversational text by interpreting common informal expressions and linguistic patterns like:
|
49
|
-
|
50
|
-
- Affirmative expressions ("sure", "no problem", "you bet")
|
51
|
-
- Negative expressions ("nah", "not really", "pass")
|
52
|
-
- Hedging ("maybe", "I guess", "probably")
|
53
|
-
- Litotes ("not bad", "not the worst")
|
54
|
-
|
55
|
-
### Example
|
56
|
-
|
57
|
-
```ruby
|
58
|
-
text = <<~TEXT
|
59
|
-
system: Would you like to proceed with the installation?
|
60
|
-
user: not bad
|
61
|
-
TEXT
|
62
|
-
|
63
|
-
data_to_extract = {
|
64
|
-
user_consent: { type: 'boolean' }
|
65
|
-
}
|
66
|
-
|
67
|
-
result = ActiveGenie::DataExtractor.from_informal(text, data_to_extract)
|
68
|
-
puts result # => { user_consent: true }
|
69
|
-
```
|
70
|
-
|
71
|
-
### Usage Notes
|
72
|
-
- Best suited for processing conversational user inputs
|
73
|
-
- Handles ambiguous or indirect responses
|
74
|
-
- Useful for chatbots and conversational interfaces
|
75
|
-
- May require more processing time and tokens
|
76
|
-
- Accuracy depends on context clarity
|
77
|
-
|
78
|
-
⚠️ Performance Impact: This method uses additional language processing, which can increase token usage and processing time.
|
79
|
-
|
80
|
-
## Interface
|
81
|
-
`.call(text, data_to_extract, options = {})`
|
82
|
-
Extracts structured data from text based on a schema.
|
83
|
-
|
84
|
-
### Parameters
|
85
|
-
| Name | Type | Description | Required | Example | Default |
|
86
|
-
| --- | --- | --- | --- | --- | --- |
|
87
|
-
| `text` | `String` | The text to extract data from. Max 1000 chars. | Yes | "These Nike shoes are red" | - |
|
88
|
-
| `data_to_extract` | `Hash` | [JSON Schema object](https://json-schema.org/understanding-json-schema/reference/object) defining data structure | Yes | `{ category: { type: 'string', enum: ["shoes"] } }` | - |
|
89
|
-
| `options` | `Hash` | Additional provider configuration options | No | `{ model: "gpt-4" }` | `{}` |
|
90
|
-
|
91
|
-
### Returns
|
92
|
-
`Hash` - Dynamic hash based on the `data_to_extract` schema.
|
93
|
-
|
94
|
-
### Options
|
95
|
-
| Name | Type | Description | Default |
|
96
|
-
| --- | --- | --- | --- |
|
97
|
-
| `model` | `String` | The model name | `YAML.load_file(config_file).first.model` |
|
98
|
-
| `api_key` | `String` | The API key to use or api_key from model on config.yml | `YAML.load_file(config_file).first.api_key` |
|
99
|
-
|
100
|
-
⚠️ Performance Considerations
|
101
|
-
- Processes may require multiple LLM calls
|
102
|
-
- Expect ~100 tokens per request + the text from input
|
103
|
-
- Consider background processing for production use
|
@@ -1,88 +0,0 @@
|
|
1
|
-
require_relative '../requester/requester.rb'
|
2
|
-
|
3
|
-
module ActiveGenie
|
4
|
-
class DataExtractor
|
5
|
-
class << self
|
6
|
-
# Extracts data from user_texts based on the schema defined in data_to_extract.
|
7
|
-
# @param text [String] The text to extract data from.
|
8
|
-
# @param data_to_extract [Hash] The schema to extract data from the text.
|
9
|
-
# @param options [Hash] The options to pass to the function.
|
10
|
-
# @return [Hash] The extracted data.
|
11
|
-
def call(text, data_to_extract, options: {})
|
12
|
-
messages = [
|
13
|
-
{ role: 'system', content: PROMPT },
|
14
|
-
{ role: 'user', content: text[0..1000] }
|
15
|
-
]
|
16
|
-
function = {
|
17
|
-
name: 'data_extractor',
|
18
|
-
description: 'Extract structured and typed data from user messages.',
|
19
|
-
schema: {
|
20
|
-
type: "object",
|
21
|
-
properties: data_to_extract_with_explaination(data_to_extract)
|
22
|
-
}
|
23
|
-
}
|
24
|
-
|
25
|
-
::ActiveGenie::Requester.function_calling(messages, function, options)
|
26
|
-
end
|
27
|
-
|
28
|
-
def from_informal(text, data_to_extract, options = {})
|
29
|
-
messages = [
|
30
|
-
{ role: 'system', content: PROMPT },
|
31
|
-
{ role: 'user', content: text }
|
32
|
-
]
|
33
|
-
properties = data_to_extract_with_explaination(data_to_extract)
|
34
|
-
properties[:message_litote] = {
|
35
|
-
type: 'boolean',
|
36
|
-
description: 'Return true if the message is a litote. A litote is a figure of speech that uses understatement to emphasize a point by stating a negative to further affirm a positive, often incorporating double negatives for effect.'
|
37
|
-
}
|
38
|
-
properties[:litote_rephrased] = {
|
39
|
-
type: 'string',
|
40
|
-
description: 'The true meaning of the litote. Rephrase the message to a positive statement.'
|
41
|
-
}
|
42
|
-
|
43
|
-
function = {
|
44
|
-
name: 'data_extractor',
|
45
|
-
description: 'Extract structured and typed data from user messages.',
|
46
|
-
schema: { type: "object", properties: }
|
47
|
-
}
|
48
|
-
|
49
|
-
::ActiveGenie::Requester.function_calling(messages, function, options)
|
50
|
-
end
|
51
|
-
|
52
|
-
private
|
53
|
-
|
54
|
-
PROMPT = <<~PROMPT
|
55
|
-
Extract structured and typed data from user messages.
|
56
|
-
Identify relevant information within user messages and categorize it into predefined data fields with specific data types.
|
57
|
-
|
58
|
-
# Steps
|
59
|
-
1. **Identify Data Types**: Determine the types of data to collect, such as names, dates, email addresses, phone numbers, etc.
|
60
|
-
2. **Extract Information**: Use pattern recognition and language understanding to identify and extract the relevant pieces of data from the user message.
|
61
|
-
3. **Categorize Data**: Assign the extracted data to the appropriate predefined fields.
|
62
|
-
4. **Structure Data**: Format the extracted and categorized data in a structured format, such as JSON.
|
63
|
-
|
64
|
-
# Output Format
|
65
|
-
The output should be a JSON object containing fields with their corresponding extracted values. If a value is not found, the field should still be included with a null value.
|
66
|
-
|
67
|
-
# Notes
|
68
|
-
- Handle missing or partial information gracefully.
|
69
|
-
- Manage multiple occurrences of similar data points by prioritizing the first one unless specified otherwise.
|
70
|
-
- Be flexible to handle variations in data format and language clues.
|
71
|
-
PROMPT
|
72
|
-
|
73
|
-
def data_to_extract_with_explaination(data_to_extract)
|
74
|
-
with_explaination = {}
|
75
|
-
|
76
|
-
data_to_extract.each do |key, value|
|
77
|
-
with_explaination[key] = value
|
78
|
-
with_explaination["#{key}_explanation"] = {
|
79
|
-
type: 'string',
|
80
|
-
description: "The chain of thought that led to the conclusion about: #{key}. Can be blank if the user didn't provide any context",
|
81
|
-
}
|
82
|
-
end
|
83
|
-
|
84
|
-
with_explaination
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|