sentiment_insights 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/EXPORT_USAGE.md +325 -0
- data/Gemfile.lock +9 -1
- data/README.md +114 -18
- data/lib/sentiment_insights/clients/entities/claude_client.rb +131 -0
- data/lib/sentiment_insights/clients/key_phrases/claude_client.rb +151 -0
- data/lib/sentiment_insights/clients/sentiment/claude_client.rb +126 -0
- data/lib/sentiment_insights/configuration.rb +2 -1
- data/lib/sentiment_insights/export/base_exporter.rb +308 -0
- data/lib/sentiment_insights/export/csv_exporter.rb +261 -0
- data/lib/sentiment_insights/export/excel_exporter.rb +334 -0
- data/lib/sentiment_insights/export/exportable.rb +152 -0
- data/lib/sentiment_insights/export/exporter.rb +169 -0
- data/lib/sentiment_insights/export/json_exporter.rb +183 -0
- data/lib/sentiment_insights/insights/entities.rb +7 -2
- data/lib/sentiment_insights/insights/key_phrases.rb +6 -2
- data/lib/sentiment_insights/insights/sentiment.rb +7 -3
- data/lib/sentiment_insights/version.rb +1 -1
- data/lib/sentiment_insights.rb +1 -0
- data/sentiment_insights.gemspec +3 -0
- metadata +26 -2
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'json'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
module SentimentInsights
  module Clients
    module Entities
      # Named-entity extraction client that talks to the Anthropic Messages API.
      #
      # Every non-blank answer is sent to the model in its own request; the
      # entities returned for all answers are then merged into one list keyed by
      # (lower-cased text, type) with the ids of the responses that mention them.
      class ClaudeClient
        DEFAULT_MODEL = "claude-3-haiku-20240307"
        DEFAULT_RETRIES = 3
        API_URL = "https://api.anthropic.com/v1/messages"

        # Built-in prompt; filled in with Kernel#format.
        DEFAULT_PROMPT = <<~PROMPT
          Extract named entities from this sentence based on the question.
          Return them as a JSON array with each item having "text" and "type" (e.g., PERSON, ORGANIZATION, LOCATION, PRODUCT).
          %{question}
          Sentence: "%{text}"
        PROMPT

        # Placeholders accepted in a caller-supplied prompt: %{text}, {text},
        # %{question} and {question}.
        PLACEHOLDER = /%?\{(text|question)\}/.freeze

        # @param api_key [String] Anthropic API key (defaults to ENV['CLAUDE_API_KEY'])
        # @param model [String] model identifier sent with every request
        # @param max_retries [Integer] maximum number of HTTP attempts per request
        # @raise [ArgumentError] when the API key is missing or blank
        def initialize(api_key: ENV['CLAUDE_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES)
          unless api_key && !api_key.to_s.strip.empty?
            raise ArgumentError, "Claude API key is required"
          end

          @api_key = api_key
          @model = model
          @max_retries = max_retries
          @logger = Logger.new($stdout)
        end

        # Extracts entities from each entry's :answer.
        #
        # Entries whose answer is blank are skipped, but ids stay tied to the
        # entry's position ("r_1" is the first entry, blank or not).
        #
        # @param entries [Array<Hash>] hashes with :answer and optional :segment
        # @param question [String, nil] question the answers respond to
        # @param prompt [String, nil] custom prompt; may contain %{text}/{text}
        #   and %{question}/{question} placeholders
        # @return [Hash] { entities: [{ entity:, type:, mentions:, summary: }],
        #   responses: [{ id:, sentence:, segment: }] }
        def extract_batch(entries, question: nil, prompt: nil)
          responses = []
          entity_map = Hash.new { |h, k| h[k] = [] }

          entries.each_with_index do |entry, index|
            sentence = entry[:answer].to_s.strip
            next if sentence.empty?

            response_id = "r_#{index + 1}"
            entities = extract_entities_from_sentence(sentence, question: question, prompt: prompt)

            responses << {
              id: response_id,
              sentence: sentence,
              segment: entry[:segment] || {}
            }

            entities.each do |ent|
              # The model occasionally returns non-string values (e.g. a bare
              # year as a number); coerce before downcasing.
              text = ent[:text].to_s
              next if text.empty? || ent[:type].to_s.empty?

              entity_map[[text.downcase, ent[:type]]] << response_id
            end
          end

          entity_records = entity_map.map do |(text, type), ref_ids|
            { entity: text, type: type, mentions: ref_ids.uniq, summary: nil }
          end

          { entities: entity_records, responses: responses }
        end

        private

        # Builds the prompt for one sentence, sends it and returns the parsed
        # entities as an array of symbol-keyed hashes (empty on any failure).
        def extract_entities_from_sentence(text, question: nil, prompt: nil)
          prompt_to_use =
            if prompt
              interpolate_prompt(prompt, text: text, question: question)
            else
              question_line = question ? "Question: #{question}" : ""
              format(DEFAULT_PROMPT, question: question_line, text: text)
            end

          response = post_claude(build_request_body(prompt_to_use))
          parse_entities(response.dig("content", 0, "text").to_s.strip)
        end

        # Fills the placeholders of a custom prompt in a single pass.
        #
        # The block form of gsub inserts the values verbatim: backslashes in the
        # answer are not treated as backreferences, and text that was just
        # inserted is never scanned for placeholders again. Question
        # placeholders are left untouched when no question was given.
        def interpolate_prompt(template, text:, question:)
          template.gsub(PLACEHOLDER) do |placeholder|
            if Regexp.last_match(1) == "text"
              text.to_s
            else
              question ? question.to_s : placeholder
            end
          end
        end

        # Turns the model's reply into an array of entity hashes.
        #
        # Only the outermost [...] span is parsed when one is present, so a
        # reply wrapped in prose or a Markdown code fence still yields its
        # entities. A single JSON object is treated as a one-element list, and
        # anything that is not a hash is discarded.
        def parse_entities(raw_json)
          payload = raw_json[/\[.*\]/m] || raw_json
          parsed = JSON.parse(payload, symbolize_names: true)
          items = parsed.is_a?(Array) ? parsed : [parsed]
          items.select { |item| item.is_a?(Hash) }
        rescue JSON::ParserError => e
          @logger.warn "Failed to parse entity JSON: #{e.message}"
          []
        end

        # Request payload for a single-turn user message.
        def build_request_body(prompt)
          {
            model: @model,
            max_tokens: 1000,
            messages: [{ role: "user", content: prompt }]
          }
        end

        # POSTs the body to the Messages API, retrying with exponential backoff
        # (1s, 2s, 4s, ...) on any non-200 status or raised error.
        #
        # @return [Hash] the decoded JSON response, or {} once attempts run out
        def post_claude(body)
          uri = URI(API_URL)
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = true
          payload = JSON.generate(body)

          1.upto(@max_retries) do |attempt|
            request = Net::HTTP::Post.new(uri)
            request["Content-Type"] = "application/json"
            request["x-api-key"] = @api_key
            request["anthropic-version"] = "2023-06-01"
            request.body = payload

            begin
              response = http.request(request)
              return JSON.parse(response.body) if response.code.to_i == 200

              @logger.warn "Claude entity extraction failed (#{response.code}): #{response.body}"
            rescue => e
              @logger.error "Error during entity extraction: #{e.class} - #{e.message}"
            end

            # No point in waiting after the last attempt.
            sleep(2**(attempt - 1)) if attempt < @max_retries
          end

          {}
        end
      end
    end
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'json'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
module SentimentInsights
  module Clients
    module KeyPhrases
      # Key-phrase extraction client that talks to the Anthropic Messages API.
      #
      # For every non-blank answer two requests are made: one asking for a
      # comma-separated list of key phrases and one asking for a sentiment
      # label. Phrases are merged case-insensitively across answers.
      class ClaudeClient
        DEFAULT_MODEL = "claude-3-haiku-20240307"
        DEFAULT_RETRIES = 3
        API_URL = "https://api.anthropic.com/v1/messages"

        # Built-in key-phrase prompt; filled in with Kernel#format.
        DEFAULT_KEY_PHRASE_PROMPT = <<~PROMPT.strip.freeze
          Extract the most important key phrases that represent the main ideas or feedback in the sentence below.
          Ignore stop words and return each key phrase in its natural form, comma-separated.

          Question: %{question}

          Text: %{text}
        PROMPT

        # Placeholders accepted in a caller-supplied key-phrase prompt:
        # %{text}, {text}, %{question} and {question}.
        PLACEHOLDER = /%?\{(text|question)\}/.freeze

        # @param api_key [String] Anthropic API key (defaults to ENV['CLAUDE_API_KEY'])
        # @param model [String] model identifier sent with every request
        # @param max_retries [Integer] maximum number of HTTP attempts per request
        # @raise [ArgumentError] when the API key is missing or blank
        def initialize(api_key: ENV['CLAUDE_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES)
          unless api_key && !api_key.to_s.strip.empty?
            raise ArgumentError, "Claude API key is required"
          end

          @api_key = api_key
          @model = model
          @max_retries = max_retries
          @logger = Logger.new($stdout)
        end

        # Extracts key phrases and a sentiment label for each entry's :answer.
        #
        # Entries whose answer is blank are skipped, but ids stay tied to the
        # entry's position ("r_1" is the first entry, blank or not).
        #
        # @param entries [Array<Hash>] hashes with :answer and optional :segment
        # @param question [String, nil] question the answers respond to
        # @param key_phrase_prompt [String, nil] custom key-phrase prompt; may
        #   contain %{text}/{text} and %{question}/{question} placeholders
        # @param sentiment_prompt [String, nil] custom sentiment prompt; may
        #   contain a %{text} placeholder
        # @return [Hash] { phrases: [{ phrase:, mentions:, summary: }],
        #   responses: [{ id:, sentence:, sentiment:, segment: }] }
        def extract_batch(entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil)
          responses = []
          phrase_map = Hash.new { |h, k| h[k] = [] }

          entries.each_with_index do |entry, index|
            sentence = entry[:answer].to_s.strip
            next if sentence.empty?

            response_id = "r_#{index + 1}"
            phrases = extract_key_phrases(sentence, question: question, prompt: key_phrase_prompt)
            sentiment = get_sentiment(sentence, prompt: sentiment_prompt)

            responses << {
              id: response_id,
              sentence: sentence,
              sentiment: sentiment,
              segment: entry[:segment] || {}
            }

            phrases.each do |phrase|
              next if phrase.strip.empty?

              phrase_map[phrase.downcase] << response_id
            end
          end

          phrase_records = phrase_map.map do |phrase, ref_ids|
            { phrase: phrase, mentions: ref_ids.uniq, summary: nil }
          end

          { phrases: phrase_records, responses: responses }
        end

        private

        # Asks the model for key phrases and splits its comma-separated reply.
        #
        # @return [Array<String>] stripped, non-empty phrases (empty when the
        #   request produced no text)
        def extract_key_phrases(text, question: nil, prompt: nil)
          prompt_to_use =
            if prompt
              interpolate_prompt(prompt, text: text, question: question)
            else
              format(DEFAULT_KEY_PHRASE_PROMPT, question: question ? question.to_s : "", text: text)
            end

          response = post_claude(build_request_body(prompt_to_use))
          content = response.dig("content", 0, "text").to_s.strip
          content.split(',').map(&:strip).reject(&:empty?)
        end

        # Fills the placeholders of a custom prompt in a single pass.
        #
        # The block form of gsub inserts the values verbatim: backslashes in the
        # answer are not treated as backreferences, and text that was just
        # inserted is never scanned for placeholders again. Question
        # placeholders are left untouched when no question was given.
        def interpolate_prompt(template, text:, question:)
          template.gsub(PLACEHOLDER) do |placeholder|
            if Regexp.last_match(1) == "text"
              text.to_s
            else
              question ? question.to_s : placeholder
            end
          end
        end

        # Asks the model for a sentiment label for the text.
        #
        # @return [Symbol] :positive or :negative when the reply contains that
        #   word, otherwise :neutral (including when no reply was received)
        def get_sentiment(text, prompt: nil)
          default_prompt = <<~PROMPT
            Classify the sentiment of this text as Positive, Neutral, or Negative.
            Reply with just the sentiment label.

            Text: "#{text}"
          PROMPT

          # Block form so backslashes in the text are inserted literally.
          prompt_to_use = prompt ? prompt.gsub('%{text}') { text } : default_prompt

          response = post_claude(build_request_body(prompt_to_use))

          case response.dig("content", 0, "text").to_s.strip.downcase
          when /positive/ then :positive
          when /negative/ then :negative
          else :neutral
          end
        end

        # Request payload for a single-turn user message.
        def build_request_body(prompt)
          {
            model: @model,
            max_tokens: 1000,
            messages: [{ role: "user", content: prompt }]
          }
        end

        # POSTs the body to the Messages API, retrying with exponential backoff
        # (1s, 2s, 4s, ...) on any non-200 status or raised error.
        #
        # @return [Hash] the decoded JSON response, or {} once attempts run out
        def post_claude(body)
          uri = URI(API_URL)
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = true
          payload = JSON.generate(body)

          1.upto(@max_retries) do |attempt|
            request = Net::HTTP::Post.new(uri)
            request["Content-Type"] = "application/json"
            request["x-api-key"] = @api_key
            request["anthropic-version"] = "2023-06-01"
            request.body = payload

            begin
              response = http.request(request)
              return JSON.parse(response.body) if response.code.to_i == 200

              @logger.warn "Claude key phrase extraction failed (#{response.code}): #{response.body}"
            rescue => e
              @logger.error "Error during key phrase extraction: #{e.class} - #{e.message}"
            end

            # No point in waiting after the last attempt.
            sleep(2**(attempt - 1)) if attempt < @max_retries
          end

          {}
        end
      end
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'json'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
module SentimentInsights
  module Clients
    module Sentiment
      # Sentiment classification client that talks to the Anthropic Messages API.
      #
      # Answers are sent in numbered batches; the model is asked to reply with
      # one "N. Label (score)" line per answer.
      class ClaudeClient
        DEFAULT_MODEL = "claude-3-haiku-20240307"
        DEFAULT_RETRIES = 3
        API_URL = "https://api.anthropic.com/v1/messages"

        # One result line, e.g. "2. Negative (-0.8)" or "3) neutral (0)".
        SENTIMENT_LINE =
          /\A(?<number>\d+)[\.:)]?\s*(?<label>Positive|Negative|Neutral)\s*\((?<score>[-\d\.]+)\)/i.freeze

        # @param api_key [String] Anthropic API key (defaults to ENV['CLAUDE_API_KEY'])
        # @param model [String] model identifier sent with every request
        # @param max_retries [Integer] maximum number of HTTP attempts per batch
        # @param return_scores [Boolean] stored on the instance; not read
        #   anywhere in this class
        # @raise [ArgumentError] when the API key is missing or blank
        def initialize(api_key: ENV['CLAUDE_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES, return_scores: true)
          unless api_key && !api_key.to_s.strip.empty?
            raise ArgumentError, "Claude API key is required"
          end

          @api_key = api_key
          @model = model
          @max_retries = max_retries
          @return_scores = return_scores
          @logger = Logger.new($stdout)
        end

        # Classifies the sentiment of every entry's :answer.
        #
        # @param entries [Array<Hash>] hashes with an :answer key
        # @param question [String, nil] question the answers respond to
        # @param prompt [String, nil] instructions replacing the built-in ones
        # @param batch_size [Integer] number of answers sent per request
        # @return [Array<Hash>] one { label:, score: } per entry, in entry order
        # @raise [RuntimeError] on a non-200, non-429 HTTP status
        # @raise [StandardError] whatever the HTTP request itself raises
        def analyze_entries(entries, question: nil, prompt: nil, batch_size: 50)
          entries.each_slice(batch_size).flat_map do |batch|
            request_body = {
              model: @model,
              max_tokens: 1000,
              messages: [
                { role: "user", content: build_prompt_content(batch, question: question, prompt: prompt) }
              ]
            }

            parse_sentiments(post_with_retry(request_body), batch.size)
          end
        end

        private

        # Sends one batch request, retrying with exponential backoff while the
        # API answers HTTP 429.
        #
        # @return [String, nil] text of the first content item, or nil when
        #   every attempt was rate limited
        def post_with_retry(request_body)
          uri = URI(API_URL)
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = true
          payload = JSON.generate(request_body)

          1.upto(@max_retries) do |attempt|
            request = Net::HTTP::Post.new(uri)
            request["Content-Type"] = "application/json"
            request["x-api-key"] = @api_key
            request["anthropic-version"] = "2023-06-01"
            request.body = payload

            begin
              response = http.request(request)
            rescue StandardError => e
              @logger.error "Claude API request error: #{e.class} - #{e.message}"
              raise
            end

            status = response.code.to_i
            if status == 429
              if attempt < @max_retries
                @logger.warn "Rate limit (HTTP 429) on attempt #{attempt}. Retrying..."
                sleep(2**(attempt - 1))
              else
                # Last attempt: waiting again would only delay the caller.
                @logger.warn "Rate limit (HTTP 429) on attempt #{attempt}. Giving up."
              end
              next
            end

            unless status == 200
              @logger.error "Request failed (#{status}): #{response.body}"
              raise "Claude API Error: #{status}"
            end

            return JSON.parse(response.body).dig("content", 0, "text")
          end

          nil
        end

        # Assembles the user message: optional question, instructions, then the
        # answers as a 1-based numbered list of quoted strings.
        def build_prompt_content(entries, question: nil, prompt: nil)
          content = +""
          content << "Question: #{question}\n\n" if question

          instructions = prompt || <<~DEFAULT
            For each of the following customer responses, classify the sentiment as Positive, Neutral, or Negative, and assign a score between -1.0 (very negative) and 1.0 (very positive).

            Reply with a numbered list like:
            1. Positive (0.9)
            2. Negative (-0.8)
            3. Neutral (0.0)
          DEFAULT

          content << instructions.strip << "\n\n"

          entries.each_with_index do |entry, index|
            content << "#{index + 1}. \"#{entry[:answer]}\"\n"
          end

          content
        end

        # Parses the model's numbered reply into exactly expected_count results.
        #
        # When the line numbers are unique and within 1..expected_count each
        # result is placed at the position its number names, so an item the
        # model skipped becomes neutral instead of shifting every later result
        # onto the wrong entry. Otherwise results are taken in the order they
        # appear. Missing results are filled with neutral / 0.0 and surplus
        # ones are dropped.
        #
        # @param content [String, nil] raw reply text
        # @param expected_count [Integer] number of answers in the batch
        # @return [Array<Hash>] { label: Symbol, score: Float } per answer
        def parse_sentiments(content, expected_count)
          parsed = content.to_s.strip.split(/\r?\n/).map do |line|
            match = SENTIMENT_LINE.match(line.strip)
            next unless match

            [match[:number].to_i, { label: match[:label].downcase.to_sym, score: match[:score].to_f }]
          end.compact

          if parsed.size != expected_count
            @logger.warn "Expected #{expected_count} results, got #{parsed.size}. Padding with neutral."
          end

          numbers = parsed.map(&:first)
          aligned = numbers.uniq.size == numbers.size &&
                    numbers.all? { |number| number.between?(1, expected_count) }

          if aligned
            by_number = parsed.to_h
            (1..expected_count).map { |number| by_number[number] || neutral_sentiment }
          else
            in_order = parsed.map(&:last).first(expected_count)
            in_order << neutral_sentiment while in_order.size < expected_count
            in_order
          end
        end

        # Fresh placeholder result for an answer the model did not classify.
        def neutral_sentiment
          { label: :neutral, score: 0.0 }
        end
      end
    end
  end
end
|
@@ -1,10 +1,11 @@
|
|
1
1
|
module SentimentInsights
|
2
2
|
class Configuration
|
3
|
-
attr_accessor :provider, :openai_api_key, :aws_region
|
3
|
+
attr_accessor :provider, :openai_api_key, :aws_region, :claude_api_key
|
4
4
|
|
5
5
|
def initialize
|
6
6
|
@provider = :openai
|
7
7
|
@openai_api_key = ENV["OPENAI_API_KEY"]
|
8
|
+
@claude_api_key = ENV["CLAUDE_API_KEY"]
|
8
9
|
@aws_region = "us-east-1"
|
9
10
|
end
|
10
11
|
end
|