sentiment_insights 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+ require 'logger'
5
+
6
module SentimentInsights
  module Clients
    module Entities
      # Named-entity extraction backed by the OpenAI chat completions API.
      class OpenAIClient
        DEFAULT_MODEL = "gpt-3.5-turbo"
        DEFAULT_RETRIES = 3

        # @param api_key [String] OpenAI API key (defaults to OPENAI_API_KEY env var)
        # @param model [String] chat model name
        # @param max_retries [Integer] HTTP retry attempts before giving up
        # @raise [ArgumentError] when no API key is available
        def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES)
          # Guard clause instead of `@x = y or raise` — the low-precedence `or`
          # form is a well-known Ruby precedence trap.
          raise ArgumentError, "OpenAI API key is required" if api_key.to_s.empty?

          @api_key = api_key
          @model = model
          @max_retries = max_retries
          @logger = Logger.new($stdout)
        end

        # Extract named entities from each entry's :answer and group mentions
        # by (downcased text, type).
        # NOTE: issues one API call per non-empty entry — O(n) requests.
        # @param entries [Array<Hash>] each with :answer and optional :segment
        # @param question [String, nil] unused; kept for interface parity with other clients
        # @return [Hash] { entities: [...], responses: [...] }
        def extract_batch(entries, question: nil)
          responses = []
          entity_map = Hash.new { |h, k| h[k] = [] }

          entries.each_with_index do |entry, index|
            sentence = entry[:answer].to_s.strip
            next if sentence.empty?

            response_id = "r_#{index + 1}"
            entities = extract_entities_from_sentence(sentence)

            responses << {
              id: response_id,
              sentence: sentence,
              segment: entry[:segment] || {}
            }

            entities.each do |ent|
              # Skip malformed items lacking usable text (the original crashed
              # with NoMethodError on nil when the model omitted "text").
              text = ent[:text].to_s.strip
              next if text.empty?

              entity_map[[text.downcase, ent[:type]]] << response_id
            end
          end

          entity_records = entity_map.map do |(text, type), ref_ids|
            {
              entity: text,
              type: type,
              mentions: ref_ids.uniq,
              summary: nil
            }
          end

          { entities: entity_records, responses: responses }
        end

        private

        # Ask the model for a JSON array of { "text", "type" } entities.
        # Returns [] on parse failure or when the reply is not an array.
        def extract_entities_from_sentence(text)
          prompt = <<~PROMPT
            Extract named entities from this sentence. Return them as a JSON array with each item having "text" and "type" (e.g., PERSON, ORGANIZATION, LOCATION, PRODUCT).
            Sentence: "#{text}"
          PROMPT

          body = build_request_body(prompt)
          response = post_openai(body)

          begin
            raw_json = response.dig("choices", 0, "message", "content").to_s.strip
            parsed = JSON.parse(raw_json, symbolize_names: true)
            # Models occasionally reply with a bare object or string; only an
            # array of entity hashes is usable downstream.
            parsed.is_a?(Array) ? parsed : []
          rescue JSON::ParserError => e
            @logger.warn "Failed to parse entity JSON: #{e.message}"
            []
          end
        end

        # Build the chat-completion request payload for a single prompt.
        def build_request_body(prompt)
          {
            model: @model,
            messages: [{ role: "user", content: prompt }],
            temperature: 0.3
          }
        end

        # POST to OpenAI with bounded retries and exponential backoff.
        # Returns the parsed JSON response, or {} if all attempts fail.
        def post_openai(body)
          uri = URI("https://api.openai.com/v1/chat/completions")
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = true

          attempt = 0
          while attempt < @max_retries
            attempt += 1

            request = Net::HTTP::Post.new(uri)
            request["Content-Type"] = "application/json"
            request["Authorization"] = "Bearer #{@api_key}"
            request.body = JSON.generate(body)

            begin
              response = http.request(request)
              return JSON.parse(response.body) if response.code.to_i == 200
              @logger.warn "OpenAI entity extraction failed (#{response.code}): #{response.body}"
            rescue => e
              @logger.error "Error during entity extraction: #{e.class} - #{e.message}"
            end

            # Exponential backoff: 1s, 2s, 4s, ...
            sleep(2 ** (attempt - 1)) if attempt < @max_retries
          end

          {}
        end
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ require 'aws-sdk-comprehend'
2
+ require 'logger'
3
+
4
module SentimentInsights
  module Clients
    module KeyPhrases
      # Key-phrase + sentiment extraction backed by AWS Comprehend batch APIs.
      class AwsClient
        MAX_BATCH_SIZE = 25 # AWS Comprehend batch-document limit

        # @param region [String] AWS region for the Comprehend client
        def initialize(region: 'us-east-1')
          @comprehend = Aws::Comprehend::Client.new(region: region)
          @logger = Logger.new($stdout)
        end

        # Extract key phrases and sentiment for each entry.
        # @param entries [Array<Hash>] each with :answer and optional :segment
        # @param question [String, nil] unused; kept for interface parity with other clients
        # @return [Hash] { phrases: [...], responses: [...] }
        def extract_batch(entries, question: nil)
          responses = []
          phrase_map = Hash.new { |h, k| h[k] = [] }

          # Split into batches for AWS Comprehend
          entries.each_slice(MAX_BATCH_SIZE).with_index do |batch, batch_idx|
            # NOTE(review): AWS's 5000 limit is bytes; [0...5000] slices
            # characters, so multibyte text may still exceed it — confirm.
            texts = batch.map { |e| e[:answer].to_s.strip[0...5000] }

            begin
              phrase_resp = @comprehend.batch_detect_key_phrases({
                text_list: texts,
                language_code: 'en'
              })

              sentiment_resp = @comprehend.batch_detect_sentiment({
                text_list: texts,
                language_code: 'en'
              })

              phrase_resp.result_list.each do |phrase_result|
                # ResultList is sparse when some documents error out, so use the
                # result's own batch index rather than its position in the list
                # (the original used the positional idx, which drifts after errors).
                src_idx = phrase_result.index
                sentiment_result = sentiment_resp.result_list.find { |s| s.index == src_idx }
                sentiment_label = sentiment_result&.sentiment&.downcase&.to_sym || :neutral

                entry_index = (batch_idx * MAX_BATCH_SIZE) + src_idx
                entry = entries[entry_index]
                sentence = texts[src_idx]
                response_id = "r_#{entry_index + 1}"

                responses << {
                  id: response_id,
                  sentence: sentence,
                  sentiment: sentiment_label,
                  segment: entry[:segment] || {}
                }

                phrases = phrase_result.key_phrases.map { |p| p.text.downcase.strip }.uniq
                phrases.each { |phrase| phrase_map[phrase] << response_id }
              end

              phrase_resp.error_list.each do |error|
                @logger.warn "AWS KeyPhrase error at index #{error.index}: #{error.error_code}"
              end

              sentiment_resp.error_list.each do |error|
                @logger.warn "AWS Sentiment error at index #{error.index}: #{error.error_code}"
              end

            rescue Aws::Comprehend::Errors::ServiceError => e
              @logger.error "AWS Comprehend batch error: #{e.message}"
              # Whole batch failed: emit neutral placeholders so response IDs
              # stay aligned with entry positions.
              batch.each_with_index do |entry, i|
                entry_index = (batch_idx * MAX_BATCH_SIZE) + i
                responses << {
                  id: "r_#{entry_index + 1}",
                  sentence: entry[:answer],
                  sentiment: :neutral,
                  segment: entry[:segment] || {}
                }
              end
            end
          end

          phrases = phrase_map.map do |phrase, ref_ids|
            {
              phrase: phrase,
              mentions: ref_ids.uniq,
              summary: nil
            }
          end

          { phrases: phrases, responses: responses }
        end
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+ require 'logger'
5
+ require_relative '../sentiment/open_ai_client'
6
+
7
module SentimentInsights
  module Clients
    module KeyPhrases
      # Key-phrase extraction via the OpenAI chat API, with per-response
      # sentiment supplied by the sibling sentiment client.
      class OpenAIClient
        DEFAULT_MODEL = "gpt-3.5-turbo"
        DEFAULT_RETRIES = 3

        # @param api_key [String] OpenAI API key (defaults to OPENAI_API_KEY env var)
        # @param model [String] chat model name
        # @param max_retries [Integer] HTTP retry attempts before giving up
        # @raise [ArgumentError] when no API key is available
        def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES)
          # Guard clause instead of `@x = y or raise` (low-precedence `or` trap).
          raise ArgumentError, "OpenAI API key is required" if api_key.to_s.empty?

          @api_key = api_key
          @model = model
          @max_retries = max_retries
          @logger = Logger.new($stdout)
          @sentiment_client = SentimentInsights::Clients::Sentiment::OpenAIClient.new(api_key: @api_key, model: @model)
        end

        # Extract key phrases from entries and enrich with sentiment.
        # NOTE: issues one API call per non-empty entry plus one batched
        # sentiment call — O(n) requests for n entries.
        # @param entries [Array<Hash>] each with :answer and optional :segment
        # @param question [String, nil] question context forwarded to the sentiment client
        # @return [Hash] { phrases: [...], responses: [...] }
        def extract_batch(entries, question: nil)
          responses = []
          phrase_map = Hash.new { |h, k| h[k] = [] }

          # Fetch sentiments in batch from sentiment client (one result per entry,
          # aligned by index).
          sentiments = @sentiment_client.analyze_entries(entries, question: question)

          entries.each_with_index do |entry, index|
            sentence = entry[:answer].to_s.strip
            next if sentence.empty?

            response_id = "r_#{index + 1}"
            phrases = extract_phrases_from_sentence(sentence)

            sentiment = sentiments[index] || { label: :neutral }

            responses << {
              id: response_id,
              sentence: sentence,
              sentiment: sentiment[:label],
              segment: entry[:segment] || {}
            }

            phrases.each do |phrase|
              phrase_map[phrase.downcase] << response_id
            end
          end

          phrases = phrase_map.map do |phrase, ref_ids|
            {
              phrase: phrase,
              mentions: ref_ids.uniq,
              summary: nil
            }
          end

          { phrases: phrases, responses: responses }
        end

        private

        # Ask the model for the key phrases of one sentence.
        # @return [Array<String>]
        def extract_phrases_from_sentence(text)
          prompt = <<~PROMPT
            Extract the key phrases from this sentence:
            "#{text}"
            Return them as a comma-separated list.
          PROMPT

          body = build_request_body(prompt)
          response = post_openai(body)
          parse_phrases(response)
        end

        # Build the chat-completion request payload for a single prompt.
        def build_request_body(prompt)
          {
            model: @model,
            messages: [{ role: "user", content: prompt }],
            temperature: 0.3
          }
        end

        # POST to OpenAI with bounded retries and exponential backoff.
        # Returns the parsed JSON response, or {} if all attempts fail.
        def post_openai(body)
          uri = URI("https://api.openai.com/v1/chat/completions")
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = true

          attempt = 0
          while attempt < @max_retries
            attempt += 1

            request = Net::HTTP::Post.new(uri)
            request["Content-Type"] = "application/json"
            request["Authorization"] = "Bearer #{@api_key}"
            request.body = JSON.generate(body)

            begin
              response = http.request(request)
              return JSON.parse(response.body) if response.code.to_i == 200
              @logger.warn "OpenAI request failed (#{response.code}): #{response.body}"
            rescue => e
              @logger.error "OpenAI HTTP error: #{e.class} - #{e.message}"
            end

            # Exponential backoff: 1s, 2s, 4s, ...
            sleep(2 ** (attempt - 1)) if attempt < @max_retries
          end

          {}
        end

        # Split the model's comma/newline-separated reply into clean phrases.
        def parse_phrases(response)
          text = response.dig("choices", 0, "message", "content").to_s.strip
          text.split(/,|\n/).map(&:strip).reject(&:empty?)
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require 'aws-sdk-comprehend'
2
+ require 'logger'
3
+
4
module SentimentInsights
  module Clients
    module Sentiment
      # Sentiment analysis backed by AWS Comprehend's batch API.
      class AwsComprehendClient
        MAX_BATCH_SIZE = 25 # AWS limit

        # @param region [String] AWS region for the Comprehend client
        def initialize(region: 'us-east-1')
          @client = Aws::Comprehend::Client.new(region: region)
          @logger = Logger.new($stdout)
        end

        # Analyze a batch of entries using AWS Comprehend.
        # @param entries [Array<Hash>] each with :answer key
        # @param question [String, nil] unused; kept for interface parity with other clients
        # @return [Array<Hash>] each with :label (symbol) and :score (float)
        def analyze_entries(entries, question: nil)
          results = []

          entries.each_slice(MAX_BATCH_SIZE).with_index do |batch, batch_idx|
            # Absolute offset of this batch within the overall results array.
            offset = batch_idx * MAX_BATCH_SIZE
            texts = batch.map { |entry| entry[:answer].to_s.strip[0...5000] } # max per AWS

            begin
              resp = @client.batch_detect_sentiment({
                text_list: texts,
                language_code: "en"
              })

              resp.result_list.each do |r|
                label = r.sentiment.downcase.to_sym # :positive, :neutral, :negative, :mixed
                score = compute_score(r.sentiment, r.sentiment_score)
                results << { label: label, score: score }
              end

              # Errored documents are reported separately. error.index is
              # relative to THIS batch, so offset it before inserting the
              # neutral filler (the original inserted at the raw error.index,
              # corrupting ordering for every batch after the first).
              resp.error_list.each do |error|
                @logger.warn "AWS Comprehend error at index #{error.index}: #{error.error_code}"
                results.insert(offset + error.index, { label: :neutral, score: 0.0 })
              end

            rescue Aws::Comprehend::Errors::ServiceError => e
              @logger.error "AWS Comprehend batch error: #{e.message}"
              # Whole batch failed: keep alignment with one neutral per entry.
              batch.size.times { results << { label: :neutral, score: 0.0 } }
            end
          end

          results
        end

        private

        # Convert AWS sentiment score struct to a single signed score in [-1, 1].
        def compute_score(label, scores)
          case label.upcase
          when "POSITIVE"
            scores.positive.to_f
          when "NEGATIVE"
            -scores.negative.to_f
          when "NEUTRAL"
            0.0
          when "MIXED"
            # Net signal for mixed: positive minus negative confidence.
            (scores.positive.to_f - scores.negative.to_f).round(2)
          else
            0.0
          end
        end
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+ require 'logger'
5
+
6
module SentimentInsights
  module Clients
    module Sentiment
      # Sentiment classifier backed by the OpenAI chat completions API.
      # All entries are sent in a single prompt; the numbered reply is parsed
      # back into one { label:, score: } hash per entry.
      class OpenAIClient
        DEFAULT_MODEL = "gpt-3.5-turbo"
        DEFAULT_RETRIES = 3

        # @param api_key [String] OpenAI API key (defaults to OPENAI_API_KEY env var)
        # @param model [String] chat model name
        # @param max_retries [Integer] attempts when rate-limited (HTTP 429)
        # @param return_scores [Boolean] reserved; scores are currently always parsed
        # @raise [ArgumentError] when no API key is available
        def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES, return_scores: true)
          # Guard clause instead of `@x = y or raise` (low-precedence `or` trap).
          raise ArgumentError, "OpenAI API key is required" if api_key.to_s.empty?

          @api_key = api_key
          @model = model
          @max_retries = max_retries
          @return_scores = return_scores
          @logger = Logger.new($stdout)
        end

        # Classify sentiment for all entries in one request.
        # @param entries [Array<Hash>] each with :answer
        # @param question [String, nil] optional survey question for context
        # @return [Array<Hash>] exactly entries.size hashes with :label and :score
        # @raise [RuntimeError] on non-429 HTTP failures
        def analyze_entries(entries, question: nil)
          prompt_content = build_prompt_content(entries, question)
          request_body = {
            model: @model,
            messages: [
              { role: "user", content: prompt_content }
            ],
            temperature: 0.0 # deterministic output keeps the reply parseable
          }

          uri = URI("https://api.openai.com/v1/chat/completions")
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = true

          response_content = nil
          attempt = 0

          while attempt < @max_retries
            attempt += 1
            request = Net::HTTP::Post.new(uri)
            request["Content-Type"] = "application/json"
            request["Authorization"] = "Bearer #{@api_key}"
            request.body = JSON.generate(request_body)

            begin
              response = http.request(request)
            rescue StandardError => e
              @logger.error "OpenAI API request error: #{e.class} - #{e.message}"
              raise
            end

            status = response.code.to_i
            if status == 429
              # Back off exponentially on rate limits. If every attempt is
              # throttled, response_content stays nil and the parser pads
              # with neutral results instead of raising.
              @logger.warn "Rate limit (HTTP 429) on attempt #{attempt}. Retrying..."
              sleep(2 ** (attempt - 1))
              next
            elsif status != 200
              @logger.error "Request failed (#{status}): #{response.body}"
              raise "OpenAI API Error: #{status}"
            else
              data = JSON.parse(response.body)
              response_content = data.dig("choices", 0, "message", "content")
              break
            end
          end

          parse_sentiments(response_content, entries.size)
        end

        private

        # Compose the instruction block plus the numbered list of responses.
        def build_prompt_content(entries, question)
          prompt = ""
          prompt << "Question: #{question}\n" if question
          prompt << <<~INSTRUCTIONS
            For each of the following customer responses, classify the sentiment as Positive, Neutral, or Negative, and assign a score between -1.0 (very negative) and 1.0 (very positive).

            Reply with a numbered list like:
            1. Positive (0.9)
            2. Negative (-0.8)
            3. Neutral (0.0)

          INSTRUCTIONS

          entries.each_with_index do |entry, index|
            prompt << "#{index + 1}. \"#{entry[:answer]}\"\n"
          end

          prompt
        end

        # Parse lines like "1. Positive (0.9)" into { label:, score: } hashes,
        # padding with neutral entries when the model returned too few lines
        # and truncating if it returned extras.
        def parse_sentiments(content, expected_count)
          sentiments = []

          content.to_s.strip.split(/\r?\n/).each do |line|
            if line.strip =~ /^\d+[\.:)]?\s*(Positive|Negative|Neutral)\s*\(([-\d\.]+)\)/i
              label = $1.downcase.to_sym
              score = $2.to_f
              sentiments << { label: label, score: score }
            end
          end

          if sentiments.size != expected_count
            @logger.warn "Expected #{expected_count} results, got #{sentiments.size}. Padding with neutral."
            while sentiments.size < expected_count
              sentiments << { label: :neutral, score: 0.0 }
            end
          end

          sentiments.first(expected_count)
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'sentimental'
2
+
3
module SentimentInsights
  module Clients
    module Sentiment
      # Client that uses the Sentimental gem for local sentiment analysis.
      class SentimentalClient
        def initialize
          @analyzer = Sentimental.new
          @analyzer.load_defaults # load built-in positive/negative word scores
        end

        # Analyzes each entry's answer text and returns an array of sentiment results.
        # (A leftover debug `puts` that polluted stdout on every call was removed.)
        # @param entries [Array<Hash>] An array of response hashes (each with :answer).
        # @param question [String, nil] (unused) Global question context, not needed for local analysis.
        # @return [Array<Hash>] An array of hashes with sentiment classification and score for each entry.
        def analyze_entries(entries, question: nil)
          entries.map do |entry|
            text = entry[:answer].to_s.strip
            label = @analyzer.sentiment(text) # :positive, :neutral, or :negative
            # Collapse the label to a coarse signed score.
            score = case label
                    when :positive then 1.0
                    when :negative then -1.0
                    else 0.0
                    end
            { label: label, score: score }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module SentimentInsights
  # Gem-wide settings: which provider to use plus the credentials/region
  # each provider needs. All attributes are plain read/write accessors.
  class Configuration
    attr_accessor :provider, :openai_api_key, :aws_region

    # Seed sensible defaults: OpenAI as the provider, the API key taken
    # from the environment, and AWS pointed at us-east-1.
    def initialize
      @provider = :openai
      @openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
      @aws_region = "us-east-1"
    end
  end
end
@@ -0,0 +1,66 @@
1
module SentimentInsights
  module Insights
    # Extracts and summarizes named entities from survey responses
    class Entities
      # @param provider [Symbol, nil] :openai, :aws, or :sentimental; falls back
      #   to the global configuration, then :sentimental
      # @param provider_client [Object, nil] injectable client responding to
      #   #extract_batch. When given it is used as-is and the provider lookup
      #   (including the global configuration read) is skipped entirely —
      #   the original evaluated the configuration even when a client was injected.
      def initialize(provider: nil, provider_client: nil)
        @provider_client = provider_client ||
                           build_client(provider || SentimentInsights.configuration&.provider || :sentimental)
      end

      # Extract named entities and build summarized output
      # @param entries [Array<Hash>] each with :answer and optional :segment
      # @param question [String, nil] optional question context forwarded to the client
      # @return [Hash] { entities: [...], responses: [...] }
      def extract(entries, question: nil)
        entries = entries.to_a
        raw_result = @provider_client.extract_batch(entries, question: question)
        # (debug `puts raw_result` removed — it dumped every payload to stdout)

        responses = raw_result[:responses] || []
        entities = raw_result[:entities] || []

        # Index responses by ID for O(1) mention lookup
        response_index = responses.each_with_object({}) { |r, h| h[r[:id]] = r }

        enriched_entities = entities.map do |entity_entry|
          mentions = entity_entry[:mentions] || []
          mention_responses = mentions.map { |id| response_index[id] }.compact

          # Count how this entity's mentions distribute across each segment dimension
          segment_dist = Hash.new { |h, k| h[k] = Hash.new(0) }

          mention_responses.each do |resp|
            (resp[:segment] || {}).each do |seg_key, seg_val|
              segment_dist[seg_key][seg_val] += 1
            end
          end

          {
            entity: entity_entry[:entity],
            type: entity_entry[:type],
            mentions: mentions,
            summary: {
              total_mentions: mentions.size,
              segment_distribution: segment_dist
            }
          }
        end

        {
          entities: enriched_entities,
          responses: responses
        }
      end

      private

      # Lazily require and construct the concrete client for the chosen provider.
      # @raise [NotImplementedError] for :sentimental (no entity support)
      # @raise [ArgumentError] for unknown providers
      def build_client(effective_provider)
        case effective_provider
        when :openai
          require_relative '../clients/entities/open_ai_client'
          Clients::Entities::OpenAIClient.new
        when :aws
          require_relative '../clients/entities/aws_client'
          Clients::Entities::AwsClient.new
        when :sentimental
          raise NotImplementedError, "Entity recognition is not supported for the 'sentimental' provider"
        else
          raise ArgumentError, "Unsupported provider: #{effective_provider}"
        end
      end
    end
  end
end